12#include "gadgetconfig.h"
21#include "../data/allvars.h"
22#include "../data/dtypes.h"
23#include "../data/intposconvert.h"
24#include "../data/mymalloc.h"
25#include "../logs/logs.h"
26#include "../logs/timer.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/mpi_utils.h"
29#include "../ngbtree/ngbtree.h"
30#include "../sort/cxxsort.h"
31#include "../sph/kernel.h"
32#include "../sph/sph.h"
33#include "../system/system.h"
40inline int sph::sph_hydro_evaluate_particle_node_opening_criterion(pinfo &pdat,
ngbnode *nop)
54 for(
int i = 0; i < 3; i++)
56 search_min[i] = pdat.searchcenter[i] - inthsml;
57 search_range[i] = inthsml + inthsml;
66 if(left[0] > search_range[0] && right[0] > left[0])
73 if(left[1] > search_range[1] && right[1] > left[1])
80 if(left[2] > search_range[2] && right[2] > left[2])
86inline void sph::sph_hydro_check_particle_particle_interaction(pinfo &pdat,
int p,
int p_type,
unsigned char shmrank)
88#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
89 if(skip_actual_force_computation)
110 double rad2 = posdiff[0] * posdiff[0] + posdiff[1] * posdiff[1] + posdiff[2] * posdiff[2];
111 if(rad2 > distsq || rad2 == 0)
117 int n = pdat.numngb++;
119 Ngbhydrodat[n].SphCore = SphP;
120 Ngbhydrodat[n].IntPos = P->
IntPos;
121 Ngbhydrodat[n].Mass = P->
getMass();
137 double rad2 = posdiff[0] * posdiff[0] + posdiff[1] * posdiff[1] + posdiff[2] * posdiff[2];
138 if(rad2 > distsq || rad2 == 0)
144 int n = pdat.numngb++;
146 Ngbhydrodat[n].SphCore = &foreignpoint->
SphCore;
147 Ngbhydrodat[n].IntPos = foreignpoint->
IntPos;
148 Ngbhydrodat[n].Mass = foreignpoint->
Mass;
149 Ngbhydrodat[n].TimeBinHydro = foreignpoint->
TimeBinHydro;
155inline void sph::sph_hydro_open_node(pinfo &pdat,
ngbnode *nop,
int mintopleafnode,
int committed)
165 "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d "
166 "first_nontoplevelnode=%d",
170 unsigned char next_shmrank;
177 next_shmrank = shmrank;
210 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
215 sph_hydro_interact(pdat, p, type, shmrank, mintopleafnode, committed);
218 shmrank = next_shmrank;
222inline void sph::sph_hydro_interact(pinfo &pdat,
int no,
char no_type,
unsigned char shmrank,
int mintopleafnode,
int committed)
226 sph_hydro_check_particle_particle_interaction(pdat, no, no_type, shmrank);
239 int openflag = sph_hydro_evaluate_particle_node_opening_criterion(pdat, nop);
248 Terminate(
"this should not happen any more");
259 int min_buffer_space =
315 Ngbhydrodat = (ngbdata_hydro *)
Mem.mymalloc(
"Ngbhydrodat",
MAX_NGBS *
sizeof(ngbdata_hydro));
324 for(
int i = 0; i < ntarget; i++)
326 int target = targetlist[i];
328 clear_hydro_result(&
Tp->
SphP[target]);
337#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
338 workstack_data *WorkStackBak = (workstack_data *)
Mem.mymalloc(
"WorkStackBak",
NumOnWorkStack *
sizeof(workstack_data));
347#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
348 for(
int rep = 0; rep < 2; rep++)
352 skip_actual_force_computation =
true;
356 skip_actual_force_computation =
false;
375 int min_buffer_space =
377 if(min_buffer_space >= committed)
382 int mintopleaf =
WorkStack[item].MinTopLeafNode;
386 get_pinfo(target, pdat);
400 Terminate(
"item=%d: no=%d now we should be able to open it!", item, no);
403 sph_hydro_open_node(pdat, nop, mintopleaf, committed);
406 hydro_evaluate_kernel(pdat);
413 Terminate(
"Can't even process a single particle");
434#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
439#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
440 Mem.myfree(WorkStackBak);
443 Mem.myfree(Ngbhydrodat);
446 for(
int i = 0; i < ntarget; i++)
448 int target = targetlist[i];
461 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX,
D->Communicator);
475 D->mpi_printf(
"SPH-HYDRO: hydro-force computation done. took %8.3f\n",
Logs.
timediff(ta, tb));
477 struct detailed_timings
479 double tree, wait, fetch, all;
482 double fillfacFgnNodes, fillfacFgnPoints;
484 detailed_timings timer, tisum, timax;
489 timer.all = timer.tree + timer.wait + timer.fetch +
TIMER_DIFF(CPU_HYDRO);
496 MPI_Reduce((
double *)&timer, (
double *)&tisum, (
int)(
sizeof(detailed_timings) /
sizeof(
double)), MPI_DOUBLE, MPI_SUM, 0,
498 MPI_Reduce((
double *)&timer, (
double *)&timax, (
int)(
sizeof(detailed_timings) /
sizeof(
double)), MPI_DOUBLE, MPI_MAX, 0,
507 fprintf(
Logs.
FdHydro,
" work-load balance: %g part/sec: raw=%g, effective=%g\n",
511 " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
512 "fill=%g cycles=%d\n",
513 timax.numnodes, timax.numnodes /
MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes /
D->NTask,
514 timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints /
D->NTask, timax.fillfacFgnPoints, max_ncycles);
515 fprintf(
Logs.
FdHydro,
" avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g sec\n", tisum.all /
D->NTask,
516 tisum.tree /
D->NTask, tisum.wait /
D->NTask, tisum.fetch /
D->NTask);
523#ifdef EXPLICIT_VECTORIZATION
524void sph::hydro_evaluate_kernel(pinfo &pdat)
534 double shinv, shinv3, shinv4;
535 kernel_hinv(SphP_i->
Hsml, &shinv, &shinv3, &shinv4);
541 Vec4d dwnorm(
NORM * shinv3);
542 Vec4d dwknorm(
NORM * shinv4);
546#ifdef PRESSURE_ENTROPY_SPH
547 Vec4d p_over_rho2_i((
double)SphP_i->
Pressure / ((
double)SphP_i->PressureSphDensity * (
double)SphP_i->PressureSphDensity));
552 Vec4d sound_i(SphP_i->
Csnd);
553 Vec4d h_i(SphP_i->
Hsml);
556 for(
int i = 0; i <
NUMDIMS; i++)
561#ifdef PRESSURE_ENTROPY_SPH
562 Vec4d DhsmlDerivedDensityFactor_i(SphP_i->DhsmlDerivedDensityFactor);
563 Vec4d EntropyToInvGammaPred_i(SphP_i->EntropyToInvGammaPred);
566#if !defined(NO_SHEAR_VISCOSITY_LIMITER) && !defined(TIMEDEP_ART_VISC)
570#ifdef TIMEDEP_ART_VISC
571 Vec4d alpha_i(SphP_i->Alpha);
575 double dacc[3] = {0};
577 Vec4d MaxSignalVel = sound_i;
579 const int vector_length = 4;
580 const int array_length = (pdat.numngb + vector_length - 1) & (-vector_length);
582 for(
int n = pdat.numngb; n < array_length; n++)
583 Ngbhydrodat[n] = Ngbhydrodat[0];
585 for(
int n = 0; n < array_length; n += vector_length)
592 ngbdata_hydro *P0_j = &Ngbhydrodat[n + 0];
593 ngbdata_hydro *P1_j = &Ngbhydrodat[n + 1];
594 ngbdata_hydro *P2_j = &Ngbhydrodat[n + 2];
595 ngbdata_hydro *P3_j = &Ngbhydrodat[n + 3];
599 double posdiff[array_length][3];
600 for(
int i = 0; i < 4; i++)
605 for(
int i = 0; i <
NUMDIMS; i++)
607 dpos[i] = Vec4d(posdiff[0][i], posdiff[1][i], posdiff[2][i], posdiff[3][i]);
612 for(
int i = 0; i <
NUMDIMS; i++)
614 r2 += dpos[i] * dpos[i];
620 for(
int i = 0; i <
NUMDIMS; i++)
627#ifdef PRESSURE_ENTROPY_SPH
628 Vec4d rho_press_j(ngb0->PressureSphDensity, ngb1->PressureSphDensity, ngb2->PressureSphDensity, ngb3->PressureSphDensity);
629 Vec4d p_over_rho2_j = pressure / (rho_press_j * rho_press_j);
631 Vec4d p_over_rho2_j = pressure / (rho_j * rho_j);
636 kernel_main_vector(u, dwnorm, dwknorm, &wk_i, &dwk_i);
637 Vec4db decision = (r < h_i);
638 Vec4d fac = select(decision, 1., 0.);
643 Vec4d hinv_j = 1 / h_j;
645 Vec4d hinv3_j = hinv_j * hinv_j * hinv_j;
649 Vec4d hinv3_j = hinv_j * hinv_j;
653 Vec4d hinv3_j = hinv_j;
655 Vec4d hinv4_j = hinv3_j * hinv_j;
659 kernel_main_vector(u,
NORM * hinv3_j,
NORM * hinv4_j, &wk_j, &dwk_j);
660 decision = (r < h_j);
661 fac = select(decision, 1., 0.);
666 Vec4d vsig = sound_i + sound_j;
667 if(n + vector_length > pdat.numngb)
669 wk_i.cutoff(vector_length - (array_length - pdat.numngb));
670 dwk_i.cutoff(vector_length - (array_length - pdat.numngb));
671 wk_j.cutoff(vector_length - (array_length - pdat.numngb));
672 dwk_j.cutoff(vector_length - (array_length - pdat.numngb));
673 vsig.cutoff(vector_length - (array_length - pdat.numngb));
676 Vec4d dwk_ij = 0.5 * (dwk_i + dwk_j);
678 MaxSignalVel = max(MaxSignalVel, vsig);
683 for(
int i = 0; i <
NUMDIMS; i++)
685 dv[i] = v_i[i] - v_j[i];
689 for(
int i = 0; i <
NUMDIMS; i++)
691 vdotr2 += dv[i] * dpos[i];
697 decision = (vdotr2 < 0);
698 Vec4d viscosity_fac = select(decision, 1, 0);
702 Vec4d mu_ij =
fac_mu * vdotr2 / r;
706#if defined(NO_SHEAR_VISCOSITY_LIMITER) || defined(TIMEDEP_ART_VISC)
712 Vec4d f_j =
abs(DivVel_j) / (
abs(DivVel_j) + CurlVel_j + 0.0001 * sound_j /
fac_mu * hinv_j);
715#ifdef TIMEDEP_ART_VISC
716 Vec4d alpha_j(ngb0->Alpha, ngb1->Alpha, ngb2->Alpha, ngb3->Alpha);
717 Vec4d BulkVisc_ij = 0.5 * (alpha_i + alpha_j);
722 Vec4d rho_ij_inv = 2.0 / (rho_i + rho_j);
723 visc = 0.25 * BulkVisc_ij * vsig * (-mu_ij) * rho_ij_inv * (f_i + f_j);
724 Vec4d mass_j(P0_j->Mass, P1_j->Mass, P2_j->Mass, P3_j->Mass);
725#ifdef VISCOSITY_LIMITER_FOR_LARGE_TIMESTEPS
727 Vec4i timeBin_j(P0_j->TimeBinHydro, P1_j->TimeBinHydro, P2_j->TimeBinHydro, P3_j->TimeBinHydro);
729 Vec4i timebin = max(timeBin_i, timeBin_j);
733 Vec4ib decision_i = (timebin != 0);
734 Vec4i factor_timebin = select(decision_i, Vec4i(1), Vec4i(0));
737 decision = (dt > 0 && dwk_ij < 0);
739 Vec4d visc_alternavtive = 0.5 * fac_vsic_fix * vdotr2 / ((P_i->
getMass() + mass_j) * dwk_ij * r * dt);
741 Vec4d visc2 = select(decision, visc_alternavtive, visc);
742 visc = min(visc, visc2);
745 Vec4d hfc_visc = mass_j * visc * dwk_ij / r * viscosity_fac;
747#ifndef PRESSURE_ENTROPY_SPH
749 dwk_i *= DhsmlDensityFactor_i;
752 dwk_j *= DhsmlDensityFactor_j;
754 Vec4d hfc = mass_j * (p_over_rho2_i * dwk_i + p_over_rho2_j * dwk_j) / r + hfc_visc;
756 Vec4d EntropyToInvGammaPred_j(ngb0->EntropyToInvGammaPred, ngb1->EntropyToInvGammaPred, ngb2->EntropyToInvGammaPred,
757 ngb3->EntropyToInvGammaPred);
758 Vec4d DhsmlDerivedDensityFactor_j(ngb0->DhsmlDerivedDensityFactor, ngb1->DhsmlDerivedDensityFactor,
759 ngb2->DhsmlDerivedDensityFactor, ngb3->DhsmlDerivedDensityFactor);
762 (p_over_rho2_i * dwk_i * EntropyToInvGammaPred_j / EntropyToInvGammaPred_i +
763 p_over_rho2_j * dwk_j * EntropyToInvGammaPred_i / EntropyToInvGammaPred_j) /
768 (p_over_rho2_i * dwk_i * SphP_i->DhsmlDerivedDensityFactor + p_over_rho2_j * dwk_j * DhsmlDerivedDensityFactor_j) / r;
774 for(
int i = 0; i <
NUMDIMS; i++)
776 dacc[i] += horizontal_add(-hfc * dpos[i]);
778 dentr += horizontal_add(0.5 * (hfc_visc)*vdotr2);
786 for(
int i = 0; i < 4; i++)
800void sph::hydro_evaluate_kernel(pinfo &pdat)
810#ifdef PRESSURE_ENTROPY_SPH
811 double p_over_rho2_i = (double)SphP_i->
Pressure / ((
double)SphP_i->PressureSphDensity * (double)SphP_i->PressureSphDensity);
827 double MaxSignalVel = kernel.
sound_i;
829 for(
int n = 0; n < pdat.numngb; n++)
832 ngbdata_hydro *P_j = &Ngbhydrodat[n];
838 kernel.
dx = posdiff[0];
839 kernel.
dy = posdiff[1];
840 kernel.
dz = posdiff[2];
842 double r2 = kernel.
dx * kernel.
dx + kernel.
dy * kernel.
dy + kernel.
dz * kernel.
dz;
845 if(r2 < kernel.
h_i * kernel.
h_i || r2 < kernel.
h_j * kernel.
h_j)
850#ifdef PRESSURE_ENTROPY_SPH
851 double p_over_rho2_j =
852 (double)SphP_j->
Pressure / ((
double)SphP_j->PressureSphDensity * (double)SphP_j->PressureSphDensity);
868 double hinv, hinv3, hinv4;
869 if(kernel.
r < kernel.
h_i)
871 kernel_hinv(kernel.
h_i, &hinv, &hinv3, &hinv4);
872 double u = kernel.
r * hinv;
881 if(kernel.
r < kernel.
h_j)
883 kernel_hinv(kernel.
h_j, &hinv, &hinv3, &hinv4);
884 double u = kernel.
r * hinv;
897 if(kernel.
vsig > MaxSignalVel)
898 MaxSignalVel = kernel.
vsig;
906 kernel.
vsig -= 3 * mu_ij;
908#if defined(NO_SHEAR_VISCOSITY_LIMITER) || defined(TIMEDEP_ART_VISC)
919#ifdef TIMEDEP_ART_VISC
920 double BulkVisc_ij = 0.5 * (SphP_i->Alpha + SphP_j->Alpha);
926 visc = 0.25 * BulkVisc_ij * kernel.
vsig * (-mu_ij) * kernel.
rho_ij_inv * (f_i + f_j);
927#ifdef VISCOSITY_LIMITER_FOR_LARGE_TIMESTEPS
928 int timebin = std::max<int>(P_i->
TimeBinHydro, P_j->TimeBinHydro);
932 if(dt > 0 && kernel.
dwk_ij < 0)
934 visc = std::min<double>(
935 visc, 0.5 * fac_vsic_fix * kernel.
vdotr2 / ((P_i->
getMass() + P_j->Mass) * kernel.
dwk_ij * kernel.
r * dt));
940 double hfc_visc = P_j->Mass * visc * kernel.
dwk_ij / kernel.
r;
942#ifndef PRESSURE_ENTROPY_SPH
947 double hfc = P_j->Mass * (p_over_rho2_i * kernel.
dwk_i + p_over_rho2_j * kernel.
dwk_j) / kernel.
r + hfc_visc;
950 double hfc = P_j->Mass *
951 (p_over_rho2_i * kernel.
dwk_i * SphP_j->EntropyToInvGammaPred / SphP_i->EntropyToInvGammaPred +
952 p_over_rho2_j * kernel.
dwk_j * SphP_i->EntropyToInvGammaPred / SphP_j->EntropyToInvGammaPred) /
957 (p_over_rho2_i * kernel.
dwk_i * SphP_i->DhsmlDerivedDensityFactor +
958 p_over_rho2_j * kernel.
dwk_j * SphP_j->DhsmlDerivedDensityFactor) /
965 daccx += (-hfc * kernel.
dx);
966 daccy += (-hfc * kernel.
dy);
967 daccz += (-hfc * kernel.
dz);
968 dentr += (0.5 * (hfc_visc)*kernel.
vdotr2);
989 for(
int k = 0; k < 3; k++)
global_data_all_processes All
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
MyIntPosType nearest_image_intpos_to_intpos_Y(const MyIntPosType a, const MyIntPosType b)
MyIntPosType nearest_image_intpos_to_intpos_Z(const MyIntPosType a, const MyIntPosType b)
MyIntPosType nearest_image_intpos_to_intpos_X(const MyIntPosType a, const MyIntPosType b)
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
int * GetNodeIDForSimulCommRank
int drift_particle(particle_data *P, sph_particle_data *SphP, integertime time1, bool ignore_light_cone=false)
This function drifts a particle i to time1.
TimeBinData TimeBinsGravity
TimeBinData TimeBinsHydro
void hydro_forces_determine(int ntarget, int *targetlist)
long long sum_NumForeignPoints
workstack_data * WorkStack
sph_particle_data * get_SphPp(int n, unsigned char shmrank)
void cleanup_shared_memory_access(void)
domain< simparticles > * D
pdata get_Pp(int n, unsigned char shmrank)
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
int * get_nextnodep(unsigned char shmrank)
long long sum_NumForeignNodes
void tree_fetch_foreign_nodes(enum ftype fetch_type)
void prepare_shared_memory_access(void)
foreign_sphpoint_data * Foreign_Points
fetch_data * StackToFetch
int AllocWorkStackBaseHigh
ngbnode * get_nodep(int no)
void tree_add_to_fetch_stack(ngbnode *nop, int nodetoopen, unsigned char shmrank)
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
void tree_initialize_leaf_node_access_info(void)
int AllocWorkStackBaseLow
double mycxxsort(T *begin, T *end, Tcomp comp)
#define LEVEL_ALWAYS_OPEN
#define TREE_MIN_WORKSTACK_SIZE
#define NODE_TYPE_FETCHED_NODE
#define TREE_EXPECTED_CYCLES
#define NODE_TYPE_FETCHED_PARTICLE
#define NODE_TYPE_LOCAL_NODE
#define NODE_TYPE_LOCAL_PARTICLE
#define TIMER_START(counter)
Starts the timer counter.
#define TIMER_STOP(counter)
Stops the timer counter.
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
expr pow(half base, half exp)
long long GlobalNActiveParticles
unsigned char nextnode_shmrank
std::atomic< unsigned char > cannot_be_opened_locally
vector< MyIntPosType > center
unsigned char sibling_shmrank
unsigned char Nextnode_shmrank
sph_particle_data_hydrocore SphCore
double cf_atime2_hubble_a
int HighestSynchronizedTimeBin
int ComovingIntegrationOn
std::atomic< integertime > Ti_Current
MySignedIntPosType center_offset_max[3]
MySignedIntPosType center_offset_min[3]
void drift_node(integertime time1, simparticles *Sp)
integertime get_Ti_Current(void)
unsigned char getTimeBinHydro(void)
unsigned char getType(void)
MyFloat DhsmlDensityFactor
void myflush(FILE *fstream)
#define TREE_NUM_BEFORE_NODESPLIT