GADGET-4
hydra.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#include <math.h>
15#include <mpi.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <algorithm>
20
21#include "../data/allvars.h"
22#include "../data/dtypes.h"
23#include "../data/intposconvert.h"
24#include "../data/mymalloc.h"
25#include "../logs/logs.h"
26#include "../logs/timer.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/mpi_utils.h"
29#include "../ngbtree/ngbtree.h"
30#include "../sort/cxxsort.h"
31#include "../sph/kernel.h"
32#include "../sph/sph.h"
33#include "../system/system.h"
34
40inline int sph::sph_hydro_evaluate_particle_node_opening_criterion(pinfo &pdat, ngbnode *nop)
41{
42 if(nop->level <= LEVEL_ALWAYS_OPEN) // always open the root node (note: full node length does not fit in the integer type)
43 return NODE_OPEN;
44
45 if(nop->Ti_Current != All.Ti_Current)
47
48 MyNgbTreeFloat dist = std::max<MyNgbTreeFloat>(nop->MaxHsml, pdat.hsml);
49
50 MyIntPosType search_min[3], search_range[3];
51
52 MyIntPosType inthsml = dist * Tp->FacCoordToInt;
53
54 for(int i = 0; i < 3; i++)
55 {
56 search_min[i] = pdat.searchcenter[i] - inthsml;
57 search_range[i] = inthsml + inthsml;
58 }
59
60 MyIntPosType left[3], right[3];
61
62 left[0] = Tp->nearest_image_intpos_to_intpos_X(nop->center_offset_min[0] + nop->center[0], search_min[0]);
63 right[0] = Tp->nearest_image_intpos_to_intpos_X(nop->center_offset_max[0] + nop->center[0], search_min[0]);
64
65 /* check whether we can stop walking along this branch */
66 if(left[0] > search_range[0] && right[0] > left[0])
67 return NODE_DISCARD;
68
69 left[1] = Tp->nearest_image_intpos_to_intpos_Y(nop->center_offset_min[1] + nop->center[1], search_min[1]);
70 right[1] = Tp->nearest_image_intpos_to_intpos_Y(nop->center_offset_max[1] + nop->center[1], search_min[1]);
71
72 /* check whether we can stop walking along this branch */
73 if(left[1] > search_range[1] && right[1] > left[1])
74 return NODE_DISCARD;
75
76 left[2] = Tp->nearest_image_intpos_to_intpos_Z(nop->center_offset_min[2] + nop->center[2], search_min[2]);
77 right[2] = Tp->nearest_image_intpos_to_intpos_Z(nop->center_offset_max[2] + nop->center[2], search_min[2]);
78
79 /* check whether we can stop walking along this branch */
80 if(left[2] > search_range[2] && right[2] > left[2])
81 return NODE_DISCARD;
82
83 return NODE_OPEN;
84}
85
86inline void sph::sph_hydro_check_particle_particle_interaction(pinfo &pdat, int p, int p_type, unsigned char shmrank)
87{
88#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
89 if(skip_actual_force_computation)
90 return;
91#endif
92
93 if(p_type == NODE_TYPE_LOCAL_PARTICLE) /* local particle */
94 {
95 particle_data *P = get_Pp(p, shmrank);
96 sph_particle_data *SphP = get_SphPp(p, shmrank);
97
98 if(P->getType() > 0)
99 return;
100
101 if(P->get_Ti_Current() != All.Ti_Current)
102 Tp->drift_particle(P, SphP, All.Ti_Current); // this function avoids race conditions
103
104 MyNgbTreeFloat dist = std::max<MyNgbTreeFloat>(SphP->Hsml, pdat.hsml);
105 MyNgbTreeFloat distsq = dist * dist;
106
107 double posdiff[3];
108 Tp->nearest_image_intpos_to_pos(P->IntPos, pdat.searchcenter, posdiff); /* converts the integer distance to floating point */
109
110 double rad2 = posdiff[0] * posdiff[0] + posdiff[1] * posdiff[1] + posdiff[2] * posdiff[2];
111 if(rad2 > distsq || rad2 == 0)
112 return;
113
114 if(pdat.numngb >= MAX_NGBS)
115 Terminate("pdat.numngb >= MAX_NGBS");
116
117 int n = pdat.numngb++;
118
119 Ngbhydrodat[n].SphCore = SphP;
120 Ngbhydrodat[n].IntPos = P->IntPos;
121 Ngbhydrodat[n].Mass = P->getMass();
122#ifndef LEAN
123 Ngbhydrodat[n].TimeBinHydro = P->TimeBinHydro;
124#endif
125 }
126 else if(p_type == NODE_TYPE_FETCHED_PARTICLE)
127 {
128 foreign_sphpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
129
130 MyNgbTreeFloat dist = std::max<MyNgbTreeFloat>(foreignpoint->SphCore.Hsml, pdat.hsml);
131 MyNgbTreeFloat distsq = dist * dist;
132
133 /* converts the integer distance to floating point */
134 double posdiff[3];
135 Tp->nearest_image_intpos_to_pos(foreignpoint->IntPos, pdat.searchcenter, posdiff);
136
137 double rad2 = posdiff[0] * posdiff[0] + posdiff[1] * posdiff[1] + posdiff[2] * posdiff[2];
138 if(rad2 > distsq || rad2 == 0)
139 return;
140
141 if(pdat.numngb >= MAX_NGBS)
142 Terminate("pdat.numngb >= MAX_NGBS");
143
144 int n = pdat.numngb++;
145
146 Ngbhydrodat[n].SphCore = &foreignpoint->SphCore;
147 Ngbhydrodat[n].IntPos = foreignpoint->IntPos;
148 Ngbhydrodat[n].Mass = foreignpoint->Mass;
149 Ngbhydrodat[n].TimeBinHydro = foreignpoint->TimeBinHydro;
150 }
151 else
152 Terminate("unexpected");
153}
154
155inline void sph::sph_hydro_open_node(pinfo &pdat, ngbnode *nop, int mintopleafnode, int committed)
156{
157 /* open node */
158 int p = nop->nextnode;
159 unsigned char shmrank = nop->nextnode_shmrank;
160
161 while(p != nop->sibling || (shmrank != nop->sibling_shmrank && nop->sibling >= MaxPart + D->NTopnodes))
162 {
163 if(p < 0)
164 Terminate(
165 "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d "
166 "first_nontoplevelnode=%d",
167 p, nop->sibling, nop->nextnode, shmrank, nop->sibling_shmrank, nop->OriginTask, MaxPart + D->NTopnodes);
168
169 int next;
170 unsigned char next_shmrank;
171 char type;
172
173 if(p < MaxPart) /* a local particle */
174 {
175 /* note: here shmrank cannot change */
176 next = get_nextnodep(shmrank)[p];
177 next_shmrank = shmrank;
179 }
180 else if(p < MaxPart + MaxNodes) /* an internal node */
181 {
182 ngbnode *nop = get_nodep(p, shmrank);
183 next = nop->sibling;
184 next_shmrank = nop->sibling_shmrank;
186 }
187 else if(p >= ImportedNodeOffset && p < EndOfTreePoints) /* an imported Treepoint particle */
188 {
189 Terminate("not expected for SPH");
190 }
191 else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
192 {
193 ngbnode *nop = get_nodep(p, shmrank);
194 next = nop->sibling;
195 next_shmrank = nop->sibling_shmrank;
197 }
198 else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
199 {
200 foreign_sphpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
201
202 next = foreignpoint->Nextnode;
203 next_shmrank = foreignpoint->Nextnode_shmrank;
205 }
206 else
207 {
208 /* a pseudo point */
209 Terminate(
210 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
211 "shmrank=%d",
213 }
214
215 sph_hydro_interact(pdat, p, type, shmrank, mintopleafnode, committed);
216
217 p = next;
218 shmrank = next_shmrank;
219 }
220}
221
222inline void sph::sph_hydro_interact(pinfo &pdat, int no, char no_type, unsigned char shmrank, int mintopleafnode, int committed)
223{
224 if(no_type <= NODE_TYPE_FETCHED_PARTICLE) // we are interacting with a particle
225 {
226 sph_hydro_check_particle_particle_interaction(pdat, no, no_type, shmrank);
227 }
228 else // we are interacting with a node
229 {
230 ngbnode *nop = get_nodep(no, shmrank);
231
232 if(nop->not_empty == 0)
233 return;
234
235 if(no < MaxPart + MaxNodes) // we have a top-levelnode
236 if(nop->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
237 mintopleafnode = no;
238
239 int openflag = sph_hydro_evaluate_particle_node_opening_criterion(pdat, nop);
240
241 if(openflag == NODE_OPEN) /* we need to open it */
242 {
243 if(nop->cannot_be_opened_locally.load(std::memory_order_acquire))
244 {
245 // are we in the same shared memory node?
247 {
248 Terminate("this should not happen any more");
249 }
250 else
251 {
252 tree_add_to_fetch_stack(nop, no, shmrank); // will only add unique copies
253
254 tree_add_to_work_stack(pdat.target, no, shmrank, mintopleafnode);
255 }
256 }
257 else
258 {
259 int min_buffer_space =
261
262 if(min_buffer_space >= committed + 8 * TREE_NUM_BEFORE_NODESPLIT)
263 sph_hydro_open_node(pdat, nop, mintopleafnode, committed + 8 * TREE_NUM_BEFORE_NODESPLIT);
264 else
265 tree_add_to_work_stack(pdat.target, no, shmrank, mintopleafnode);
266 }
267 }
268 }
269}
270
271void sph::hydro_forces_determine(int ntarget, int *targetlist)
272{
274 TIMER_START(CPU_HYDRO);
275
276 D->mpi_printf("SPH-HYDRO: Begin hydro-force calculation. (presently allocated=%g MB)\n", Mem.getAllocatedBytesInMB());
277 D->mpi_printf("SPH-HYDRO: global Nhydro=%llu (task zero: NumGas=%d, Nhydro=%d)\n", Tp->TimeBinsHydro.GlobalNActiveParticles,
278 Tp->NumGas, ntarget);
279
280 double ta = Logs.second();
281
282 // let's grab at most half the still available memory for imported points and nodes
283 int nspace = (0.33 * Mem.FreeBytes) / (sizeof(ngbnode) + 8 * sizeof(foreign_sphpoint_data));
284
285 MaxForeignNodes = nspace;
286 MaxForeignPoints = 8 * nspace;
287 NumForeignNodes = 0;
289
292
293 /* the following two arrays hold imported tree nodes and imported points to augment the local tree */
294 Foreign_Nodes = (ngbnode *)Mem.mymalloc_movable(&Foreign_Nodes, "Foreign_Nodes", MaxForeignNodes * sizeof(ngbnode));
295 Foreign_Points = (foreign_sphpoint_data *)Mem.mymalloc_movable(&Foreign_Points, "Foreign_Points",
297
299
300 max_ncycles = 0;
301
303
305 {
306 fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;
307 fac_vsic_fix = All.cf_hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);
308 }
309 else
310 {
311 fac_mu = 1.0;
312 fac_vsic_fix = 1.0;
313 }
314
315 Ngbhydrodat = (ngbdata_hydro *)Mem.mymalloc("Ngbhydrodat", MAX_NGBS * sizeof(ngbdata_hydro));
316
317 NumOnWorkStack = 0;
321
322 WorkStack = (workstack_data *)Mem.mymalloc("WorkStack", AllocWorkStackBaseHigh * sizeof(workstack_data));
323
324 for(int i = 0; i < ntarget; i++)
325 {
326 int target = targetlist[i];
327
328 clear_hydro_result(&Tp->SphP[target]);
329
330 WorkStack[NumOnWorkStack].Target = target;
333 WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
335 }
336
337#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
338 workstack_data *WorkStackBak = (workstack_data *)Mem.mymalloc("WorkStackBak", NumOnWorkStack * sizeof(workstack_data));
339 int NumOnWorkStackBak = NumOnWorkStack;
340 memcpy(WorkStackBak, WorkStack, NumOnWorkStack * sizeof(workstack_data));
341#endif
342
343 // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large)
345 StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", MaxOnFetchStack * sizeof(fetch_data));
346
347#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
348 for(int rep = 0; rep < 2; rep++)
349 {
350 if(rep == 0)
351 {
352 skip_actual_force_computation = true;
353 }
354 else
355 {
356 skip_actual_force_computation = false;
357 NumOnWorkStack = NumOnWorkStackBak;
358 memcpy(WorkStack, WorkStackBak, NumOnWorkStack * sizeof(workstack_data));
359 }
360#endif
361
362 while(NumOnWorkStack > 0) // repeat until we are out of work
363 {
364 NewOnWorkStack = 0; // gives the new entries
365 NumOnFetchStack = 0;
367
368 TIMER_START(CPU_HYDROWALK);
369
370 int item = 0;
371
372 while(item < NumOnWorkStack)
373 {
374 int committed = 8 * TREE_NUM_BEFORE_NODESPLIT;
375 int min_buffer_space =
377 if(min_buffer_space >= committed)
378 {
379 int target = WorkStack[item].Target;
380 int no = WorkStack[item].Node;
381 int shmrank = WorkStack[item].ShmRank;
382 int mintopleaf = WorkStack[item].MinTopLeafNode;
383 item++;
384
385 pinfo pdat;
386 get_pinfo(target, pdat);
387
388 if(no == MaxPart)
389 {
390 // we have a pristine particle that's processed for the first time
391 sph_hydro_interact(pdat, no, NODE_TYPE_LOCAL_NODE, shmrank, mintopleaf, committed);
392 }
393 else
394 {
395 // we have a node that we previously could not open
396 ngbnode *nop = get_nodep(no, shmrank);
397
399 {
400 Terminate("item=%d: no=%d now we should be able to open it!", item, no);
401 }
402 else
403 sph_hydro_open_node(pdat, nop, mintopleaf, committed);
404 }
405
406 hydro_evaluate_kernel(pdat);
407 }
408 else
409 break;
410 }
411
412 if(item == 0 && NumOnWorkStack > 0)
413 Terminate("Can't even process a single particle");
414
415 TIMER_STOP(CPU_HYDROWALK);
416
417 TIMER_START(CPU_HYDROFETCH);
418
420
421 TIMER_STOP(CPU_HYDROFETCH);
422
423 /* now reorder the workstack such that we are first going to do residual pristine particles, and then
424 * imported nodes that hang below the first leaf nodes */
426 memmove(WorkStack, WorkStack + item, NumOnWorkStack * sizeof(workstack_data));
427
428 /* now let's sort such that we can go deep on top-level node branches, allowing us to clear them out eventually */
430
431 max_ncycles++;
432 }
433
434#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
435 }
436#endif
437
438 Mem.myfree(StackToFetch);
439#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
440 Mem.myfree(WorkStackBak);
441#endif
442 Mem.myfree(WorkStack);
443 Mem.myfree(Ngbhydrodat);
444
445 /* now factor in a prefactor for the computed rates */
446 for(int i = 0; i < ntarget; i++)
447 {
448 int target = targetlist[i];
449
450 double fac = GAMMA_MINUS1 / (All.cf_atime2_hubble_a * pow(Tp->SphP[target].Density, GAMMA_MINUS1));
451
452 Tp->SphP[target].DtEntropy *= fac;
453 }
454
455 /* Now the tree-based hydrodynamical force computation is finished,
456 * output some performance metrics
457 */
458
459 TIMER_START(CPU_HYDROIMBALANCE);
460
461 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator);
462
463 TIMER_STOP(CPU_HYDROIMBALANCE);
464
466
467 /* free temporary buffers */
468 Mem.myfree(Foreign_Points);
469 Mem.myfree(Foreign_Nodes);
470
471 double tb = Logs.second();
472
473 TIMER_STOPSTART(CPU_HYDRO, CPU_LOGS);
474
475 D->mpi_printf("SPH-HYDRO: hydro-force computation done. took %8.3f\n", Logs.timediff(ta, tb));
476
477 struct detailed_timings
478 {
479 double tree, wait, fetch, all;
480 double numnodes;
482 double fillfacFgnNodes, fillfacFgnPoints;
483 };
484 detailed_timings timer, tisum, timax;
485
486 timer.tree = TIMER_DIFF(CPU_HYDROWALK);
487 timer.wait = TIMER_DIFF(CPU_HYDROIMBALANCE);
488 timer.fetch = TIMER_DIFF(CPU_HYDROFETCH);
489 timer.all = timer.tree + timer.wait + timer.fetch + TIMER_DIFF(CPU_HYDRO);
490 timer.numnodes = NumNodes;
491 timer.NumForeignNodes = NumForeignNodes;
492 timer.NumForeignPoints = NumForeignPoints;
493 timer.fillfacFgnNodes = NumForeignNodes / ((double)MaxForeignNodes);
494 timer.fillfacFgnPoints = NumForeignPoints / ((double)MaxForeignPoints);
495
496 MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
497 D->Communicator);
498 MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
499 D->Communicator);
500
502
503 if(D->ThisTask == 0)
504 {
505 fprintf(Logs.FdHydro, "Nf=%9lld highest active timebin=%d total-Nf=%lld\n", Tp->TimeBinsHydro.GlobalNActiveParticles,
507 fprintf(Logs.FdHydro, " work-load balance: %g part/sec: raw=%g, effective=%g\n",
508 timax.tree / ((tisum.tree + 1e-20) / D->NTask), Tp->TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
509 Tp->TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * D->NTask));
510 fprintf(Logs.FdHydro,
511 " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
512 "fill=%g cycles=%d\n",
513 timax.numnodes, timax.numnodes / MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes / D->NTask,
514 timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints / D->NTask, timax.fillfacFgnPoints, max_ncycles);
515 fprintf(Logs.FdHydro, " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g sec\n", tisum.all / D->NTask,
516 tisum.tree / D->NTask, tisum.wait / D->NTask, tisum.fetch / D->NTask);
518 }
519
520 TIMER_STOP(CPU_LOGS);
521}
522
523#ifdef EXPLICIT_VECTORIZATION
524void sph::hydro_evaluate_kernel(pinfo &pdat)
525{
526#ifndef LEAN
527 particle_data *P_i = &Tp->P[pdat.target];
528 sph_particle_data *SphP_i = &Tp->SphP[pdat.target];
529
530 /* the particles needs to be active */
532 Terminate("bummer");
533
534 double shinv, shinv3, shinv4;
535 kernel_hinv(SphP_i->Hsml, &shinv, &shinv3, &shinv4);
536
537 Vec4d hinv(shinv);
538 Vec4d hinv3(shinv3);
539 Vec4d hinv4(shinv4);
540
541 Vec4d dwnorm(NORM * shinv3);
542 Vec4d dwknorm(NORM * shinv4);
543
544 Vec4d rho_i(SphP_i->Density);
545
546#ifdef PRESSURE_ENTROPY_SPH
547 Vec4d p_over_rho2_i((double)SphP_i->Pressure / ((double)SphP_i->PressureSphDensity * (double)SphP_i->PressureSphDensity));
548#else
549 Vec4d p_over_rho2_i((double)SphP_i->Pressure / ((double)SphP_i->Density * (double)SphP_i->Density));
550#endif
551
552 Vec4d sound_i(SphP_i->Csnd);
553 Vec4d h_i(SphP_i->Hsml);
554
555 Vec4d v_i[3];
556 for(int i = 0; i < NUMDIMS; i++)
557 {
558 v_i[i] = SphP_i->VelPred[i];
559 }
560 Vec4d DhsmlDensityFactor_i(SphP_i->DhsmlDensityFactor);
561#ifdef PRESSURE_ENTROPY_SPH
562 Vec4d DhsmlDerivedDensityFactor_i(SphP_i->DhsmlDerivedDensityFactor);
563 Vec4d EntropyToInvGammaPred_i(SphP_i->EntropyToInvGammaPred);
564#endif
565
566#if !defined(NO_SHEAR_VISCOSITY_LIMITER) && !defined(TIMEDEP_ART_VISC)
567 Vec4d f_i(fabs(SphP_i->DivVel) / (fabs(SphP_i->DivVel) + SphP_i->CurlVel + 0.0001 * SphP_i->Csnd / SphP_i->Hsml / fac_mu));
568#endif
569
570#ifdef TIMEDEP_ART_VISC
571 Vec4d alpha_i(SphP_i->Alpha);
572#endif
573 /* Now start the actual SPH computation for this particle */
574
575 double dacc[3] = {0};
576 double dentr = 0;
577 Vec4d MaxSignalVel = sound_i;
578
579 const int vector_length = 4;
580 const int array_length = (pdat.numngb + vector_length - 1) & (-vector_length);
581
582 for(int n = pdat.numngb; n < array_length; n++) /* fill up neighbour array so that sensible data is accessed */
583 Ngbhydrodat[n] = Ngbhydrodat[0];
584
585 for(int n = 0; n < array_length; n += vector_length)
586 {
587 sph_particle_data_hydrocore *ngb0 = Ngbhydrodat[n + 0].SphCore;
588 sph_particle_data_hydrocore *ngb1 = Ngbhydrodat[n + 1].SphCore;
589 sph_particle_data_hydrocore *ngb2 = Ngbhydrodat[n + 2].SphCore;
590 sph_particle_data_hydrocore *ngb3 = Ngbhydrodat[n + 3].SphCore;
591
592 ngbdata_hydro *P0_j = &Ngbhydrodat[n + 0];
593 ngbdata_hydro *P1_j = &Ngbhydrodat[n + 1];
594 ngbdata_hydro *P2_j = &Ngbhydrodat[n + 2];
595 ngbdata_hydro *P3_j = &Ngbhydrodat[n + 3];
596
597 /* converts the integer distance to floating point */
598 Vec4d dpos[NUMDIMS];
599 double posdiff[array_length][3];
600 for(int i = 0; i < 4; i++)
601 {
602 Tp->nearest_image_intpos_to_pos(P_i->IntPos, Ngbhydrodat[n + i].IntPos, &(posdiff[i][0]));
603 }
604
605 for(int i = 0; i < NUMDIMS; i++)
606 {
607 dpos[i] = Vec4d(posdiff[0][i], posdiff[1][i], posdiff[2][i], posdiff[3][i]);
608 }
609
610 Vec4d r2(0);
611
612 for(int i = 0; i < NUMDIMS; i++)
613 {
614 r2 += dpos[i] * dpos[i];
615 }
616
617 Vec4d r = sqrt(r2);
618
619 Vec4d v_j[NUMDIMS];
620 for(int i = 0; i < NUMDIMS; i++)
621 {
622 v_j[i] = Vec4d(ngb0->VelPred[i], ngb1->VelPred[i], ngb2->VelPred[i], ngb3->VelPred[i]);
623 }
624
625 Vec4d pressure(ngb0->Pressure, ngb1->Pressure, ngb2->Pressure, ngb3->Pressure);
626 Vec4d rho_j(ngb0->Density, ngb1->Density, ngb2->Density, ngb3->Density);
627#ifdef PRESSURE_ENTROPY_SPH
628 Vec4d rho_press_j(ngb0->PressureSphDensity, ngb1->PressureSphDensity, ngb2->PressureSphDensity, ngb3->PressureSphDensity);
629 Vec4d p_over_rho2_j = pressure / (rho_press_j * rho_press_j);
630#else
631 Vec4d p_over_rho2_j = pressure / (rho_j * rho_j);
632#endif
633
634 Vec4d wk_i, dwk_i;
635 Vec4d u = r * hinv;
636 kernel_main_vector(u, dwnorm, dwknorm, &wk_i, &dwk_i);
637 Vec4db decision = (r < h_i);
638 Vec4d fac = select(decision, 1., 0.);
639 wk_i *= fac;
640 dwk_i *= fac;
641
642 Vec4d h_j(ngb0->Hsml, ngb1->Hsml, ngb2->Hsml, ngb3->Hsml);
643 Vec4d hinv_j = 1 / h_j;
644#ifdef THREEDIMS
645 Vec4d hinv3_j = hinv_j * hinv_j * hinv_j;
646#endif
647
648#ifdef TWODIMS
649 Vec4d hinv3_j = hinv_j * hinv_j;
650#endif
651
652#ifdef ONEDIMS
653 Vec4d hinv3_j = hinv_j;
654#endif
655 Vec4d hinv4_j = hinv3_j * hinv_j;
656
657 Vec4d wk_j, dwk_j;
658 u = r * hinv_j;
659 kernel_main_vector(u, NORM * hinv3_j, NORM * hinv4_j, &wk_j, &dwk_j);
660 decision = (r < h_j);
661 fac = select(decision, 1., 0.);
662 wk_j *= fac;
663 dwk_j *= fac;
664
665 Vec4d sound_j(ngb0->Csnd, ngb1->Csnd, ngb2->Csnd, ngb3->Csnd);
666 Vec4d vsig = sound_i + sound_j;
667 if(n + vector_length > pdat.numngb)
668 {
669 wk_i.cutoff(vector_length - (array_length - pdat.numngb));
670 dwk_i.cutoff(vector_length - (array_length - pdat.numngb));
671 wk_j.cutoff(vector_length - (array_length - pdat.numngb));
672 dwk_j.cutoff(vector_length - (array_length - pdat.numngb));
673 vsig.cutoff(vector_length - (array_length - pdat.numngb));
674 }
675
676 Vec4d dwk_ij = 0.5 * (dwk_i + dwk_j);
677
678 MaxSignalVel = max(MaxSignalVel, vsig);
679
680 Vec4d visc(0);
681
682 Vec4d dv[NUMDIMS];
683 for(int i = 0; i < NUMDIMS; i++)
684 {
685 dv[i] = v_i[i] - v_j[i];
686 }
687
688 Vec4d vdotr2(0);
689 for(int i = 0; i < NUMDIMS; i++)
690 {
691 vdotr2 += dv[i] * dpos[i];
692 }
693
695 vdotr2 += All.cf_atime2_hubble_a * r2;
696
697 decision = (vdotr2 < 0);
698 Vec4d viscosity_fac = select(decision, 1, 0);
699
700 /* ... artificial viscosity */
701
702 Vec4d mu_ij = fac_mu * vdotr2 / r;
703
704 vsig -= 3 * mu_ij;
705
706#if defined(NO_SHEAR_VISCOSITY_LIMITER) || defined(TIMEDEP_ART_VISC)
707 Vec4d f_i(1);
708 Vec4d f_j(1);
709#else
710 Vec4d DivVel_j(ngb0->DivVel, ngb1->DivVel, ngb2->DivVel, ngb3->DivVel);
711 Vec4d CurlVel_j(ngb0->CurlVel, ngb1->CurlVel, ngb2->CurlVel, ngb3->CurlVel);
712 Vec4d f_j = abs(DivVel_j) / (abs(DivVel_j) + CurlVel_j + 0.0001 * sound_j / fac_mu * hinv_j);
713#endif
714
715#ifdef TIMEDEP_ART_VISC
716 Vec4d alpha_j(ngb0->Alpha, ngb1->Alpha, ngb2->Alpha, ngb3->Alpha);
717 Vec4d BulkVisc_ij = 0.5 * (alpha_i + alpha_j);
718
719#else
720 Vec4d BulkVisc_ij(All.ArtBulkViscConst);
721#endif
722 Vec4d rho_ij_inv = 2.0 / (rho_i + rho_j);
723 visc = 0.25 * BulkVisc_ij * vsig * (-mu_ij) * rho_ij_inv * (f_i + f_j);
724 Vec4d mass_j(P0_j->Mass, P1_j->Mass, P2_j->Mass, P3_j->Mass);
725#ifdef VISCOSITY_LIMITER_FOR_LARGE_TIMESTEPS
726 Vec4i timeBin_i(P_i->TimeBinHydro);
727 Vec4i timeBin_j(P0_j->TimeBinHydro, P1_j->TimeBinHydro, P2_j->TimeBinHydro, P3_j->TimeBinHydro);
728
729 Vec4i timebin = max(timeBin_i, timeBin_j);
730 Vec4i integer_time(((integertime)1) << timebin[0], ((integertime)1) << timebin[1], ((integertime)1) << timebin[2],
731 ((integertime)1) << timebin[3]);
732
733 Vec4ib decision_i = (timebin != 0);
734 Vec4i factor_timebin = select(decision_i, Vec4i(1), Vec4i(0));
735 Vec4d dt = to_double(2 * integer_time * factor_timebin) * All.Timebase_interval;
736
737 decision = (dt > 0 && dwk_ij < 0);
738
739 Vec4d visc_alternavtive = 0.5 * fac_vsic_fix * vdotr2 / ((P_i->getMass() + mass_j) * dwk_ij * r * dt);
740
741 Vec4d visc2 = select(decision, visc_alternavtive, visc);
742 visc = min(visc, visc2);
743#endif
744
745 Vec4d hfc_visc = mass_j * visc * dwk_ij / r * viscosity_fac;
746
747#ifndef PRESSURE_ENTROPY_SPH
748 /* Formulation derived from the Lagrangian */
749 dwk_i *= DhsmlDensityFactor_i;
750 Vec4d DhsmlDensityFactor_j(ngb0->DhsmlDensityFactor, ngb1->DhsmlDensityFactor, ngb2->DhsmlDensityFactor,
751 ngb3->DhsmlDensityFactor);
752 dwk_j *= DhsmlDensityFactor_j;
753
754 Vec4d hfc = mass_j * (p_over_rho2_i * dwk_i + p_over_rho2_j * dwk_j) / r + hfc_visc;
755#else
756 Vec4d EntropyToInvGammaPred_j(ngb0->EntropyToInvGammaPred, ngb1->EntropyToInvGammaPred, ngb2->EntropyToInvGammaPred,
757 ngb3->EntropyToInvGammaPred);
758 Vec4d DhsmlDerivedDensityFactor_j(ngb0->DhsmlDerivedDensityFactor, ngb1->DhsmlDerivedDensityFactor,
759 ngb2->DhsmlDerivedDensityFactor, ngb3->DhsmlDerivedDensityFactor);
760 /* leading order term */
761 Vec4d hfc = mass_j *
762 (p_over_rho2_i * dwk_i * EntropyToInvGammaPred_j / EntropyToInvGammaPred_i +
763 p_over_rho2_j * dwk_j * EntropyToInvGammaPred_i / EntropyToInvGammaPred_j) /
764 r;
765
766 /* grad-h term */
767 hfc += mass_j *
768 (p_over_rho2_i * dwk_i * SphP_i->DhsmlDerivedDensityFactor + p_over_rho2_j * dwk_j * DhsmlDerivedDensityFactor_j) / r;
769
770 /* add viscous term */
771 hfc += hfc_visc;
772#endif
773
774 for(int i = 0; i < NUMDIMS; i++)
775 {
776 dacc[i] += horizontal_add(-hfc * dpos[i]);
777 }
778 dentr += horizontal_add(0.5 * (hfc_visc)*vdotr2);
779 }
780
781 SphP_i->HydroAccel[0] += dacc[0];
782 SphP_i->HydroAccel[1] += dacc[1];
783 SphP_i->HydroAccel[2] += dacc[2];
784 SphP_i->DtEntropy += dentr;
785
786 for(int i = 0; i < 4; i++)
787 {
788 if(SphP_i->MaxSignalVel < MaxSignalVel[i])
789 SphP_i->MaxSignalVel = MaxSignalVel[i];
790 }
791#endif
792}
793
794#else
795
800void sph::hydro_evaluate_kernel(pinfo &pdat)
801{
802#ifndef LEAN
803 particle_data *P_i = &Tp->P[pdat.target];
804 sph_particle_data *SphP_i = &Tp->SphP[pdat.target];
805
806 /* the particles needs to be active */
808 Terminate("bummer");
809
810#ifdef PRESSURE_ENTROPY_SPH
811 double p_over_rho2_i = (double)SphP_i->Pressure / ((double)SphP_i->PressureSphDensity * (double)SphP_i->PressureSphDensity);
812#else
813 double p_over_rho2_i = (double)SphP_i->Pressure / ((double)SphP_i->Density * (double)SphP_i->Density);
814#endif
815
816 kernel_hydra kernel;
817
818 kernel.sound_i = SphP_i->Csnd;
819 kernel.h_i = SphP_i->Hsml;
820
821 /* Now start the actual SPH computation for this particle */
822
823 double daccx = 0;
824 double daccy = 0;
825 double daccz = 0;
826 double dentr = 0;
827 double MaxSignalVel = kernel.sound_i;
828
829 for(int n = 0; n < pdat.numngb; n++)
830 {
831 sph_particle_data_hydrocore *SphP_j = Ngbhydrodat[n].SphCore;
832 ngbdata_hydro *P_j = &Ngbhydrodat[n];
833
834 /* converts the integer distance to floating point */
835 double posdiff[3];
836 Tp->nearest_image_intpos_to_pos(P_i->IntPos, P_j->IntPos, posdiff);
837
838 kernel.dx = posdiff[0];
839 kernel.dy = posdiff[1];
840 kernel.dz = posdiff[2];
841
842 double r2 = kernel.dx * kernel.dx + kernel.dy * kernel.dy + kernel.dz * kernel.dz;
843 kernel.h_j = SphP_j->Hsml;
844
845 if(r2 < kernel.h_i * kernel.h_i || r2 < kernel.h_j * kernel.h_j)
846 {
847 kernel.r = sqrt(r2);
848 if(kernel.r > 0)
849 {
850#ifdef PRESSURE_ENTROPY_SPH
851 double p_over_rho2_j =
852 (double)SphP_j->Pressure / ((double)SphP_j->PressureSphDensity * (double)SphP_j->PressureSphDensity);
853#else
854 double p_over_rho2_j = (double)SphP_j->Pressure / ((double)SphP_j->Density * (double)SphP_j->Density);
855#endif
856
857 kernel.sound_j = SphP_j->Csnd;
858
859 kernel.dvx = SphP_i->VelPred[0] - SphP_j->VelPred[0];
860 kernel.dvy = SphP_i->VelPred[1] - SphP_j->VelPred[1];
861 kernel.dvz = SphP_i->VelPred[2] - SphP_j->VelPred[2];
862 kernel.vdotr2 = kernel.dx * kernel.dvx + kernel.dy * kernel.dvy + kernel.dz * kernel.dvz;
863 kernel.rho_ij_inv = 2.0 / (SphP_i->Density + SphP_j->Density);
864
866 kernel.vdotr2 += All.cf_atime2_hubble_a * r2;
867
868 double hinv, hinv3, hinv4;
869 if(kernel.r < kernel.h_i)
870 {
871 kernel_hinv(kernel.h_i, &hinv, &hinv3, &hinv4);
872 double u = kernel.r * hinv;
873 kernel_main(u, hinv3, hinv4, &kernel.wk_i, &kernel.dwk_i, COMPUTE_DWK);
874 }
875 else
876 {
877 kernel.dwk_i = 0;
878 kernel.wk_i = 0;
879 }
880
881 if(kernel.r < kernel.h_j)
882 {
883 kernel_hinv(kernel.h_j, &hinv, &hinv3, &hinv4);
884 double u = kernel.r * hinv;
885 kernel_main(u, hinv3, hinv4, &kernel.wk_j, &kernel.dwk_j, COMPUTE_DWK);
886 }
887 else
888 {
889 kernel.dwk_j = 0;
890 kernel.wk_j = 0;
891 }
892
893 kernel.dwk_ij = 0.5 * (kernel.dwk_i + kernel.dwk_j);
894
895 kernel.vsig = kernel.sound_i + kernel.sound_j;
896
897 if(kernel.vsig > MaxSignalVel)
898 MaxSignalVel = kernel.vsig;
899
900 double visc = 0;
901
902 if(kernel.vdotr2 < 0) /* ... artificial viscosity */
903 {
904 double mu_ij = fac_mu * kernel.vdotr2 / kernel.r;
905
906 kernel.vsig -= 3 * mu_ij;
907
908#if defined(NO_SHEAR_VISCOSITY_LIMITER) || defined(TIMEDEP_ART_VISC)
909 double f_i = 1.;
910 double f_j = 1.;
911#else
912 double f_i =
913 fabs(SphP_i->DivVel) / (fabs(SphP_i->DivVel) + SphP_i->CurlVel + 0.0001 * SphP_i->Csnd / SphP_i->Hsml / fac_mu);
914
915 double f_j =
916 fabs(SphP_j->DivVel) / (fabs(SphP_j->DivVel) + SphP_j->CurlVel + 0.0001 * kernel.sound_j / fac_mu / kernel.h_j);
917#endif
918
919#ifdef TIMEDEP_ART_VISC
920 double BulkVisc_ij = 0.5 * (SphP_i->Alpha + SphP_j->Alpha);
921
922#else
923 double BulkVisc_ij = All.ArtBulkViscConst;
924#endif
925
926 visc = 0.25 * BulkVisc_ij * kernel.vsig * (-mu_ij) * kernel.rho_ij_inv * (f_i + f_j);
927#ifdef VISCOSITY_LIMITER_FOR_LARGE_TIMESTEPS
928 int timebin = std::max<int>(P_i->TimeBinHydro, P_j->TimeBinHydro);
929
930 double dt = 2 * (timebin ? (((integertime)1) << timebin) : 0) * All.Timebase_interval;
931
932 if(dt > 0 && kernel.dwk_ij < 0)
933 {
934 visc = std::min<double>(
935 visc, 0.5 * fac_vsic_fix * kernel.vdotr2 / ((P_i->getMass() + P_j->Mass) * kernel.dwk_ij * kernel.r * dt));
936 }
937#endif
938 }
939
940 double hfc_visc = P_j->Mass * visc * kernel.dwk_ij / kernel.r;
941
942#ifndef PRESSURE_ENTROPY_SPH
943 /* Formulation derived from the Lagrangian */
944 kernel.dwk_i *= SphP_i->DhsmlDensityFactor;
945 kernel.dwk_j *= SphP_j->DhsmlDensityFactor;
946
947 double hfc = P_j->Mass * (p_over_rho2_i * kernel.dwk_i + p_over_rho2_j * kernel.dwk_j) / kernel.r + hfc_visc;
948#else
949 /* leading order term */
950 double hfc = P_j->Mass *
951 (p_over_rho2_i * kernel.dwk_i * SphP_j->EntropyToInvGammaPred / SphP_i->EntropyToInvGammaPred +
952 p_over_rho2_j * kernel.dwk_j * SphP_i->EntropyToInvGammaPred / SphP_j->EntropyToInvGammaPred) /
953 kernel.r;
954
955 /* grad-h term */
956 hfc += P_j->Mass *
957 (p_over_rho2_i * kernel.dwk_i * SphP_i->DhsmlDerivedDensityFactor +
958 p_over_rho2_j * kernel.dwk_j * SphP_j->DhsmlDerivedDensityFactor) /
959 kernel.r;
960
961 /* add viscous term */
962 hfc += hfc_visc;
963#endif
964
965 daccx += (-hfc * kernel.dx);
966 daccy += (-hfc * kernel.dy);
967 daccz += (-hfc * kernel.dz);
968 dentr += (0.5 * (hfc_visc)*kernel.vdotr2);
969 }
970 }
971 }
972
973 SphP_i->HydroAccel[0] += daccx;
974 SphP_i->HydroAccel[1] += daccy;
975 SphP_i->HydroAccel[2] += daccz;
976 SphP_i->DtEntropy += dentr;
977
978 if(SphP_i->MaxSignalVel < MaxSignalVel)
979 SphP_i->MaxSignalVel = MaxSignalVel;
980#endif
981}
982#endif
983
984/* this routine clears the fields in the SphP particle structure that are additively computed by the SPH density loop
985 * by summing over neighbours
986 */
987inline void sph::clear_hydro_result(sph_particle_data *SphP)
988{
989 for(int k = 0; k < 3; k++)
990 SphP->HydroAccel[k] = 0;
991
992 SphP->DtEntropy = 0;
993 SphP->MaxSignalVel = 0;
994}
global_data_all_processes All
Definition: main.cc:40
MyReal FacCoordToInt
Definition: intposconvert.h:38
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
MyIntPosType nearest_image_intpos_to_intpos_Y(const MyIntPosType a, const MyIntPosType b)
MyIntPosType nearest_image_intpos_to_intpos_Z(const MyIntPosType a, const MyIntPosType b)
MyIntPosType nearest_image_intpos_to_intpos_X(const MyIntPosType a, const MyIntPosType b)
FILE * FdHydro
Definition: logs.h:49
double timediff(double t0, double t1)
Definition: logs.cc:488
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:71
size_t FreeBytes
Definition: mymalloc.h:48
int * GetNodeIDForSimulCommRank
int Island_ThisTask
int drift_particle(particle_data *P, sph_particle_data *SphP, integertime time1, bool ignore_light_cone=false)
This function drifts a particle i to time1.
Definition: predict.cc:313
TimeBinData TimeBinsGravity
Definition: simparticles.h:205
particle_data * P
Definition: simparticles.h:54
sph_particle_data * SphP
Definition: simparticles.h:59
TimeBinData TimeBinsHydro
Definition: simparticles.h:204
void hydro_forces_determine(int ntarget, int *targetlist)
Definition: hydra.cc:271
double fac_mu
Definition: sph.h:32
sph_particle_data * get_SphPp(int n, unsigned char shmrank)
Definition: tree.h:292
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
Definition: tree.h:197
void tree_add_to_fetch_stack(ngbnode *nop, int nodetoopen, unsigned char shmrank)
Definition: tree.h:207
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
Definition: tree.h:232
#define NUMDIMS
Definition: constants.h:369
#define GAMMA_MINUS1
Definition: constants.h:110
#define GAMMA
Definition: constants.h:99
int integertime
Definition: constants.h:331
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
#define LEVEL_ALWAYS_OPEN
Definition: dtypes.h:383
float MyNgbTreeFloat
Definition: dtypes.h:88
#define TREE_MIN_WORKSTACK_SIZE
Definition: gravtree.h:26
#define NODE_DISCARD
Definition: gravtree.h:37
#define NODE_TYPE_FETCHED_NODE
Definition: gravtree.h:33
#define TREE_EXPECTED_CYCLES
Definition: gravtree.h:27
#define NODE_TYPE_FETCHED_PARTICLE
Definition: gravtree.h:31
#define NODE_OPEN
Definition: gravtree.h:36
#define NODE_TYPE_LOCAL_NODE
Definition: gravtree.h:32
#define NODE_TYPE_LOCAL_PARTICLE
Definition: gravtree.h:29
#define COMPUTE_DWK
Definition: kernel.h:110
#define NORM
Definition: kernel.h:47
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
Definition: logs.h:257
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
Definition: logs.h:250
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
Definition: logs.h:231
#define Terminate(...)
Definition: macros.h:15
shmem Shmem
Definition: main.cc:45
memory Mem
Definition: main.cc:44
half abs(half arg)
Definition: half.hpp:2620
half fabs(half arg)
Definition: half.hpp:2627
expr pow(half base, half exp)
Definition: half.hpp:2803
expr sqrt(half arg)
Definition: half.hpp:2777
#define MAX_NGBS
Definition: sph.h:18
long long GlobalNActiveParticles
Definition: timestep.h:19
unsigned char level
Definition: tree.h:64
unsigned char nextnode_shmrank
Definition: tree.h:66
std::atomic< unsigned char > cannot_be_opened_locally
Definition: tree.h:70
unsigned char not_empty
Definition: tree.h:73
int nextnode
Definition: tree.h:57
vector< MyIntPosType > center
Definition: tree.h:51
int OriginTask
Definition: tree.h:61
unsigned char sibling_shmrank
Definition: tree.h:65
int sibling
Definition: tree.h:53
unsigned char Nextnode_shmrank
Definition: ngbtree.h:111
sph_particle_data_hydrocore SphCore
Definition: ngbtree.h:105
signed char TimeBinHydro
Definition: ngbtree.h:113
MyIntPosType IntPos[3]
Definition: ngbtree.h:108
integertime Ti_Current
Definition: allvars.h:188
double h_i
Definition: kernel.h:31
double dx
Definition: kernel.h:27
double dvx
Definition: kernel.h:29
double dwk_ij
Definition: kernel.h:31
double dwk_j
Definition: kernel.h:30
double vdotr2
Definition: kernel.h:29
double vsig
Definition: kernel.h:28
double dz
Definition: kernel.h:27
double sound_j
Definition: kernel.h:28
double dvy
Definition: kernel.h:29
double h_j
Definition: kernel.h:31
double r
Definition: kernel.h:28
double dy
Definition: kernel.h:27
double wk_i
Definition: kernel.h:30
double wk_j
Definition: kernel.h:30
double sound_i
Definition: kernel.h:28
double dvz
Definition: kernel.h:29
double rho_ij_inv
Definition: kernel.h:31
double dwk_i
Definition: kernel.h:30
std::atomic< integertime > Ti_Current
Definition: ngbtree.h:56
MyNgbTreeFloat MaxHsml
Definition: ngbtree.h:52
MySignedIntPosType center_offset_max[3]
Definition: ngbtree.h:48
MySignedIntPosType center_offset_min[3]
Definition: ngbtree.h:47
void drift_node(integertime time1, simparticles *Sp)
Definition: ngbtree.h:58
integertime get_Ti_Current(void)
MyDouble getMass(void)
unsigned char getTimeBinHydro(void)
unsigned char getType(void)
signed char TimeBinHydro
Definition: particle_data.h:73
MyIntPosType IntPos[3]
Definition: particle_data.h:53
void myflush(FILE *fstream)
Definition: system.cc:125
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16