GADGET-4
timestep_treebased.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#include <math.h>
15#include <mpi.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19
20#include "../data/allvars.h"
21#include "../data/dtypes.h"
22#include "../data/intposconvert.h"
23#include "../data/mymalloc.h"
24#include "../data/simparticles.h"
25#include "../domain/domain.h"
26#include "../gravtree/gravtree.h"
27#include "../logs/logs.h"
28#include "../logs/timer.h"
29#include "../main/simulation.h"
30#include "../mpi_utils/mpi_utils.h"
31#include "../ngbtree/ngbtree.h"
32#include "../sort/cxxsort.h"
33#include "../system/system.h"
34#include "../time_integration/timestep.h"
35
36inline int sph::sph_treetimestep_evaluate_particle_node_opening_criterion(pinfo &pdat, ngbnode *nop)
37{
38 if(nop->level <= LEVEL_ALWAYS_OPEN) // always open the root node (note: full node length does not fit in the integer type)
39 return NODE_OPEN;
40
41 if(nop->Ti_Current != All.Ti_Current)
43
44 sph_particle_data *SphP = &Tp->SphP[pdat.target];
45
46 double vsig = SphP->Csnd + nop->MaxCsnd;
47
48 MyIntPosType new_range_min[3], new_range_max[3];
49 MySignedIntPosType left[3], right[3];
50 double vleft, vright;
51
52 // ----------- x-checks
53
54 vleft = (-vsig + (nop->vmin[0] - SphP->VelPred[0]));
55 if(fabs(vleft) * SphP->CurrentMaxTiStep > MaxBoxDist)
56 SphP->CurrentMaxTiStep = MaxBoxDist / fabs(vleft);
57
58 vright = (vsig + (nop->vmax[0] - SphP->VelPred[0]));
59 if(fabs(vright) * SphP->CurrentMaxTiStep > MaxBoxDist)
60 SphP->CurrentMaxTiStep = MaxBoxDist / fabs(vright);
61
62 new_range_min[0] = nop->center_offset_min[0] + nop->center[0] +
63 (MyIntPosType)Tp->pos_to_signedintpos(SphP->CurrentMaxTiStep * vleft + 2 * pdat.hsml);
64 new_range_max[0] = nop->center_offset_max[0] + nop->center[0] +
65 (MyIntPosType)Tp->pos_to_signedintpos(SphP->CurrentMaxTiStep * vright - 2 * pdat.hsml);
66
67 left[0] = (MySignedIntPosType)Tp->nearest_image_intpos_to_intpos_X(new_range_min[0], pdat.searchcenter[0]);
68 right[0] = (MySignedIntPosType)Tp->nearest_image_intpos_to_intpos_X(new_range_max[0], pdat.searchcenter[0]);
69
70 /* check whether we can stop walking along this branch */
71 if(right[0] < 0 || left[0] > 0)
72 return NODE_DISCARD;
73
74 // ----------- y-checks
75
76 vleft = (-vsig + (nop->vmin[1] - SphP->VelPred[1]));
77 if(fabs(vleft) * SphP->CurrentMaxTiStep > MaxBoxDist)
78 SphP->CurrentMaxTiStep = MaxBoxDist / fabs(vleft);
79
80 vright = (vsig + (nop->vmax[1] - SphP->VelPred[1]));
81 if(fabs(vright) * SphP->CurrentMaxTiStep > MaxBoxDist)
82 SphP->CurrentMaxTiStep = MaxBoxDist / fabs(vright);
83
84 new_range_min[1] = nop->center_offset_min[1] + nop->center[1] +
85 (MyIntPosType)Tp->pos_to_signedintpos(SphP->CurrentMaxTiStep * vleft + 2 * pdat.hsml);
86 new_range_max[1] = nop->center_offset_max[1] + nop->center[1] +
87 (MyIntPosType)Tp->pos_to_signedintpos(SphP->CurrentMaxTiStep * vright - 2 * pdat.hsml);
88
89 left[1] = (MySignedIntPosType)Tp->nearest_image_intpos_to_intpos_Y(new_range_min[1], pdat.searchcenter[1]);
90 right[1] = (MySignedIntPosType)Tp->nearest_image_intpos_to_intpos_Y(new_range_max[1], pdat.searchcenter[1]);
91
92 /* check whether we can stop walking along this branch */
93 if(right[1] < 0 || left[1] > 0)
94 return NODE_DISCARD;
95
96 // ----------- z-checks
97
98 vleft = (-vsig + (nop->vmin[2] - SphP->VelPred[2]));
99 if(fabs(vleft) * SphP->CurrentMaxTiStep > MaxBoxDist)
100 SphP->CurrentMaxTiStep = MaxBoxDist / fabs(vleft);
101
102 vright = (vsig + (nop->vmax[2] - SphP->VelPred[2]));
103 if(fabs(vright) * SphP->CurrentMaxTiStep > MaxBoxDist)
104 SphP->CurrentMaxTiStep = MaxBoxDist / fabs(vright);
105
106 new_range_min[2] = nop->center_offset_min[2] + nop->center[2] +
107 (MyIntPosType)Tp->pos_to_signedintpos(SphP->CurrentMaxTiStep * vleft + 2 * pdat.hsml);
108 new_range_max[2] = nop->center_offset_max[2] + nop->center[2] +
109 (MyIntPosType)Tp->pos_to_signedintpos(SphP->CurrentMaxTiStep * vright - 2 * pdat.hsml);
110
111 left[2] = (MySignedIntPosType)Tp->nearest_image_intpos_to_intpos_Z(new_range_min[2], pdat.searchcenter[2]);
112 right[2] = (MySignedIntPosType)Tp->nearest_image_intpos_to_intpos_Z(new_range_max[2], pdat.searchcenter[2]);
113
114 /* check whether we can stop walking along this branch */
115 if(right[2] < 0 || left[2] > 0)
116 return NODE_DISCARD;
117
118 return NODE_OPEN;
119}
120
121inline void sph::sph_treetimestep_check_particle_particle_interaction(pinfo &pdat, int p, int p_type, unsigned char shmrank)
122{
123#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
124 if(skip_actual_force_computation)
125 return;
126#endif
127
128 sph_particle_data *SphP_i = &Tp->SphP[pdat.target];
129
130 if(p_type == NODE_TYPE_LOCAL_PARTICLE) /* local particle */
131 {
132 particle_data *P_j = get_Pp(p, shmrank);
133 sph_particle_data *SphP_j = get_SphPp(p, shmrank);
134
135 if(P_j->getType() > 0)
136 return;
137
138 if(P_j->get_Ti_Current() != All.Ti_Current)
139 Tp->drift_particle(P_j, SphP_j, All.Ti_Current); // this function avoids race conditions
140
141 double dxyz[3];
142 Tp->nearest_image_intpos_to_pos(P_j->IntPos, pdat.searchcenter, dxyz); /* converts the integer distance to floating point */
143
144 double dist2 = dxyz[0] * dxyz[0] + dxyz[1] * dxyz[1] + dxyz[2] * dxyz[2];
145
146 if(dist2 > 0)
147 {
148 double dist = sqrt(dist2);
149 double vsig = SphP_i->Csnd + SphP_j->Csnd -
150 ((SphP_j->VelPred[0] - SphP_i->VelPred[0]) * dxyz[0] + (SphP_j->VelPred[1] - SphP_i->VelPred[1]) * dxyz[1] +
151 (SphP_j->VelPred[2] - SphP_i->VelPred[2]) * dxyz[2]) /
152 dist;
153
154 if(vsig > 0)
155 {
156 dist += 2 * SphP_i->Hsml; /* take one smoothing length as minimum distance in order to protect against unreasonably
157 small steps if two particles are very close */
158
159 double dt = dist / vsig;
160 if(SphP_i->CurrentMaxTiStep > dt)
161 SphP_i->CurrentMaxTiStep = dt;
162 }
163 }
164 }
165 else if(p_type == NODE_TYPE_FETCHED_PARTICLE)
166 {
167 foreign_sphpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
168
169 sph_particle_data_hydrocore *SphP_j = &foreignpoint->SphCore;
170
171 /* converts the integer distance to floating point */
172 double dxyz[3];
173 Tp->nearest_image_intpos_to_pos(foreignpoint->IntPos, pdat.searchcenter, dxyz);
174
175 double dist2 = dxyz[0] * dxyz[0] + dxyz[1] * dxyz[1] + dxyz[2] * dxyz[2];
176
177 if(dist2 > 0)
178 {
179 double dist = sqrt(dist2);
180 double vsig = SphP_i->Csnd + SphP_j->Csnd -
181 ((SphP_j->VelPred[0] - SphP_i->VelPred[0]) * dxyz[0] + (SphP_j->VelPred[1] - SphP_i->VelPred[1]) * dxyz[1] +
182 (SphP_j->VelPred[2] - SphP_i->VelPred[2]) * dxyz[2]) /
183 dist;
184
185 if(vsig > 0)
186 {
187 dist += 2 * SphP_i->Hsml; /* take one smoothing length as minimum distance in order to protect against unreasonably
188 small steps if two particles are very close */
189
190 double dt = dist / vsig;
191 if(SphP_i->CurrentMaxTiStep > dt)
192 SphP_i->CurrentMaxTiStep = dt;
193 }
194 }
195 }
196 else
197 Terminate("unexpected");
198}
199
200inline void sph::sph_treetimestep_open_node(pinfo &pdat, ngbnode *nop, int mintopleafnode, int committed)
201{
202 /* open node */
203 int p = nop->nextnode;
204 unsigned char shmrank = nop->nextnode_shmrank;
205
206 while(p != nop->sibling || (shmrank != nop->sibling_shmrank && nop->sibling >= MaxPart + D->NTopnodes))
207 {
208 if(p < 0)
209 Terminate(
210 "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d "
211 "first_nontoplevelnode=%d",
212 p, nop->sibling, nop->nextnode, shmrank, nop->sibling_shmrank, nop->OriginTask, MaxPart + D->NTopnodes);
213
214 int next;
215 unsigned char next_shmrank;
216 char type;
217
218 if(p < MaxPart) /* a local particle */
219 {
220 /* note: here shmrank cannot change */
221 next = get_nextnodep(shmrank)[p];
222 next_shmrank = shmrank;
224 }
225 else if(p < MaxPart + MaxNodes) /* an internal node */
226 {
227 ngbnode *nop = get_nodep(p, shmrank);
228 next = nop->sibling;
229 next_shmrank = nop->sibling_shmrank;
231 }
232 else if(p >= ImportedNodeOffset && p < EndOfTreePoints) /* an imported Treepoint particle */
233 {
234 Terminate("not expected for SPH");
235 }
236 else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
237 {
238 ngbnode *nop = get_nodep(p, shmrank);
239 next = nop->sibling;
240 next_shmrank = nop->sibling_shmrank;
242 }
243 else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
244 {
245 foreign_sphpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
246
247 next = foreignpoint->Nextnode;
248 next_shmrank = foreignpoint->Nextnode_shmrank;
250 }
251 else
252 {
253 /* a pseudo point */
254 Terminate(
255 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
256 "shmrank=%d",
258 }
259
260 sph_treetimestep_interact(pdat, p, type, shmrank, mintopleafnode, committed);
261
262 p = next;
263 shmrank = next_shmrank;
264 }
265}
266
267inline void sph::sph_treetimestep_interact(pinfo &pdat, int no, char no_type, unsigned char shmrank, int mintopleafnode, int committed)
268{
269 if(no_type <= NODE_TYPE_FETCHED_PARTICLE) // we are interacting with a particle
270 {
271 sph_treetimestep_check_particle_particle_interaction(pdat, no, no_type, shmrank);
272 }
273 else // we are interacting with a node
274 {
275 ngbnode *nop = get_nodep(no, shmrank);
276
277 if(nop->not_empty == 0)
278 return;
279
280 if(no < MaxPart + MaxNodes) // we have a top-levelnode
281 if(nop->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
282 mintopleafnode = no;
283
284 int openflag = sph_treetimestep_evaluate_particle_node_opening_criterion(pdat, nop);
285
286 if(openflag == NODE_OPEN) /* we need to open it */
287 {
288 if(nop->cannot_be_opened_locally.load(std::memory_order_acquire))
289 {
290 // are we in the same shared memory node?
292 {
293 Terminate("this should not happen any more");
294 }
295 else
296 {
297 tree_add_to_fetch_stack(nop, no, shmrank); // will only add unique copies
298
299 tree_add_to_work_stack(pdat.target, no, shmrank, mintopleafnode);
300 }
301 }
302 else
303 {
304 int min_buffer_space =
306
307 if(min_buffer_space >= committed + 8 * TREE_NUM_BEFORE_NODESPLIT)
308 sph_treetimestep_open_node(pdat, nop, mintopleafnode, committed + 8 * TREE_NUM_BEFORE_NODESPLIT);
309 else
310 tree_add_to_work_stack(pdat.target, no, shmrank, mintopleafnode);
311 }
312 }
313 }
314}
315
317{
318 if(Tp->TotNumGas > 0)
319 {
320 TIMER_START(CPU_TREE_TIMESTEPS);
321
322 D->mpi_printf("TIMESTEP-TREEWALK: Begin\n");
323
324 MaxBoxDist = 0.25 * Tp->RegionLen / (1 << LEVEL_ALWAYS_OPEN);
325
326 double ta = Logs.second();
327
328 // let's grab at most half the still available memory for imported points and nodes
329 int nspace = (0.33 * Mem.FreeBytes) / (sizeof(ngbnode) + 8 * sizeof(foreign_sphpoint_data));
330
331 MaxForeignNodes = nspace;
332 MaxForeignPoints = 8 * nspace;
333 NumForeignNodes = 0;
335
338
339 /* the following two arrays hold imported tree nodes and imported points to augment the local tree */
340 Foreign_Nodes = (ngbnode *)Mem.mymalloc_movable(&Foreign_Nodes, "Foreign_Nodes", MaxForeignNodes * sizeof(ngbnode));
341 Foreign_Points = (foreign_sphpoint_data *)Mem.mymalloc_movable(&Foreign_Points, "Foreign_Points",
343
345
346 max_ncycles = 0;
347
349
350 NumOnWorkStack = 0;
354
355 WorkStack = (workstack_data *)Mem.mymalloc("WorkStack", AllocWorkStackBaseHigh * sizeof(workstack_data));
356
357 for(int i = 0; i < Tp->TimeBinsHydro.NActiveParticles; i++)
358 {
359 int target = Tp->TimeBinsHydro.ActiveParticleList[i];
360
361 if(Tp->SphP[target].Csnd < MIN_FLOAT_NUMBER)
362 Tp->SphP[target].Csnd = MIN_FLOAT_NUMBER;
363
364 Tp->SphP[target].CurrentMaxTiStep = 2.0 * Tp->SphP[target].Hsml / (Tp->SphP[target].MaxSignalVel + MIN_FLOAT_NUMBER);
365
366 /* note: for cosmological integration, CurrentMaxTiStep stores 1/a^2 times the maximum allowed physical timestep */
369
370 WorkStack[NumOnWorkStack].Target = target;
373 WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
375 }
376
377 // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large)
379 StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", MaxOnFetchStack * sizeof(fetch_data));
380
381 while(NumOnWorkStack > 0) // repeat until we are out of work
382 {
383 NewOnWorkStack = 0; // gives the new entries
384 NumOnFetchStack = 0;
386
387 int item = 0;
388
389 while(item < NumOnWorkStack)
390 {
391 int committed = 8 * TREE_NUM_BEFORE_NODESPLIT;
392 int min_buffer_space =
394 if(min_buffer_space >= committed)
395 {
396 int target = WorkStack[item].Target;
397 int no = WorkStack[item].Node;
398 int shmrank = WorkStack[item].ShmRank;
399 int mintopleaf = WorkStack[item].MinTopLeafNode;
400 item++;
401
402 pinfo pdat;
403 get_pinfo(target, pdat);
404
405 if(no == MaxPart)
406 {
407 // we have a pristine particle that's processed for the first time
408 sph_treetimestep_interact(pdat, no, NODE_TYPE_LOCAL_NODE, shmrank, mintopleaf, committed);
409 }
410 else
411 {
412 // we have a node that we previously could not open
413 ngbnode *nop = get_nodep(no, shmrank);
414
416 {
417 Terminate("item=%d: no=%d now we should be able to open it!", item, no);
418 }
419 else
420 sph_treetimestep_open_node(pdat, nop, mintopleaf, committed);
421 }
422 }
423 else
424 break;
425 }
426
427 if(item == 0 && NumOnWorkStack > 0)
428 Terminate("Can't even process a single particle");
429
431
432 /* now reorder the workstack such that we are first going to do residual pristine particles, and then
433 * imported nodes that hang below the first leaf nodes */
435 memmove(WorkStack, WorkStack + item, NumOnWorkStack * sizeof(workstack_data));
436
437 /* now let's sort such that we can go deep on top-level node branches, allowing us to clear them out eventually */
439
440 max_ncycles++;
441 }
442
443 Mem.myfree(StackToFetch);
444 Mem.myfree(WorkStack);
445
446 /* now multiply the determined values with the specified CourantFactor */
447 for(int i = 0; i < Tp->TimeBinsHydro.NActiveParticles; i++)
448 {
449 int target = Tp->TimeBinsHydro.ActiveParticleList[i];
450
452 }
453
454 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator);
455
457
458 /* free temporary buffers */
459 Mem.myfree(Foreign_Points);
460 Mem.myfree(Foreign_Nodes);
461
462 double tb = Logs.second();
463
464 TIMER_STOPSTART(CPU_TREE_TIMESTEPS, CPU_LOGS);
465
466 D->mpi_printf("TIMESTEP-TREEWALK: took %g sec, max_ncycles = %d, part/sec = %g\n", Logs.timediff(ta, tb), max_ncycles,
468
469 TIMER_STOP(CPU_LOGS);
470 }
471}
global_data_all_processes All
Definition: main.cc:40
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
MySignedIntPosType pos_to_signedintpos(T posdiff)
MyIntPosType nearest_image_intpos_to_intpos_Y(const MyIntPosType a, const MyIntPosType b)
MyReal RegionLen
Definition: intposconvert.h:39
MyIntPosType nearest_image_intpos_to_intpos_Z(const MyIntPosType a, const MyIntPosType b)
MyIntPosType nearest_image_intpos_to_intpos_X(const MyIntPosType a, const MyIntPosType b)
double timediff(double t0, double t1)
Definition: logs.cc:488
double second(void)
Definition: logs.cc:471
size_t FreeBytes
Definition: mymalloc.h:48
int * GetNodeIDForSimulCommRank
int Island_ThisTask
int drift_particle(particle_data *P, sph_particle_data *SphP, integertime time1, bool ignore_light_cone=false)
This function drifts a particle i to time1.
Definition: predict.cc:313
long long TotNumGas
Definition: simparticles.h:47
sph_particle_data * SphP
Definition: simparticles.h:59
TimeBinData TimeBinsHydro
Definition: simparticles.h:204
void tree_based_timesteps(void)
sph_particle_data * get_SphPp(int n, unsigned char shmrank)
Definition: tree.h:292
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
Definition: tree.h:197
void tree_add_to_fetch_stack(ngbnode *nop, int nodetoopen, unsigned char shmrank)
Definition: tree.h:207
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
Definition: tree.h:232
#define MIN_FLOAT_NUMBER
Definition: constants.h:80
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
#define LEVEL_ALWAYS_OPEN
Definition: dtypes.h:383
int32_t MySignedIntPosType
Definition: dtypes.h:36
#define TREE_MIN_WORKSTACK_SIZE
Definition: gravtree.h:26
#define NODE_DISCARD
Definition: gravtree.h:37
#define NODE_TYPE_FETCHED_NODE
Definition: gravtree.h:33
#define TREE_EXPECTED_CYCLES
Definition: gravtree.h:27
#define NODE_TYPE_FETCHED_PARTICLE
Definition: gravtree.h:31
#define NODE_OPEN
Definition: gravtree.h:36
#define NODE_TYPE_LOCAL_NODE
Definition: gravtree.h:32
#define NODE_TYPE_LOCAL_PARTICLE
Definition: gravtree.h:29
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
Definition: logs.h:231
#define Terminate(...)
Definition: macros.h:15
shmem Shmem
Definition: main.cc:45
memory Mem
Definition: main.cc:44
half fabs(half arg)
Definition: half.hpp:2627
expr sqrt(half arg)
Definition: half.hpp:2777
int NActiveParticles
Definition: timestep.h:18
int * ActiveParticleList
Definition: timestep.h:20
long long GlobalNActiveParticles
Definition: timestep.h:19
unsigned char level
Definition: tree.h:64
unsigned char nextnode_shmrank
Definition: tree.h:66
std::atomic< unsigned char > cannot_be_opened_locally
Definition: tree.h:70
unsigned char not_empty
Definition: tree.h:73
int nextnode
Definition: tree.h:57
vector< MyIntPosType > center
Definition: tree.h:51
int OriginTask
Definition: tree.h:61
unsigned char sibling_shmrank
Definition: tree.h:65
int sibling
Definition: tree.h:53
unsigned char Nextnode_shmrank
Definition: ngbtree.h:111
sph_particle_data_hydrocore SphCore
Definition: ngbtree.h:105
MyIntPosType IntPos[3]
Definition: ngbtree.h:108
integertime Ti_Current
Definition: allvars.h:188
std::atomic< integertime > Ti_Current
Definition: ngbtree.h:56
MyNgbTreeFloat vmax[3]
Definition: ngbtree.h:51
MySignedIntPosType center_offset_max[3]
Definition: ngbtree.h:48
MySignedIntPosType center_offset_min[3]
Definition: ngbtree.h:47
void drift_node(integertime time1, simparticles *Sp)
Definition: ngbtree.h:58
MyNgbTreeFloat vmin[3]
Definition: ngbtree.h:50
MyNgbTreeFloat MaxCsnd
Definition: ngbtree.h:54
integertime get_Ti_Current(void)
unsigned char getType(void)
MyIntPosType IntPos[3]
Definition: particle_data.h:53
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16