GADGET-4
gwalk.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#include <math.h>
15#include <mpi.h>
16#include <stdlib.h>
17#include <string.h>
18
19#include "../data/allvars.h"
20#include "../data/dtypes.h"
21#include "../data/intposconvert.h"
22#include "../data/mymalloc.h"
23#include "../domain/domain.h"
24#include "../gravity/ewald.h"
25#include "../gravtree/gravtree.h"
26#include "../gravtree/gwalk.h"
27#include "../logs/logs.h"
28#include "../logs/timer.h"
29#include "../main/simulation.h"
30#include "../mpi_utils/mpi_utils.h"
31#include "../pm/pm.h"
32#include "../sort/cxxsort.h"
33#include "../sort/peano.h"
34#include "../system/system.h"
35#include "../time_integration/timestep.h"
36
45inline void gwalk::evaluate_particle_particle_interaction(const pinfo &pdat, const int no, const char jtype, int shmrank)
46{
47#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
48 if(skip_actual_force_computation)
49 return;
50#endif
51
52 /* first, let's get the relevant information of our partner particle, which can be in three different lists */
53
54 MyIntPosType *intpos;
55 MyReal mass;
56#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
57 int overlap_check;
58#endif
59
60 MyReal hmax = pdat.h_i;
61
62 if(jtype == NODE_TYPE_LOCAL_PARTICLE)
63 {
64 particle_data *P = get_Pp(no, shmrank);
65
66 intpos = P->IntPos;
67 mass = P->getMass();
68
69#if NSOFTCLASSES > 1
71 if(h_j > hmax)
72 hmax = h_j;
73#endif
74#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
75 overlap_check = P->InsideOutsideFlag;
76#endif
77 }
78 else if(jtype == NODE_TYPE_TREEPOINT_PARTICLE)
79 {
80 int n = no - ImportedNodeOffset;
81
82 gravpoint_data *Pointp = get_pointsp(n, shmrank);
83
84 intpos = Pointp->IntPos;
85 mass = Pointp->Mass;
86
87#if NSOFTCLASSES > 1
89 if(h_j > hmax)
90 hmax = h_j;
91#endif
92#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
93 overlap_check = Pointp->InsideOutsideFlag;
94#endif
95 }
96 else /* a point that was fetched */
97 {
98 int n = no - EndOfForeignNodes;
99
100 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(n, shmrank);
101
102 intpos = foreignpoint->IntPos;
103 mass = foreignpoint->Mass;
104#if NSOFTCLASSES > 1
105 MyReal h_j = All.ForceSoftening[foreignpoint->getSofteningClass()];
106 if(h_j > hmax)
107 hmax = h_j;
108#endif
109#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
110 overlap_check = foreignpoint->InsideOutsideFlag;
111#endif
112 }
113
114#ifdef PMGRID
115 mesh_factors *mfp = &mf[LOW_MESH];
116#if defined(PLACEHIGHRESREGION)
118 {
119 if(overlap_check == FLAG_INSIDE && pdat.InsideOutsideFlag == FLAG_INSIDE)
120 mfp = &mf[HIGH_MESH];
121 }
122#endif
123#endif
124
125 vector<MyReal> dxyz;
126 Tp->nearest_image_intpos_to_pos(intpos, pdat.intpos, dxyz.da);
127
128 MyReal r2 = dxyz.r2();
129 MyReal r = sqrt(r2);
130 MyReal rinv = (r > 0) ? 1 / r : 0;
131
133
134#ifdef PMGRID
136 {
137 if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp))
138 return; // if we are outside the cut-off radius, we have no interaction
139 }
140#endif
141
142 get_gfactors_monopole(gfac, r, hmax, rinv);
143
144#ifdef EVALPOTENTIAL
145 *pdat.pot -= mass * gfac.fac0;
146#endif
147 *pdat.acc -= (mass * gfac.fac1 * rinv) * dxyz;
148
149 if(DoEwald)
150 {
151 // EWALD treatment, only done for periodic boundaries in case PM is not active
152
153 ewald_data ew;
154 Ewald.ewald_gridlookup(intpos, pdat.intpos, ewald::POINTMASS, ew);
155
156#ifdef EVALPOTENTIAL
157 *pdat.pot += mass * ew.D0phi;
158#endif
159 *pdat.acc += mass * ew.D1phi;
160 }
161
163 *pdat.GravCost += 1;
164
165 interactioncountPP += 1;
166}
167
168inline int gwalk::evaluate_particle_node_opening_criterion_and_interaction(const pinfo &pdat, gravnode *nop)
169{
170 if(nop->level <= LEVEL_ALWAYS_OPEN) // always open the root node (note: full node length does not fit in the integer type)
171 return NODE_OPEN;
172
173 MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - nop->level);
174 MyIntPosType intlen = halflen << 1;
175
176#if defined(PMGRID) || !defined(TREE_NO_SAFETY_BOX)
177 MyIntPosType dist[3];
178 Tp->nearest_image_intpos_to_absolute_intdist(nop->center.da, pdat.intpos, dist);
179#endif
180
181#ifndef TREE_NO_SAFETY_BOX
182 // if we are close to the node, and therefore open it to protect against worst-case force errors
183 if(dist[0] < intlen && dist[1] < intlen && dist[2] < intlen)
184 return NODE_OPEN;
185#endif
186
187#ifdef PMGRID
188 mesh_factors *mfp = &mf[LOW_MESH];
189
191 {
192#ifdef PLACEHIGHRESREGION
194 {
195 int overlap_check = nop->overlap_flag;
196
197 if(pdat.InsideOutsideFlag == FLAG_INSIDE && overlap_check == FLAG_BOUNDARYOVERLAP)
198 Terminate(
199 "this case should not happen: node center=(%g|%g|%g) len=%g particle=(%g|%g|%g) "
200 "rel-dist=(%g|%g|%g)\n",
201 nop->center[0] * Tp->FacIntToCoord, nop->center[1] * Tp->FacIntToCoord, nop->center[2] * Tp->FacIntToCoord,
202 intlen * Tp->FacIntToCoord, pdat.intpos[0] * Tp->FacIntToCoord, pdat.intpos[1] * Tp->FacIntToCoord,
203 pdat.intpos[2] * Tp->FacIntToCoord, pdat.intpos[0] * Tp->FacIntToCoord - nop->center[0] * Tp->FacIntToCoord,
204 pdat.intpos[1] * Tp->FacIntToCoord - nop->center[1] * Tp->FacIntToCoord,
205 pdat.intpos[2] * Tp->FacIntToCoord - nop->center[2] * Tp->FacIntToCoord);
206
207 if(overlap_check == FLAG_INSIDE && pdat.InsideOutsideFlag == FLAG_INSIDE)
208 mfp = &mf[HIGH_MESH];
209 }
210#endif
211
212 /* check whether we can stop walking along this branch */
213 if(dist[0] > mfp->intrcut[0] + halflen)
214 return NODE_DISCARD;
215
216 /* check whether we can stop walking along this branch */
217 if(dist[1] > mfp->intrcut[1] + halflen)
218 return NODE_DISCARD;
219
220 /* check whether we can stop walking along this branch */
221 if(dist[2] > mfp->intrcut[2] + halflen)
222 return NODE_DISCARD;
223 }
224#endif
225
226 /* converts the integer distance to floating point */
227 vector<MyReal> dxyz;
228 Tp->nearest_image_intpos_to_pos(nop->s.da, pdat.intpos, dxyz.da);
229
230 MyReal r2 = dxyz.r2();
231
232 MyReal mass = nop->mass;
233
234 MyReal len = intlen * Tp->FacIntToCoord;
235 MyReal len2 = len * len;
236
237 if(All.RelOpeningCriterionInUse == 0) /* check Barnes-Hut opening criterion */
238 {
239 if(len2 > r2 * theta2)
240 return NODE_OPEN;
241 }
242 else /* check relative opening criterion */
243 {
244#if(MULTIPOLE_ORDER <= 2)
245 if(mass * len2 > r2 * r2 * errTolForceAcc * pdat.aold)
246 return NODE_OPEN;
247#elif(MULTIPOLE_ORDER == 3)
248 if(square(mass * len * len2) > r2 * square(r2 * r2 * errTolForceAcc * pdat.aold))
249 return NODE_OPEN;
250#elif(MULTIPOLE_ORDER == 4)
251 if(mass * len2 * len2 > r2 * r2 * r2 * errTolForceAcc * pdat.aold)
252 return NODE_OPEN;
253#elif(MULTIPOLE_ORDER == 5)
254 if(square(mass * len2 * len2 * len) > r2 * square(r2 * r2 * r2 * errTolForceAcc * pdat.aold))
255 return NODE_OPEN;
256#endif
257 // carry out an additional test to protect against pathological force errors for very large opening angles
258 if(len2 > r2 * thetamax2)
259 return NODE_OPEN;
260 }
261
262 MyReal hmax = pdat.h_i;
263
264#if NSOFTCLASSES > 1
266
267 if(h_j > hmax)
268 {
269 if(r2 < h_j * h_j)
270 {
272 return NODE_OPEN;
273 }
274 hmax = h_j;
275 }
276#endif
277
278 /**************************/
279
280 // now evaluate the multipole moment interaction
281
282#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
283 if(skip_actual_force_computation)
284 return NODE_USE;
285#endif
286
287 MyReal r = sqrt(r2);
288 MyReal rinv = (r > 0) ? 1 / r : 0;
289
291
292#ifdef PMGRID
294 {
295 if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp))
296 return NODE_DISCARD; // if we are outside the cut-off radius, we have no interaction
297 }
298#endif
299
300 get_gfactors_multipole(gfac, r, hmax, rinv);
301
302#ifdef EVALPOTENTIAL
303 MyReal g0 = gfac.fac0;
304 *pdat.pot -= mass * g0; // monopole potential
305#endif
306
307 MyReal g1 = gfac.fac1 * rinv;
308 *pdat.acc -= (mass * g1) * dxyz; // monopole force
309
310#if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
311 MyReal g2 = gfac.fac2 * gfac.rinv2;
312 vector<MyReal> Q2dxyz = nop->Q2Tensor * dxyz;
313 MyReal Q2dxyz2 = Q2dxyz * dxyz;
314 MyReal Q2trace = nop->Q2Tensor.trace();
315#if(MULTIPOLE_ORDER >= 3)
316 MyReal g3 = gfac.fac3 * gfac.rinv3;
317 *pdat.acc -= static_cast<MyReal>(0.5) * (g2 * Q2trace + g3 * Q2dxyz2) * dxyz + g2 * Q2dxyz; // quadrupole force
318#endif
319#ifdef EVALPOTENTIAL
320 *pdat.pot -= static_cast<MyReal>(0.5) * (g1 * Q2trace + g2 * Q2dxyz2); // quadrupole potential
321#endif
322#endif
323
324#if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
325 symtensor3<MyDouble> &Q3 = nop->Q3Tensor;
326
327 symtensor2<MyDouble> Q3dxyz = Q3 * dxyz;
328 vector<MyDouble> Q3dxyz2 = Q3dxyz * dxyz;
329 MyReal Q3dxyz3 = Q3dxyz2 * dxyz;
330 MyReal Q3dxyzTrace = Q3dxyz.trace();
331
332 vector<MyDouble> Q3vec;
333 Q3vec[0] = Q3[dXXX] + Q3[dXYY] + Q3[dXZZ];
334 Q3vec[1] = Q3[dYXX] + Q3[dYYY] + Q3[dYZZ];
335 Q3vec[2] = Q3[dZXX] + Q3[dZYY] + Q3[dZZZ];
336
337#if(MULTIPOLE_ORDER >= 4)
338 MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
339 *pdat.acc -=
340 static_cast<MyReal>(0.5) *
341 (g2 * Q3vec + g3 * Q3dxyz2 + (static_cast<MyReal>(1.0 / 3) * g4 * Q3dxyz3 + g3 * Q3dxyzTrace) * dxyz); // octupole force
342#endif
343#ifdef EVALPOTENTIAL
344 *pdat.pot -= static_cast<MyReal>(1.0 / 6) * (3 * g2 * Q3dxyzTrace + g3 * Q3dxyz3); // octupole potential
345#endif
346#endif
347
348#if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
349 // now compute the hexadecupole force
350 symtensor4<MyDouble> &Q4 = nop->Q4Tensor;
351
352 symtensor3<MyDouble> Q4dxyz = Q4 * dxyz;
353 symtensor2<MyDouble> Q4dxyz2 = Q4dxyz * dxyz;
354 vector<MyDouble> Q4dxyz3 = Q4dxyz2 * dxyz;
355 MyReal Q4dxyz4 = Q4dxyz3 * dxyz;
356 MyReal Q4dxyz2trace = Q4dxyz2.trace();
357
359 QT[qXX] = Q4[sXXXX] + Q4[sXXYY] + Q4[sXXZZ];
360 QT[qYY] = Q4[sYYXX] + Q4[sYYYY] + Q4[sYYZZ];
361 QT[qZZ] = Q4[sZZXX] + Q4[sZZYY] + Q4[sZZZZ];
362 QT[qXY] = Q4[sXYXX] + Q4[sXYYY] + Q4[sXYZZ];
363 QT[qXZ] = Q4[sXZXX] + Q4[sXZYY] + Q4[sXZZZ];
364 QT[qYZ] = Q4[sYZXX] + Q4[sYZYY] + Q4[sYZZZ];
365 MyReal QTtrace = QT.trace();
366
367#if(MULTIPOLE_ORDER >= 5)
368 vector<MyDouble> QTdxyz = QT * dxyz;
369 MyReal g5 = gfac.fac5 * gfac.rinv2 * gfac.rinv3;
370 *pdat.acc -=
371 static_cast<MyReal>(1.0 / 24) * (g3 * (3 * QTtrace * dxyz + 12 * QTdxyz) + g4 * (6 * Q4dxyz2trace * dxyz + 4 * Q4dxyz3) +
372 g5 * Q4dxyz4 * dxyz); // hexadecupole force
373#endif
374#ifdef EVALPOTENTIAL
375 *pdat.pot -= static_cast<MyReal>(1.0 / 24) * (g2 * 3 * QTtrace + g3 * 6 * Q4dxyz2trace + g4 * Q4dxyz4); // hexadecupole potential
376#endif
377#endif
378
379#if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
380 symtensor5<MyDouble> &Q5 = nop->Q5Tensor;
381
382 symtensor4<MyDouble> Q5dxyz = Q5 * dxyz;
383 symtensor3<MyDouble> Q5dxyz2 = Q5dxyz * dxyz;
384 symtensor2<MyDouble> Q5dxyz3 = Q5dxyz2 * dxyz;
385 vector<MyDouble> Q5dxyz4 = Q5dxyz3 * dxyz;
386 MyReal Q5dxyz5 = Q5dxyz4 * dxyz;
387
388 MyReal Q5dxyzTtrace = Q5dxyz[sXXXX] + Q5dxyz[sYYYY] + Q5dxyz[sZZZZ] + 2 * (Q5dxyz[sXXYY] + Q5dxyz[sXXZZ] + Q5dxyz[sYYZZ]);
389
390 // now compute the triakontadipole potential term
391 *pdat.pot -= static_cast<MyReal>(1.0 / 120) * (g3 * 15 * Q5dxyzTtrace + g4 * 10 * Q5dxyz3.trace() + g5 * Q5dxyz5);
392#endif
393
394 if(DoEwald)
395 {
396 // EWALD treatment, only done for periodic boundaries in case PM is not active
397
398 ewald_data ew;
399 Ewald.ewald_gridlookup(nop->s.da, pdat.intpos, ewald::MULTIPOLES, ew);
400
401#ifdef EVALPOTENTIAL
402 *pdat.pot += mass * ew.D0phi;
403#if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
404 *pdat.pot += static_cast<MyReal>(0.5) * (nop->Q2Tensor * ew.D2phi);
405#endif
406#if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
407 *pdat.pot += static_cast<MyReal>(1.0 / 6) * (nop->Q3Tensor * ew.D3phi);
408#endif
409#if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
410 *pdat.pot += static_cast<MyReal>(1.0 / 24) * (nop->Q4Tensor * ew.D4phi);
411#endif
412#if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
413 *pdat.pot += static_cast<MyReal>(1.0 / 120) * (nop->Q5Tensor * ew.D5phi);
414#endif
415#endif
416 *pdat.acc += mass * ew.D1phi;
417#if(MULTIPOLE_ORDER >= 3)
418 *pdat.acc += static_cast<MyReal>(0.5) * (ew.D3phi * nop->Q2Tensor);
419#endif
420#if(MULTIPOLE_ORDER >= 4)
421 *pdat.acc += static_cast<MyReal>(1.0 / 6) * (ew.D4phi * nop->Q3Tensor);
422#endif
423#if(MULTIPOLE_ORDER >= 5)
424 *pdat.acc += static_cast<MyReal>(1.0 / 24) * (ew.D5phi * nop->Q4Tensor);
425#endif
426 }
427
428 interactioncountPN += 1;
429
431 *pdat.GravCost += 1;
432
433 return NODE_USE;
434}
435
436inline void gwalk::gwalk_open_node(const pinfo &pdat, int i, char ptype, gravnode *nop, int mintopleafnode, int committed)
437{
438 /* open node */
439 int p = nop->nextnode;
440 unsigned char shmrank = nop->nextnode_shmrank;
441
442 while(p != nop->sibling || (shmrank != nop->sibling_shmrank && nop->sibling >= MaxPart + D->NTopnodes))
443 {
444 if(p < 0)
445 Terminate(
446 "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d mass=%g "
447 "first_nontoplevelnode=%d",
448 p, nop->sibling, nop->nextnode, shmrank, nop->sibling_shmrank, nop->OriginTask, nop->mass, MaxPart + D->NTopnodes);
449
450 int next;
451 unsigned char next_shmrank;
452 char type;
453
454 if(p < MaxPart) /* a local particle */
455 {
456 /* note: here shmrank cannot change */
457 next = get_nextnodep(shmrank)[p];
458 next_shmrank = shmrank;
460 }
461 else if(p < MaxPart + MaxNodes) /* an internal node */
462 {
463 gravnode *nop = get_nodep(p, shmrank);
464 next = nop->sibling;
465 next_shmrank = nop->sibling_shmrank;
467 }
468 else if(p >= ImportedNodeOffset && p < EndOfTreePoints) /* an imported Treepoint particle */
469 {
470 /* note: here shmrank cannot change */
471 next = get_nextnodep(shmrank)[p - MaxNodes];
472 next_shmrank = shmrank;
474 }
475 else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
476 {
477 gravnode *nop = get_nodep(p, shmrank);
478 next = nop->sibling;
479 next_shmrank = nop->sibling_shmrank;
481 }
482 else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
483 {
484 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
485
486 next = foreignpoint->Nextnode;
487 next_shmrank = foreignpoint->Nextnode_shmrank;
489 }
490 else
491 {
492 /* a pseudo point */
493 Terminate(
494 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
495 "shmrank=%d",
497 }
498
499 gravity_force_interact(pdat, i, p, ptype, type, shmrank, mintopleafnode, committed);
500
501 p = next;
502 shmrank = next_shmrank;
503 }
504}
505
506void gwalk::gravity_force_interact(const pinfo &pdat, int i, int no, char ptype, char no_type, unsigned char shmrank,
507 int mintopleafnode, int committed)
508{
509 if(no_type <= NODE_TYPE_FETCHED_PARTICLE) // we are interacting with a particle
510 {
511 evaluate_particle_particle_interaction(pdat, no, no_type, shmrank);
512 }
513 else // we are interacting with a node
514 {
515 gravnode *nop = get_nodep(no, shmrank);
516
517 if(nop->not_empty == 0)
518 return;
519
520 if(no < MaxPart + MaxNodes) // we have a top-level node
521 if(nop->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
522 {
523 mintopleafnode = no;
524
525#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
526 // if the leaf node is on this shared memory, we have all the data, so we for sure don't need to import anything on this
527 // branch
528 if(skip_actual_force_computation)
530 return;
531#endif
532 }
533
534 int openflag = evaluate_particle_node_opening_criterion_and_interaction(pdat, nop);
535
536 if(openflag == NODE_OPEN) /* cell can't be used, need to open it */
537 {
538 if(nop->cannot_be_opened_locally.load(std::memory_order_acquire))
539 {
540 // are we in the same shared memory node?
542 {
543 Terminate("this should not happen any more");
544 }
545 else
546 {
547 tree_add_to_fetch_stack(nop, no, shmrank); // will only add unique copies
548
549 tree_add_to_work_stack(i, no, shmrank, mintopleafnode);
550 }
551 }
552 else
553 {
554 int min_buffer_space =
556
557 if(min_buffer_space >= committed + 8 * TREE_NUM_BEFORE_NODESPLIT)
558 gwalk_open_node(pdat, i, ptype, nop, mintopleafnode, committed + 8 * TREE_NUM_BEFORE_NODESPLIT);
559 else
560 tree_add_to_work_stack(i, no, shmrank, mintopleafnode);
561 }
562 }
563 }
564}
565
566/*
567
568*/
569
600void gwalk::gravity_tree(int timebin)
601{
602 interactioncountPP = 0;
603 interactioncountPN = 0;
604
606 TIMER_START(CPU_TREE);
607
608 D->mpi_printf("GRAVTREE: Begin tree force. timebin=%d (presently allocated=%g MB)\n", timebin, Mem.getAllocatedBytesInMB());
609
610#ifdef PMGRID
611 set_mesh_factors();
612#endif
613
614 TIMER_START(CPU_TREESTACK);
615
616 // Create list of targets (the work queue). There are initially two possible sources of points, local ones, and imported ones.
617
618 NumOnWorkStack = 0;
622
623 WorkStack = (workstack_data *)Mem.mymalloc("WorkStack", AllocWorkStackBaseHigh * sizeof(workstack_data));
624 ResultIndexList = (int *)Mem.mymalloc("ResultIndexList", NumPartImported * sizeof(int));
625
626 for(int i = 0; i < Tp->TimeBinsGravity.NActiveParticles; i++)
627 {
628 int target = Tp->TimeBinsGravity.ActiveParticleList[i];
629
630 // if we have exported particles, we need to explicitly check whether this particle is among them
631 if(NumPartExported > 0)
632 {
633 MyIntPosType xxb = Tp->P[target].IntPos[0];
634 MyIntPosType yyb = Tp->P[target].IntPos[1];
635 MyIntPosType zzb = Tp->P[target].IntPos[2];
636 MyIntPosType mask = (((MyIntPosType)1) << (BITS_FOR_POSITIONS - 1));
637 unsigned char shiftx = (BITS_FOR_POSITIONS - 3);
638 unsigned char shifty = (BITS_FOR_POSITIONS - 2);
639 unsigned char shiftz = (BITS_FOR_POSITIONS - 1);
640 unsigned char level = 0;
641 unsigned char rotation = 0;
642
643 int no = 0;
644 while(D->TopNodes[no].Daughter >= 0) /* walk down top tree to find correct leaf */
645 {
646 unsigned char pix = (((unsigned char)((xxb & mask) >> (shiftx--))) | ((unsigned char)((yyb & mask) >> (shifty--))) |
647 ((unsigned char)((zzb & mask) >> (shiftz--))));
648 unsigned char subnode = peano_incremental_key(pix, &rotation);
649 mask >>= 1;
650 level++;
651 no = D->TopNodes[no].Daughter + subnode;
652 }
653
654 no = D->TopNodes[no].Leaf;
655 int task = D->TaskOfLeaf[no];
656
657 if(task == D->ThisTask)
658 {
659 WorkStack[NumOnWorkStack].Target = target;
662 WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
664 }
665 }
666 else
667 {
668 WorkStack[NumOnWorkStack].Target = target;
671 WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
673 }
674
675 /* let's do a safety check here to protect against accidental use of zero softening lengths */
676 int softtype = Tp->P[target].getSofteningClass();
677 if(All.ForceSoftening[softtype] == 0)
678 Terminate("Particle with ID=%lld of type=%d and softening type=%d was assigned zero softening\n",
679 (long long)Tp->P[target].ID.get(), Tp->P[target].getType(), softtype);
680 }
681
682 int ncount = 0;
683
684 for(int i = 0; i < NumPartImported; i++)
685 {
686#ifndef HIERARCHICAL_GRAVITY
687 if(Points[i].ActiveFlag)
688#endif
689 {
690 ResultIndexList[i] = ncount++;
691
695 WorkStack[NumOnWorkStack].MinTopLeafNode = MaxPart + D->NTopnodes;
697 }
698 }
699
700#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
701 workstack_data *WorkStackBak = (workstack_data *)Mem.mymalloc("WorkStackBak", NumOnWorkStack * sizeof(workstack_data));
702 int NumOnWorkStackBak = NumOnWorkStack;
703 memcpy(WorkStackBak, WorkStack, NumOnWorkStack * sizeof(workstack_data));
704#endif
705
707 (resultsactiveimported_data *)Mem.mymalloc_clear("ResultsActiveImported", ncount * sizeof(resultsactiveimported_data));
708
709 /******************************************/
710 /* now execute the tree walk calculations */
711 /******************************************/
712
713 theta2 = All.ErrTolTheta * All.ErrTolTheta;
714 thetamax2 = All.ErrTolThetaMax * All.ErrTolThetaMax;
715 errTolForceAcc = All.ErrTolForceAcc;
716
719
720 // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large)
722 StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", MaxOnFetchStack * sizeof(fetch_data));
723
724 // let's grab at most half the still available memory for imported points and nodes
725 int nspace = (0.5 * Mem.FreeBytes) / (sizeof(gravnode) + 8 * sizeof(foreign_gravpoint_data));
726
727 MaxForeignNodes = nspace;
728 MaxForeignPoints = 8 * nspace;
729 NumForeignNodes = 0;
731
732 /* the following two arrays hold imported tree nodes and imported points to augment the local tree */
733 Foreign_Nodes = (gravnode *)Mem.mymalloc_movable(&Foreign_Nodes, "Foreign_Nodes", MaxForeignNodes * sizeof(gravnode));
734 Foreign_Points = (foreign_gravpoint_data *)Mem.mymalloc_movable(&Foreign_Points, "Foreign_Points",
736
738
739 TIMER_STOP(CPU_TREESTACK);
740
741 double t0 = Logs.second();
742 int max_ncycles = 0;
743
745
746#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
747 for(int rep = 0; rep < 2; rep++)
748 {
749 if(rep == 0)
750 {
751 skip_actual_force_computation = true;
752 }
753 else
754 {
755 skip_actual_force_computation = false;
756 NumOnWorkStack = NumOnWorkStackBak;
757 memcpy(WorkStack, WorkStackBak, NumOnWorkStack * sizeof(workstack_data));
758 }
759#endif
760
761 while(NumOnWorkStack > 0) // repeat until we are out of work
762 {
763 NewOnWorkStack = 0; // gives the new entries
764 NumOnFetchStack = 0;
766
767 TIMER_START(CPU_TREEWALK);
768
769 int item = 0;
770
771 while(item < NumOnWorkStack)
772 {
773 int committed = 8 * TREE_NUM_BEFORE_NODESPLIT;
774 int min_buffer_space =
776 if(min_buffer_space >= committed)
777 {
778 int target = WorkStack[item].Target;
779 int no = WorkStack[item].Node;
780 int shmrank = WorkStack[item].ShmRank;
781 int mintopleaf = WorkStack[item].MinTopLeafNode;
782 item++;
783
784 pinfo pdat;
785 int ptype = get_pinfo(target, pdat);
786
787 if(no == MaxPart)
788 {
789 // we have a pristine particle that's processed for the first time
790 gravity_force_interact(pdat, target, no, ptype, NODE_TYPE_LOCAL_NODE, shmrank, mintopleaf, committed);
791 }
792 else
793 {
794 // we have a node that we previously could not open
795 gravnode *nop = get_nodep(no, shmrank);
796
798 {
799 Terminate("item=%d: no=%d now we should be able to open it!", item, no);
800 }
801 else
802 gwalk_open_node(pdat, target, ptype, nop, mintopleaf, committed);
803 }
804 }
805 else
806 break;
807 }
808
809 if(item == 0 && NumOnWorkStack > 0)
810 Terminate("Can't even process a single particle");
811
812 TIMER_STOP(CPU_TREEWALK);
813
814 TIMER_START(CPU_TREEFETCH);
815
817
818 TIMER_STOP(CPU_TREEFETCH);
819
820 TIMER_START(CPU_TREESTACK);
821
822 /* now reorder the workstack such that we are first going to do residual pristine particles, and then
823 * imported nodes that hang below the first leaf nodes */
825 memmove(WorkStack, WorkStack + item, NumOnWorkStack * sizeof(workstack_data));
826
827 /* now let's sort such that we can go deep on top-level node branches, allowing us to clear them out eventually */
829
830 TIMER_STOP(CPU_TREESTACK);
831
832 max_ncycles++;
833 }
834
835#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
836 }
837#endif
838
839 TIMER_START(CPU_TREEIMBALANCE);
840
841 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator);
842
843 TIMER_STOP(CPU_TREEIMBALANCE);
844
846
847 /* free temporary buffers */
848
849 Mem.myfree(Foreign_Points);
850 Mem.myfree(Foreign_Nodes);
851 Mem.myfree(StackToFetch);
852
853 double t1 = Logs.second();
854
855 D->mpi_printf("GRAVTREE: tree-forces are calculated, with %d cycles took %g sec\n", max_ncycles, Logs.timediff(t0, t1));
856
857 /* now communicate the forces in ResultsActiveImported */
859
861#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
862 Mem.myfree(WorkStackBak);
863#endif
864 Mem.myfree(ResultIndexList);
865 Mem.myfree(WorkStack);
866
867 TIMER_STOP(CPU_TREE);
868
869 D->mpi_printf("GRAVTREE: tree-force is done.\n");
870
871 /* gather some diagnostic information */
872
873 TIMER_START(CPU_LOGS);
874
875 struct detailed_timings
876 {
877 double tree, wait, fetch, stack, all, lastpm;
878 double costtotal, numnodes;
879 double interactioncountPP, interactioncountPN;
881 double fillfacFgnNodes, fillfacFgnPoints;
882 };
883 detailed_timings timer, tisum, timax;
884
885 timer.tree = TIMER_DIFF(CPU_TREEWALK);
886 timer.wait = TIMER_DIFF(CPU_TREEIMBALANCE);
887 timer.fetch = TIMER_DIFF(CPU_TREEFETCH);
888 timer.stack = TIMER_DIFF(CPU_TREESTACK);
889 timer.all = timer.tree + timer.wait + timer.fetch + timer.stack + TIMER_DIFF(CPU_TREE);
890 timer.lastpm = All.CPUForLastPMExecution;
891 timer.costtotal = interactioncountPP + interactioncountPN;
892 timer.numnodes = NumNodes;
893 timer.interactioncountPP = interactioncountPP;
894 timer.interactioncountPN = interactioncountPN;
895 timer.NumForeignNodes = NumForeignNodes;
896 timer.NumForeignPoints = NumForeignPoints;
897 timer.fillfacFgnNodes = NumForeignNodes / ((double)MaxForeignNodes);
898 timer.fillfacFgnPoints = NumForeignPoints / ((double)MaxForeignPoints);
899
900 MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
901 D->Communicator);
902 MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
903 D->Communicator);
904
906
907 if(D->ThisTask == 0)
908 {
909 fprintf(Logs.FdTimings, "Nf=%9lld timebin=%d total-Nf=%lld\n", Tp->TimeBinsGravity.GlobalNActiveParticles, timebin,
911 fprintf(Logs.FdTimings, " work-load balance: %g part/sec: raw=%g, effective=%g ia/part: avg=%g (%g|%g)\n",
912 timax.tree / ((tisum.tree + 1e-20) / D->NTask), Tp->TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
913 Tp->TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * D->NTask),
914 tisum.costtotal / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
915 tisum.interactioncountPP / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
916 tisum.interactioncountPN / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));
917 fprintf(Logs.FdTimings,
918 " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
919 "fill=%g cycles=%d\n",
920 timax.numnodes, timax.numnodes / MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes / D->NTask,
921 timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints / D->NTask, timax.fillfacFgnPoints, max_ncycles);
922 fprintf(Logs.FdTimings,
923 " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g <stack>=%g "
924 "(lastpm=%g) sec\n",
925 tisum.all / D->NTask, tisum.tree / D->NTask, tisum.wait / D->NTask, tisum.fetch / D->NTask, tisum.stack / D->NTask,
926 tisum.lastpm / D->NTask);
927 fprintf(Logs.FdTimings, " total interaction cost: %g (imbalance=%g)\n", tisum.costtotal,
928 timax.costtotal / (tisum.costtotal / D->NTask));
930 }
931
932 TIMER_STOP(CPU_LOGS);
933}
934
935/* make sure that we instantiate the template */
936#include "../data/simparticles.h"
937template class gravtree<simparticles>;
global_data_all_processes All
Definition: main.cc:40
MyIDType get(void) const
Definition: idstorage.h:87
@ POINTMASS
Definition: ewald.h:67
@ MULTIPOLES
Definition: ewald.h:68
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:355
void gravity_exchange_forces(void)
Definition: gravtree.cc:134
mesh_factors mf[2]
Definition: gravtree.h:271
resultsactiveimported_data * ResultsActiveImported
Definition: gravtree.h:293
void get_gfactors_multipole(gfactors &res, const T r, const T h_max, const T rinv)
Definition: gravtree.h:405
void get_gfactors_monopole(gfactors &res, const T r, const T h_max, const T rinv)
Definition: gravtree.h:525
void gravity_tree(int timebin)
This function computes the gravitational forces for all active particles.
Definition: gwalk.cc:600
void nearest_image_intpos_to_absolute_intdist(const MyIntPosType *a, const MyIntPosType *b, MyIntPosType *delta)
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
MyReal FacIntToCoord
Definition: intposconvert.h:37
double timediff(double t0, double t1)
Definition: logs.cc:488
FILE * FdTimings
Definition: logs.h:47
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:71
size_t FreeBytes
Definition: mymalloc.h:48
int * GetNodeIDForSimulCommRank
int Island_ThisTask
TimeBinData TimeBinsGravity
Definition: simparticles.h:205
particle_data * P
Definition: simparticles.h:54
T trace(void)
Definition: symtensors.h:285
gravpoint_data * get_pointsp(int no, unsigned char shmrank)
Definition: tree.h:376
foreign_gravpoint_data * get_foreignpointsp(int n, unsigned char shmrank)
Definition: tree.h:275
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
Definition: tree.h:197
void tree_add_to_fetch_stack(gravnode *nop, int nodetoopen, unsigned char shmrank)
Definition: tree.h:207
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
Definition: tree.h:232
T r2(void)
Definition: symtensors.h:187
T da[3]
Definition: symtensors.h:106
#define FLAG_BOUNDARYOVERLAP
Definition: constants.h:31
#define FLAG_INSIDE
Definition: constants.h:30
#define LOW_MESH
Definition: constants.h:33
#define HIGH_MESH
Definition: constants.h:34
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
#define LEVEL_ALWAYS_OPEN
Definition: dtypes.h:383
double MyReal
Definition: dtypes.h:82
#define BITS_FOR_POSITIONS
Definition: dtypes.h:37
ewald Ewald
Definition: main.cc:42
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
Definition: gravtree.h:24
#define TREE_MIN_WORKSTACK_SIZE
Definition: gravtree.h:26
#define NODE_USE
Definition: gravtree.h:35
#define NODE_DISCARD
Definition: gravtree.h:37
#define NODE_TYPE_FETCHED_NODE
Definition: gravtree.h:33
#define NODE_TYPE_TREEPOINT_PARTICLE
Definition: gravtree.h:30
#define TREE_EXPECTED_CYCLES
Definition: gravtree.h:27
#define NODE_TYPE_FETCHED_PARTICLE
Definition: gravtree.h:31
#define NODE_OPEN
Definition: gravtree.h:36
#define NODE_TYPE_LOCAL_NODE
Definition: gravtree.h:32
#define TREE_ACTIVE_CUTTOFF_BASE_PM
Definition: gravtree.h:23
#define NODE_TYPE_LOCAL_PARTICLE
Definition: gravtree.h:29
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
Definition: logs.h:257
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
Definition: logs.h:250
#define Terminate(...)
Definition: macros.h:15
shmem Shmem
Definition: main.cc:45
memory Mem
Definition: main.cc:44
expr sqrt(half arg)
Definition: half.hpp:2777
unsigned char peano_incremental_key(unsigned char pix, unsigned char *rotation)
Definition: peano.cc:107
int NActiveParticles
Definition: timestep.h:18
int * ActiveParticleList
Definition: timestep.h:20
long long GlobalNActiveParticles
Definition: timestep.h:19
unsigned char level
Definition: tree.h:64
unsigned char nextnode_shmrank
Definition: tree.h:66
std::atomic< unsigned char > cannot_be_opened_locally
Definition: tree.h:70
unsigned char not_empty
Definition: tree.h:73
int nextnode
Definition: tree.h:57
vector< MyIntPosType > center
Definition: tree.h:51
int OriginTask
Definition: tree.h:61
unsigned char sibling_shmrank
Definition: tree.h:65
int sibling
Definition: tree.h:53
vector< MyReal > D1phi
Definition: gravtree.h:186
symtensor3< MyReal > D3phi
Definition: gravtree.h:188
MyReal D0phi
Definition: gravtree.h:185
symtensor2< MyReal > D2phi
Definition: gravtree.h:187
unsigned char getSofteningClass(void)
Definition: gravtree.h:143
unsigned char Nextnode_shmrank
Definition: gravtree.h:133
MyIntPosType IntPos[3]
Definition: gravtree.h:130
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
Definition: allvars.h:258
long long TotNumOfForces
Definition: allvars.h:103
MyDouble mass
Definition: gravtree.h:65
vector< MyIntPosType > s
Definition: gravtree.h:66
unsigned char maxsofttype
Definition: gravtree.h:83
unsigned char minsofttype
Definition: gravtree.h:84
unsigned char getSofteningClass(void)
Definition: gravtree.h:118
MyDouble Mass
Definition: gravtree.h:103
MyIntPosType IntPos[3]
Definition: gravtree.h:102
MyDouble getMass(void)
unsigned char getSofteningClass(void)
MyIDStorage ID
Definition: particle_data.h:70
unsigned char getType(void)
MyIntPosType IntPos[3]
Definition: particle_data.h:53
#define sXXYY
#define sXYXX
#define sZZZZ
#define dYZZ
#define sXXXX
#define sZZXX
#define sYYZZ
#define dXYY
#define sZZYY
#define qXX
#define dZZZ
#define dYYY
#define sXZZZ
#define qYY
#define qYZ
#define sYZXX
#define sYYYY
#define sXYYY
#define dXXX
#define sXYZZ
#define qZZ
#define sXZYY
#define dYXX
#define dZYY
#define sYZZZ
#define sXXZZ
#define sYYXX
#define qXY
#define dZXX
#define sXZXX
#define sYZYY
#define dXZZ
#define qXZ
void myflush(FILE *fstream)
Definition: system.cc:125
T square(T const value)
Definition: system.h:36
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16