GADGET-4
fmm.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#ifdef FMM
15
16#include <math.h>
17#include <mpi.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21
22#include "../data/allvars.h"
23#include "../data/dtypes.h"
24#include "../data/intposconvert.h"
25#include "../data/mymalloc.h"
26#include "../domain/domain.h"
27#include "../fmm/fmm.h"
28#include "../gravity/ewald.h"
29#include "../gravtree/gravtree.h"
30#include "../logs/logs.h"
31#include "../logs/timer.h"
32#include "../main/simulation.h"
33#include "../mpi_utils/mpi_utils.h"
34#include "../mpi_utils/shared_mem_handler.h"
35#include "../pm/pm.h"
36#include "../sort/cxxsort.h"
37#include "../system/system.h"
38#include "../time_integration/timestep.h"
39
40void fmm::fmm_force_passdown(int no, unsigned char no_shmrank, taylor_data taylor_current)
41{
42 if(no >= MaxPart && no < MaxPart + MaxNodes) /* an internal node */
43 {
44 /* first, let's add the external field expansion to the local one accumulated for this node */
45#ifdef EVALPOTENTIAL
46 taylor_current.coeff.phi += TaylorCoeff[no].coeff.phi;
47#endif
48 taylor_current.coeff.dphi += TaylorCoeff[no].coeff.dphi;
49#if(MULTIPOLE_ORDER >= 2)
50 taylor_current.coeff.d2phi += TaylorCoeff[no].coeff.d2phi;
51#endif
52#if(MULTIPOLE_ORDER >= 3)
53 taylor_current.coeff.d3phi += TaylorCoeff[no].coeff.d3phi;
54#endif
55#if(MULTIPOLE_ORDER >= 4)
56 taylor_current.coeff.d4phi += TaylorCoeff[no].coeff.d4phi;
57#endif
58#if(MULTIPOLE_ORDER >= 5)
59 taylor_current.coeff.d5phi += TaylorCoeff[no].coeff.d5phi;
60#endif
61
62 taylor_current.coeff.interactions += TaylorCoeff[no].coeff.interactions;
63 }
64 else
65 Terminate("this is not an internal node, which should not happen\n");
66
67 gravnode *node_no = get_nodep(no, no_shmrank);
68
69 int p = node_no->nextnode; /* open cell */
70
71 unsigned char shmrank = node_no->nextnode_shmrank;
72
73 while(p != node_no->sibling || (shmrank != node_no->sibling_shmrank && node_no->sibling >= MaxPart + D->NTopnodes))
74 {
75 if(p < MaxPart || (p >= ImportedNodeOffset && p < EndOfTreePoints)) /* we have found a single particle */
76 {
77 if(shmrank != Shmem.Island_ThisTask)
78 Terminate("odd");
79
80 int m, mp;
81 MyIntPosType *intpos;
82
83 if(p >= ImportedNodeOffset) /* an imported Treepoint particle */
84 {
85 m = p - ImportedNodeOffset;
86 intpos = Points[m].IntPos;
87 mp = -1;
88 p = get_nextnodep(shmrank)[p - MaxNodes];
89 }
90 else
91 {
92 m = p;
93 intpos = Tp->P[p].IntPos;
94 mp = p;
95 p = get_nextnodep(shmrank)[p];
96 }
97
98 /* apply expansion to particle */
99
100 vector<MyReal> dxyz;
101 Tp->nearest_image_intpos_to_pos(intpos, node_no->s.da, dxyz.da);
102
103#ifdef EVALPOTENTIAL
104 MyReal pot = taylor_current.coeff.phi + taylor_current.coeff.dphi * dxyz;
105#endif
106 vector<MyReal> dphi = taylor_current.coeff.dphi;
107
108#if(MULTIPOLE_ORDER >= 2)
109 vector<MyReal> d2phidxyz = taylor_current.coeff.d2phi * dxyz;
110 dphi += d2phidxyz;
111#ifdef EVALPOTENTIAL
112 pot += 0.5f * (d2phidxyz * dxyz);
113#endif
114#endif
115#if(MULTIPOLE_ORDER >= 3)
116 vector<MyReal> d3phidxyz2 = contract_twice(taylor_current.coeff.d3phi, dxyz);
117 dphi += 0.5f * d3phidxyz2;
118#ifdef EVALPOTENTIAL
119 pot += static_cast<MyReal>(1.0 / 6) * (dxyz * d3phidxyz2);
120#endif
121#endif
122#if(MULTIPOLE_ORDER >= 4)
123 vector<MyReal> d4phidxyz3 = contract_thrice(taylor_current.coeff.d4phi, dxyz);
124 dphi += static_cast<MyReal>(1.0 / 6) * d4phidxyz3;
125#ifdef EVALPOTENTIAL
126 pot += static_cast<MyReal>(1.0 / 24) * (dxyz * d4phidxyz3);
127#endif
128#endif
129#if(MULTIPOLE_ORDER >= 5)
130 vector<MyReal> d5phidxyz4 = contract_fourtimes(taylor_current.coeff.d5phi, dxyz);
131 dphi += static_cast<MyReal>(1.0 / 24) * d5phidxyz4;
132#ifdef EVALPOTENTIAL
133 pot += static_cast<MyReal>(1.0 / 120) * (dxyz * d5phidxyz4);
134#endif
135#endif
136
137 if(mp >= 0)
138 {
139#ifndef HIERARCHICAL_GRAVITY
140 if(Tp->TimeBinSynchronized[Tp->P[mp].TimeBinGrav])
141#endif
142 {
143 Tp->P[mp].GravAccel -= dphi;
144#ifdef EVALPOTENTIAL
145 Tp->P[mp].Potential += pot;
146#endif
147
148 if(MeasureCostFlag)
149 Tp->P[mp].GravCost += taylor_current.coeff.interactions;
150
151 interactioncountEffective += taylor_current.coeff.interactions;
152 }
153 }
154 else
155 {
156#ifndef HIERARCHICAL_GRAVITY
157 if(Points[m].ActiveFlag)
158#endif
159 {
160 int idx = ResultIndexList[m];
161 ResultsActiveImported[idx].GravAccel -= dphi;
162#ifdef EVALPOTENTIAL
163 ResultsActiveImported[idx].Potential += pot;
164#endif
165 if(MeasureCostFlag)
166 ResultsActiveImported[idx].GravCost += taylor_current.coeff.interactions;
167
168 interactioncountEffective += taylor_current.coeff.interactions;
169 }
170 }
171 }
172 else if(p < MaxPart + MaxNodes) /* an internal node */
173 {
174 gravnode *node_p = get_nodep(p, shmrank);
175
176 if(fmm_depends_on_local_mass(p, shmrank))
177 {
178 taylor_data taylor_sub = taylor_current;
179
180 vector<MyReal> dxyz;
181 Tp->nearest_image_intpos_to_pos(node_p->s.da, node_no->s.da, dxyz.da);
182
183 /* now shift the expansion center */
184
185#ifdef EVALPOTENTIAL
186 taylor_sub.coeff.phi += taylor_current.coeff.dphi * dxyz;
187#endif
188
189#if(MULTIPOLE_ORDER >= 2)
190 vector<MyReal> delta_dphi = taylor_current.coeff.d2phi * dxyz;
191 taylor_sub.coeff.dphi += delta_dphi;
192#ifdef EVALPOTENTIAL
193 taylor_sub.coeff.phi += 0.5f * (delta_dphi * dxyz);
194#endif
195#endif
196#if(MULTIPOLE_ORDER >= 3)
197 symtensor2<MyReal> delta_d2phi = taylor_current.coeff.d3phi * dxyz;
198
199 taylor_sub.coeff.d2phi += delta_d2phi;
200
201 delta_dphi = delta_d2phi * dxyz;
202
203 taylor_sub.coeff.dphi += 0.5f * delta_dphi;
204#ifdef EVALPOTENTIAL
205 taylor_sub.coeff.phi += static_cast<MyReal>(1.0 / 6) * (delta_dphi * dxyz);
206#endif
207#endif
208#if(MULTIPOLE_ORDER >= 4)
209 symtensor3<MyReal> delta_d3phi = taylor_current.coeff.d4phi * dxyz;
210 taylor_sub.coeff.d3phi += delta_d3phi;
211
212 delta_d2phi = delta_d3phi * dxyz;
213 taylor_sub.coeff.d2phi += 0.5f * delta_d2phi;
214
215 delta_dphi = delta_d2phi * dxyz;
216 taylor_sub.coeff.dphi += static_cast<MyReal>(1.0 / 6) * delta_dphi;
217#ifdef EVALPOTENTIAL
218 taylor_sub.coeff.phi += static_cast<MyReal>(1.0 / 24) * (delta_dphi * dxyz);
219#endif
220#endif
221#if(MULTIPOLE_ORDER >= 5)
222 symtensor4<MyReal> delta_d4phi = taylor_current.coeff.d5phi * dxyz;
223 taylor_sub.coeff.d4phi += delta_d4phi;
224
225 delta_d3phi = delta_d4phi * dxyz;
226 taylor_sub.coeff.d3phi += 0.5f * delta_d3phi;
227
228 delta_d2phi = delta_d3phi * dxyz;
229 taylor_sub.coeff.d2phi += static_cast<MyReal>(1.0 / 6) * delta_d2phi;
230
231 delta_dphi = delta_d2phi * dxyz;
232 taylor_sub.coeff.dphi += static_cast<MyReal>(1.0 / 24) * delta_dphi;
233#ifdef EVALPOTENTIAL
234 taylor_sub.coeff.phi += static_cast<MyReal>(1.0 / 120) * (delta_dphi * dxyz);
235#endif
236#endif
237 fmm_force_passdown(p, shmrank, taylor_sub);
238 }
239
240 p = node_p->sibling;
241 shmrank = node_p->sibling_shmrank;
242 }
243 else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
244 {
245 Terminate("A");
246 gravnode *nop = get_nodep(p, shmrank);
247 p = nop->sibling;
248 shmrank = nop->sibling_shmrank;
249 }
250 else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
251 {
252 Terminate("B");
253 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
254 p = foreignpoint->Nextnode;
255 shmrank = foreignpoint->Nextnode_shmrank;
256 }
257 else
258 {
259 /* a pseudo point */
260 Terminate(
261 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
262 "shmrank=%d",
263 p, MaxPart, MaxNodes, ImportedNodeOffset, EndOfTreePoints, EndOfForeignNodes, shmrank);
264 }
265 }
266}
267
268inline void fmm::fmm_open_both(gravnode *noptr_sink, gravnode *noptr_source, int mintopleafnode, int committed)
269{
270 int self_flag = 0;
271 if(noptr_sink == noptr_source)
272 self_flag = 1;
273
274 /* open node */
275 int p_sink = noptr_sink->nextnode;
276 unsigned char shmrank_sink = noptr_sink->nextnode_shmrank;
277
278 while(p_sink != noptr_sink->sibling ||
279 (shmrank_sink != noptr_sink->sibling_shmrank && noptr_sink->sibling >= MaxPart + D->NTopnodes))
280 {
281 int next_sink;
282 unsigned char next_shmrank_sink;
283 char type_sink;
284
285 if(p_sink < MaxPart) /* a local particle */
286 {
287 /* note: here shmrank cannot change */
288 next_sink = get_nextnodep(shmrank_sink)[p_sink];
289 next_shmrank_sink = shmrank_sink;
290 type_sink = NODE_TYPE_LOCAL_PARTICLE;
291 }
292 else if(p_sink < MaxPart + MaxNodes) /* an internal node */
293 {
294 gravnode *nop = get_nodep(p_sink, shmrank_sink);
295 next_sink = nop->sibling;
296 next_shmrank_sink = nop->sibling_shmrank;
297 type_sink = NODE_TYPE_LOCAL_NODE;
298 }
299 else if(p_sink >= ImportedNodeOffset && p_sink < EndOfTreePoints) /* an imported Treepoint particle */
300 {
301 /* note: here shmrank cannot change */
302 next_sink = get_nextnodep(shmrank_sink)[p_sink - MaxNodes];
303 next_shmrank_sink = shmrank_sink;
305 }
306 else if(p_sink >= EndOfTreePoints && p_sink < EndOfForeignNodes) /* an imported tree node */
307 {
308 gravnode *nop = get_nodep(p_sink, shmrank_sink);
309 next_sink = nop->sibling;
310 next_shmrank_sink = nop->sibling_shmrank;
311 type_sink = NODE_TYPE_FETCHED_NODE;
312 }
313 else if(p_sink >= EndOfForeignNodes)
314 {
315 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p_sink - EndOfForeignNodes, shmrank_sink);
316 next_sink = foreignpoint->Nextnode;
317 next_shmrank_sink = foreignpoint->Nextnode_shmrank;
318 type_sink = NODE_TYPE_FETCHED_PARTICLE;
319 }
320 else
321 {
322 /* a pseudo point */
323 next_sink = 0;
324 type_sink = 0;
325 Terminate("pseudo particle - should not happen");
326 }
327
328 int p_source = noptr_source->nextnode; /* open cell */
329 unsigned char shmrank_source = noptr_source->nextnode_shmrank;
330
331 while(p_source != noptr_source->sibling ||
332 (shmrank_source != noptr_source->sibling_shmrank && noptr_source->sibling >= MaxPart + D->NTopnodes))
333 {
334 int next_source;
335 unsigned char next_shmrank_source;
336 char type_source;
337
338 if(p_source < MaxPart) /* a local particle */
339 {
340 /* note: here shmrank cannot change */
341 next_source = get_nextnodep(shmrank_source)[p_source];
342 next_shmrank_source = shmrank_source;
343 type_source = NODE_TYPE_LOCAL_PARTICLE;
344 }
345 else if(p_source < MaxPart + MaxNodes) /* an internal node */
346 {
347 gravnode *nop = get_nodep(p_source, shmrank_source);
348 next_source = nop->sibling;
349 next_shmrank_source = nop->sibling_shmrank;
350 type_source = NODE_TYPE_LOCAL_NODE;
351 }
352 else if(p_source >= ImportedNodeOffset && p_source < EndOfTreePoints) /* an imported Treepoint particle */
353 {
354 /* note: here shmrank cannot change */
355 next_source = get_nextnodep(shmrank_source)[p_source - MaxNodes];
356 next_shmrank_source = shmrank_source;
357 type_source = NODE_TYPE_TREEPOINT_PARTICLE;
358 }
359 else if(p_source >= EndOfTreePoints && p_source < EndOfForeignNodes) /* an imported tree node */
360 {
361 gravnode *nop = get_nodep(p_source, shmrank_source);
362 next_source = nop->sibling;
363 next_shmrank_source = nop->sibling_shmrank;
364 type_source = NODE_TYPE_FETCHED_NODE;
365 }
366 else if(p_source >= EndOfForeignNodes)
367 {
368 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p_source - EndOfForeignNodes, shmrank_source);
369 next_source = foreignpoint->Nextnode;
370 next_shmrank_source = foreignpoint->Nextnode_shmrank;
371 type_source = NODE_TYPE_FETCHED_PARTICLE;
372 }
373 else
374 {
375 /* a pseudo point */
376 next_source = 0;
377 type_source = 0;
378 Terminate("pseudo particle - should not happen");
379 }
380
381 if(self_flag == 0 || p_source >= p_sink)
382 {
383 if(type_sink >= NODE_TYPE_LOCAL_NODE && type_source <= NODE_TYPE_FETCHED_PARTICLE)
384 {
385 /* in this case we have node-particle interaction, which we swap into a particle-node interaction */
386 fmm_force_interact(p_source, p_sink, type_source, type_sink, shmrank_source, shmrank_sink, mintopleafnode,
387 committed);
388 }
389 else
390 {
391 fmm_force_interact(p_sink, p_source, type_sink, type_source, shmrank_sink, shmrank_source, mintopleafnode,
392 committed);
393 }
394 }
395
396 p_source = next_source;
397 shmrank_source = next_shmrank_source;
398 }
399
400 p_sink = next_sink;
401 shmrank_sink = next_shmrank_sink;
402 }
403}
404
405inline void fmm::fmm_open_node(int no_particle, gravnode *nop, char type_particle, unsigned char shmrank_particle, int mintopleafnode,
406 int committed)
407{
408 int p = nop->nextnode;
409 unsigned char shmrank = nop->nextnode_shmrank;
410
411 while(p != nop->sibling || (shmrank != nop->sibling_shmrank && nop->sibling >= MaxPart + D->NTopnodes))
412 {
413 if(p < 0)
414 Terminate("p=%d < 0", p);
415
416 int next;
417 unsigned char next_shmrank;
418 char type;
419
420 if(p < MaxPart) /* a local particle */
421 {
422 /* note: here shmrank cannot change */
423 next = get_nextnodep(shmrank)[p];
424 next_shmrank = shmrank;
426 }
427 else if(p < MaxPart + MaxNodes) /* an internal node */
428 {
429 gravnode *nop = get_nodep(p, shmrank);
430 next = nop->sibling;
431 next_shmrank = nop->sibling_shmrank;
433 }
434 else if(p >= ImportedNodeOffset && p < EndOfTreePoints) /* an imported Treepoint particle */
435 {
436 /* note: here shmrank cannot change */
437 next = get_nextnodep(shmrank)[p - MaxNodes];
438 next_shmrank = shmrank;
440 }
441 else if(p >= EndOfTreePoints && p < EndOfForeignNodes) /* an imported tree node */
442 {
443 gravnode *nop = get_nodep(p, shmrank);
444 next = nop->sibling;
445 next_shmrank = nop->sibling_shmrank;
447 }
448 else if(p >= EndOfForeignNodes) /* an imported particle below an imported tree node */
449 {
450 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(p - EndOfForeignNodes, shmrank);
451
452 next = foreignpoint->Nextnode;
453 next_shmrank = foreignpoint->Nextnode_shmrank;
455 }
456 else
457 {
458 /* a pseudo point */
459 Terminate(
460 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
461 "shmrank=%d",
462 p, MaxPart, MaxNodes, ImportedNodeOffset, EndOfTreePoints, EndOfForeignNodes, shmrank);
463 }
464
465 fmm_force_interact(no_particle, p, type_particle, type, shmrank_particle, shmrank, mintopleafnode, committed);
466
467 p = next;
468 shmrank = next_shmrank;
469 }
470}
471
472inline void fmm::fmm_particle_particle_interaction(int no_sink, int no_source, int type_sink, int type_source,
473 unsigned char shmrank_sink, unsigned char shmrank_source)
474{
475#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
476 if(skip_actual_force_computation)
477 return;
478#endif
479
480 MyIntPosType *intpos_n, *intpos_m;
481 MyReal mass_n, mass_m;
482 MyReal h_n, h_m;
483#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
484 int test_n, test_m;
485#endif
486
487 /* in one of the following three cases we have a single particle on the sink side */
488 if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
489 {
490 particle_data *P = get_Pp(no_sink, shmrank_sink);
491
492 intpos_n = P->IntPos;
493 mass_n = P->getMass();
495#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
496 test_n = P->InsideOutsideFlag;
497#endif
498 }
499 else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
500 {
501 gravpoint_data *Pointp = get_pointsp(no_sink - ImportedNodeOffset, shmrank_sink);
502
503 intpos_n = Pointp->IntPos;
504 mass_n = Pointp->Mass;
505 h_n = All.ForceSoftening[Pointp->getSofteningClass()];
506#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
507 test_n = Pointp->InsideOutsideFlag;
508#endif
509 }
510 else /* a point that was fetched */
511 {
512 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_sink - EndOfForeignNodes, shmrank_sink);
513
514 intpos_n = foreignpoint->IntPos;
515 mass_n = foreignpoint->Mass;
516 h_n = All.ForceSoftening[foreignpoint->getSofteningClass()];
517#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
518 test_n = foreignpoint->InsideOutsideFlag;
519#endif
520 }
521
522 /* in one of the following three cases we have a single particle on the source side */
523 if(type_source == NODE_TYPE_LOCAL_PARTICLE)
524 {
525 particle_data *P = get_Pp(no_source, shmrank_source);
526
527 intpos_m = P->IntPos;
528 mass_m = P->getMass();
530#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
531 test_m = P->InsideOutsideFlag;
532#endif
533 }
534 else if(type_source == NODE_TYPE_TREEPOINT_PARTICLE)
535 {
536 gravpoint_data *Pointp = get_pointsp(no_source - ImportedNodeOffset, shmrank_source);
537
538 intpos_m = Pointp->IntPos;
539 mass_m = Pointp->Mass;
540 h_m = All.ForceSoftening[Pointp->getSofteningClass()];
541#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
542 test_m = Pointp->InsideOutsideFlag;
543#endif
544 }
545 else
546 {
547 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_source - EndOfForeignNodes, shmrank_source);
548
549 intpos_m = foreignpoint->IntPos;
550 mass_m = foreignpoint->Mass;
551 h_m = All.ForceSoftening[foreignpoint->getSofteningClass()];
552#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
553 test_m = foreignpoint->InsideOutsideFlag;
554#endif
555 }
556
557 MyReal h_max = (h_m > h_n) ? h_m : h_n;
558
559 vector<MyReal> dxyz;
560 Tp->nearest_image_intpos_to_pos(intpos_m, intpos_n, dxyz.da); /* converts the integer distance to floating point */
561
562 MyReal r2 = dxyz.r2();
563 MyReal r = sqrt(r2);
564 MyReal rinv = (r > 0) ? 1 / r : 0;
565
566 gfactors gfac;
567
568#ifdef PMGRID
569 if(DoPM)
570 {
571 mesh_factors *mfp = &mf[LOW_MESH];
572
573#ifdef PLACEHIGHRESREGION
575 {
576 if(test_m == FLAG_INSIDE && test_n == FLAG_INSIDE)
577 mfp = &mf[HIGH_MESH];
578 }
579#endif
580 if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp)) /* if we are outside the cut-off radius, we have no interaction */
581 return;
582 }
583#endif
584
585 get_gfactors_monopole(gfac, r, h_max, rinv);
586
587#ifdef EVALPOTENTIAL
588 MyReal D0 = gfac.fac0;
589#endif
590 vector<MyReal> D1 = gfac.fac1 * rinv * dxyz;
591
592 if(DoEwald)
593 {
594 ewald_data ew;
595 Ewald.ewald_gridlookup(intpos_m, intpos_n, ewald::POINTMASS, ew);
596
597 D1 -= ew.D1phi;
598
599#ifdef EVALPOTENTIAL
600 D0 -= ew.D0phi;
601#endif
602 }
603
604 if(shmrank_sink == Shmem.Island_ThisTask)
605 {
606 if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
607 {
608#ifndef HIERARCHICAL_GRAVITY
609 if(Tp->TimeBinSynchronized[Tp->P[no_sink].TimeBinGrav])
610#endif
611 {
612 Tp->P[no_sink].GravAccel -= mass_m * D1;
613#ifdef EVALPOTENTIAL
614 Tp->P[no_sink].Potential -= mass_m * D0;
615#endif
616 if(MeasureCostFlag)
617 Tp->P[no_sink].GravCost++;
618
619 interactioncountPP += 1;
620 }
621 }
622 else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
623 {
624#ifndef HIERARCHICAL_GRAVITY
625 if(Points[no_sink - ImportedNodeOffset].ActiveFlag)
626#endif
627 {
628 int idx = ResultIndexList[no_sink - ImportedNodeOffset];
629 ResultsActiveImported[idx].GravAccel -= mass_m * D1;
630#ifdef EVALPOTENTIAL
631 ResultsActiveImported[idx].Potential -= mass_m * D0;
632#endif
633 if(MeasureCostFlag)
634 ResultsActiveImported[idx].GravCost++;
635
636 interactioncountPP += 1;
637 }
638 }
639 }
640
641 if(shmrank_source == Shmem.Island_ThisTask)
642 {
643 if(type_source == NODE_TYPE_LOCAL_PARTICLE)
644 {
645#ifndef HIERARCHICAL_GRAVITY
646 if(Tp->TimeBinSynchronized[Tp->P[no_source].TimeBinGrav])
647#endif
648 {
649 Tp->P[no_source].GravAccel += mass_n * D1;
650#ifdef EVALPOTENTIAL
651 Tp->P[no_source].Potential -= mass_n * D0;
652#endif
653
654 if(MeasureCostFlag)
655 Tp->P[no_source].GravCost++;
656
657 interactioncountPP += 1;
658 }
659 }
660 else if(type_source == NODE_TYPE_TREEPOINT_PARTICLE)
661 {
662#ifndef HIERARCHICAL_GRAVITY
663 if(Points[no_source - ImportedNodeOffset].ActiveFlag)
664#endif
665 {
666 int idx = ResultIndexList[no_source - ImportedNodeOffset];
667 ResultsActiveImported[idx].GravAccel += mass_n * D1;
668#ifdef EVALPOTENTIAL
669 ResultsActiveImported[idx].Potential -= mass_n * D0;
670#endif
671 if(MeasureCostFlag)
672 ResultsActiveImported[idx].GravCost++;
673
674 interactioncountPP += 1;
675 }
676 }
677 }
678}
679
680inline void fmm::fmm_particle_node_interaction(int no_sink, int no_source, int type_sink, int type_source, unsigned char shmrank_sink,
681 unsigned char shmrank_source, gravnode *noptr_source, vector<MyReal> &dxyz, MyReal &r2)
682{
683#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
684 if(skip_actual_force_computation)
685 return;
686#endif
687
688 /* 'sink' is a particle
689 * 'source' node is a node with multipole moments.
690 * 'dxyz' is the distance vector, pointing from sink to source, i.e. dxyz = pos(source) - pos(sink)
691 */
692
693 MyReal mass_i, h_i;
694#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
695 int test_point;
696#endif
697
698 MyIntPosType *intpos_i;
699
700 if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
701 {
702 particle_data *P = get_Pp(no_sink, shmrank_sink);
703
704 intpos_i = P->IntPos;
705 mass_i = P->getMass();
707#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
708 test_point = P->InsideOutsideFlag;
709#endif
710 }
711 else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
712 {
713 gravpoint_data *Pointp = get_pointsp(no_sink - ImportedNodeOffset, shmrank_sink);
714
715 intpos_i = Pointp->IntPos;
716 mass_i = Pointp->Mass;
717 h_i = All.ForceSoftening[Pointp->getSofteningClass()];
718#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
719 test_point = Pointp->InsideOutsideFlag;
720#endif
721 }
722 else /* a point that was fetched */
723 {
724 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_sink - EndOfForeignNodes, shmrank_sink);
725
726 intpos_i = foreignpoint->IntPos;
727 mass_i = foreignpoint->Mass;
728 h_i = All.ForceSoftening[foreignpoint->getSofteningClass()];
729#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
730 test_point = foreignpoint->InsideOutsideFlag;
731#endif
732 }
733
734 MyReal h_j = All.ForceSoftening[noptr_source->getSofteningClass()];
735 MyReal h_max = (h_j > h_i) ? h_j : h_i;
736
737 /* do cell-particle interaction, node can be used */
738 MyReal r = sqrt(r2);
739
740 MyReal rinv = (r > 0) ? 1 / r : 0;
741
742 gfactors gfac;
743
744#ifdef PMGRID
745 if(DoPM)
746 {
747 mesh_factors *mfp = &mf[LOW_MESH];
748
749#ifdef PLACEHIGHRESREGION
751 {
752 int test_node = noptr_source->overlap_flag;
753 if(test_node == FLAG_INSIDE && test_point == FLAG_INSIDE)
754 mfp = &mf[HIGH_MESH];
755 }
756#endif
757
758 if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp)) /* if we are outside the cut-off radius, we have no interaction */
759 return;
760 }
761#endif
762
763 get_gfactors_multipole(gfac, r, h_max, rinv);
764
765#ifdef EVALPOTENTIAL
766 MyReal g0 = gfac.fac0;
767 MyReal D0 = g0;
768#endif
769
770 MyReal g1 = gfac.fac1 * rinv;
771 vector<MyReal> D1 = g1 * dxyz;
772
773#if(MULTIPOLE_ORDER >= 2)
774 MyReal g2 = gfac.fac2 * gfac.rinv2;
775 symtensor2<MyReal> aux2 = dxyz % dxyz; // construct outer product of the two vectors
776 symtensor2<MyReal> D2 = g2 * aux2;
777 D2[qXX] += g1;
778 D2[qYY] += g1;
779 D2[qZZ] += g1;
780#endif
781#if(MULTIPOLE_ORDER >= 3)
782 MyReal g3 = gfac.fac3 * gfac.rinv3;
785 setup_D3(INIT, D3, dxyz, aux2, aux3, g2, g3);
786#endif
787
788#if(MULTIPOLE_ORDER >= 4)
789 MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
792 setup_D4(INIT, D4, dxyz, aux2, aux3, aux4, g2, g3, g4);
793#endif
794
795#if(MULTIPOLE_ORDER >= 5)
796 MyReal g5 = gfac.fac5 * gfac.rinv3 * gfac.rinv2;
799 setup_D5(INIT, D5, dxyz, aux3, aux4, aux5, g3, g4, g5);
800#endif
801
802 if(DoEwald)
803 {
804 ewald_data ew;
805 Ewald.ewald_gridlookup(noptr_source->s.da, intpos_i, ewald::MULTIPOLES, ew);
806
807#ifdef EVALPOTENTIAL
808 D0 -= ew.D0phi;
809#endif
810 D1 -= ew.D1phi;
811#if(MULTIPOLE_ORDER >= 2)
812 D2 -= ew.D2phi;
813#endif
814#if(MULTIPOLE_ORDER >= 3)
815 D3 -= ew.D3phi;
816#endif
817#if(MULTIPOLE_ORDER >= 4)
818 D4 -= ew.D4phi;
819#endif
820#if(MULTIPOLE_ORDER >= 5)
821 D5 -= ew.D5phi;
822#endif
823 }
824
825 /* finally store the force on the particle */
826 if(shmrank_sink == Shmem.Island_ThisTask)
827 if(type_sink == NODE_TYPE_LOCAL_PARTICLE || type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
828 {
829 MyReal mass_j = noptr_source->mass;
830
831#if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
832 symtensor2<MyDouble> &Q2_j = noptr_source->Q2Tensor;
833#endif
834#if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
835 symtensor3<MyDouble> &Q3_j = noptr_source->Q3Tensor;
836#endif
837#if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
838 symtensor4<MyDouble> &Q4_j = noptr_source->Q4Tensor;
839#endif
840#if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
841 symtensor5<MyDouble> &Q5_j = noptr_source->Q5Tensor;
842#endif
843
844#ifdef EVALPOTENTIAL
845 MyReal pot = -mass_j * D0;
846#if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
847 pot -= 0.5f * (D2 * Q2_j);
848#endif
849#if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
850 pot -= static_cast<MyReal>(1.0 / 6) * (D3 * Q3_j);
851#endif
852#if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
853 pot -= static_cast<MyReal>(1.0 / 24) * (D4 * Q4_j);
854#endif
855#if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
856
857 pot -= static_cast<MyReal>(1.0 / 120) * (D5 * Q5_j);
858#endif
859#endif
860
861 vector<MyReal> dphi = mass_j * D1;
862
863#if(MULTIPOLE_ORDER >= 3)
864 dphi += static_cast<MyReal>(0.5) * (D3 * Q2_j);
865#endif
866#if(MULTIPOLE_ORDER >= 4)
867 dphi += static_cast<MyReal>(1.0 / 6) * (D4 * Q3_j);
868#endif
869#if(MULTIPOLE_ORDER >= 5)
870 dphi += static_cast<MyReal>(1.0 / 24) * (D5 * Q4_j);
871#endif
872
873 if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
874 {
875#ifndef HIERARCHICAL_GRAVITY
876 if(Tp->TimeBinSynchronized[Tp->P[no_sink].TimeBinGrav])
877#endif
878 {
879 Tp->P[no_sink].GravAccel -= dphi;
880#ifdef EVALPOTENTIAL
881 Tp->P[no_sink].Potential += pot;
882#endif
883 if(MeasureCostFlag)
884 Tp->P[no_sink].GravCost++;
885
886 interactioncountPN += 1;
887 interactioncountEffective += 1;
888 }
889 }
890 else
891 {
892#ifndef HIERARCHICAL_GRAVITY
893 if(Points[no_sink - ImportedNodeOffset].ActiveFlag)
894#endif
895 {
896 int idx = ResultIndexList[no_sink - ImportedNodeOffset];
897 ResultsActiveImported[idx].GravAccel -= dphi;
898#ifdef EVALPOTENTIAL
899 ResultsActiveImported[idx].Potential += pot;
900#endif
901 if(MeasureCostFlag)
902 ResultsActiveImported[idx].GravCost++;
903
904 interactioncountPN += 1;
905 interactioncountEffective += 1;
906 }
907 }
908 }
909
910 if(fmm_depends_on_local_mass(no_source, shmrank_source))
911 if(type_source == NODE_TYPE_LOCAL_NODE)
912 {
913 /* mediating field expansion of point particle on source node */
914#ifdef EVALPOTENTIAL
915 TaylorCoeff[no_source].coeff.phi += (-mass_i) * D0;
916#endif
917 TaylorCoeff[no_source].coeff.dphi += (-mass_i) * D1;
918#if(MULTIPOLE_ORDER >= 2)
919 TaylorCoeff[no_source].coeff.d2phi += (-mass_i) * D2;
920#endif
921#if(MULTIPOLE_ORDER >= 3)
922 TaylorCoeff[no_source].coeff.d3phi += (-mass_i) * D3;
923#endif
924#if(MULTIPOLE_ORDER >= 4)
925 TaylorCoeff[no_source].coeff.d4phi += (-mass_i) * D4;
926#endif
927#if(MULTIPOLE_ORDER >= 5)
928 TaylorCoeff[no_source].coeff.d5phi += (-mass_i) * D5;
929#endif
930 TaylorCoeff[no_source].coeff.interactions += 1;
931
932 interactioncountPN += 1;
933 }
934}
935
936inline void fmm::fmm_node_node_interaction(int no_sink, int no_source, int type_sink, int type_source, unsigned char shmrank_sink,
937 unsigned char shmrank_source, gravnode *noptr_sink, gravnode *noptr_source,
938 vector<MyReal> &dxyz, MyReal &r2)
939{
940#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
941 if(skip_actual_force_computation)
942 return;
943#endif
944
945 /* 'sink' is a node with multipole moments
946 * 'source' node is a node with multipole moments
947 * 'dxyz' is the distance vector, pointing from sink to source, i.e. dxyz = pos(source) - pos(sink)
948 */
949
950 /* now do the node-node interaction */
951 MyReal r = sqrt(r2);
952
953#if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
954 symtensor2<MyDouble> &Q2_m = noptr_source->Q2Tensor;
955 symtensor2<MyDouble> &Q2_n = noptr_sink->Q2Tensor;
956#endif
957#if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
958 symtensor3<MyDouble> &Q3_m = noptr_source->Q3Tensor;
959 symtensor3<MyDouble> &Q3_n = noptr_sink->Q3Tensor;
960#endif
961#if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
962 symtensor4<MyDouble> &Q4_m = noptr_source->Q4Tensor;
963 symtensor4<MyDouble> &Q4_n = noptr_sink->Q4Tensor;
964#endif
965#if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL)
966 symtensor5<MyDouble> &Q5_m = noptr_source->Q5Tensor;
967 symtensor5<MyDouble> &Q5_n = noptr_sink->Q5Tensor;
968#endif
969
970 MyReal mass_m = noptr_source->mass;
971 MyReal mass_n = noptr_sink->mass;
972
973 MyReal rinv = (r > 0) ? 1 / r : 0;
974
975 MyReal h_n = All.ForceSoftening[noptr_sink->getSofteningClass()];
976 MyReal h_m = All.ForceSoftening[noptr_source->getSofteningClass()];
977 MyReal h_max = (h_m > h_n) ? h_m : h_n;
978
979 gfactors gfac;
980
981#ifdef PMGRID
982 if(DoPM)
983 {
984 mesh_factors *mfp = &mf[LOW_MESH];
985
986#ifdef PLACEHIGHRESREGION
988 {
989 if(noptr_source->overlap_flag == FLAG_INSIDE && noptr_sink->overlap_flag == FLAG_INSIDE)
990 mfp = &mf[HIGH_MESH];
991 }
992#endif
993
994 if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp)) /* if we are outside the cut-off radius, we have no interaction */
995 return;
996 }
997#endif
998
999 get_gfactors_multipole(gfac, r, h_max, rinv);
1000
1001#ifdef EVALPOTENTIAL
1002 MyReal g0 = gfac.fac0;
1003 MyReal D0 = g0;
1004#endif
1005
1006 MyReal g1 = gfac.fac1 * rinv;
1007 vector<MyReal> D1 = g1 * dxyz;
1008
1009#if(MULTIPOLE_ORDER >= 2)
1010 MyReal g2 = gfac.fac2 * gfac.rinv2;
1011 symtensor2<MyReal> aux2 = dxyz % dxyz; // construct outer product of the two vectors
1012 symtensor2<MyReal> D2 = g2 * aux2;
1013 D2[qXX] += g1;
1014 D2[qYY] += g1;
1015 D2[qZZ] += g1;
1016#endif
1017
1018#if(MULTIPOLE_ORDER >= 3)
1019 MyReal g3 = gfac.fac3 * gfac.rinv3;
1021 symtensor3<MyReal> aux3;
1022 setup_D3(INIT, D3, dxyz, aux2, aux3, g2, g3);
1023#endif
1024
1025#if(MULTIPOLE_ORDER >= 4)
1026 MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
1028 symtensor4<MyReal> aux4;
1029 setup_D4(INIT, D4, dxyz, aux2, aux3, aux4, g2, g3, g4);
1030#endif
1031
1032#if(MULTIPOLE_ORDER >= 5)
1033 MyReal g5 = gfac.fac5 * gfac.rinv3 * gfac.rinv2;
1035 symtensor5<MyReal> aux5;
1036 setup_D5(INIT, D5, dxyz, aux3, aux4, aux5, g3, g4, g5);
1037#endif
1038
1039 if(DoEwald)
1040 {
1041 ewald_data ew;
1042 Ewald.ewald_gridlookup(noptr_source->s.da, noptr_sink->s.da, ewald::MULTIPOLES, ew);
1043
1044#ifdef EVALPOTENTIAL
1045 D0 -= ew.D0phi;
1046#endif
1047 D1 -= ew.D1phi;
1048#if(MULTIPOLE_ORDER >= 2)
1049 D2 -= ew.D2phi;
1050#endif
1051#if(MULTIPOLE_ORDER >= 3)
1052 D3 -= ew.D3phi;
1053#endif
1054#if(MULTIPOLE_ORDER >= 4)
1055 D4 -= ew.D4phi;
1056#endif
1057#if(MULTIPOLE_ORDER >= 5)
1058 D5 -= ew.D5phi;
1059#endif
1060 }
1061
1062 if(fmm_depends_on_local_mass(no_sink, shmrank_sink))
1063 if(type_sink == NODE_TYPE_LOCAL_NODE)
1064 {
1065#ifdef EVALPOTENTIAL
1066 TaylorCoeff[no_sink].coeff.phi += (-mass_m) * D0;
1067#if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
1068 TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-0.5) * (D2 * Q2_m);
1069#endif
1070#if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
1071 TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-1.0 / 6) * (D3 * Q3_m);
1072#endif
1073#if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
1074 TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-1.0 / 24) * (D4 * Q4_m);
1075#endif
1076#if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
1077 TaylorCoeff[no_sink].coeff.phi += static_cast<MyReal>(-1.0 / 120) * (D5 * Q5_m);
1078#endif
1079#endif
1080
1081 TaylorCoeff[no_sink].coeff.dphi += mass_m * D1;
1082#if(MULTIPOLE_ORDER >= 2)
1083 TaylorCoeff[no_sink].coeff.d2phi += (-mass_m) * D2;
1084#endif
1085#if(MULTIPOLE_ORDER >= 3)
1086 TaylorCoeff[no_sink].coeff.dphi += static_cast<MyReal>(0.5) * (D3 * Q2_m);
1087 TaylorCoeff[no_sink].coeff.d3phi += mass_m * D3;
1088#endif
1089#if(MULTIPOLE_ORDER >= 4)
1090 TaylorCoeff[no_sink].coeff.dphi += static_cast<MyReal>(1.0 / 6) * (D4 * Q3_m);
1091 TaylorCoeff[no_sink].coeff.d2phi += static_cast<MyReal>(-0.5) * (D4 * Q2_m);
1092 TaylorCoeff[no_sink].coeff.d4phi += (-mass_m) * D4;
1093#endif
1094#if(MULTIPOLE_ORDER >= 5)
1095 TaylorCoeff[no_sink].coeff.dphi += static_cast<MyReal>(1.0 / 24) * (D5 * Q4_m);
1096 TaylorCoeff[no_sink].coeff.d2phi += static_cast<MyReal>(-1.0 / 6) * (D5 * Q3_m);
1097 TaylorCoeff[no_sink].coeff.d3phi += static_cast<MyReal>(0.5) * (D5 * Q2_m);
1098 TaylorCoeff[no_sink].coeff.d5phi += mass_m * D5;
1099#endif
1100
1101 TaylorCoeff[no_sink].coeff.interactions += 1;
1102 interactioncountNN += 1;
1103 }
1104
1105 if(fmm_depends_on_local_mass(no_source, shmrank_source))
1106 if(type_source == NODE_TYPE_LOCAL_NODE)
1107 {
1108#ifdef EVALPOTENTIAL
1109 TaylorCoeff[no_source].coeff.phi += (-mass_n) * D0;
1110#if(MULTIPOLE_ORDER >= 3) || ((MULTIPOLE_ORDER >= 2) && defined(EXTRAPOTTERM))
1111 TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(-0.5) * (D2 * Q2_n);
1112#endif
1113#if(MULTIPOLE_ORDER >= 4) || ((MULTIPOLE_ORDER >= 3) && defined(EXTRAPOTTERM))
1114 TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(1.0 / 6) * (D3 * Q3_n);
1115#endif
1116#if(MULTIPOLE_ORDER >= 5) || ((MULTIPOLE_ORDER >= 4) && defined(EXTRAPOTTERM))
1117 TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(-1.0 / 24) * (D4 * Q4_n);
1118#endif
1119#if(MULTIPOLE_ORDER >= 5) && defined(EXTRAPOTTERM)
1120 TaylorCoeff[no_source].coeff.phi += static_cast<MyReal>(1.0 / 120) * (D5 * Q5_n);
1121#endif
1122#endif
1123
1124 TaylorCoeff[no_source].coeff.dphi += (-mass_n) * D1;
1125#if(MULTIPOLE_ORDER >= 2)
1126 TaylorCoeff[no_source].coeff.d2phi += (-mass_n) * D2;
1127#endif
1128#if(MULTIPOLE_ORDER >= 3)
1129 TaylorCoeff[no_source].coeff.dphi += static_cast<MyReal>(-0.5) * (D3 * Q2_n);
1130 TaylorCoeff[no_source].coeff.d3phi += (-mass_n) * D3;
1131#endif
1132#if(MULTIPOLE_ORDER >= 4)
1133 TaylorCoeff[no_source].coeff.dphi += static_cast<MyReal>(1.0 / 6) * (D4 * Q3_n);
1134 TaylorCoeff[no_source].coeff.d2phi += static_cast<MyReal>(-0.5) * (D4 * Q2_n);
1135 TaylorCoeff[no_source].coeff.d4phi += (-mass_n) * D4;
1136#endif
1137#if(MULTIPOLE_ORDER >= 5)
1138 TaylorCoeff[no_source].coeff.dphi += static_cast<MyReal>(-1.0 / 24) * (D5 * Q4_n);
1139 TaylorCoeff[no_source].coeff.d2phi += static_cast<MyReal>(1.0 / 6) * (D5 * Q3_n);
1140 TaylorCoeff[no_source].coeff.d3phi += static_cast<MyReal>(-0.5) * (D5 * Q2_n);
1141 TaylorCoeff[no_source].coeff.d5phi += (-mass_n) * D5;
1142#endif
1143
1144 TaylorCoeff[no_source].coeff.interactions += 1;
1145 interactioncountNN += 1;
1146 }
1147}
1148
1149inline int fmm::fmm_evaluate_node_node_opening_criterion(gravnode *noptr_sink, gravnode *noptr_source, vector<MyReal> &dxyz,
1150 MyReal &r2)
1151{
1152 if(noptr_source->level != noptr_sink->level)
1153 Terminate("This shouldn't happen: noptr_level=%d noptr_sink->level=%d ", noptr_source->level, noptr_sink->level);
1154
1155 if(noptr_source->level <=
1156 1) // always open the root node, and the next level (note: full node length does not fit in the integer type)
1157 return NODE_OPEN;
1158
1159 /* Note: we will always have noptr_sink->len == noptr_source->len in our algorithm! */
1160
1161 MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - noptr_sink->level);
1162 MyIntPosType intlen = halflen << 1;
1163
1164#ifndef TREE_NO_SAFETY_BOX
1165 // We always open adjacent nodes to protect against worst-case force errors
1166 MyIntPosType twolens = intlen + (intlen - 1);
1167 MyIntPosType dist[3];
1168 Tp->nearest_image_intpos_to_absolute_intdist(noptr_source->center.da, noptr_sink->center.da, dist);
1169
1170 if(dist[0] < twolens && dist[1] < twolens && dist[2] < twolens)
1171 return NODE_OPEN;
1172#endif
1173
1174 /* converts the integer distance of the centers of mass to floating point */
1175 Tp->nearest_image_intpos_to_pos(noptr_source->s.da, noptr_sink->s.da, dxyz.da);
1176
1177 r2 = dxyz.r2();
1178
1179#ifdef PMGRID
1180 mesh_factors *mfp = &mf[LOW_MESH];
1181
1182#ifdef PLACEHIGHRESREGION
1184 {
1185 int test_source = noptr_source->overlap_flag;
1186 int test_sink = noptr_sink->overlap_flag;
1187
1188 if((test_source == FLAG_BOUNDARYOVERLAP && test_sink != FLAG_OUTSIDE) ||
1189 (test_sink == FLAG_BOUNDARYOVERLAP && test_source != FLAG_OUTSIDE))
1190 {
1191 Terminate("this shouldn't happen any more");
1192 return NODE_OPEN;
1193 }
1194 else
1195 {
1196 if(test_source == FLAG_INSIDE && test_sink == FLAG_INSIDE)
1197 mfp = &mf[HIGH_MESH];
1198 }
1199 }
1200#endif
1201
1202 if(DoPM && r2 > mfp->rcut2 && noptr_sink->level > 1)
1203 {
1204 /* check whether we can ignore any interactions along this branch */
1205 MyIntPosType dist_x = noptr_source->center[0] - noptr_sink->center[0];
1206 dist_x = (((MySignedIntPosType)dist_x) >= 0) ? dist_x : -dist_x;
1207 if(dist_x > mfp->intrcut[0] + intlen)
1208 return NODE_DISCARD;
1209
1210 MyIntPosType dist_y = noptr_source->center[1] - noptr_sink->center[1];
1211 dist_y = (((MySignedIntPosType)dist_y) >= 0) ? dist_y : -dist_y;
1212 if(dist_y > mfp->intrcut[1] + intlen)
1213 return NODE_DISCARD;
1214
1215 MyIntPosType dist_z = noptr_source->center[2] - noptr_sink->center[2];
1216 dist_z = (((MySignedIntPosType)dist_z) >= 0) ? dist_z : -dist_z;
1217 if(dist_z > mfp->intrcut[2] + intlen)
1218 return NODE_DISCARD;
1219 }
1220#endif
1221
1222 /* evaluate generalized opening criterion */
1223
1224 MyReal len = intlen * Tp->FacIntToCoord;
1225 MyReal len2 = len * len;
1226
1227 if(All.RelOpeningCriterionInUse == 0) /* check Barnes-Hut opening criterion */
1228 {
1229 if(4 * len2 > r2 * errTolTheta2)
1230 return NODE_OPEN;
1231 }
1232 else
1233 {
1234 if(4 * len2 > r2 * errTolThetaMax2)
1235 return NODE_OPEN;
1236
1237 MyReal mmax = (noptr_source->mass < noptr_sink->mass) ? noptr_sink->mass : noptr_source->mass;
1238 MyReal amin =
1239 errTolForceAcc * ((noptr_sink->MinOldAcc < noptr_source->MinOldAcc) ? noptr_sink->MinOldAcc : noptr_source->MinOldAcc);
1240#if(MULTIPOLE_ORDER == 1)
1241 if(mmax > r2 * amin)
1242 return NODE_OPEN;
1243#elif(MULTIPOLE_ORDER == 2)
1244 if(square(mmax * len) > r2 * square(r2 * amin))
1245 return NODE_OPEN;
1246#elif(MULTIPOLE_ORDER == 3)
1247 if(mmax * len2 > r2 * r2 * amin)
1248 return NODE_OPEN;
1249#elif(MULTIPOLE_ORDER == 4)
1250 if(square(mmax * len * len2) > r2 * square(r2 * r2 * amin))
1251 return NODE_OPEN;
1252#elif(MULTIPOLE_ORDER == 5)
1253 if(mmax * len2 * len2 > r2 * r2 * r2 * amin)
1254 return NODE_OPEN;
1255#endif
1256 }
1257
1258#if NSOFTCLASSES > 1
1259 MyReal h_m = All.ForceSoftening[noptr_sink->getSofteningClass()];
1260 MyReal h_n = All.ForceSoftening[noptr_source->getSofteningClass()];
1261 MyReal h_max = (h_m > h_n) ? h_m : h_n;
1262
1263 if(r2 < h_max * h_max)
1264 {
1265 if(All.ForceSoftening[noptr_source->minsofttype] < All.ForceSoftening[noptr_source->maxsofttype] &&
1266 h_max > All.ForceSoftening[noptr_sink->minsofttype])
1267 return NODE_OPEN;
1268 else if(All.ForceSoftening[noptr_sink->minsofttype] < All.ForceSoftening[noptr_sink->maxsofttype] &&
1269 h_max > All.ForceSoftening[noptr_source->minsofttype])
1270 return NODE_OPEN;
1271 }
1272#endif
1273
1274 return NODE_USE;
1275}
1276
1277inline int fmm::fmm_evaluate_particle_node_opening_criterion(int no_sink, char type_sink, unsigned char shmrank_sink,
1278 gravnode *nop_source, vector<MyReal> &dxyz, MyReal &r2)
1279{
1280 MyIntPosType *intpos_n;
1281 MyReal mass_n;
1282 MyReal aold;
1283#if NSOFTCLASSES > 1
1284 MyReal h_n;
1285#endif
1286#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1287 int test_point;
1288#endif
1289
1290 if(type_sink == NODE_TYPE_LOCAL_PARTICLE)
1291 {
1292 particle_data *P = get_Pp(no_sink, shmrank_sink);
1293
1294 intpos_n = P->IntPos;
1295 mass_n = P->getMass();
1296 aold = P->OldAcc;
1297#if NSOFTCLASSES > 1
1299#endif
1300#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1301 test_point = P->InsideOutsideFlag;
1302#endif
1303 }
1304 else if(type_sink == NODE_TYPE_TREEPOINT_PARTICLE)
1305 {
1306 gravpoint_data *Pointp = get_pointsp(no_sink - ImportedNodeOffset, shmrank_sink);
1307
1308 intpos_n = Pointp->IntPos;
1309 mass_n = Pointp->Mass;
1310 aold = Pointp->OldAcc;
1311#if NSOFTCLASSES > 1
1312 h_n = All.ForceSoftening[Pointp->getSofteningClass()];
1313#endif
1314#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1315 test_point = Pointp->InsideOutsideFlag;
1316#endif
1317 }
1318 else /* a point that was fetched */
1319 {
1320 foreign_gravpoint_data *foreignpoint = get_foreignpointsp(no_sink - EndOfForeignNodes, shmrank_sink);
1321
1322 intpos_n = foreignpoint->IntPos;
1323 mass_n = foreignpoint->Mass;
1324 aold = foreignpoint->OldAcc;
1325#if NSOFTCLASSES > 1
1326 h_n = All.ForceSoftening[foreignpoint->getSofteningClass()];
1327#endif
1328#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
1329 test_point = foreignpoint->InsideOutsideFlag;
1330#endif
1331 }
1332
1333 if(nop_source->level <= LEVEL_ALWAYS_OPEN) // always open the root node (note: full node length does not fit in the integer type)
1334 return NODE_OPEN;
1335
1336 MyIntPosType halflen = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - nop_source->level);
1337 MyIntPosType intlen = halflen << 1;
1338
1339#ifndef TREE_NO_SAFETY_BOX
1340 MyIntPosType dist[3];
1341 Tp->nearest_image_intpos_to_absolute_intdist(nop_source->center.da, intpos_n, dist);
1342 // if we are close to the node, and therefore open it to protect against worst-case force errors
1343 if(dist[0] < intlen && dist[1] < intlen && dist[2] < intlen)
1344 return NODE_OPEN;
1345#endif
1346
1347 /* check a variant of the classic opening criterion */
1348 Tp->nearest_image_intpos_to_pos(nop_source->s.da, intpos_n, dxyz.da); /* converts the integer distance to floating point */
1349
1350 r2 = dxyz.r2();
1351
1352#ifdef PMGRID
1353 mesh_factors *mfp = &mf[LOW_MESH];
1354
1355#ifdef PLACEHIGHRESREGION
1357 {
1358 int test_node = nop_source->overlap_flag;
1359
1360 if(test_point == FLAG_INSIDE && test_node == FLAG_BOUNDARYOVERLAP)
1361 {
1362 Terminate("this shouldn't happen any more");
1363 return NODE_OPEN;
1364 }
1365 else
1366 {
1367 if(test_node == FLAG_INSIDE && test_point == FLAG_INSIDE)
1368 mfp = &mf[HIGH_MESH];
1369 }
1370 }
1371#endif
1372
1373 if(DoPM && r2 > mfp->rcut2)
1374 {
1375 /* check whether we can ignore any interactions along this branch */
1376 MyIntPosType dist_x = nop_source->center[0] - intpos_n[0];
1377 dist_x = (((MySignedIntPosType)dist_x) >= 0) ? dist_x : -dist_x;
1378 if(dist_x > mfp->intrcut[0] + halflen)
1379 return NODE_DISCARD;
1380
1381 MyIntPosType dist_y = nop_source->center[1] - intpos_n[1];
1382 dist_y = (((MySignedIntPosType)dist_y) >= 0) ? dist_y : -dist_y;
1383 if(dist_y > mfp->intrcut[1] + halflen)
1384 return NODE_DISCARD;
1385
1386 MyIntPosType dist_z = nop_source->center[2] - intpos_n[2];
1387 dist_z = (((MySignedIntPosType)dist_z) >= 0) ? dist_z : -dist_z;
1388 if(dist_z > mfp->intrcut[2] + halflen)
1389 return NODE_DISCARD;
1390 }
1391#endif
1392
1393 MyReal len = intlen * Tp->FacIntToCoord;
1394 MyReal len2 = len * len;
1395
1396 if(All.RelOpeningCriterionInUse == 0) /* check Barnes-Hut opening criterion */
1397 {
1398 if(4 * len2 > r2 * errTolTheta2)
1399 return NODE_OPEN;
1400 }
1401 else /* check relative opening criterion */
1402 {
1403 if(4 * len2 > r2 * errTolThetaMax2)
1404 return NODE_OPEN;
1405
1406 MyReal mmax = (nop_source->mass < mass_n) ? mass_n : nop_source->mass;
1407 MyReal amin = errTolForceAcc * ((aold < nop_source->MinOldAcc) ? aold : nop_source->MinOldAcc);
1408
1409#if(MULTIPOLE_ORDER == 1)
1410 if(mmax > r2 * amin)
1411 return NODE_OPEN;
1412#elif(MULTIPOLE_ORDER == 2)
1413 if(square(mmax * len) > r2 * square(r2 * amin))
1414 return NODE_OPEN;
1415#elif(MULTIPOLE_ORDER == 3)
1416 if(mmax * len2 > r2 * r2 * amin)
1417 return NODE_OPEN;
1418#elif(MULTIPOLE_ORDER == 4)
1419 if(square(mmax * len * len2) > r2 * square(r2 * r2 * amin))
1420 return NODE_OPEN;
1421#elif(MULTIPOLE_ORDER == 5)
1422 if(mmax * len2 * len2 > r2 * r2 * r2 * amin)
1423 return NODE_OPEN;
1424#endif
1425 }
1426
1427#if NSOFTCLASSES > 1
1428 MyReal h_m = All.ForceSoftening[nop_source->getSofteningClass()];
1429
1430 if(h_m > h_n)
1431 {
1432 if(r2 < h_m * h_m)
1433 if(All.ForceSoftening[nop_source->minsofttype] < All.ForceSoftening[nop_source->maxsofttype])
1434 {
1435 return NODE_OPEN;
1436 }
1437 }
1438#endif
1439
1440 return NODE_USE;
1441}
1442
1443/* function to account for interaction of two nodes in the tree */
1444void fmm::fmm_force_interact(int no_sink, int no_source, char type_sink, char type_source, unsigned char shmrank_sink,
1445 unsigned char shmrank_source, int mintopleafnode, int committed)
1446{
1447 if(type_sink <= NODE_TYPE_FETCHED_PARTICLE && type_source <= NODE_TYPE_FETCHED_PARTICLE) /* particle-particle interaction */
1448 {
1449 /* nothing to be done, or if we do not deal with at least one local particle */
1450 if(type_sink == NODE_TYPE_FETCHED_PARTICLE && type_source == NODE_TYPE_FETCHED_PARTICLE)
1451 return;
1452
1453 if(no_sink != no_source || shmrank_source != shmrank_sink) // exclude self-interaction
1454 fmm_particle_particle_interaction(no_sink, no_source, type_sink, type_source, shmrank_sink, shmrank_source);
1455 }
1456 else if(!(type_sink > NODE_TYPE_FETCHED_PARTICLE && type_source > NODE_TYPE_FETCHED_PARTICLE)) /* cell-particle interaction */
1457 {
1458 /* we have arranged it such that the particle is always on the sink side, the node on the source side */
1459
1460 gravnode *noptr_source = get_nodep(no_source, shmrank_source);
1461
1462 /* noting to be done if we do not deal with any local mass */
1463 if(fmm_depends_on_local_mass(no_source, shmrank_source) == false &&
1464 (type_sink == NODE_TYPE_FETCHED_PARTICLE ||
1465 (type_sink < NODE_TYPE_FETCHED_PARTICLE && shmrank_sink != Shmem.Island_ThisTask)))
1466 return;
1467
1468 if(noptr_source->not_empty == 0)
1469 return;
1470
1471 if(no_source < MaxPart + MaxNodes) // we have a top-levelnode
1472 if(noptr_source->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1473 mintopleafnode = no_source;
1474
1475 MyReal r2;
1476 vector<MyReal> dxyz;
1477
1478 int openflag = fmm_evaluate_particle_node_opening_criterion(no_sink, type_sink, shmrank_sink, noptr_source, dxyz, r2);
1479
1480 if(openflag == NODE_USE)
1481 {
1482 fmm_particle_node_interaction(no_sink, no_source, type_sink, type_source, shmrank_sink, shmrank_source, noptr_source, dxyz,
1483 r2);
1484 }
1485 else if(openflag == NODE_OPEN) /* open cell in a cell-particle interaction */
1486 {
1487 if(noptr_source->cannot_be_opened_locally)
1488 {
1489 // are we in the same shared memory node?
1491 {
1492 Terminate("this should not happen any more");
1493 }
1494 else
1495 {
1496 tree_add_to_fetch_stack(noptr_source, no_source, shmrank_source);
1497
1498 fmm_add_to_work_stack(no_sink, no_source, shmrank_sink, shmrank_source, mintopleafnode);
1499 }
1500 }
1501 else
1502 {
1503 int min_buffer_space =
1504 std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1505
1506 if(min_buffer_space >= committed + 8 * TREE_NUM_BEFORE_NODESPLIT)
1507 fmm_open_node(no_sink, noptr_source, type_sink, shmrank_sink, mintopleafnode,
1508 committed + 8 * TREE_NUM_BEFORE_NODESPLIT);
1509 else
1510 fmm_add_to_work_stack(no_sink, no_source, shmrank_sink, shmrank_source, mintopleafnode);
1511 }
1512 }
1513 }
1514 else /* cell - cell interaction */
1515 {
1516 gravnode *noptr_sink = get_nodep(no_sink, shmrank_sink);
1517 gravnode *noptr_source = get_nodep(no_source, shmrank_source);
1518
1519 /* at least one of the cells needs to depend on local particles */
1520 if(fmm_depends_on_local_mass(no_sink, shmrank_sink) || fmm_depends_on_local_mass(no_source, shmrank_source))
1521 {
1522 /* both cells need to be non-empty */
1523 if(noptr_sink->not_empty != 0 && noptr_source->not_empty != 0)
1524 {
1525 if(noptr_sink == noptr_source) /* self-interaction */
1526 {
1527 if(no_source < MaxPart + MaxNodes) // we have a top-levelnode
1528 if(noptr_source->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1529 mintopleafnode = no_source;
1530
1531 if(noptr_sink->cannot_be_opened_locally)
1532 {
1533 Terminate("should not happen because we have a self-interaction of a supposedly local node");
1534 }
1535 else
1536 {
1537 int min_buffer_space =
1538 std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1539
1540 if(min_buffer_space >= committed + 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT)
1541 fmm_open_both(noptr_sink, noptr_sink, mintopleafnode,
1543 else
1544 fmm_add_to_work_stack(no_source, no_sink, shmrank_source, shmrank_sink, mintopleafnode);
1545 }
1546 }
1547 else
1548 {
1549 MyReal r2;
1550 vector<MyReal> dxyz;
1551
1552 int openflag = fmm_evaluate_node_node_opening_criterion(noptr_sink, noptr_source, dxyz, r2);
1553
1554 if(openflag == NODE_USE)
1555 {
1556 /* evaluate the interaction */
1557 fmm_node_node_interaction(no_sink, no_source, type_sink, type_source, shmrank_sink, shmrank_source, noptr_sink,
1558 noptr_source, dxyz, r2);
1559 }
1560 else if(openflag == NODE_OPEN)
1561 {
1562 /* open both */
1563
1564 if(no_source < MaxPart + MaxNodes) // we have a top-levelnode
1565 if(noptr_source->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1566 mintopleafnode = std::min<int>(mintopleafnode, no_source);
1567
1568 if(no_sink < MaxPart + MaxNodes) // we have a top-levelnode
1569 if(noptr_sink->nextnode >= MaxPart + MaxNodes) // if the next node is not a top-level, we have a leaf node
1570 mintopleafnode = std::min<int>(mintopleafnode, no_sink);
1571
1572 if(noptr_source->cannot_be_opened_locally || noptr_sink->cannot_be_opened_locally)
1573 {
1574 if(noptr_source->cannot_be_opened_locally && noptr_sink->cannot_be_opened_locally)
1575 Terminate("this should not happen, because then both nodes would be foreign");
1576
1577 if(noptr_source->cannot_be_opened_locally)
1578 tree_add_to_fetch_stack(noptr_source, no_source, shmrank_source);
1579
1580 if(noptr_sink->cannot_be_opened_locally)
1581 tree_add_to_fetch_stack(noptr_sink, no_sink, shmrank_sink);
1582
1583 fmm_add_to_work_stack(no_source, no_sink, shmrank_source, shmrank_sink, mintopleafnode);
1584 }
1585 else
1586 {
1587 int min_buffer_space =
1588 std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1589
1590 if(min_buffer_space >= committed + 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT)
1591 fmm_open_both(noptr_sink, noptr_source, mintopleafnode,
1593 else
1594 fmm_add_to_work_stack(no_source, no_sink, shmrank_source, shmrank_sink, mintopleafnode);
1595 }
1596 }
1597 }
1598 }
1599 }
1600 }
1601}
1602
1603void fmm::fmm_determine_nodes_with_local_mass(int no, int sib)
1604{
1605 gravnode *nop = get_nodep(no, Shmem.Island_ThisTask);
1606
1607 int p = nop->nextnode;
1608
1609 /* if the next node is not a top-level node, we have reached a leaf node, and we need to do nothing */
1610 if(p < MaxPart || p >= FirstNonTopLevelNode)
1611 return;
1612
1613 unsigned char depends_on_local_mass = 0;
1614
1615 while(p != nop->sibling)
1616 {
1617 if(p >= 0)
1618 {
1619 if(p >= MaxPart && p < MaxPart + MaxNodes) /* we have an internal node */
1620 {
1621 int nextsib = get_nodep(p, Shmem.Island_ThisTask)->sibling;
1622
1623 fmm_determine_nodes_with_local_mass(p, nextsib);
1624 }
1625
1626 if(p < MaxPart) /* a particle */
1627 {
1628 Terminate("stop");
1629 }
1630 else if(p < MaxPart + MaxNodes) /* an internal node */
1631 {
1632 depends_on_local_mass |= Topnode_depends_on_local_mass[p];
1633
1634 p = get_nodep(p, Shmem.Island_ThisTask)->sibling;
1635 }
1636 else if(p < MaxPart + MaxNodes + D->NTopleaves) /* a pseudo particle */
1637 {
1638 /* we are processing a local leaf-node which does not have any particles.
1639 * can continue to the next element, which should end the work.
1640 */
1641 Terminate("stop");
1642 }
1643 else
1644 {
1645 Terminate("stop");
1646 }
1647 }
1648 }
1649
1650 Topnode_depends_on_local_mass[no] = depends_on_local_mass;
1651}
1652
1653void fmm::gravity_fmm(int timebin)
1654{
1655 interactioncountPP = 0;
1656 interactioncountPN = 0;
1657 interactioncountNN = 0;
1658 interactioncountEffective = 0;
1659
1661 TIMER_START(CPU_TREE);
1662
1663 D->mpi_printf("FMM: Begin tree force. timebin=%d (presently allocated=%g MB)\n", timebin, All.ErrTolTheta,
1665
1666#ifdef PMGRID
1667 set_mesh_factors();
1668#endif
1669
1670 Topnode_depends_on_local_mass = (char *)Mem.mymalloc_clear("Topnode_depends_on_local_mass", D->NTopnodes * sizeof(char));
1671 Topnode_depends_on_local_mass -= MaxPart;
1672
1673 for(int n = 0; n < D->NTopleaves; n++)
1674 {
1675 if(D->TaskOfLeaf[n] == D->ThisTask)
1676 {
1677 int no = NodeIndex[n];
1678
1679 if(TopNodes[no].not_empty)
1680 Topnode_depends_on_local_mass[no] = 1;
1681 }
1682 }
1683
1684 fmm_determine_nodes_with_local_mass(MaxPart, -1);
1685
1686 TIMER_START(CPU_TREESTACK);
1687
1688 NumOnWorkStack = 0;
1689 AllocWorkStackBaseLow = std::max<int>(1.5 * (Tp->NumPart + NumPartImported), TREE_MIN_WORKSTACK_SIZE);
1690 AllocWorkStackBaseHigh = AllocWorkStackBaseLow + TREE_EXPECTED_CYCLES * TREE_MIN_WORKSTACK_SIZE;
1691 MaxOnWorkStack = std::max<int>(AllocWorkStackBaseLow, 2 * 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT);
1692
1693 FMM_WorkStack = (fmm_workstack_data *)Mem.mymalloc("FMM_WorkStack", AllocWorkStackBaseHigh * sizeof(fmm_workstack_data));
1694 ResultIndexList = (int *)Mem.mymalloc("ResultIndexList", NumPartImported * sizeof(int));
1695
1696 for(int i = 0; i < Tp->TimeBinsGravity.NActiveParticles; i++)
1697 {
1698 int target = Tp->TimeBinsGravity.ActiveParticleList[i];
1699
1700 /* let's do a safety check here to protect against accidental use of zero softening lengths */
1701 int softtype = Tp->P[target].getSofteningClass();
1702 if(All.ForceSoftening[softtype] == 0)
1703 Terminate("Particle with ID=%lld of type=%d and softening type=%d was assigned zero softening\n",
1704 (long long)Tp->P[target].ID.get(), Tp->P[target].getType(), softtype);
1705 }
1706
1707 int ncount = 0;
1708
1709 for(int i = 0; i < NumPartImported; i++)
1710 {
1711#ifndef HIERARCHICAL_GRAVITY
1712 if(Points[i].ActiveFlag)
1713#endif
1714 {
1715 ResultIndexList[i] = ncount++;
1716 }
1717 }
1718
1719 NumOnWorkStack = 0;
1720 NewOnWorkStack = 0;
1721
1722 /* for starting, request the self-interaction between the root node */
1723 fmm_add_to_work_stack(MaxPart, MaxPart, Shmem.Island_ThisTask, Shmem.Island_ThisTask, MaxPart + D->NTopnodes);
1724
1725 NumOnWorkStack = NewOnWorkStack;
1726
1727 ResultsActiveImported =
1728 (resultsactiveimported_data *)Mem.mymalloc_clear("ResultsActiveImported", ncount * sizeof(resultsactiveimported_data));
1729
1730 TaylorCoeff = (taylor_data *)Mem.mymalloc_clear("TaylorCoeff", NumNodes * sizeof(taylor_data));
1731 TaylorCoeff -= MaxPart;
1732
1733 /******************************************/
1734 /* now execute the tree walk calculations */
1735 /******************************************/
1736
1737 errTolForceAcc = All.ErrTolForceAcc;
1738 errTolThetaMax2 = All.ErrTolThetaMax * All.ErrTolThetaMax;
1739 errTolTheta2 = All.ErrTolTheta * All.ErrTolTheta;
1740
1741 sum_NumForeignNodes = 0;
1742 sum_NumForeignPoints = 0;
1743
1744 // set a default size of the fetch stack equal to half the work stack (this may still be somewhat too large)
1745 MaxOnFetchStack = std::max<int>(0.1 * (Tp->NumPart + NumPartImported), TREE_MIN_WORKSTACK_SIZE);
1746 MaxOnFetchStack = std::max<int>(MaxOnFetchStack, 2 * 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT);
1747 StackToFetch = (fetch_data *)Mem.mymalloc_movable(&StackToFetch, "StackToFetch", MaxOnFetchStack * sizeof(fetch_data));
1748
1749 // let's grab at most half the still available memory for imported points and nodes
1750 int nspace = (0.33 * Mem.FreeBytes) / (sizeof(gravnode) + 8 * sizeof(foreign_gravpoint_data));
1751
1752 MaxForeignNodes = nspace;
1753 MaxForeignPoints = 8 * nspace;
1754 NumForeignNodes = 0;
1755 NumForeignPoints = 0;
1756
1757 /* the following two arrays hold imported tree nodes and imported points to augment the local tree */
1758 Foreign_Nodes = (gravnode *)Mem.mymalloc_movable(&Foreign_Nodes, "Foreign_Nodes", MaxForeignNodes * sizeof(gravnode));
1759 Foreign_Points = (foreign_gravpoint_data *)Mem.mymalloc_movable(&Foreign_Points, "Foreign_Points",
1760 MaxForeignPoints * sizeof(foreign_gravpoint_data));
1761
1762 tree_initialize_leaf_node_access_info();
1763
1764 TIMER_STOP(CPU_TREESTACK);
1765
1766 double t0 = Logs.second();
1767 int max_ncycles = 0;
1768
1769 prepare_shared_memory_access();
1770
1771#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
1772 for(int rep = 0; rep < 2; rep++)
1773 {
1774 if(rep == 0)
1775 {
1776 skip_actual_force_computation = true;
1777 }
1778 else
1779 {
1780 skip_actual_force_computation = false;
1781
1782 NumOnWorkStack = 0;
1783 NewOnWorkStack = 0;
1784
1785 /* for starting, request the self-interaction between the root node */
1786 fmm_add_to_work_stack(MaxPart, MaxPart, Shmem.Island_ThisTask, Shmem.Island_ThisTask, MaxPart + D->NTopnodes);
1787
1788 NumOnWorkStack = NewOnWorkStack;
1789 }
1790#endif
1791
1792 while(NumOnWorkStack > 0) // repeat until we are out of work
1793 {
1794 NewOnWorkStack = 0; // gives the new entries
1795 NumOnFetchStack = 0;
1796 MaxOnWorkStack = std::min<int>(AllocWorkStackBaseLow + max_ncycles * TREE_MIN_WORKSTACK_SIZE, AllocWorkStackBaseHigh);
1797 MaxOnWorkStack = std::max<int>(MaxOnWorkStack, 2 * 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT);
1798
1799 TIMER_START(CPU_TREEWALK);
1800
1801 int item = 0;
1802
1803 while(item < NumOnWorkStack)
1804 {
1805 int min_buffer_space =
1806 std::min<int>(MaxOnWorkStack - (NumOnWorkStack + NewOnWorkStack), MaxOnFetchStack - NumOnFetchStack);
1807
1808 int committed = 8 * 8 * TREE_NUM_BEFORE_NODESPLIT * TREE_NUM_BEFORE_NODESPLIT;
1809
1810 if(min_buffer_space >= committed)
1811 {
1812 int no1 = FMM_WorkStack[item].Node1;
1813 int no2 = FMM_WorkStack[item].Node2;
1814 unsigned char shmrank1 = FMM_WorkStack[item].ShmRank1;
1815 unsigned char shmrank2 = FMM_WorkStack[item].ShmRank2;
1816 int mintopleaf = FMM_WorkStack[item].MinTopLeafNode;
1817 item++;
1818
1819 char type1 = 0, type2 = 0;
1820
1821 if(no1 < MaxPart) /* a local particle */
1823 else if(no1 < MaxPart + MaxNodes) /* an internal node */
1824 type1 = NODE_TYPE_LOCAL_NODE;
1825 else if(no1 >= ImportedNodeOffset && no1 < EndOfTreePoints) /* an imported Treepoint particle */
1827 else if(no1 >= EndOfTreePoints && no1 < EndOfForeignNodes) /* an imported LET node */
1828 type1 = NODE_TYPE_FETCHED_NODE;
1829 else if(no1 >= EndOfForeignNodes) /* an imported LED particle */
1831
1832 if(no2 < MaxPart) /* a local particle */
1834 else if(no2 < MaxPart + MaxNodes) /* an internal node */
1835 type2 = NODE_TYPE_LOCAL_NODE;
1836 else if(no2 >= ImportedNodeOffset && no2 < EndOfTreePoints) /* an imported Treepoint particle */
1838 else if(no2 >= EndOfTreePoints && no2 < EndOfForeignNodes) /* an imported LET node */
1839 type2 = NODE_TYPE_FETCHED_NODE;
1840 else if(no2 >= EndOfForeignNodes) /* an imported LED particle */
1842
1843 if(no1 == MaxPart && no2 == MaxPart)
1844 {
1845 // we have the interaction between the two root nodes
1846 fmm_force_interact(no1, no2, type1, type2, shmrank1, shmrank2, mintopleaf, committed);
1847 }
1848 else
1849 {
1851 {
1852 /* node-node interaction */
1853
1854 gravnode *nop1 = get_nodep(no1, shmrank1);
1855 gravnode *nop2 = get_nodep(no2, shmrank2);
1856
1858 Terminate("how can this be");
1859
1860 fmm_open_both(nop1, nop2, mintopleaf, committed);
1861 }
1862 else
1863 {
1864 /* particle-node interaction, particle should on the sink side */
1865
1866 // we have a node that we previously could not open
1867 gravnode *nop2 = get_nodep(no2, shmrank2);
1868
1869 if(nop2->cannot_be_opened_locally)
1870 Terminate("now we should be able to open it!");
1871
1872 fmm_open_node(no1, nop2, type1, shmrank1, mintopleaf, committed);
1873 }
1874 }
1875 }
1876 else
1877 break;
1878 }
1879
1880 if(item == 0 && NumOnWorkStack > 0)
1881 Terminate("Can't even process a single particle");
1882
1883 TIMER_STOP(CPU_TREEWALK);
1884
1885 TIMER_START(CPU_TREEFETCH);
1886
1887 tree_fetch_foreign_nodes(FETCH_GRAVTREE);
1888
1889 TIMER_STOP(CPU_TREEFETCH);
1890
1891 TIMER_START(CPU_TREESTACK);
1892
1893 /* now reorder the workstack such that we are first going to do residual pristine particles, and then
1894 * imported nodes that hang below the first leaf nodes */
1895 NumOnWorkStack = NumOnWorkStack - item + NewOnWorkStack;
1896 memmove(FMM_WorkStack, FMM_WorkStack + item, NumOnWorkStack * sizeof(fmm_workstack_data));
1897
1898 /* now let's sort such that we can go deep on top-level node branches, allowing us to clear them out eventually */
1899 mycxxsort(FMM_WorkStack, FMM_WorkStack + NumOnWorkStack, compare_fmm_workstack);
1900
1901 TIMER_STOP(CPU_TREESTACK);
1902
1903 max_ncycles++;
1904 }
1905
1906#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
1907 }
1908#endif
1909
1910 TIMER_START(CPU_TREEIMBALANCE);
1911
1912 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX, D->Communicator);
1913
1914 TIMER_STOP(CPU_TREEIMBALANCE);
1915
1916 cleanup_shared_memory_access();
1917
1918 /* free temporary buffers */
1919
1920 Mem.myfree(Foreign_Points);
1921 Mem.myfree(Foreign_Nodes);
1922 Mem.myfree(StackToFetch);
1923
1924 TIMER_START(CPU_TREEWALK);
1925
1926 taylor_data taylor_current{}; // note: the curly braces initialize this to zero in this case
1927
1928 /* propagate node expansions to particles */
1929 if(fmm_depends_on_local_mass(MaxPart, Shmem.Island_ThisTask))
1930 fmm_force_passdown(MaxPart, Shmem.Island_ThisTask, taylor_current);
1931
1932 TIMER_STOP(CPU_TREEWALK);
1933
1934 Mem.myfree(TaylorCoeff + MaxPart);
1935
1936 double t1 = Logs.second();
1937
1938 D->mpi_printf("FMM: Forces calculated, with %d cycles took %g sec\n", max_ncycles, Logs.timediff(t0, t1));
1939
1940 /* now communicate the forces in ResultsActiveImported */
1941 gravity_exchange_forces();
1942
1943 Mem.myfree(ResultsActiveImported);
1944 Mem.myfree(ResultIndexList);
1945 Mem.myfree(FMM_WorkStack);
1946
1947 TIMER_STOP(CPU_TREE);
1948
1949 D->mpi_printf("FMM: tree-force is done.\n");
1950
1951 /* gather some diagnostic information */
1952
1953 TIMER_START(CPU_LOGS);
1954
1955 struct detailed_timings
1956 {
1957 double all, tree, wait, fetch, stack, lastpm;
1958 double costtotal, numnodes;
1959 double interactioncountEffective;
1960 double interactioncountPP, interactioncountPN, interactioncountNN;
1961 double NumForeignNodes, NumForeignPoints;
1962 double fillfacFgnNodes, fillfacFgnPoints;
1963 double sumcost;
1964 };
1965 detailed_timings timer, tisum, timax;
1966
1967 memset(&timer, 0, sizeof(detailed_timings));
1968
1969 if(MeasureCostFlag)
1970 {
1971 double sum = 0;
1972 for(int i = 0; i < Tp->NumPart; i++)
1973 if(Tp->TimeBinSynchronized[Tp->P[i].TimeBinGrav])
1974 sum += Tp->P[i].GravCost;
1975
1976 timer.sumcost = sum;
1977 }
1978
1979 timer.tree = TIMER_DIFF(CPU_TREEWALK);
1980 timer.wait = TIMER_DIFF(CPU_TREEIMBALANCE);
1981 timer.fetch = TIMER_DIFF(CPU_TREEFETCH);
1982 timer.stack = TIMER_DIFF(CPU_TREESTACK);
1983 timer.all = timer.tree + timer.wait + timer.fetch + timer.stack + TIMER_DIFF(CPU_TREE);
1984 tisum.lastpm = All.CPUForLastPMExecution;
1985 timer.costtotal = interactioncountPP + interactioncountEffective;
1986 timer.interactioncountPP = interactioncountPP;
1987 timer.interactioncountPN = interactioncountPN;
1988 timer.interactioncountNN = interactioncountNN;
1989 timer.interactioncountEffective = interactioncountEffective;
1990 timer.NumForeignNodes = NumForeignNodes;
1991 timer.NumForeignPoints = NumForeignPoints;
1992 timer.fillfacFgnNodes = NumForeignNodes / ((double)MaxForeignNodes);
1993 timer.fillfacFgnPoints = NumForeignPoints / ((double)MaxForeignPoints);
1994 timer.numnodes = NumNodes;
1995
1996 MPI_Reduce((double *)&timer, (double *)&tisum, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_SUM, 0,
1997 D->Communicator);
1998 MPI_Reduce((double *)&timer, (double *)&timax, (int)(sizeof(detailed_timings) / sizeof(double)), MPI_DOUBLE, MPI_MAX, 0,
1999 D->Communicator);
2000
2001 All.TotNumOfForces += Tp->TimeBinsGravity.GlobalNActiveParticles;
2002
2003 if(D->ThisTask == 0)
2004 {
2005 fprintf(Logs.FdTimings, "Nf=%9lld FMM timebin=%d total-Nf=%lld\n", Tp->TimeBinsGravity.GlobalNActiveParticles, timebin,
2007 fprintf(Logs.FdTimings, " work-load balance: %g part/sec: raw=%g, effective=%g ia/part: avg=%g (%g|%g|%g)\n",
2008 timax.tree / ((tisum.tree + 1e-20) / D->NTask), Tp->TimeBinsGravity.GlobalNActiveParticles / (tisum.tree + 1.0e-20),
2009 Tp->TimeBinsGravity.GlobalNActiveParticles / ((timax.tree + 1.0e-20) * D->NTask),
2010 tisum.costtotal / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
2011 tisum.interactioncountPP / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
2012 tisum.interactioncountPN / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20),
2013 tisum.interactioncountNN / (Tp->TimeBinsGravity.GlobalNActiveParticles + 1.0e-20));
2014 fprintf(Logs.FdTimings,
2015 " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
2016 "fill=%g cycles=%d\n",
2017 timax.numnodes, timax.numnodes / MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes / D->NTask,
2018 timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints / D->NTask, timax.fillfacFgnPoints, max_ncycles);
2019 fprintf(Logs.FdTimings,
2020 " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g <stack>=%g "
2021 "(lastpm=%g) sec\n",
2022 tisum.all / D->NTask, tisum.tree / D->NTask, tisum.wait / D->NTask, tisum.fetch / D->NTask, tisum.stack / D->NTask,
2023 tisum.lastpm / D->NTask);
2024 fprintf(Logs.FdTimings, " total interaction cost: %g (imbalance=%g) total cost measure: %g %g\n", tisum.costtotal,
2025 timax.costtotal / (tisum.costtotal / D->NTask), tisum.sumcost,
2026 tisum.interactioncountPP + tisum.interactioncountEffective);
2028 }
2029
2030 Mem.myfree(Topnode_depends_on_local_mass + MaxPart);
2031
2032 TIMER_STOP(CPU_LOGS);
2033}
2034
2035#endif
global_data_all_processes All
Definition: main.cc:40
@ POINTMASS
Definition: ewald.h:67
@ MULTIPOLES
Definition: ewald.h:68
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:355
double timediff(double t0, double t1)
Definition: logs.cc:488
FILE * FdTimings
Definition: logs.h:47
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:71
size_t FreeBytes
Definition: mymalloc.h:48
int * GetNodeIDForSimulCommRank
int Island_ThisTask
Definition: tree.h:91
T r2(void)
Definition: symtensors.h:187
T da[3]
Definition: symtensors.h:106
#define FLAG_OUTSIDE
Definition: constants.h:29
#define FLAG_BOUNDARYOVERLAP
Definition: constants.h:31
#define FLAG_INSIDE
Definition: constants.h:30
#define LOW_MESH
Definition: constants.h:33
#define HIGH_MESH
Definition: constants.h:34
double mycxxsort(T *begin, T *end, Tcomp comp)
Definition: cxxsort.h:39
uint32_t MyIntPosType
Definition: dtypes.h:35
#define LEVEL_ALWAYS_OPEN
Definition: dtypes.h:383
int32_t MySignedIntPosType
Definition: dtypes.h:36
double MyReal
Definition: dtypes.h:82
#define BITS_FOR_POSITIONS
Definition: dtypes.h:37
ewald Ewald
Definition: main.cc:42
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
Definition: gravtree.h:24
#define TREE_MIN_WORKSTACK_SIZE
Definition: gravtree.h:26
#define NODE_USE
Definition: gravtree.h:35
#define NODE_DISCARD
Definition: gravtree.h:37
#define NODE_TYPE_FETCHED_NODE
Definition: gravtree.h:33
#define NODE_TYPE_TREEPOINT_PARTICLE
Definition: gravtree.h:30
#define TREE_EXPECTED_CYCLES
Definition: gravtree.h:27
#define NODE_TYPE_FETCHED_PARTICLE
Definition: gravtree.h:31
#define NODE_OPEN
Definition: gravtree.h:36
#define NODE_TYPE_LOCAL_NODE
Definition: gravtree.h:32
#define NODE_TYPE_LOCAL_PARTICLE
Definition: gravtree.h:29
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
Definition: logs.h:257
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
Definition: logs.h:250
#define Terminate(...)
Definition: macros.h:15
shmem Shmem
Definition: main.cc:45
memory Mem
Definition: main.cc:44
expr sqrt(half arg)
Definition: half.hpp:2777
unsigned char level
Definition: tree.h:64
unsigned char nextnode_shmrank
Definition: tree.h:66
std::atomic< unsigned char > cannot_be_opened_locally
Definition: tree.h:70
unsigned char not_empty
Definition: tree.h:73
int nextnode
Definition: tree.h:57
vector< MyIntPosType > center
Definition: tree.h:51
int OriginTask
Definition: tree.h:61
unsigned char sibling_shmrank
Definition: tree.h:65
int sibling
Definition: tree.h:53
vector< MyReal > D1phi
Definition: gravtree.h:186
symtensor3< MyReal > D3phi
Definition: gravtree.h:188
MyReal D0phi
Definition: gravtree.h:185
symtensor2< MyReal > D2phi
Definition: gravtree.h:187
unsigned char getSofteningClass(void)
Definition: gravtree.h:143
unsigned char Nextnode_shmrank
Definition: gravtree.h:133
MyIntPosType IntPos[3]
Definition: gravtree.h:130
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
Definition: allvars.h:258
long long TotNumOfForces
Definition: allvars.h:103
MyDouble mass
Definition: gravtree.h:65
vector< MyIntPosType > s
Definition: gravtree.h:66
int getSofteningClass(void)
Definition: gravtree.h:90
unsigned char maxsofttype
Definition: gravtree.h:83
unsigned char minsofttype
Definition: gravtree.h:84
unsigned char getSofteningClass(void)
Definition: gravtree.h:118
MyDouble Mass
Definition: gravtree.h:103
float OldAcc
Definition: gravtree.h:104
MyIntPosType IntPos[3]
Definition: gravtree.h:102
MyDouble getMass(void)
unsigned char getSofteningClass(void)
MyIntPosType IntPos[3]
Definition: particle_data.h:53
#define qXX
#define qYY
#define qZZ
vector< typename which_return< T1, T2 >::type > contract_fourtimes(const symtensor5< T1 > &D, const vector< T2 > &v)
Definition: symtensors.h:1346
vector< typename which_return< T1, T2 >::type > contract_twice(const symtensor3< T1 > &D, const vector< T2 > &v)
Definition: symtensors.h:1321
void setup_D5(enum setup_options opt, symtensor5< T > &D5, vector< T > &dxyz, symtensor3< T > &aux3, symtensor4< T > &aux4, symtensor5< T > &aux5, TypeGfac g3, TypeGfac g4, TypeGfac g5)
Definition: symtensors.h:1842
void setup_D3(enum setup_options opt, symtensor3< T > &D3, vector< T > &dxyz, symtensor2< T > &aux2, symtensor3< T > &aux3, TypeGfac g2, TypeGfac g3)
Definition: symtensors.h:1775
@ INIT
Definition: symtensors.h:1770
vector< typename which_return< T1, T2 >::type > contract_thrice(const symtensor4< T1 > &D, const vector< T2 > &v)
Definition: symtensors.h:1333
void setup_D4(enum setup_options opt, symtensor4< T > &D4, vector< T > &dxyz, symtensor2< T > &aux2, symtensor3< T > &aux3, symtensor4< T > &aux4, TypeGfac g2, TypeGfac g3, TypeGfac g4)
Definition: symtensors.h:1801
void myflush(FILE *fstream)
Definition: system.cc:125
T square(T const value)
Definition: system.h:36
#define TREE_NUM_BEFORE_NODESPLIT
Definition: tree.h:16