12#include "gadgetconfig.h"
19#include "../data/allvars.h"
20#include "../data/dtypes.h"
21#include "../data/intposconvert.h"
22#include "../data/mymalloc.h"
23#include "../domain/domain.h"
24#include "../gravity/ewald.h"
25#include "../gravtree/gravtree.h"
26#include "../gravtree/gwalk.h"
27#include "../logs/logs.h"
28#include "../logs/timer.h"
29#include "../main/simulation.h"
30#include "../mpi_utils/mpi_utils.h"
32#include "../sort/cxxsort.h"
33#include "../sort/peano.h"
34#include "../system/system.h"
35#include "../time_integration/timestep.h"
45inline void gwalk::evaluate_particle_particle_interaction(
const pinfo &pdat,
const int no,
const char jtype,
int shmrank)
47#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
48 if(skip_actual_force_computation)
56#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
74#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
75 overlap_check = P->InsideOutsideFlag;
92#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
93 overlap_check = Pointp->InsideOutsideFlag;
102 intpos = foreignpoint->
IntPos;
103 mass = foreignpoint->
Mass;
109#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
110 overlap_check = foreignpoint->InsideOutsideFlag;
116#if defined(PLACEHIGHRESREGION)
130 MyReal rinv = (r > 0) ? 1 / r : 0;
137 if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp))
145 *pdat.pot -= mass * gfac.fac0;
147 *pdat.acc -= (mass * gfac.fac1 * rinv) * dxyz;
157 *pdat.pot += mass * ew.
D0phi;
159 *pdat.acc += mass * ew.
D1phi;
165 interactioncountPP += 1;
168inline int gwalk::evaluate_particle_node_opening_criterion_and_interaction(
const pinfo &pdat,
gravnode *nop)
176#if defined(PMGRID) || !defined(TREE_NO_SAFETY_BOX)
181#ifndef TREE_NO_SAFETY_BOX
183 if(dist[0] < intlen && dist[1] < intlen && dist[2] < intlen)
192#ifdef PLACEHIGHRESREGION
195 int overlap_check = nop->overlap_flag;
199 "this case should not happen: node center=(%g|%g|%g) len=%g particle=(%g|%g|%g) "
200 "rel-dist=(%g|%g|%g)\n",
213 if(dist[0] > mfp->intrcut[0] + halflen)
217 if(dist[1] > mfp->intrcut[1] + halflen)
221 if(dist[2] > mfp->intrcut[2] + halflen)
239 if(len2 > r2 * theta2)
244#if(MULTIPOLE_ORDER <= 2)
245 if(mass * len2 > r2 * r2 * errTolForceAcc * pdat.aold)
247#elif(MULTIPOLE_ORDER == 3)
248 if(
square(mass * len * len2) > r2 *
square(r2 * r2 * errTolForceAcc * pdat.aold))
250#elif(MULTIPOLE_ORDER == 4)
251 if(mass * len2 * len2 > r2 * r2 * r2 * errTolForceAcc * pdat.aold)
253#elif(MULTIPOLE_ORDER == 5)
254 if(
square(mass * len2 * len2 * len) > r2 *
square(r2 * r2 * r2 * errTolForceAcc * pdat.aold))
258 if(len2 > r2 * thetamax2)
282#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
283 if(skip_actual_force_computation)
288 MyReal rinv = (r > 0) ? 1 / r : 0;
295 if(modify_gfactors_pm_multipole(gfac, r, rinv, mfp))
304 *pdat.pot -= mass * g0;
307 MyReal g1 = gfac.fac1 * rinv;
308 *pdat.acc -= (mass * g1) * dxyz;
310#if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
311 MyReal g2 = gfac.fac2 * gfac.rinv2;
313 MyReal Q2dxyz2 = Q2dxyz * dxyz;
314 MyReal Q2trace = nop->Q2Tensor.trace();
315#if(MULTIPOLE_ORDER >= 3)
316 MyReal g3 = gfac.fac3 * gfac.rinv3;
317 *pdat.acc -=
static_cast<MyReal>(0.5) * (g2 * Q2trace + g3 * Q2dxyz2) * dxyz + g2 * Q2dxyz;
320 *pdat.pot -=
static_cast<MyReal>(0.5) * (g1 * Q2trace + g2 * Q2dxyz2);
324#if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
329 MyReal Q3dxyz3 = Q3dxyz2 * dxyz;
337#if(MULTIPOLE_ORDER >= 4)
338 MyReal g4 = gfac.fac4 * gfac.rinv2 * gfac.rinv2;
340 static_cast<MyReal>(0.5) *
341 (g2 * Q3vec + g3 * Q3dxyz2 + (
static_cast<MyReal>(1.0 / 3) * g4 * Q3dxyz3 + g3 * Q3dxyzTrace) * dxyz);
344 *pdat.pot -=
static_cast<MyReal>(1.0 / 6) * (3 * g2 * Q3dxyzTrace + g3 * Q3dxyz3);
348#if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
355 MyReal Q4dxyz4 = Q4dxyz3 * dxyz;
367#if(MULTIPOLE_ORDER >= 5)
369 MyReal g5 = gfac.fac5 * gfac.rinv2 * gfac.rinv3;
371 static_cast<MyReal>(1.0 / 24) * (g3 * (3 * QTtrace * dxyz + 12 * QTdxyz) + g4 * (6 * Q4dxyz2trace * dxyz + 4 * Q4dxyz3) +
372 g5 * Q4dxyz4 * dxyz);
375 *pdat.pot -=
static_cast<MyReal>(1.0 / 24) * (g2 * 3 * QTtrace + g3 * 6 * Q4dxyz2trace + g4 * Q4dxyz4);
379#if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
386 MyReal Q5dxyz5 = Q5dxyz4 * dxyz;
391 *pdat.pot -=
static_cast<MyReal>(1.0 / 120) * (g3 * 15 * Q5dxyzTtrace + g4 * 10 * Q5dxyz3.
trace() + g5 * Q5dxyz5);
402 *pdat.pot += mass * ew.
D0phi;
403#if(MULTIPOLE_ORDER >= 3) || (MULTIPOLE_ORDER >= 2 && defined(EXTRAPOTTERM))
404 *pdat.pot +=
static_cast<MyReal>(0.5) * (nop->Q2Tensor * ew.
D2phi);
406#if(MULTIPOLE_ORDER >= 4) || (MULTIPOLE_ORDER >= 3 && defined(EXTRAPOTTERM))
407 *pdat.pot +=
static_cast<MyReal>(1.0 / 6) * (nop->Q3Tensor * ew.
D3phi);
409#if(MULTIPOLE_ORDER >= 5) || (MULTIPOLE_ORDER >= 4 && defined(EXTRAPOTTERM))
410 *pdat.pot +=
static_cast<MyReal>(1.0 / 24) * (nop->Q4Tensor * ew.D4phi);
412#if(MULTIPOLE_ORDER >= 5 && defined(EXTRAPOTTERM) && defined(EVALPOTENTIAL))
413 *pdat.pot +=
static_cast<MyReal>(1.0 / 120) * (nop->Q5Tensor * ew.D5phi);
416 *pdat.acc += mass * ew.
D1phi;
417#if(MULTIPOLE_ORDER >= 3)
418 *pdat.acc +=
static_cast<MyReal>(0.5) * (ew.
D3phi * nop->Q2Tensor);
420#if(MULTIPOLE_ORDER >= 4)
421 *pdat.acc +=
static_cast<MyReal>(1.0 / 6) * (ew.D4phi * nop->Q3Tensor);
423#if(MULTIPOLE_ORDER >= 5)
424 *pdat.acc +=
static_cast<MyReal>(1.0 / 24) * (ew.D5phi * nop->Q4Tensor);
428 interactioncountPN += 1;
436inline void gwalk::gwalk_open_node(
const pinfo &pdat,
int i,
char ptype,
gravnode *nop,
int mintopleafnode,
int committed)
446 "p=%d < 0 nop->sibling=%d nop->nextnode=%d shmrank=%d nop->sibling_shmrank=%d nop->foreigntask=%d mass=%g "
447 "first_nontoplevelnode=%d",
451 unsigned char next_shmrank;
458 next_shmrank = shmrank;
472 next_shmrank = shmrank;
494 "should not happen: p=%d MaxPart=%d MaxNodes=%d ImportedNodeOffset=%d EndOfTreePoints=%d EndOfForeignNodes=%d "
499 gravity_force_interact(pdat, i, p, ptype, type, shmrank, mintopleafnode, committed);
502 shmrank = next_shmrank;
506void gwalk::gravity_force_interact(
const pinfo &pdat,
int i,
int no,
char ptype,
char no_type,
unsigned char shmrank,
507 int mintopleafnode,
int committed)
511 evaluate_particle_particle_interaction(pdat, no, no_type, shmrank);
525#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
528 if(skip_actual_force_computation)
534 int openflag = evaluate_particle_node_opening_criterion_and_interaction(pdat, nop);
543 Terminate(
"this should not happen any more");
554 int min_buffer_space =
602 interactioncountPP = 0;
603 interactioncountPN = 0;
608 D->mpi_printf(
"GRAVTREE: Begin tree force. timebin=%d (presently allocated=%g MB)\n", timebin,
Mem.
getAllocatedBytesInMB());
640 unsigned char level = 0;
641 unsigned char rotation = 0;
644 while(
D->TopNodes[no].Daughter >= 0)
646 unsigned char pix = (((
unsigned char)((xxb & mask) >> (shiftx--))) | ((
unsigned char)((yyb & mask) >> (shifty--))) |
647 ((
unsigned char)((zzb & mask) >> (shiftz--))));
651 no =
D->TopNodes[no].Daughter + subnode;
654 no =
D->TopNodes[no].Leaf;
655 int task =
D->TaskOfLeaf[no];
657 if(task ==
D->ThisTask)
678 Terminate(
"Particle with ID=%lld of type=%d and softening type=%d was assigned zero softening\n",
686#ifndef HIERARCHICAL_GRAVITY
700#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
701 workstack_data *WorkStackBak = (workstack_data *)
Mem.mymalloc(
"WorkStackBak",
NumOnWorkStack *
sizeof(workstack_data));
707 (resultsactiveimported_data *)
Mem.mymalloc_clear(
"ResultsActiveImported", ncount *
sizeof(resultsactiveimported_data));
746#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
747 for(
int rep = 0; rep < 2; rep++)
751 skip_actual_force_computation =
true;
755 skip_actual_force_computation =
false;
774 int min_buffer_space =
776 if(min_buffer_space >= committed)
781 int mintopleaf =
WorkStack[item].MinTopLeafNode;
785 int ptype = get_pinfo(target, pdat);
790 gravity_force_interact(pdat, target, no, ptype,
NODE_TYPE_LOCAL_NODE, shmrank, mintopleaf, committed);
799 Terminate(
"item=%d: no=%d now we should be able to open it!", item, no);
802 gwalk_open_node(pdat, target, ptype, nop, mintopleaf, committed);
810 Terminate(
"Can't even process a single particle");
835#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
841 MPI_Allreduce(MPI_IN_PLACE, &max_ncycles, 1, MPI_INT, MPI_MAX,
D->Communicator);
855 D->mpi_printf(
"GRAVTREE: tree-forces are calculated, with %d cycles took %g sec\n", max_ncycles,
Logs.
timediff(t0, t1));
861#ifdef PRESERVE_SHMEM_BINARY_INVARIANCE
862 Mem.myfree(WorkStackBak);
869 D->mpi_printf(
"GRAVTREE: tree-force is done.\n");
875 struct detailed_timings
877 double tree, wait, fetch, stack, all, lastpm;
878 double costtotal, numnodes;
879 double interactioncountPP, interactioncountPN;
881 double fillfacFgnNodes, fillfacFgnPoints;
883 detailed_timings timer, tisum, timax;
889 timer.all = timer.tree + timer.wait + timer.fetch + timer.stack +
TIMER_DIFF(CPU_TREE);
891 timer.costtotal = interactioncountPP + interactioncountPN;
893 timer.interactioncountPP = interactioncountPP;
894 timer.interactioncountPN = interactioncountPN;
900 MPI_Reduce((
double *)&timer, (
double *)&tisum, (
int)(
sizeof(detailed_timings) /
sizeof(
double)), MPI_DOUBLE, MPI_SUM, 0,
902 MPI_Reduce((
double *)&timer, (
double *)&timax, (
int)(
sizeof(detailed_timings) /
sizeof(
double)), MPI_DOUBLE, MPI_MAX, 0,
911 fprintf(
Logs.
FdTimings,
" work-load balance: %g part/sec: raw=%g, effective=%g ia/part: avg=%g (%g|%g)\n",
918 " maximum number of nodes: %g, filled: %g NumForeignNodes: max=%g avg=%g fill=%g NumForeignPoints: max=%g avg=%g "
919 "fill=%g cycles=%d\n",
920 timax.numnodes, timax.numnodes /
MaxNodes, timax.NumForeignNodes, tisum.NumForeignNodes /
D->NTask,
921 timax.fillfacFgnNodes, timax.NumForeignPoints, tisum.NumForeignPoints /
D->NTask, timax.fillfacFgnPoints, max_ncycles);
923 " avg times: <all>=%g <tree>=%g <wait>=%g <fetch>=%g <stack>=%g "
925 tisum.all /
D->NTask, tisum.tree /
D->NTask, tisum.wait /
D->NTask, tisum.fetch /
D->NTask, tisum.stack /
D->NTask,
926 tisum.lastpm /
D->NTask);
927 fprintf(
Logs.
FdTimings,
" total interaction cost: %g (imbalance=%g)\n", tisum.costtotal,
928 timax.costtotal / (tisum.costtotal /
D->NTask));
936#include "../data/simparticles.h"
global_data_all_processes All
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
void gravity_exchange_forces(void)
resultsactiveimported_data * ResultsActiveImported
void get_gfactors_multipole(gfactors &res, const T r, const T h_max, const T rinv)
void get_gfactors_monopole(gfactors &res, const T r, const T h_max, const T rinv)
void gravity_tree(int timebin)
This function computes the gravitational forces for all active particles.
void nearest_image_intpos_to_absolute_intdist(const MyIntPosType *a, const MyIntPosType *b, MyIntPosType *delta)
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
int * GetNodeIDForSimulCommRank
TimeBinData TimeBinsGravity
long long sum_NumForeignPoints
workstack_data * WorkStack
gravpoint_data * get_pointsp(int no, unsigned char shmrank)
void cleanup_shared_memory_access(void)
foreign_gravpoint_data * get_foreignpointsp(int n, unsigned char shmrank)
domain< simparticles > * D
pdata get_Pp(int n, unsigned char shmrank)
static bool compare_workstack(const workstack_data &a, const workstack_data &b)
int * get_nextnodep(unsigned char shmrank)
long long sum_NumForeignNodes
void tree_fetch_foreign_nodes(enum ftype fetch_type)
void prepare_shared_memory_access(void)
foreign_gravpoint_data * Foreign_Points
fetch_data * StackToFetch
int AllocWorkStackBaseHigh
gravnode * get_nodep(int no)
void tree_add_to_fetch_stack(gravnode *nop, int nodetoopen, unsigned char shmrank)
void tree_add_to_work_stack(int target, int no, unsigned char shmrank, int mintopleafnode)
void tree_initialize_leaf_node_access_info(void)
int AllocWorkStackBaseLow
#define FLAG_BOUNDARYOVERLAP
double mycxxsort(T *begin, T *end, Tcomp comp)
#define LEVEL_ALWAYS_OPEN
#define BITS_FOR_POSITIONS
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
#define TREE_MIN_WORKSTACK_SIZE
#define NODE_TYPE_FETCHED_NODE
#define NODE_TYPE_TREEPOINT_PARTICLE
#define TREE_EXPECTED_CYCLES
#define NODE_TYPE_FETCHED_PARTICLE
#define NODE_TYPE_LOCAL_NODE
#define TREE_ACTIVE_CUTTOFF_BASE_PM
#define NODE_TYPE_LOCAL_PARTICLE
#define TIMER_START(counter)
Starts the timer counter.
#define TIMER_STOP(counter)
Stops the timer counter.
#define TIMER_STORE
Copies the current value of CPU times to a stored variable, such that differences with respect to thi...
#define TIMER_DIFF(counter)
Returns amount elapsed for the timer since last save with TIMER_STORE.
unsigned char peano_incremental_key(unsigned char pix, unsigned char *rotation)
long long GlobalNActiveParticles
unsigned char nextnode_shmrank
std::atomic< unsigned char > cannot_be_opened_locally
vector< MyIntPosType > center
unsigned char sibling_shmrank
symtensor3< MyReal > D3phi
symtensor2< MyReal > D2phi
unsigned char getSofteningClass(void)
unsigned char Nextnode_shmrank
double CPUForLastPMExecution
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
char RelOpeningCriterionInUse
unsigned char maxsofttype
unsigned char minsofttype
unsigned char getSofteningClass(void)
unsigned char getSofteningClass(void)
unsigned char getType(void)
void myflush(FILE *fstream)
#define TREE_NUM_BEFORE_NODESPLIT