GADGET-4
grav_direct.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#include <math.h>
15#include <mpi.h>
16#include <stdlib.h>
17#include <string.h>
18#include <algorithm>
19
20#include "../data/allvars.h"
21#include "../data/dtypes.h"
22#include "../data/intposconvert.h"
23#include "../data/mymalloc.h"
24#include "../domain/domain.h"
25#include "../gravtree/gravtree.h"
26#include "../logs/logs.h"
27#include "../logs/timer.h"
28#include "../main/simulation.h"
29#include "../mpi_utils/mpi_utils.h"
30#include "../pm/pm.h"
31#include "../system/system.h"
32#include "../time_integration/timestep.h"
33
34#ifdef ALLOW_DIRECT_SUMMATION
35
39template <>
41{
42 TIMER_START(CPU_TREEDIRECT);
43
44 D->mpi_printf("GRAVDIRECT: direct summation. (presently allocated=%g MB)\n", Mem.getAllocatedBytesInMB());
45
46 double tstart = Logs.second();
47
48 int *Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * D->NTask);
49 int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * D->NTask);
50 int *Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * D->NTask);
51 int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * D->NTask);
52
53 struct directdata
54 {
55 MyIntPosType IntPos[3];
56 MyDouble Mass;
57 unsigned char Type;
58#if NSOFTCLASSES > 1
59 unsigned char SofteningClass;
60#endif
61#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
62 unsigned char InsideOutsideFlag : 1;
63#endif
64 };
65 directdata *DirectDataIn, *DirectDataAll;
66
67 struct accdata
68 {
69 MyFloat Acc[3];
70#ifdef EVALPOTENTIAL
71 MyFloat Potential;
72#endif
73 };
74 accdata *DirectAccOut, *DirectAccIn;
75
76 DirectDataIn = (directdata *)Mem.mymalloc("DirectDataIn", Sp->TimeBinsGravity.NActiveParticles * sizeof(directdata));
77
78 int nforces = 0;
79
80 for(int idx = 0; idx < Sp->TimeBinsGravity.NActiveParticles; idx++)
81 {
82 int i = Sp->TimeBinsGravity.ActiveParticleList[idx];
83
84 for(int k = 0; k < 3; k++)
85 DirectDataIn[nforces].IntPos[k] = Sp->P[i].IntPos[k];
86
87 DirectDataIn[nforces].Mass = Sp->P[i].getMass();
88
89 DirectDataIn[nforces].Type = Sp->P[i].getType();
90#if NSOFTCLASSES > 1
91 DirectDataIn[nforces].SofteningClass = Sp->P[i].getSofteningClass();
92#endif
93#if defined(PMGRID) && defined(PLACEHIGHRESREGION)
94 DirectDataIn[nforces].InsideOutsideFlag = Sp->P[i].InsideOutsideFlag;
95#endif
96 nforces++;
97 }
98
99 MPI_Allgather(&nforces, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator);
100
101 int nimport = 0;
102 Recv_offset[0] = 0;
103
104 for(int j = 0; j < D->NTask; j++)
105 {
106 nimport += Recv_count[j];
107
108 if(j > 0)
109 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
110 }
111
112 DirectDataAll = (directdata *)Mem.mymalloc("DirectDataAll", nimport * sizeof(directdata));
113
114 for(int j = 0; j < D->NTask; j++)
115 {
116 Send_count[j] = Recv_count[j] * sizeof(directdata);
117 Send_offset[j] = Recv_offset[j] * sizeof(directdata);
118 }
119
120 myMPI_Allgatherv(DirectDataIn, nforces * sizeof(directdata), MPI_BYTE, DirectDataAll, Send_count, Send_offset, MPI_BYTE,
121 D->Communicator);
122
123 /* subdivide the work evenly */
124 int first, count;
125 subdivide_evenly(nimport, D->NTask, D->ThisTask, &first, &count);
126
127 DirectAccOut = (accdata *)Mem.mymalloc("DirectDataOut", count * sizeof(accdata));
128
129 /* now calculate the forces */
130 for(int i = 0; i < count; i++)
131 {
132 int target = i + first;
133 int result_idx = i;
134
135 vector<double> acc = 0.0;
136#ifdef EVALPOTENTIAL
137 double pot = 0.0;
138#endif
139
140#if NSOFTCLASSES > 1
141 double h_i = All.ForceSoftening[DirectDataAll[target].SofteningClass];
142#else
143 double h_i = All.ForceSoftening[0];
144#endif
145
146 for(int j = 0; j < nimport; j++)
147 {
148#if NSOFTCLASSES > 1
149 double h_j = All.ForceSoftening[DirectDataAll[j].SofteningClass];
150#else
151 double h_j = All.ForceSoftening[0];
152#endif
153 double hmax = (h_j > h_i) ? h_j : h_i;
154
155 vector<double> dxyz;
156 Sp->nearest_image_intpos_to_pos(DirectDataAll[j].IntPos, DirectDataAll[target].IntPos,
157 dxyz.da); /* converts the integer distance to floating point */
158
159 double r2 = dxyz[0] * dxyz[0] + dxyz[1] * dxyz[1] + dxyz[2] * dxyz[2];
160
161 double mass = DirectDataAll[j].Mass;
162
163 /* now evaluate the force component */
164
165 double r = sqrt(r2);
166
167 double rinv = (r > 0) ? 1.0 / r : 0;
168
170
171#ifdef PMGRID
172 mesh_factors *mfp = &mf[LOW_MESH];
173#if defined(PLACEHIGHRESREGION)
175 {
176 if(DirectDataAll[j].InsideOutsideFlag == FLAG_INSIDE && DirectDataAll[target].InsideOutsideFlag == FLAG_INSIDE)
177 mfp = &mf[HIGH_MESH];
178 }
179#endif
181 {
182 if(modify_gfactors_pm_monopole(gfac, r, rinv, mfp))
183 return; // if we are outside the cut-off radius, we have no interaction
184 }
185#endif
186 get_gfactors_monopole(gfac, r, hmax, rinv);
187
188#ifdef EVALPOTENTIAL
189 pot -= mass * gfac.fac0;
190#endif
191 acc -= (mass * gfac.fac1 * rinv) * dxyz;
192
193 if(DoEwald)
194 {
195 // EWALD treatment, only done for periodic boundaries in case PM is not active
196
197 ewald_data ew;
198 Ewald.ewald_gridlookup(DirectDataAll[j].IntPos, DirectDataAll[target].IntPos, ewald::POINTMASS, ew);
199
200#ifdef EVALPOTENTIAL
201 pot += mass * ew.D0phi;
202#endif
203 acc += mass * ew.D1phi;
204 }
205 }
206
207 DirectAccOut[result_idx].Acc[0] = acc[0];
208 DirectAccOut[result_idx].Acc[1] = acc[1];
209 DirectAccOut[result_idx].Acc[2] = acc[2];
210#ifdef EVALPOTENTIAL
211 DirectAccOut[result_idx].Potential = pot;
212#endif
213 }
214
215 /* now send the forces to the right places */
216
217 DirectAccIn = (accdata *)Mem.mymalloc("DirectDataIn", nforces * sizeof(accdata));
218
219 MPI_Request *requests = (MPI_Request *)Mem.mymalloc_movable(&requests, "requests", 2 * D->NTask * sizeof(MPI_Request));
220 int n_requests = 0;
221
222 int recvTask = 0;
223 int sendTask = 0;
224 int send_first, send_count;
225 subdivide_evenly(nimport, D->NTask, sendTask, &send_first, &send_count);
226
227 while(recvTask < D->NTask && sendTask < D->NTask) /* go through both lists */
228 {
229 while(send_first + send_count < Recv_offset[recvTask])
230 {
231 if(sendTask >= D->NTask - 1)
232 Terminate("sendTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask);
233
234 sendTask++;
235 subdivide_evenly(nimport, D->NTask, sendTask, &send_first, &send_count);
236 }
237
238 while(Recv_offset[recvTask] + Recv_count[recvTask] < send_first)
239 {
240 if(recvTask >= D->NTask - 1)
241 Terminate("recvTask >= NTask recvTask=%d sendTask=%d", recvTask, sendTask);
242
243 recvTask++;
244 }
245
246 int start = std::max<int>(Recv_offset[recvTask], send_first);
247 int next = std::min<int>(Recv_offset[recvTask] + Recv_count[recvTask], send_first + send_count);
248
249 if(next - start >= 1)
250 {
251 if(D->ThisTask == sendTask)
252 MPI_Isend(DirectAccOut + start - send_first, (next - start) * sizeof(accdata), MPI_BYTE, recvTask, TAG_PDATA_SPH,
253 D->Communicator, &requests[n_requests++]);
254
255 if(D->ThisTask == recvTask)
256 MPI_Irecv(DirectAccIn + start - Recv_offset[recvTask], (next - start) * sizeof(accdata), MPI_BYTE, sendTask, TAG_PDATA_SPH,
257 D->Communicator, &requests[n_requests++]);
258 }
259
260 if(next == Recv_offset[recvTask] + Recv_count[recvTask])
261 recvTask++;
262 else
263 {
264 sendTask++;
265 if(sendTask >= D->NTask)
266 break;
267
268 subdivide_evenly(nimport, D->NTask, sendTask, &send_first, &send_count);
269 }
270 }
271
272 MPI_Waitall(n_requests, requests, MPI_STATUSES_IGNORE);
273 Mem.myfree(requests);
274
275 nforces = 0;
276
277 for(int idx = 0; idx < Sp->TimeBinsGravity.NActiveParticles; idx++)
278 {
279 int i = Sp->TimeBinsGravity.ActiveParticleList[idx];
280
281 for(int k = 0; k < 3; k++)
282 Sp->P[i].GravAccel[k] = DirectAccIn[nforces].Acc[k];
283
284#ifdef EVALPOTENTIAL
285 Sp->P[i].Potential = DirectAccIn[nforces].Potential;
286#endif
287 nforces++;
288 }
289
290 Mem.myfree(DirectAccIn);
291 Mem.myfree(DirectAccOut);
292 Mem.myfree(DirectDataAll);
293 Mem.myfree(DirectDataIn);
294
295 Mem.myfree(Recv_offset);
296 Mem.myfree(Recv_count);
297 Mem.myfree(Send_offset);
298 Mem.myfree(Send_count);
299
300 D->mpi_printf("GRAVDIRECT: force is done.\n");
301
303
304 double tend = Logs.second();
305
306 double timedirect, sumt;
307 timedirect = tend - tstart;
308
309 MPI_Reduce(&timedirect, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, D->Communicator);
310
311 if(D->ThisTask == 0)
312 {
313 fprintf(Logs.FdTimings, "Nf=%9lld timebin=%d active part/task: avg=%g total-Nf=%lld\n",
316 fprintf(Logs.FdTimings, " (direct) took=%g sec part/sec: %g ia/sec: %g\n", timedirect,
317 Sp->TimeBinsGravity.GlobalNActiveParticles / (sumt + 1.0e-20),
320 }
321
322 TIMER_STOP(CPU_TREEDIRECT);
323}
324
325#endif
global_data_all_processes All
Definition: main.cc:40
Definition: domain.h:31
@ POINTMASS
Definition: ewald.h:67
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:355
mesh_factors mf[2]
Definition: gravtree.h:271
char DoEwald
Definition: gravtree.h:361
void get_gfactors_monopole(gfactors &res, const T r, const T h_max, const T rinv)
Definition: gravtree.h:525
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
FILE * FdTimings
Definition: logs.h:47
double second(void)
Definition: logs.cc:471
double getAllocatedBytesInMB(void)
Definition: mymalloc.h:71
TimeBinData TimeBinsGravity
Definition: simparticles.h:205
particle_data * P
Definition: simparticles.h:54
T da[3]
Definition: symtensors.h:106
#define FLAG_INSIDE
Definition: constants.h:30
#define LOW_MESH
Definition: constants.h:33
#define HIGH_MESH
Definition: constants.h:34
float MyDouble
Definition: dtypes.h:87
float MyFloat
Definition: dtypes.h:86
uint32_t MyIntPosType
Definition: dtypes.h:35
ewald Ewald
Definition: main.cc:42
#define TREE_ACTIVE_CUTTOFF_HIGHRES_PM
Definition: gravtree.h:24
#define TREE_ACTIVE_CUTTOFF_BASE_PM
Definition: gravtree.h:23
int myMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define Terminate(...)
Definition: macros.h:15
#define TAG_PDATA_SPH
Definition: mpi_utils.h:79
memory Mem
Definition: main.cc:44
expr sqrt(half arg)
Definition: half.hpp:2777
int NActiveParticles
Definition: timestep.h:18
int * ActiveParticleList
Definition: timestep.h:20
long long GlobalNActiveParticles
Definition: timestep.h:19
vector< MyReal > D1phi
Definition: gravtree.h:186
MyReal D0phi
Definition: gravtree.h:185
long long TotNumDirectForces
Definition: allvars.h:104
double ForceSoftening[NSOFTCLASSES+NSOFTCLASSES_HYDRO+2]
Definition: allvars.h:258
MyDouble getMass(void)
unsigned char getSofteningClass(void)
unsigned char getType(void)
vector< MyFloat > GravAccel
Definition: particle_data.h:55
MyIntPosType IntPos[3]
Definition: particle_data.h:53
void subdivide_evenly(long long N, int pieces, int index_bin, long long *first, int *count)
Definition: system.cc:51
void myflush(FILE *fstream)
Definition: system.cc:125