GADGET-4
domain.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
25#include "gadgetconfig.h"
26
27#include <mpi.h>
28#include <cmath>
29#include <cstdio>
30#include <cstdlib>
31#include <cstring>
32
33#include "../data/allvars.h"
34#include "../data/dtypes.h"
35#include "../data/mymalloc.h"
36#include "../domain/domain.h"
37#include "../logs/logs.h"
38#include "../logs/timer.h"
39#include "../main/simulation.h"
40#include "../sort/peano.h"
41#include "../system/system.h"
42#include "../time_integration/timestep.h"
43
50template <typename partset>
52{
53 Mode = mode;
54
55 TIMER_START(CPU_DOMAIN);
56
57 double t0 = Logs.second();
58
59 domain_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep);
60
61 /* map the particles by a shift vector back if desired */
62 do_box_wrapping();
63
64 /* determine which time bins need to be balanced */
65 domain_init_sum_cost();
66
67 /* find total cost factors, including a determination of MultipleDomains */
68 domain_find_total_cost();
69
70 /* allocate some fields that need to stay */
71 domain_allocate();
72
73 /* allocate some arrays we are going to use */
74 domain_key = (peanokey *)Mem.mymalloc_movable(&domain_key, "domain_key", (sizeof(peanokey) * Tp->MaxPart));
75 domain_leaf_cost =
76 (domain_cost_data *)Mem.mymalloc_movable(&domain_leaf_cost, "domain_leaf_cost", (MaxTopNodes * sizeof(domain_cost_data)));
77
78 topNodes = (local_topnode_data *)Mem.mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(local_topnode_data)));
79
80 /* determine top-level tree */
81 domain_determineTopTree();
82
83 /* combine on each MPI task several of the domains (namely the number MultipleDomains) */
84 domain_combine_multipledomains();
85
86 Mem.myfree(topNodes);
87 Mem.myfree(domain_leaf_cost);
88
89 /* move stars out of gas block if present */
90 domain_rearrange_particle_sequence();
91
92 /* finally, carry out the actual particle exchange */
93 if(Mode == STANDARD)
94 domain_exchange();
95 else if(Mode == COLL_SUBFIND)
96 domain_coll_subfind_prepare_exchange();
97
98 double t1 = Logs.second();
99
100 domain_printf("DOMAIN: domain decomposition done. (took in total %g sec)\n", Logs.timediff(t0, t1));
101
102 if(Mode == STANDARD)
103 {
104 TIMER_STOPSTART(CPU_DOMAIN, CPU_PEANO);
105
106 peano_hilbert_order(domain_key);
107
108 TIMER_STOPSTART(CPU_PEANO, CPU_DOMAIN);
109 }
110
111 Mem.myfree(domain_key);
112
113 Mem.myfree(ListOfTopleaves);
114
115 TaskOfLeaf = (int *)Mem.myrealloc_movable(TaskOfLeaf, NTopleaves * sizeof(int));
116 TopNodes = (topnode_data *)Mem.myrealloc_movable(TopNodes, NTopnodes * sizeof(topnode_data));
117
118 ListOfTopleaves = (int *)Mem.mymalloc_movable(&ListOfTopleaves, "ListOfTopleaves", (NTopleaves * sizeof(int)));
119
120 memset(NumTopleafOfTask, 0, NTask * sizeof(int));
121
122 for(int i = 0; i < NTopleaves; i++)
123 NumTopleafOfTask[TaskOfLeaf[i]]++;
124
125 FirstTopleafOfTask[0] = 0;
126 for(int i = 1; i < NTask; i++)
127 FirstTopleafOfTask[i] = FirstTopleafOfTask[i - 1] + NumTopleafOfTask[i - 1];
128
129 memset(NumTopleafOfTask, 0, NTask * sizeof(int));
130
131 for(int i = 0; i < NTopleaves; i++)
132 {
133 int task = TaskOfLeaf[i];
134 int off = FirstTopleafOfTask[task] + NumTopleafOfTask[task]++;
135 ListOfTopleaves[off] = i;
136 }
137
138 if(Mode == STANDARD)
139 {
140 /* the following will reconstruct the timebins and report the balance
141 * in case we are dealing with simparticles, for lcparticles nothing will be done
142 */
143 domain_report_balance();
144 }
145
146#ifdef DOMAIN_SPECIAL_CHECK
147 if(ThisTask == 0 && All.NumCurrentTiStep == 4)
148 Terminate("stop");
149#endif
150
151 TIMER_STOP(CPU_DOMAIN);
152}
153
155template <typename partset>
157{
158 MaxTopNodes = maxtopnodes;
159
160 if(FirstTopleafOfTask)
161 Terminate("domain storage already allocated");
162
163 FirstTopleafOfTask = (int *)Mem.mymalloc_movable(&FirstTopleafOfTask, "FirstTopleafOfTask", NTask * sizeof(int));
164 NumTopleafOfTask = (int *)Mem.mymalloc_movable(&NumTopleafOfTask, "NumTopleafOfTask", NTask * sizeof(int));
165 TopNodes = (topnode_data *)Mem.mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(topnode_data)));
166 TaskOfLeaf = (int *)Mem.mymalloc_movable(&TaskOfLeaf, "TaskOfLeaf", (MaxTopNodes * sizeof(int)));
167 ListOfTopleaves = (int *)Mem.mymalloc_movable(&ListOfTopleaves, "DomainListOfLocalTopleaves", (MaxTopNodes * sizeof(int)));
168}
169
171template <typename partset>
173{
174 int maxtopnodes = All.TopNodeAllocFactor * std::max<int>(All.TopNodeFactor * MultipleDomains * NTask, BASENUMBER);
175 domain_allocate(maxtopnodes);
176}
177
178template <typename partset>
180{
181 if(!FirstTopleafOfTask)
182 Terminate("domain storage not allocated");
183
184 Mem.myfree_movable(ListOfTopleaves);
185 Mem.myfree_movable(TaskOfLeaf);
186 Mem.myfree_movable(TopNodes);
187 Mem.myfree_movable(NumTopleafOfTask);
188 Mem.myfree_movable(FirstTopleafOfTask);
189
190 ListOfTopleaves = NULL;
191 TaskOfLeaf = NULL;
192 TopNodes = NULL;
193 NumTopleafOfTask = NULL;
194 FirstTopleafOfTask = NULL;
195}
196
197template <typename partset>
198void domain<partset>::domain_printf(char *buf)
199{
201 {
202 fprintf(Logs.FdDomain, "%s", buf);
203 }
204}
205
206template <>
208{
209 /* for each timebin that should be balanced, collect the gravity cost of
210 * the particles active in that timebin
211 */
212
213 for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
214 {
215 GravCostPerListedTimeBin[n] = 0.0;
216 MaxGravCostPerListedTimeBin[n] = 0.0;
217 /* do the same for the hydrodynamics
218 */
219 HydroCostPerListedTimeBin[n] = 0.0;
220 }
221
222 for(int i = 0; i < Tp->NumPart; i++)
223 {
224 if(Tp->P[i].GravCost == 0)
225 Tp->P[i].GravCost = 1;
226
227 for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
228 {
229 int bin = ListOfTimeBinsToBeBalanced[n];
230
231 if(bin >= Tp->P[i].TimeBinGrav)
232 {
233 GravCostPerListedTimeBin[n] += Tp->P[i].GravCost;
234 if(MaxGravCostPerListedTimeBin[n] < Tp->P[i].GravCost)
235 MaxGravCostPerListedTimeBin[n] = Tp->P[i].GravCost;
236 }
237
238 if(Tp->P[i].getType() == 0)
239 {
240 if(bin >= Tp->P[i].getTimeBinHydro())
241 HydroCostPerListedTimeBin[n] += 1.0;
242 }
243 }
244 }
245
246 long long sum[2] = {Tp->NumPart, Tp->NumGas};
247
248 MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_LONG_LONG, MPI_SUM, Communicator);
249
250 NormFactorLoad = 1.0 / sum[0];
251 NormFactorLoadSph = sum[1] > 0.0 ? 1.0 / sum[1] : 0.0;
252
253 MultipleDomains = 0;
254 TotalCost = 0.0;
255
256 if(NormFactorLoad > 0.0)
257 {
258 MultipleDomains += 1;
259 TotalCost += 1.0;
260 }
261
262 if(NormFactorLoadSph > 0.0)
263 {
264 MultipleDomains += 1;
265 TotalCost += 1.0;
266 }
267
268 MPI_Allreduce(MPI_IN_PLACE, GravCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_SUM, Communicator);
269 MPI_Allreduce(MPI_IN_PLACE, HydroCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_SUM, Communicator);
270
271 MPI_Allreduce(MPI_IN_PLACE, MaxGravCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_MAX, Communicator);
272
273 double limit = 1.0 / (All.TopNodeFactor * MultipleDomains * NTask);
274
275 for(int n = 0; n < NumTimeBinsToBeBalanced; n++)
276 {
277 if(GravCostPerListedTimeBin[n] > 0.0)
278 MultipleDomains += 1;
279
280 if(HydroCostPerListedTimeBin[n] > 0.0)
281 MultipleDomains += 1;
282
283 GravCostNormFactors[n] = GravCostPerListedTimeBin[n] > 0.0 ? 1.0 / GravCostPerListedTimeBin[n] : 0.0;
284 HydroCostNormFactors[n] = HydroCostPerListedTimeBin[n] > 0.0 ? 1.0 / HydroCostPerListedTimeBin[n] : 0.0;
285
286 if(MaxGravCostPerListedTimeBin[n] * GravCostNormFactors[n] > limit)
287 GravCostNormFactors[n] = limit / MaxGravCostPerListedTimeBin[n];
288
289 if(HydroCostNormFactors[n] > limit)
290 HydroCostNormFactors[n] = limit;
291
292 TotalCost += GravCostPerListedTimeBin[n] * GravCostNormFactors[n];
293 TotalCost += HydroCostPerListedTimeBin[n] * HydroCostNormFactors[n];
294 }
295}
296
297#ifdef LIGHTCONE_PARTICLES
298template <>
300{
301 long long sum[2] = {Tp->NumPart, Tp->NumGas};
302
303 MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_LONG_LONG, MPI_SUM, Communicator);
304
305 NormFactorLoad = 1.0 / sum[0];
306 NormFactorLoadSph = sum[1] > 0.0 ? 1.0 / sum[1] : 0.0;
307
308 MultipleDomains = 0;
309 TotalCost = 0.0;
310
311 if(NormFactorLoad > 0.0)
312 {
313 MultipleDomains += 1;
314 TotalCost += 1.0;
315 }
316
317 if(NormFactorLoadSph > 0.0)
318 {
319 MultipleDomains += 1;
320 TotalCost += 1.0;
321 }
322
323 NumTimeBinsToBeBalanced = 0;
324}
325#endif
326
327template <>
329{
330 if(Mode != STANDARD)
331 return;
332
333 for(int i = 0; i < Tp->NumGas; i++)
334 if(Tp->P[i].getType() != 0) /*If not a gas particle, swap to the end of the list */
335 {
336 particle_data psave = Tp->P[i];
337 peanokey key = domain_key[i];
338
339 Tp->P[i] = Tp->P[Tp->NumGas - 1];
340 Tp->SphP[i] = Tp->SphP[Tp->NumGas - 1];
341 domain_key[i] = domain_key[Tp->NumGas - 1];
342
343 Tp->P[Tp->NumGas - 1] = psave;
344 domain_key[Tp->NumGas - 1] = key;
345
346 Tp->NumGas--;
347 i--;
348 }
349 /*Now we have rearranged the particles,
350 *we don't need to do it again unless there are more stars*/
351}
352
353template <>
355{
356 Tp->reconstruct_timebins();
357
358 TIMER_STOPSTART(CPU_DOMAIN, CPU_LOGS);
359
360 if(Mode != STANDARD)
361 return;
362
363 /* now execute code to report balance */
364
365 /* get total particle counts */
366 long long loc_count[2 * TIMEBINS], glob_count[2 * TIMEBINS];
367
368 for(int i = 0; i < TIMEBINS; i++)
369 {
370 loc_count[i] = Tp->TimeBinsGravity.TimeBinCount[i];
371 loc_count[TIMEBINS + i] = Tp->TimeBinsHydro.TimeBinCount[i];
372 }
373
374 MPI_Reduce(loc_count, glob_count, 2 * TIMEBINS, MPI_LONG_LONG_INT, MPI_SUM, 0, Communicator);
375
376 double loc_max_data[2 * TIMEBINS + 3], glob_max_data[2 * TIMEBINS + 3];
377 loc_max_data[2 * TIMEBINS + 0] = Tp->NumPart;
378 loc_max_data[2 * TIMEBINS + 1] = Tp->NumGas;
379 loc_max_data[2 * TIMEBINS + 2] = Tp->NumPart - Tp->NumGas;
380
381 double glob_sum_data[2 * TIMEBINS];
382
383 double *loc_HydroCost = &loc_max_data[0];
384 double *loc_GravCost = &loc_max_data[TIMEBINS];
385 double *max_HydroCost = &glob_max_data[0];
386 double *max_GravCost = &glob_max_data[TIMEBINS];
387 double *glob_HydroCost = &glob_sum_data[0];
388 double *glob_GravCost = &glob_sum_data[TIMEBINS];
389
390 for(int i = 0; i < TIMEBINS; i++)
391 {
392 loc_GravCost[i] = 0;
393 loc_HydroCost[i] = 0;
394 }
395
396#ifdef SELFGRAVITY
397 for(int i = 0; i < Tp->NumPart; i++)
398 {
399 for(int bin = Tp->P[i].TimeBinGrav; bin <= All.HighestOccupiedTimeBin; bin++)
400 {
401 loc_GravCost[bin] += MIN_FLOAT_NUMBER + domain_grav_weight[bin] * Tp->P[i].GravCost;
402 }
403 }
404#endif
405
406 for(int i = 0; i < Tp->NumPart; i++)
407 if(Tp->P[i].getType() == 0)
408 loc_HydroCost[Tp->P[i].getTimeBinHydro()] += 1.0;
409
410 /* now determine the cumulative cost for the hydrodynamics */
411 for(int i = 1; i <= All.HighestOccupiedTimeBin; i++)
412 loc_HydroCost[i] += loc_HydroCost[i - 1];
413
414 MPI_Reduce(loc_max_data, glob_sum_data, 2 * TIMEBINS, MPI_DOUBLE, MPI_SUM, 0, Communicator);
415 MPI_Reduce(loc_max_data, glob_max_data, 2 * TIMEBINS + 3, MPI_DOUBLE, MPI_MAX, 0, Communicator);
416
417 if(ThisTask == 0)
418 {
419 double max_tot = glob_max_data[2 * TIMEBINS + 0];
420 double max_sph = glob_max_data[2 * TIMEBINS + 1];
421 double max_dm = glob_max_data[2 * TIMEBINS + 2];
422
423 long long *tot_count = &glob_count[0];
424 long long *tot_count_sph = &glob_count[TIMEBINS];
425
426 long long tot_cumulative[TIMEBINS];
427 tot_cumulative[0] = tot_count[0];
428
429 for(int i = 1; i < TIMEBINS; i++)
430 tot_cumulative[i] = tot_count[i] + tot_cumulative[i - 1];
431
432 double tot_gravcost = 0, max_gravcost = 0, tot_hydrocost = 0, max_hydrocost = 0;
433
434 for(int i = 0; i < TIMEBINS; i++)
435 {
436 tot_gravcost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask;
437 max_gravcost += domain_to_be_balanced[i] * max_GravCost[i];
438
439 tot_hydrocost += domain_to_be_balanced[i] * glob_HydroCost[i] / NTask;
440 max_hydrocost += domain_to_be_balanced[i] * max_HydroCost[i];
441 }
442
443 double bal_grav_bin[TIMEBINS], bal_grav_bin_rel[TIMEBINS];
444 double bal_hydro_bin[TIMEBINS], bal_hydro_bin_rel[TIMEBINS];
445
446 for(int i = 0; i < TIMEBINS; i++)
447 {
448 if(tot_count[i] > 0)
449 {
450 bal_grav_bin[i] = max_GravCost[i] / (glob_GravCost[i] / NTask + SMALLNUM);
451 bal_grav_bin_rel[i] =
452 (tot_gravcost + domain_to_be_balanced[i] * (max_GravCost[i] - glob_GravCost[i] / NTask)) / (tot_gravcost + SMALLNUM);
453 }
454 else
455 {
456 bal_grav_bin[i] = 0.0;
457 bal_grav_bin_rel[i] = 0.0;
458 }
459
460 if(tot_count_sph[i] > 0)
461 {
462 bal_hydro_bin[i] = max_HydroCost[i] / (glob_HydroCost[i] / NTask + SMALLNUM);
463 bal_hydro_bin_rel[i] = (tot_hydrocost + domain_to_be_balanced[i] * (max_HydroCost[i] - glob_HydroCost[i] / NTask)) /
464 (tot_hydrocost + SMALLNUM);
465 }
466 else
467 {
468 bal_hydro_bin[i] = 0.0;
469 bal_hydro_bin_rel[i] = 0.0;
470 }
471 }
472
473 char buf[MAXLEN_PATH];
474 sprintf(buf, "\nDOMAIN BALANCE, Sync-Point %d, Time: %g\n", All.NumCurrentTiStep, All.Time);
475 domain_printf(buf);
476 sprintf(buf, "Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n");
477 domain_printf(buf);
478
479 long long tot = 0, tot_sph = 0;
480
481 for(int i = TIMEBINS - 1; i >= 0; i--)
482 {
483#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY))
484 if(tot_count_sph[i] > 0 || tot_count[i] > 0)
485#else
486 if(tot_count[i] > 0)
487 tot += tot_count[i];
488
489 if(tot_count_sph[i] > 0)
490#endif
491 {
492 char buf[MAXLEN_PATH];
493 sprintf(buf, "%c%cbin=%2d %10llu %10llu %10llu %6.3f |%6.3f %c %6.3f |%6.3f\n",
494 i == All.HighestActiveTimeBin ? '>' : ' ', i >= All.SmallestTimeBinWithDomainDecomposition ? '|' : ' ', i,
495 tot_count[i], tot_count_sph[i], tot_cumulative[i], bal_grav_bin[i], bal_grav_bin_rel[i],
496 domain_to_be_balanced[i] > 0 ? '*' : ' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]);
497 domain_printf(buf);
498
499 tot += tot_count[i];
500 tot_sph += tot_count_sph[i];
501 }
502 }
503
504 sprintf(buf, "-------------------------------------------------------------------------------------\n");
505 domain_printf(buf);
506 sprintf(buf, "BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n",
507 max_dm / (tot - tot_sph + SMALLNUM) * NTask, max_sph / (tot_sph + SMALLNUM) * NTask, max_tot / (tot + SMALLNUM) * NTask,
508 max_gravcost / (tot_gravcost + SMALLNUM), max_hydrocost / (tot_hydrocost + SMALLNUM));
509 domain_printf(buf);
510 sprintf(buf, "-------------------------------------------------------------------------------------\n");
511 domain_printf(buf);
512 sprintf(buf, "\n");
513 domain_printf(buf);
515 }
516
517 TIMER_STOPSTART(CPU_LOGS, CPU_DOMAIN);
518}
519
520#ifdef LIGHTCONE_PARTICLES
521
522template <>
524{
525}
526
527template <>
529{
530}
531
532#endif
533
534#include "../data/simparticles.h"
535template class domain<simparticles>;
536
537#ifdef LIGHTCONE_PARTICLES
538#include "../data/lcparticles.h"
539template class domain<lcparticles>;
540#endif
global_data_all_processes All
Definition: main.cc:40
Definition: domain.h:31
void domain_free(void)
Definition: domain.cc:179
void domain_allocate(void)
Definition: domain.cc:172
void domain_decomposition(domain_options mode)
Definition: domain.cc:51
double timediff(double t0, double t1)
Definition: logs.cc:488
double second(void)
Definition: logs.cc:471
FILE * FdDomain
Definition: logs.h:52
#define TIMEBINS
Definition: constants.h:332
#define SMALLNUM
Definition: constants.h:83
#define MIN_FLOAT_NUMBER
Definition: constants.h:80
#define MAXLEN_PATH
Definition: constants.h:300
#define BASENUMBER
Definition: constants.h:303
domain_options
Definition: domain.h:22
@ COLL_SUBFIND
Definition: domain.h:24
@ STANDARD
Definition: domain.h:23
@ RST_STARTFROMSNAP
Definition: dtypes.h:315
@ RST_BEGIN
Definition: dtypes.h:313
@ RST_RESUME
Definition: dtypes.h:314
logs Logs
Definition: main.cc:43
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
Definition: logs.h:231
#define Terminate(...)
Definition: macros.h:15
memory Mem
Definition: main.cc:44
enum restart_options RestartFlag
Definition: allvars.h:68
int SmallestTimeBinWithDomainDecomposition
Definition: allvars.h:160
void myflush(FILE *fstream)
Definition: system.cc:125