25#include "gadgetconfig.h"
33#include "../data/allvars.h"
34#include "../data/dtypes.h"
35#include "../data/mymalloc.h"
36#include "../domain/domain.h"
37#include "../logs/logs.h"
38#include "../logs/timer.h"
39#include "../main/simulation.h"
40#include "../sort/peano.h"
41#include "../system/system.h"
42#include "../time_integration/timestep.h"
50template <
typename partset>
59 domain_printf(
"DOMAIN: Begin domain decomposition (sync-point %d).\n",
All.
NumCurrentTiStep);
65 domain_init_sum_cost();
68 domain_find_total_cost();
74 domain_key = (
peanokey *)
Mem.mymalloc_movable(&domain_key,
"domain_key", (
sizeof(
peanokey) * Tp->MaxPart));
76 (domain_cost_data *)
Mem.mymalloc_movable(&domain_leaf_cost,
"domain_leaf_cost", (MaxTopNodes *
sizeof(domain_cost_data)));
78 topNodes = (local_topnode_data *)
Mem.mymalloc_movable(&topNodes,
"topNodes", (MaxTopNodes *
sizeof(local_topnode_data)));
81 domain_determineTopTree();
84 domain_combine_multipledomains();
87 Mem.myfree(domain_leaf_cost);
90 domain_rearrange_particle_sequence();
96 domain_coll_subfind_prepare_exchange();
100 domain_printf(
"DOMAIN: domain decomposition done. (took in total %g sec)\n",
Logs.
timediff(t0, t1));
106 peano_hilbert_order(domain_key);
111 Mem.myfree(domain_key);
113 Mem.myfree(ListOfTopleaves);
115 TaskOfLeaf = (
int *)
Mem.myrealloc_movable(TaskOfLeaf, NTopleaves *
sizeof(
int));
118 ListOfTopleaves = (
int *)
Mem.mymalloc_movable(&ListOfTopleaves,
"ListOfTopleaves", (NTopleaves *
sizeof(
int)));
120 memset(NumTopleafOfTask, 0, NTask *
sizeof(
int));
122 for(
int i = 0; i < NTopleaves; i++)
123 NumTopleafOfTask[TaskOfLeaf[i]]++;
125 FirstTopleafOfTask[0] = 0;
126 for(
int i = 1; i < NTask; i++)
127 FirstTopleafOfTask[i] = FirstTopleafOfTask[i - 1] + NumTopleafOfTask[i - 1];
129 memset(NumTopleafOfTask, 0, NTask *
sizeof(
int));
131 for(
int i = 0; i < NTopleaves; i++)
133 int task = TaskOfLeaf[i];
134 int off = FirstTopleafOfTask[task] + NumTopleafOfTask[task]++;
135 ListOfTopleaves[off] = i;
143 domain_report_balance();
146#ifdef DOMAIN_SPECIAL_CHECK
155template <
typename partset>
158 MaxTopNodes = maxtopnodes;
160 if(FirstTopleafOfTask)
161 Terminate(
"domain storage already allocated");
163 FirstTopleafOfTask = (
int *)
Mem.mymalloc_movable(&FirstTopleafOfTask,
"FirstTopleafOfTask", NTask *
sizeof(
int));
164 NumTopleafOfTask = (
int *)
Mem.mymalloc_movable(&NumTopleafOfTask,
"NumTopleafOfTask", NTask *
sizeof(
int));
166 TaskOfLeaf = (
int *)
Mem.mymalloc_movable(&TaskOfLeaf,
"TaskOfLeaf", (MaxTopNodes *
sizeof(
int)));
167 ListOfTopleaves = (
int *)
Mem.mymalloc_movable(&ListOfTopleaves,
"DomainListOfLocalTopleaves", (MaxTopNodes *
sizeof(
int)));
171template <
typename partset>
175 domain_allocate(maxtopnodes);
178template <
typename partset>
181 if(!FirstTopleafOfTask)
182 Terminate(
"domain storage not allocated");
184 Mem.myfree_movable(ListOfTopleaves);
185 Mem.myfree_movable(TaskOfLeaf);
186 Mem.myfree_movable(TopNodes);
187 Mem.myfree_movable(NumTopleafOfTask);
188 Mem.myfree_movable(FirstTopleafOfTask);
190 ListOfTopleaves = NULL;
193 NumTopleafOfTask = NULL;
194 FirstTopleafOfTask = NULL;
197template <
typename partset>
213 for(
int n = 0; n < NumTimeBinsToBeBalanced; n++)
215 GravCostPerListedTimeBin[n] = 0.0;
216 MaxGravCostPerListedTimeBin[n] = 0.0;
219 HydroCostPerListedTimeBin[n] = 0.0;
222 for(
int i = 0; i < Tp->NumPart; i++)
224 if(Tp->P[i].GravCost == 0)
225 Tp->P[i].GravCost = 1;
227 for(
int n = 0; n < NumTimeBinsToBeBalanced; n++)
229 int bin = ListOfTimeBinsToBeBalanced[n];
231 if(bin >= Tp->P[i].TimeBinGrav)
233 GravCostPerListedTimeBin[n] += Tp->P[i].GravCost;
234 if(MaxGravCostPerListedTimeBin[n] < Tp->P[i].GravCost)
235 MaxGravCostPerListedTimeBin[n] = Tp->P[i].GravCost;
238 if(Tp->P[i].getType() == 0)
240 if(bin >= Tp->P[i].getTimeBinHydro())
241 HydroCostPerListedTimeBin[n] += 1.0;
246 long long sum[2] = {Tp->NumPart, Tp->NumGas};
248 MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_LONG_LONG, MPI_SUM, Communicator);
250 NormFactorLoad = 1.0 / sum[0];
251 NormFactorLoadSph = sum[1] > 0.0 ? 1.0 / sum[1] : 0.0;
256 if(NormFactorLoad > 0.0)
258 MultipleDomains += 1;
262 if(NormFactorLoadSph > 0.0)
264 MultipleDomains += 1;
268 MPI_Allreduce(MPI_IN_PLACE, GravCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_SUM, Communicator);
269 MPI_Allreduce(MPI_IN_PLACE, HydroCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_SUM, Communicator);
271 MPI_Allreduce(MPI_IN_PLACE, MaxGravCostPerListedTimeBin, NumTimeBinsToBeBalanced, MPI_DOUBLE, MPI_MAX, Communicator);
275 for(
int n = 0; n < NumTimeBinsToBeBalanced; n++)
277 if(GravCostPerListedTimeBin[n] > 0.0)
278 MultipleDomains += 1;
280 if(HydroCostPerListedTimeBin[n] > 0.0)
281 MultipleDomains += 1;
283 GravCostNormFactors[n] = GravCostPerListedTimeBin[n] > 0.0 ? 1.0 / GravCostPerListedTimeBin[n] : 0.0;
284 HydroCostNormFactors[n] = HydroCostPerListedTimeBin[n] > 0.0 ? 1.0 / HydroCostPerListedTimeBin[n] : 0.0;
286 if(MaxGravCostPerListedTimeBin[n] * GravCostNormFactors[n] > limit)
287 GravCostNormFactors[n] = limit / MaxGravCostPerListedTimeBin[n];
289 if(HydroCostNormFactors[n] > limit)
290 HydroCostNormFactors[n] = limit;
292 TotalCost += GravCostPerListedTimeBin[n] * GravCostNormFactors[n];
293 TotalCost += HydroCostPerListedTimeBin[n] * HydroCostNormFactors[n];
297#ifdef LIGHTCONE_PARTICLES
301 long long sum[2] = {Tp->NumPart, Tp->NumGas};
303 MPI_Allreduce(MPI_IN_PLACE, sum, 2, MPI_LONG_LONG, MPI_SUM, Communicator);
305 NormFactorLoad = 1.0 / sum[0];
306 NormFactorLoadSph = sum[1] > 0.0 ? 1.0 / sum[1] : 0.0;
311 if(NormFactorLoad > 0.0)
313 MultipleDomains += 1;
317 if(NormFactorLoadSph > 0.0)
319 MultipleDomains += 1;
323 NumTimeBinsToBeBalanced = 0;
333 for(
int i = 0; i < Tp->NumGas; i++)
334 if(Tp->P[i].getType() != 0)
339 Tp->P[i] = Tp->P[Tp->NumGas - 1];
340 Tp->SphP[i] = Tp->SphP[Tp->NumGas - 1];
341 domain_key[i] = domain_key[Tp->NumGas - 1];
343 Tp->P[Tp->NumGas - 1] = psave;
344 domain_key[Tp->NumGas - 1] = key;
356 Tp->reconstruct_timebins();
370 loc_count[i] = Tp->TimeBinsGravity.TimeBinCount[i];
371 loc_count[
TIMEBINS + i] = Tp->TimeBinsHydro.TimeBinCount[i];
374 MPI_Reduce(loc_count, glob_count, 2 *
TIMEBINS, MPI_LONG_LONG_INT, MPI_SUM, 0, Communicator);
377 loc_max_data[2 *
TIMEBINS + 0] = Tp->NumPart;
378 loc_max_data[2 *
TIMEBINS + 1] = Tp->NumGas;
379 loc_max_data[2 *
TIMEBINS + 2] = Tp->NumPart - Tp->NumGas;
383 double *loc_HydroCost = &loc_max_data[0];
384 double *loc_GravCost = &loc_max_data[
TIMEBINS];
385 double *max_HydroCost = &glob_max_data[0];
386 double *max_GravCost = &glob_max_data[
TIMEBINS];
387 double *glob_HydroCost = &glob_sum_data[0];
388 double *glob_GravCost = &glob_sum_data[
TIMEBINS];
393 loc_HydroCost[i] = 0;
397 for(
int i = 0; i < Tp->NumPart; i++)
401 loc_GravCost[bin] +=
MIN_FLOAT_NUMBER + domain_grav_weight[bin] * Tp->P[i].GravCost;
406 for(
int i = 0; i < Tp->NumPart; i++)
407 if(Tp->P[i].getType() == 0)
408 loc_HydroCost[Tp->P[i].getTimeBinHydro()] += 1.0;
412 loc_HydroCost[i] += loc_HydroCost[i - 1];
414 MPI_Reduce(loc_max_data, glob_sum_data, 2 *
TIMEBINS, MPI_DOUBLE, MPI_SUM, 0, Communicator);
415 MPI_Reduce(loc_max_data, glob_max_data, 2 *
TIMEBINS + 3, MPI_DOUBLE, MPI_MAX, 0, Communicator);
419 double max_tot = glob_max_data[2 *
TIMEBINS + 0];
420 double max_sph = glob_max_data[2 *
TIMEBINS + 1];
421 double max_dm = glob_max_data[2 *
TIMEBINS + 2];
423 long long *tot_count = &glob_count[0];
424 long long *tot_count_sph = &glob_count[
TIMEBINS];
427 tot_cumulative[0] = tot_count[0];
430 tot_cumulative[i] = tot_count[i] + tot_cumulative[i - 1];
432 double tot_gravcost = 0, max_gravcost = 0, tot_hydrocost = 0, max_hydrocost = 0;
436 tot_gravcost += domain_to_be_balanced[i] * glob_GravCost[i] / NTask;
437 max_gravcost += domain_to_be_balanced[i] * max_GravCost[i];
439 tot_hydrocost += domain_to_be_balanced[i] * glob_HydroCost[i] / NTask;
440 max_hydrocost += domain_to_be_balanced[i] * max_HydroCost[i];
450 bal_grav_bin[i] = max_GravCost[i] / (glob_GravCost[i] / NTask +
SMALLNUM);
451 bal_grav_bin_rel[i] =
452 (tot_gravcost + domain_to_be_balanced[i] * (max_GravCost[i] - glob_GravCost[i] / NTask)) / (tot_gravcost +
SMALLNUM);
456 bal_grav_bin[i] = 0.0;
457 bal_grav_bin_rel[i] = 0.0;
460 if(tot_count_sph[i] > 0)
462 bal_hydro_bin[i] = max_HydroCost[i] / (glob_HydroCost[i] / NTask +
SMALLNUM);
463 bal_hydro_bin_rel[i] = (tot_hydrocost + domain_to_be_balanced[i] * (max_HydroCost[i] - glob_HydroCost[i] / NTask)) /
468 bal_hydro_bin[i] = 0.0;
469 bal_hydro_bin_rel[i] = 0.0;
476 sprintf(buf,
"Timebins: Gravity Hydro cumulative grav-balance hydro-balance\n");
479 long long tot = 0, tot_sph = 0;
481 for(
int i =
TIMEBINS - 1; i >= 0; i--)
483#if(defined(SELFGRAVITY) || defined(EXTERNALGRAVITY))
484 if(tot_count_sph[i] > 0 || tot_count[i] > 0)
489 if(tot_count_sph[i] > 0)
493 sprintf(buf,
"%c%cbin=%2d %10llu %10llu %10llu %6.3f |%6.3f %c %6.3f |%6.3f\n",
495 tot_count[i], tot_count_sph[i], tot_cumulative[i], bal_grav_bin[i], bal_grav_bin_rel[i],
496 domain_to_be_balanced[i] > 0 ?
'*' :
' ', bal_hydro_bin[i], bal_hydro_bin_rel[i]);
500 tot_sph += tot_count_sph[i];
504 sprintf(buf,
"-------------------------------------------------------------------------------------\n");
506 sprintf(buf,
"BALANCE, LOAD: %6.3f %6.3f %6.3f WORK: %6.3f %6.3f\n",
507 max_dm / (tot - tot_sph +
SMALLNUM) * NTask, max_sph / (tot_sph +
SMALLNUM) * NTask, max_tot / (tot +
SMALLNUM) * NTask,
508 max_gravcost / (tot_gravcost +
SMALLNUM), max_hydrocost / (tot_hydrocost +
SMALLNUM));
510 sprintf(buf,
"-------------------------------------------------------------------------------------\n");
520#ifdef LIGHTCONE_PARTICLES
534#include "../data/simparticles.h"
537#ifdef LIGHTCONE_PARTICLES
538#include "../data/lcparticles.h"
global_data_all_processes All
void domain_allocate(void)
void domain_decomposition(domain_options mode)
double timediff(double t0, double t1)
#define TIMER_START(counter)
Starts the timer counter.
#define TIMER_STOP(counter)
Stops the timer counter.
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
enum restart_options RestartFlag
double TopNodeAllocFactor
int SmallestTimeBinWithDomainDecomposition
int HighestOccupiedTimeBin
void myflush(FILE *fstream)