12#include "gadgetconfig.h"
20#include "../data/allvars.h"
21#include "../data/dtypes.h"
22#include "../data/mymalloc.h"
23#include "../domain/domain.h"
24#include "../logs/timer.h"
25#include "../main/simulation.h"
26#include "../mpi_utils/mpi_utils.h"
27#include "../sort/cxxsort.h"
28#include "../system/system.h"
29#include "../time_integration/timestep.h"
35#ifdef DOMAIN_SPECIAL_CHECK
// NOTE(review): this listing is a lossy extraction -- the original file's own
// line numbers are fused onto the code text and many intermediate lines are
// elided (the embedded numbering jumps, e.g. 47 -> 58).  The code below is kept
// byte-identical; only annotations are added.  The function signature for this
// templated member (original lines 38-39) is on elided lines.
37template <
typename partset>
// One flat scratch buffer carved into four views: per-domain total load,
// per-domain SPH load, and one gravity-cost and one hydro-cost column per
// balanced timebin -- hence the 2 * ndomains * (NumTimeBinsToBeBalanced + 1)
// doubles.  mymalloc_clear zero-initializes, so the loop below can accumulate.
40 double *cost_data = (
double *)
Mem.mymalloc_clear(
"cost_data",
sizeof(
double) * 2 * ndomains * (NumTimeBinsToBeBalanced + 1));
42 double *load = cost_data;
43 double *loadsph = cost_data + ndomains;
44 double *binGravCost = cost_data + 2 * ndomains;
45 double *binHydroCost = cost_data + 2 * ndomains + ndomains * NumTimeBinsToBeBalanced;
// Accumulate this rank's local particles into the table.  The mapping of
// particle i to domain index n happens on elided lines (48-57); only the
// bounds check on n is visible here.
47 for(
int i = 0; i < Tp->NumPart; i++)
58 if(n < 0 || n >= ndomains)
// Gravity cost: a particle contributes to every balanced timebin that is at
// least as coarse as its own gravity timebin (bin >= getTimeBinGrav()),
// weighted by the per-bin normalization factor.
62 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
64 int bin = ListOfTimeBinsToBeBalanced[k];
66 if(bin >= Tp->P[i].getTimeBinGrav())
67 binGravCost[k * ndomains + n] += GravCostNormFactors[k] * Tp->P[i].getGravCost();
71 load[n] += NormFactorLoad;
// Type 0 is presumably the gas/SPH particle type -- TODO confirm; those
// additionally contribute hydro cost and SPH load, mirroring the gravity path.
73 if(Tp->P[i].getType() == 0)
75 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
77 int bin = ListOfTimeBinsToBeBalanced[k];
79 if(bin >= Tp->P[i].getTimeBinHydro())
80 binHydroCost[k * ndomains + n] += HydroCostNormFactors[k];
83 loadsph[n] += NormFactorLoadSph;
// Sum the whole table across all ranks in place, so every rank (or at least
// the one that writes the dump below) sees global costs.
87 MPI_Allreduce(MPI_IN_PLACE, cost_data, 2 * ndomains * (NumTimeBinsToBeBalanced + 1), MPI_DOUBLE, MPI_SUM, Communicator);
// Diagnostic dump of the balanced table; buf is filled on elided lines
// (88-94), presumably with a rank-0-only guard -- TODO confirm.
95 FILE *fd = fopen(buf,
"w");
96 fprintf(fd,
"%d %d\n", ndomains, NumTimeBinsToBeBalanced);
97 for(
int n = 0; n < ndomains; n++)
99 fprintf(fd,
"%g ", load[n]);
100 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
101 fprintf(fd,
"%g ", binGravCost[k * ndomains + n]);
// NOTE(review): no fclose(fd) is visible in this fragment -- presumably it
// sits on the elided lines 102-107; verify against the full source, otherwise
// this leaks a FILE handle per call.
108 Mem.myfree(cost_data);
// -------------------------------------------------------------------------
// Fragment of the "combining multiple-domains" routine (so named by its own
// domain_printf messages below).  As with the previous fragment this is a
// lossy extraction: the signature (original lines ~123-130) and many body
// lines are elided, so code is kept byte-identical and only annotated.
//
// Visible overall flow:
//   1. build a global per-topleaf cost table (load, SPH load, per-timebin
//      gravity/hydro cost) via allreduce_sum;
//   2. enumerate candidate (nextra, try_balance) settings in two passes
//      (count, then fill) into balance_try[];
//   3. in rounds, each MPI rank tests one candidate: segment the topleaves
//      into ndomains pieces of roughly equal cost, then greedily assign the
//      pieces to tasks using per-criterion sorted cost queues, rejecting the
//      candidate if the max per-task cost exceeds try_balance;
//   4. MPI_MINLOC picks the rank with the cheapest successful assignment and
//      its TaskOfLeaf[] mapping is broadcast to everyone.
// -------------------------------------------------------------------------
122template <
typename partset>
// Same four-view scratch layout as in the special-check fragment, but indexed
// per top-level tree leaf (NTopleaves) instead of per domain.
131 double *cost_data = (
double *)
Mem.mymalloc_clear(
"cost_data",
sizeof(
double) * 2 * NTopleaves * (NumTimeBinsToBeBalanced + 1));
133 double *load = cost_data;
134 double *loadsph = cost_data + NTopleaves;
135 double *binGravCost = cost_data + 2 * NTopleaves;
136 double *binHydroCost = cost_data + 2 * NTopleaves + NTopleaves * NumTimeBinsToBeBalanced;
// Accumulate local particles into their top-leaf 'no'; the particle-to-leaf
// lookup is on elided lines (139-142).
138 for(
int i = 0; i < Tp->NumPart; i++)
143 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
145 int bin = ListOfTimeBinsToBeBalanced[k];
147 if(bin >= Tp->P[i].getTimeBinGrav())
148 binGravCost[k * NTopleaves + no] += GravCostNormFactors[k] * Tp->P[i].getGravCost();
152 load[no] += NormFactorLoad;
154 if(Tp->P[i].getType() == 0)
156 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
158 int bin = ListOfTimeBinsToBeBalanced[k];
160 if(bin >= Tp->P[i].getTimeBinHydro())
161 binHydroCost[k * NTopleaves + no] += HydroCostNormFactors[k];
164 loadsph[no] += NormFactorLoadSph;
// Global sum so every rank holds the full cost table (needed because each
// rank later evaluates its own candidate independently).
168 allreduce_sum<double>(cost_data, 2 * NTopleaves * (NumTimeBinsToBeBalanced + 1), Communicator);
173#ifdef DOMAIN_SPECIAL_CHECK
// Optional diagnostic dump of the per-leaf table (buf set up on elided lines;
// no fclose visible in this fragment -- presumably elided).
180 FILE *fd = fopen(buf,
"w");
181 fprintf(fd,
"%d %d\n", NTopleaves, NumTimeBinsToBeBalanced);
182 for(
int n = 0; n < NTopleaves; n++)
184 fprintf(fd,
"%g ", load[n]);
185 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
186 fprintf(fd,
"%g ", binGravCost[k * NTopleaves + n]);
// Candidate generation.  Classic two-pass idiom: rep 0 only counts cnt_try,
// rep 1 fills the array allocated between the passes (cnt_try reset and the
// loop over fac/nextra seeds are on elided lines).
199 balance_try_data *balance_try = NULL;
201 for(
int rep = 0; rep < 2; rep++)
207 int nextra_prev = -1;
209 while(nextra <= NTask)
211 if(nextra != nextra_prev)
// base_balance is the unavoidable imbalance from dropping nextra domain
// pieces out of MultipleDomains*NTask; candidates then ladder up an extra
// tolerance by factors of 1.25, capped at an overall imbalance of 2.5.
213 double base_balance = ((double)(MultipleDomains * NTask)) / (MultipleDomains * NTask - nextra);
215 double excess_balance = 0.01;
217 while(base_balance + excess_balance < 2.5)
221 balance_try[cnt_try].nextra = nextra;
222 balance_try[cnt_try].try_balance = base_balance + excess_balance;
227 excess_balance *= 1.25;
230 nextra_prev = nextra;
238 nextra = fac * NTask;
242 balance_try = (balance_try_data *)
Mem.mymalloc(
"balance_try", cnt_try *
sizeof(balance_try_data));
246 determine_compute_nodes();
248 domain_printf(
"DOMAIN: we are going to try at most %d different settings for combining the domains on tasks=%d, nnodes=%d\n",
249 cnt_try, NTask, NumNodes);
// Try the most promising (lowest try_balance first, presumably -- comparator
// body not visible) candidates first, NTask of them per round.
252 mycxxsort(balance_try, balance_try + cnt_try, domain_compare_trybalance);
258 while(completed == 0 && start_try < cnt_try)
260 double glob_max_cost = 0;
// Each rank evaluates candidate start_try + ThisTask, if one is left.
262 if(start_try + ThisTask < cnt_try)
264 int nextra = balance_try[start_try + ThisTask].nextra;
265 double try_balance = balance_try[start_try + ThisTask].try_balance;
267 int ndomains = MultipleDomains * NTask - nextra;
269 domain_segments_data *domainAssign =
270 (domain_segments_data *)
Mem.mymalloc_clear(
"domainAssign", ndomains *
sizeof(domain_segments_data));
// Segment the [0, NTopleaves) leaf range into ndomains contiguous pieces of
// roughly costavg each; costhalfnode biases the cut by half a leaf's worth,
// and the last piece is forced to absorb all remaining leaves.
275 double costhalfnode = (0.5 * TotalCost) / NTopleaves;
276 double costavg = TotalCost / ndomains;
277 double cost_before = 0;
278 double costavg_before = 0;
281 double total_cost = 0, total_load = 0;
283 for(
int n = 0; n < ndomains; n++)
287 double cost = domain_leaf_cost[end].Cost;
289 while((cost + cost_before + (end + 1 < NTopleaves ? domain_leaf_cost[end + 1].Cost : 0) <
290 costavg + costavg_before + costhalfnode) ||
291 (n == ndomains - 1 && end < NTopleaves - 1))
// Only extend this piece while enough leaves remain for the later pieces.
293 if((NTopleaves - end) > (ndomains - n))
298 cost += domain_leaf_cost[end].Cost;
301 domainAssign[n].start = start;
302 domainAssign[n].end = end;
// Roll the segment's per-leaf costs up into the segment record, and track
// grand totals for the log line below.
305 for(
int no = domainAssign[n].start; no <= domainAssign[n].end; no++)
307 domainAssign[n].load += load[no];
308 domainAssign[n].loadsph += loadsph[no];
310 total_load += load[no] + loadsph[no];
311 total_cost += load[no] + loadsph[no];
313 for(
int i = 0; i < NumTimeBinsToBeBalanced; i++)
315 domainAssign[n].bin_GravCost[i] += binGravCost[i * NTopleaves + no];
316 domainAssign[n].bin_HydroCost[i] += binHydroCost[i * NTopleaves + no];
318 total_cost += binGravCost[i * NTopleaves + no] + binHydroCost[i * NTopleaves + no];
323 costavg_before += costavg;
331 domain_printf(
"DOMAIN: total_cost=%g total_load=%g\n", total_cost, total_load);
// Per-task accumulators for the greedy assignment below.
345 tasklist_data *tasklist = (tasklist_data *)
Mem.mymalloc_clear(
"tasklist", NTask *
sizeof(tasklist_data));
// One sorted "cost queue" per active balance criterion: up to TIMEBINS
// gravity bins + TIMEBINS hydro bins + total load + SPH load = 2*TIMEBINS+2.
// Each queue lists the ndomains segments ordered by that criterion, with a
// cursor (first_unusued_in_cost_queue) past already-consumed entries.
// (sic: "unusued" is the identifier's actual spelling.)
347 int n_cost_items = 0;
348 cost_queue_data *cost_queues[2 *
TIMEBINS + 2];
349 int first_unusued_in_cost_queue[2 *
TIMEBINS + 2];
351 for(
int n = 0; n < NumTimeBinsToBeBalanced; n++)
353 if(GravCostPerListedTimeBin[n] > 0.0)
355 cost_queues[n_cost_items] = (cost_queue_data *)
Mem.mymalloc(
"cost_queues", ndomains *
sizeof(cost_queue_data));
356 for(
int i = 0; i < ndomains; i++)
358 cost_queues[n_cost_items][i].value = domainAssign[i].bin_GravCost[n];
359 cost_queues[n_cost_items][i].index = i;
361#ifdef SIMPLE_DOMAIN_AGGREGATION
362 domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
364 mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
365 first_unusued_in_cost_queue[n_cost_items] = 0;
// Same construction for the hydro cost of this timebin...
370 if(HydroCostNormFactors[n] > 0.0)
372 cost_queues[n_cost_items] = (cost_queue_data *)
Mem.mymalloc(
"cost_queues", ndomains *
sizeof(cost_queue_data));
373 for(
int i = 0; i < ndomains; i++)
375 cost_queues[n_cost_items][i].value = domainAssign[i].bin_HydroCost[n];
376 cost_queues[n_cost_items][i].index = i;
378#ifdef SIMPLE_DOMAIN_AGGREGATION
379 domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
381 mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
382 first_unusued_in_cost_queue[n_cost_items] = 0;
// ...and for total particle load...
388 if(NormFactorLoad > 0.0)
390 cost_queues[n_cost_items] = (cost_queue_data *)
Mem.mymalloc(
"cost_queues", ndomains *
sizeof(cost_queue_data));
391 for(
int i = 0; i < ndomains; i++)
393 cost_queues[n_cost_items][i].value = domainAssign[i].load;
394 cost_queues[n_cost_items][i].index = i;
396#ifdef SIMPLE_DOMAIN_AGGREGATION
397 domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
399 mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
400 first_unusued_in_cost_queue[n_cost_items] = 0;
// ...and for SPH load.
405 if(NormFactorLoadSph > 0.0)
407 cost_queues[n_cost_items] = (cost_queue_data *)
Mem.mymalloc(
"cost_queues", ndomains *
sizeof(cost_queue_data));
408 for(
int i = 0; i < ndomains; i++)
410 cost_queues[n_cost_items][i].value = domainAssign[i].loadsph;
411 cost_queues[n_cost_items][i].index = i;
413#ifdef SIMPLE_DOMAIN_AGGREGATION
414 domain_determinate_aggregated_value(cost_queues[n_cost_items], ndomains);
416 mycxxsort(cost_queues[n_cost_items], cost_queues[n_cost_items] + ndomains, domain_sort_cost_queue_data);
417 first_unusued_in_cost_queue[n_cost_items] = 0;
// Greedy assignment: walk over the tasks, and for each task keep pulling the
// currently most expensive still-unused segment, rotating round-robin through
// the criterion queues, as long as the candidate's imbalance bound holds.
423 int ndomains_left = ndomains;
426 for(target = 0; target < NTask && ndomains_left > 0; target++)
431 while(ndomains_left > 0)
// Advance this queue's cursor past segments already taken via other queues.
433 int k = first_unusued_in_cost_queue[nextqueue];
434 while(domainAssign[cost_queues[nextqueue][k].index].used)
436 if(k == ndomains - 1)
437 Terminate(
"target=%d nextqueue=%d ndomains_left=%d k == ndomains - 1", target, nextqueue, ndomains_left);
440 first_unusued_in_cost_queue[nextqueue]++;
444 int n = cost_queues[nextqueue][k].index;
446 nextqueue = (nextqueue + 1) % n_cost_items;
// Compute the max over every balance criterion of what this task's cost
// would become if segment n were added to it.
451 if(max_cost < tasklist[target].load + domainAssign[n].load)
452 max_cost = tasklist[target].load + domainAssign[n].load;
454 if(max_cost < tasklist[target].loadsph + domainAssign[n].loadsph)
455 max_cost = tasklist[target].loadsph + domainAssign[n].loadsph;
457 for(
int bin = 0; bin < NumTimeBinsToBeBalanced; bin++)
459 if(max_cost < tasklist[target].bin_GravCost[bin] + domainAssign[n].bin_GravCost[bin])
460 max_cost = tasklist[target].bin_GravCost[bin] + domainAssign[n].bin_GravCost[bin];
462 if(max_cost < tasklist[target].bin_HydroCost[bin] + domainAssign[n].bin_HydroCost[bin])
463 max_cost = tasklist[target].bin_HydroCost[bin] + domainAssign[n].bin_HydroCost[bin];
// Costs are normalized so that max_cost * NTask is the resulting imbalance
// factor; if it would exceed this candidate's budget, count a failure (the
// failure-handling / bail-out path is on elided lines).
468 if(max_cost * NTask > try_balance)
472 if(failures > n_cost_items)
// Accept segment n onto task 'target' and update the running maximum.
479 if(max_cost > glob_max_cost)
480 glob_max_cost = max_cost;
482 domainAssign[n].task = target;
483 domainAssign[n].used = 1;
485 tasklist[target].load += domainAssign[n].load;
486 tasklist[target].loadsph += domainAssign[n].loadsph;
487 for(
int bin = 0; bin < NumTimeBinsToBeBalanced; bin++)
489 tasklist[target].bin_GravCost[bin] += domainAssign[n].bin_GravCost[bin];
490 tasklist[target].bin_HydroCost[bin] += domainAssign[n].bin_HydroCost[bin];
// Once fewer segments remain than tasks, stop stacking more onto this task
// (exact action on elided lines) so every remaining task still gets one.
497 if(ndomains_left < NTask - target)
502 nextqueue = (nextqueue + 1) % n_cost_items;
505 if(ndomains_left == 0)
507 domain_printf(
"DOMAIN: combining multiple-domains succeeded, target=%d NTask=%d\n", target, NTask);
510#ifdef DOMAIN_SPECIAL_CHECK
// Diagnostic dumps of the per-segment and per-task assignments; as above,
// buf setup and fclose are not visible in this fragment.
515 FILE *fd = fopen(buf,
"w");
516 fprintf(fd,
"%d %d\n", ndomains, NumTimeBinsToBeBalanced);
517 for(
int n = 0; n < ndomains; n++)
519 fprintf(fd,
"%g ", domainAssign[n].load);
520 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
521 fprintf(fd,
"%g ", domainAssign[n].bin_GravCost[k]);
530 FILE *fd = fopen(buf,
"w");
531 fprintf(fd,
"%d %d\n", NTask, NumTimeBinsToBeBalanced);
532 for(
int n = 0; n < NTask; n++)
534 fprintf(fd,
"%g ", tasklist[n].load);
535 for(
int k = 0; k < NumTimeBinsToBeBalanced; k++)
536 fprintf(fd,
"%g ", tasklist[n].bin_GravCost[k]);
539 fprintf(fd,
"%g\n", glob_max_cost * NTask);
// Materialize this rank's successful assignment into TaskOfLeaf[].
545 for(
int n = 0; n < ndomains; n++)
547 for(
int i = domainAssign[n].start; i <= domainAssign[n].end; i++)
548 TaskOfLeaf[i] = domainAssign[n].task;
// Free in reverse order of allocation, as Mem's stack-like allocator
// presumably requires -- TODO confirm against mymalloc's contract.
556 for(
int num = n_cost_items - 1; num >= 0; num--)
557 Mem.myfree(cost_queues[num]);
559 Mem.myfree(tasklist);
560 Mem.myfree(domainAssign);
// {cost, rank} pair for MPI_MINLOC: elect the rank whose successful
// assignment has the smallest maximum cost; a second reduction propagates
// whether any rank completed at all.
569 } global = {glob_max_cost, ThisTask};
571 MPI_Allreduce(MPI_IN_PLACE, &global, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
573 MPI_Allreduce(MPI_IN_PLACE, &completed, 1, MPI_INT, MPI_MAX, Communicator);
580 "DOMAIN: best solution found after %d iterations by task=%d for nextra=%d, reaching maximum imbalance of %g|%g\n", niter,
581 global.rank, balance_try[start_try + global.rank].nextra, global.cost * NTask,
582 balance_try[start_try + global.rank].try_balance);
// The winning rank's leaf-to-task map becomes everyone's map.
584 MPI_Bcast(TaskOfLeaf, NTopleaves, MPI_INT, global.rank, Communicator);
594 Mem.myfree(balance_try);
596 Mem.myfree(cost_data);
599 domain_printf(
"DOMAIN: combining multiple-domains took %g sec\n",
Logs.
timediff(t0, t1));
602#ifdef SIMPLE_DOMAIN_AGGREGATION
// Fragment of the aggregation helper used by the cost queues above
// (presumably domain_determinate_aggregated_value(data, ndomains) -- the
// signature itself is on elided lines; code kept byte-identical).
// It partitions the ndomains entries across NumNodes compute nodes --
// nbase per node, with the first 'additional' nodes getting one extra
// (start/end bookkeeping on elided lines) -- then stamps every entry of a
// node's chunk with the chunk's summed .value, so that sorting by
// aggregated_value groups whole nodes together.
603template <
typename partset>
609 int nbase = ndomains / NumNodes;
610 int additional = ndomains % NumNodes;
614 for(
int i = 0; i < NumNodes; i++)
625 double aggregated_value = 0;
626 for(
int n = start; n < end; n++)
627 aggregated_value += data[n].value;
629 for(
int n = start; n < end; n++)
630 data[n].aggregated_value = aggregated_value;
// Fragment of the routine that decides which timebins participate in the
// balancing and with what weights (enclosing signature and most control flow
// -- the loops over i/j, the computation of 'weight', and the tot_count /
// tot_count_sph gathering -- are on elided lines; code kept byte-identical).
// Defaults: no bin is balanced, and both weight arrays start at 1.
655 domain_to_be_balanced[i] = 0;
656 domain_grav_weight[i] = 1;
657 domain_hydro_weight[i] = 1;
665 NumTimeBinsToBeBalanced = 1;
670#ifdef HIERARCHICAL_GRAVITY
// Hierarchical-gravity variant: any bin that holds particles (gravity or SPH
// counts) is appended to the balanced-bin list; gravity weights accumulate
// (+2 per occupied finer level, presumably reflecting the extra sub-steps --
// TODO confirm against the elided loop structure).
674 if(tot_count[j] > 0 || tot_count_sph[j] > 0)
676 ListOfTimeBinsToBeBalanced[NumTimeBinsToBeBalanced++] = j;
678 domain_to_be_balanced[j] = 1;
681 domain_grav_weight[j] += 2;
688 domain_grav_weight[i] = weight;
691 domain_grav_weight[j] += 2 * weight;
694 if(tot_count_sph[i] > 0)
695 domain_hydro_weight[i] = weight;
// Non-hierarchical variant: same occupancy test, but weights are assigned
// directly rather than accumulated.
702 if(tot_count[i] > 0 || tot_count_sph[i] > 0)
704 ListOfTimeBinsToBeBalanced[NumTimeBinsToBeBalanced++] = i;
705 domain_to_be_balanced[i] = 1;
709 domain_grav_weight[i] = weight;
711 if(tot_count_sph[i] > 0)
712 domain_hydro_weight[i] = weight;
718#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES)
// Lightcone special case: reset everything and balance only bin 0 (the
// surrounding condition selecting this path is on elided lines).
725 domain_to_be_balanced[i] = 0;
726 domain_grav_weight[i] = 1;
727 domain_hydro_weight[i] = 1;
730 domain_to_be_balanced[0] = 1;
735#include "../data/simparticles.h"
738#ifdef LIGHTCONE_PARTICLES
739#include "../data/lcparticles.h"
global_data_all_processes All
double timediff(double t0, double t1)
#define MAX_DOUBLE_NUMBER
double mycxxsort(T *begin, T *end, Tcomp comp)
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
int LowestOccupiedTimeBin
int SmallestTimeBinWithDomainDecomposition
char OutputDir[MAXLEN_PATH]