00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006
00007 #include "allvars.h"
00008 #include "proto.h"
00009
00010
00011
00029 #define TOPNODEFACTOR 20.0
00030
00031 #define REDUC_FAC 0.98
00032
00033
00037 static int *toGo, *toGoSph;
00038 static int *local_toGo, *local_toGoSph;
00039 static int *list_NumPart;
00040 static int *list_N_gas;
00041 static int *list_load;
00042 static int *list_loadsph;
00043 static double *list_work;
00044
00045 static long long maxload, maxloadsph;
00046
00047 static struct topnode_exchange
00048 {
00049 peanokey Startkey;
00050 int Count;
00051 }
00052 *toplist, *toplist_local;
00053
00054
00055
00062 void domain_Decomposition(void)
00063 {
00064 double t0, t1;
00065
00066 #ifdef PMGRID
00067 if(All.PM_Ti_endstep == All.Ti_Current)
00068 {
00069 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00070
00071
00072 }
00073 #endif
00074
00075
00076 if(All.NumForcesSinceLastDomainDecomp > All.TotNumPart * All.TreeDomainUpdateFrequency)
00077 {
00078 t0 = second();
00079
00080 #ifdef PERIODIC
00081 do_box_wrapping();
00082 #endif
00083 All.NumForcesSinceLastDomainDecomp = 0;
00084 TreeReconstructFlag = 1;
00085
00086 if(ThisTask == 0)
00087 {
00088 printf("domain decomposition... \n");
00089 fflush(stdout);
00090 }
00091
00092 Key = malloc(sizeof(peanokey) * All.MaxPart);
00093 KeySorted = malloc(sizeof(peanokey) * All.MaxPart);
00094
00095 toGo = malloc(sizeof(int) * NTask * NTask);
00096 toGoSph = malloc(sizeof(int) * NTask * NTask);
00097 local_toGo = malloc(sizeof(int) * NTask);
00098 local_toGoSph = malloc(sizeof(int) * NTask);
00099 list_NumPart = malloc(sizeof(int) * NTask);
00100 list_N_gas = malloc(sizeof(int) * NTask);
00101 list_load = malloc(sizeof(int) * NTask);
00102 list_loadsph = malloc(sizeof(int) * NTask);
00103 list_work = malloc(sizeof(double) * NTask);
00104
00105 MPI_Allgather(&NumPart, 1, MPI_INT, list_NumPart, 1, MPI_INT, MPI_COMM_WORLD);
00106 MPI_Allgather(&N_gas, 1, MPI_INT, list_N_gas, 1, MPI_INT, MPI_COMM_WORLD);
00107
00108 maxload = All.MaxPart * REDUC_FAC;
00109 maxloadsph = All.MaxPartSph * REDUC_FAC;
00110
00111 domain_decompose();
00112
00113 free(list_work);
00114 free(list_loadsph);
00115 free(list_load);
00116 free(list_N_gas);
00117 free(list_NumPart);
00118 free(local_toGoSph);
00119 free(local_toGo);
00120 free(toGoSph);
00121 free(toGo);
00122
00123
00124 if(ThisTask == 0)
00125 {
00126 printf("domain decomposition done. \n");
00127 fflush(stdout);
00128 }
00129
00130 t1 = second();
00131 All.CPU_Domain += timediff(t0, t1);
00132
00133 #ifdef PEANOHILBERT
00134 t0 = second();
00135 peano_hilbert_order();
00136 t1 = second();
00137 All.CPU_Peano += timediff(t0, t1);
00138 #endif
00139
00140 free(KeySorted);
00141 free(Key);
00142 }
00143
00144 }
00145
00146
00147
00154 void domain_decompose(void)
00155 {
00156 int i, j, status;
00157 int ngrp, task, partner, sendcount, recvcount;
00158 long long sumtogo, sumload;
00159 int maxload, *temp;
00160 double sumwork, maxwork;
00161
00162 for(i = 0; i < 6; i++)
00163 NtypeLocal[i] = 0;
00164
00165 for(i = 0; i < NumPart; i++)
00166 NtypeLocal[P[i].Type]++;
00167
00168
00169
00170
00171 temp = malloc(NTask * 6 * sizeof(int));
00172 MPI_Allgather(NtypeLocal, 6, MPI_INT, temp, 6, MPI_INT, MPI_COMM_WORLD);
00173 for(i = 0; i < 6; i++)
00174 {
00175 Ntype[i] = 0;
00176 for(j = 0; j < NTask; j++)
00177 Ntype[i] += temp[j * 6 + i];
00178 }
00179 free(temp);
00180
00181 #ifndef UNEQUALSOFTENINGS
00182 for(i = 0; i < 6; i++)
00183 if(Ntype[i] > 0)
00184 break;
00185
00186 for(ngrp = i + 1; ngrp < 6; ngrp++)
00187 {
00188 if(Ntype[ngrp] > 0)
00189 if(All.SofteningTable[ngrp] != All.SofteningTable[i])
00190 {
00191 if(ThisTask == 0)
00192 {
00193 fprintf(stdout, "Code was not compiled with UNEQUALSOFTENINGS, but some of the\n");
00194 fprintf(stdout, "softening lengths are unequal nevertheless.\n");
00195 fprintf(stdout, "This is not allowed.\n");
00196 }
00197 endrun(0);
00198 }
00199 }
00200 #endif
00201
00202
00203
00204 domain_findExtent();
00205
00206 domain_determineTopTree();
00207
00208
00209 domain_sumCost();
00210
00211
00212 status = domain_findSplit(0, NTask, 0, NTopleaves - 1);
00213 if(status != 0)
00214 {
00215 if(ThisTask == 0)
00216 printf("\nNo domain decomposition that stays within memory bounds is possible.\n");
00217 endrun(0);
00218 }
00219
00220
00221 domain_shiftSplit();
00222
00223 DomainMyStart = DomainStartList[ThisTask];
00224 DomainMyLast = DomainEndList[ThisTask];
00225
00226 if(ThisTask == 0)
00227 {
00228 sumload = maxload = 0;
00229 sumwork = maxwork = 0;
00230 for(i = 0; i < NTask; i++)
00231 {
00232 sumload += list_load[i];
00233 sumwork += list_work[i];
00234
00235 if(list_load[i] > maxload)
00236 maxload = list_load[i];
00237
00238 if(list_work[i] > maxwork)
00239 maxwork = list_work[i];
00240 }
00241
00242 printf("work-load balance=%g memory-balance=%g\n",
00243 maxwork / (sumwork / NTask), maxload / (((double) sumload) / NTask));
00244 }
00245
00246
00247
00248 domain_countToGo();
00249
00250 for(i = 0, sumtogo = 0; i < NTask * NTask; i++)
00251 sumtogo += toGo[i];
00252
00253 while(sumtogo > 0)
00254 {
00255 if(ThisTask == 0)
00256 {
00257 printf("exchange of %d%09d particles\n", (int) (sumtogo / 1000000000),
00258 (int) (sumtogo % 1000000000));
00259 fflush(stdout);
00260 }
00261
00262 for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
00263 {
00264 for(task = 0; task < NTask; task++)
00265 {
00266 partner = task ^ ngrp;
00267
00268 if(partner < NTask && task < partner)
00269 {
00270
00271 if(All.TotN_gas > 0)
00272 {
00273 domain_findExchangeNumbers(task, partner, 1, &sendcount, &recvcount);
00274
00275 list_NumPart[task] += recvcount - sendcount;
00276 list_NumPart[partner] -= recvcount - sendcount;
00277 list_N_gas[task] += recvcount - sendcount;
00278 list_N_gas[partner] -= recvcount - sendcount;
00279
00280 toGo[task * NTask + partner] -= sendcount;
00281 toGo[partner * NTask + task] -= recvcount;
00282 toGoSph[task * NTask + partner] -= sendcount;
00283 toGoSph[partner * NTask + task] -= recvcount;
00284
00285 if(task == ThisTask)
00286 domain_exchangeParticles(partner, 1, sendcount, recvcount);
00287 if(partner == ThisTask)
00288 domain_exchangeParticles(task, 1, recvcount, sendcount);
00289 }
00290
00291 domain_findExchangeNumbers(task, partner, 0, &sendcount, &recvcount);
00292
00293 list_NumPart[task] += recvcount - sendcount;
00294 list_NumPart[partner] -= recvcount - sendcount;
00295
00296 toGo[task * NTask + partner] -= sendcount;
00297 toGo[partner * NTask + task] -= recvcount;
00298
00299 if(task == ThisTask)
00300 domain_exchangeParticles(partner, 0, sendcount, recvcount);
00301 if(partner == ThisTask)
00302 domain_exchangeParticles(task, 0, recvcount, sendcount);
00303 }
00304 }
00305 }
00306
00307 for(i = 0, sumtogo = 0; i < NTask * NTask; i++)
00308 sumtogo += toGo[i];
00309 }
00310 }
00311
00327 int domain_findSplit(int cpustart, int ncpu, int first, int last)
00328 {
00329 int i, split, ok_left, ok_right;
00330 long long load, sphload, load_leftOfSplit, sphload_leftOfSplit;
00331 int ncpu_leftOfSplit;
00332 double maxAvgLoad_CurrentSplit, maxAvgLoad_NewSplit;
00333
00334
00335 ncpu_leftOfSplit = ncpu / 2;
00336
00337 for(i = first, load = 0, sphload = 0; i <= last; i++)
00338 {
00339 load += DomainCount[i];
00340 sphload += DomainCountSph[i];
00341 }
00342
00343 split = first + ncpu_leftOfSplit;
00344
00345 for(i = first, load_leftOfSplit = sphload_leftOfSplit = 0; i < split; i++)
00346 {
00347 load_leftOfSplit += DomainCount[i];
00348 sphload_leftOfSplit += DomainCountSph[i];
00349 }
00350
00351
00352
00353 while(split < last - (ncpu - ncpu_leftOfSplit - 1) && split > 0)
00354 {
00355 maxAvgLoad_CurrentSplit =
00356 dmax(load_leftOfSplit / ncpu_leftOfSplit, (load - load_leftOfSplit) / (ncpu - ncpu_leftOfSplit));
00357
00358 maxAvgLoad_NewSplit =
00359 dmax((load_leftOfSplit + DomainCount[split]) / ncpu_leftOfSplit,
00360 (load - load_leftOfSplit - DomainCount[split]) / (ncpu - ncpu_leftOfSplit));
00361
00362 if(maxAvgLoad_NewSplit <= maxAvgLoad_CurrentSplit)
00363 {
00364 load_leftOfSplit += DomainCount[split];
00365 sphload_leftOfSplit += DomainCountSph[split];
00366 split++;
00367 }
00368 else
00369 break;
00370 }
00371
00372
00373
00374
00375 for(i = first, load_leftOfSplit = 0, sphload_leftOfSplit = 0; i < split; i++)
00376 {
00377 load_leftOfSplit += DomainCount[i];
00378 sphload_leftOfSplit += DomainCountSph[i];
00379 }
00380
00381 if(load_leftOfSplit > maxload * ncpu_leftOfSplit ||
00382 (load - load_leftOfSplit) > maxload * (ncpu - ncpu_leftOfSplit))
00383 {
00384
00385 return -1;
00386 }
00387
00388 if(sphload_leftOfSplit > maxloadsph * ncpu_leftOfSplit ||
00389 (sphload - sphload_leftOfSplit) > maxloadsph * (ncpu - ncpu_leftOfSplit))
00390 {
00391
00392 return -1;
00393 }
00394
00395 if(ncpu_leftOfSplit >= 2)
00396 ok_left = domain_findSplit(cpustart, ncpu_leftOfSplit, first, split - 1);
00397 else
00398 ok_left = 0;
00399
00400 if((ncpu - ncpu_leftOfSplit) >= 2)
00401 ok_right = domain_findSplit(cpustart + ncpu_leftOfSplit, ncpu - ncpu_leftOfSplit, split, last);
00402 else
00403 ok_right = 0;
00404
00405 if(ok_left == 0 && ok_right == 0)
00406 {
00407
00408
00409 if(ncpu_leftOfSplit == 1)
00410 {
00411 for(i = first; i < split; i++)
00412 DomainTask[i] = cpustart;
00413
00414 list_load[cpustart] = load_leftOfSplit;
00415 list_loadsph[cpustart] = sphload_leftOfSplit;
00416 DomainStartList[cpustart] = first;
00417 DomainEndList[cpustart] = split - 1;
00418 }
00419
00420 if((ncpu - ncpu_leftOfSplit) == 1)
00421 {
00422 for(i = split; i <= last; i++)
00423 DomainTask[i] = cpustart + ncpu_leftOfSplit;
00424
00425 list_load[cpustart + ncpu_leftOfSplit] = load - load_leftOfSplit;
00426 list_loadsph[cpustart + ncpu_leftOfSplit] = sphload - sphload_leftOfSplit;
00427 DomainStartList[cpustart + ncpu_leftOfSplit] = split;
00428 DomainEndList[cpustart + ncpu_leftOfSplit] = last;
00429 }
00430
00431 return 0;
00432 }
00433
00434
00435 return -1;
00436 }
00437
00438
00439
00447 void domain_shiftSplit(void)
00448 {
00449 int i, task, iter = 0, moved;
00450 double maxw, newmaxw;
00451
00452 for(task = 0; task < NTask; task++)
00453 list_work[task] = 0;
00454
00455 for(i = 0; i < NTopleaves; i++)
00456 list_work[DomainTask[i]] += DomainWork[i];
00457
00458 do
00459 {
00460 for(task = 0, moved = 0; task < NTask - 1; task++)
00461 {
00462 maxw = dmax(list_work[task], list_work[task + 1]);
00463
00464 if(list_work[task] < list_work[task + 1])
00465 {
00466 newmaxw = dmax(list_work[task] + DomainWork[DomainStartList[task + 1]],
00467 list_work[task + 1] - DomainWork[DomainStartList[task + 1]]);
00468 if(newmaxw <= maxw)
00469 {
00470 if(list_load[task] + DomainCount[DomainStartList[task + 1]] <= maxload)
00471 {
00472 if(list_loadsph[task] + DomainCountSph[DomainStartList[task + 1]] > maxloadsph)
00473 continue;
00474
00475
00476 list_work[task] += DomainWork[DomainStartList[task + 1]];
00477 list_load[task] += DomainCount[DomainStartList[task + 1]];
00478 list_loadsph[task] += DomainCountSph[DomainStartList[task + 1]];
00479 list_work[task + 1] -= DomainWork[DomainStartList[task + 1]];
00480 list_load[task + 1] -= DomainCount[DomainStartList[task + 1]];
00481 list_loadsph[task + 1] -= DomainCountSph[DomainStartList[task + 1]];
00482
00483 DomainTask[DomainStartList[task + 1]] = task;
00484 DomainStartList[task + 1] += 1;
00485 DomainEndList[task] += 1;
00486
00487 moved++;
00488 }
00489 }
00490 }
00491 else
00492 {
00493 newmaxw = dmax(list_work[task] - DomainWork[DomainEndList[task]],
00494 list_work[task + 1] + DomainWork[DomainEndList[task]]);
00495 if(newmaxw <= maxw)
00496 {
00497 if(list_load[task + 1] + DomainCount[DomainEndList[task]] <= maxload)
00498 {
00499 if(list_loadsph[task + 1] + DomainCountSph[DomainEndList[task]] > maxloadsph)
00500 continue;
00501
00502
00503 list_work[task] -= DomainWork[DomainEndList[task]];
00504 list_load[task] -= DomainCount[DomainEndList[task]];
00505 list_loadsph[task] -= DomainCountSph[DomainEndList[task]];
00506 list_work[task + 1] += DomainWork[DomainEndList[task]];
00507 list_load[task + 1] += DomainCount[DomainEndList[task]];
00508 list_loadsph[task + 1] += DomainCountSph[DomainEndList[task]];
00509
00510 DomainTask[DomainEndList[task]] = task + 1;
00511 DomainEndList[task] -= 1;
00512 DomainStartList[task + 1] -= 1;
00513
00514 moved++;
00515 }
00516 }
00517
00518 }
00519 }
00520
00521 iter++;
00522 }
00523 while(moved > 0 && iter < 10 * NTopleaves);
00524 }
00525
00526
00534 void domain_findExchangeNumbers(int task, int partner, int sphflag, int *send, int *recv)
00535 {
00536 int numpartA, numpartsphA, ntobesentA, maxsendA, maxsendA_old;
00537 int numpartB, numpartsphB, ntobesentB, maxsendB, maxsendB_old;
00538
00539 numpartA = list_NumPart[task];
00540 numpartsphA = list_N_gas[task];
00541
00542 numpartB = list_NumPart[partner];
00543 numpartsphB = list_N_gas[partner];
00544
00545 if(sphflag == 1)
00546 {
00547 ntobesentA = toGoSph[task * NTask + partner];
00548 ntobesentB = toGoSph[partner * NTask + task];
00549 }
00550 else
00551 {
00552 ntobesentA = toGo[task * NTask + partner] - toGoSph[task * NTask + partner];
00553 ntobesentB = toGo[partner * NTask + task] - toGoSph[partner * NTask + task];
00554 }
00555
00556 maxsendA = imin(ntobesentA, All.BunchSizeDomain);
00557 maxsendB = imin(ntobesentB, All.BunchSizeDomain);
00558
00559 do
00560 {
00561 maxsendA_old = maxsendA;
00562 maxsendB_old = maxsendB;
00563
00564 maxsendA = imin(All.MaxPart - numpartB + maxsendB, maxsendA);
00565 maxsendB = imin(All.MaxPart - numpartA + maxsendA, maxsendB);
00566 }
00567 while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
00568
00569
00570
00571 if(sphflag == 1)
00572 {
00573 do
00574 {
00575 maxsendA_old = maxsendA;
00576 maxsendB_old = maxsendB;
00577
00578 maxsendA = imin(All.MaxPartSph - numpartsphB + maxsendB, maxsendA);
00579 maxsendB = imin(All.MaxPartSph - numpartsphA + maxsendA, maxsendB);
00580 }
00581 while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
00582 }
00583
00584 *send = maxsendA;
00585 *recv = maxsendB;
00586 }
00587
00588
00589
00590
00595 void domain_exchangeParticles(int partner, int sphflag, int send_count, int recv_count)
00596 {
00597 int i, no, n, count, rep;
00598 MPI_Status status;
00599
00600 for(n = 0, count = 0; count < send_count && n < NumPart; n++)
00601 {
00602 if(sphflag)
00603 {
00604 if(P[n].Type != 0)
00605 continue;
00606 }
00607 else
00608 {
00609 if(P[n].Type == 0)
00610 continue;
00611 }
00612
00613 no = 0;
00614
00615 while(TopNodes[no].Daughter >= 0)
00616 no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
00617
00618 no = TopNodes[no].Leaf;
00619
00620 if(DomainTask[no] == partner)
00621 {
00622 if(sphflag)
00623 {
00624 DomainPartBuf[count] = P[n];
00625 DomainKeyBuf[count] = Key[n];
00626 DomainSphBuf[count] = SphP[n];
00627
00628 P[n] = P[N_gas - 1];
00629 P[N_gas - 1] = P[NumPart - 1];
00630
00631 Key[n] = Key[N_gas - 1];
00632 Key[N_gas - 1] = Key[NumPart - 1];
00633
00634 SphP[n] = SphP[N_gas - 1];
00635
00636 N_gas--;
00637 }
00638 else
00639 {
00640 DomainPartBuf[count] = P[n];
00641 DomainKeyBuf[count] = Key[n];
00642 P[n] = P[NumPart - 1];
00643 Key[n] = Key[NumPart - 1];
00644 }
00645
00646 count++;
00647 NumPart--;
00648 n--;
00649 }
00650 }
00651
00652 if(count != send_count)
00653 {
00654 printf("Houston, we got a problem...\n");
00655 printf("ThisTask=%d count=%d send_count=%d\n", ThisTask, count, send_count);
00656 endrun(88);
00657 }
00658
00659
00660
00661 for(rep = 0; rep < 2; rep++)
00662 {
00663 if((rep == 0 && ThisTask < partner) || (rep == 1 && ThisTask > partner))
00664 {
00665 if(send_count > 0)
00666 {
00667 MPI_Ssend(&DomainPartBuf[0], send_count * sizeof(struct particle_data), MPI_BYTE, partner,
00668 TAG_PDATA, MPI_COMM_WORLD);
00669
00670 MPI_Ssend(&DomainKeyBuf[0], send_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
00671 MPI_COMM_WORLD);
00672
00673 if(sphflag)
00674 MPI_Ssend(&DomainSphBuf[0], send_count * sizeof(struct sph_particle_data), MPI_BYTE, partner,
00675 TAG_SPHDATA, MPI_COMM_WORLD);
00676 }
00677 }
00678
00679 if((rep == 1 && ThisTask < partner) || (rep == 0 && ThisTask > partner))
00680 {
00681 if(recv_count > 0)
00682 {
00683 if(sphflag)
00684 {
00685 if((NumPart - N_gas) > recv_count)
00686 {
00687 for(i = 0; i < recv_count; i++)
00688 {
00689 P[NumPart + i] = P[N_gas + i];
00690 Key[NumPart + i] = Key[N_gas + i];
00691 }
00692 }
00693 else
00694 {
00695 for(i = NumPart - 1; i >= N_gas; i--)
00696 {
00697 P[i + recv_count] = P[i];
00698 Key[i + recv_count] = Key[i];
00699 }
00700 }
00701
00702 MPI_Recv(&P[N_gas], recv_count * sizeof(struct particle_data), MPI_BYTE, partner, TAG_PDATA,
00703 MPI_COMM_WORLD, &status);
00704 MPI_Recv(&Key[N_gas], recv_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
00705 MPI_COMM_WORLD, &status);
00706 MPI_Recv(&SphP[N_gas], recv_count * sizeof(struct sph_particle_data), MPI_BYTE, partner,
00707 TAG_SPHDATA, MPI_COMM_WORLD, &status);
00708
00709 N_gas += recv_count;
00710 }
00711 else
00712 {
00713 MPI_Recv(&P[NumPart], recv_count * sizeof(struct particle_data), MPI_BYTE, partner,
00714 TAG_PDATA, MPI_COMM_WORLD, &status);
00715 MPI_Recv(&Key[NumPart], recv_count * sizeof(peanokey), MPI_BYTE, partner,
00716 TAG_KEY, MPI_COMM_WORLD, &status);
00717 }
00718
00719 NumPart += recv_count;
00720 }
00721 }
00722 }
00723 }
00724
00729 void domain_countToGo(void)
00730 {
00731 int n, no;
00732
00733 for(n = 0; n < NTask; n++)
00734 {
00735 local_toGo[n] = 0;
00736 local_toGoSph[n] = 0;
00737 }
00738
00739 for(n = 0; n < NumPart; n++)
00740 {
00741 no = 0;
00742
00743 while(TopNodes[no].Daughter >= 0)
00744 no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
00745
00746 no = TopNodes[no].Leaf;
00747
00748 if(DomainTask[no] != ThisTask)
00749 {
00750 local_toGo[DomainTask[no]] += 1;
00751 if(P[n].Type == 0)
00752 local_toGoSph[DomainTask[no]] += 1;
00753 }
00754 }
00755
00756 MPI_Allgather(local_toGo, NTask, MPI_INT, toGo, NTask, MPI_INT, MPI_COMM_WORLD);
00757 MPI_Allgather(local_toGoSph, NTask, MPI_INT, toGoSph, NTask, MPI_INT, MPI_COMM_WORLD);
00758 }
00759
00760
00765 void domain_walktoptree(int no)
00766 {
00767 int i;
00768
00769 if(TopNodes[no].Daughter == -1)
00770 {
00771 TopNodes[no].Leaf = NTopleaves;
00772 NTopleaves++;
00773 }
00774 else
00775 {
00776 for(i = 0; i < 8; i++)
00777 domain_walktoptree(TopNodes[no].Daughter + i);
00778 }
00779 }
00780
00786 void domain_sumCost(void)
00787 {
00788 int i, n, no;
00789 double *local_DomainWork;
00790 int *local_DomainCount;
00791 int *local_DomainCountSph;
00792
00793 local_DomainWork = malloc(NTopnodes * sizeof(double));
00794 local_DomainCount = malloc(NTopnodes * sizeof(int));
00795 local_DomainCountSph = malloc(NTopnodes * sizeof(int));
00796
00797
00798
00799 NTopleaves = 0;
00800
00801 domain_walktoptree(0);
00802
00803 for(i = 0; i < NTopleaves; i++)
00804 {
00805 local_DomainWork[i] = 0;
00806 local_DomainCount[i] = 0;
00807 local_DomainCountSph[i] = 0;
00808 }
00809
00810 if(ThisTask == 0)
00811 printf("NTopleaves= %d\n", NTopleaves);
00812
00813 for(n = 0; n < NumPart; n++)
00814 {
00815 no = 0;
00816
00817 while(TopNodes[no].Daughter >= 0)
00818 no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
00819
00820 no = TopNodes[no].Leaf;
00821
00822 if(P[n].Ti_endstep > P[n].Ti_begstep)
00823 local_DomainWork[no] += (1.0 + P[n].GravCost) / (P[n].Ti_endstep - P[n].Ti_begstep);
00824 else
00825 local_DomainWork[no] += (1.0 + P[n].GravCost);
00826
00827 local_DomainCount[no] += 1;
00828 if(P[n].Type == 0)
00829 local_DomainCountSph[no] += 1;
00830 }
00831
00832 MPI_Allreduce(local_DomainWork, DomainWork, NTopleaves, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
00833 MPI_Allreduce(local_DomainCount, DomainCount, NTopleaves, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00834 MPI_Allreduce(local_DomainCountSph, DomainCountSph, NTopleaves, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00835
00836
00837 free(local_DomainCountSph);
00838 free(local_DomainCount);
00839 free(local_DomainWork);
00840 }
00841
00842
00845 void domain_findExtent(void)
00846 {
00847 int i, j;
00848 double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3];
00849
00850
00851 for(j = 0; j < 3; j++)
00852 {
00853 xmin[j] = MAX_REAL_NUMBER;
00854 xmax[j] = -MAX_REAL_NUMBER;
00855 }
00856
00857 for(i = 0; i < NumPart; i++)
00858 {
00859 for(j = 0; j < 3; j++)
00860 {
00861 if(xmin[j] > P[i].Pos[j])
00862 xmin[j] = P[i].Pos[j];
00863
00864 if(xmax[j] < P[i].Pos[j])
00865 xmax[j] = P[i].Pos[j];
00866 }
00867 }
00868
00869 MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00870 MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
00871
00872 len = 0;
00873 for(j = 0; j < 3; j++)
00874 if(xmax_glob[j] - xmin_glob[j] > len)
00875 len = xmax_glob[j] - xmin_glob[j];
00876
00877 len *= 1.001;
00878
00879 for(j = 0; j < 3; j++)
00880 {
00881 DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]);
00882 DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len;
00883 }
00884
00885 DomainLen = len;
00886 DomainFac = 1.0 / len * (((peanokey) 1) << (BITS_PER_DIMENSION));
00887 }
00888
00889
00896 void domain_determineTopTree(void)
00897 {
00898 int i, ntop_local, ntop;
00899 int *ntopnodelist, *ntopoffset;
00900
00901 for(i = 0; i < NumPart; i++)
00902 {
00903 KeySorted[i] = Key[i] = peano_hilbert_key((P[i].Pos[0] - DomainCorner[0]) * DomainFac,
00904 (P[i].Pos[1] - DomainCorner[1]) * DomainFac,
00905 (P[i].Pos[2] - DomainCorner[2]) * DomainFac,
00906 BITS_PER_DIMENSION);
00907 }
00908
00909 qsort(KeySorted, NumPart, sizeof(peanokey), domain_compare_key);
00910
00911 NTopnodes = 1;
00912 TopNodes[0].Daughter = -1;
00913 TopNodes[0].Size = PEANOCELLS;
00914 TopNodes[0].StartKey = 0;
00915 TopNodes[0].Count = NumPart;
00916 TopNodes[0].Pstart = 0;
00917
00918 domain_topsplit_local(0, 0);
00919
00920 toplist_local = malloc(NTopnodes * sizeof(struct topnode_exchange));
00921
00922 for(i = 0, ntop_local = 0; i < NTopnodes; i++)
00923 {
00924 if(TopNodes[i].Daughter == -1)
00925 {
00926 toplist_local[ntop_local].Startkey = TopNodes[i].StartKey;
00927 toplist_local[ntop_local].Count = TopNodes[i].Count;
00928 ntop_local++;
00929 }
00930 }
00931
00932 ntopnodelist = malloc(sizeof(int) * NTask);
00933 ntopoffset = malloc(sizeof(int) * NTask);
00934
00935 MPI_Allgather(&ntop_local, 1, MPI_INT, ntopnodelist, 1, MPI_INT, MPI_COMM_WORLD);
00936
00937 for(i = 0, ntop = 0, ntopoffset[0] = 0; i < NTask; i++)
00938 {
00939 ntop += ntopnodelist[i];
00940 if(i > 0)
00941 ntopoffset[i] = ntopoffset[i - 1] + ntopnodelist[i - 1];
00942 }
00943
00944
00945 toplist = malloc(ntop * sizeof(struct topnode_exchange));
00946
00947 for(i = 0; i < NTask; i++)
00948 {
00949 ntopnodelist[i] *= sizeof(struct topnode_exchange);
00950 ntopoffset[i] *= sizeof(struct topnode_exchange);
00951 }
00952
00953 MPI_Allgatherv(toplist_local, ntop_local * sizeof(struct topnode_exchange), MPI_BYTE,
00954 toplist, ntopnodelist, ntopoffset, MPI_BYTE, MPI_COMM_WORLD);
00955
00956 qsort(toplist, ntop, sizeof(struct topnode_exchange), domain_compare_toplist);
00957
00958 NTopnodes = 1;
00959 TopNodes[0].Daughter = -1;
00960 TopNodes[0].Size = PEANOCELLS;
00961 TopNodes[0].StartKey = 0;
00962 TopNodes[0].Count = All.TotNumPart;
00963 TopNodes[0].Pstart = 0;
00964 TopNodes[0].Blocks = ntop;
00965
00966 domain_topsplit(0, 0);
00967
00968 free(toplist);
00969 free(ntopoffset);
00970 free(ntopnodelist);
00971 free(toplist_local);
00972
00973 }
00974
00975
00976
00982 void domain_topsplit_local(int node, peanokey startkey)
00983 {
00984 int i, p, sub, bin;
00985
00986 if(TopNodes[node].Size >= 8)
00987 {
00988 TopNodes[node].Daughter = NTopnodes;
00989
00990 for(i = 0; i < 8; i++)
00991 {
00992 if(NTopnodes < MAXTOPNODES)
00993 {
00994 sub = TopNodes[node].Daughter + i;
00995 TopNodes[sub].Size = TopNodes[node].Size / 8;
00996 TopNodes[sub].Count = 0;
00997 TopNodes[sub].Daughter = -1;
00998 TopNodes[sub].StartKey = startkey + i * TopNodes[sub].Size;
00999 TopNodes[sub].Pstart = TopNodes[node].Pstart;
01000
01001 NTopnodes++;
01002 }
01003 else
01004 {
01005 printf("task=%d: We are out of Topnodes. Increasing the constant MAXTOPNODES might help.\n",
01006 ThisTask);
01007 fflush(stdout);
01008 endrun(13213);
01009 }
01010 }
01011
01012 for(p = TopNodes[node].Pstart; p < TopNodes[node].Pstart + TopNodes[node].Count; p++)
01013 {
01014 bin = (KeySorted[p] - startkey) / (TopNodes[node].Size / 8);
01015
01016 if(bin < 0 || bin > 7)
01017 {
01018 printf("task=%d: something odd has happened here. bin=%d\n", ThisTask, bin);
01019 fflush(stdout);
01020 endrun(13123123);
01021 }
01022
01023 sub = TopNodes[node].Daughter + bin;
01024
01025 if(TopNodes[sub].Count == 0)
01026 TopNodes[sub].Pstart = p;
01027
01028 TopNodes[sub].Count++;
01029 }
01030
01031 for(i = 0; i < 8; i++)
01032 {
01033 sub = TopNodes[node].Daughter + i;
01034 if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * NTask * NTask))
01035 domain_topsplit_local(sub, TopNodes[sub].StartKey);
01036 }
01037 }
01038 }
01039
01040
01041
01049 void domain_topsplit(int node, peanokey startkey)
01050 {
01051 int i, p, sub, bin;
01052
01053 if(TopNodes[node].Size >= 8)
01054 {
01055 TopNodes[node].Daughter = NTopnodes;
01056
01057 for(i = 0; i < 8; i++)
01058 {
01059 if(NTopnodes < MAXTOPNODES)
01060 {
01061 sub = TopNodes[node].Daughter + i;
01062 TopNodes[sub].Size = TopNodes[node].Size / 8;
01063 TopNodes[sub].Count = 0;
01064 TopNodes[sub].Blocks = 0;
01065 TopNodes[sub].Daughter = -1;
01066 TopNodes[sub].StartKey = startkey + i * TopNodes[sub].Size;
01067 TopNodes[sub].Pstart = TopNodes[node].Pstart;
01068 NTopnodes++;
01069 }
01070 else
01071 {
01072 printf("Task=%d: We are out of Topnodes. Increasing the constant MAXTOPNODES might help.\n",
01073 ThisTask);
01074 fflush(stdout);
01075 endrun(137213);
01076 }
01077 }
01078
01079 for(p = TopNodes[node].Pstart; p < TopNodes[node].Pstart + TopNodes[node].Blocks; p++)
01080 {
01081 bin = (toplist[p].Startkey - startkey) / (TopNodes[node].Size / 8);
01082 sub = TopNodes[node].Daughter + bin;
01083
01084 if(bin < 0 || bin > 7)
01085 endrun(77);
01086
01087 if(TopNodes[sub].Blocks == 0)
01088 TopNodes[sub].Pstart = p;
01089
01090 TopNodes[sub].Count += toplist[p].Count;
01091 TopNodes[sub].Blocks++;
01092 }
01093
01094 for(i = 0; i < 8; i++)
01095 {
01096 sub = TopNodes[node].Daughter + i;
01097 if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * NTask))
01098 domain_topsplit(sub, TopNodes[sub].StartKey);
01099 }
01100 }
01101 }
01102
01103
01106 int domain_compare_toplist(const void *a, const void *b)
01107 {
01108 if(((struct topnode_exchange *) a)->Startkey < (((struct topnode_exchange *) b)->Startkey))
01109 return -1;
01110
01111 if(((struct topnode_exchange *) a)->Startkey > (((struct topnode_exchange *) b)->Startkey))
01112 return +1;
01113
01114 return 0;
01115 }
01116
01119 int domain_compare_key(const void *a, const void *b)
01120 {
01121 if(*(peanokey *) a < *(peanokey *) b)
01122 return -1;
01123
01124 if(*(peanokey *) a > *(peanokey *) b)
01125 return +1;
01126
01127 return 0;
01128 }