Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

domain.c

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006 
00007 #include "allvars.h"
00008 #include "proto.h"
00009 
00010 
00011 
00029 #define TOPNODEFACTOR  20.0
00030 
00031 #define REDUC_FAC      0.98
00032 
00033 
00037 static int *toGo, *toGoSph;
00038 static int *local_toGo, *local_toGoSph;
00039 static int *list_NumPart;
00040 static int *list_N_gas;
00041 static int *list_load;
00042 static int *list_loadsph;
00043 static double *list_work;
00044 
00045 static long long maxload, maxloadsph;
00046 
00047 static struct topnode_exchange
00048 {
00049   peanokey Startkey;
00050   int Count;
00051 }
00052  *toplist, *toplist_local;
00053 
00054 
00055 
00062 void domain_Decomposition(void)
00063 {
00064   double t0, t1;
00065 
00066 #ifdef PMGRID
00067   if(All.PM_Ti_endstep == All.Ti_Current)
00068     {
00069       All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00070       /* to make sure that we do a domain decomposition before the PM-force is evaluated.
00071          this is needed to make sure that the particles are wrapped into the box */
00072     }
00073 #endif
00074 
00075   /* Check whether it is really time for a new domain decomposition */
00076   if(All.NumForcesSinceLastDomainDecomp > All.TotNumPart * All.TreeDomainUpdateFrequency)
00077     {
00078       t0 = second();
00079 
00080 #ifdef PERIODIC
00081       do_box_wrapping();        /* map the particles back onto the box */
00082 #endif
00083       All.NumForcesSinceLastDomainDecomp = 0;
00084       TreeReconstructFlag = 1;  /* ensures that new tree will be constructed */
00085 
00086       if(ThisTask == 0)
00087         {
00088           printf("domain decomposition... \n");
00089           fflush(stdout);
00090         }
00091 
00092       Key = malloc(sizeof(peanokey) * All.MaxPart);
00093       KeySorted = malloc(sizeof(peanokey) * All.MaxPart);
00094 
00095       toGo = malloc(sizeof(int) * NTask * NTask);
00096       toGoSph = malloc(sizeof(int) * NTask * NTask);
00097       local_toGo = malloc(sizeof(int) * NTask);
00098       local_toGoSph = malloc(sizeof(int) * NTask);
00099       list_NumPart = malloc(sizeof(int) * NTask);
00100       list_N_gas = malloc(sizeof(int) * NTask);
00101       list_load = malloc(sizeof(int) * NTask);
00102       list_loadsph = malloc(sizeof(int) * NTask);
00103       list_work = malloc(sizeof(double) * NTask);
00104 
00105       MPI_Allgather(&NumPart, 1, MPI_INT, list_NumPart, 1, MPI_INT, MPI_COMM_WORLD);
00106       MPI_Allgather(&N_gas, 1, MPI_INT, list_N_gas, 1, MPI_INT, MPI_COMM_WORLD);
00107 
00108       maxload = All.MaxPart * REDUC_FAC;
00109       maxloadsph = All.MaxPartSph * REDUC_FAC;
00110 
00111       domain_decompose();
00112 
00113       free(list_work);
00114       free(list_loadsph);
00115       free(list_load);
00116       free(list_N_gas);
00117       free(list_NumPart);
00118       free(local_toGoSph);
00119       free(local_toGo);
00120       free(toGoSph);
00121       free(toGo);
00122 
00123 
00124       if(ThisTask == 0)
00125         {
00126           printf("domain decomposition done. \n");
00127           fflush(stdout);
00128         }
00129 
00130       t1 = second();
00131       All.CPU_Domain += timediff(t0, t1);
00132 
00133 #ifdef PEANOHILBERT
00134       t0 = second();
00135       peano_hilbert_order();
00136       t1 = second();
00137       All.CPU_Peano += timediff(t0, t1);
00138 #endif
00139 
00140       free(KeySorted);
00141       free(Key);
00142     }
00143 
00144 }
00145 
00146 
00147 
00154 void domain_decompose(void)
00155 {
00156   int i, j, status;
00157   int ngrp, task, partner, sendcount, recvcount;
00158   long long sumtogo, sumload;
00159   int maxload, *temp;
00160   double sumwork, maxwork;
00161 
00162   for(i = 0; i < 6; i++)
00163     NtypeLocal[i] = 0;
00164 
00165   for(i = 0; i < NumPart; i++)
00166     NtypeLocal[P[i].Type]++;
00167 
00168   /* because Ntype[] is of type `long long', we cannot do a simple
00169    * MPI_Allreduce() to sum the total particle numbers 
00170    */
00171   temp = malloc(NTask * 6 * sizeof(int));
00172   MPI_Allgather(NtypeLocal, 6, MPI_INT, temp, 6, MPI_INT, MPI_COMM_WORLD);
00173   for(i = 0; i < 6; i++)
00174     {
00175       Ntype[i] = 0;
00176       for(j = 0; j < NTask; j++)
00177         Ntype[i] += temp[j * 6 + i];
00178     }
00179   free(temp);
00180 
00181 #ifndef UNEQUALSOFTENINGS
00182   for(i = 0; i < 6; i++)
00183     if(Ntype[i] > 0)
00184       break;
00185 
00186   for(ngrp = i + 1; ngrp < 6; ngrp++)
00187     {
00188       if(Ntype[ngrp] > 0)
00189         if(All.SofteningTable[ngrp] != All.SofteningTable[i])
00190           {
00191             if(ThisTask == 0)
00192               {
00193                 fprintf(stdout, "Code was not compiled with UNEQUALSOFTENINGS, but some of the\n");
00194                 fprintf(stdout, "softening lengths are unequal nevertheless.\n");
00195                 fprintf(stdout, "This is not allowed.\n");
00196               }
00197             endrun(0);
00198           }
00199     }
00200 #endif
00201 
00202 
00203   /* determine global dimensions of domain grid */
00204   domain_findExtent();
00205 
00206   domain_determineTopTree();
00207 
00208   /* determine cost distribution in domain grid */
00209   domain_sumCost();
00210 
00211   /* find the split of the domain grid recursively */
00212   status = domain_findSplit(0, NTask, 0, NTopleaves - 1);
00213   if(status != 0)
00214     {
00215       if(ThisTask == 0)
00216         printf("\nNo domain decomposition that stays within memory bounds is possible.\n");
00217       endrun(0);
00218     }
00219 
00220   /* now try to improve the work-load balance of the split */
00221   domain_shiftSplit();
00222 
00223   DomainMyStart = DomainStartList[ThisTask];
00224   DomainMyLast = DomainEndList[ThisTask];
00225 
00226   if(ThisTask == 0)
00227     {
00228       sumload = maxload = 0;
00229       sumwork = maxwork = 0;
00230       for(i = 0; i < NTask; i++)
00231         {
00232           sumload += list_load[i];
00233           sumwork += list_work[i];
00234 
00235           if(list_load[i] > maxload)
00236             maxload = list_load[i];
00237 
00238           if(list_work[i] > maxwork)
00239             maxwork = list_work[i];
00240         }
00241 
00242       printf("work-load balance=%g   memory-balance=%g\n",
00243              maxwork / (sumwork / NTask), maxload / (((double) sumload) / NTask));
00244     }
00245 
00246 
00247   /* determine for each cpu how many particles have to be shifted to other cpus */
00248   domain_countToGo();
00249 
00250   for(i = 0, sumtogo = 0; i < NTask * NTask; i++)
00251     sumtogo += toGo[i];
00252 
00253   while(sumtogo > 0)
00254     {
00255       if(ThisTask == 0)
00256         {
00257           printf("exchange of %d%09d particles\n", (int) (sumtogo / 1000000000),
00258                  (int) (sumtogo % 1000000000));
00259           fflush(stdout);
00260         }
00261 
00262       for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
00263         {
00264           for(task = 0; task < NTask; task++)
00265             {
00266               partner = task ^ ngrp;
00267 
00268               if(partner < NTask && task < partner)
00269                 {
00270                   /* treat SPH separately */
00271                   if(All.TotN_gas > 0)
00272                     {
00273                       domain_findExchangeNumbers(task, partner, 1, &sendcount, &recvcount);
00274 
00275                       list_NumPart[task] += recvcount - sendcount;
00276                       list_NumPart[partner] -= recvcount - sendcount;
00277                       list_N_gas[task] += recvcount - sendcount;
00278                       list_N_gas[partner] -= recvcount - sendcount;
00279 
00280                       toGo[task * NTask + partner] -= sendcount;
00281                       toGo[partner * NTask + task] -= recvcount;
00282                       toGoSph[task * NTask + partner] -= sendcount;
00283                       toGoSph[partner * NTask + task] -= recvcount;
00284 
00285                       if(task == ThisTask)      /* actually carry out the exchange */
00286                         domain_exchangeParticles(partner, 1, sendcount, recvcount);
00287                       if(partner == ThisTask)
00288                         domain_exchangeParticles(task, 1, recvcount, sendcount);
00289                     }
00290 
00291                   domain_findExchangeNumbers(task, partner, 0, &sendcount, &recvcount);
00292 
00293                   list_NumPart[task] += recvcount - sendcount;
00294                   list_NumPart[partner] -= recvcount - sendcount;
00295 
00296                   toGo[task * NTask + partner] -= sendcount;
00297                   toGo[partner * NTask + task] -= recvcount;
00298 
00299                   if(task == ThisTask)  /* actually carry out the exchange */
00300                     domain_exchangeParticles(partner, 0, sendcount, recvcount);
00301                   if(partner == ThisTask)
00302                     domain_exchangeParticles(task, 0, recvcount, sendcount);
00303                 }
00304             }
00305         }
00306 
00307       for(i = 0, sumtogo = 0; i < NTask * NTask; i++)
00308         sumtogo += toGo[i];
00309     }
00310 }
00311 
00327 int domain_findSplit(int cpustart, int ncpu, int first, int last)
00328 {
00329   int i, split, ok_left, ok_right;
00330   long long load, sphload, load_leftOfSplit, sphload_leftOfSplit;
00331   int ncpu_leftOfSplit;
00332   double maxAvgLoad_CurrentSplit, maxAvgLoad_NewSplit;
00333 
00334 
00335   ncpu_leftOfSplit = ncpu / 2;
00336 
00337   for(i = first, load = 0, sphload = 0; i <= last; i++)
00338     {
00339       load += DomainCount[i];
00340       sphload += DomainCountSph[i];
00341     }
00342 
00343   split = first + ncpu_leftOfSplit;
00344 
00345   for(i = first, load_leftOfSplit = sphload_leftOfSplit = 0; i < split; i++)
00346     {
00347       load_leftOfSplit += DomainCount[i];
00348       sphload_leftOfSplit += DomainCountSph[i];
00349     }
00350 
00351   /* find the best split point in terms of work-load balance */
00352 
00353   while(split < last - (ncpu - ncpu_leftOfSplit - 1) && split > 0)
00354     {
00355       maxAvgLoad_CurrentSplit =
00356         dmax(load_leftOfSplit / ncpu_leftOfSplit, (load - load_leftOfSplit) / (ncpu - ncpu_leftOfSplit));
00357 
00358       maxAvgLoad_NewSplit =
00359         dmax((load_leftOfSplit + DomainCount[split]) / ncpu_leftOfSplit,
00360              (load - load_leftOfSplit - DomainCount[split]) / (ncpu - ncpu_leftOfSplit));
00361 
00362       if(maxAvgLoad_NewSplit <= maxAvgLoad_CurrentSplit)
00363         {
00364           load_leftOfSplit += DomainCount[split];
00365           sphload_leftOfSplit += DomainCountSph[split];
00366           split++;
00367         }
00368       else
00369         break;
00370     }
00371 
00372 
00373   /* we will now have to check whether this solution is possible given the restrictions on the maximum load */
00374 
00375   for(i = first, load_leftOfSplit = 0, sphload_leftOfSplit = 0; i < split; i++)
00376     {
00377       load_leftOfSplit += DomainCount[i];
00378       sphload_leftOfSplit += DomainCountSph[i];
00379     }
00380 
00381   if(load_leftOfSplit > maxload * ncpu_leftOfSplit ||
00382      (load - load_leftOfSplit) > maxload * (ncpu - ncpu_leftOfSplit))
00383     {
00384       /* we did not find a viable split */
00385       return -1;
00386     }
00387 
00388   if(sphload_leftOfSplit > maxloadsph * ncpu_leftOfSplit ||
00389      (sphload - sphload_leftOfSplit) > maxloadsph * (ncpu - ncpu_leftOfSplit))
00390     {
00391       /* we did not find a viable split */
00392       return -1;
00393     }
00394 
00395   if(ncpu_leftOfSplit >= 2)
00396     ok_left = domain_findSplit(cpustart, ncpu_leftOfSplit, first, split - 1);
00397   else
00398     ok_left = 0;
00399 
00400   if((ncpu - ncpu_leftOfSplit) >= 2)
00401     ok_right = domain_findSplit(cpustart + ncpu_leftOfSplit, ncpu - ncpu_leftOfSplit, split, last);
00402   else
00403     ok_right = 0;
00404 
00405   if(ok_left == 0 && ok_right == 0)
00406     {
00407       /* found a viable split */
00408 
00409       if(ncpu_leftOfSplit == 1)
00410         {
00411           for(i = first; i < split; i++)
00412             DomainTask[i] = cpustart;
00413 
00414           list_load[cpustart] = load_leftOfSplit;
00415           list_loadsph[cpustart] = sphload_leftOfSplit;
00416           DomainStartList[cpustart] = first;
00417           DomainEndList[cpustart] = split - 1;
00418         }
00419 
00420       if((ncpu - ncpu_leftOfSplit) == 1)
00421         {
00422           for(i = split; i <= last; i++)
00423             DomainTask[i] = cpustart + ncpu_leftOfSplit;
00424 
00425           list_load[cpustart + ncpu_leftOfSplit] = load - load_leftOfSplit;
00426           list_loadsph[cpustart + ncpu_leftOfSplit] = sphload - sphload_leftOfSplit;
00427           DomainStartList[cpustart + ncpu_leftOfSplit] = split;
00428           DomainEndList[cpustart + ncpu_leftOfSplit] = last;
00429         }
00430 
00431       return 0;
00432     }
00433 
00434   /* we did not find a viable split */
00435   return -1;
00436 }
00437 
00438 
00439 
00447 void domain_shiftSplit(void)
00448 {
00449   int i, task, iter = 0, moved;
00450   double maxw, newmaxw;
00451 
00452   for(task = 0; task < NTask; task++)
00453     list_work[task] = 0;
00454 
00455   for(i = 0; i < NTopleaves; i++)
00456     list_work[DomainTask[i]] += DomainWork[i];
00457 
00458   do
00459     {
00460       for(task = 0, moved = 0; task < NTask - 1; task++)
00461         {
00462           maxw = dmax(list_work[task], list_work[task + 1]);
00463 
00464           if(list_work[task] < list_work[task + 1])
00465             {
00466               newmaxw = dmax(list_work[task] + DomainWork[DomainStartList[task + 1]],
00467                              list_work[task + 1] - DomainWork[DomainStartList[task + 1]]);
00468               if(newmaxw <= maxw)
00469                 {
00470                   if(list_load[task] + DomainCount[DomainStartList[task + 1]] <= maxload)
00471                     {
00472                       if(list_loadsph[task] + DomainCountSph[DomainStartList[task + 1]] > maxloadsph)
00473                         continue;
00474 
00475                       /* ok, we can move one domain cell from right to left */
00476                       list_work[task] += DomainWork[DomainStartList[task + 1]];
00477                       list_load[task] += DomainCount[DomainStartList[task + 1]];
00478                       list_loadsph[task] += DomainCountSph[DomainStartList[task + 1]];
00479                       list_work[task + 1] -= DomainWork[DomainStartList[task + 1]];
00480                       list_load[task + 1] -= DomainCount[DomainStartList[task + 1]];
00481                       list_loadsph[task + 1] -= DomainCountSph[DomainStartList[task + 1]];
00482 
00483                       DomainTask[DomainStartList[task + 1]] = task;
00484                       DomainStartList[task + 1] += 1;
00485                       DomainEndList[task] += 1;
00486 
00487                       moved++;
00488                     }
00489                 }
00490             }
00491           else
00492             {
00493               newmaxw = dmax(list_work[task] - DomainWork[DomainEndList[task]],
00494                              list_work[task + 1] + DomainWork[DomainEndList[task]]);
00495               if(newmaxw <= maxw)
00496                 {
00497                   if(list_load[task + 1] + DomainCount[DomainEndList[task]] <= maxload)
00498                     {
00499                       if(list_loadsph[task + 1] + DomainCountSph[DomainEndList[task]] > maxloadsph)
00500                         continue;
00501 
00502                       /* ok, we can move one domain cell from left to right */
00503                       list_work[task] -= DomainWork[DomainEndList[task]];
00504                       list_load[task] -= DomainCount[DomainEndList[task]];
00505                       list_loadsph[task] -= DomainCountSph[DomainEndList[task]];
00506                       list_work[task + 1] += DomainWork[DomainEndList[task]];
00507                       list_load[task + 1] += DomainCount[DomainEndList[task]];
00508                       list_loadsph[task + 1] += DomainCountSph[DomainEndList[task]];
00509 
00510                       DomainTask[DomainEndList[task]] = task + 1;
00511                       DomainEndList[task] -= 1;
00512                       DomainStartList[task + 1] -= 1;
00513 
00514                       moved++;
00515                     }
00516                 }
00517 
00518             }
00519         }
00520 
00521       iter++;
00522     }
00523   while(moved > 0 && iter < 10 * NTopleaves);
00524 }
00525 
00526 
00534 void domain_findExchangeNumbers(int task, int partner, int sphflag, int *send, int *recv)
00535 {
00536   int numpartA, numpartsphA, ntobesentA, maxsendA, maxsendA_old;
00537   int numpartB, numpartsphB, ntobesentB, maxsendB, maxsendB_old;
00538 
00539   numpartA = list_NumPart[task];
00540   numpartsphA = list_N_gas[task];
00541 
00542   numpartB = list_NumPart[partner];
00543   numpartsphB = list_N_gas[partner];
00544 
00545   if(sphflag == 1)
00546     {
00547       ntobesentA = toGoSph[task * NTask + partner];
00548       ntobesentB = toGoSph[partner * NTask + task];
00549     }
00550   else
00551     {
00552       ntobesentA = toGo[task * NTask + partner] - toGoSph[task * NTask + partner];
00553       ntobesentB = toGo[partner * NTask + task] - toGoSph[partner * NTask + task];
00554     }
00555 
00556   maxsendA = imin(ntobesentA, All.BunchSizeDomain);
00557   maxsendB = imin(ntobesentB, All.BunchSizeDomain);
00558 
00559   do
00560     {
00561       maxsendA_old = maxsendA;
00562       maxsendB_old = maxsendB;
00563 
00564       maxsendA = imin(All.MaxPart - numpartB + maxsendB, maxsendA);
00565       maxsendB = imin(All.MaxPart - numpartA + maxsendA, maxsendB);
00566     }
00567   while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
00568 
00569 
00570   /* now make also sure that there is enough space for SPH particeles */
00571   if(sphflag == 1)
00572     {
00573       do
00574         {
00575           maxsendA_old = maxsendA;
00576           maxsendB_old = maxsendB;
00577 
00578           maxsendA = imin(All.MaxPartSph - numpartsphB + maxsendB, maxsendA);
00579           maxsendB = imin(All.MaxPartSph - numpartsphA + maxsendA, maxsendB);
00580         }
00581       while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
00582     }
00583 
00584   *send = maxsendA;
00585   *recv = maxsendB;
00586 }
00587 
00588 
00589 
00590 
00595 void domain_exchangeParticles(int partner, int sphflag, int send_count, int recv_count)
00596 {
00597   int i, no, n, count, rep;
00598   MPI_Status status;
00599 
00600   for(n = 0, count = 0; count < send_count && n < NumPart; n++)
00601     {
00602       if(sphflag)
00603         {
00604           if(P[n].Type != 0)
00605             continue;
00606         }
00607       else
00608         {
00609           if(P[n].Type == 0)
00610             continue;
00611         }
00612 
00613       no = 0;
00614 
00615       while(TopNodes[no].Daughter >= 0)
00616         no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
00617 
00618       no = TopNodes[no].Leaf;
00619 
00620       if(DomainTask[no] == partner)
00621         {
00622           if(sphflag)           /* special reorder routine for SPH particles (need to stay at beginning) */
00623             {
00624               DomainPartBuf[count] = P[n];      /* copy particle and collect in contiguous memory */
00625               DomainKeyBuf[count] = Key[n];
00626               DomainSphBuf[count] = SphP[n];
00627 
00628               P[n] = P[N_gas - 1];
00629               P[N_gas - 1] = P[NumPart - 1];
00630 
00631               Key[n] = Key[N_gas - 1];
00632               Key[N_gas - 1] = Key[NumPart - 1];
00633 
00634               SphP[n] = SphP[N_gas - 1];
00635 
00636               N_gas--;
00637             }
00638           else
00639             {
00640               DomainPartBuf[count] = P[n];      /* copy particle and collect in contiguous memory */
00641               DomainKeyBuf[count] = Key[n];
00642               P[n] = P[NumPart - 1];
00643               Key[n] = Key[NumPart - 1];
00644             }
00645 
00646           count++;
00647           NumPart--;
00648           n--;
00649         }
00650     }
00651 
00652   if(count != send_count)
00653     {
00654       printf("Houston, we got a problem...\n");
00655       printf("ThisTask=%d count=%d send_count=%d\n", ThisTask, count, send_count);
00656       endrun(88);
00657     }
00658 
00659   /* transmit */
00660 
00661   for(rep = 0; rep < 2; rep++)
00662     {
00663       if((rep == 0 && ThisTask < partner) || (rep == 1 && ThisTask > partner))
00664         {
00665           if(send_count > 0)
00666             {
00667               MPI_Ssend(&DomainPartBuf[0], send_count * sizeof(struct particle_data), MPI_BYTE, partner,
00668                         TAG_PDATA, MPI_COMM_WORLD);
00669 
00670               MPI_Ssend(&DomainKeyBuf[0], send_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
00671                         MPI_COMM_WORLD);
00672 
00673               if(sphflag)
00674                 MPI_Ssend(&DomainSphBuf[0], send_count * sizeof(struct sph_particle_data), MPI_BYTE, partner,
00675                           TAG_SPHDATA, MPI_COMM_WORLD);
00676             }
00677         }
00678 
00679       if((rep == 1 && ThisTask < partner) || (rep == 0 && ThisTask > partner))
00680         {
00681           if(recv_count > 0)
00682             {
00683               if(sphflag)
00684                 {
00685                   if((NumPart - N_gas) > recv_count)
00686                     {
00687                       for(i = 0; i < recv_count; i++)
00688                         {
00689                           P[NumPart + i] = P[N_gas + i];
00690                           Key[NumPart + i] = Key[N_gas + i];
00691                         }
00692                     }
00693                   else
00694                     {
00695                       for(i = NumPart - 1; i >= N_gas; i--)
00696                         {
00697                           P[i + recv_count] = P[i];
00698                           Key[i + recv_count] = Key[i];
00699                         }
00700                     }
00701 
00702                   MPI_Recv(&P[N_gas], recv_count * sizeof(struct particle_data), MPI_BYTE, partner, TAG_PDATA,
00703                            MPI_COMM_WORLD, &status);
00704                   MPI_Recv(&Key[N_gas], recv_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
00705                            MPI_COMM_WORLD, &status);
00706                   MPI_Recv(&SphP[N_gas], recv_count * sizeof(struct sph_particle_data), MPI_BYTE, partner,
00707                            TAG_SPHDATA, MPI_COMM_WORLD, &status);
00708 
00709                   N_gas += recv_count;
00710                 }
00711               else
00712                 {
00713                   MPI_Recv(&P[NumPart], recv_count * sizeof(struct particle_data), MPI_BYTE, partner,
00714                            TAG_PDATA, MPI_COMM_WORLD, &status);
00715                   MPI_Recv(&Key[NumPart], recv_count * sizeof(peanokey), MPI_BYTE, partner,
00716                            TAG_KEY, MPI_COMM_WORLD, &status);
00717                 }
00718 
00719               NumPart += recv_count;
00720             }
00721         }
00722     }
00723 }
00724 
00729 void domain_countToGo(void)
00730 {
00731   int n, no;
00732 
00733   for(n = 0; n < NTask; n++)
00734     {
00735       local_toGo[n] = 0;
00736       local_toGoSph[n] = 0;
00737     }
00738 
00739   for(n = 0; n < NumPart; n++)
00740     {
00741       no = 0;
00742 
00743       while(TopNodes[no].Daughter >= 0)
00744         no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
00745 
00746       no = TopNodes[no].Leaf;
00747 
00748       if(DomainTask[no] != ThisTask)
00749         {
00750           local_toGo[DomainTask[no]] += 1;
00751           if(P[n].Type == 0)
00752             local_toGoSph[DomainTask[no]] += 1;
00753         }
00754     }
00755 
00756   MPI_Allgather(local_toGo, NTask, MPI_INT, toGo, NTask, MPI_INT, MPI_COMM_WORLD);
00757   MPI_Allgather(local_toGoSph, NTask, MPI_INT, toGoSph, NTask, MPI_INT, MPI_COMM_WORLD);
00758 }
00759 
00760 
00765 void domain_walktoptree(int no)
00766 {
00767   int i;
00768 
00769   if(TopNodes[no].Daughter == -1)
00770     {
00771       TopNodes[no].Leaf = NTopleaves;
00772       NTopleaves++;
00773     }
00774   else
00775     {
00776       for(i = 0; i < 8; i++)
00777         domain_walktoptree(TopNodes[no].Daughter + i);
00778     }
00779 }
00780 
00786 void domain_sumCost(void)
00787 {
00788   int i, n, no;
00789   double *local_DomainWork;
00790   int *local_DomainCount;
00791   int *local_DomainCountSph;
00792 
00793   local_DomainWork = malloc(NTopnodes * sizeof(double));
00794   local_DomainCount = malloc(NTopnodes * sizeof(int));
00795   local_DomainCountSph = malloc(NTopnodes * sizeof(int));
00796 
00797 
00798 
00799   NTopleaves = 0;
00800 
00801   domain_walktoptree(0);
00802 
00803   for(i = 0; i < NTopleaves; i++)
00804     {
00805       local_DomainWork[i] = 0;
00806       local_DomainCount[i] = 0;
00807       local_DomainCountSph[i] = 0;
00808     }
00809 
00810   if(ThisTask == 0)
00811     printf("NTopleaves= %d\n", NTopleaves);
00812 
00813   for(n = 0; n < NumPart; n++)
00814     {
00815       no = 0;
00816 
00817       while(TopNodes[no].Daughter >= 0)
00818         no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
00819 
00820       no = TopNodes[no].Leaf;
00821 
00822       if(P[n].Ti_endstep > P[n].Ti_begstep)
00823         local_DomainWork[no] += (1.0 + P[n].GravCost) / (P[n].Ti_endstep - P[n].Ti_begstep);
00824       else
00825         local_DomainWork[no] += (1.0 + P[n].GravCost);
00826 
00827       local_DomainCount[no] += 1;
00828       if(P[n].Type == 0)
00829         local_DomainCountSph[no] += 1;
00830     }
00831 
00832   MPI_Allreduce(local_DomainWork, DomainWork, NTopleaves, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
00833   MPI_Allreduce(local_DomainCount, DomainCount, NTopleaves, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00834   MPI_Allreduce(local_DomainCountSph, DomainCountSph, NTopleaves, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00835 
00836 
00837   free(local_DomainCountSph);
00838   free(local_DomainCount);
00839   free(local_DomainWork);
00840 }
00841 
00842 
00845 void domain_findExtent(void)
00846 {
00847   int i, j;
00848   double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3];
00849 
00850   /* determine local extension */
00851   for(j = 0; j < 3; j++)
00852     {
00853       xmin[j] = MAX_REAL_NUMBER;
00854       xmax[j] = -MAX_REAL_NUMBER;
00855     }
00856 
00857   for(i = 0; i < NumPart; i++)
00858     {
00859       for(j = 0; j < 3; j++)
00860         {
00861           if(xmin[j] > P[i].Pos[j])
00862             xmin[j] = P[i].Pos[j];
00863 
00864           if(xmax[j] < P[i].Pos[j])
00865             xmax[j] = P[i].Pos[j];
00866         }
00867     }
00868 
00869   MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00870   MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
00871 
00872   len = 0;
00873   for(j = 0; j < 3; j++)
00874     if(xmax_glob[j] - xmin_glob[j] > len)
00875       len = xmax_glob[j] - xmin_glob[j];
00876 
00877   len *= 1.001;
00878 
00879   for(j = 0; j < 3; j++)
00880     {
00881       DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]);
00882       DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len;
00883     }
00884 
00885   DomainLen = len;
00886   DomainFac = 1.0 / len * (((peanokey) 1) << (BITS_PER_DIMENSION));
00887 }
00888 
00889 
00896 void domain_determineTopTree(void)
00897 {
00898   int i, ntop_local, ntop;
00899   int *ntopnodelist, *ntopoffset;
00900 
00901   for(i = 0; i < NumPart; i++)
00902     {
00903       KeySorted[i] = Key[i] = peano_hilbert_key((P[i].Pos[0] - DomainCorner[0]) * DomainFac,
00904                                                 (P[i].Pos[1] - DomainCorner[1]) * DomainFac,
00905                                                 (P[i].Pos[2] - DomainCorner[2]) * DomainFac,
00906                                                 BITS_PER_DIMENSION);
00907     }
00908 
00909   qsort(KeySorted, NumPart, sizeof(peanokey), domain_compare_key);
00910 
00911   NTopnodes = 1;
00912   TopNodes[0].Daughter = -1;
00913   TopNodes[0].Size = PEANOCELLS;
00914   TopNodes[0].StartKey = 0;
00915   TopNodes[0].Count = NumPart;
00916   TopNodes[0].Pstart = 0;
00917 
00918   domain_topsplit_local(0, 0);
00919 
00920   toplist_local = malloc(NTopnodes * sizeof(struct topnode_exchange));
00921 
00922   for(i = 0, ntop_local = 0; i < NTopnodes; i++)
00923     {
00924       if(TopNodes[i].Daughter == -1)    /* only use leaves */
00925         {
00926           toplist_local[ntop_local].Startkey = TopNodes[i].StartKey;
00927           toplist_local[ntop_local].Count = TopNodes[i].Count;
00928           ntop_local++;
00929         }
00930     }
00931 
00932   ntopnodelist = malloc(sizeof(int) * NTask);
00933   ntopoffset = malloc(sizeof(int) * NTask);
00934 
00935   MPI_Allgather(&ntop_local, 1, MPI_INT, ntopnodelist, 1, MPI_INT, MPI_COMM_WORLD);
00936 
00937   for(i = 0, ntop = 0, ntopoffset[0] = 0; i < NTask; i++)
00938     {
00939       ntop += ntopnodelist[i];
00940       if(i > 0)
00941         ntopoffset[i] = ntopoffset[i - 1] + ntopnodelist[i - 1];
00942     }
00943 
00944 
00945   toplist = malloc(ntop * sizeof(struct topnode_exchange));
00946 
00947   for(i = 0; i < NTask; i++)
00948     {
00949       ntopnodelist[i] *= sizeof(struct topnode_exchange);
00950       ntopoffset[i] *= sizeof(struct topnode_exchange);
00951     }
00952 
00953   MPI_Allgatherv(toplist_local, ntop_local * sizeof(struct topnode_exchange), MPI_BYTE,
00954                  toplist, ntopnodelist, ntopoffset, MPI_BYTE, MPI_COMM_WORLD);
00955 
00956   qsort(toplist, ntop, sizeof(struct topnode_exchange), domain_compare_toplist);
00957 
00958   NTopnodes = 1;
00959   TopNodes[0].Daughter = -1;
00960   TopNodes[0].Size = PEANOCELLS;
00961   TopNodes[0].StartKey = 0;
00962   TopNodes[0].Count = All.TotNumPart;
00963   TopNodes[0].Pstart = 0;
00964   TopNodes[0].Blocks = ntop;
00965 
00966   domain_topsplit(0, 0);
00967 
00968   free(toplist);
00969   free(ntopoffset);
00970   free(ntopnodelist);
00971   free(toplist_local);
00972 
00973 }
00974 
00975 
00976 
00982 void domain_topsplit_local(int node, peanokey startkey)
00983 {
00984   int i, p, sub, bin;
00985 
00986   if(TopNodes[node].Size >= 8)
00987     {
00988       TopNodes[node].Daughter = NTopnodes;
00989 
00990       for(i = 0; i < 8; i++)
00991         {
00992           if(NTopnodes < MAXTOPNODES)
00993             {
00994               sub = TopNodes[node].Daughter + i;
00995               TopNodes[sub].Size = TopNodes[node].Size / 8;
00996               TopNodes[sub].Count = 0;
00997               TopNodes[sub].Daughter = -1;
00998               TopNodes[sub].StartKey = startkey + i * TopNodes[sub].Size;
00999               TopNodes[sub].Pstart = TopNodes[node].Pstart;
01000 
01001               NTopnodes++;
01002             }
01003           else
01004             {
01005               printf("task=%d: We are out of Topnodes. Increasing the constant MAXTOPNODES might help.\n",
01006                      ThisTask);
01007               fflush(stdout);
01008               endrun(13213);
01009             }
01010         }
01011 
01012       for(p = TopNodes[node].Pstart; p < TopNodes[node].Pstart + TopNodes[node].Count; p++)
01013         {
01014           bin = (KeySorted[p] - startkey) / (TopNodes[node].Size / 8);
01015 
01016           if(bin < 0 || bin > 7)
01017             {
01018               printf("task=%d: something odd has happened here. bin=%d\n", ThisTask, bin);
01019               fflush(stdout);
01020               endrun(13123123);
01021             }
01022 
01023           sub = TopNodes[node].Daughter + bin;
01024 
01025           if(TopNodes[sub].Count == 0)
01026             TopNodes[sub].Pstart = p;
01027 
01028           TopNodes[sub].Count++;
01029         }
01030 
01031       for(i = 0; i < 8; i++)
01032         {
01033           sub = TopNodes[node].Daughter + i;
01034           if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * NTask * NTask))
01035             domain_topsplit_local(sub, TopNodes[sub].StartKey);
01036         }
01037     }
01038 }
01039 
01040 
01041 
01049 void domain_topsplit(int node, peanokey startkey)
01050 {
01051   int i, p, sub, bin;
01052 
01053   if(TopNodes[node].Size >= 8)
01054     {
01055       TopNodes[node].Daughter = NTopnodes;
01056 
01057       for(i = 0; i < 8; i++)
01058         {
01059           if(NTopnodes < MAXTOPNODES)
01060             {
01061               sub = TopNodes[node].Daughter + i;
01062               TopNodes[sub].Size = TopNodes[node].Size / 8;
01063               TopNodes[sub].Count = 0;
01064               TopNodes[sub].Blocks = 0;
01065               TopNodes[sub].Daughter = -1;
01066               TopNodes[sub].StartKey = startkey + i * TopNodes[sub].Size;
01067               TopNodes[sub].Pstart = TopNodes[node].Pstart;
01068               NTopnodes++;
01069             }
01070           else
01071             {
01072               printf("Task=%d: We are out of Topnodes. Increasing the constant MAXTOPNODES might help.\n",
01073                      ThisTask);
01074               fflush(stdout);
01075               endrun(137213);
01076             }
01077         }
01078 
01079       for(p = TopNodes[node].Pstart; p < TopNodes[node].Pstart + TopNodes[node].Blocks; p++)
01080         {
01081           bin = (toplist[p].Startkey - startkey) / (TopNodes[node].Size / 8);
01082           sub = TopNodes[node].Daughter + bin;
01083 
01084           if(bin < 0 || bin > 7)
01085             endrun(77);
01086 
01087           if(TopNodes[sub].Blocks == 0)
01088             TopNodes[sub].Pstart = p;
01089 
01090           TopNodes[sub].Count += toplist[p].Count;
01091           TopNodes[sub].Blocks++;
01092         }
01093 
01094       for(i = 0; i < 8; i++)
01095         {
01096           sub = TopNodes[node].Daughter + i;
01097           if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * NTask))
01098             domain_topsplit(sub, TopNodes[sub].StartKey);
01099         }
01100     }
01101 }
01102 
01103 
01106 int domain_compare_toplist(const void *a, const void *b)
01107 {
01108   if(((struct topnode_exchange *) a)->Startkey < (((struct topnode_exchange *) b)->Startkey))
01109     return -1;
01110 
01111   if(((struct topnode_exchange *) a)->Startkey > (((struct topnode_exchange *) b)->Startkey))
01112     return +1;
01113 
01114   return 0;
01115 }
01116 
01119 int domain_compare_key(const void *a, const void *b)
01120 {
01121   if(*(peanokey *) a < *(peanokey *) b)
01122     return -1;
01123 
01124   if(*(peanokey *) a > *(peanokey *) b)
01125     return +1;
01126 
01127   return 0;
01128 }

Generated on Sun May 22 17:33:28 2005 for GADGET-2 by  doxygen 1.3.9.1