Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

gravtree.c

Go to the documentation of this file.
00001 #include <stdlib.h>
00002 #include <string.h>
00003 #include <math.h>
00004 #include <float.h>
00005 #include <mpi.h>
00006 
00007 #include "allvars.h"
00008 #include "proto.h"
00009 
00027 void gravity_tree(void)
00028 {
00029   long long ntot;
00030   int numnodes, nexportsum = 0;
00031   int i, j, iter = 0;
00032   int *numnodeslist, maxnumnodes, nexport, *numlist, *nrecv, *ndonelist;
00033   double tstart, tend, timetree = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
00034   double ewaldcount;
00035   double costtotal, ewaldtot, *costtreelist, *ewaldlist;
00036   double maxt, sumt, *timetreelist, *timecommlist;
00037   double fac, plb, plb_max, sumcomm;
00038 
00039 #ifndef NOGRAVITY
00040   int *noffset, *nbuffer, *nsend, *nsend_local;
00041   long long ntotleft;
00042   int ndone, maxfill, ngrp;
00043   int k, place;
00044   int level, sendTask, recvTask;
00045   double ax, ay, az;
00046   MPI_Status status;
00047 #endif
00048 
00049   /* set new softening lengths */
00050   if(All.ComovingIntegrationOn)
00051     set_softenings();
00052 
00053 
00054   /* contruct tree if needed */
00055   tstart = second();
00056   if(TreeReconstructFlag)
00057     {
00058       if(ThisTask == 0)
00059         printf("Tree construction.\n");
00060 
00061       force_treebuild(NumPart);
00062 
00063       TreeReconstructFlag = 0;
00064 
00065       if(ThisTask == 0)
00066         printf("Tree construction done.\n");
00067     }
00068   tend = second();
00069   All.CPU_TreeConstruction += timediff(tstart, tend);
00070 
00071   costtotal = ewaldcount = 0;
00072 
00073   /* Note: 'NumForceUpdate' has already been determined in find_next_sync_point_and_drift() */
00074   numlist = malloc(NTask * sizeof(int) * NTask);
00075   MPI_Allgather(&NumForceUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
00076   for(i = 0, ntot = 0; i < NTask; i++)
00077     ntot += numlist[i];
00078   free(numlist);
00079 
00080 
00081 #ifndef NOGRAVITY
00082   if(ThisTask == 0)
00083     printf("Begin tree force.\n");
00084 
00085 
00086 #ifdef SELECTIVE_NO_GRAVITY
00087   for(i = 0; i < NumPart; i++)
00088     if(((1 << P[i].Type) & (SELECTIVE_NO_GRAVITY)))
00089       P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00090 #endif
00091 
00092 
00093   noffset = malloc(sizeof(int) * NTask);        /* offsets of bunches in common list */
00094   nbuffer = malloc(sizeof(int) * NTask);
00095   nsend_local = malloc(sizeof(int) * NTask);
00096   nsend = malloc(sizeof(int) * NTask * NTask);
00097   ndonelist = malloc(sizeof(int) * NTask);
00098 
00099   i = 0;                        /* beginn with this index */
00100   ntotleft = ntot;              /* particles left for all tasks together */
00101 
00102   while(ntotleft > 0)
00103     {
00104       iter++;
00105 
00106       for(j = 0; j < NTask; j++)
00107         nsend_local[j] = 0;
00108 
00109       /* do local particles and prepare export list */
00110       tstart = second();
00111       for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++)
00112         if(P[i].Ti_endstep == All.Ti_Current)
00113           {
00114             ndone++;
00115 
00116             for(j = 0; j < NTask; j++)
00117               Exportflag[j] = 0;
00118 #ifndef PMGRID
00119             costtotal += force_treeevaluate(i, 0, &ewaldcount);
00120 #else
00121             costtotal += force_treeevaluate_shortrange(i, 0);
00122 #endif
00123             for(j = 0; j < NTask; j++)
00124               {
00125                 if(Exportflag[j])
00126                   {
00127                     for(k = 0; k < 3; k++)
00128                       GravDataGet[nexport].u.Pos[k] = P[i].Pos[k];
00129 #ifdef UNEQUALSOFTENINGS
00130                     GravDataGet[nexport].Type = P[i].Type;
00131 #ifdef ADAPTIVE_GRAVSOFT_FORGAS
00132                     if(P[i].Type == 0)
00133                       GravDataGet[nexport].Soft = SphP[i].Hsml;
00134 #endif
00135 #endif
00136                     GravDataGet[nexport].w.OldAcc = P[i].OldAcc;
00137                     GravDataIndexTable[nexport].Task = j;
00138                     GravDataIndexTable[nexport].Index = i;
00139                     GravDataIndexTable[nexport].SortIndex = nexport;
00140                     nexport++;
00141                     nexportsum++;
00142                     nsend_local[j]++;
00143                   }
00144               }
00145           }
00146       tend = second();
00147       timetree += timediff(tstart, tend);
00148 
00149       qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key);
00150 
00151       for(j = 0; j < nexport; j++)
00152         GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex];
00153 
00154       for(j = 1, noffset[0] = 0; j < NTask; j++)
00155         noffset[j] = noffset[j - 1] + nsend_local[j - 1];
00156 
00157       tstart = second();
00158 
00159       MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
00160 
00161       tend = second();
00162       timeimbalance += timediff(tstart, tend);
00163 
00164       /* now do the particles that need to be exported */
00165 
00166       for(level = 1; level < (1 << PTask); level++)
00167         {
00168           tstart = second();
00169           for(j = 0; j < NTask; j++)
00170             nbuffer[j] = 0;
00171           for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00172             {
00173               maxfill = 0;
00174               for(j = 0; j < NTask; j++)
00175                 {
00176                   if((j ^ ngrp) < NTask)
00177                     if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00178                       maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00179                 }
00180               if(maxfill >= All.BunchSizeForce)
00181                 break;
00182 
00183               sendTask = ThisTask;
00184               recvTask = ThisTask ^ ngrp;
00185 
00186               if(recvTask < NTask)
00187                 {
00188                   if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00189                     {
00190                       /* get the particles */
00191                       MPI_Sendrecv(&GravDataIn[noffset[recvTask]],
00192                                    nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE,
00193                                    recvTask, TAG_GRAV_A,
00194                                    &GravDataGet[nbuffer[ThisTask]],
00195                                    nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE,
00196                                    recvTask, TAG_GRAV_A, MPI_COMM_WORLD, &status);
00197                     }
00198                 }
00199 
00200               for(j = 0; j < NTask; j++)
00201                 if((j ^ ngrp) < NTask)
00202                   nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00203             }
00204           tend = second();
00205           timecommsumm += timediff(tstart, tend);
00206 
00207 
00208           tstart = second();
00209           for(j = 0; j < nbuffer[ThisTask]; j++)
00210             {
00211 #ifndef PMGRID
00212               costtotal += force_treeevaluate(j, 1, &ewaldcount);
00213 #else
00214               costtotal += force_treeevaluate_shortrange(j, 1);
00215 #endif
00216             }
00217           tend = second();
00218           timetree += timediff(tstart, tend);
00219 
00220           tstart = second();
00221           MPI_Barrier(MPI_COMM_WORLD);
00222           tend = second();
00223           timeimbalance += timediff(tstart, tend);
00224 
00225           /* get the result */
00226           tstart = second();
00227           for(j = 0; j < NTask; j++)
00228             nbuffer[j] = 0;
00229           for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00230             {
00231               maxfill = 0;
00232               for(j = 0; j < NTask; j++)
00233                 {
00234                   if((j ^ ngrp) < NTask)
00235                     if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00236                       maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00237                 }
00238               if(maxfill >= All.BunchSizeForce)
00239                 break;
00240 
00241               sendTask = ThisTask;
00242               recvTask = ThisTask ^ ngrp;
00243               if(recvTask < NTask)
00244                 {
00245                   if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00246                     {
00247                       /* send the results */
00248                       MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]],
00249                                    nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in),
00250                                    MPI_BYTE, recvTask, TAG_GRAV_B,
00251                                    &GravDataOut[noffset[recvTask]],
00252                                    nsend_local[recvTask] * sizeof(struct gravdata_in),
00253                                    MPI_BYTE, recvTask, TAG_GRAV_B, MPI_COMM_WORLD, &status);
00254 
00255                       /* add the result to the particles */
00256                       for(j = 0; j < nsend_local[recvTask]; j++)
00257                         {
00258                           place = GravDataIndexTable[noffset[recvTask] + j].Index;
00259 
00260                           for(k = 0; k < 3; k++)
00261                             P[place].GravAccel[k] += GravDataOut[j + noffset[recvTask]].u.Acc[k];
00262 
00263                           P[place].GravCost += GravDataOut[j + noffset[recvTask]].w.Ninteractions;
00264                         }
00265                     }
00266                 }
00267 
00268               for(j = 0; j < NTask; j++)
00269                 if((j ^ ngrp) < NTask)
00270                   nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00271             }
00272           tend = second();
00273           timecommsumm += timediff(tstart, tend);
00274 
00275           level = ngrp - 1;
00276         }
00277 
00278       MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
00279       for(j = 0; j < NTask; j++)
00280         ntotleft -= ndonelist[j];
00281     }
00282 
00283   free(ndonelist);
00284   free(nsend);
00285   free(nsend_local);
00286   free(nbuffer);
00287   free(noffset);
00288 
00289   /* now add things for comoving integration */
00290 
00291 #ifndef PERIODIC
00292 #ifndef PMGRID
00293   if(All.ComovingIntegrationOn)
00294     {
00295       fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G;
00296 
00297       for(i = 0; i < NumPart; i++)
00298         if(P[i].Ti_endstep == All.Ti_Current)
00299           for(j = 0; j < 3; j++)
00300             P[i].GravAccel[j] += fac * P[i].Pos[j];
00301     }
00302 #endif
00303 #endif
00304 
00305   for(i = 0; i < NumPart; i++)
00306     if(P[i].Ti_endstep == All.Ti_Current)
00307       {
00308 #ifdef PMGRID
00309         ax = P[i].GravAccel[0] + P[i].GravPM[0] / All.G;
00310         ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G;
00311         az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G;
00312 #else
00313         ax = P[i].GravAccel[0];
00314         ay = P[i].GravAccel[1];
00315         az = P[i].GravAccel[2];
00316 #endif
00317         P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az);
00318       }
00319 
00320 
00321   if(All.TypeOfOpeningCriterion == 1)
00322     All.ErrTolTheta = 0;        /* This will switch to the relative opening criterion for the following force computations */
00323 
00324   /*  muliply by G */
00325   for(i = 0; i < NumPart; i++)
00326     if(P[i].Ti_endstep == All.Ti_Current)
00327       for(j = 0; j < 3; j++)
00328         P[i].GravAccel[j] *= All.G;
00329 
00330 
00331   /* Finally, the following factor allows a computation of a cosmological simulation 
00332      with vacuum energy in physical coordinates */
00333 #ifndef PERIODIC
00334 #ifndef PMGRID
00335   if(All.ComovingIntegrationOn == 0)
00336     {
00337       fac = All.OmegaLambda * All.Hubble * All.Hubble;
00338 
00339       for(i = 0; i < NumPart; i++)
00340         if(P[i].Ti_endstep == All.Ti_Current)
00341           for(j = 0; j < 3; j++)
00342             P[i].GravAccel[j] += fac * P[i].Pos[j];
00343     }
00344 #endif
00345 #endif
00346 
00347 #ifdef SELECTIVE_NO_GRAVITY
00348   for(i = 0; i < NumPart; i++)
00349     if(P[i].Ti_endstep < 0)
00350       P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00351 #endif
00352 
00353   if(ThisTask == 0)
00354     printf("tree is done.\n");
00355 
00356 #else /* gravity is switched off */
00357 
00358   for(i = 0; i < NumPart; i++)
00359     if(P[i].Ti_endstep == All.Ti_Current)
00360       for(j = 0; j < 3; j++)
00361         P[i].GravAccel[j] = 0;
00362 
00363 #endif
00364 
00365 
00366 
00367 
00368   /* Now the force computation is finished */
00369 
00370   /*  gather some diagnostic information */
00371 
00372   timetreelist = malloc(sizeof(double) * NTask);
00373   timecommlist = malloc(sizeof(double) * NTask);
00374   costtreelist = malloc(sizeof(double) * NTask);
00375   numnodeslist = malloc(sizeof(int) * NTask);
00376   ewaldlist = malloc(sizeof(double) * NTask);
00377   nrecv = malloc(sizeof(int) * NTask);
00378 
00379   numnodes = Numnodestree;
00380 
00381   MPI_Gather(&costtotal, 1, MPI_DOUBLE, costtreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00382   MPI_Gather(&numnodes, 1, MPI_INT, numnodeslist, 1, MPI_INT, 0, MPI_COMM_WORLD);
00383   MPI_Gather(&timetree, 1, MPI_DOUBLE, timetreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00384   MPI_Gather(&timecommsumm, 1, MPI_DOUBLE, timecommlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00385   MPI_Gather(&NumPart, 1, MPI_INT, nrecv, 1, MPI_INT, 0, MPI_COMM_WORLD);
00386   MPI_Gather(&ewaldcount, 1, MPI_DOUBLE, ewaldlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00387   MPI_Reduce(&nexportsum, &nexport, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
00388   MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00389 
00390   if(ThisTask == 0)
00391     {
00392       All.TotNumOfForces += ntot;
00393 
00394       fprintf(FdTimings, "Step= %d  t= %g  dt= %g \n", All.NumCurrentTiStep, All.Time, All.TimeStep);
00395       fprintf(FdTimings, "Nf= %d%09d  total-Nf= %d%09d  ex-frac= %g  iter= %d\n",
00396               (int) (ntot / 1000000000), (int) (ntot % 1000000000),
00397               (int) (All.TotNumOfForces / 1000000000), (int) (All.TotNumOfForces % 1000000000),
00398               nexport / ((double) ntot), iter);
00399       /* note: on Linux, the 8-byte integer could be printed with the format identifier "%qd", but doesn't work on AIX */
00400 
00401       fac = NTask / ((double) All.TotNumPart);
00402 
00403       for(i = 0, maxt = timetreelist[0], sumt = 0, plb_max = 0,
00404           maxnumnodes = 0, costtotal = 0, sumcomm = 0, ewaldtot = 0; i < NTask; i++)
00405         {
00406           costtotal += costtreelist[i];
00407 
00408           sumcomm += timecommlist[i];
00409 
00410           if(maxt < timetreelist[i])
00411             maxt = timetreelist[i];
00412           sumt += timetreelist[i];
00413 
00414           plb = nrecv[i] * fac;
00415 
00416           if(plb > plb_max)
00417             plb_max = plb;
00418 
00419           if(numnodeslist[i] > maxnumnodes)
00420             maxnumnodes = numnodeslist[i];
00421 
00422           ewaldtot += ewaldlist[i];
00423         }
00424       fprintf(FdTimings, "work-load balance: %g  max=%g avg=%g PE0=%g\n",
00425               maxt / (sumt / NTask), maxt, sumt / NTask, timetreelist[0]);
00426       fprintf(FdTimings, "particle-load balance: %g\n", plb_max);
00427       fprintf(FdTimings, "max. nodes: %d, filled: %g\n", maxnumnodes,
00428               maxnumnodes / (All.TreeAllocFactor * All.MaxPart));
00429       fprintf(FdTimings, "part/sec=%g | %g  ia/part=%g (%g)\n", ntot / (sumt + 1.0e-20),
00430               ntot / (maxt * NTask), ((double) (costtotal)) / ntot, ((double) ewaldtot) / ntot);
00431       fprintf(FdTimings, "\n");
00432 
00433       fflush(FdTimings);
00434 
00435       All.CPU_TreeWalk += sumt / NTask;
00436       All.CPU_Imbalance += sumimbalance / NTask;
00437       All.CPU_CommSum += sumcomm / NTask;
00438     }
00439 
00440   free(nrecv);
00441   free(ewaldlist);
00442   free(numnodeslist);
00443   free(costtreelist);
00444   free(timecommlist);
00445   free(timetreelist);
00446 }
00447 
00448 
00449 
00454 void set_softenings(void)
00455 {
00456   int i;
00457 
00458   if(All.ComovingIntegrationOn)
00459     {
00460       if(All.SofteningGas * All.Time > All.SofteningGasMaxPhys)
00461         All.SofteningTable[0] = All.SofteningGasMaxPhys / All.Time;
00462       else
00463         All.SofteningTable[0] = All.SofteningGas;
00464       
00465       if(All.SofteningHalo * All.Time > All.SofteningHaloMaxPhys)
00466         All.SofteningTable[1] = All.SofteningHaloMaxPhys / All.Time;
00467       else
00468         All.SofteningTable[1] = All.SofteningHalo;
00469       
00470       if(All.SofteningDisk * All.Time > All.SofteningDiskMaxPhys)
00471         All.SofteningTable[2] = All.SofteningDiskMaxPhys / All.Time;
00472       else
00473         All.SofteningTable[2] = All.SofteningDisk;
00474       
00475       if(All.SofteningBulge * All.Time > All.SofteningBulgeMaxPhys)
00476         All.SofteningTable[3] = All.SofteningBulgeMaxPhys / All.Time;
00477       else
00478         All.SofteningTable[3] = All.SofteningBulge;
00479       
00480       if(All.SofteningStars * All.Time > All.SofteningStarsMaxPhys)
00481         All.SofteningTable[4] = All.SofteningStarsMaxPhys / All.Time;
00482       else
00483         All.SofteningTable[4] = All.SofteningStars;
00484       
00485       if(All.SofteningBndry * All.Time > All.SofteningBndryMaxPhys)
00486         All.SofteningTable[5] = All.SofteningBndryMaxPhys / All.Time;
00487       else
00488         All.SofteningTable[5] = All.SofteningBndry;
00489     }
00490   else
00491     {
00492       All.SofteningTable[0] = All.SofteningGas;
00493       All.SofteningTable[1] = All.SofteningHalo;
00494       All.SofteningTable[2] = All.SofteningDisk;
00495       All.SofteningTable[3] = All.SofteningBulge;
00496       All.SofteningTable[4] = All.SofteningStars;
00497       All.SofteningTable[5] = All.SofteningBndry;
00498     }
00499 
00500   for(i = 0; i < 6; i++)
00501     All.ForceSoftening[i] = 2.8 * All.SofteningTable[i];
00502 
00503   All.MinGasHsml = All.MinGasHsmlFractional * All.ForceSoftening[0];
00504 }
00505 
00506 
00511 int grav_tree_compare_key(const void *a, const void *b)
00512 {
00513   if(((struct gravdata_index *) a)->Task < (((struct gravdata_index *) b)->Task))
00514     return -1;
00515 
00516   if(((struct gravdata_index *) a)->Task > (((struct gravdata_index *) b)->Task))
00517     return +1;
00518 
00519   return 0;
00520 }

Generated on Sun May 22 17:33:28 2005 for GADGET-2 by  doxygen 1.3.9.1