00001 #include <stdlib.h>
00002 #include <string.h>
00003 #include <math.h>
00004 #include <float.h>
00005 #include <mpi.h>
00006
00007 #include "allvars.h"
00008 #include "proto.h"
00009
00027 void gravity_tree(void)
00028 {
00029 long long ntot;
00030 int numnodes, nexportsum = 0;
00031 int i, j, iter = 0;
00032 int *numnodeslist, maxnumnodes, nexport, *numlist, *nrecv, *ndonelist;
00033 double tstart, tend, timetree = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
00034 double ewaldcount;
00035 double costtotal, ewaldtot, *costtreelist, *ewaldlist;
00036 double maxt, sumt, *timetreelist, *timecommlist;
00037 double fac, plb, plb_max, sumcomm;
00038
00039 #ifndef NOGRAVITY
00040 int *noffset, *nbuffer, *nsend, *nsend_local;
00041 long long ntotleft;
00042 int ndone, maxfill, ngrp;
00043 int k, place;
00044 int level, sendTask, recvTask;
00045 double ax, ay, az;
00046 MPI_Status status;
00047 #endif
00048
00049
00050 if(All.ComovingIntegrationOn)
00051 set_softenings();
00052
00053
00054
00055 tstart = second();
00056 if(TreeReconstructFlag)
00057 {
00058 if(ThisTask == 0)
00059 printf("Tree construction.\n");
00060
00061 force_treebuild(NumPart);
00062
00063 TreeReconstructFlag = 0;
00064
00065 if(ThisTask == 0)
00066 printf("Tree construction done.\n");
00067 }
00068 tend = second();
00069 All.CPU_TreeConstruction += timediff(tstart, tend);
00070
00071 costtotal = ewaldcount = 0;
00072
00073
00074 numlist = malloc(NTask * sizeof(int) * NTask);
00075 MPI_Allgather(&NumForceUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
00076 for(i = 0, ntot = 0; i < NTask; i++)
00077 ntot += numlist[i];
00078 free(numlist);
00079
00080
00081 #ifndef NOGRAVITY
00082 if(ThisTask == 0)
00083 printf("Begin tree force.\n");
00084
00085
00086 #ifdef SELECTIVE_NO_GRAVITY
00087 for(i = 0; i < NumPart; i++)
00088 if(((1 << P[i].Type) & (SELECTIVE_NO_GRAVITY)))
00089 P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00090 #endif
00091
00092
00093 noffset = malloc(sizeof(int) * NTask);
00094 nbuffer = malloc(sizeof(int) * NTask);
00095 nsend_local = malloc(sizeof(int) * NTask);
00096 nsend = malloc(sizeof(int) * NTask * NTask);
00097 ndonelist = malloc(sizeof(int) * NTask);
00098
00099 i = 0;
00100 ntotleft = ntot;
00101
00102 while(ntotleft > 0)
00103 {
00104 iter++;
00105
00106 for(j = 0; j < NTask; j++)
00107 nsend_local[j] = 0;
00108
00109
00110 tstart = second();
00111 for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++)
00112 if(P[i].Ti_endstep == All.Ti_Current)
00113 {
00114 ndone++;
00115
00116 for(j = 0; j < NTask; j++)
00117 Exportflag[j] = 0;
00118 #ifndef PMGRID
00119 costtotal += force_treeevaluate(i, 0, &ewaldcount);
00120 #else
00121 costtotal += force_treeevaluate_shortrange(i, 0);
00122 #endif
00123 for(j = 0; j < NTask; j++)
00124 {
00125 if(Exportflag[j])
00126 {
00127 for(k = 0; k < 3; k++)
00128 GravDataGet[nexport].u.Pos[k] = P[i].Pos[k];
00129 #ifdef UNEQUALSOFTENINGS
00130 GravDataGet[nexport].Type = P[i].Type;
00131 #ifdef ADAPTIVE_GRAVSOFT_FORGAS
00132 if(P[i].Type == 0)
00133 GravDataGet[nexport].Soft = SphP[i].Hsml;
00134 #endif
00135 #endif
00136 GravDataGet[nexport].w.OldAcc = P[i].OldAcc;
00137 GravDataIndexTable[nexport].Task = j;
00138 GravDataIndexTable[nexport].Index = i;
00139 GravDataIndexTable[nexport].SortIndex = nexport;
00140 nexport++;
00141 nexportsum++;
00142 nsend_local[j]++;
00143 }
00144 }
00145 }
00146 tend = second();
00147 timetree += timediff(tstart, tend);
00148
00149 qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key);
00150
00151 for(j = 0; j < nexport; j++)
00152 GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex];
00153
00154 for(j = 1, noffset[0] = 0; j < NTask; j++)
00155 noffset[j] = noffset[j - 1] + nsend_local[j - 1];
00156
00157 tstart = second();
00158
00159 MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
00160
00161 tend = second();
00162 timeimbalance += timediff(tstart, tend);
00163
00164
00165
00166 for(level = 1; level < (1 << PTask); level++)
00167 {
00168 tstart = second();
00169 for(j = 0; j < NTask; j++)
00170 nbuffer[j] = 0;
00171 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00172 {
00173 maxfill = 0;
00174 for(j = 0; j < NTask; j++)
00175 {
00176 if((j ^ ngrp) < NTask)
00177 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00178 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00179 }
00180 if(maxfill >= All.BunchSizeForce)
00181 break;
00182
00183 sendTask = ThisTask;
00184 recvTask = ThisTask ^ ngrp;
00185
00186 if(recvTask < NTask)
00187 {
00188 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00189 {
00190
00191 MPI_Sendrecv(&GravDataIn[noffset[recvTask]],
00192 nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE,
00193 recvTask, TAG_GRAV_A,
00194 &GravDataGet[nbuffer[ThisTask]],
00195 nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE,
00196 recvTask, TAG_GRAV_A, MPI_COMM_WORLD, &status);
00197 }
00198 }
00199
00200 for(j = 0; j < NTask; j++)
00201 if((j ^ ngrp) < NTask)
00202 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00203 }
00204 tend = second();
00205 timecommsumm += timediff(tstart, tend);
00206
00207
00208 tstart = second();
00209 for(j = 0; j < nbuffer[ThisTask]; j++)
00210 {
00211 #ifndef PMGRID
00212 costtotal += force_treeevaluate(j, 1, &ewaldcount);
00213 #else
00214 costtotal += force_treeevaluate_shortrange(j, 1);
00215 #endif
00216 }
00217 tend = second();
00218 timetree += timediff(tstart, tend);
00219
00220 tstart = second();
00221 MPI_Barrier(MPI_COMM_WORLD);
00222 tend = second();
00223 timeimbalance += timediff(tstart, tend);
00224
00225
00226 tstart = second();
00227 for(j = 0; j < NTask; j++)
00228 nbuffer[j] = 0;
00229 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00230 {
00231 maxfill = 0;
00232 for(j = 0; j < NTask; j++)
00233 {
00234 if((j ^ ngrp) < NTask)
00235 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00236 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00237 }
00238 if(maxfill >= All.BunchSizeForce)
00239 break;
00240
00241 sendTask = ThisTask;
00242 recvTask = ThisTask ^ ngrp;
00243 if(recvTask < NTask)
00244 {
00245 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00246 {
00247
00248 MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]],
00249 nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in),
00250 MPI_BYTE, recvTask, TAG_GRAV_B,
00251 &GravDataOut[noffset[recvTask]],
00252 nsend_local[recvTask] * sizeof(struct gravdata_in),
00253 MPI_BYTE, recvTask, TAG_GRAV_B, MPI_COMM_WORLD, &status);
00254
00255
00256 for(j = 0; j < nsend_local[recvTask]; j++)
00257 {
00258 place = GravDataIndexTable[noffset[recvTask] + j].Index;
00259
00260 for(k = 0; k < 3; k++)
00261 P[place].GravAccel[k] += GravDataOut[j + noffset[recvTask]].u.Acc[k];
00262
00263 P[place].GravCost += GravDataOut[j + noffset[recvTask]].w.Ninteractions;
00264 }
00265 }
00266 }
00267
00268 for(j = 0; j < NTask; j++)
00269 if((j ^ ngrp) < NTask)
00270 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00271 }
00272 tend = second();
00273 timecommsumm += timediff(tstart, tend);
00274
00275 level = ngrp - 1;
00276 }
00277
00278 MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
00279 for(j = 0; j < NTask; j++)
00280 ntotleft -= ndonelist[j];
00281 }
00282
00283 free(ndonelist);
00284 free(nsend);
00285 free(nsend_local);
00286 free(nbuffer);
00287 free(noffset);
00288
00289
00290
00291 #ifndef PERIODIC
00292 #ifndef PMGRID
00293 if(All.ComovingIntegrationOn)
00294 {
00295 fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G;
00296
00297 for(i = 0; i < NumPart; i++)
00298 if(P[i].Ti_endstep == All.Ti_Current)
00299 for(j = 0; j < 3; j++)
00300 P[i].GravAccel[j] += fac * P[i].Pos[j];
00301 }
00302 #endif
00303 #endif
00304
00305 for(i = 0; i < NumPart; i++)
00306 if(P[i].Ti_endstep == All.Ti_Current)
00307 {
00308 #ifdef PMGRID
00309 ax = P[i].GravAccel[0] + P[i].GravPM[0] / All.G;
00310 ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G;
00311 az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G;
00312 #else
00313 ax = P[i].GravAccel[0];
00314 ay = P[i].GravAccel[1];
00315 az = P[i].GravAccel[2];
00316 #endif
00317 P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az);
00318 }
00319
00320
00321 if(All.TypeOfOpeningCriterion == 1)
00322 All.ErrTolTheta = 0;
00323
00324
00325 for(i = 0; i < NumPart; i++)
00326 if(P[i].Ti_endstep == All.Ti_Current)
00327 for(j = 0; j < 3; j++)
00328 P[i].GravAccel[j] *= All.G;
00329
00330
00331
00332
00333 #ifndef PERIODIC
00334 #ifndef PMGRID
00335 if(All.ComovingIntegrationOn == 0)
00336 {
00337 fac = All.OmegaLambda * All.Hubble * All.Hubble;
00338
00339 for(i = 0; i < NumPart; i++)
00340 if(P[i].Ti_endstep == All.Ti_Current)
00341 for(j = 0; j < 3; j++)
00342 P[i].GravAccel[j] += fac * P[i].Pos[j];
00343 }
00344 #endif
00345 #endif
00346
00347 #ifdef SELECTIVE_NO_GRAVITY
00348 for(i = 0; i < NumPart; i++)
00349 if(P[i].Ti_endstep < 0)
00350 P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00351 #endif
00352
00353 if(ThisTask == 0)
00354 printf("tree is done.\n");
00355
00356 #else
00357
00358 for(i = 0; i < NumPart; i++)
00359 if(P[i].Ti_endstep == All.Ti_Current)
00360 for(j = 0; j < 3; j++)
00361 P[i].GravAccel[j] = 0;
00362
00363 #endif
00364
00365
00366
00367
00368
00369
00370
00371
00372 timetreelist = malloc(sizeof(double) * NTask);
00373 timecommlist = malloc(sizeof(double) * NTask);
00374 costtreelist = malloc(sizeof(double) * NTask);
00375 numnodeslist = malloc(sizeof(int) * NTask);
00376 ewaldlist = malloc(sizeof(double) * NTask);
00377 nrecv = malloc(sizeof(int) * NTask);
00378
00379 numnodes = Numnodestree;
00380
00381 MPI_Gather(&costtotal, 1, MPI_DOUBLE, costtreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00382 MPI_Gather(&numnodes, 1, MPI_INT, numnodeslist, 1, MPI_INT, 0, MPI_COMM_WORLD);
00383 MPI_Gather(&timetree, 1, MPI_DOUBLE, timetreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00384 MPI_Gather(&timecommsumm, 1, MPI_DOUBLE, timecommlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00385 MPI_Gather(&NumPart, 1, MPI_INT, nrecv, 1, MPI_INT, 0, MPI_COMM_WORLD);
00386 MPI_Gather(&ewaldcount, 1, MPI_DOUBLE, ewaldlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
00387 MPI_Reduce(&nexportsum, &nexport, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
00388 MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00389
00390 if(ThisTask == 0)
00391 {
00392 All.TotNumOfForces += ntot;
00393
00394 fprintf(FdTimings, "Step= %d t= %g dt= %g \n", All.NumCurrentTiStep, All.Time, All.TimeStep);
00395 fprintf(FdTimings, "Nf= %d%09d total-Nf= %d%09d ex-frac= %g iter= %d\n",
00396 (int) (ntot / 1000000000), (int) (ntot % 1000000000),
00397 (int) (All.TotNumOfForces / 1000000000), (int) (All.TotNumOfForces % 1000000000),
00398 nexport / ((double) ntot), iter);
00399
00400
00401 fac = NTask / ((double) All.TotNumPart);
00402
00403 for(i = 0, maxt = timetreelist[0], sumt = 0, plb_max = 0,
00404 maxnumnodes = 0, costtotal = 0, sumcomm = 0, ewaldtot = 0; i < NTask; i++)
00405 {
00406 costtotal += costtreelist[i];
00407
00408 sumcomm += timecommlist[i];
00409
00410 if(maxt < timetreelist[i])
00411 maxt = timetreelist[i];
00412 sumt += timetreelist[i];
00413
00414 plb = nrecv[i] * fac;
00415
00416 if(plb > plb_max)
00417 plb_max = plb;
00418
00419 if(numnodeslist[i] > maxnumnodes)
00420 maxnumnodes = numnodeslist[i];
00421
00422 ewaldtot += ewaldlist[i];
00423 }
00424 fprintf(FdTimings, "work-load balance: %g max=%g avg=%g PE0=%g\n",
00425 maxt / (sumt / NTask), maxt, sumt / NTask, timetreelist[0]);
00426 fprintf(FdTimings, "particle-load balance: %g\n", plb_max);
00427 fprintf(FdTimings, "max. nodes: %d, filled: %g\n", maxnumnodes,
00428 maxnumnodes / (All.TreeAllocFactor * All.MaxPart));
00429 fprintf(FdTimings, "part/sec=%g | %g ia/part=%g (%g)\n", ntot / (sumt + 1.0e-20),
00430 ntot / (maxt * NTask), ((double) (costtotal)) / ntot, ((double) ewaldtot) / ntot);
00431 fprintf(FdTimings, "\n");
00432
00433 fflush(FdTimings);
00434
00435 All.CPU_TreeWalk += sumt / NTask;
00436 All.CPU_Imbalance += sumimbalance / NTask;
00437 All.CPU_CommSum += sumcomm / NTask;
00438 }
00439
00440 free(nrecv);
00441 free(ewaldlist);
00442 free(numnodeslist);
00443 free(costtreelist);
00444 free(timecommlist);
00445 free(timetreelist);
00446 }
00447
00448
00449
00454 void set_softenings(void)
00455 {
00456 int i;
00457
00458 if(All.ComovingIntegrationOn)
00459 {
00460 if(All.SofteningGas * All.Time > All.SofteningGasMaxPhys)
00461 All.SofteningTable[0] = All.SofteningGasMaxPhys / All.Time;
00462 else
00463 All.SofteningTable[0] = All.SofteningGas;
00464
00465 if(All.SofteningHalo * All.Time > All.SofteningHaloMaxPhys)
00466 All.SofteningTable[1] = All.SofteningHaloMaxPhys / All.Time;
00467 else
00468 All.SofteningTable[1] = All.SofteningHalo;
00469
00470 if(All.SofteningDisk * All.Time > All.SofteningDiskMaxPhys)
00471 All.SofteningTable[2] = All.SofteningDiskMaxPhys / All.Time;
00472 else
00473 All.SofteningTable[2] = All.SofteningDisk;
00474
00475 if(All.SofteningBulge * All.Time > All.SofteningBulgeMaxPhys)
00476 All.SofteningTable[3] = All.SofteningBulgeMaxPhys / All.Time;
00477 else
00478 All.SofteningTable[3] = All.SofteningBulge;
00479
00480 if(All.SofteningStars * All.Time > All.SofteningStarsMaxPhys)
00481 All.SofteningTable[4] = All.SofteningStarsMaxPhys / All.Time;
00482 else
00483 All.SofteningTable[4] = All.SofteningStars;
00484
00485 if(All.SofteningBndry * All.Time > All.SofteningBndryMaxPhys)
00486 All.SofteningTable[5] = All.SofteningBndryMaxPhys / All.Time;
00487 else
00488 All.SofteningTable[5] = All.SofteningBndry;
00489 }
00490 else
00491 {
00492 All.SofteningTable[0] = All.SofteningGas;
00493 All.SofteningTable[1] = All.SofteningHalo;
00494 All.SofteningTable[2] = All.SofteningDisk;
00495 All.SofteningTable[3] = All.SofteningBulge;
00496 All.SofteningTable[4] = All.SofteningStars;
00497 All.SofteningTable[5] = All.SofteningBndry;
00498 }
00499
00500 for(i = 0; i < 6; i++)
00501 All.ForceSoftening[i] = 2.8 * All.SofteningTable[i];
00502
00503 All.MinGasHsml = All.MinGasHsmlFractional * All.ForceSoftening[0];
00504 }
00505
00506
00511 int grav_tree_compare_key(const void *a, const void *b)
00512 {
00513 if(((struct gravdata_index *) a)->Task < (((struct gravdata_index *) b)->Task))
00514 return -1;
00515
00516 if(((struct gravdata_index *) a)->Task > (((struct gravdata_index *) b)->Task))
00517 return +1;
00518
00519 return 0;
00520 }