00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006
00007 #include "allvars.h"
00008 #include "proto.h"
00009
00010
00021 #ifdef PERIODIC
00022 static double boxSize, boxHalf;
00023
00024 #ifdef LONG_X
00025 static double boxSize_X, boxHalf_X;
00026 #else
00027 #define boxSize_X boxSize
00028 #define boxHalf_X boxHalf
00029 #endif
00030 #ifdef LONG_Y
00031 static double boxSize_Y, boxHalf_Y;
00032 #else
00033 #define boxSize_Y boxSize
00034 #define boxHalf_Y boxHalf
00035 #endif
00036 #ifdef LONG_Z
00037 static double boxSize_Z, boxHalf_Z;
00038 #else
00039 #define boxSize_Z boxSize
00040 #define boxHalf_Z boxHalf
00041 #endif
00042 #endif
00043
00044
00056 void density(void)
00057 {
00058 long long ntot, ntotleft;
00059 int *noffset, *nbuffer, *nsend, *nsend_local, *numlist, *ndonelist;
00060 int i, j, n, ndone, npleft, maxfill, source, iter = 0;
00061 int level, ngrp, sendTask, recvTask, place, nexport;
00062 double dt_entr, tstart, tend, tstart_ngb = 0, tend_ngb = 0;
00063 double sumt, sumcomm, timengb, sumtimengb;
00064 double timecomp = 0, timeimbalance = 0, timecommsumm = 0, sumimbalance;
00065 MPI_Status status;
00066
00067 #ifdef PERIODIC
00068 boxSize = All.BoxSize;
00069 boxHalf = 0.5 * All.BoxSize;
00070 #ifdef LONG_X
00071 boxHalf_X = boxHalf * LONG_X;
00072 boxSize_X = boxSize * LONG_X;
00073 #endif
00074 #ifdef LONG_Y
00075 boxHalf_Y = boxHalf * LONG_Y;
00076 boxSize_Y = boxSize * LONG_Y;
00077 #endif
00078 #ifdef LONG_Z
00079 boxHalf_Z = boxHalf * LONG_Z;
00080 boxSize_Z = boxSize * LONG_Z;
00081 #endif
00082 #endif
00083
00084
00085 noffset = malloc(sizeof(int) * NTask);
00086 nbuffer = malloc(sizeof(int) * NTask);
00087 nsend_local = malloc(sizeof(int) * NTask);
00088 nsend = malloc(sizeof(int) * NTask * NTask);
00089 ndonelist = malloc(sizeof(int) * NTask);
00090
00091 for(n = 0, NumSphUpdate = 0; n < N_gas; n++)
00092 {
00093 SphP[n].Left = SphP[n].Right = 0;
00094
00095 if(P[n].Ti_endstep == All.Ti_Current)
00096 NumSphUpdate++;
00097 }
00098
00099 numlist = malloc(NTask * sizeof(int) * NTask);
00100 MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
00101 for(i = 0, ntot = 0; i < NTask; i++)
00102 ntot += numlist[i];
00103 free(numlist);
00104
00105
00106
00107
00108
00109
00110 do
00111 {
00112 i = 0;
00113 ntotleft = ntot;
00114
00115 while(ntotleft > 0)
00116 {
00117 for(j = 0; j < NTask; j++)
00118 nsend_local[j] = 0;
00119
00120
00121 tstart = second();
00122 for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeDensity - NTask; i++)
00123 if(P[i].Ti_endstep == All.Ti_Current)
00124 {
00125 ndone++;
00126
00127 for(j = 0; j < NTask; j++)
00128 Exportflag[j] = 0;
00129
00130 density_evaluate(i, 0);
00131
00132 for(j = 0; j < NTask; j++)
00133 {
00134 if(Exportflag[j])
00135 {
00136 DensDataIn[nexport].Pos[0] = P[i].Pos[0];
00137 DensDataIn[nexport].Pos[1] = P[i].Pos[1];
00138 DensDataIn[nexport].Pos[2] = P[i].Pos[2];
00139 DensDataIn[nexport].Vel[0] = SphP[i].VelPred[0];
00140 DensDataIn[nexport].Vel[1] = SphP[i].VelPred[1];
00141 DensDataIn[nexport].Vel[2] = SphP[i].VelPred[2];
00142 DensDataIn[nexport].Hsml = SphP[i].Hsml;
00143 DensDataIn[nexport].Index = i;
00144 DensDataIn[nexport].Task = j;
00145 nexport++;
00146 nsend_local[j]++;
00147 }
00148 }
00149 }
00150 tend = second();
00151 timecomp += timediff(tstart, tend);
00152
00153 qsort(DensDataIn, nexport, sizeof(struct densdata_in), dens_compare_key);
00154
00155 for(j = 1, noffset[0] = 0; j < NTask; j++)
00156 noffset[j] = noffset[j - 1] + nsend_local[j - 1];
00157
00158 tstart = second();
00159
00160 MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
00161
00162 tend = second();
00163 timeimbalance += timediff(tstart, tend);
00164
00165
00166
00167
00168 for(level = 1; level < (1 << PTask); level++)
00169 {
00170 tstart = second();
00171 for(j = 0; j < NTask; j++)
00172 nbuffer[j] = 0;
00173 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00174 {
00175 maxfill = 0;
00176 for(j = 0; j < NTask; j++)
00177 {
00178 if((j ^ ngrp) < NTask)
00179 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00180 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00181 }
00182 if(maxfill >= All.BunchSizeDensity)
00183 break;
00184
00185 sendTask = ThisTask;
00186 recvTask = ThisTask ^ ngrp;
00187
00188 if(recvTask < NTask)
00189 {
00190 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00191 {
00192
00193 MPI_Sendrecv(&DensDataIn[noffset[recvTask]],
00194 nsend_local[recvTask] * sizeof(struct densdata_in), MPI_BYTE,
00195 recvTask, TAG_DENS_A,
00196 &DensDataGet[nbuffer[ThisTask]],
00197 nsend[recvTask * NTask + ThisTask] * sizeof(struct densdata_in),
00198 MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, &status);
00199 }
00200 }
00201
00202 for(j = 0; j < NTask; j++)
00203 if((j ^ ngrp) < NTask)
00204 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00205 }
00206 tend = second();
00207 timecommsumm += timediff(tstart, tend);
00208
00209
00210 tstart = second();
00211 for(j = 0; j < nbuffer[ThisTask]; j++)
00212 density_evaluate(j, 1);
00213 tend = second();
00214 timecomp += timediff(tstart, tend);
00215
00216
00217 tstart = second();
00218 MPI_Barrier(MPI_COMM_WORLD);
00219 tend = second();
00220 timeimbalance += timediff(tstart, tend);
00221
00222
00223 tstart = second();
00224 for(j = 0; j < NTask; j++)
00225 nbuffer[j] = 0;
00226 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00227 {
00228 maxfill = 0;
00229 for(j = 0; j < NTask; j++)
00230 {
00231 if((j ^ ngrp) < NTask)
00232 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00233 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00234 }
00235 if(maxfill >= All.BunchSizeDensity)
00236 break;
00237
00238 sendTask = ThisTask;
00239 recvTask = ThisTask ^ ngrp;
00240
00241 if(recvTask < NTask)
00242 {
00243 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00244 {
00245
00246 MPI_Sendrecv(&DensDataResult[nbuffer[ThisTask]],
00247 nsend[recvTask * NTask + ThisTask] * sizeof(struct densdata_out),
00248 MPI_BYTE, recvTask, TAG_DENS_B,
00249 &DensDataPartialResult[noffset[recvTask]],
00250 nsend_local[recvTask] * sizeof(struct densdata_out),
00251 MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, &status);
00252
00253
00254 for(j = 0; j < nsend_local[recvTask]; j++)
00255 {
00256 source = j + noffset[recvTask];
00257 place = DensDataIn[source].Index;
00258
00259 SphP[place].NumNgb += DensDataPartialResult[source].Ngb;
00260 SphP[place].Density += DensDataPartialResult[source].Rho;
00261 SphP[place].DivVel += DensDataPartialResult[source].Div;
00262
00263 SphP[place].DhsmlDensityFactor += DensDataPartialResult[source].DhsmlDensity;
00264
00265 SphP[place].Rot[0] += DensDataPartialResult[source].Rot[0];
00266 SphP[place].Rot[1] += DensDataPartialResult[source].Rot[1];
00267 SphP[place].Rot[2] += DensDataPartialResult[source].Rot[2];
00268 }
00269 }
00270 }
00271
00272 for(j = 0; j < NTask; j++)
00273 if((j ^ ngrp) < NTask)
00274 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00275 }
00276 tend = second();
00277 timecommsumm += timediff(tstart, tend);
00278
00279 level = ngrp - 1;
00280 }
00281
00282 MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
00283 for(j = 0; j < NTask; j++)
00284 ntotleft -= ndonelist[j];
00285 }
00286
00287
00288
00289
00290 tstart = second();
00291 for(i = 0, npleft = 0; i < N_gas; i++)
00292 {
00293 if(P[i].Ti_endstep == All.Ti_Current)
00294 {
00295 {
00296 SphP[i].DhsmlDensityFactor =
00297 1 / (1 + SphP[i].Hsml * SphP[i].DhsmlDensityFactor / (NUMDIMS * SphP[i].Density));
00298
00299 SphP[i].CurlVel = sqrt(SphP[i].Rot[0] * SphP[i].Rot[0] +
00300 SphP[i].Rot[1] * SphP[i].Rot[1] +
00301 SphP[i].Rot[2] * SphP[i].Rot[2]) / SphP[i].Density;
00302
00303 SphP[i].DivVel /= SphP[i].Density;
00304
00305 dt_entr = (All.Ti_Current - (P[i].Ti_begstep + P[i].Ti_endstep) / 2) * All.Timebase_interval;
00306
00307 SphP[i].Pressure =
00308 (SphP[i].Entropy + SphP[i].DtEntropy * dt_entr) * pow(SphP[i].Density, GAMMA);
00309 }
00310
00311
00312
00313
00314 if(SphP[i].NumNgb < (All.DesNumNgb - All.MaxNumNgbDeviation) ||
00315 (SphP[i].NumNgb > (All.DesNumNgb + All.MaxNumNgbDeviation)
00316 && SphP[i].Hsml > (1.01 * All.MinGasHsml)))
00317 {
00318
00319 npleft++;
00320
00321 if(SphP[i].Left > 0 && SphP[i].Right > 0)
00322 if((SphP[i].Right - SphP[i].Left) < 1.0e-3 * SphP[i].Left)
00323 {
00324
00325 npleft--;
00326 P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00327 continue;
00328 }
00329
00330 if(SphP[i].NumNgb < (All.DesNumNgb - All.MaxNumNgbDeviation))
00331 SphP[i].Left = dmax(SphP[i].Hsml, SphP[i].Left);
00332 else
00333 {
00334 if(SphP[i].Right != 0)
00335 {
00336 if(SphP[i].Hsml < SphP[i].Right)
00337 SphP[i].Right = SphP[i].Hsml;
00338 }
00339 else
00340 SphP[i].Right = SphP[i].Hsml;
00341 }
00342
00343 if(iter >= MAXITER - 10)
00344 {
00345 printf
00346 ("i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n",
00347 i, ThisTask, (int) P[i].ID, SphP[i].Hsml, SphP[i].Left, SphP[i].Right,
00348 (float) SphP[i].NumNgb, SphP[i].Right - SphP[i].Left, P[i].Pos[0], P[i].Pos[1],
00349 P[i].Pos[2]);
00350 fflush(stdout);
00351 }
00352
00353 if(SphP[i].Right > 0 && SphP[i].Left > 0)
00354 SphP[i].Hsml = pow(0.5 * (pow(SphP[i].Left, 3) + pow(SphP[i].Right, 3)), 1.0 / 3);
00355 else
00356 {
00357 if(SphP[i].Right == 0 && SphP[i].Left == 0)
00358 endrun(8188);
00359
00360 if(SphP[i].Right == 0 && SphP[i].Left > 0)
00361 {
00362 if(P[i].Type == 0 && fabs(SphP[i].NumNgb - All.DesNumNgb) < 0.5 * All.DesNumNgb)
00363 {
00364 SphP[i].Hsml *=
00365 1 - (SphP[i].NumNgb -
00366 All.DesNumNgb) / (NUMDIMS * SphP[i].NumNgb) * SphP[i].DhsmlDensityFactor;
00367 }
00368 else
00369 SphP[i].Hsml *= 1.26;
00370 }
00371
00372 if(SphP[i].Right > 0 && SphP[i].Left == 0)
00373 {
00374 if(P[i].Type == 0 && fabs(SphP[i].NumNgb - All.DesNumNgb) < 0.5 * All.DesNumNgb)
00375 {
00376 SphP[i].Hsml *=
00377 1 - (SphP[i].NumNgb -
00378 All.DesNumNgb) / (NUMDIMS * SphP[i].NumNgb) * SphP[i].DhsmlDensityFactor;
00379 }
00380 else
00381 SphP[i].Hsml /= 1.26;
00382 }
00383 }
00384
00385 if(SphP[i].Hsml < All.MinGasHsml)
00386 SphP[i].Hsml = All.MinGasHsml;
00387 }
00388 else
00389 P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00390 }
00391 }
00392 tend = second();
00393 timecomp += timediff(tstart, tend);
00394
00395
00396 numlist = malloc(NTask * sizeof(int) * NTask);
00397 MPI_Allgather(&npleft, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
00398 for(i = 0, ntot = 0; i < NTask; i++)
00399 ntot += numlist[i];
00400 free(numlist);
00401
00402 if(ntot > 0)
00403 {
00404 if(iter == 0)
00405 tstart_ngb = second();
00406
00407 iter++;
00408
00409 if(iter > 0 && ThisTask == 0)
00410 {
00411 printf("ngb iteration %d: need to repeat for %d%09d particles.\n", iter,
00412 (int) (ntot / 1000000000), (int) (ntot % 1000000000));
00413 fflush(stdout);
00414 }
00415
00416 if(iter > MAXITER)
00417 {
00418 printf("failed to converge in neighbour iteration in density()\n");
00419 fflush(stdout);
00420 endrun(1155);
00421 }
00422 }
00423 else
00424 tend_ngb = second();
00425 }
00426 while(ntot > 0);
00427
00428
00429
00430 for(i = 0; i < NumPart; i++)
00431 if(P[i].Ti_endstep < 0)
00432 P[i].Ti_endstep = -P[i].Ti_endstep - 1;
00433
00434 free(ndonelist);
00435 free(nsend);
00436 free(nsend_local);
00437 free(nbuffer);
00438 free(noffset);
00439
00440
00441
00442 if(iter > 0)
00443 timengb = timediff(tstart_ngb, tend_ngb);
00444 else
00445 timengb = 0;
00446
00447 MPI_Reduce(&timengb, &sumtimengb, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00448 MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00449 MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00450 MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00451
00452 if(ThisTask == 0)
00453 {
00454 All.CPU_HydCompWalk += sumt / NTask;
00455 All.CPU_HydCommSumm += sumcomm / NTask;
00456 All.CPU_HydImbalance += sumimbalance / NTask;
00457 All.CPU_EnsureNgb += sumtimengb / NTask;
00458 }
00459 }
00460
00461
00462
00467 void density_evaluate(int target, int mode)
00468 {
00469 int j, n, startnode, numngb, numngb_inbox;
00470 double h, h2, fac, hinv, hinv3, hinv4;
00471 double rho, divv, wk, dwk;
00472 double dx, dy, dz, r, r2, u, mass_j;
00473 double dvx, dvy, dvz, rotv[3];
00474 double weighted_numngb, dhsmlrho;
00475 FLOAT *pos, *vel;
00476
00477 if(mode == 0)
00478 {
00479 pos = P[target].Pos;
00480 vel = SphP[target].VelPred;
00481 h = SphP[target].Hsml;
00482 }
00483 else
00484 {
00485 pos = DensDataGet[target].Pos;
00486 vel = DensDataGet[target].Vel;
00487 h = DensDataGet[target].Hsml;
00488 }
00489
00490 h2 = h * h;
00491 hinv = 1.0 / h;
00492 #ifndef TWODIMS
00493 hinv3 = hinv * hinv * hinv;
00494 #else
00495 hinv3 = hinv * hinv / boxSize_Z;
00496 #endif
00497 hinv4 = hinv3 * hinv;
00498
00499 rho = divv = rotv[0] = rotv[1] = rotv[2] = 0;
00500 weighted_numngb = 0;
00501 dhsmlrho = 0;
00502
00503 startnode = All.MaxPart;
00504 numngb = 0;
00505 do
00506 {
00507 numngb_inbox = ngb_treefind_variable(&pos[0], h, &startnode);
00508
00509 for(n = 0; n < numngb_inbox; n++)
00510 {
00511 j = Ngblist[n];
00512
00513 dx = pos[0] - P[j].Pos[0];
00514 dy = pos[1] - P[j].Pos[1];
00515 dz = pos[2] - P[j].Pos[2];
00516
00517 #ifdef PERIODIC
00518 if(dx > boxHalf_X)
00519 dx -= boxSize_X;
00520 if(dx < -boxHalf_X)
00521 dx += boxSize_X;
00522 if(dy > boxHalf_Y)
00523 dy -= boxSize_Y;
00524 if(dy < -boxHalf_Y)
00525 dy += boxSize_Y;
00526 if(dz > boxHalf_Z)
00527 dz -= boxSize_Z;
00528 if(dz < -boxHalf_Z)
00529 dz += boxSize_Z;
00530 #endif
00531 r2 = dx * dx + dy * dy + dz * dz;
00532
00533 if(r2 < h2)
00534 {
00535 numngb++;
00536
00537 r = sqrt(r2);
00538
00539 u = r * hinv;
00540
00541 if(u < 0.5)
00542 {
00543 wk = hinv3 * (KERNEL_COEFF_1 + KERNEL_COEFF_2 * (u - 1) * u * u);
00544 dwk = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
00545 }
00546 else
00547 {
00548 wk = hinv3 * KERNEL_COEFF_5 * (1.0 - u) * (1.0 - u) * (1.0 - u);
00549 dwk = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
00550 }
00551
00552 mass_j = P[j].Mass;
00553
00554 rho += mass_j * wk;
00555
00556 weighted_numngb += NORM_COEFF * wk / hinv3;
00557
00558 dhsmlrho += -mass_j * (NUMDIMS * hinv * wk + u * dwk);
00559
00560 if(r > 0)
00561 {
00562 fac = mass_j * dwk / r;
00563
00564 dvx = vel[0] - SphP[j].VelPred[0];
00565 dvy = vel[1] - SphP[j].VelPred[1];
00566 dvz = vel[2] - SphP[j].VelPred[2];
00567
00568 divv -= fac * (dx * dvx + dy * dvy + dz * dvz);
00569
00570 rotv[0] += fac * (dz * dvy - dy * dvz);
00571 rotv[1] += fac * (dx * dvz - dz * dvx);
00572 rotv[2] += fac * (dy * dvx - dx * dvy);
00573 }
00574 }
00575 }
00576 }
00577 while(startnode >= 0);
00578
00579 if(mode == 0)
00580 {
00581 SphP[target].NumNgb = weighted_numngb;
00582 SphP[target].Density = rho;
00583 SphP[target].DivVel = divv;
00584 SphP[target].DhsmlDensityFactor = dhsmlrho;
00585 SphP[target].Rot[0] = rotv[0];
00586 SphP[target].Rot[1] = rotv[1];
00587 SphP[target].Rot[2] = rotv[2];
00588 }
00589 else
00590 {
00591 DensDataResult[target].Rho = rho;
00592 DensDataResult[target].Div = divv;
00593 DensDataResult[target].Ngb = weighted_numngb;
00594 DensDataResult[target].DhsmlDensity = dhsmlrho;
00595 DensDataResult[target].Rot[0] = rotv[0];
00596 DensDataResult[target].Rot[1] = rotv[1];
00597 DensDataResult[target].Rot[2] = rotv[2];
00598 }
00599 }
00600
00601
00602
00603
00607 int dens_compare_key(const void *a, const void *b)
00608 {
00609 if(((struct densdata_in *) a)->Task < (((struct densdata_in *) b)->Task))
00610 return -1;
00611
00612 if(((struct densdata_in *) a)->Task > (((struct densdata_in *) b)->Task))
00613 return +1;
00614
00615 return 0;
00616 }