00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006 #include <gsl/gsl_math.h>
00007 #include "allvars.h"
00008 #include "proto.h"
00009
00019 static double hubble_a, atime, hubble_a2, fac_mu, fac_vsic_fix, a3inv, fac_egy;
00020
00021 #ifdef PERIODIC
00022 static double boxSize, boxHalf;
00023
00024 #ifdef LONG_X
00025 static double boxSize_X, boxHalf_X;
00026 #else
00027 #define boxSize_X boxSize
00028 #define boxHalf_X boxHalf
00029 #endif
00030 #ifdef LONG_Y
00031 static double boxSize_Y, boxHalf_Y;
00032 #else
00033 #define boxSize_Y boxSize
00034 #define boxHalf_Y boxHalf
00035 #endif
00036 #ifdef LONG_Z
00037 static double boxSize_Z, boxHalf_Z;
00038 #else
00039 #define boxSize_Z boxSize
00040 #define boxHalf_Z boxHalf
00041 #endif
00042 #endif
00043
00044
00045
00050 void hydro_force(void)
00051 {
00052 long long ntot, ntotleft;
00053 int i, j, k, n, ngrp, maxfill, source, ndone;
00054 int *nbuffer, *noffset, *nsend_local, *nsend, *numlist, *ndonelist;
00055 int level, sendTask, recvTask, nexport, place;
00056 double soundspeed_i;
00057 double tstart, tend, sumt, sumcomm;
00058 double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
00059 MPI_Status status;
00060
00061 #ifdef PERIODIC
00062 boxSize = All.BoxSize;
00063 boxHalf = 0.5 * All.BoxSize;
00064 #ifdef LONG_X
00065 boxHalf_X = boxHalf * LONG_X;
00066 boxSize_X = boxSize * LONG_X;
00067 #endif
00068 #ifdef LONG_Y
00069 boxHalf_Y = boxHalf * LONG_Y;
00070 boxSize_Y = boxSize * LONG_Y;
00071 #endif
00072 #ifdef LONG_Z
00073 boxHalf_Z = boxHalf * LONG_Z;
00074 boxSize_Z = boxSize * LONG_Z;
00075 #endif
00076 #endif
00077
00078 if(All.ComovingIntegrationOn)
00079 {
00080
00081 hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
00082 + (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;
00083
00084 hubble_a = All.Hubble * sqrt(hubble_a);
00085 hubble_a2 = All.Time * All.Time * hubble_a;
00086
00087 fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;
00088
00089 fac_egy = pow(All.Time, 3 * (GAMMA - 1));
00090
00091 fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);
00092
00093 a3inv = 1 / (All.Time * All.Time * All.Time);
00094 atime = All.Time;
00095 }
00096 else
00097 hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;
00098
00099
00100
00101 for(n = 0, NumSphUpdate = 0; n < N_gas; n++)
00102 {
00103 if(P[n].Ti_endstep == All.Ti_Current)
00104 NumSphUpdate++;
00105 }
00106
00107 numlist = malloc(NTask * sizeof(int) * NTask);
00108 MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
00109 for(i = 0, ntot = 0; i < NTask; i++)
00110 ntot += numlist[i];
00111 free(numlist);
00112
00113
00114 noffset = malloc(sizeof(int) * NTask);
00115 nbuffer = malloc(sizeof(int) * NTask);
00116 nsend_local = malloc(sizeof(int) * NTask);
00117 nsend = malloc(sizeof(int) * NTask * NTask);
00118 ndonelist = malloc(sizeof(int) * NTask);
00119
00120
00121 i = 0;
00122 ntotleft = ntot;
00123
00124 while(ntotleft > 0)
00125 {
00126 for(j = 0; j < NTask; j++)
00127 nsend_local[j] = 0;
00128
00129
00130 tstart = second();
00131 for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++)
00132 if(P[i].Ti_endstep == All.Ti_Current)
00133 {
00134 ndone++;
00135
00136 for(j = 0; j < NTask; j++)
00137 Exportflag[j] = 0;
00138
00139 hydro_evaluate(i, 0);
00140
00141 for(j = 0; j < NTask; j++)
00142 {
00143 if(Exportflag[j])
00144 {
00145 for(k = 0; k < 3; k++)
00146 {
00147 HydroDataIn[nexport].Pos[k] = P[i].Pos[k];
00148 HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k];
00149 }
00150 HydroDataIn[nexport].Hsml = SphP[i].Hsml;
00151 HydroDataIn[nexport].Mass = P[i].Mass;
00152 HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor;
00153 HydroDataIn[nexport].Density = SphP[i].Density;
00154 HydroDataIn[nexport].Pressure = SphP[i].Pressure;
00155 HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep;
00156
00157
00158 soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
00159 HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) /
00160 (fabs(SphP[i].DivVel) + SphP[i].CurlVel +
00161 0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu);
00162
00163 HydroDataIn[nexport].Index = i;
00164 HydroDataIn[nexport].Task = j;
00165 nexport++;
00166 nsend_local[j]++;
00167 }
00168 }
00169 }
00170 tend = second();
00171 timecomp += timediff(tstart, tend);
00172
00173 qsort(HydroDataIn, nexport, sizeof(struct hydrodata_in), hydro_compare_key);
00174
00175 for(j = 1, noffset[0] = 0; j < NTask; j++)
00176 noffset[j] = noffset[j - 1] + nsend_local[j - 1];
00177
00178 tstart = second();
00179
00180 MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
00181
00182 tend = second();
00183 timeimbalance += timediff(tstart, tend);
00184
00185
00186
00187
00188
00189 for(level = 1; level < (1 << PTask); level++)
00190 {
00191 tstart = second();
00192 for(j = 0; j < NTask; j++)
00193 nbuffer[j] = 0;
00194 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00195 {
00196 maxfill = 0;
00197 for(j = 0; j < NTask; j++)
00198 {
00199 if((j ^ ngrp) < NTask)
00200 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00201 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00202 }
00203 if(maxfill >= All.BunchSizeHydro)
00204 break;
00205
00206 sendTask = ThisTask;
00207 recvTask = ThisTask ^ ngrp;
00208
00209 if(recvTask < NTask)
00210 {
00211 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00212 {
00213
00214 MPI_Sendrecv(&HydroDataIn[noffset[recvTask]],
00215 nsend_local[recvTask] * sizeof(struct hydrodata_in), MPI_BYTE,
00216 recvTask, TAG_HYDRO_A,
00217 &HydroDataGet[nbuffer[ThisTask]],
00218 nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_in), MPI_BYTE,
00219 recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
00220 }
00221 }
00222
00223 for(j = 0; j < NTask; j++)
00224 if((j ^ ngrp) < NTask)
00225 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00226 }
00227 tend = second();
00228 timecommsumm += timediff(tstart, tend);
00229
00230
00231 tstart = second();
00232 for(j = 0; j < nbuffer[ThisTask]; j++)
00233 hydro_evaluate(j, 1);
00234 tend = second();
00235 timecomp += timediff(tstart, tend);
00236
00237
00238 tstart = second();
00239 MPI_Barrier(MPI_COMM_WORLD);
00240 tend = second();
00241 timeimbalance += timediff(tstart, tend);
00242
00243
00244 tstart = second();
00245 for(j = 0; j < NTask; j++)
00246 nbuffer[j] = 0;
00247 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00248 {
00249 maxfill = 0;
00250 for(j = 0; j < NTask; j++)
00251 {
00252 if((j ^ ngrp) < NTask)
00253 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00254 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00255 }
00256 if(maxfill >= All.BunchSizeHydro)
00257 break;
00258
00259 sendTask = ThisTask;
00260 recvTask = ThisTask ^ ngrp;
00261
00262 if(recvTask < NTask)
00263 {
00264 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00265 {
00266
00267 MPI_Sendrecv(&HydroDataResult[nbuffer[ThisTask]],
00268 nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_out),
00269 MPI_BYTE, recvTask, TAG_HYDRO_B,
00270 &HydroDataPartialResult[noffset[recvTask]],
00271 nsend_local[recvTask] * sizeof(struct hydrodata_out),
00272 MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);
00273
00274
00275 for(j = 0; j < nsend_local[recvTask]; j++)
00276 {
00277 source = j + noffset[recvTask];
00278 place = HydroDataIn[source].Index;
00279
00280 for(k = 0; k < 3; k++)
00281 SphP[place].HydroAccel[k] += HydroDataPartialResult[source].Acc[k];
00282
00283 SphP[place].DtEntropy += HydroDataPartialResult[source].DtEntropy;
00284
00285 if(SphP[place].MaxSignalVel < HydroDataPartialResult[source].MaxSignalVel)
00286 SphP[place].MaxSignalVel = HydroDataPartialResult[source].MaxSignalVel;
00287 }
00288 }
00289 }
00290
00291 for(j = 0; j < NTask; j++)
00292 if((j ^ ngrp) < NTask)
00293 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00294 }
00295 tend = second();
00296 timecommsumm += timediff(tstart, tend);
00297
00298 level = ngrp - 1;
00299 }
00300
00301 MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
00302 for(j = 0; j < NTask; j++)
00303 ntotleft -= ndonelist[j];
00304 }
00305
00306 free(ndonelist);
00307 free(nsend);
00308 free(nsend_local);
00309 free(nbuffer);
00310 free(noffset);
00311
00312
00313
00314
00315 tstart = second();
00316
00317 for(i = 0; i < N_gas; i++)
00318 if(P[i].Ti_endstep == All.Ti_Current)
00319 {
00320 SphP[i].DtEntropy *= GAMMA_MINUS1 / (hubble_a2 * pow(SphP[i].Density, GAMMA_MINUS1));
00321 #ifdef SPH_BND_PARTICLES
00322 if(P[i].ID == 0)
00323 {
00324 SphP[i].DtEntropy = 0;
00325 for(k = 0; k < 3; k++)
00326 SphP[i].HydroAccel[k] = 0;
00327 }
00328 #endif
00329 }
00330
00331 tend = second();
00332 timecomp += timediff(tstart, tend);
00333
00334
00335
00336 MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00337 MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00338 MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
00339
00340 if(ThisTask == 0)
00341 {
00342 All.CPU_HydCompWalk += sumt / NTask;
00343 All.CPU_HydCommSumm += sumcomm / NTask;
00344 All.CPU_HydImbalance += sumimbalance / NTask;
00345 }
00346 }
00347
00348
00353 void hydro_evaluate(int target, int mode)
00354 {
00355 int j, k, n, timestep, startnode, numngb;
00356 FLOAT *pos, *vel;
00357 FLOAT mass, h_i, dhsmlDensityFactor, rho, pressure, f1, f2;
00358 double acc[3], dtEntropy, maxSignalVel;
00359 double dx, dy, dz, dvx, dvy, dvz;
00360 double h_i2, hinv, hinv4;
00361 double p_over_rho2_i, p_over_rho2_j, soundspeed_i, soundspeed_j;
00362 double hfc, dwk_i, vdotr, vdotr2, visc, mu_ij, rho_ij, vsig;
00363 double h_j, dwk_j, r, r2, u, hfc_visc;
00364
00365 #ifndef NOVISCOSITYLIMITER
00366 double dt;
00367 #endif
00368
00369 if(mode == 0)
00370 {
00371 pos = P[target].Pos;
00372 vel = SphP[target].VelPred;
00373 h_i = SphP[target].Hsml;
00374 mass = P[target].Mass;
00375 dhsmlDensityFactor = SphP[target].DhsmlDensityFactor;
00376 rho = SphP[target].Density;
00377 pressure = SphP[target].Pressure;
00378 timestep = P[target].Ti_endstep - P[target].Ti_begstep;
00379 soundspeed_i = sqrt(GAMMA * pressure / rho);
00380 f1 = fabs(SphP[target].DivVel) /
00381 (fabs(SphP[target].DivVel) + SphP[target].CurlVel +
00382 0.0001 * soundspeed_i / SphP[target].Hsml / fac_mu);
00383 }
00384 else
00385 {
00386 pos = HydroDataGet[target].Pos;
00387 vel = HydroDataGet[target].Vel;
00388 h_i = HydroDataGet[target].Hsml;
00389 mass = HydroDataGet[target].Mass;
00390 dhsmlDensityFactor = HydroDataGet[target].DhsmlDensityFactor;
00391 rho = HydroDataGet[target].Density;
00392 pressure = HydroDataGet[target].Pressure;
00393 timestep = HydroDataGet[target].Timestep;
00394 soundspeed_i = sqrt(GAMMA * pressure / rho);
00395 f1 = HydroDataGet[target].F1;
00396 }
00397
00398
00399
00400 acc[0] = acc[1] = acc[2] = dtEntropy = 0;
00401 maxSignalVel = 0;
00402
00403 p_over_rho2_i = pressure / (rho * rho) * dhsmlDensityFactor;
00404 h_i2 = h_i * h_i;
00405
00406
00407 startnode = All.MaxPart;
00408 do
00409 {
00410 numngb = ngb_treefind_pairs(&pos[0], h_i, &startnode);
00411
00412 for(n = 0; n < numngb; n++)
00413 {
00414 j = Ngblist[n];
00415
00416 dx = pos[0] - P[j].Pos[0];
00417 dy = pos[1] - P[j].Pos[1];
00418 dz = pos[2] - P[j].Pos[2];
00419
00420 #ifdef PERIODIC
00421 if(dx > boxHalf_X)
00422 dx -= boxSize_X;
00423 if(dx < -boxHalf_X)
00424 dx += boxSize_X;
00425 if(dy > boxHalf_Y)
00426 dy -= boxSize_Y;
00427 if(dy < -boxHalf_Y)
00428 dy += boxSize_Y;
00429 if(dz > boxHalf_Z)
00430 dz -= boxSize_Z;
00431 if(dz < -boxHalf_Z)
00432 dz += boxSize_Z;
00433 #endif
00434 r2 = dx * dx + dy * dy + dz * dz;
00435 h_j = SphP[j].Hsml;
00436 if(r2 < h_i2 || r2 < h_j * h_j)
00437 {
00438 r = sqrt(r2);
00439 if(r > 0)
00440 {
00441 p_over_rho2_j = SphP[j].Pressure / (SphP[j].Density * SphP[j].Density);
00442 soundspeed_j = sqrt(GAMMA * p_over_rho2_j * SphP[j].Density);
00443 dvx = vel[0] - SphP[j].VelPred[0];
00444 dvy = vel[1] - SphP[j].VelPred[1];
00445 dvz = vel[2] - SphP[j].VelPred[2];
00446 vdotr = dx * dvx + dy * dvy + dz * dvz;
00447
00448 if(All.ComovingIntegrationOn)
00449 vdotr2 = vdotr + hubble_a2 * r2;
00450 else
00451 vdotr2 = vdotr;
00452
00453 if(r2 < h_i2)
00454 {
00455 hinv = 1.0 / h_i;
00456 #ifndef TWODIMS
00457 hinv4 = hinv * hinv * hinv * hinv;
00458 #else
00459 hinv4 = hinv * hinv * hinv / boxSize_Z;
00460 #endif
00461 u = r * hinv;
00462 if(u < 0.5)
00463 dwk_i = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
00464 else
00465 dwk_i = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
00466 }
00467 else
00468 {
00469 dwk_i = 0;
00470 }
00471
00472 if(r2 < h_j * h_j)
00473 {
00474 hinv = 1.0 / h_j;
00475 #ifndef TWODIMS
00476 hinv4 = hinv * hinv * hinv * hinv;
00477 #else
00478 hinv4 = hinv * hinv * hinv / boxSize_Z;
00479 #endif
00480 u = r * hinv;
00481 if(u < 0.5)
00482 dwk_j = hinv4 * u * (KERNEL_COEFF_3 * u - KERNEL_COEFF_4);
00483 else
00484 dwk_j = hinv4 * KERNEL_COEFF_6 * (1.0 - u) * (1.0 - u);
00485 }
00486 else
00487 {
00488 dwk_j = 0;
00489 }
00490
00491 if(soundspeed_i + soundspeed_j > maxSignalVel)
00492 maxSignalVel = soundspeed_i + soundspeed_j;
00493
00494 if(vdotr2 < 0)
00495 {
00496 mu_ij = fac_mu * vdotr2 / r;
00497
00498 vsig = soundspeed_i + soundspeed_j - 3 * mu_ij;
00499
00500 if(vsig > maxSignalVel)
00501 maxSignalVel = vsig;
00502
00503 rho_ij = 0.5 * (rho + SphP[j].Density);
00504 f2 =
00505 fabs(SphP[j].DivVel) / (fabs(SphP[j].DivVel) + SphP[j].CurlVel +
00506 0.0001 * soundspeed_j / fac_mu / SphP[j].Hsml);
00507
00508 visc = 0.25 * All.ArtBulkViscConst * vsig * (-mu_ij) / rho_ij * (f1 + f2);
00509
00510
00511 #ifndef NOVISCOSITYLIMITER
00512
00513 dt = imax(timestep, (P[j].Ti_endstep - P[j].Ti_begstep)) * All.Timebase_interval;
00514 if(dt > 0 && (dwk_i + dwk_j) < 0)
00515 {
00516 visc = dmin(visc, 0.5 * fac_vsic_fix * vdotr2 /
00517 (0.5 * (mass + P[j].Mass) * (dwk_i + dwk_j) * r * dt));
00518 }
00519 #endif
00520 }
00521 else
00522 visc = 0;
00523
00524 p_over_rho2_j *= SphP[j].DhsmlDensityFactor;
00525
00526 hfc_visc = 0.5 * P[j].Mass * visc * (dwk_i + dwk_j) / r;
00527
00528 hfc = hfc_visc + P[j].Mass * (p_over_rho2_i * dwk_i + p_over_rho2_j * dwk_j) / r;
00529
00530 acc[0] -= hfc * dx;
00531 acc[1] -= hfc * dy;
00532 acc[2] -= hfc * dz;
00533 dtEntropy += 0.5 * hfc_visc * vdotr2;
00534 }
00535 }
00536 }
00537 }
00538 while(startnode >= 0);
00539
00540
00541 if(mode == 0)
00542 {
00543 for(k = 0; k < 3; k++)
00544 SphP[target].HydroAccel[k] = acc[k];
00545 SphP[target].DtEntropy = dtEntropy;
00546 SphP[target].MaxSignalVel = maxSignalVel;
00547 }
00548 else
00549 {
00550 for(k = 0; k < 3; k++)
00551 HydroDataResult[target].Acc[k] = acc[k];
00552 HydroDataResult[target].DtEntropy = dtEntropy;
00553 HydroDataResult[target].MaxSignalVel = maxSignalVel;
00554 }
00555 }
00556
00557
00558
00559
00563 int hydro_compare_key(const void *a, const void *b)
00564 {
00565 if(((struct hydrodata_in *) a)->Task < (((struct hydrodata_in *) b)->Task))
00566 return -1;
00567 if(((struct hydrodata_in *) a)->Task > (((struct hydrodata_in *) b)->Task))
00568 return +1;
00569 return 0;
00570 }