00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <float.h>
00006 #include <mpi.h>
00007
00008
00009 #include "allvars.h"
00010 #include "proto.h"
00011
00012
00022 void compute_potential(void)
00023 {
00024 int i;
00025
00026 #ifndef NOGRAVITY
00027 long long ntot, ntotleft;
00028 int j, k, level, sendTask, recvTask;
00029 int ndone;
00030 int maxfill, ngrp, place, nexport;
00031 int *nsend, *noffset, *nsend_local, *nbuffer, *ndonelist, *numlist;
00032 double fac;
00033 double t0, t1, tstart, tend;
00034 MPI_Status status;
00035 double r2;
00036
00037 t0 = second();
00038
00039 if(All.ComovingIntegrationOn)
00040 set_softenings();
00041
00042 if(ThisTask == 0)
00043 {
00044 printf("Start computation of potential for all particles...\n");
00045 fflush(stdout);
00046 }
00047
00048
00049 tstart = second();
00050 if(TreeReconstructFlag)
00051 {
00052 if(ThisTask == 0)
00053 printf("Tree construction.\n");
00054
00055 force_treebuild(NumPart);
00056
00057 TreeReconstructFlag = 0;
00058
00059 if(ThisTask == 0)
00060 printf("Tree construction done.\n");
00061 }
00062 tend = second();
00063 All.CPU_TreeConstruction += timediff(tstart, tend);
00064
00065 numlist = malloc(NTask * sizeof(int) * NTask);
00066 MPI_Allgather(&NumPart, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
00067 for(i = 0, ntot = 0; i < NTask; i++)
00068 ntot += numlist[i];
00069 free(numlist);
00070
00071 noffset = malloc(sizeof(int) * NTask);
00072 nbuffer = malloc(sizeof(int) * NTask);
00073 nsend_local = malloc(sizeof(int) * NTask);
00074 nsend = malloc(sizeof(int) * NTask * NTask);
00075 ndonelist = malloc(sizeof(int) * NTask);
00076
00077 i = 0;
00078 ntotleft = ntot;
00079
00080 while(ntotleft > 0)
00081 {
00082 for(j = 0; j < NTask; j++)
00083 nsend_local[j] = 0;
00084
00085
00086 for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++)
00087 {
00088 ndone++;
00089
00090 for(j = 0; j < NTask; j++)
00091 Exportflag[j] = 0;
00092
00093 #ifndef PMGRID
00094 force_treeevaluate_potential(i, 0);
00095 #else
00096 force_treeevaluate_potential_shortrange(i, 0);
00097 #endif
00098
00099 for(j = 0; j < NTask; j++)
00100 {
00101 if(Exportflag[j])
00102 {
00103 for(k = 0; k < 3; k++)
00104 GravDataGet[nexport].u.Pos[k] = P[i].Pos[k];
00105 #ifdef UNEQUALSOFTENINGS
00106 GravDataGet[nexport].Type = P[i].Type;
00107 #ifdef ADAPTIVE_GRAVSOFT_FORGAS
00108 if(P[i].Type == 0)
00109 GravDataGet[nexport].Soft = SphP[i].Hsml;
00110 #endif
00111 #endif
00112 GravDataGet[nexport].w.OldAcc = P[i].OldAcc;
00113
00114 GravDataIndexTable[nexport].Task = j;
00115 GravDataIndexTable[nexport].Index = i;
00116 GravDataIndexTable[nexport].SortIndex = nexport;
00117
00118 nexport++;
00119 nsend_local[j]++;
00120 }
00121 }
00122 }
00123
00124 qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key);
00125
00126 for(j = 0; j < nexport; j++)
00127 GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex];
00128
00129 for(j = 1, noffset[0] = 0; j < NTask; j++)
00130 noffset[j] = noffset[j - 1] + nsend_local[j - 1];
00131
00132 MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
00133
00134
00135
00136 for(level = 1; level < (1 << PTask); level++)
00137 {
00138 for(j = 0; j < NTask; j++)
00139 nbuffer[j] = 0;
00140 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00141 {
00142 maxfill = 0;
00143 for(j = 0; j < NTask; j++)
00144 {
00145 if((j ^ ngrp) < NTask)
00146 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00147 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00148 }
00149 if(maxfill >= All.BunchSizeForce)
00150 break;
00151
00152 sendTask = ThisTask;
00153 recvTask = ThisTask ^ ngrp;
00154
00155 if(recvTask < NTask)
00156 {
00157 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00158 {
00159
00160 MPI_Sendrecv(&GravDataIn[noffset[recvTask]],
00161 nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE,
00162 recvTask, TAG_POTENTIAL_A,
00163 &GravDataGet[nbuffer[ThisTask]],
00164 nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE,
00165 recvTask, TAG_POTENTIAL_A, MPI_COMM_WORLD, &status);
00166 }
00167 }
00168
00169 for(j = 0; j < NTask; j++)
00170 if((j ^ ngrp) < NTask)
00171 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00172 }
00173
00174 for(j = 0; j < nbuffer[ThisTask]; j++)
00175 {
00176 #ifndef PMGRID
00177 force_treeevaluate_potential(j, 1);
00178 #else
00179 force_treeevaluate_potential_shortrange(j, 1);
00180 #endif
00181 }
00182
00183
00184
00185 for(j = 0; j < NTask; j++)
00186 nbuffer[j] = 0;
00187 for(ngrp = level; ngrp < (1 << PTask); ngrp++)
00188 {
00189 maxfill = 0;
00190 for(j = 0; j < NTask; j++)
00191 {
00192 if((j ^ ngrp) < NTask)
00193 if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
00194 maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
00195 }
00196 if(maxfill >= All.BunchSizeForce)
00197 break;
00198
00199 sendTask = ThisTask;
00200 recvTask = ThisTask ^ ngrp;
00201
00202 if(recvTask < NTask)
00203 {
00204 if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
00205 {
00206
00207 MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]],
00208 nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in),
00209 MPI_BYTE, recvTask, TAG_POTENTIAL_B,
00210 &GravDataOut[noffset[recvTask]],
00211 nsend_local[recvTask] * sizeof(struct gravdata_in),
00212 MPI_BYTE, recvTask, TAG_POTENTIAL_B, MPI_COMM_WORLD, &status);
00213
00214
00215 for(j = 0; j < nsend_local[recvTask]; j++)
00216 {
00217 place = GravDataIndexTable[noffset[recvTask] + j].Index;
00218
00219 P[place].Potential += GravDataOut[j + noffset[recvTask]].u.Potential;
00220 }
00221 }
00222 }
00223
00224 for(j = 0; j < NTask; j++)
00225 if((j ^ ngrp) < NTask)
00226 nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
00227 }
00228
00229 level = ngrp - 1;
00230 }
00231
00232 MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
00233 for(j = 0; j < NTask; j++)
00234 ntotleft -= ndonelist[j];
00235 }
00236
00237 free(ndonelist);
00238 free(nsend);
00239 free(nsend_local);
00240 free(nbuffer);
00241 free(noffset);
00242
00243
00244
00245
00246 for(i = 0; i < NumPart; i++)
00247 {
00248
00249 P[i].Potential += P[i].Mass / All.SofteningTable[P[i].Type];
00250
00251 if(All.ComovingIntegrationOn)
00252 if(All.PeriodicBoundariesOn)
00253 P[i].Potential -= 2.8372975 * pow(P[i].Mass, 2.0 / 3) *
00254 pow(All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G), 1.0 / 3);
00255 }
00256
00257
00258
00259
00260 for(i = 0; i < NumPart; i++)
00261 P[i].Potential *= All.G;
00262
00263
00264 #ifdef PMGRID
00265
00266 #ifdef PERIODIC
00267 pmpotential_periodic();
00268 #ifdef PLACEHIGHRESREGION
00269 pmpotential_nonperiodic(1);
00270 #endif
00271 #else
00272 pmpotential_nonperiodic(0);
00273 #ifdef PLACEHIGHRESREGION
00274 pmpotential_nonperiodic(1);
00275 #endif
00276 #endif
00277
00278 #endif
00279
00280
00281
00282 if(All.ComovingIntegrationOn)
00283 {
00284 #ifndef PERIODIC
00285 fac = -0.5 * All.Omega0 * All.Hubble * All.Hubble;
00286
00287 for(i = 0; i < NumPart; i++)
00288 {
00289 for(k = 0, r2 = 0; k < 3; k++)
00290 r2 += P[i].Pos[k] * P[i].Pos[k];
00291
00292 P[i].Potential += fac * r2;
00293 }
00294 #endif
00295 }
00296 else
00297 {
00298 fac = -0.5 * All.OmegaLambda * All.Hubble * All.Hubble;
00299 if(fac != 0)
00300 {
00301 for(i = 0; i < NumPart; i++)
00302 {
00303 for(k = 0, r2 = 0; k < 3; k++)
00304 r2 += P[i].Pos[k] * P[i].Pos[k];
00305
00306 P[i].Potential += fac * r2;
00307 }
00308 }
00309 }
00310
00311
00312 if(ThisTask == 0)
00313 {
00314 printf("potential done.\n");
00315 fflush(stdout);
00316 }
00317
00318 t1 = second();
00319
00320 All.CPU_Potential += timediff(t0, t1);
00321
00322 #else
00323 for(i = 0; i < NumPart; i++)
00324 P[i].Potential = 0;
00325 #endif
00326 }