#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <mpi.h>

#include "allvars.h"
#include "proto.h"


#ifdef FORCETEST

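/*! This function tests the gravitational tree force by computing, for a
 *  randomly selected subset of the active particles, the force with direct
 *  summation. Both the direct force and the tree (and PM) force are written
 *  to the file "forcetest.txt" so that the force error can be checked.
 */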
void gravity_forcetest(void)
{
  int ntot, iter = 0, ntotleft, nthis;
  double tstart, tend, timetree = 0;
  int i, j, ndone, ngrp, maxfill, place, ndonetot;

#ifndef NOGRAVITY
  int *noffset, *nbuffer, *nsend, *nsend_local;
  int k, nexport;
  int level, sendTask, recvTask;
  double fac1;
  MPI_Status status;
#endif
  double costtotal, *costtreelist;
  double maxt, sumt, *timetreelist;
  double fac;
  char buf[200];

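  /* with a PM grid, carry out the force test only at the end of a full
     PM step, when the long-range force is up to date as well */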
#ifdef PMGRID
  if(All.PM_Ti_endstep != All.Ti_Current)
    return;
#endif

  if(All.ComovingIntegrationOn)
    set_softenings();

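  /* select a random subset of the active particles and flag it for the
     force test by temporarily making Ti_endstep negative */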
  for(i = 0, NumForceUpdate = 0; i < NumPart; i++)
    {
      if(P[i].Ti_endstep == All.Ti_Current)
        {
          if(get_random_number(P[i].ID) < FORCETEST)
            {
              P[i].Ti_endstep = -P[i].Ti_endstep - 1;
              NumForceUpdate++;
            }
        }
    }
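  /* NumForceUpdate is the local number of flagged particles; sum it up to
     get the total number of test particles */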
  MPI_Allreduce(&NumForceUpdate, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  costtotal = 0;

  noffset = malloc(sizeof(int) * NTask);   /* offsets of bunches in common list */
  nbuffer = malloc(sizeof(int) * NTask);
  nsend_local = malloc(sizeof(int) * NTask);
  nsend = malloc(sizeof(int) * NTask * NTask);

  i = 0;               /* begin with this index */
  ntotleft = ntot;     /* particles left for all tasks together */

  while(ntotleft > 0)
    {
      iter++;

      for(j = 0; j < NTask; j++)
        nsend_local[j] = 0;
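      /* do local particles and prepare export list */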
      tstart = second();
      for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++)
        if(P[i].Ti_endstep < 0)
          {
            ndone++;

            for(j = 0; j < NTask; j++)
              Exportflag[j] = 1;
            Exportflag[ThisTask] = 0;

            /* direct-summation force for the local particle; the return
               value counts the particle-particle interactions */
            costtotal += force_treeevaluate_direct(i, 0);

            for(j = 0; j < NTask; j++)
              {
                if(Exportflag[j])
                  {
                    /* fill in the data to be exported to task j */
                    for(k = 0; k < 3; k++)
                      GravDataGet[nexport].u.Pos[k] = P[i].Pos[k];

#ifdef UNEQUALSOFTENINGS
                    GravDataGet[nexport].Type = P[i].Type;
#endif
                    GravDataGet[nexport].w.OldAcc = P[i].OldAcc;

                    GravDataIndexTable[nexport].Task = j;
                    GravDataIndexTable[nexport].Index = i;
                    GravDataIndexTable[nexport].SortIndex = nexport;

                    nexport++;
                    nsend_local[j]++;
                  }
              }
          }
      tend = second();
      timetree += timediff(tstart, tend);

      /* sort the export list by destination task */
      qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key);

      for(j = 0; j < nexport; j++)
        GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex];

      for(j = 1, noffset[0] = 0; j < NTask; j++)
        noffset[j] = noffset[j - 1] + nsend_local[j - 1];

      /* let everybody know how much data each task sends */
      MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);
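      /* now do the particles that need to be exported */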
      for(level = 1; level < (1 << PTask); level++)
        {
          for(j = 0; j < NTask; j++)
            nbuffer[j] = 0;
          for(ngrp = level; ngrp < (1 << PTask); ngrp++)
            {
              maxfill = 0;
              for(j = 0; j < NTask; j++)
                {
                  if((j ^ ngrp) < NTask)
                    if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
                      maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
                }
              if(maxfill >= All.BunchSizeForce)
                break;

              sendTask = ThisTask;
              recvTask = ThisTask ^ ngrp;

              if(recvTask < NTask)
                {
                  if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
                    {
                      /* get the particles */
                      MPI_Sendrecv(&GravDataIn[noffset[recvTask]],
                                   nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE,
                                   recvTask, TAG_DIRECT_A,
                                   &GravDataGet[nbuffer[ThisTask]],
                                   nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE,
                                   recvTask, TAG_DIRECT_A, MPI_COMM_WORLD, &status);
                    }
                }

              for(j = 0; j < NTask; j++)
                if((j ^ ngrp) < NTask)
                  nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
            }

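          /* evaluate direct forces for the imported particles */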
          tstart = second();
          for(j = 0; j < nbuffer[ThisTask]; j++)
            {
              costtotal += force_treeevaluate_direct(j, 1);
            }
          tend = second();
          timetree += timediff(tstart, tend);
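          /* get the result */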
          for(j = 0; j < NTask; j++)
            nbuffer[j] = 0;
          for(ngrp = level; ngrp < (1 << PTask); ngrp++)
            {
              maxfill = 0;
              for(j = 0; j < NTask; j++)
                {
                  if((j ^ ngrp) < NTask)
                    if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
                      maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
                }
              if(maxfill >= All.BunchSizeForce)
                break;

              sendTask = ThisTask;
              recvTask = ThisTask ^ ngrp;
              if(recvTask < NTask)
                {
                  if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
                    {
                      /* send the results */
                      MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]],
                                   nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in),
                                   MPI_BYTE, recvTask, TAG_DIRECT_B,
                                   &GravDataOut[noffset[recvTask]],
                                   nsend_local[recvTask] * sizeof(struct gravdata_in),
                                   MPI_BYTE, recvTask, TAG_DIRECT_B, MPI_COMM_WORLD, &status);

                      /* add the results to the local particles */
                      for(j = 0; j < nsend_local[recvTask]; j++)
                        {
                          place = GravDataIndexTable[noffset[recvTask] + j].Index;

                          for(k = 0; k < 3; k++)
                            P[place].GravAccelDirect[k] += GravDataOut[j + noffset[recvTask]].u.Acc[k];
                        }
                    }
                }

              for(j = 0; j < NTask; j++)
                if((j ^ ngrp) < NTask)
                  nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
            }

          level = ngrp - 1;
        }

      MPI_Allreduce(&ndone, &ndonetot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

      ntotleft -= ndonetot;
    }

  free(nsend);
  free(nsend_local);
  free(nbuffer);
  free(noffset);
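  /* now add correction terms for comoving integration */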
  if(All.ComovingIntegrationOn)
    {
#ifndef PERIODIC
      fac1 = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G;

      for(i = 0; i < NumPart; i++)
        if(P[i].Ti_endstep < 0)
          for(j = 0; j < 3; j++)
            P[i].GravAccelDirect[j] += fac1 * P[i].Pos[j];
#endif
    }
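  /* multiply by the gravitational constant */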
  for(i = 0; i < NumPart; i++)
    if(P[i].Ti_endstep < 0)
      for(j = 0; j < 3; j++)
        P[i].GravAccelDirect[j] *= All.G;
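  /* the following term allows the computation of a cosmological simulation
     with vacuum energy in physical coordinates */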
  if(All.ComovingIntegrationOn == 0)
    {
      fac1 = All.OmegaLambda * All.Hubble * All.Hubble;

      for(i = 0; i < NumPart; i++)
        if(P[i].Ti_endstep < 0)
          for(j = 0; j < 3; j++)
            P[i].GravAccelDirect[j] += fac1 * P[i].Pos[j];
    }
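  /* now output the forces to a file, with the tasks writing in turn */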
  for(nthis = 0; nthis < NTask; nthis++)
    {
      if(nthis == ThisTask)
        {
          sprintf(buf, "%s%s", All.OutputDir, "forcetest.txt");
          if(!(FdForceTest = fopen(buf, "a")))
            {
              printf("error in opening file '%s'\n", buf);
              endrun(17);
            }
          for(i = 0; i < NumPart; i++)
            if(P[i].Ti_endstep < 0)
              {
#ifndef PMGRID
                fprintf(FdForceTest, "%d %g %g %g %g %g %g %g %g %g %g %g\n",
                        P[i].Type, All.Time, All.Time - TimeOfLastTreeConstruction,
                        P[i].Pos[0], P[i].Pos[1], P[i].Pos[2],
                        P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2],
                        P[i].GravAccel[0], P[i].GravAccel[1], P[i].GravAccel[2]);
#else
                fprintf(FdForceTest, "%d %g %g %g %g %g %g %g %g %g %g %g %g %g %g\n",
                        P[i].Type, All.Time, All.Time - TimeOfLastTreeConstruction,
                        P[i].Pos[0], P[i].Pos[1], P[i].Pos[2],
                        P[i].GravAccelDirect[0], P[i].GravAccelDirect[1], P[i].GravAccelDirect[2],
                        P[i].GravAccel[0], P[i].GravAccel[1], P[i].GravAccel[2],
                        P[i].GravPM[0] + P[i].GravAccel[0],
                        P[i].GravPM[1] + P[i].GravAccel[1], P[i].GravPM[2] + P[i].GravAccel[2]);
#endif
              }
          fclose(FdForceTest);
        }
      MPI_Barrier(MPI_COMM_WORLD);
    }

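  /* restore the original Ti_endstep of the test particles */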
  for(i = 0; i < NumPart; i++)
    if(P[i].Ti_endstep < 0)
      P[i].Ti_endstep = -P[i].Ti_endstep - 1;
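  /* collect timing and interaction-count statistics from all tasks */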
  timetreelist = malloc(sizeof(double) * NTask);
  costtreelist = malloc(sizeof(double) * NTask);

  MPI_Gather(&costtotal, 1, MPI_DOUBLE, costtreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Gather(&timetree, 1, MPI_DOUBLE, timetreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  if(ThisTask == 0)
    {
      fac = NTask / ((double) All.TotNumPart);

      for(i = 0, maxt = timetreelist[0], sumt = 0, costtotal = 0; i < NTask; i++)
        {
          costtotal += costtreelist[i];

          if(maxt < timetreelist[i])
            maxt = timetreelist[i];
          sumt += timetreelist[i];
        }

      fprintf(FdTimings, "DIRECT Nf= %d part/sec=%g | %g ia/part=%g \n", ntot, ntot / (sumt + 1.0e-20),
              ntot / (maxt * NTask), ((double) (costtotal)) / ntot);
      fprintf(FdTimings, "\n");

      fflush(FdTimings);
    }

  free(costtreelist);
  free(timetreelist);
}

#endif