00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <float.h>
00006 #include <mpi.h>
00007
00012 #ifdef PMGRID
00013 #ifdef PERIODIC
00014
00015 #ifdef NOTYPEPREFIX_FFTW
00016 #include <rfftw_mpi.h>
00017 #else
00018 #ifdef DOUBLEPRECISION_FFTW
00019 #include <drfftw_mpi.h>
00020 #else
00021 #include <srfftw_mpi.h>
00022 #endif
00023 #endif
00024
00025
00026 #include "allvars.h"
00027 #include "proto.h"
00028
00029 #define PMGRID2 (2*(PMGRID/2 + 1))
00030
00031
00032
00033
00034 static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan;
00035
00036 static int slab_to_task[PMGRID];
00037 static int *slabs_per_task;
00038 static int *first_slab_of_task;
00039 static int *meshmin_list, *meshmax_list;
00040
00041 static int slabstart_x, nslab_x, slabstart_y, nslab_y, smallest_slab;
00042
00043 static int fftsize, maxfftsize;
00044
00045 static fftw_real *rhogrid, *forcegrid, *workspace;
00046 static fftw_complex *fft_of_rhogrid;
00047
00048
00049 static FLOAT to_slab_fac;
00050
00051
00055 void pm_init_periodic(void)
00056 {
00057 int i;
00058 int slab_to_task_local[PMGRID];
00059
00060 All.Asmth[0] = ASMTH * All.BoxSize / PMGRID;
00061 All.Rcut[0] = RCUT * All.Asmth[0];
00062
00063
00064
00065 fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, PMGRID,
00066 FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
00067 fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, PMGRID, PMGRID, PMGRID,
00068 FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
00069
00070
00071
00072 rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
00073
00074 for(i = 0; i < PMGRID; i++)
00075 slab_to_task_local[i] = 0;
00076
00077 for(i = 0; i < nslab_x; i++)
00078 slab_to_task_local[slabstart_x + i] = ThisTask;
00079
00080 MPI_Allreduce(slab_to_task_local, slab_to_task, PMGRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00081
00082 MPI_Allreduce(&nslab_x, &smallest_slab, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
00083
00084 slabs_per_task = malloc(NTask * sizeof(int));
00085 MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD);
00086
00087 if(ThisTask == 0)
00088 {
00089 for(i = 0; i < NTask; i++)
00090 printf("Task=%d FFT-Slabs=%d\n", i, slabs_per_task[i]);
00091 }
00092
00093 first_slab_of_task = malloc(NTask * sizeof(int));
00094 MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD);
00095
00096 meshmin_list = malloc(3 * NTask * sizeof(int));
00097 meshmax_list = malloc(3 * NTask * sizeof(int));
00098
00099
00100 to_slab_fac = PMGRID / All.BoxSize;
00101
00102 MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00103 }
00104
00105
00113 void pm_init_periodic_allocate(int dimprod)
00114 {
00115 static int first_alloc = 1;
00116 int dimprodmax;
00117 double bytes_tot = 0;
00118 size_t bytes;
00119
00120 MPI_Allreduce(&dimprod, &dimprodmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00121
00122
00123
00124 if(!(rhogrid = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00125 {
00126 printf("failed to allocate memory for `FFT-rhogrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00127 endrun(1);
00128 }
00129 bytes_tot += bytes;
00130
00131
00132 if(!(forcegrid = (fftw_real *) malloc(bytes = imax(fftsize, dimprodmax) * sizeof(fftw_real))))
00133 {
00134 printf("failed to allocate memory for `FFT-forcegrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00135 endrun(1);
00136 }
00137 bytes_tot += bytes;
00138
00139 if(!(workspace = (fftw_real *) malloc(bytes = imax(maxfftsize, dimprodmax) * sizeof(fftw_real))))
00140 {
00141 printf("failed to allocate memory for `FFT-workspace' (%g MB).\n", bytes / (1024.0 * 1024.0));
00142 endrun(1);
00143 }
00144 bytes_tot += bytes;
00145
00146 if(first_alloc == 1)
00147 {
00148 first_alloc = 0;
00149 if(ThisTask == 0)
00150 printf("\nAllocated %g MByte for FFT data.\n\n", bytes_tot / (1024.0 * 1024.0));
00151 }
00152
00153 fft_of_rhogrid = (fftw_complex *) & rhogrid[0];
00154 }
00155
00156
00157
00160 void pm_init_periodic_free(void)
00161 {
00162
00163 free(workspace);
00164 free(forcegrid);
00165 free(rhogrid);
00166 }
00167
00168
00169
00180 void pmforce_periodic(void)
00181 {
00182 double k2, kx, ky, kz, smth;
00183 double dx, dy, dz;
00184 double fx, fy, fz, ff;
00185 double asmth2, fac, acc_dim;
00186 int i, j, slab, level, sendTask, recvTask;
00187 int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
00188 int slab_x, slab_y, slab_z;
00189 int slab_xx, slab_yy, slab_zz;
00190 int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00191 int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2];
00192 int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00193 MPI_Status status;
00194
00195
00196 if(ThisTask == 0)
00197 {
00198 printf("Starting periodic PM calculation.\n");
00199 fflush(stdout);
00200 }
00201
00202
00203 force_treefree();
00204
00205
00206 asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
00207 asmth2 *= asmth2;
00208
00209 fac = All.G / (M_PI * All.BoxSize);
00210 fac *= 1 / (2 * All.BoxSize / PMGRID);
00211
00212
00213
00214 for(j = 0; j < 3; j++)
00215 {
00216 meshmin[j] = PMGRID;
00217 meshmax[j] = 0;
00218 }
00219
00220 for(i = 0; i < NumPart; i++)
00221 {
00222 for(j = 0; j < 3; j++)
00223 {
00224 slab = to_slab_fac * P[i].Pos[j];
00225 if(slab >= PMGRID)
00226 slab = PMGRID - 1;
00227
00228 if(slab < meshmin[j])
00229 meshmin[j] = slab;
00230
00231 if(slab > meshmax[j])
00232 meshmax[j] = slab;
00233 }
00234 }
00235
00236 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00237 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00238
00239 dimx = meshmax[0] - meshmin[0] + 2;
00240 dimy = meshmax[1] - meshmin[1] + 2;
00241 dimz = meshmax[2] - meshmin[2] + 2;
00242
00243 pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
00244
00245 for(i = 0; i < dimx * dimy * dimz; i++)
00246 workspace[i] = 0;
00247
00248 for(i = 0; i < NumPart; i++)
00249 {
00250 slab_x = to_slab_fac * P[i].Pos[0];
00251 if(slab_x >= PMGRID)
00252 slab_x = PMGRID - 1;
00253 dx = to_slab_fac * P[i].Pos[0] - slab_x;
00254 slab_x -= meshmin[0];
00255 slab_xx = slab_x + 1;
00256
00257 slab_y = to_slab_fac * P[i].Pos[1];
00258 if(slab_y >= PMGRID)
00259 slab_y = PMGRID - 1;
00260 dy = to_slab_fac * P[i].Pos[1] - slab_y;
00261 slab_y -= meshmin[1];
00262 slab_yy = slab_y + 1;
00263
00264 slab_z = to_slab_fac * P[i].Pos[2];
00265 if(slab_z >= PMGRID)
00266 slab_z = PMGRID - 1;
00267 dz = to_slab_fac * P[i].Pos[2] - slab_z;
00268 slab_z -= meshmin[2];
00269 slab_zz = slab_z + 1;
00270
00271 workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00272 workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
00273 workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
00274 workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
00275
00276 workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
00277 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
00278 workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
00279 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
00280 }
00281
00282
00283 for(i = 0; i < fftsize; i++)
00284 rhogrid[i] = 0;
00285
00286 for(level = 0; level < (1 << PTask); level++)
00287 {
00288 sendTask = ThisTask;
00289 recvTask = ThisTask ^ level;
00290 if(recvTask < NTask)
00291 {
00292
00293 sendmin = 2 * PMGRID;
00294 sendmax = -1;
00295 for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
00296 if(slab_to_task[slab_x % PMGRID] == recvTask)
00297 {
00298 if(slab_x < sendmin)
00299 sendmin = slab_x;
00300 if(slab_x > sendmax)
00301 sendmax = slab_x;
00302 }
00303 if(sendmax == -1)
00304 sendmin = 0;
00305
00306
00307 recvmin = 2 * PMGRID;
00308 recvmax = -1;
00309 for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
00310 if(slab_to_task[slab_x % PMGRID] == sendTask)
00311 {
00312 if(slab_x < recvmin)
00313 recvmin = slab_x;
00314 if(slab_x > recvmax)
00315 recvmax = slab_x;
00316 }
00317 if(recvmax == -1)
00318 recvmin = 0;
00319
00320
00321 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00322 {
00323 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
00324 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
00325 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
00326
00327 if(level > 0)
00328 {
00329 MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
00330 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
00331 TAG_PERIODIC_A, forcegrid,
00332 (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
00333 recvTask, TAG_PERIODIC_A, MPI_COMM_WORLD, &status);
00334 }
00335 else
00336 {
00337 memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
00338 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
00339 }
00340
00341 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00342 {
00343 slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask];
00344
00345 if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
00346 {
00347 for(slab_y = meshmin_list[3 * recvTask + 1];
00348 slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
00349 {
00350 slab_yy = slab_y;
00351 if(slab_yy >= PMGRID)
00352 slab_yy -= PMGRID;
00353
00354 for(slab_z = meshmin_list[3 * recvTask + 2];
00355 slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
00356 {
00357 slab_zz = slab_z;
00358 if(slab_zz >= PMGRID)
00359 slab_zz -= PMGRID;
00360
00361 rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] +=
00362 forcegrid[((slab_x - recvmin) * recv_dimy +
00363 (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
00364 (slab_z - meshmin_list[3 * recvTask + 2])];
00365 }
00366 }
00367 }
00368 }
00369 }
00370 }
00371 }
00372
00373
00374
00375 rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00376
00377
00378
00379 for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
00380 for(x = 0; x < PMGRID; x++)
00381 for(z = 0; z < PMGRID / 2 + 1; z++)
00382 {
00383 if(x > PMGRID / 2)
00384 kx = x - PMGRID;
00385 else
00386 kx = x;
00387 if(y > PMGRID / 2)
00388 ky = y - PMGRID;
00389 else
00390 ky = y;
00391 if(z > PMGRID / 2)
00392 kz = z - PMGRID;
00393 else
00394 kz = z;
00395
00396 k2 = kx * kx + ky * ky + kz * kz;
00397
00398 if(k2 > 0)
00399 {
00400 smth = -exp(-k2 * asmth2) / k2;
00401
00402
00403
00404 fx = fy = fz = 1;
00405 if(kx != 0)
00406 {
00407 fx = (M_PI * kx) / PMGRID;
00408 fx = sin(fx) / fx;
00409 }
00410 if(ky != 0)
00411 {
00412 fy = (M_PI * ky) / PMGRID;
00413 fy = sin(fy) / fy;
00414 }
00415 if(kz != 0)
00416 {
00417 fz = (M_PI * kz) / PMGRID;
00418 fz = sin(fz) / fz;
00419 }
00420 ff = 1 / (fx * fy * fz);
00421 smth *= ff * ff * ff * ff;
00422
00423
00424
00425 ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z;
00426 fft_of_rhogrid[ip].re *= smth;
00427 fft_of_rhogrid[ip].im *= smth;
00428 }
00429 }
00430
00431 if(slabstart_y == 0)
00432 fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0;
00433
00434
00435
00436 rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00437
00438
00439
00440
00441
00442 dimx = meshmax[0] - meshmin[0] + 6;
00443 dimy = meshmax[1] - meshmin[1] + 6;
00444 dimz = meshmax[2] - meshmin[2] + 6;
00445
00446 for(level = 0; level < (1 << PTask); level++)
00447 {
00448 sendTask = ThisTask;
00449 recvTask = ThisTask ^ level;
00450
00451 if(recvTask < NTask)
00452 {
00453
00454
00455 sendmin = 2 * PMGRID;
00456 sendmax = -PMGRID;
00457 for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
00458 if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask)
00459 {
00460 if(slab_x < sendmin)
00461 sendmin = slab_x;
00462 if(slab_x > sendmax)
00463 sendmax = slab_x;
00464 }
00465 if(sendmax == -PMGRID)
00466 sendmin = sendmax + 1;
00467
00468
00469
00470 recvmin = 2 * PMGRID;
00471 recvmax = -PMGRID;
00472 for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
00473 if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask)
00474 {
00475 if(slab_x < recvmin)
00476 recvmin = slab_x;
00477 if(slab_x > recvmax)
00478 recvmax = slab_x;
00479 }
00480 if(recvmax == -PMGRID)
00481 recvmin = recvmax + 1;
00482
00483 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00484 {
00485 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
00486 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
00487 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
00488
00489 ncont = 1;
00490 cont_sendmin[0] = sendmin;
00491 cont_sendmax[0] = sendmax;
00492 cont_sendmin[1] = sendmax + 1;
00493 cont_sendmax[1] = sendmax;
00494
00495 cont_recvmin[0] = recvmin;
00496 cont_recvmax[0] = recvmax;
00497 cont_recvmin[1] = recvmax + 1;
00498 cont_recvmax[1] = recvmax;
00499
00500 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
00501 {
00502 if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
00503 {
00504
00505 cont_sendmax[0] = slab_x - 1;
00506 while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
00507 slab_x++;
00508 cont_sendmin[1] = slab_x;
00509 ncont++;
00510 }
00511 }
00512
00513 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00514 {
00515 if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
00516 {
00517
00518 cont_recvmax[0] = slab_x - 1;
00519 while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
00520 slab_x++;
00521 cont_recvmin[1] = slab_x;
00522 if(ncont == 1)
00523 ncont++;
00524 }
00525 }
00526
00527
00528 for(rep = 0; rep < ncont; rep++)
00529 {
00530 sendmin = cont_sendmin[rep];
00531 sendmax = cont_sendmax[rep];
00532 recvmin = cont_recvmin[rep];
00533 recvmax = cont_recvmax[rep];
00534
00535
00536 if(sendmax - sendmin >= 0)
00537 {
00538 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
00539 {
00540 slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask];
00541
00542 for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
00543 slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
00544 {
00545 slab_yy = (slab_y + PMGRID) % PMGRID;
00546
00547 for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
00548 slab_z <= meshmax_list[3 * recvTask + 2] + 4; slab_z++)
00549 {
00550 slab_zz = (slab_z + PMGRID) % PMGRID;
00551
00552 forcegrid[((slab_x - sendmin) * recv_dimy +
00553 (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
00554 slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
00555 rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz];
00556 }
00557 }
00558 }
00559 }
00560
00561 if(level > 0)
00562 {
00563 MPI_Sendrecv(forcegrid,
00564 (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
00565 MPI_BYTE, recvTask, TAG_PERIODIC_B,
00566 workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00567 (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
00568 recvTask, TAG_PERIODIC_B, MPI_COMM_WORLD, &status);
00569 }
00570 else
00571 {
00572 memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00573 forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
00574 }
00575 }
00576 }
00577 }
00578 }
00579
00580
00581 dimx = meshmax[0] - meshmin[0] + 2;
00582 dimy = meshmax[1] - meshmin[1] + 2;
00583 dimz = meshmax[2] - meshmin[2] + 2;
00584
00585 recv_dimx = meshmax[0] - meshmin[0] + 6;
00586 recv_dimy = meshmax[1] - meshmin[1] + 6;
00587 recv_dimz = meshmax[2] - meshmin[2] + 6;
00588
00589
00590 for(dim = 0; dim < 3; dim++)
00591 {
00592
00593
00594
00595 for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
00596 for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
00597 for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
00598 {
00599 xrr = xll = xr = xl = x;
00600 yrr = yll = yr = yl = y;
00601 zrr = zll = zr = zl = z;
00602
00603 switch (dim)
00604 {
00605 case 0:
00606 xr = x + 1;
00607 xrr = x + 2;
00608 xl = x - 1;
00609 xll = x - 2;
00610 break;
00611 case 1:
00612 yr = y + 1;
00613 yl = y - 1;
00614 yrr = y + 2;
00615 yll = y - 2;
00616 break;
00617 case 2:
00618 zr = z + 1;
00619 zl = z - 1;
00620 zrr = z + 2;
00621 zll = z - 2;
00622 break;
00623 }
00624
00625 forcegrid[(x * dimy + y) * dimz + z]
00626 =
00627 fac * ((4.0 / 3) *
00628 (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
00629 - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
00630 (1.0 / 6) *
00631 (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
00632 workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
00633 }
00634
00635
00636
00637 for(i = 0; i < NumPart; i++)
00638 {
00639 slab_x = to_slab_fac * P[i].Pos[0];
00640 if(slab_x >= PMGRID)
00641 slab_x = PMGRID - 1;
00642 dx = to_slab_fac * P[i].Pos[0] - slab_x;
00643 slab_x -= meshmin[0];
00644 slab_xx = slab_x + 1;
00645
00646 slab_y = to_slab_fac * P[i].Pos[1];
00647 if(slab_y >= PMGRID)
00648 slab_y = PMGRID - 1;
00649 dy = to_slab_fac * P[i].Pos[1] - slab_y;
00650 slab_y -= meshmin[1];
00651 slab_yy = slab_y + 1;
00652
00653 slab_z = to_slab_fac * P[i].Pos[2];
00654 if(slab_z >= PMGRID)
00655 slab_z = PMGRID - 1;
00656 dz = to_slab_fac * P[i].Pos[2] - slab_z;
00657 slab_z -= meshmin[2];
00658 slab_zz = slab_z + 1;
00659
00660 acc_dim =
00661 forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00662 acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
00663 acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
00664 acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
00665
00666 acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
00667 acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
00668 acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
00669 acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
00670
00671 P[i].GravPM[dim] = acc_dim;
00672 }
00673 }
00674
00675 pm_init_periodic_free();
00676 force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
00677
00678 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00679
00680 if(ThisTask == 0)
00681 {
00682 printf("done PM.\n");
00683 fflush(stdout);
00684 }
00685 }
00686
00687
00693 void pmpotential_periodic(void)
00694 {
00695 double k2, kx, ky, kz, smth;
00696 double dx, dy, dz;
00697 double fx, fy, fz, ff;
00698 double asmth2, fac;
00699 int i, j, slab, level, sendTask, recvTask;
00700 int x, y, z, ip;
00701 int slab_x, slab_y, slab_z;
00702 int slab_xx, slab_yy, slab_zz;
00703 int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00704 int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2];
00705 int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00706 MPI_Status status;
00707
00708 if(ThisTask == 0)
00709 {
00710 printf("Starting periodic PM calculation.\n");
00711 fflush(stdout);
00712 }
00713
00714 asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
00715 asmth2 *= asmth2;
00716
00717 fac = All.G / (M_PI * All.BoxSize);
00718
00719 force_treefree();
00720
00721
00722
00723 for(j = 0; j < 3; j++)
00724 {
00725 meshmin[j] = PMGRID;
00726 meshmax[j] = 0;
00727 }
00728
00729 for(i = 0; i < NumPart; i++)
00730 {
00731 for(j = 0; j < 3; j++)
00732 {
00733 slab = to_slab_fac * P[i].Pos[j];
00734 if(slab >= PMGRID)
00735 slab = PMGRID - 1;
00736
00737 if(slab < meshmin[j])
00738 meshmin[j] = slab;
00739
00740 if(slab > meshmax[j])
00741 meshmax[j] = slab;
00742 }
00743 }
00744
00745 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00746 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00747
00748 dimx = meshmax[0] - meshmin[0] + 2;
00749 dimy = meshmax[1] - meshmin[1] + 2;
00750 dimz = meshmax[2] - meshmin[2] + 2;
00751
00752 pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
00753
00754 for(i = 0; i < dimx * dimy * dimz; i++)
00755 workspace[i] = 0;
00756
00757 for(i = 0; i < NumPart; i++)
00758 {
00759 slab_x = to_slab_fac * P[i].Pos[0];
00760 if(slab_x >= PMGRID)
00761 slab_x = PMGRID - 1;
00762 dx = to_slab_fac * P[i].Pos[0] - slab_x;
00763 slab_x -= meshmin[0];
00764 slab_xx = slab_x + 1;
00765
00766 slab_y = to_slab_fac * P[i].Pos[1];
00767 if(slab_y >= PMGRID)
00768 slab_y = PMGRID - 1;
00769 dy = to_slab_fac * P[i].Pos[1] - slab_y;
00770 slab_y -= meshmin[1];
00771 slab_yy = slab_y + 1;
00772
00773 slab_z = to_slab_fac * P[i].Pos[2];
00774 if(slab_z >= PMGRID)
00775 slab_z = PMGRID - 1;
00776 dz = to_slab_fac * P[i].Pos[2] - slab_z;
00777 slab_z -= meshmin[2];
00778 slab_zz = slab_z + 1;
00779
00780 workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00781 workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
00782 workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
00783 workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
00784
00785 workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
00786 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
00787 workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
00788 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
00789 }
00790
00791
00792 for(i = 0; i < fftsize; i++)
00793 rhogrid[i] = 0;
00794
00795 for(level = 0; level < (1 << PTask); level++)
00796 {
00797 sendTask = ThisTask;
00798 recvTask = ThisTask ^ level;
00799 if(recvTask < NTask)
00800 {
00801
00802 sendmin = 2 * PMGRID;
00803 sendmax = -1;
00804 for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
00805 if(slab_to_task[slab_x % PMGRID] == recvTask)
00806 {
00807 if(slab_x < sendmin)
00808 sendmin = slab_x;
00809 if(slab_x > sendmax)
00810 sendmax = slab_x;
00811 }
00812 if(sendmax == -1)
00813 sendmin = 0;
00814
00815
00816 recvmin = 2 * PMGRID;
00817 recvmax = -1;
00818 for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
00819 if(slab_to_task[slab_x % PMGRID] == sendTask)
00820 {
00821 if(slab_x < recvmin)
00822 recvmin = slab_x;
00823 if(slab_x > recvmax)
00824 recvmax = slab_x;
00825 }
00826 if(recvmax == -1)
00827 recvmin = 0;
00828
00829
00830 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00831 {
00832 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
00833 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
00834 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
00835
00836 if(level > 0)
00837 {
00838 MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
00839 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
00840 TAG_PERIODIC_C, forcegrid,
00841 (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
00842 recvTask, TAG_PERIODIC_C, MPI_COMM_WORLD, &status);
00843 }
00844 else
00845 {
00846 memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
00847 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
00848 }
00849
00850 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00851 {
00852 slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask];
00853
00854 if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
00855 {
00856 for(slab_y = meshmin_list[3 * recvTask + 1];
00857 slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
00858 {
00859 slab_yy = slab_y;
00860 if(slab_yy >= PMGRID)
00861 slab_yy -= PMGRID;
00862
00863 for(slab_z = meshmin_list[3 * recvTask + 2];
00864 slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
00865 {
00866 slab_zz = slab_z;
00867 if(slab_zz >= PMGRID)
00868 slab_zz -= PMGRID;
00869
00870 rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] +=
00871 forcegrid[((slab_x - recvmin) * recv_dimy +
00872 (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
00873 (slab_z - meshmin_list[3 * recvTask + 2])];
00874 }
00875 }
00876 }
00877 }
00878 }
00879 }
00880 }
00881
00882
00883
00884
00885
00886 rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00887
00888
00889
00890 for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
00891 for(x = 0; x < PMGRID; x++)
00892 for(z = 0; z < PMGRID / 2 + 1; z++)
00893 {
00894 if(x > PMGRID / 2)
00895 kx = x - PMGRID;
00896 else
00897 kx = x;
00898 if(y > PMGRID / 2)
00899 ky = y - PMGRID;
00900 else
00901 ky = y;
00902 if(z > PMGRID / 2)
00903 kz = z - PMGRID;
00904 else
00905 kz = z;
00906
00907 k2 = kx * kx + ky * ky + kz * kz;
00908
00909 if(k2 > 0)
00910 {
00911 smth = -exp(-k2 * asmth2) / k2 * fac;
00912
00913 fx = fy = fz = 1;
00914 if(kx != 0)
00915 {
00916 fx = (M_PI * kx) / PMGRID;
00917 fx = sin(fx) / fx;
00918 }
00919 if(ky != 0)
00920 {
00921 fy = (M_PI * ky) / PMGRID;
00922 fy = sin(fy) / fy;
00923 }
00924 if(kz != 0)
00925 {
00926 fz = (M_PI * kz) / PMGRID;
00927 fz = sin(fz) / fz;
00928 }
00929 ff = 1 / (fx * fy * fz);
00930 smth *= ff * ff * ff * ff;
00931
00932
00933 ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z;
00934 fft_of_rhogrid[ip].re *= smth;
00935 fft_of_rhogrid[ip].im *= smth;
00936 }
00937 }
00938
00939 if(slabstart_y == 0)
00940 fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0;
00941
00942
00943
00944 rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00945
00946
00947
00948
00949
00950 dimx = meshmax[0] - meshmin[0] + 6;
00951 dimy = meshmax[1] - meshmin[1] + 6;
00952 dimz = meshmax[2] - meshmin[2] + 6;
00953
00954 for(level = 0; level < (1 << PTask); level++)
00955 {
00956 sendTask = ThisTask;
00957 recvTask = ThisTask ^ level;
00958
00959 if(recvTask < NTask)
00960 {
00961
00962
00963 sendmin = 2 * PMGRID;
00964 sendmax = -PMGRID;
00965 for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
00966 if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask)
00967 {
00968 if(slab_x < sendmin)
00969 sendmin = slab_x;
00970 if(slab_x > sendmax)
00971 sendmax = slab_x;
00972 }
00973 if(sendmax == -PMGRID)
00974 sendmin = sendmax + 1;
00975
00976
00977
00978 recvmin = 2 * PMGRID;
00979 recvmax = -PMGRID;
00980 for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
00981 if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask)
00982 {
00983 if(slab_x < recvmin)
00984 recvmin = slab_x;
00985 if(slab_x > recvmax)
00986 recvmax = slab_x;
00987 }
00988 if(recvmax == -PMGRID)
00989 recvmin = recvmax + 1;
00990
00991 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00992 {
00993 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
00994 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
00995 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
00996
00997 ncont = 1;
00998 cont_sendmin[0] = sendmin;
00999 cont_sendmax[0] = sendmax;
01000 cont_sendmin[1] = sendmax + 1;
01001 cont_sendmax[1] = sendmax;
01002
01003 cont_recvmin[0] = recvmin;
01004 cont_recvmax[0] = recvmax;
01005 cont_recvmin[1] = recvmax + 1;
01006 cont_recvmax[1] = recvmax;
01007
01008 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
01009 {
01010 if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
01011 {
01012
01013 cont_sendmax[0] = slab_x - 1;
01014 while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
01015 slab_x++;
01016 cont_sendmin[1] = slab_x;
01017 ncont++;
01018 }
01019 }
01020
01021 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
01022 {
01023 if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
01024 {
01025
01026 cont_recvmax[0] = slab_x - 1;
01027 while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
01028 slab_x++;
01029 cont_recvmin[1] = slab_x;
01030 if(ncont == 1)
01031 ncont++;
01032 }
01033 }
01034
01035
01036 for(rep = 0; rep < ncont; rep++)
01037 {
01038 sendmin = cont_sendmin[rep];
01039 sendmax = cont_sendmax[rep];
01040 recvmin = cont_recvmin[rep];
01041 recvmax = cont_recvmax[rep];
01042
01043
01044 if(sendmax - sendmin >= 0)
01045 {
01046 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
01047 {
01048 slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask];
01049
01050 for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
01051 slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
01052 {
01053 slab_yy = (slab_y + PMGRID) % PMGRID;
01054
01055 for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
01056 slab_z <= meshmax_list[3 * recvTask + 2] + 4; slab_z++)
01057 {
01058 slab_zz = (slab_z + PMGRID) % PMGRID;
01059
01060 forcegrid[((slab_x - sendmin) * recv_dimy +
01061 (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
01062 slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
01063 rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz];
01064 }
01065 }
01066 }
01067 }
01068
01069 if(level > 0)
01070 {
01071 MPI_Sendrecv(forcegrid,
01072 (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
01073 MPI_BYTE, recvTask, TAG_PERIODIC_D,
01074 workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01075 (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
01076 recvTask, TAG_PERIODIC_D, MPI_COMM_WORLD, &status);
01077 }
01078 else
01079 {
01080 memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01081 forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
01082 }
01083 }
01084 }
01085 }
01086 }
01087
01088
01089 dimx = meshmax[0] - meshmin[0] + 2;
01090 dimy = meshmax[1] - meshmin[1] + 2;
01091 dimz = meshmax[2] - meshmin[2] + 2;
01092
01093 recv_dimx = meshmax[0] - meshmin[0] + 6;
01094 recv_dimy = meshmax[1] - meshmin[1] + 6;
01095 recv_dimz = meshmax[2] - meshmin[2] + 6;
01096
01097
01098
01099 for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
01100 for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
01101 for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
01102 {
01103 forcegrid[(x * dimy + y) * dimz + z] =
01104 workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)];
01105 }
01106
01107
01108
01109
01110 for(i = 0; i < NumPart; i++)
01111 {
01112 slab_x = to_slab_fac * P[i].Pos[0];
01113 if(slab_x >= PMGRID)
01114 slab_x = PMGRID - 1;
01115 dx = to_slab_fac * P[i].Pos[0] - slab_x;
01116 slab_x -= meshmin[0];
01117 slab_xx = slab_x + 1;
01118
01119 slab_y = to_slab_fac * P[i].Pos[1];
01120 if(slab_y >= PMGRID)
01121 slab_y = PMGRID - 1;
01122 dy = to_slab_fac * P[i].Pos[1] - slab_y;
01123 slab_y -= meshmin[1];
01124 slab_yy = slab_y + 1;
01125
01126 slab_z = to_slab_fac * P[i].Pos[2];
01127 if(slab_z >= PMGRID)
01128 slab_z = PMGRID - 1;
01129 dz = to_slab_fac * P[i].Pos[2] - slab_z;
01130 slab_z -= meshmin[2];
01131 slab_zz = slab_z + 1;
01132
01133 P[i].Potential +=
01134 forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01135 P[i].Potential += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
01136 P[i].Potential += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
01137 P[i].Potential += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
01138
01139 P[i].Potential += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
01140 P[i].Potential += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
01141 P[i].Potential += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
01142 P[i].Potential += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
01143 }
01144
01145 pm_init_periodic_free();
01146 force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
01147
01148 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
01149
01150 if(ThisTask == 0)
01151 {
01152 printf("done PM-Potential.\n");
01153 fflush(stdout);
01154 }
01155 }
01156
01157 #endif
01158 #endif