00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006
00007
00013 #ifdef PMGRID
00014 #if !defined (PERIODIC) || defined (PLACEHIGHRESREGION)
00015
00016 #ifdef NOTYPEPREFIX_FFTW
00017 #include <rfftw_mpi.h>
00018 #else
00019 #ifdef DOUBLEPRECISION_FFTW
00020 #include <drfftw_mpi.h>
00021 #else
00022 #include <srfftw_mpi.h>
00023 #endif
00024 #endif
00025
00026 #include "allvars.h"
00027 #include "proto.h"
00028
00029 #define GRID (2*PMGRID)
00030 #define GRID2 (2*(GRID/2 + 1))
00031
00032
00033
00034 static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan;
00035
00036 static int slab_to_task[GRID];
00037 static int *slabs_per_task;
00038 static int *first_slab_of_task;
00039
00040 static int *meshmin_list, *meshmax_list;
00041
00042 static int slabstart_x, nslab_x, slabstart_y, nslab_y;
00043
00044 static int fftsize, maxfftsize;
00045
00046 static fftw_real *kernel[2], *rhogrid, *forcegrid, *workspace;
00047 static fftw_complex *fft_of_kernel[2], *fft_of_rhogrid;
00048
00058 void pm_init_regionsize(void)
00059 {
00060 double meshinner[2], xmin[2][3], xmax[2][3];
00061 int i, j, t;
00062
00063
00064
00065 for(j = 0; j < 3; j++)
00066 {
00067 xmin[0][j] = xmin[1][j] = 1.0e36;
00068 xmax[0][j] = xmax[1][j] = -1.0e36;
00069 }
00070
00071 for(i = 0; i < NumPart; i++)
00072 for(j = 0; j < 3; j++)
00073 {
00074 t = 0;
00075 #ifdef PLACEHIGHRESREGION
00076 if(((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00077 t = 1;
00078 #endif
00079 if(P[i].Pos[j] > xmax[t][j])
00080 xmax[t][j] = P[i].Pos[j];
00081 if(P[i].Pos[j] < xmin[t][j])
00082 xmin[t][j] = P[i].Pos[j];
00083 }
00084
00085 MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00086 MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
00087
00088 for(j = 0; j < 2; j++)
00089 {
00090 All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0];
00091 All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]);
00092 All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]);
00093 #ifdef ENLARGEREGION
00094 All.TotalMeshSize[j] *= ENLARGEREGION;
00095 #endif
00096
00097
00098 for(i = 0; i < 3; i++)
00099 {
00100 All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2;
00101 All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j];
00102 }
00103 }
00104
00105
00106
00107
00108 for(j = 0; j < 2; j++)
00109 {
00110 meshinner[j] = All.TotalMeshSize[j];
00111 All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double) (GRID - 2 - 8));
00112 }
00113
00114
00115
00116 for(j = 0; j < 2; j++)
00117 for(i = 0; i < 3; i++)
00118 {
00119 All.Corner[j][i] = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID;
00120 All.UpperCorner[j][i] = All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID);
00121 }
00122
00123
00124 #ifndef PERIODIC
00125 All.Asmth[0] = ASMTH * All.TotalMeshSize[0] / GRID;
00126 All.Rcut[0] = RCUT * All.Asmth[0];
00127 #endif
00128
00129 #ifdef PLACEHIGHRESREGION
00130 All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00131 All.Rcut[1] = RCUT * All.Asmth[1];
00132 #endif
00133
00134 #ifdef PLACEHIGHRESREGION
00135 if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0])
00136 {
00137 All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 2));
00138
00139 for(i = 0; i < 3; i++)
00140 {
00141 All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * All.Rcut[0];
00142 All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00143 }
00144
00145 if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0])
00146 {
00147 All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 10));
00148
00149 for(i = 0; i < 3; i++)
00150 {
00151 All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[j] / GRID);
00152 All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00153 }
00154 }
00155
00156 All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00157 All.Rcut[1] = RCUT * All.Asmth[1];
00158 }
00159 #endif
00160
00161 if(ThisTask == 0)
00162 {
00163 #ifndef PERIODIC
00164 printf("\nAllowed region for isolated PM mesh (coarse):\n");
00165 printf("(%g|%g|%g) -> (%g|%g|%g) ext=%g totmeshsize=%g meshsize=%g\n\n",
00166 All.Xmintot[0][0], All.Xmintot[0][1], All.Xmintot[0][2],
00167 All.Xmaxtot[0][0], All.Xmaxtot[0][1], All.Xmaxtot[0][2], meshinner[0], All.TotalMeshSize[0],
00168 All.TotalMeshSize[0] / GRID);
00169 #endif
00170 #ifdef PLACEHIGHRESREGION
00171 printf("\nAllowed region for isolated PM mesh (high-res):\n");
00172 printf("(%g|%g|%g) -> (%g|%g|%g) ext=%g totmeshsize=%g meshsize=%g\n\n",
00173 All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2],
00174 All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2],
00175 meshinner[1], All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID);
00176 #endif
00177 }
00178
00179 }
00180
00185 void pm_init_nonperiodic(void)
00186 {
00187 int i, slab_to_task_local[GRID];
00188 double bytes_tot = 0;
00189 size_t bytes;
00190
00191
00192
00193 fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00194 FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
00195 fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00196 FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
00197
00198
00199
00200 rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
00201
00202
00203 for(i = 0; i < GRID; i++)
00204 slab_to_task_local[i] = 0;
00205
00206 for(i = 0; i < nslab_x; i++)
00207 slab_to_task_local[slabstart_x + i] = ThisTask;
00208
00209 MPI_Allreduce(slab_to_task_local, slab_to_task, GRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00210
00211 slabs_per_task = malloc(NTask * sizeof(int));
00212 MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD);
00213
00214 #ifndef PERIODIC
00215 if(ThisTask == 0)
00216 {
00217 for(i = 0; i < NTask; i++)
00218 printf("Task=%d FFT-Slabs=%d\n", i, slabs_per_task[i]);
00219 }
00220 #endif
00221
00222 first_slab_of_task = malloc(NTask * sizeof(int));
00223 MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD);
00224
00225 meshmin_list = malloc(3 * NTask * sizeof(int));
00226 meshmax_list = malloc(3 * NTask * sizeof(int));
00227
00228 MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00229
00230
00231
00232 #if !defined(PERIODIC)
00233 if(!(kernel[0] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00234 {
00235 printf("failed to allocate memory for `FFT-kernel[0]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00236 endrun(1);
00237 }
00238 bytes_tot += bytes;
00239 fft_of_kernel[0] = (fftw_complex *) kernel[0];
00240 #endif
00241
00242 #if defined(PLACEHIGHRESREGION)
00243 if(!(kernel[1] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00244 {
00245 printf("failed to allocate memory for `FFT-kernel[1]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00246 endrun(1);
00247 }
00248 bytes_tot += bytes;
00249 fft_of_kernel[1] = (fftw_complex *) kernel[1];
00250 #endif
00251
00252 if(ThisTask == 0)
00253 printf("\nAllocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
00254
00255 }
00256
00257
00264 void pm_init_nonperiodic_allocate(int dimprod)
00265 {
00266 static int first_alloc = 1;
00267 int dimprodmax;
00268 double bytes_tot = 0;
00269 size_t bytes;
00270
00271 MPI_Allreduce(&dimprod, &dimprodmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00272
00273 if(!(rhogrid = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00274 {
00275 printf("failed to allocate memory for `FFT-rhogrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00276 endrun(1);
00277 }
00278 bytes_tot += bytes;
00279
00280 fft_of_rhogrid = (fftw_complex *) rhogrid;
00281
00282 if(!(forcegrid = (fftw_real *) malloc(bytes = imax(fftsize, dimprodmax) * sizeof(fftw_real))))
00283 {
00284 printf("failed to allocate memory for `FFT-forcegrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00285 endrun(1);
00286 }
00287 bytes_tot += bytes;
00288
00289 if(!(workspace = (fftw_real *) malloc(bytes = imax(maxfftsize, dimprodmax) * sizeof(fftw_real))))
00290 {
00291 printf("failed to allocate memory for `FFT-workspace' (%g MB).\n", bytes / (1024.0 * 1024.0));
00292 endrun(1);
00293 }
00294 bytes_tot += bytes;
00295
00296 if(first_alloc == 1)
00297 {
00298 first_alloc = 0;
00299 if(ThisTask == 0)
00300 printf("\nUsing %g MByte for non-periodic FFT computation.\n\n", bytes_tot / (1024.0 * 1024.0));
00301 }
00302 }
00303
00304
00309 void pm_init_nonperiodic_free(void)
00310 {
00311
00312 free(workspace);
00313 free(forcegrid);
00314 free(rhogrid);
00315 }
00316
00317
00321 void pm_setup_nonperiodic_kernel(void)
00322 {
00323 int i, j, k;
00324 double x, y, z, r, u, fac;
00325 double kx, ky, kz, k2, fx, fy, fz, ff;
00326 int ip;
00327
00328
00329
00330 pm_init_nonperiodic_allocate(0);
00331
00332 #if !defined(PERIODIC)
00333 for(i = 0; i < fftsize; i++)
00334 kernel[0][i] = 0;
00335
00336 for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00337 for(j = 0; j < GRID; j++)
00338 for(k = 0; k < GRID; k++)
00339 {
00340 x = ((double) i) / GRID;
00341 y = ((double) j) / GRID;
00342 z = ((double) k) / GRID;
00343
00344 if(x >= 0.5)
00345 x -= 1.0;
00346 if(y >= 0.5)
00347 y -= 1.0;
00348 if(z >= 0.5)
00349 z -= 1.0;
00350
00351 r = sqrt(x * x + y * y + z * z);
00352
00353 u = 0.5 * r / (((double) ASMTH) / GRID);
00354
00355 fac = 1 - erfc(u);
00356
00357 if(r > 0)
00358 kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00359 else
00360 kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00361 -1 / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00362 }
00363
00364
00365
00366 rfftwnd_mpi(fft_forward_plan, 1, kernel[0], workspace, FFTW_TRANSPOSED_ORDER);
00367 #endif
00368
00369
00370 #if defined(PLACEHIGHRESREGION)
00371 for(i = 0; i < fftsize; i++)
00372 kernel[1][i] = 0;
00373
00374 for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00375 for(j = 0; j < GRID; j++)
00376 for(k = 0; k < GRID; k++)
00377 {
00378 x = ((double) i) / GRID;
00379 y = ((double) j) / GRID;
00380 z = ((double) k) / GRID;
00381
00382 if(x >= 0.5)
00383 x -= 1.0;
00384 if(y >= 0.5)
00385 y -= 1.0;
00386 if(z >= 0.5)
00387 z -= 1.0;
00388
00389 r = sqrt(x * x + y * y + z * z);
00390
00391 u = 0.5 * r / (((double) ASMTH) / GRID);
00392
00393 fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u);
00394
00395 if(r > 0)
00396 kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00397 else
00398 {
00399 fac = 1 - All.Asmth[1] / All.Asmth[0];
00400 kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00401 -fac / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00402 }
00403 }
00404
00405
00406
00407 rfftwnd_mpi(fft_forward_plan, 1, kernel[1], workspace, FFTW_TRANSPOSED_ORDER);
00408 #endif
00409
00410
00411
00412 for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
00413 for(x = 0; x < GRID; x++)
00414 for(z = 0; z < GRID / 2 + 1; z++)
00415 {
00416 if(x > GRID / 2)
00417 kx = x - GRID;
00418 else
00419 kx = x;
00420 if(y > GRID / 2)
00421 ky = y - GRID;
00422 else
00423 ky = y;
00424 if(z > GRID / 2)
00425 kz = z - GRID;
00426 else
00427 kz = z;
00428
00429 k2 = kx * kx + ky * ky + kz * kz;
00430
00431 if(k2 > 0)
00432 {
00433 fx = fy = fz = 1;
00434 if(kx != 0)
00435 {
00436 fx = (M_PI * kx) / GRID;
00437 fx = sin(fx) / fx;
00438 }
00439 if(ky != 0)
00440 {
00441 fy = (M_PI * ky) / GRID;
00442 fy = sin(fy) / fy;
00443 }
00444 if(kz != 0)
00445 {
00446 fz = (M_PI * kz) / GRID;
00447 fz = sin(fz) / fz;
00448 }
00449 ff = 1 / (fx * fy * fz);
00450 ff = ff * ff * ff * ff;
00451
00452 ip = GRID * (GRID / 2 + 1) * (y - slabstart_y) + (GRID / 2 + 1) * x + z;
00453 #if !defined(PERIODIC)
00454 fft_of_kernel[0][ip].re *= ff;
00455 fft_of_kernel[0][ip].im *= ff;
00456 #endif
00457 #if defined(PLACEHIGHRESREGION)
00458 fft_of_kernel[1][ip].re *= ff;
00459 fft_of_kernel[1][ip].im *= ff;
00460 #endif
00461 }
00462 }
00463
00464
00465 pm_init_nonperiodic_free();
00466 }
00467
00468
00469
00477 int pmforce_nonperiodic(int grnr)
00478 {
00479 double dx, dy, dz;
00480 double fac, to_slab_fac;
00481 double re, im, acc_dim;
00482 int i, j, slab, level, sendTask, recvTask, flag, flagsum;
00483 int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
00484 int slab_x, slab_y, slab_z;
00485 int slab_xx, slab_yy, slab_zz;
00486 int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00487 int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00488 MPI_Status status;
00489
00490 if(ThisTask == 0)
00491 printf("Starting non-periodic PM calculation (grid=%d).\n", grnr);
00492
00493 fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
00494 fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID);
00495
00496 to_slab_fac = GRID / All.TotalMeshSize[grnr];
00497
00498
00499
00500
00501 for(j = 0; j < 3; j++)
00502 {
00503 meshmin[j] = GRID;
00504 meshmax[j] = 0;
00505 }
00506
00507 for(i = 0, flag = 0; i < NumPart; i++)
00508 {
00509 #ifdef PLACEHIGHRESREGION
00510 if(grnr == 0 || (grnr == 1 && ((1 << P[i].Type) & (PLACEHIGHRESREGION))))
00511 #endif
00512 {
00513 for(j = 0; j < 3; j++)
00514 {
00515 if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j])
00516 {
00517 if(flag == 0)
00518 {
00519 printf
00520 ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n",
00521 (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
00522 fflush(stdout);
00523 }
00524 flag++;
00525 break;
00526 }
00527 }
00528 }
00529
00530 if(flag > 0)
00531 continue;
00532
00533 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00534 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00535 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00536 {
00537 for(j = 0; j < 3; j++)
00538 {
00539 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00540
00541 if(slab < meshmin[j])
00542 meshmin[j] = slab;
00543
00544 if(slab > meshmax[j])
00545 meshmax[j] = slab;
00546 }
00547 }
00548 }
00549
00550
00551 MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00552 if(flagsum > 0)
00553 {
00554 if(ThisTask == 0)
00555 {
00556 printf("In total %d particles were outside allowed range.\n", flagsum);
00557 fflush(stdout);
00558 }
00559 return 1;
00560 }
00561
00562 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00563 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00564
00565 dimx = meshmax[0] - meshmin[0] + 2;
00566 dimy = meshmax[1] - meshmin[1] + 2;
00567 dimz = meshmax[2] - meshmin[2] + 2;
00568
00569
00570 force_treefree();
00571
00572 pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
00573
00574 for(i = 0; i < dimx * dimy * dimz; i++)
00575 workspace[i] = 0;
00576
00577 for(i = 0; i < NumPart; i++)
00578 {
00579 if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
00580 continue;
00581 if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
00582 continue;
00583 if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
00584 continue;
00585
00586 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00587 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00588 slab_x -= meshmin[0];
00589 slab_xx = slab_x + 1;
00590
00591 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00592 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00593 slab_y -= meshmin[1];
00594 slab_yy = slab_y + 1;
00595
00596 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00597 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00598 slab_z -= meshmin[2];
00599 slab_zz = slab_z + 1;
00600
00601 workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00602 workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
00603 workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
00604 workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
00605
00606 workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
00607 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
00608 workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
00609 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
00610 }
00611
00612
00613 for(i = 0; i < fftsize; i++)
00614 rhogrid[i] = 0;
00615
00616 for(level = 0; level < (1 << PTask); level++)
00617 {
00618 sendTask = ThisTask;
00619 recvTask = ThisTask ^ level;
00620 if(recvTask < NTask)
00621 {
00622
00623 sendmin = 2 * GRID;
00624 sendmax = -1;
00625 for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
00626 if(slab_to_task[slab_x] == recvTask)
00627 {
00628 if(slab_x < sendmin)
00629 sendmin = slab_x;
00630 if(slab_x > sendmax)
00631 sendmax = slab_x;
00632 }
00633 if(sendmax == -1)
00634 sendmin = 0;
00635
00636
00637 recvmin = 2 * GRID;
00638 recvmax = -1;
00639 for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
00640 if(slab_to_task[slab_x] == sendTask)
00641 {
00642 if(slab_x < recvmin)
00643 recvmin = slab_x;
00644 if(slab_x > recvmax)
00645 recvmax = slab_x;
00646 }
00647 if(recvmax == -1)
00648 recvmin = 0;
00649
00650 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00651 {
00652 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
00653 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
00654 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
00655
00656 if(level > 0)
00657 {
00658 MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
00659 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
00660 TAG_NONPERIOD_A, forcegrid,
00661 (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
00662 recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
00663 }
00664 else
00665 {
00666 memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
00667 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
00668 }
00669
00670 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00671 {
00672 slab_xx = slab_x - first_slab_of_task[ThisTask];
00673
00674 if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
00675 {
00676 for(slab_y = meshmin_list[3 * recvTask + 1];
00677 slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
00678 {
00679 slab_yy = slab_y;
00680
00681 for(slab_z = meshmin_list[3 * recvTask + 2];
00682 slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
00683 {
00684 slab_zz = slab_z;
00685
00686 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
00687 forcegrid[((slab_x - recvmin) * recv_dimy +
00688 (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
00689 (slab_z - meshmin_list[3 * recvTask + 2])];
00690 }
00691 }
00692 }
00693 }
00694 }
00695 }
00696 }
00697
00698
00699
00700
00701 rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00702
00703
00704
00705
00706 for(y = 0; y < nslab_y; y++)
00707 for(x = 0; x < GRID; x++)
00708 for(z = 0; z < GRID / 2 + 1; z++)
00709 {
00710 ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
00711
00712 re =
00713 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
00714 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
00715
00716 im =
00717 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
00718 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
00719
00720 fft_of_rhogrid[ip].re = re;
00721 fft_of_rhogrid[ip].im = im;
00722 }
00723
00724
00725
00726 rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736 #ifdef PLACEHIGHRESREGION
00737 if(grnr == 1)
00738 {
00739 for(j = 0; j < 3; j++)
00740 {
00741 meshmin[j] = GRID;
00742 meshmax[j] = 0;
00743 }
00744
00745 for(i = 0; i < NumPart; i++)
00746 {
00747 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00748 continue;
00749
00750
00751 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00752 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00753 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00754 {
00755 for(j = 0; j < 3; j++)
00756 {
00757 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00758
00759 if(slab < meshmin[j])
00760 meshmin[j] = slab;
00761
00762 if(slab > meshmax[j])
00763 meshmax[j] = slab;
00764 }
00765 }
00766 }
00767
00768 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00769 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00770 }
00771 #endif
00772
00773 dimx = meshmax[0] - meshmin[0] + 6;
00774 dimy = meshmax[1] - meshmin[1] + 6;
00775 dimz = meshmax[2] - meshmin[2] + 6;
00776
00777 for(j = 0; j < 3; j++)
00778 {
00779 if(meshmin[j] < 2)
00780 endrun(131231);
00781 if(meshmax[j] > GRID / 2 - 3)
00782 endrun(131288);
00783 }
00784
00785 for(level = 0; level < (1 << PTask); level++)
00786 {
00787 sendTask = ThisTask;
00788 recvTask = ThisTask ^ level;
00789
00790 if(recvTask < NTask)
00791 {
00792
00793 sendmin = 2 * GRID;
00794 sendmax = -GRID;
00795 for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
00796 if(slab_to_task[slab_x] == sendTask)
00797 {
00798 if(slab_x < sendmin)
00799 sendmin = slab_x;
00800 if(slab_x > sendmax)
00801 sendmax = slab_x;
00802 }
00803 if(sendmax == -GRID)
00804 sendmin = sendmax + 1;
00805
00806
00807
00808 recvmin = 2 * GRID;
00809 recvmax = -GRID;
00810 for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
00811 if(slab_to_task[slab_x] == recvTask)
00812 {
00813 if(slab_x < recvmin)
00814 recvmin = slab_x;
00815 if(slab_x > recvmax)
00816 recvmax = slab_x;
00817 }
00818 if(recvmax == -GRID)
00819 recvmin = recvmax + 1;
00820
00821 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
00822 {
00823 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
00824 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
00825 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
00826
00827
00828 if(sendmax - sendmin >= 0)
00829 {
00830 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
00831 {
00832 slab_xx = slab_x - first_slab_of_task[ThisTask];
00833
00834 for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
00835 slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
00836 {
00837 slab_yy = slab_y;
00838
00839 for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
00840 slab_z <= meshmax_list[3 * recvTask + 2] + 4; slab_z++)
00841 {
00842 slab_zz = slab_z;
00843
00844 forcegrid[((slab_x - sendmin) * recv_dimy +
00845 (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
00846 slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
00847 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
00848 }
00849 }
00850 }
00851 }
00852
00853 if(level > 0)
00854 {
00855 MPI_Sendrecv(forcegrid,
00856 (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
00857 MPI_BYTE, recvTask, TAG_NONPERIOD_B,
00858 workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00859 (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
00860 recvTask, TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
00861 }
00862 else
00863 {
00864 memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00865 forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
00866 }
00867 }
00868 }
00869 }
00870
00871 dimx = meshmax[0] - meshmin[0] + 2;
00872 dimy = meshmax[1] - meshmin[1] + 2;
00873 dimz = meshmax[2] - meshmin[2] + 2;
00874
00875 recv_dimx = meshmax[0] - meshmin[0] + 6;
00876 recv_dimy = meshmax[1] - meshmin[1] + 6;
00877 recv_dimz = meshmax[2] - meshmin[2] + 6;
00878
00879
00880 for(dim = 0; dim < 3; dim++)
00881 {
00882
00883
00884
00885 for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
00886 for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
00887 for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
00888 {
00889 xrr = xll = xr = xl = x;
00890 yrr = yll = yr = yl = y;
00891 zrr = zll = zr = zl = z;
00892
00893 switch (dim)
00894 {
00895 case 0:
00896 xr = x + 1;
00897 xrr = x + 2;
00898 xl = x - 1;
00899 xll = x - 2;
00900 break;
00901 case 1:
00902 yr = y + 1;
00903 yl = y - 1;
00904 yrr = y + 2;
00905 yll = y - 2;
00906 break;
00907 case 2:
00908 zr = z + 1;
00909 zl = z - 1;
00910 zrr = z + 2;
00911 zll = z - 2;
00912 break;
00913 }
00914
00915 forcegrid[(x * dimy + y) * dimz + z]
00916 =
00917 fac * ((4.0 / 3) *
00918 (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
00919 - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
00920 (1.0 / 6) *
00921 (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
00922 workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
00923 }
00924
00925
00926
00927
00928 for(i = 0; i < NumPart; i++)
00929 {
00930 #ifdef PLACEHIGHRESREGION
00931 if(grnr == 1)
00932 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00933 continue;
00934 #endif
00935 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00936 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00937 slab_x -= meshmin[0];
00938 slab_xx = slab_x + 1;
00939
00940 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00941 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00942 slab_y -= meshmin[1];
00943 slab_yy = slab_y + 1;
00944
00945 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00946 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00947 slab_z -= meshmin[2];
00948 slab_zz = slab_z + 1;
00949
00950 acc_dim =
00951 forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00952 acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
00953 acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
00954 acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
00955
00956 acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
00957 acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
00958 acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
00959 acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
00960
00961 P[i].GravPM[dim] += acc_dim;
00962 }
00963 }
00964
00965 pm_init_nonperiodic_free();
00966 force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
00967 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00968
00969 if(ThisTask == 0)
00970 printf("done PM.\n");
00971
00972 return 0;
00973 }
00974
00975
00976
00977
00983 void pmpotential_nonperiodic(int grnr)
00984 {
00985 double dx, dy, dz;
00986 double fac, to_slab_fac;
00987 double re, im, pot;
00988 int i, j, slab, level, sendTask, recvTask;
00989 int x, y, z, ip;
00990 int slab_x, slab_y, slab_z;
00991 int slab_xx, slab_yy, slab_zz;
00992 int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00993 int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00994 MPI_Status status;
00995
00996
00997 if(ThisTask == 0)
00998 printf("Starting non-periodic PM-potential calculation.\n");
00999
01000 fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);
01001
01002 to_slab_fac = GRID / All.TotalMeshSize[grnr];
01003
01004
01005
01006 for(j = 0; j < 3; j++)
01007 {
01008 meshmin[j] = GRID;
01009 meshmax[j] = 0;
01010 }
01011
01012 for(i = 0; i < NumPart; i++)
01013 {
01014 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01015 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01016 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01017 {
01018 for(j = 0; j < 3; j++)
01019 {
01020 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01021
01022 if(slab < meshmin[j])
01023 meshmin[j] = slab;
01024
01025 if(slab > meshmax[j])
01026 meshmax[j] = slab;
01027 }
01028 }
01029 }
01030
01031 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01032 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01033
01034 dimx = meshmax[0] - meshmin[0] + 2;
01035 dimy = meshmax[1] - meshmin[1] + 2;
01036 dimz = meshmax[2] - meshmin[2] + 2;
01037
01038
01039 force_treefree();
01040
01041 pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
01042
01043 for(i = 0; i < dimx * dimy * dimz; i++)
01044 workspace[i] = 0;
01045
01046 for(i = 0; i < NumPart; i++)
01047 {
01048 if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
01049 continue;
01050 if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
01051 continue;
01052 if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
01053 continue;
01054
01055 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01056 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01057 slab_x -= meshmin[0];
01058 slab_xx = slab_x + 1;
01059
01060 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01061 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01062 slab_y -= meshmin[1];
01063 slab_yy = slab_y + 1;
01064
01065 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01066 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01067 slab_z -= meshmin[2];
01068 slab_zz = slab_z + 1;
01069
01070 workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01071 workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
01072 workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
01073 workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
01074
01075 workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
01076 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
01077 workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
01078 workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
01079 }
01080
01081
01082 for(i = 0; i < fftsize; i++)
01083 rhogrid[i] = 0;
01084
01085 for(level = 0; level < (1 << PTask); level++)
01086 {
01087 sendTask = ThisTask;
01088 recvTask = ThisTask ^ level;
01089 if(recvTask < NTask)
01090 {
01091
01092 sendmin = 2 * GRID;
01093 sendmax = -1;
01094 for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
01095 if(slab_to_task[slab_x] == recvTask)
01096 {
01097 if(slab_x < sendmin)
01098 sendmin = slab_x;
01099 if(slab_x > sendmax)
01100 sendmax = slab_x;
01101 }
01102 if(sendmax == -1)
01103 sendmin = 0;
01104
01105
01106 recvmin = 2 * GRID;
01107 recvmax = -1;
01108 for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
01109 if(slab_to_task[slab_x] == sendTask)
01110 {
01111 if(slab_x < recvmin)
01112 recvmin = slab_x;
01113 if(slab_x > recvmax)
01114 recvmax = slab_x;
01115 }
01116 if(recvmax == -1)
01117 recvmin = 0;
01118
01119 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
01120 {
01121 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
01122 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
01123 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
01124
01125 if(level > 0)
01126 {
01127 MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
01128 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
01129 TAG_NONPERIOD_C, forcegrid,
01130 (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
01131 recvTask, TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
01132 }
01133 else
01134 {
01135 memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
01136 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
01137 }
01138
01139 for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
01140 {
01141 slab_xx = slab_x - first_slab_of_task[ThisTask];
01142
01143 if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
01144 {
01145 for(slab_y = meshmin_list[3 * recvTask + 1];
01146 slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
01147 {
01148 slab_yy = slab_y;
01149
01150 for(slab_z = meshmin_list[3 * recvTask + 2];
01151 slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
01152 {
01153 slab_zz = slab_z;
01154
01155 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
01156 forcegrid[((slab_x - recvmin) * recv_dimy +
01157 (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
01158 (slab_z - meshmin_list[3 * recvTask + 2])];
01159 }
01160 }
01161 }
01162 }
01163 }
01164 }
01165 }
01166
01167
01168
01169
01170 rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01171
01172
01173
01174
01175 for(y = 0; y < nslab_y; y++)
01176 for(x = 0; x < GRID; x++)
01177 for(z = 0; z < GRID / 2 + 1; z++)
01178 {
01179 ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
01180
01181 re =
01182 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
01183 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
01184
01185 im =
01186 fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
01187 fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
01188
01189 fft_of_rhogrid[ip].re = fac * re;
01190 fft_of_rhogrid[ip].im = fac * im;
01191 }
01192
01193
01194
01195 rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205 #ifdef PLACEHIGHRESREGION
01206 if(grnr == 1)
01207 {
01208 for(j = 0; j < 3; j++)
01209 {
01210 meshmin[j] = GRID;
01211 meshmax[j] = 0;
01212 }
01213
01214 for(i = 0; i < NumPart; i++)
01215 {
01216 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01217 continue;
01218
01219
01220 if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01221 if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01222 if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01223 {
01224 for(j = 0; j < 3; j++)
01225 {
01226 slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01227
01228 if(slab < meshmin[j])
01229 meshmin[j] = slab;
01230
01231 if(slab > meshmax[j])
01232 meshmax[j] = slab;
01233 }
01234 }
01235 }
01236
01237 MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01238 MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01239 }
01240 #endif
01241
01242 dimx = meshmax[0] - meshmin[0] + 6;
01243 dimy = meshmax[1] - meshmin[1] + 6;
01244 dimz = meshmax[2] - meshmin[2] + 6;
01245
01246 for(j = 0; j < 3; j++)
01247 {
01248 if(meshmin[j] < 2)
01249 endrun(131231);
01250 if(meshmax[j] > GRID / 2 - 3)
01251 endrun(131288);
01252 }
01253
01254 for(level = 0; level < (1 << PTask); level++)
01255 {
01256 sendTask = ThisTask;
01257 recvTask = ThisTask ^ level;
01258
01259 if(recvTask < NTask)
01260 {
01261
01262 sendmin = 2 * GRID;
01263 sendmax = -GRID;
01264 for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
01265 if(slab_to_task[slab_x] == sendTask)
01266 {
01267 if(slab_x < sendmin)
01268 sendmin = slab_x;
01269 if(slab_x > sendmax)
01270 sendmax = slab_x;
01271 }
01272 if(sendmax == -GRID)
01273 sendmin = sendmax + 1;
01274
01275
01276
01277 recvmin = 2 * GRID;
01278 recvmax = -GRID;
01279 for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
01280 if(slab_to_task[slab_x] == recvTask)
01281 {
01282 if(slab_x < recvmin)
01283 recvmin = slab_x;
01284 if(slab_x > recvmax)
01285 recvmax = slab_x;
01286 }
01287 if(recvmax == -GRID)
01288 recvmin = recvmax + 1;
01289
01290 if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)
01291 {
01292 recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
01293 recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
01294 recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
01295
01296
01297 if(sendmax - sendmin >= 0)
01298 {
01299 for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
01300 {
01301 slab_xx = slab_x - first_slab_of_task[ThisTask];
01302
01303 for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
01304 slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
01305 {
01306 slab_yy = slab_y;
01307
01308 for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
01309 slab_z <= meshmax_list[3 * recvTask + 2] + 4; slab_z++)
01310 {
01311 slab_zz = slab_z;
01312
01313 forcegrid[((slab_x - sendmin) * recv_dimy +
01314 (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
01315 slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
01316 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
01317 }
01318 }
01319 }
01320 }
01321
01322 if(level > 0)
01323 {
01324 MPI_Sendrecv(forcegrid,
01325 (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
01326 MPI_BYTE, recvTask, TAG_NONPERIOD_D,
01327 workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01328 (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
01329 recvTask, TAG_NONPERIOD_D, MPI_COMM_WORLD, &status);
01330 }
01331 else
01332 {
01333 memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01334 forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
01335 }
01336 }
01337 }
01338 }
01339
01340 dimx = meshmax[0] - meshmin[0] + 2;
01341 dimy = meshmax[1] - meshmin[1] + 2;
01342 dimz = meshmax[2] - meshmin[2] + 2;
01343
01344 recv_dimx = meshmax[0] - meshmin[0] + 6;
01345 recv_dimy = meshmax[1] - meshmin[1] + 6;
01346 recv_dimz = meshmax[2] - meshmin[2] + 6;
01347
01348
01349 for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
01350 for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
01351 for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
01352 {
01353 forcegrid[(x * dimy + y) * dimz + z]
01354 = workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)];
01355 }
01356
01357
01358
01359
01360 for(i = 0; i < NumPart; i++)
01361 {
01362 #ifdef PLACEHIGHRESREGION
01363 if(grnr == 1)
01364 if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01365 continue;
01366 #endif
01367 slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01368 dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01369 slab_x -= meshmin[0];
01370 slab_xx = slab_x + 1;
01371
01372 slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01373 dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01374 slab_y -= meshmin[1];
01375 slab_yy = slab_y + 1;
01376
01377 slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01378 dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01379 slab_z -= meshmin[2];
01380 slab_zz = slab_z + 1;
01381
01382 pot = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01383 pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
01384 pot += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
01385 pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
01386
01387 pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
01388 pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
01389 pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
01390 pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
01391
01392 P[i].Potential += pot;
01393 }
01394
01395 pm_init_nonperiodic_free();
01396 force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
01397 All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
01398
01399 if(ThisTask == 0)
01400 printf("done PM-potential.\n");
01401 }
01402
01403
01404 #endif
01405 #endif