Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

pm_nonperiodic.c

Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include <math.h>
00005 #include <mpi.h>
00006 
00007 
00013 #ifdef PMGRID
00014 #if !defined (PERIODIC) || defined (PLACEHIGHRESREGION)
00015 
00016 #ifdef NOTYPEPREFIX_FFTW
00017 #include        <rfftw_mpi.h>
00018 #else
00019 #ifdef DOUBLEPRECISION_FFTW
00020 #include     <drfftw_mpi.h>     /* double precision FFTW */
00021 #else
00022 #include     <srfftw_mpi.h>
00023 #endif
00024 #endif
00025 
00026 #include "allvars.h"
00027 #include "proto.h"
00028 
00029 #define  GRID  (2*PMGRID)
00030 #define  GRID2 (2*(GRID/2 + 1))
00031 
00032 
00033 
00034 static rfftwnd_mpi_plan fft_forward_plan, fft_inverse_plan;
00035 
00036 static int slab_to_task[GRID];
00037 static int *slabs_per_task;
00038 static int *first_slab_of_task;
00039 
00040 static int *meshmin_list, *meshmax_list;
00041 
00042 static int slabstart_x, nslab_x, slabstart_y, nslab_y;
00043 
00044 static int fftsize, maxfftsize;
00045 
00046 static fftw_real *kernel[2], *rhogrid, *forcegrid, *workspace;
00047 static fftw_complex *fft_of_kernel[2], *fft_of_rhogrid;
00048 
00058 void pm_init_regionsize(void)
00059 {
00060   double meshinner[2], xmin[2][3], xmax[2][3];
00061   int i, j, t;
00062 
00063   /* find enclosing rectangle */
00064 
00065   for(j = 0; j < 3; j++)
00066     {
00067       xmin[0][j] = xmin[1][j] = 1.0e36;
00068       xmax[0][j] = xmax[1][j] = -1.0e36;
00069     }
00070 
00071   for(i = 0; i < NumPart; i++)
00072     for(j = 0; j < 3; j++)
00073       {
00074         t = 0;
00075 #ifdef PLACEHIGHRESREGION
00076         if(((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00077           t = 1;
00078 #endif
00079         if(P[i].Pos[j] > xmax[t][j])
00080           xmax[t][j] = P[i].Pos[j];
00081         if(P[i].Pos[j] < xmin[t][j])
00082           xmin[t][j] = P[i].Pos[j];
00083       }
00084 
00085   MPI_Allreduce(xmin, All.Xmintot, 6, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
00086   MPI_Allreduce(xmax, All.Xmaxtot, 6, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
00087 
00088   for(j = 0; j < 2; j++)
00089     {
00090       All.TotalMeshSize[j] = All.Xmaxtot[j][0] - All.Xmintot[j][0];
00091       All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][1] - All.Xmintot[j][1]);
00092       All.TotalMeshSize[j] = dmax(All.TotalMeshSize[j], All.Xmaxtot[j][2] - All.Xmintot[j][2]);
00093 #ifdef ENLARGEREGION
00094       All.TotalMeshSize[j] *= ENLARGEREGION;
00095 #endif
00096 
00097       /* symmetrize the box onto the center */
00098       for(i = 0; i < 3; i++)
00099         {
00100           All.Xmintot[j][i] = (All.Xmintot[j][i] + All.Xmaxtot[j][i]) / 2 - All.TotalMeshSize[j] / 2;
00101           All.Xmaxtot[j][i] = All.Xmintot[j][i] + All.TotalMeshSize[j];
00102         }
00103     }
00104 
00105   /* this will produce enough room for zero-padding and buffer region to
00106      allow finite differencing of the potential  */
00107 
00108   for(j = 0; j < 2; j++)
00109     {
00110       meshinner[j] = All.TotalMeshSize[j];
00111       All.TotalMeshSize[j] *= 2.001 * (GRID) / ((double) (GRID - 2 - 8));
00112     }
00113 
00114   /* move lower left corner by two cells to allow finite differencing of the potential by a 4-point function */
00115 
00116   for(j = 0; j < 2; j++)
00117     for(i = 0; i < 3; i++)
00118       {
00119         All.Corner[j][i] = All.Xmintot[j][i] - 2.0005 * All.TotalMeshSize[j] / GRID;
00120         All.UpperCorner[j][i] = All.Corner[j][i] + (GRID / 2 - 1) * (All.TotalMeshSize[j] / GRID);
00121       }
00122 
00123 
00124 #ifndef PERIODIC
00125   All.Asmth[0] = ASMTH * All.TotalMeshSize[0] / GRID;
00126   All.Rcut[0] = RCUT * All.Asmth[0];
00127 #endif
00128 
00129 #ifdef PLACEHIGHRESREGION
00130   All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00131   All.Rcut[1] = RCUT * All.Asmth[1];
00132 #endif
00133 
00134 #ifdef PLACEHIGHRESREGION
00135   if(2 * All.TotalMeshSize[1] / GRID < All.Rcut[0])
00136     {
00137       All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 2));
00138 
00139       for(i = 0; i < 3; i++)
00140         {
00141           All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * All.Rcut[0];
00142           All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00143         }
00144 
00145       if(2 * All.TotalMeshSize[1] / GRID > All.Rcut[0])
00146         {
00147           All.TotalMeshSize[1] = 2 * (meshinner[1] + 2 * All.Rcut[0]) * (GRID) / ((double) (GRID - 10));
00148 
00149           for(i = 0; i < 3; i++)
00150             {
00151               All.Corner[1][i] = All.Xmintot[1][i] - 1.0001 * (All.Rcut[0] + 2 * All.TotalMeshSize[j] / GRID);
00152               All.UpperCorner[1][i] = All.Corner[1][i] + (GRID / 2 - 1) * (All.TotalMeshSize[1] / GRID);
00153             }
00154         }
00155 
00156       All.Asmth[1] = ASMTH * All.TotalMeshSize[1] / GRID;
00157       All.Rcut[1] = RCUT * All.Asmth[1];
00158     }
00159 #endif
00160 
00161   if(ThisTask == 0)
00162     {
00163 #ifndef PERIODIC
00164       printf("\nAllowed region for isolated PM mesh (coarse):\n");
00165       printf("(%g|%g|%g)  -> (%g|%g|%g)   ext=%g  totmeshsize=%g  meshsize=%g\n\n",
00166              All.Xmintot[0][0], All.Xmintot[0][1], All.Xmintot[0][2],
00167              All.Xmaxtot[0][0], All.Xmaxtot[0][1], All.Xmaxtot[0][2], meshinner[0], All.TotalMeshSize[0],
00168              All.TotalMeshSize[0] / GRID);
00169 #endif
00170 #ifdef PLACEHIGHRESREGION
00171       printf("\nAllowed region for isolated PM mesh (high-res):\n");
00172       printf("(%g|%g|%g)  -> (%g|%g|%g)   ext=%g  totmeshsize=%g  meshsize=%g\n\n",
00173              All.Xmintot[1][0], All.Xmintot[1][1], All.Xmintot[1][2],
00174              All.Xmaxtot[1][0], All.Xmaxtot[1][1], All.Xmaxtot[1][2],
00175              meshinner[1], All.TotalMeshSize[1], All.TotalMeshSize[1] / GRID);
00176 #endif
00177     }
00178 
00179 }
00180 
00185 void pm_init_nonperiodic(void)
00186 {
00187   int i, slab_to_task_local[GRID];
00188   double bytes_tot = 0;
00189   size_t bytes;
00190 
00191   /* Set up the FFTW plan files. */
00192 
00193   fft_forward_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00194                                              FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
00195   fft_inverse_plan = rfftw3d_mpi_create_plan(MPI_COMM_WORLD, GRID, GRID, GRID,
00196                                              FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
00197 
00198   /* Workspace out the ranges on each processor. */
00199 
00200   rfftwnd_mpi_local_sizes(fft_forward_plan, &nslab_x, &slabstart_x, &nslab_y, &slabstart_y, &fftsize);
00201 
00202 
00203   for(i = 0; i < GRID; i++)
00204     slab_to_task_local[i] = 0;
00205 
00206   for(i = 0; i < nslab_x; i++)
00207     slab_to_task_local[slabstart_x + i] = ThisTask;
00208 
00209   MPI_Allreduce(slab_to_task_local, slab_to_task, GRID, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00210 
00211   slabs_per_task = malloc(NTask * sizeof(int));
00212   MPI_Allgather(&nslab_x, 1, MPI_INT, slabs_per_task, 1, MPI_INT, MPI_COMM_WORLD);
00213 
00214 #ifndef PERIODIC
00215   if(ThisTask == 0)
00216     {
00217       for(i = 0; i < NTask; i++)
00218         printf("Task=%d  FFT-Slabs=%d\n", i, slabs_per_task[i]);
00219     }
00220 #endif
00221 
00222   first_slab_of_task = malloc(NTask * sizeof(int));
00223   MPI_Allgather(&slabstart_x, 1, MPI_INT, first_slab_of_task, 1, MPI_INT, MPI_COMM_WORLD);
00224 
00225   meshmin_list = malloc(3 * NTask * sizeof(int));
00226   meshmax_list = malloc(3 * NTask * sizeof(int));
00227 
00228   MPI_Allreduce(&fftsize, &maxfftsize, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00229 
00230   /* now allocate memory to hold the FFT fields */
00231 
00232 #if !defined(PERIODIC)
00233   if(!(kernel[0] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00234     {
00235       printf("failed to allocate memory for `FFT-kernel[0]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00236       endrun(1);
00237     }
00238   bytes_tot += bytes;
00239   fft_of_kernel[0] = (fftw_complex *) kernel[0];
00240 #endif
00241 
00242 #if defined(PLACEHIGHRESREGION)
00243   if(!(kernel[1] = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00244     {
00245       printf("failed to allocate memory for `FFT-kernel[1]' (%g MB).\n", bytes / (1024.0 * 1024.0));
00246       endrun(1);
00247     }
00248   bytes_tot += bytes;
00249   fft_of_kernel[1] = (fftw_complex *) kernel[1];
00250 #endif
00251 
00252   if(ThisTask == 0)
00253     printf("\nAllocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
00254 
00255 }
00256 
00257 
00264 void pm_init_nonperiodic_allocate(int dimprod)
00265 {
00266   static int first_alloc = 1;
00267   int dimprodmax;
00268   double bytes_tot = 0;
00269   size_t bytes;
00270 
00271   MPI_Allreduce(&dimprod, &dimprodmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
00272 
00273   if(!(rhogrid = (fftw_real *) malloc(bytes = fftsize * sizeof(fftw_real))))
00274     {
00275       printf("failed to allocate memory for `FFT-rhogrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00276       endrun(1);
00277     }
00278   bytes_tot += bytes;
00279 
00280   fft_of_rhogrid = (fftw_complex *) rhogrid;
00281 
00282   if(!(forcegrid = (fftw_real *) malloc(bytes = imax(fftsize, dimprodmax) * sizeof(fftw_real))))
00283     {
00284       printf("failed to allocate memory for `FFT-forcegrid' (%g MB).\n", bytes / (1024.0 * 1024.0));
00285       endrun(1);
00286     }
00287   bytes_tot += bytes;
00288 
00289   if(!(workspace = (fftw_real *) malloc(bytes = imax(maxfftsize, dimprodmax) * sizeof(fftw_real))))
00290     {
00291       printf("failed to allocate memory for `FFT-workspace' (%g MB).\n", bytes / (1024.0 * 1024.0));
00292       endrun(1);
00293     }
00294   bytes_tot += bytes;
00295 
00296   if(first_alloc == 1)
00297     {
00298       first_alloc = 0;
00299       if(ThisTask == 0)
00300         printf("\nUsing %g MByte for non-periodic FFT computation.\n\n", bytes_tot / (1024.0 * 1024.0));
00301     }
00302 }
00303 
00304 
00309 void pm_init_nonperiodic_free(void)
00310 {
00311   /* deallocate memory */
00312   free(workspace);
00313   free(forcegrid);
00314   free(rhogrid);
00315 }
00316 
00317 
00321 void pm_setup_nonperiodic_kernel(void)
00322 {
00323   int i, j, k;
00324   double x, y, z, r, u, fac;
00325   double kx, ky, kz, k2, fx, fy, fz, ff;
00326   int ip;
00327 
00328   /* now set up kernel and its Fourier transform */
00329 
00330   pm_init_nonperiodic_allocate(0);
00331 
00332 #if !defined(PERIODIC)
00333   for(i = 0; i < fftsize; i++)  /* clear local density field */
00334     kernel[0][i] = 0;
00335 
00336   for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00337     for(j = 0; j < GRID; j++)
00338       for(k = 0; k < GRID; k++)
00339         {
00340           x = ((double) i) / GRID;
00341           y = ((double) j) / GRID;
00342           z = ((double) k) / GRID;
00343 
00344           if(x >= 0.5)
00345             x -= 1.0;
00346           if(y >= 0.5)
00347             y -= 1.0;
00348           if(z >= 0.5)
00349             z -= 1.0;
00350 
00351           r = sqrt(x * x + y * y + z * z);
00352 
00353           u = 0.5 * r / (((double) ASMTH) / GRID);
00354 
00355           fac = 1 - erfc(u);
00356 
00357           if(r > 0)
00358             kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00359           else
00360             kernel[0][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00361               -1 / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00362         }
00363 
00364   /* do the forward transform of the kernel */
00365 
00366   rfftwnd_mpi(fft_forward_plan, 1, kernel[0], workspace, FFTW_TRANSPOSED_ORDER);
00367 #endif
00368 
00369 
00370 #if defined(PLACEHIGHRESREGION)
00371   for(i = 0; i < fftsize; i++)  /* clear local density field */
00372     kernel[1][i] = 0;
00373 
00374   for(i = slabstart_x; i < (slabstart_x + nslab_x); i++)
00375     for(j = 0; j < GRID; j++)
00376       for(k = 0; k < GRID; k++)
00377         {
00378           x = ((double) i) / GRID;
00379           y = ((double) j) / GRID;
00380           z = ((double) k) / GRID;
00381 
00382           if(x >= 0.5)
00383             x -= 1.0;
00384           if(y >= 0.5)
00385             y -= 1.0;
00386           if(z >= 0.5)
00387             z -= 1.0;
00388 
00389           r = sqrt(x * x + y * y + z * z);
00390 
00391           u = 0.5 * r / (((double) ASMTH) / GRID);
00392 
00393           fac = erfc(u * All.Asmth[1] / All.Asmth[0]) - erfc(u);
00394 
00395           if(r > 0)
00396             kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] = -fac / r;
00397           else
00398             {
00399               fac = 1 - All.Asmth[1] / All.Asmth[0];
00400               kernel[1][GRID * GRID2 * (i - slabstart_x) + GRID2 * j + k] =
00401                 -fac / (sqrt(M_PI) * (((double) ASMTH) / GRID));
00402             }
00403         }
00404 
00405   /* do the forward transform of the kernel */
00406 
00407   rfftwnd_mpi(fft_forward_plan, 1, kernel[1], workspace, FFTW_TRANSPOSED_ORDER);
00408 #endif
00409 
00410   /* deconvolve the Greens function twice with the CIC kernel */
00411 
00412   for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
00413     for(x = 0; x < GRID; x++)
00414       for(z = 0; z < GRID / 2 + 1; z++)
00415         {
00416           if(x > GRID / 2)
00417             kx = x - GRID;
00418           else
00419             kx = x;
00420           if(y > GRID / 2)
00421             ky = y - GRID;
00422           else
00423             ky = y;
00424           if(z > GRID / 2)
00425             kz = z - GRID;
00426           else
00427             kz = z;
00428 
00429           k2 = kx * kx + ky * ky + kz * kz;
00430 
00431           if(k2 > 0)
00432             {
00433               fx = fy = fz = 1;
00434               if(kx != 0)
00435                 {
00436                   fx = (M_PI * kx) / GRID;
00437                   fx = sin(fx) / fx;
00438                 }
00439               if(ky != 0)
00440                 {
00441                   fy = (M_PI * ky) / GRID;
00442                   fy = sin(fy) / fy;
00443                 }
00444               if(kz != 0)
00445                 {
00446                   fz = (M_PI * kz) / GRID;
00447                   fz = sin(fz) / fz;
00448                 }
00449               ff = 1 / (fx * fy * fz);
00450               ff = ff * ff * ff * ff;
00451 
00452               ip = GRID * (GRID / 2 + 1) * (y - slabstart_y) + (GRID / 2 + 1) * x + z;
00453 #if !defined(PERIODIC)
00454               fft_of_kernel[0][ip].re *= ff;
00455               fft_of_kernel[0][ip].im *= ff;
00456 #endif
00457 #if defined(PLACEHIGHRESREGION)
00458               fft_of_kernel[1][ip].re *= ff;
00459               fft_of_kernel[1][ip].im *= ff;
00460 #endif
00461             }
00462         }
00463   /* end deconvolution */
00464 
00465   pm_init_nonperiodic_free();
00466 }
00467 
00468 
00469 
00477 int pmforce_nonperiodic(int grnr)
00478 {
00479   double dx, dy, dz;
00480   double fac, to_slab_fac;
00481   double re, im, acc_dim;
00482   int i, j, slab, level, sendTask, recvTask, flag, flagsum;
00483   int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
00484   int slab_x, slab_y, slab_z;
00485   int slab_xx, slab_yy, slab_zz;
00486   int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00487   int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00488   MPI_Status status;
00489 
00490   if(ThisTask == 0)
00491     printf("Starting non-periodic PM calculation (grid=%d).\n", grnr);
00492 
00493   fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);       /* to get potential */
00494   fac *= 1 / (2 * All.TotalMeshSize[grnr] / GRID);      /* for finite differencing */
00495 
00496   to_slab_fac = GRID / All.TotalMeshSize[grnr];
00497 
00498 
00499   /* first, establish the extension of the local patch in GRID (for binning) */
00500 
00501   for(j = 0; j < 3; j++)
00502     {
00503       meshmin[j] = GRID;
00504       meshmax[j] = 0;
00505     }
00506 
00507   for(i = 0, flag = 0; i < NumPart; i++)
00508     {
00509 #ifdef PLACEHIGHRESREGION
00510       if(grnr == 0 || (grnr == 1 && ((1 << P[i].Type) & (PLACEHIGHRESREGION))))
00511 #endif
00512         {
00513           for(j = 0; j < 3; j++)
00514             {
00515               if(P[i].Pos[j] < All.Xmintot[grnr][j] || P[i].Pos[j] > All.Xmaxtot[grnr][j])
00516                 {
00517                   if(flag == 0)
00518                     {
00519                       printf
00520                         ("Particle Id=%d on task=%d with coordinates (%g|%g|%g) lies outside PM mesh.\nStopping\n",
00521                          (int)P[i].ID, ThisTask, P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]);
00522                       fflush(stdout);
00523                     }
00524                   flag++;
00525                   break;
00526                 }
00527             }
00528         }
00529 
00530       if(flag > 0)
00531         continue;
00532 
00533       if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00534         if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00535           if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00536             {
00537               for(j = 0; j < 3; j++)
00538                 {
00539                   slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00540 
00541                   if(slab < meshmin[j])
00542                     meshmin[j] = slab;
00543 
00544                   if(slab > meshmax[j])
00545                     meshmax[j] = slab;
00546                 }
00547             }
00548     }
00549 
00550 
00551   MPI_Allreduce(&flag, &flagsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
00552   if(flagsum > 0)
00553     {
00554       if(ThisTask == 0)
00555         {
00556           printf("In total %d particles were outside allowed range.\n", flagsum);
00557           fflush(stdout);
00558         }
00559       return 1;                 /* error - need to return because particle were outside allowed range */
00560     }
00561 
00562   MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00563   MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00564 
00565   dimx = meshmax[0] - meshmin[0] + 2;
00566   dimy = meshmax[1] - meshmin[1] + 2;
00567   dimz = meshmax[2] - meshmin[2] + 2;
00568 
00569 
00570   force_treefree();
00571 
00572   pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
00573 
00574   for(i = 0; i < dimx * dimy * dimz; i++)
00575     workspace[i] = 0;
00576 
00577   for(i = 0; i < NumPart; i++)
00578     {
00579       if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
00580         continue;
00581       if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
00582         continue;
00583       if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
00584         continue;
00585 
00586       slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00587       dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00588       slab_x -= meshmin[0];
00589       slab_xx = slab_x + 1;
00590 
00591       slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00592       dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00593       slab_y -= meshmin[1];
00594       slab_yy = slab_y + 1;
00595 
00596       slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00597       dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00598       slab_z -= meshmin[2];
00599       slab_zz = slab_z + 1;
00600 
00601       workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00602       workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
00603       workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
00604       workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
00605 
00606       workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
00607       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
00608       workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
00609       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
00610     }
00611 
00612 
00613   for(i = 0; i < fftsize; i++)  /* clear local density field */
00614     rhogrid[i] = 0;
00615 
00616   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
00617     {
00618       sendTask = ThisTask;
00619       recvTask = ThisTask ^ level;
00620       if(recvTask < NTask)
00621         {
00622           /* check how much we have to send */
00623           sendmin = 2 * GRID;
00624           sendmax = -1;
00625           for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
00626             if(slab_to_task[slab_x] == recvTask)
00627               {
00628                 if(slab_x < sendmin)
00629                   sendmin = slab_x;
00630                 if(slab_x > sendmax)
00631                   sendmax = slab_x;
00632               }
00633           if(sendmax == -1)
00634             sendmin = 0;
00635 
00636           /* check how much we have to receive */
00637           recvmin = 2 * GRID;
00638           recvmax = -1;
00639           for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
00640             if(slab_to_task[slab_x] == sendTask)
00641               {
00642                 if(slab_x < recvmin)
00643                   recvmin = slab_x;
00644                 if(slab_x > recvmax)
00645                   recvmax = slab_x;
00646               }
00647           if(recvmax == -1)
00648             recvmin = 0;
00649 
00650           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
00651             {
00652               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
00653               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
00654               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
00655 
00656               if(level > 0)
00657                 {
00658                   MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
00659                                (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
00660                                TAG_NONPERIOD_A, forcegrid,
00661                                (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
00662                                recvTask, TAG_NONPERIOD_A, MPI_COMM_WORLD, &status);
00663                 }
00664               else
00665                 {
00666                   memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
00667                          (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
00668                 }
00669 
00670               for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
00671                 {
00672                   slab_xx = slab_x - first_slab_of_task[ThisTask];
00673 
00674                   if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
00675                     {
00676                       for(slab_y = meshmin_list[3 * recvTask + 1];
00677                           slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
00678                         {
00679                           slab_yy = slab_y;
00680 
00681                           for(slab_z = meshmin_list[3 * recvTask + 2];
00682                               slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
00683                             {
00684                               slab_zz = slab_z;
00685 
00686                               rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
00687                                 forcegrid[((slab_x - recvmin) * recv_dimy +
00688                                            (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
00689                                           (slab_z - meshmin_list[3 * recvTask + 2])];
00690                             }
00691                         }
00692                     }
00693                 }
00694             }
00695         }
00696     }
00697 
00698 
00699   /* Do the FFT of the density field */
00700 
00701   rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00702 
00703 
00704   /* multiply with the Fourier transform of the Green's function (kernel) */
00705 
00706   for(y = 0; y < nslab_y; y++)
00707     for(x = 0; x < GRID; x++)
00708       for(z = 0; z < GRID / 2 + 1; z++)
00709         {
00710           ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
00711 
00712           re =
00713             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
00714             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
00715 
00716           im =
00717             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
00718             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
00719 
00720           fft_of_rhogrid[ip].re = re;
00721           fft_of_rhogrid[ip].im = im;
00722         }
00723 
00724   /* get the potential by inverse FFT */
00725 
00726   rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
00727 
00728   /* Now rhogrid holds the potential */
00729   /* construct the potential for the local patch */
00730 
00731 
00732   /* if we have a high-res mesh, establish the extension of the local patch in GRID (for reading out the
00733    * forces) 
00734    */
00735 
00736 #ifdef PLACEHIGHRESREGION
00737   if(grnr == 1)
00738     {
00739       for(j = 0; j < 3; j++)
00740         {
00741           meshmin[j] = GRID;
00742           meshmax[j] = 0;
00743         }
00744 
00745       for(i = 0; i < NumPart; i++)
00746         {
00747           if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00748             continue;
00749 
00750 
00751           if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
00752             if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
00753               if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
00754                 {
00755                   for(j = 0; j < 3; j++)
00756                     {
00757                       slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
00758 
00759                       if(slab < meshmin[j])
00760                         meshmin[j] = slab;
00761 
00762                       if(slab > meshmax[j])
00763                         meshmax[j] = slab;
00764                     }
00765                 }
00766         }
00767 
00768       MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
00769       MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
00770     }
00771 #endif
00772 
00773   dimx = meshmax[0] - meshmin[0] + 6;
00774   dimy = meshmax[1] - meshmin[1] + 6;
00775   dimz = meshmax[2] - meshmin[2] + 6;
00776 
00777   for(j = 0; j < 3; j++)
00778     {
00779       if(meshmin[j] < 2)
00780         endrun(131231);
00781       if(meshmax[j] > GRID / 2 - 3)
00782         endrun(131288);
00783     }
00784 
00785   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
00786     {
00787       sendTask = ThisTask;
00788       recvTask = ThisTask ^ level;
00789 
00790       if(recvTask < NTask)
00791         {
00792           /* check how much we have to send */
00793           sendmin = 2 * GRID;
00794           sendmax = -GRID;
00795           for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
00796             if(slab_to_task[slab_x] == sendTask)
00797               {
00798                 if(slab_x < sendmin)
00799                   sendmin = slab_x;
00800                 if(slab_x > sendmax)
00801                   sendmax = slab_x;
00802               }
00803           if(sendmax == -GRID)
00804             sendmin = sendmax + 1;
00805 
00806 
00807           /* check how much we have to receive */
00808           recvmin = 2 * GRID;
00809           recvmax = -GRID;
00810           for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
00811             if(slab_to_task[slab_x] == recvTask)
00812               {
00813                 if(slab_x < recvmin)
00814                   recvmin = slab_x;
00815                 if(slab_x > recvmax)
00816                   recvmax = slab_x;
00817               }
00818           if(recvmax == -GRID)
00819             recvmin = recvmax + 1;
00820 
00821           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
00822             {
00823               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
00824               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
00825               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
00826 
00827               /* prepare what we want to send */
00828               if(sendmax - sendmin >= 0)
00829                 {
00830                   for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
00831                     {
00832                       slab_xx = slab_x - first_slab_of_task[ThisTask];
00833 
00834                       for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
00835                           slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
00836                         {
00837                           slab_yy = slab_y;
00838 
00839                           for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
00840                               slab_z <= meshmax_list[3 * recvTask + 2] + 4; slab_z++)
00841                             {
00842                               slab_zz = slab_z;
00843 
00844                               forcegrid[((slab_x - sendmin) * recv_dimy +
00845                                          (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
00846                                         slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
00847                                 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
00848                             }
00849                         }
00850                     }
00851                 }
00852 
00853               if(level > 0)
00854                 {
00855                   MPI_Sendrecv(forcegrid,
00856                                (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
00857                                MPI_BYTE, recvTask, TAG_NONPERIOD_B,
00858                                workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00859                                (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
00860                                recvTask, TAG_NONPERIOD_B, MPI_COMM_WORLD, &status);
00861                 }
00862               else
00863                 {
00864                   memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
00865                          forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
00866                 }
00867             }
00868         }
00869     }
00870 
00871   dimx = meshmax[0] - meshmin[0] + 2;
00872   dimy = meshmax[1] - meshmin[1] + 2;
00873   dimz = meshmax[2] - meshmin[2] + 2;
00874 
00875   recv_dimx = meshmax[0] - meshmin[0] + 6;
00876   recv_dimy = meshmax[1] - meshmin[1] + 6;
00877   recv_dimz = meshmax[2] - meshmin[2] + 6;
00878 
00879 
00880   for(dim = 0; dim < 3; dim++)  /* Calculate each component of the force. */
00881     {
00882       /* get the force component by finite differencing the potential */
00883       /* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */
00884 
00885       for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
00886         for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
00887           for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
00888             {
00889               xrr = xll = xr = xl = x;
00890               yrr = yll = yr = yl = y;
00891               zrr = zll = zr = zl = z;
00892 
00893               switch (dim)
00894                 {
00895                 case 0:
00896                   xr = x + 1;
00897                   xrr = x + 2;
00898                   xl = x - 1;
00899                   xll = x - 2;
00900                   break;
00901                 case 1:
00902                   yr = y + 1;
00903                   yl = y - 1;
00904                   yrr = y + 2;
00905                   yll = y - 2;
00906                   break;
00907                 case 2:
00908                   zr = z + 1;
00909                   zl = z - 1;
00910                   zrr = z + 2;
00911                   zll = z - 2;
00912                   break;
00913                 }
00914 
00915               forcegrid[(x * dimy + y) * dimz + z]
00916                 =
00917                 fac * ((4.0 / 3) *
00918                        (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
00919                         - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
00920                        (1.0 / 6) *
00921                        (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
00922                         workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
00923             }
00924 
00925 
00926       /* read out the forces */
00927 
00928       for(i = 0; i < NumPart; i++)
00929         {
00930 #ifdef PLACEHIGHRESREGION
00931           if(grnr == 1)
00932             if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
00933               continue;
00934 #endif
00935           slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
00936           dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
00937           slab_x -= meshmin[0];
00938           slab_xx = slab_x + 1;
00939 
00940           slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
00941           dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
00942           slab_y -= meshmin[1];
00943           slab_yy = slab_y + 1;
00944 
00945           slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
00946           dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
00947           slab_z -= meshmin[2];
00948           slab_zz = slab_z + 1;
00949 
00950           acc_dim =
00951             forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
00952           acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
00953           acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
00954           acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
00955 
00956           acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
00957           acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
00958           acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
00959           acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
00960 
00961           P[i].GravPM[dim] += acc_dim;
00962         }
00963     }
00964 
00965   pm_init_nonperiodic_free();
00966   force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
00967   All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
00968 
00969   if(ThisTask == 0)
00970     printf("done PM.\n");
00971 
00972   return 0;
00973 }
00974 
00975 
00976 
00977 
00983 void pmpotential_nonperiodic(int grnr)
00984 {
00985   double dx, dy, dz;
00986   double fac, to_slab_fac;
00987   double re, im, pot;
00988   int i, j, slab, level, sendTask, recvTask;
00989   int x, y, z, ip;
00990   int slab_x, slab_y, slab_z;
00991   int slab_xx, slab_yy, slab_zz;
00992   int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
00993   int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
00994   MPI_Status status;
00995 
00996 
00997   if(ThisTask == 0)
00998     printf("Starting non-periodic PM-potential calculation.\n");
00999 
01000   fac = All.G / pow(All.TotalMeshSize[grnr], 4) * pow(All.TotalMeshSize[grnr] / GRID, 3);       /* to get potential */
01001 
01002   to_slab_fac = GRID / All.TotalMeshSize[grnr];
01003 
01004   /* first, establish the extension of the local patch in GRID (for binning) */
01005 
01006   for(j = 0; j < 3; j++)
01007     {
01008       meshmin[j] = GRID;
01009       meshmax[j] = 0;
01010     }
01011 
01012   for(i = 0; i < NumPart; i++)
01013     {
01014       if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01015         if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01016           if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01017             {
01018               for(j = 0; j < 3; j++)
01019                 {
01020                   slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01021 
01022                   if(slab < meshmin[j])
01023                     meshmin[j] = slab;
01024 
01025                   if(slab > meshmax[j])
01026                     meshmax[j] = slab;
01027                 }
01028             }
01029     }
01030 
01031   MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01032   MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01033 
01034   dimx = meshmax[0] - meshmin[0] + 2;
01035   dimy = meshmax[1] - meshmin[1] + 2;
01036   dimz = meshmax[2] - meshmin[2] + 2;
01037 
01038 
01039   force_treefree();
01040 
01041   pm_init_nonperiodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));
01042 
01043   for(i = 0; i < dimx * dimy * dimz; i++)
01044     workspace[i] = 0;
01045 
01046   for(i = 0; i < NumPart; i++)
01047     {
01048       if(P[i].Pos[0] < All.Corner[grnr][0] || P[i].Pos[0] >= All.UpperCorner[grnr][0])
01049         continue;
01050       if(P[i].Pos[1] < All.Corner[grnr][1] || P[i].Pos[1] >= All.UpperCorner[grnr][1])
01051         continue;
01052       if(P[i].Pos[2] < All.Corner[grnr][2] || P[i].Pos[2] >= All.UpperCorner[grnr][2])
01053         continue;
01054 
01055       slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01056       dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01057       slab_x -= meshmin[0];
01058       slab_xx = slab_x + 1;
01059 
01060       slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01061       dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01062       slab_y -= meshmin[1];
01063       slab_yy = slab_y + 1;
01064 
01065       slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01066       dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01067       slab_z -= meshmin[2];
01068       slab_zz = slab_z + 1;
01069 
01070       workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01071       workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
01072       workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
01073       workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;
01074 
01075       workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
01076       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
01077       workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
01078       workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
01079     }
01080 
01081 
01082   for(i = 0; i < fftsize; i++)  /* clear local density field */
01083     rhogrid[i] = 0;
01084 
01085   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
01086     {
01087       sendTask = ThisTask;
01088       recvTask = ThisTask ^ level;
01089       if(recvTask < NTask)
01090         {
01091           /* check how much we have to send */
01092           sendmin = 2 * GRID;
01093           sendmax = -1;
01094           for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
01095             if(slab_to_task[slab_x] == recvTask)
01096               {
01097                 if(slab_x < sendmin)
01098                   sendmin = slab_x;
01099                 if(slab_x > sendmax)
01100                   sendmax = slab_x;
01101               }
01102           if(sendmax == -1)
01103             sendmin = 0;
01104 
01105           /* check how much we have to receive */
01106           recvmin = 2 * GRID;
01107           recvmax = -1;
01108           for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
01109             if(slab_to_task[slab_x] == sendTask)
01110               {
01111                 if(slab_x < recvmin)
01112                   recvmin = slab_x;
01113                 if(slab_x > recvmax)
01114                   recvmax = slab_x;
01115               }
01116           if(recvmax == -1)
01117             recvmin = 0;
01118 
01119           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
01120             {
01121               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
01122               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
01123               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;
01124 
01125               if(level > 0)
01126                 {
01127                   MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
01128                                (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
01129                                TAG_NONPERIOD_C, forcegrid,
01130                                (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
01131                                recvTask, TAG_NONPERIOD_C, MPI_COMM_WORLD, &status);
01132                 }
01133               else
01134                 {
01135                   memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
01136                          (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
01137                 }
01138 
01139               for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
01140                 {
01141                   slab_xx = slab_x - first_slab_of_task[ThisTask];
01142 
01143                   if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
01144                     {
01145                       for(slab_y = meshmin_list[3 * recvTask + 1];
01146                           slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
01147                         {
01148                           slab_yy = slab_y;
01149 
01150                           for(slab_z = meshmin_list[3 * recvTask + 2];
01151                               slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
01152                             {
01153                               slab_zz = slab_z;
01154 
01155                               rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz] +=
01156                                 forcegrid[((slab_x - recvmin) * recv_dimy +
01157                                            (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
01158                                           (slab_z - meshmin_list[3 * recvTask + 2])];
01159                             }
01160                         }
01161                     }
01162                 }
01163             }
01164         }
01165     }
01166 
01167 
01168   /* Do the FFT of the density field */
01169 
01170   rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01171 
01172 
01173   /* multiply with the Fourier transform of the Green's function (kernel) */
01174 
01175   for(y = 0; y < nslab_y; y++)
01176     for(x = 0; x < GRID; x++)
01177       for(z = 0; z < GRID / 2 + 1; z++)
01178         {
01179           ip = GRID * (GRID / 2 + 1) * y + (GRID / 2 + 1) * x + z;
01180 
01181           re =
01182             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].re -
01183             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].im;
01184 
01185           im =
01186             fft_of_rhogrid[ip].re * fft_of_kernel[grnr][ip].im +
01187             fft_of_rhogrid[ip].im * fft_of_kernel[grnr][ip].re;
01188 
01189           fft_of_rhogrid[ip].re = fac * re;
01190           fft_of_rhogrid[ip].im = fac * im;
01191         }
01192 
01193   /* get the potential by inverse FFT */
01194 
01195   rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
01196 
01197   /* Now rhogrid holds the potential */
01198   /* construct the potential for the local patch */
01199 
01200 
01201   /* if we have a high-res mesh, establish the extension of the local patch in GRID (for reading out the
01202    * forces) 
01203    */
01204 
01205 #ifdef PLACEHIGHRESREGION
01206   if(grnr == 1)
01207     {
01208       for(j = 0; j < 3; j++)
01209         {
01210           meshmin[j] = GRID;
01211           meshmax[j] = 0;
01212         }
01213 
01214       for(i = 0; i < NumPart; i++)
01215         {
01216           if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01217             continue;
01218 
01219 
01220           if(P[i].Pos[0] >= All.Corner[grnr][0] && P[i].Pos[0] < All.UpperCorner[grnr][0])
01221             if(P[i].Pos[1] >= All.Corner[grnr][1] && P[i].Pos[1] < All.UpperCorner[grnr][1])
01222               if(P[i].Pos[2] >= All.Corner[grnr][2] && P[i].Pos[2] < All.UpperCorner[grnr][2])
01223                 {
01224                   for(j = 0; j < 3; j++)
01225                     {
01226                       slab = to_slab_fac * (P[i].Pos[j] - All.Corner[grnr][j]);
01227 
01228                       if(slab < meshmin[j])
01229                         meshmin[j] = slab;
01230 
01231                       if(slab > meshmax[j])
01232                         meshmax[j] = slab;
01233                     }
01234                 }
01235         }
01236 
01237       MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
01238       MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);
01239     }
01240 #endif
01241 
01242   dimx = meshmax[0] - meshmin[0] + 6;
01243   dimy = meshmax[1] - meshmin[1] + 6;
01244   dimz = meshmax[2] - meshmin[2] + 6;
01245 
01246   for(j = 0; j < 3; j++)
01247     {
01248       if(meshmin[j] < 2)
01249         endrun(131231);
01250       if(meshmax[j] > GRID / 2 - 3)
01251         endrun(131288);
01252     }
01253 
01254   for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */
01255     {
01256       sendTask = ThisTask;
01257       recvTask = ThisTask ^ level;
01258 
01259       if(recvTask < NTask)
01260         {
01261           /* check how much we have to send */
01262           sendmin = 2 * GRID;
01263           sendmax = -GRID;
01264           for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
01265             if(slab_to_task[slab_x] == sendTask)
01266               {
01267                 if(slab_x < sendmin)
01268                   sendmin = slab_x;
01269                 if(slab_x > sendmax)
01270                   sendmax = slab_x;
01271               }
01272           if(sendmax == -GRID)
01273             sendmin = sendmax + 1;
01274 
01275 
01276           /* check how much we have to receive */
01277           recvmin = 2 * GRID;
01278           recvmax = -GRID;
01279           for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
01280             if(slab_to_task[slab_x] == recvTask)
01281               {
01282                 if(slab_x < recvmin)
01283                   recvmin = slab_x;
01284                 if(slab_x > recvmax)
01285                   recvmax = slab_x;
01286               }
01287           if(recvmax == -GRID)
01288             recvmin = recvmax + 1;
01289 
01290           if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)      /* ok, we have a contribution to the slab */
01291             {
01292               recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
01293               recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
01294               recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;
01295 
01296               /* prepare what we want to send */
01297               if(sendmax - sendmin >= 0)
01298                 {
01299                   for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
01300                     {
01301                       slab_xx = slab_x - first_slab_of_task[ThisTask];
01302 
01303                       for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
01304                           slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
01305                         {
01306                           slab_yy = slab_y;
01307 
01308                           for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
01309                               slab_z <= meshmax_list[3 * recvTask + 2] + 4; slab_z++)
01310                             {
01311                               slab_zz = slab_z;
01312 
01313                               forcegrid[((slab_x - sendmin) * recv_dimy +
01314                                          (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
01315                                         slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
01316                                 rhogrid[GRID * GRID2 * slab_xx + GRID2 * slab_yy + slab_zz];
01317                             }
01318                         }
01319                     }
01320                 }
01321 
01322               if(level > 0)
01323                 {
01324                   MPI_Sendrecv(forcegrid,
01325                                (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
01326                                MPI_BYTE, recvTask, TAG_NONPERIOD_D,
01327                                workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01328                                (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
01329                                recvTask, TAG_NONPERIOD_D, MPI_COMM_WORLD, &status);
01330                 }
01331               else
01332                 {
01333                   memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
01334                          forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
01335                 }
01336             }
01337         }
01338     }
01339 
01340   dimx = meshmax[0] - meshmin[0] + 2;
01341   dimy = meshmax[1] - meshmin[1] + 2;
01342   dimz = meshmax[2] - meshmin[2] + 2;
01343 
01344   recv_dimx = meshmax[0] - meshmin[0] + 6;
01345   recv_dimy = meshmax[1] - meshmin[1] + 6;
01346   recv_dimz = meshmax[2] - meshmin[2] + 6;
01347 
01348 
01349   for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
01350     for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
01351       for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
01352         {
01353           forcegrid[(x * dimy + y) * dimz + z]
01354             = workspace[((x + 2) * recv_dimy + (y + 2)) * recv_dimz + (z + 2)];
01355         }
01356 
01357 
01358   /* read out the potential */
01359 
01360   for(i = 0; i < NumPart; i++)
01361     {
01362 #ifdef PLACEHIGHRESREGION
01363       if(grnr == 1)
01364         if(!((1 << P[i].Type) & (PLACEHIGHRESREGION)))
01365           continue;
01366 #endif
01367       slab_x = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]);
01368       dx = to_slab_fac * (P[i].Pos[0] - All.Corner[grnr][0]) - slab_x;
01369       slab_x -= meshmin[0];
01370       slab_xx = slab_x + 1;
01371 
01372       slab_y = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]);
01373       dy = to_slab_fac * (P[i].Pos[1] - All.Corner[grnr][1]) - slab_y;
01374       slab_y -= meshmin[1];
01375       slab_yy = slab_y + 1;
01376 
01377       slab_z = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]);
01378       dz = to_slab_fac * (P[i].Pos[2] - All.Corner[grnr][2]) - slab_z;
01379       slab_z -= meshmin[2];
01380       slab_zz = slab_z + 1;
01381 
01382       pot = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
01383       pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
01384       pot += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
01385       pot += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;
01386 
01387       pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
01388       pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
01389       pot += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
01390       pot += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;
01391 
01392       P[i].Potential += pot;
01393     }
01394 
01395   pm_init_nonperiodic_free();
01396   force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);
01397   All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
01398 
01399   if(ThisTask == 0)
01400     printf("done PM-potential.\n");
01401 }
01402 
01403 
01404 #endif
01405 #endif

Generated on Sun May 22 17:33:29 2005 for GADGET-2 by  doxygen 1.3.9.1