12#include "gadgetconfig.h"
14#if defined(PMGRID) && (!defined(PERIODIC) || defined(PLACEHIGHRESREGION))
23#include "../data/allvars.h"
24#include "../data/dtypes.h"
25#include "../data/intposconvert.h"
26#include "../data/mymalloc.h"
27#include "../logs/timer.h"
28#include "../main/simulation.h"
29#include "../mpi_utils/mpi_utils.h"
31#include "../pm/pm_mpi_fft.h"
32#include "../pm/pm_nonperiodic.h"
33#include "../sort/cxxsort.h"
34#include "../src/gravtree/gravtree.h"
35#include "../src/time_integration/timestep.h"
36#include "../system/system.h"
38#define GRID (HRPMGRID)
39#define GRIDz (GRID / 2 + 1)
40#define GRID2 (2 * GRIDz)
42#define FI(x, y, z) (((large_array_offset)GRID2) * (GRID * (x) + (y)) + (z))
43#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.firstcol_XY) + (z))
44#define TI(x, y, z) (((large_array_offset)GRID) * ((x) + (y)*myplan.nslab_x) + (z))
// Determine the spatial extent of the isolated (non-periodic) PM meshes.
// Picks a reference integer position per mesh, reduces particle bounding-box
// information across MPI ranks, pads/aligns the allowed regions, and triggers
// a recomputation of the FFT kernel whenever the enclosing mesh size changed.
// NOTE(review): this chunk is an extraction fragment — original source lines
// are missing between many statements (note the jumps in the embedded line
// numbers), so the visible control flow is incomplete.
55void pm_nonperiodic::pm_init_regionsize(
void)
// Sentinel: ranks without a usable reference particle report NTask, so the
// MINLOC reduction below identifies the lowest rank that has one.
61  int have_low_mesh =
NTask, have_high_mesh =
NTask;
// Use the first local particle as the coarse (LOW_MESH) reference position.
65  for(
int j = 0; j < 3; j++)
66      Sp->ReferenceIntPos[
LOW_MESH][j] = P[0].IntPos[j];
71  for(
int i = 0; i < Sp->NumPart; i++)
73#ifdef PLACEHIGHRESREGION
// A particle belongs to the high-res region when its type bit is set in
// the PLACEHIGHRESREGION mask.
74      if(((1 << P[i].getType()) & (PLACEHIGHRESREGION)))
76          for(
int j = 0; j < 3; j++)
77            Sp->ReferenceIntPos[
HIGH_MESH][j] = P[i].IntPos[j];
// Select, across ranks, the smallest rank that found a reference particle
// for each mesh (MPI_MINLOC operates on {value, rank} pairs).
87  MPI_Allreduce(MPI_IN_PLACE, have_global, 2, MPI_2INT, MPI_MINLOC,
Communicator);
89  if(have_global[0] >=
NTask)
90    Terminate(
"have_global[0] >= NTask: Don't we have any particle?");
94#ifdef PLACEHIGHRESREGION
95  if(have_global[2] >=
NTask)
96    Terminate(
"have_global[2] >= NTask: Apparently there are no particles in high res region");
// Per-dimension bounding-box accumulation over all local particles follows;
// the loop bodies are largely missing from this fragment.
105  for(
int j = 0; j < 3; j++)
111  for(
int i = 0; i < Sp->NumPart; i++)
118      for(
int j = 0; j < 3; j++)
126#ifdef PLACEHIGHRESREGION
127      if(((1 << P[i].getType()) & (PLACEHIGHRESREGION)))
135          for(
int j = 0; j < 3; j++)
149  for(
int i = 0; i < 3; i++)
156  int flag_recompute_kernel = 0;
// The inner mesh size per mesh is the largest bounding-box extent over the
// three coordinate dimensions.
158  for(
int mesh = 0; mesh < 2; mesh++)
160      inner_meshsize[mesh] = (
MyIntPosType)(Sp->Xmaxtot[mesh][0] - Sp->Xmintot[mesh][0]);
162      if((
MyIntPosType)(Sp->Xmaxtot[mesh][1] - Sp->Xmintot[mesh][1]) > inner_meshsize[mesh])
163        inner_meshsize[mesh] = Sp->Xmaxtot[mesh][1] - Sp->Xmintot[mesh][1];
165      if((
MyIntPosType)(Sp->Xmaxtot[mesh][2] - Sp->Xmintot[mesh][2]) > inner_meshsize[mesh])
166        inner_meshsize[mesh] = Sp->Xmaxtot[mesh][2] - Sp->Xmintot[mesh][2];
169  for(
int mesh = 0; mesh < 2; mesh++)
175#ifndef PLACEHIGHRESREGION
189      if(blocksize >= refsize)
198          blocksize = Sp->PlacingBlocksize;
202          "PM-NONPERIODIC: Allowed region for isolated PM mesh (%s): BEFORE (%g|%g|%g) -> (%g|%g|%g) inner meshsize=%g "
204          mesh ==
LOW_MESH ?
"coarse" :
"fine", Sp->FacIntToCoord * Sp->Xmintot[mesh][0], Sp->FacIntToCoord * Sp->Xmintot[mesh][1],
205          Sp->FacIntToCoord * Sp->Xmintot[mesh][2], Sp->FacIntToCoord * Sp->Xmaxtot[mesh][0], Sp->FacIntToCoord * Sp->Xmaxtot[mesh][1],
206          Sp->FacIntToCoord * Sp->Xmaxtot[mesh][2], Sp->FacIntToCoord * inner_meshsize[mesh], Sp->FacIntToCoord * blocksize);
208      enclosing_meshsize[mesh] = 0;
// Recentre the region around the bounding-box mid-point (or, for the fine
// mesh branch below, align it to the placing block via Sp->PlacingMask),
// then convert back to coordinates relative to the reference position.
211      for(
int i = 0; i < 3; i++)
217          left = ((Sp->Xmintot[mesh][i] + Sp->Xmaxtot[mesh][i]) / 2 - inner_meshsize[mesh] / 2) + Sp->ReferenceIntPos[mesh][i];
221          right = ((Sp->Xmintot[mesh][i] + Sp->Xmaxtot[mesh][i]) / 2 + inner_meshsize[mesh] / 2) + Sp->ReferenceIntPos[mesh][i];
227          left = (Sp->ReferenceIntPos[
HIGH_MESH][i] + Sp->Xmintot[
HIGH_MESH][i]) & Sp->PlacingMask;
228          right = left + Sp->PlacingBlocksize;
231          Sp->Xmintot[mesh][i] = left - Sp->ReferenceIntPos[mesh][i];
232          Sp->Xmaxtot[mesh][i] = right - Sp->ReferenceIntPos[mesh][i];
234          Sp->Left[mesh][i] = left;
236          Sp->MeshSize[mesh][i] = Sp->Xmaxtot[mesh][i] - Sp->Xmintot[mesh][i];
// Track the largest per-dimension extent as the enclosing mesh size.
238          if(Sp->MeshSize[mesh][i] > enclosing_meshsize[mesh])
239            enclosing_meshsize[mesh] = Sp->MeshSize[mesh][i];
243          "PM-NONPERIODIC: Allowed region for isolated PM mesh (%s): AFTER (%g|%g|%g) -> (%g|%g|%g) enclosing_meshsize=%g "
244          "absolute space (%g|%g|%g) -> (%g|%g|%g)\n",
245          mesh ==
LOW_MESH ?
"coarse" :
"fine", Sp->FacIntToCoord * Sp->Xmintot[mesh][0], Sp->FacIntToCoord * Sp->Xmintot[mesh][1],
246          Sp->FacIntToCoord * Sp->Xmintot[mesh][2], Sp->FacIntToCoord * Sp->Xmaxtot[mesh][0], Sp->FacIntToCoord * Sp->Xmaxtot[mesh][1],
247          Sp->FacIntToCoord * Sp->Xmaxtot[mesh][2], Sp->FacIntToCoord * enclosing_meshsize[mesh],
248          Sp->FacIntToCoord * (Sp->Xmintot[mesh][0] + Sp->ReferenceIntPos[mesh][0]),
249          Sp->FacIntToCoord * (Sp->Xmintot[mesh][1] + Sp->ReferenceIntPos[mesh][1]),
250          Sp->FacIntToCoord * (Sp->Xmintot[mesh][2] + Sp->ReferenceIntPos[mesh][2]),
251          Sp->FacIntToCoord * (Sp->Xmaxtot[mesh][0] + Sp->ReferenceIntPos[mesh][0]),
252          Sp->FacIntToCoord * (Sp->Xmaxtot[mesh][1] + Sp->ReferenceIntPos[mesh][1]),
253          Sp->FacIntToCoord * (Sp->Xmaxtot[mesh][2] + Sp->ReferenceIntPos[mesh][2]));
// Only flag a (costly) kernel recomputation when the geometry changed.
255      if(enclosing_meshsize[mesh] != Sp->OldMeshSize[mesh])
257          flag_recompute_kernel = 1;
258          Sp->OldMeshSize[mesh] = enclosing_meshsize[mesh];
// Total mesh is padded: factor 2.0, presumably the zero padding required for
// isolated boundary conditions — TODO confirm; GRID/(GRID-10) leaves a margin
// of grid cells at the edges.
263      Sp->TotalMeshSize[mesh] = 2.0 * enclosing_meshsize[mesh] * (GRID / ((double)(GRID - 10)));
266      for(
int i = 0; i < 3; i++)
267        Sp->Corner[mesh][i] = Sp->Xmintot[mesh][i] - (2.0 * Sp->TotalMeshSize[mesh] / GRID);
// Smoothing scale and cutoff radius scale with the mesh cell size.
269      Sp->Asmth[mesh] =
ASMTH * (Sp->FacIntToCoord * Sp->TotalMeshSize[mesh]) / GRID;
270      Sp->Rcut[mesh] =
RCUT * Sp->Asmth[mesh];
// The kernel is rebuilt on the very first call, and afterwards only when the
// mesh geometry has changed.
273  static int first_init_done = 0;
274  if(flag_recompute_kernel || first_init_done == 0)
277      mpi_printf(
"PM-NONPERIODIC: Recompute kernel course mesh: Asmth=%g Rcut=%g mesh cell size=%g\n", Sp->Asmth[
LOW_MESH],
278                 Sp->Rcut[
LOW_MESH], Sp->FacIntToCoord * Sp->TotalMeshSize[
LOW_MESH] / GRID);
280#ifdef PLACEHIGHRESREGION
281      mpi_printf(
"PM-NONPERIODIC: Recompute kernel fine mesh: Asmth=%g Rcut=%g mesh cell size=%g\n", Sp->Asmth[
HIGH_MESH],
285      pm_setup_nonperiodic_kernel();
// One-time initialization of the non-periodic PM module: builds the FFTW
// plans for the separated 1D passes (z: real-to-complex, x and y: complex)
// of the 3D transform, determines the maximum local FFT buffer size for the
// chosen decomposition (slab or column based), and allocates the kernel
// buffer(s) whose FFT is reused between force computations.
// NOTE(review): extraction fragment — original lines are missing between
// several statements (e.g. the declaration of `stride` and the storage of
// Sp_ptr are not visible here).
294void pm_nonperiodic::pm_init_nonperiodic(
simparticles *Sp_ptr)
// FFTW "many" interface expects the transform length as an array.
299  int ndim[1] = {GRID};
// Temporary scratch buffers used only for plan creation; freed again below.
302  rhogrid = (fft_real *)
Mem.mymalloc(
"rhogrid", GRID2 *
sizeof(fft_real));
303  forcegrid = (fft_real *)
Mem.mymalloc(
"forcegrid", GRID2 *
sizeof(fft_real));
305#ifdef DOUBLEPRECISION_FFTW
309  int alignflag = FFTW_UNALIGNED;
311#ifndef FFT_COLUMN_BASED
// z-direction: real-to-complex over GRID2-strided real input.
317  myplan.forward_plan_zdir =
FFTW(plan_many_dft_r2c)(1, ndim, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz,
318                          FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
// x- and y-direction: complex transforms over strided data.
// NOTE(review): the x- and y-plan calls are textually identical in this
// fragment; the distinction presumably comes from code not visible here
// (e.g. the value of `stride`) — confirm against the full source.
320  myplan.forward_plan_xdir =
321      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
322                          GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
324  myplan.forward_plan_ydir =
325      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
326                          GRIDz * GRID, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
// Matching backward (inverse) plans for the three passes.
328  myplan.backward_plan_zdir =
FFTW(plan_many_dft_c2r)(1, ndim, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2,
329                          FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
331  myplan.backward_plan_xdir =
332      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
333                          GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
335  myplan.backward_plan_ydir =
336      FFTW(plan_many_dft)(1, ndim, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRID, (fft_complex *)forcegrid, 0, stride,
337                          GRIDz * GRID, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
// Plan creation done; release the scratch buffers.
339  Mem.myfree(forcegrid);
// Maximum size of the local FFT workspace, decomposition dependent.
342#ifndef FFT_COLUMN_BASED
346  maxfftsize = myplan.largest_x_slab * GRID * ((size_t)GRID2);
352  maxfftsize = myplan.max_datasize;
// Allocate the Green's-function kernel buffer(s); their in-place FFT view
// is aliased via fft_of_kernel.
358  size_t bytes, bytes_tot = 0;
360#if !defined(PERIODIC)
361  kernel[0] = (fft_real *)
Mem.mymalloc(
"kernel[0]", bytes = maxfftsize *
sizeof(fft_real));
363  fft_of_kernel[0] = (fft_complex *)kernel[0];
366#if defined(PLACEHIGHRESREGION)
367  kernel[1] = (fft_real *)
Mem.mymalloc(
"kernel[1]", bytes = maxfftsize *
sizeof(fft_real));
369  fft_of_kernel[1] = (fft_complex *)kernel[1];
372  mpi_printf(
"\nPM-NONPERIODIC: Allocated %g MByte for FFT kernel(s).\n\n", bytes_tot / (1024.0 * 1024.0));
375#ifdef PM_ZOOM_OPTIMIZED
// Build the local density field for mesh `grnr` using the zoom-optimized
// scheme: every active source particle contributes (CIC) to its 8 enclosing
// cells; those cell contributions are sorted by global grid index, compressed
// into unique "field points", counted per target task, exchanged over MPI,
// and finally accumulated into the local rhogrid portion.
// NOTE(review): extraction fragment — lines are missing between statements
// (e.g. the computation of `delta` from `diff`, the `ngrid`/`myngrid`
// bookkeeping, and several loop/brace bodies are not visible).
377void pm_nonperiodic::pmforce_nonperiodic_zoom_optimized_prepare_density(
int grnr)
// Conversion factor from integer-coordinate offsets to grid-cell units.
383  double to_slab_fac = GRID / ((double)Sp->TotalMeshSize[grnr]);
// Each source particle produces up to 8 cell entries, hence factor 8.
385  part = (part_slab_data *)
Mem.mymalloc(
"part", 8 * (NSource *
sizeof(part_slab_data)));
386  large_numpart_type *part_sortindex =
387      (large_numpart_type *)
Mem.mymalloc(
"part_sortindex", 8 * (NSource *
sizeof(large_numpart_type)));
391#ifdef FFT_COLUMN_BASED
// Column decomposition: the first `tasklastsection` tasks get `avg` columns
// each, the remaining tasks one fewer; `pivotcol` is the boundary column.
392  int columns = GRIDX * GRIDY;
393  int avg = (columns - 1) /
NTask + 1;
394  int exc =
NTask * avg - columns;
395  int tasklastsection =
NTask - exc;
396  int pivotcol = tasklastsection * avg;
// Pass 1: record, for every active particle, the 8 affected grid cells.
400  for(
int idx = 0; idx < NSource; idx++)
402      int i = Sp->get_active_index(idx);
// Position relative to the mesh reference point (integer wrap-safe diff).
407      MyIntPosType diff[3] = {P[i].
IntPos[0] - Sp->ReferenceIntPos[grnr][0], P[i].
IntPos[1] - Sp->ReferenceIntPos[grnr][1],
408                              P[i].
IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
// Skip particles outside the allowed region of this mesh.
412      if(delta[0] < Sp->Xmintot[grnr][0] || delta[0] >= Sp->Xmaxtot[grnr][0])
415      if(delta[1] < Sp->Xmintot[grnr][1] || delta[1] >= Sp->Xmaxtot[grnr][1])
418      if(delta[2] < Sp->Xmintot[grnr][2] || delta[2] >= Sp->Xmaxtot[grnr][2])
421      int slab_x = (int)(to_slab_fac * (delta[0] - Sp->Corner[grnr][0]));
422      int slab_y = (int)(to_slab_fac * (delta[1] - Sp->Corner[grnr][1]));
423      int slab_z = (int)(to_slab_fac * (delta[2] - Sp->Corner[grnr][2]));
429      large_numpart_type index_on_grid = ((large_numpart_type)myngrid) * 8;
// Enumerate the 2x2x2 neighbourhood of the particle's base cell.
431      for(
int xx = 0; xx < 2; xx++)
432        for(
int yy = 0; yy < 2; yy++)
433          for(
int zz = 0; zz < 2; zz++)
435              int slab_xx = slab_x + xx;
436              int slab_yy = slab_y + yy;
437              int slab_zz = slab_z + zz;
446              large_array_offset offset = FI(slab_xx, slab_yy, slab_zz);
// partindex packs particle index and the (xx,yy,zz) corner in its low bits.
448              part[index_on_grid].partindex = (i << 3) + (xx << 2) + (yy << 1) + zz;
449              part[index_on_grid].globalindex = offset;
450              part_sortindex[index_on_grid] = index_on_grid;
456  num_on_grid = ((large_numpart_type)ngrid) * 8;
// Sort cell entries by global grid index so duplicates become adjacent.
459  mycxxsort(part_sortindex, part_sortindex + num_on_grid, pm_nonperiodic_sortindex_comparator(part));
// Count distinct grid cells touched locally.
461  large_array_offset num_field_points;
464    num_field_points = 1;
466    num_field_points = 0;
469  for(large_numpart_type i = 1; i < num_on_grid; i++)
471      if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
// Allocate the compressed field-point arrays and per-task bookkeeping.
476  localfield_globalindex = (large_array_offset *)
Mem.mymalloc_movable(&localfield_globalindex,
"localfield_globalindex",
477                           num_field_points *
sizeof(large_array_offset));
478  localfield_data = (fft_real *)
Mem.mymalloc_movable(&localfield_data,
"localfield_data", num_field_points *
sizeof(fft_real));
479  localfield_first = (
size_t *)
Mem.mymalloc_movable(&localfield_first,
"localfield_first",
NTask *
sizeof(
size_t));
480  localfield_sendcount = (
size_t *)
Mem.mymalloc_movable(&localfield_sendcount,
"localfield_sendcount",
NTask *
sizeof(
size_t));
481  localfield_offset = (
size_t *)
Mem.mymalloc_movable(&localfield_offset,
"localfield_offset",
NTask *
sizeof(
size_t));
482  localfield_recvcount = (
size_t *)
Mem.mymalloc_movable(&localfield_recvcount,
"localfield_recvcount",
NTask *
sizeof(
size_t));
484  for(
int i = 0; i <
NTask; i++)
486      localfield_first[i] = 0;
487      localfield_sendcount[i] = 0;
// Pass over the sorted entries: assign each entry its compressed local
// index, record unique global indices, and count field points per task
// owning the corresponding slab/column.
493  num_field_points = 0;
494  for(large_numpart_type i = 0; i < num_on_grid; i++)
497          if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
500      part[part_sortindex[i]].localindex = num_field_points;
503        if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
506      localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
508#ifndef FFT_COLUMN_BASED
// Slab decomposition: owning task follows from the x-slab of the cell.
509      int slab = part[part_sortindex[i]].globalindex / (GRID * GRID2);
510      int task = myplan.slab_to_task[slab];
512      int task, column = part[part_sortindex[i]].globalindex / (GRID2);
514      if(column < pivotcol)
517        task = (column - pivotcol) / (avg - 1) + tasklastsection;
520      if(localfield_sendcount[task] == 0)
521        localfield_first[task] = num_field_points;
523      localfield_sendcount[task]++;
// Prefix sums give the per-task offsets into the field-point arrays.
527  localfield_offset[0] = 0;
528  for(
int i = 1; i <
NTask; i++)
529    localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1];
531  Mem.myfree_movable(part_sortindex);
532  part_sortindex = NULL;
// CIC mass assignment into the compressed local field.
535  for(large_numpart_type i = 0; i < num_field_points; i++)
536    localfield_data[i] = 0;
538  for(large_numpart_type i = 0; i < num_on_grid; i += 8)
540      int pindex = (part[i].partindex >> 3);
542      MyIntPosType diff[3] = {P[pindex].
IntPos[0] - Sp->ReferenceIntPos[grnr][0], P[pindex].
IntPos[1] - Sp->ReferenceIntPos[grnr][1],
543                              P[pindex].
IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
547      double dx = to_slab_fac * (delta[0] - Sp->Corner[grnr][0]);
548      double dy = to_slab_fac * (delta[1] - Sp->Corner[grnr][1]);
549      double dz = to_slab_fac * (delta[2] - Sp->Corner[grnr][2]);
551      int slab_x = (int)(dx);
552      int slab_y = (int)(dy);
553      int slab_z = (int)(dz);
559      double weight = P[pindex].
getMass();
// Distribute the particle mass onto its 8 cells with trilinear weights.
561      localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
562      localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz;
563      localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz);
564      localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz;
565      localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz);
566      localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz;
567      localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz);
568      localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz;
// Allocate and clear the local portion of the density grid.
571  rhogrid = (fft_real *)
Mem.mymalloc(
"rhogrid", maxfftsize *
sizeof(fft_real));
574  for(large_array_offset ii = 0; ii < maxfftsize; ii++)
// Hypercube-pattern pairwise exchange of field points with every task.
580  for(
int level = 0; level < (1 <<
PTask); level++)
588          import_data = (fft_real *)
Mem.mymalloc(
"import_data", localfield_recvcount[recvTask] *
sizeof(fft_real));
589          import_globalindex = (large_array_offset *)
Mem.mymalloc(
"import_globalindex",
590                               localfield_recvcount[recvTask] *
sizeof(large_array_offset));
592          if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
594              myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] *
sizeof(fft_real),
595                             MPI_BYTE, recvTask,
TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] *
sizeof(fft_real),
598              myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
599                             localfield_sendcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
TAG_NONPERIOD_B,
600                             import_globalindex, localfield_recvcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
// Self-exchange: just alias the local buffers instead of copying.
606          import_data = localfield_data + localfield_offset[
ThisTask];
607          import_globalindex = localfield_globalindex + localfield_offset[
ThisTask];
// Accumulate imported contributions into the local grid portion; the
// offset is global index minus the start of this task's slab/column range.
611      for(large_numpart_type i = 0; i < localfield_recvcount[recvTask]; i++)
614#ifndef FFT_COLUMN_BASED
615          large_array_offset offset =
616              import_globalindex[i] - myplan.first_slab_x_of_task[
ThisTask] * GRID * ((large_array_offset)GRID2);
618          large_array_offset offset = import_globalindex[i] - myplan.firstcol_XY * ((large_array_offset)GRID2);
620          rhogrid[offset] += import_data[i];
625          Mem.myfree(import_globalindex);
626          Mem.myfree(import_data);
// Inverse of the zoom-optimized density assignment: fetch the computed
// potential/force values from the local grid portion for the field points
// owned by other tasks, ship them back, and CIC-interpolate the result to
// each particle (accumulating into P[i].Potential in the branch visible
// here; `dim` presumably selects the force component in missing code —
// TODO confirm against the full source).
// NOTE(review): extraction fragment — lines are missing between statements.
635void pm_nonperiodic::pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(
int grnr,
int dim)
// Normalization: converts grid values to physical units; 1/GRID^3 is the
// FFT normalization for the visible GRID^3-sized transform.
638  double fac = 1.0 / (Sp->FacIntToCoord * Sp->TotalMeshSize[grnr]) /
pow(GRID, 3);
652  double to_slab_fac = GRID / ((double)Sp->TotalMeshSize[grnr]);
// Hypercube-pattern exchange, mirroring prepare_density but in reverse:
// receive global indices, read out the grid, and send the values back.
654  for(
int level = 0; level < (1 <<
PTask); level++)
662          import_data = (fft_real *)
Mem.mymalloc(
"import_data", localfield_recvcount[recvTask] *
sizeof(fft_real));
663          import_globalindex = (large_array_offset *)
Mem.mymalloc(
"import_globalindex",
664                               localfield_recvcount[recvTask] *
sizeof(large_array_offset));
666          if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
668              myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
669                             localfield_sendcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
TAG_NONPERIOD_C,
670                             import_globalindex, localfield_recvcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
// Self-exchange: alias local buffers.
676          import_data = localfield_data + localfield_offset[
ThisTask];
677          import_globalindex = localfield_globalindex + localfield_offset[
ThisTask];
// Read the requested grid cells from the local slab/column range.
680      for(large_numpart_type i = 0; i < localfield_recvcount[recvTask]; i++)
682#ifndef FFT_COLUMN_BASED
683          large_array_offset offset =
684              import_globalindex[i] - myplan.first_slab_x_of_task[
ThisTask] * GRID * ((large_array_offset)GRID2);
686          large_array_offset offset = import_globalindex[i] - myplan.firstcol_XY * ((large_array_offset)GRID2);
688          import_data[i] = grid[offset];
694                         localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] *
sizeof(fft_real),
697          Mem.myfree(import_globalindex);
698          Mem.myfree(import_data);
// Interpolate back to the particles; each particle owns 8 consecutive
// `part` entries (hence the >> 3 / << 3 packing).
705  int ngrid = (num_on_grid >> 3);
707  for(
int k = 0; k < ngrid; k++)
709      large_numpart_type j = (((large_numpart_type)k) << 3);
711      int i = (part[j].partindex >> 3);
713#if !defined(HIERARCHICAL_GRAVITY) && defined(TREEPM_NOTIMESPLIT)
// Skip particles whose gravity timebin is not synchronized this step.
714      if(!Sp->TimeBinSynchronized[P[i].TimeBinGrav])
718      MyIntPosType diff[3] = {P[i].
IntPos[0] - Sp->ReferenceIntPos[grnr][0], P[i].
IntPos[1] - Sp->ReferenceIntPos[grnr][1],
719                              P[i].
IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
723      double dx = to_slab_fac * (delta[0] - Sp->Corner[grnr][0]);
724      double dy = to_slab_fac * (delta[1] - Sp->Corner[grnr][1]);
725      double dz = to_slab_fac * (delta[2] - Sp->Corner[grnr][2]);
727      int slab_x = (int)(dx);
728      int slab_y = (int)(dy);
729      int slab_z = (int)(dz);
// Trilinear (CIC) interpolation over the particle's 8 cells.
735      double value = +localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
736                     localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz +
737                     localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) +
738                     localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz +
739                     localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) +
740                     localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz +
741                     localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) +
742                     localfield_data[part[j + 7].localindex] * (dx)*dy * dz;
747          P[i].Potential += value * fac;
// Build the local density field for mesh `grnr` using the uniform-optimized
// scheme: a first counting pass determines how many particle copies each
// task must receive (particles straddling a slab/column boundary are sent
// to every task that owns one of their cells), then particle data is packed,
// exchanged with a single all-to-all, and CIC-assigned to the local grid.
// NOTE(review): extraction fragment — lines are missing between statements
// (e.g. the computation of `delta`/`deltaz` from `diff`, the definition of
// `data_cols`, and several brace bodies are not visible).
759void pm_nonperiodic::pmforce_nonperiodic_uniform_optimized_prepare_density(
int grnr)
761  double to_slab_fac = GRID / ((double)Sp->TotalMeshSize[grnr]);
// Per-task send/receive bookkeeping for the particle exchange.
763  Sndpm_count = (
size_t *)
Mem.mymalloc(
"Sndpm_count",
NTask *
sizeof(
size_t));
764  Sndpm_offset = (
size_t *)
Mem.mymalloc(
"Sndpm_offset",
NTask *
sizeof(
size_t));
765  Rcvpm_count = (
size_t *)
Mem.mymalloc(
"Rcvpm_count",
NTask *
sizeof(
size_t));
766  Rcvpm_offset = (
size_t *)
Mem.mymalloc(
"Rcvpm_offset",
NTask *
sizeof(
size_t));
768#ifdef FFT_COLUMN_BASED
// Column decomposition parameters: first `tasklastsection` tasks own `avg`
// columns, the rest one fewer; `pivotcol` is the boundary column index.
769  int columns = GRIDX * GRIDY;
770  int avg = (columns - 1) /
NTask + 1;
771  int exc =
NTask * avg - columns;
772  int tasklastsection =
NTask - exc;
773  int pivotcol = tasklastsection * avg;
// Pass 1: count how many particle copies go to each task.
778    size_t *send_count = Sndpm_count;
780    for(
int j = 0; j <
NTask; j++)
783    for(
int idx = 0; idx < NSource; idx++)
785        int i = Sp->get_active_index(idx);
790        MyIntPosType diff[3] = {Sp->P[i].IntPos[0] - Sp->ReferenceIntPos[grnr][0], Sp->P[i].IntPos[1] - Sp->ReferenceIntPos[grnr][1],
791                                Sp->P[i].IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
// Skip particles outside the allowed region of this mesh.
795        if(delta[0] < Sp->Xmintot[grnr][0] || delta[0] >= Sp->Xmaxtot[grnr][0])
798        if(delta[1] < Sp->Xmintot[grnr][1] || delta[1] >= Sp->Xmaxtot[grnr][1])
801        if(delta[2] < Sp->Xmintot[grnr][2] || delta[2] >= Sp->Xmaxtot[grnr][2])
804        int slab_x = (int)(to_slab_fac * (delta[0] - Sp->Corner[grnr][0]));
805        int slab_xx = slab_x + 1;
807#ifndef FFT_COLUMN_BASED
// Slab case: at most two owner tasks (base slab and its neighbour).
808        int task0 = myplan.slab_to_task[slab_x];
809        int task1 = myplan.slab_to_task[slab_xx];
815        int slab_y = (int)(to_slab_fac * (delta[1] - Sp->Corner[grnr][1]));
816        int slab_yy = slab_y + 1;
// Column case: up to four owner tasks (four xy-columns of the CIC stencil).
818        int column0 = slab_x * GRID + slab_y;
819        int column1 = slab_x * GRID + slab_yy;
820        int column2 = slab_xx * GRID + slab_y;
821        int column3 = slab_xx * GRID + slab_yy;
823        int task0, task1, task2, task3;
825        if(column0 < pivotcol)
826          task0 = column0 / avg;
828          task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
830        if(column1 < pivotcol)
831          task1 = column1 / avg;
833          task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
835        if(column2 < pivotcol)
836          task2 = column2 / avg;
838          task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
840        if(column3 < pivotcol)
841          task3 = column3 / avg;
843          task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
// Count each distinct owner task only once per particle.
848        if(task2 != task1 && task2 != task0)
850        if(task3 != task0 && task3 != task1 && task3 != task2)
// Prefix sums of the send counts give the send offsets.
858  for(
int i = 1; i <
NTask; i++)
861      int ind_prev = i - 1;
863      Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
// Totals and receive offsets (Rcvpm_count presumably filled by an exchange
// of counts in code missing from this fragment — confirm).
868  nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0;
869  for(
int j = 0; j <
NTask; j++)
871      nexport += Sndpm_count[j];
872      nimport += Rcvpm_count[j];
876          Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
877          Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
882  partin = (partbuf *)
Mem.mymalloc(
"partin", nimport *
sizeof(partbuf));
883  partout = (partbuf *)
Mem.mymalloc(
"partout", nexport *
sizeof(partbuf));
// Pass 2: identical traversal, but now fill the outgoing particle buffer.
886    size_t *send_count = Sndpm_count;
887    size_t *send_offset = Sndpm_offset;
889    for(
int j = 0; j <
NTask; j++)
893    for(
int idx = 0; idx < NSource; idx++)
895        int i = Sp->get_active_index(idx);
897        MyIntPosType diff[3] = {Sp->P[i].IntPos[0] - Sp->ReferenceIntPos[grnr][0], Sp->P[i].IntPos[1] - Sp->ReferenceIntPos[grnr][1],
898                                Sp->P[i].IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
902        if(delta[0] < Sp->Xmintot[grnr][0] || delta[0] >= Sp->Xmaxtot[grnr][0])
905        if(delta[1] < Sp->Xmintot[grnr][1] || delta[1] >= Sp->Xmaxtot[grnr][1])
908        if(delta[2] < Sp->Xmintot[grnr][2] || delta[2] >= Sp->Xmaxtot[grnr][2])
911        int slab_x = (int)(to_slab_fac * (delta[0] - Sp->Corner[grnr][0]));
912        int slab_xx = slab_x + 1;
914#ifndef FFT_COLUMN_BASED
915        int task0 = myplan.slab_to_task[slab_x];
916        int task1 = myplan.slab_to_task[slab_xx];
918        size_t ind0 = send_offset[task0] + send_count[task0]++;
919        partout[ind0].Mass = Sp->P[i].getMass();
920        for(
int j = 0; j < 3; j++)
921          partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
// Duplicate the particle for the second owner task where needed.
925            size_t ind1 = send_offset[task1] + send_count[task1]++;
926            partout[ind1].Mass = Sp->P[i].getMass();
927            for(
int j = 0; j < 3; j++)
928              partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
931        int slab_y = (int)(to_slab_fac * (delta[1] - Sp->Corner[grnr][1]));
932        int slab_yy = slab_y + 1;
934        int column0 = slab_x * GRID + slab_y;
935        int column1 = slab_x * GRID + slab_yy;
936        int column2 = slab_xx * GRID + slab_y;
937        int column3 = slab_xx * GRID + slab_yy;
939        int task0, task1, task2, task3;
941        if(column0 < pivotcol)
942          task0 = column0 / avg;
944          task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
946        if(column1 < pivotcol)
947          task1 = column1 / avg;
949          task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
951        if(column2 < pivotcol)
952          task2 = column2 / avg;
954          task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
956        if(column3 < pivotcol)
957          task3 = column3 / avg;
959          task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
961        size_t ind0 = send_offset[task0] + send_count[task0]++;
962        partout[ind0].Mass = Sp->P[i].getMass();
963        for(
int j = 0; j < 3; j++)
964          partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
968            size_t ind1 = send_offset[task1] + send_count[task1]++;
969            partout[ind1].Mass = Sp->P[i].getMass();
970            for(
int j = 0; j < 3; j++)
971              partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
// Send copies to the remaining distinct owner tasks only once each.
973        if(task2 != task1 && task2 != task0)
975            size_t ind2 = send_offset[task2] + send_count[task2]++;
976            partout[ind2].Mass = Sp->P[i].getMass();
977            for(
int j = 0; j < 3; j++)
978              partout[ind2].IntPos[j] = Sp->P[i].IntPos[j];
980        if(task3 != task0 && task3 != task1 && task3 != task2)
982            size_t ind3 = send_offset[task3] + send_count[task3]++;
983            partout[ind3].Mass = Sp->P[i].getMass();
984            for(
int j = 0; j < 3; j++)
985              partout[ind3].IntPos[j] = Sp->P[i].IntPos[j];
// A global flag selects the big-message variant of the all-to-all when any
// per-task transfer is large.
991  int flag_big = 0, flag_big_all;
992  for(
int i = 0; i <
NTask; i++)
999  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX,
Communicator);
1002  myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset,
sizeof(partbuf), flag_big_all,
Communicator);
1004  Mem.myfree(partout);
// Allocate and clear the local density grid, then CIC-assign the imports.
1007  rhogrid = (fft_real *)
Mem.mymalloc(
"rhogrid", maxfftsize *
sizeof(fft_real));
1010  for(
size_t ii = 0; ii < maxfftsize; ii++)
1013#ifndef FFT_COLUMN_BASED
// Slab case: only y needs no ownership test; x cells are checked against
// this task's slab range, with indices shifted to slab-local coordinates.
1016  int first_y, count_y;
1018  int last_y = first_y + count_y - 1;
1020  for(
size_t i = 0; i < nimport; i++)
1022      MyIntPosType diff[3] = {partin[i].IntPos[0] - Sp->ReferenceIntPos[grnr][0], partin[i].IntPos[1] - Sp->ReferenceIntPos[grnr][1],
1023                              partin[i].IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
1027      double dy = to_slab_fac * (delta[1] - Sp->Corner[grnr][1]);
1028      int slab_y = (int)(dy);
1031      int slab_yy = slab_y + 1;
1032      int flag_slab_y, flag_slab_yy;
1034      if(slab_y >= first_y && slab_y <= last_y)
1039      if(slab_yy >= first_y && slab_yy <= last_y)
1044      if(flag_slab_y || flag_slab_yy)
1046          double mass = partin[i].Mass;
1048          double dx = to_slab_fac * (delta[0] - Sp->Corner[grnr][0]);
1049          double dz = to_slab_fac * (delta[2] - Sp->Corner[grnr][2]);
1050          int slab_x = (int)(dx);
1051          int slab_z = (int)(dz);
1055          int slab_xx = slab_x + 1;
1056          int slab_zz = slab_z + 1;
1058          int flag_slab_x, flag_slab_xx;
1060          if(myplan.slab_to_task[slab_x] ==
ThisTask)
1062              slab_x -= myplan.first_slab_x_of_task[
ThisTask];
1068          if(myplan.slab_to_task[slab_xx] ==
ThisTask)
1070              slab_xx -= myplan.first_slab_x_of_task[
ThisTask];
// Trilinear (CIC) deposit onto the locally-owned subset of the 8 cells.
1080                  rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
1081                  rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
1086                  rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
1087                  rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
1095                  rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
1096                  rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
1101                  rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
1102                  rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz));
// Column case: precompute per-particle fractional offsets and the four
// affected xy-column indices, then deposit into locally-owned columns.
1113    int col0, col1, col2, col3;
1117  data_cols *aux = (data_cols *)
Mem.mymalloc(
"aux", nimport *
sizeof(data_cols));
1119  for(
int i = 0; i < nimport; i++)
1121      MyIntPosType diff[3] = {partin[i].IntPos[0] - Sp->ReferenceIntPos[grnr][0], partin[i].IntPos[1] - Sp->ReferenceIntPos[grnr][1],
1122                              partin[i].IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
1126      aux[i].dx = to_slab_fac * (delta[0] - Sp->Corner[grnr][0]);
1127      aux[i].dy = to_slab_fac * (delta[1] - Sp->Corner[grnr][1]);
1129      int slab_x = (int)(aux[i].dx);
1130      int slab_y = (int)(aux[i].dy);
// Keep only the fractional part of the cell coordinates.
1132      aux[i].dx -= slab_x;
1133      aux[i].dy -= slab_y;
1135      int slab_xx = slab_x + 1;
1136      int slab_yy = slab_y + 1;
1138      aux[i].col0 = slab_x * GRID + slab_y;
1139      aux[i].col1 = slab_x * GRID + slab_yy;
1140      aux[i].col2 = slab_xx * GRID + slab_y;
1141      aux[i].col3 = slab_xx * GRID + slab_yy;
1145  int first_col, last_col, count_col;
1147  last_col = first_col + count_col - 1;
1148  first_col += myplan.firstcol_XY;
1149  last_col += myplan.firstcol_XY;
1151  for(
int i = 0; i < nimport; i++)
1153      int flag0, flag1, flag2, flag3;
1154      int col0 = aux[i].col0;
1155      int col1 = aux[i].col1;
1156      int col2 = aux[i].col2;
1157      int col3 = aux[i].col3;
// Ownership tests against this task's column range.
1159      if(col0 >= first_col && col0 <= last_col)
1164      if(col1 >= first_col && col1 <= last_col)
1169      if(col2 >= first_col && col2 <= last_col)
1174      if(col3 >= first_col && col3 <= last_col)
1179      if(flag0 || flag1 || flag2 || flag3)
1181          double mass = partin[i].Mass;
1183          double dx = aux[i].dx;
1184          double dy = aux[i].dy;
1188          double dz = to_slab_fac * (deltaz - Sp->Corner[grnr][2]);
1189          int slab_z = (int)(dz);
1191          int slab_zz = slab_z + 1;
// CIC deposit into the owned columns (z index is always local).
1195              rhogrid[FC(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
1196              rhogrid[FC(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
1201              rhogrid[FC(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
1202              rhogrid[FC(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
1207              rhogrid[FC(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
1208              rhogrid[FC(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
1213              rhogrid[FC(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
1214              rhogrid[FC(col3, slab_zz)] += (mass * (dx) * (dy) * (dz));
1227void pm_nonperiodic::pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(
int grnr,
int dim)
1230 double fac = 1.0 / (Sp->FacIntToCoord * Sp->TotalMeshSize[grnr]) /
pow(GRID, 3);
1233 double to_slab_fac = GRID / ((double)Sp->TotalMeshSize[grnr]);
1235 double *flistin = (
double *)
Mem.mymalloc(
"flistin", nimport *
sizeof(
double));
1236 double *flistout = (
double *)
Mem.mymalloc(
"flistout", nexport *
sizeof(
double));
1245#ifdef FFT_COLUMN_BASED
1246 int columns = GRIDX * GRIDY;
1247 int avg = (columns - 1) /
NTask + 1;
1248 int exc =
NTask * avg - columns;
1249 int tasklastsection =
NTask - exc;
1250 int pivotcol = tasklastsection * avg;
1253 for(
size_t i = 0; i < nimport; i++)
1257 MyIntPosType diff[3] = {partin[i].IntPos[0] - Sp->ReferenceIntPos[grnr][0], partin[i].IntPos[1] - Sp->ReferenceIntPos[grnr][1],
1258 partin[i].IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
1262 double dx = to_slab_fac * (delta[0] - Sp->Corner[grnr][0]);
1263 double dy = to_slab_fac * (delta[1] - Sp->Corner[grnr][1]);
1264 double dz = to_slab_fac * (delta[2] - Sp->Corner[grnr][2]);
1266 int slab_x = (int)(dx);
1267 int slab_y = (int)(dy);
1268 int slab_z = (int)(dz);
1274 int slab_xx = slab_x + 1;
1275 int slab_yy = slab_y + 1;
1276 int slab_zz = slab_z + 1;
1278#ifndef FFT_COLUMN_BASED
1279 if(myplan.slab_to_task[slab_x] ==
ThisTask)
1281 slab_x -= myplan.first_slab_x_of_task[
ThisTask];
1283 flistin[i] += +grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
1284 grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
1285 grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
1286 grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
1289 if(myplan.slab_to_task[slab_xx] ==
ThisTask)
1291 slab_xx -= myplan.first_slab_x_of_task[
ThisTask];
1293 flistin[i] += +grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
1294 grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
1295 grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
1296 grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
1299 int column0 = slab_x * GRID + slab_y;
1300 int column1 = slab_x * GRID + slab_yy;
1301 int column2 = slab_xx * GRID + slab_y;
1302 int column3 = slab_xx * GRID + slab_yy;
1304 if(column0 >= myplan.firstcol_XY && column0 <= myplan.lastcol_XY)
1306 flistin[i] += +grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
1307 grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
1309 if(column1 >= myplan.firstcol_XY && column1 <= myplan.lastcol_XY)
1312 +grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
1315 if(column2 >= myplan.firstcol_XY && column2 <= myplan.lastcol_XY)
1318 +grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
1321 if(column3 >= myplan.firstcol_XY && column3 <= myplan.lastcol_XY)
1323 flistin[i] += +grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
1329 int flag_big = 0, flag_big_all;
1330 for(
int i = 0; i <
NTask; i++)
1337 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX,
Communicator);
1340 myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset,
sizeof(
double), flag_big_all,
Communicator);
1344 size_t *send_count = Sndpm_count;
1345 size_t *send_offset = Sndpm_offset;
1347 for(
int j = 0; j <
NTask; j++)
1350 for(
int idx = 0; idx < NSource; idx++)
1352 int i = Sp->get_active_index(idx);
1354 MyIntPosType diff[3] = {Sp->P[i].IntPos[0] - Sp->ReferenceIntPos[grnr][0], Sp->P[i].IntPos[1] - Sp->ReferenceIntPos[grnr][1],
1355 Sp->P[i].IntPos[2] - Sp->ReferenceIntPos[grnr][2]};
1359 if(delta[0] < Sp->Xmintot[grnr][0] || delta[0] >= Sp->Xmaxtot[grnr][0])
1362 if(delta[1] < Sp->Xmintot[grnr][1] || delta[1] >= Sp->Xmaxtot[grnr][1])
1365 if(delta[2] < Sp->Xmintot[grnr][2] || delta[2] >= Sp->Xmaxtot[grnr][2])
1368 int slab_x = (int)(to_slab_fac * (delta[0] - Sp->Corner[grnr][0]));
1369 int slab_xx = slab_x + 1;
1371#ifndef FFT_COLUMN_BASED
1372 int task0 = myplan.slab_to_task[slab_x];
1373 int task1 = myplan.slab_to_task[slab_xx];
1375 double value = flistout[send_offset[task0] + send_count[task0]++];
1378 value += flistout[send_offset[task1] + send_count[task1]++];
1380 int slab_y = (int)(to_slab_fac * (delta[1] - Sp->Corner[grnr][1]));
1381 int slab_yy = slab_y + 1;
1383 int column0 = slab_x * GRID + slab_y;
1384 int column1 = slab_x * GRID + slab_yy;
1385 int column2 = slab_xx * GRID + slab_y;
1386 int column3 = slab_xx * GRID + slab_yy;
1388 int task0, task1, task2, task3;
1390 if(column0 < pivotcol)
1391 task0 = column0 / avg;
1393 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1395 if(column1 < pivotcol)
1396 task1 = column1 / avg;
1398 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1400 if(column2 < pivotcol)
1401 task2 = column2 / avg;
1403 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1405 if(column3 < pivotcol)
1406 task3 = column3 / avg;
1408 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
1410 double value = flistout[send_offset[task0] + send_count[task0]++];
1413 value += flistout[send_offset[task1] + send_count[task1]++];
1415 if(task2 != task1 && task2 != task0)
1416 value += flistout[send_offset[task2] + send_count[task2]++];
1418 if(task3 != task0 && task3 != task1 && task3 != task2)
1419 value += flistout[send_offset[task3] + send_count[task3]++];
1422#if !defined(HIERARCHICAL_GRAVITY) && defined(TREEPM_NOTIMESPLIT)
1423 if(!Sp->TimeBinSynchronized[Sp->P[i].TimeBinGrav])
1430 Sp->P[i].Potential += value * fac;
1435 Sp->P[i].GravAccel[dim] += value;
1439 Mem.myfree(flistout);
1440 Mem.myfree(flistin);
/* pmforce_nonperiodic(grnr): computes the PM force/potential contribution
 * from non-periodic FFT mesh number 'grnr' (0 = coarse mesh covering all
 * particles, 1 = nested high-resolution mesh used with PLACEHIGHRESREGION).
 *
 * Visible flow: sanity checks -> normalization factor -> density assignment
 * (zoom- or uniform-optimized variant) -> multiplication of the transformed
 * density with the precomputed Green's function fft_of_kernel[grnr] in
 * Fourier space -> potential readout (dim == -1) -> 4-point finite
 * differencing of the potential for each force component (dim = 2..0) ->
 * readout of each force component -> release of all buffers in reverse
 * order of allocation.
 *
 * NOTE(review): this capture of the file is lossy -- jumps in the embedded
 * original line numbers (e.g. 1501 -> 1512, 1535 -> 1544, 1567 -> 1589)
 * show that statements (the FFT calls, transposes, braces and the return
 * path) are missing here.  Comments below describe only what the surviving
 * lines demonstrate; do not treat them as a complete specification.
 */
1451int pm_nonperiodic::pmforce_nonperiodic(
int grnr)
1455 mpi_printf(
"PM-NONPERIODIC: Starting non-periodic PM calculation (Rcut=%g, grid=%d) presently allocated=%g MB.\n", Sp->Rcut[grnr],
/* Consistency check: the high-res cut-off radius must not exceed the
   coarse-mesh one (the Terminate(...) call itself is on a dropped line). */
1458 if(grnr == 1 && Sp->Rcut[1] > Sp->Rcut[0])
1460 "We have Sp->Rcut[1]=%g > Sp->Rcut[0]=%g, which means that the high-res cut-off is larger than the normal one... this is "
1462 Sp->Rcut[1], Sp->Rcut[0]);
/* Number of source particles: only the currently active time-bin particles
   under HIERARCHICAL_GRAVITY, otherwise every particle on this rank. */
1464#ifdef HIERARCHICAL_GRAVITY
1465 NSource = Sp->TimeBinsGravity.NActiveParticles;
1467 NSource = Sp->NumPart;
/* Without TREEPM_NOTIMESPLIT, the PM step must always act on all particles. */
1470#ifndef TREEPM_NOTIMESPLIT
1471 if(NSource != Sp->NumPart)
1472 Terminate(
"unexpected NSource != Sp->NumPart");
/* Guard against 32-bit overflow of (NumPart << 3); NUMPART_PER_TASK_LARGE
   presumably widens the affected index types -- confirm in dtypes.h. */
1475#ifndef NUMPART_PER_TASK_LARGE
1476 if((((
long long)Sp->NumPart) << 3) >= (((
long long)1) << 31))
1477 Terminate(
"We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help.");
/* Normalization for the convolution: 1/(mesh size) / GRID^3 (FFT norm),
   then an extra 1/(2*cellsize) factor consumed by the finite-difference
   stencil further down. */
1480 double fac = 1.0 / (Sp->FacIntToCoord * Sp->TotalMeshSize[grnr]) /
pow(GRID, 3);
1481 fac *= 1 / (2 * (Sp->FacIntToCoord * Sp->TotalMeshSize[grnr]) / GRID);
/* Build the density field on the mesh (two alternative strategies). */
1483#ifdef PM_ZOOM_OPTIMIZED
1484 pmforce_nonperiodic_zoom_optimized_prepare_density(grnr);
1486 pmforce_nonperiodic_uniform_optimized_prepare_density(grnr);
/* Scratch grid that later receives the differenced force field. */
1490 forcegrid = (fft_real *)
Mem.mymalloc(
"forcegrid", maxfftsize *
sizeof(fft_real));
1492 workspace = forcegrid;
/* Complex view of the transformed density: the slab-decomposed FFT
   transforms rhogrid in place; the column-based variant leaves the
   transform in the workspace buffer. */
1494#ifndef FFT_COLUMN_BASED
1495 fft_of_rhogrid = (fft_complex *)&rhogrid[0];
1497 fft_of_rhogrid = (fft_complex *)&workspace[0];
1501#ifndef FFT_COLUMN_BASED
/* Convolve with the Green's function: complex multiply every retained
   Fourier mode of the density with fft_of_kernel[grnr]. */
1512#ifdef FFT_COLUMN_BASED
1513 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
1516 for(
int x = 0; x < GRID; x++)
1517 for(
int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
1518 for(
int z = 0; z < GRIDz; z++)
1522#ifndef FFT_COLUMN_BASED
1523 large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z;
1526 double re = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][0] - fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][1];
1527 double im = fft_of_rhogrid[ip][0] * fft_of_kernel[grnr][ip][1] + fft_of_rhogrid[ip][1] * fft_of_kernel[grnr][ip][0];
1529 fft_of_rhogrid[ip][0] = re;
1530 fft_of_rhogrid[ip][1] = im;
1535#ifndef FFT_COLUMN_BASED
/* After the (missing) inverse FFT, rhogrid holds the potential;
   dim == -1 reads the potential itself out at the particle positions. */
1544#ifdef PM_ZOOM_OPTIMIZED
1545 pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, -1);
1547 pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, -1);
/* Force components: difference the potential along one axis per pass. */
1554 for(
int dim = 2; dim >= 0; dim--)
1557#ifndef FFT_COLUMN_BASED
/* Slab-based variant: 4-point stencil
     f = fac * ((4/3)(phi_l - phi_r) - (1/6)(phi_ll - phi_rr)).
   Loops only cover indices 2 .. GRID/2 - 3 -- presumably because the
   non-periodic mesh is zero-padded to twice the region size, so only
   the lower half carries physical data (confirm against full source).
   NOTE(review): the statements offsetting yl/yr/zl/zr by +-1 and +-2
   (original lines 1568-1588) are absent from this capture. */
1561 for(
int y = 2; y < GRID / 2 - 2; y++)
1562 for(
int x = 0; x < myplan.nslab_x; x++)
1563 if(x + myplan.slabstart_x >= 2 && x + myplan.slabstart_x < GRID / 2 - 2)
1564 for(
int z = 2; z < GRID / 2 - 2; z++)
1566 int yrr = y, yll = y, yr = y, yl = y;
1567 int zrr = z, zll = z, zr = z, zl = z;
1589 forcegrid[TI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[TI(x, yl, zl)] - rhogrid[TI(x, yr, zr)]) -
1590 (1.0 / 6) * (rhogrid[TI(x, yll, zll)] - rhogrid[TI(x, yrr, zrr)]));
1592 forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, zl)] - rhogrid[FI(x, yr, zr)]) -
1593 (1.0 / 6) * (rhogrid[FI(x, yll, zll)] - rhogrid[FI(x, yrr, zrr)]));
/* Column-based variant: snapshot the potential, then difference along the
   currently fastest-varying direction, column by column. */
1603 scratch = (fft_real *)
Mem.mymalloc(
"scratch", myplan.fftsize *
sizeof(fft_real));
1604 memcpy(scratch, rhogrid, myplan.fftsize *
sizeof(fft_real));
/* Local column count depends on the current transposition order
   (the selecting if/else chain is on dropped lines). */
1614 ncols = myplan.ncol_XY;
1616 ncols = myplan.ncol_XZ;
1618 ncols = myplan.ncol_ZY;
1620 for(
int i = 0; i < ncols; i++)
1622 fft_real *forcep, *potp;
1626 forcep = &scratch[GRID * i];
1627 potp = &forcegrid[GRID * i];
1631 forcep = &forcegrid[GRID2 * i];
1632 potp = &rhogrid[GRID2 * i];
/* Same 4-point stencil as above, applied within a single column.
   NOTE(review): the zl/zr/zll/zrr setup (original lines 1636-1641)
   is absent from this capture. */
1635 for(
int z = 2; z < GRID / 2 - 2; z++)
1642 forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr]));
1653 Mem.myfree(scratch);
/* Interpolate the freshly computed force component back to particles. */
1657#ifdef PM_ZOOM_OPTIMIZED
1658 pmforce_nonperiodic_zoom_optimized_readout_forces_or_potential(grnr, dim);
1660 pmforce_nonperiodic_uniform_optimized_readout_forces_or_potential(grnr, dim);
/* Release all buffers in reverse order of allocation (Mem.mymalloc /
   Mem.myfree look stack-ordered -- confirm against mymalloc.h). */
1665 Mem.myfree(forcegrid);
1666 Mem.myfree(rhogrid);
1668#ifdef PM_ZOOM_OPTIMIZED
1669 Mem.myfree(localfield_recvcount);
1670 Mem.myfree(localfield_offset);
1671 Mem.myfree(localfield_sendcount);
1672 Mem.myfree(localfield_first);
1673 Mem.myfree(localfield_data);
1674 Mem.myfree(localfield_globalindex);
1678 Mem.myfree(Rcvpm_offset);
1679 Mem.myfree(Rcvpm_count);
1680 Mem.myfree(Sndpm_offset);
1681 Mem.myfree(Sndpm_count);
/* pm_setup_nonperiodic_kernel(): tabulates the real-space Green's
 * function(s) for the non-periodic convolution, Fourier-transforms them
 * once, and stores the transforms (fft_of_kernel[0]/[1]) for reuse on
 * every call of pmforce_nonperiodic().
 *
 *   kernel[0] (only when PERIODIC is off): long-range kernel
 *     -erf(u)/r with u = 0.5*r/(ASMTH/GRID), i.e. the PM part of the
 *     tree/PM force split.
 *   kernel[1] (with PLACEHIGHRESREGION): band-limited kernel
 *     -(erfc(u*Asmth[1]/Asmth[0]) - erfc(u))/r, the force band handled
 *     by the high-resolution mesh between the two smoothing scales.
 *
 * NOTE(review): this capture is lossy -- jumps in the embedded original
 * line numbers (e.g. 1719 -> 1728, 1747 -> 1749, 1808 -> 1815) show that
 * statements (coordinate folding, the r > 0 tests, the FFT calls, loop
 * braces and the function tail) are missing.  Comments describe only the
 * surviving lines.
 */
1694void pm_nonperiodic::pm_setup_nonperiodic_kernel(
void)
1696 mpi_printf(
"PM-NONPERIODIC: Setting up non-periodic PM kernel(s) (GRID=%d) presently allocated=%g MB).\n", (
int)GRID,
/* --- kernel[0]: coarse mesh, needed only for fully non-periodic runs. */
1701#if !defined(PERIODIC)
/* Sweep over the full local grid (the loop body -- presumably zeroing
   kernel[0] -- is on a dropped line). */
1702 for(
size_t i = 0; i < maxfftsize; i++)
/* Visit every locally stored cell, slab- or column-decomposed. */
1705#ifndef FFT_COLUMN_BASED
1706 for(
int i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++)
1707 for(
int j = 0; j < GRID; j++)
1710 for(
int c = myplan.firstcol_XY; c < (myplan.firstcol_XY + myplan.ncol_XY); c++)
1715 for(
int k = 0; k < GRID; k++)
/* Cell coordinates in units of the full mesh extent; the folding of
   these into a range centred on zero (original lines 1720-1727) is
   missing from this capture. */
1717 double xx = ((double)i) / GRID;
1718 double yy = ((double)j) / GRID;
1719 double zz = ((double)k) / GRID;
1728 double r =
sqrt(xx * xx + yy * yy + zz * zz);
/* u = r / (2 * Asmth), with the smoothing scale Asmth = ASMTH/GRID
   expressed in mesh units. */
1730 double u = 0.5 * r / (((double)
ASMTH) / GRID);
/* 1 - erfc(u) == erf(u): long-range part of the force split. */
1732 double fac = 1 -
erfc(u);
1734#ifndef FFT_COLUMN_BASED
1735 size_t ip = FI(i - myplan.slabstart_x, j, k);
1737 size_t ip = FC(c, k);
/* Kernel value -erf(u)/r; the second assignment is its analytic
   r -> 0 limit, -1/(sqrt(pi)*Asmth).  The r > 0 branch test itself
   sits on a dropped line. */
1740 kernel[0][ip] = -fac / r;
1742 kernel[0][ip] = -1 / (
sqrt(
M_PI) * (((double)
ASMTH) / GRID));
/* Forward-FFT kernel[0]; the transform calls are missing here, but the
   result evidently lands in workspc and is copied back into kernel[0]. */
1747 fft_real *workspc = (fft_real *)
Mem.mymalloc(
"workspc", maxfftsize *
sizeof(fft_real));
1749#ifndef FFT_COLUMN_BASED
1753 memcpy(kernel[0], workspc, maxfftsize *
sizeof(fft_real));
1755 Mem.myfree(workspc);
/* --- kernel[1]: high-resolution mesh covering the zoom region. */
1760#if defined(PLACEHIGHRESREGION)
1762 for(
int i = 0; i < maxfftsize; i++)
1765#ifndef FFT_COLUMN_BASED
1766 for(
int i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++)
1767 for(
int j = 0; j < GRID; j++)
1770 for(
int c = myplan.firstcol_XY; c < (myplan.firstcol_XY + myplan.ncol_XY); c++)
1775 for(
int k = 0; k < GRID; k++)
1777 double xx = ((double)i) / GRID;
1778 double yy = ((double)j) / GRID;
1779 double zz = ((double)k) / GRID;
1788 double r =
sqrt(xx * xx + yy * yy + zz * zz);
1790 double u = 0.5 * r / (((double)
ASMTH) / GRID);
/* Band-pass window: force band between the fine smoothing scale
   Asmth[1] and the coarse one Asmth[0]. */
1792 double fac =
erfc(u * Sp->Asmth[1] / Sp->Asmth[0]) -
erfc(u);
1794#ifndef FFT_COLUMN_BASED
1795 size_t ip = FI(i - myplan.slabstart_x, j, k);
1797 size_t ip = FC(c, k);
1801 kernel[1][ip] = -fac / r;
/* Analytic r -> 0 limit of -fac/r for the band-pass kernel. */
1804 fac = 1 - Sp->Asmth[1] / Sp->Asmth[0];
1805 kernel[1][ip] = -fac / (
sqrt(
M_PI) * (((double)
ASMTH) / GRID));
1808#ifndef FFT_COLUMN_BASED
/* Forward-FFT kernel[1] (the transform calls sit on dropped lines). */
1815 fft_real *workspc = (fft_real *)
Mem.mymalloc(
"workspc", maxfftsize *
sizeof(fft_real));
1817#ifndef FFT_COLUMN_BASED
1821 memcpy(kernel[1], workspc, maxfftsize *
sizeof(fft_real));
1823 Mem.myfree(workspc);
/* --- Final sweep over all retained Fourier modes of the kernels. */
1829#ifdef FFT_COLUMN_BASED
/* Column layout: recover per-axis indices from the flat cell index. */
1831 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
1833 large_array_offset ipcell = ip + myplan.transposed_firstcol * GRID;
1834 int y = ipcell / (GRID * GRIDz);
1835 int yr = ipcell % (GRID * GRIDz);
1839 for(
int x = 0; x < GRID; x++)
1840 for(
int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
1841 for(
int z = 0; z < GRIDz; z++)
1859 double k2 = kx * kx + ky * ky + kz * kz;
/* Per-dimension factors: each dropped line presumably divides by
   sin(pi*k/GRID), turning fx/fy/fz into sinc-type weights -- TODO
   confirm against the complete source. */
1863 double fx = 1, fy = 1, fz = 1;
1867 fx = (
M_PI * kx) / GRID;
1872 fy = (
M_PI * ky) / GRID;
1877 fz = (
M_PI * kz) / GRID;
/* ff^4 multiplies both kernel transforms -- consistent with a CIC
   assignment/interpolation deconvolution, but the sin() lines needed
   to confirm that reading are missing from this capture. */
1881 double ff = 1 / (fx * fy * fz);
1882 ff = ff * ff * ff * ff;
1884#ifndef FFT_COLUMN_BASED
1885 large_array_offset ip = ((large_array_offset)GRIDz) * (GRID * (y - myplan.slabstart_y) + x) + z;
1887#if !defined(PERIODIC)
1888 fft_of_kernel[0][ip][0] *= ff;
1889 fft_of_kernel[0][ip][1] *= ff;
1891#if defined(PLACEHIGHRESREGION)
1892 fft_of_kernel[1][ip][0] *= ff;
1893 fft_of_kernel[1][ip][1] *= ff;
global_data_all_processes All
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
void my_column_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
void my_column_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
void my_fft_swap13back(fft_plan *plan, fft_real *data, fft_real *out)
void my_fft_swap13(fft_plan *plan, fft_real *data, fft_real *out)
void my_slab_transposeB(fft_plan *plan, fft_real *field, fft_real *scratch)
void my_slab_based_fft(fft_plan *plan, void *data, void *workspace, int forward)
void my_fft_swap23(fft_plan *plan, fft_real *data, fft_real *out)
void my_fft_swap23back(fft_plan *plan, fft_real *data, fft_real *out)
void my_slab_transposeA(fft_plan *plan, fft_real *field, fft_real *scratch)
void my_slab_based_fft_init(fft_plan *plan, int NgridX, int NgridY, int NgridZ)
void mpi_printf(const char *fmt,...)
#define MPI_MESSAGE_SIZELIMIT_IN_BYTES
double mycxxsort(T *begin, T *end, Tcomp comp)
int32_t MySignedIntPosType
#define BITS_FOR_POSITIONS
MPI_Datatype MPI_MyIntPosType
MPI_Op MPI_MAX_MySignedIntPosType
MPI_Op MPI_MIN_MySignedIntPosType
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)
expr pow(half base, half exp)
vector< MyFloat > GravAccel
void subdivide_evenly(long long N, int pieces, int index_bin, long long *first, int *count)