12#include "gadgetconfig.h"
14#if defined(PMGRID) && defined(PERIODIC)
25#include "../data/allvars.h"
26#include "../data/dtypes.h"
27#include "../data/intposconvert.h"
28#include "../data/mymalloc.h"
29#include "../domain/domain.h"
30#include "../logs/timer.h"
31#include "../main/simulation.h"
32#include "../mpi_utils/mpi_utils.h"
34#include "../pm/pm_periodic.h"
35#include "../sort/cxxsort.h"
36#include "../system/system.h"
37#include "../time_integration/timestep.h"
69#define GRIDz (GRIDZ / 2 + 1)
70#define GRID2 (2 * GRIDz)
73#define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z))
74#define FCxy(c, z) (((large_array_offset)GRID2) * ((c)-myplan.firstcol_XY) + (z))
75#define FCxz(c, y) (((large_array_offset)GRIDY) * ((c)-myplan.firstcol_XZ) + (y))
76#define FCzy(c, x) (((large_array_offset)GRIDX) * ((c)-myplan.firstcol_ZY) + (x))
78#ifndef FFT_COLUMN_BASED
79#define NI(x, y, z) (((large_array_offset)GRIDZ) * ((y) + (x)*myplan.nslab_y) + (z))
91 Sp->Rcut[0] =
RCUT * Sp->Asmth[0];
94 int ndimx[1] = {GRIDX};
95 int ndimy[1] = {GRIDY};
96 int ndimz[1] = {GRIDZ};
98 int max_GRID2 = 2 * (std::max<int>(std::max<int>(GRIDX, GRIDY), GRIDZ) / 2 + 1);
101 rhogrid = (fft_real *)
Mem.mymalloc(
"rhogrid", max_GRID2 *
sizeof(fft_real));
102 forcegrid = (fft_real *)
Mem.mymalloc(
"forcegrid", max_GRID2 *
sizeof(fft_real));
104#ifdef DOUBLEPRECISION_FFTW
108 int alignflag = FFTW_UNALIGNED;
111 myplan.forward_plan_zdir =
FFTW(plan_many_dft_r2c)(1, ndimz, 1, rhogrid, 0, 1, GRID2, (fft_complex *)forcegrid, 0, 1, GRIDz,
112 FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
114#ifndef FFT_COLUMN_BASED
120 myplan.forward_plan_ydir =
121 FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDY, (fft_complex *)forcegrid, 0, stride,
122 GRIDz * GRIDY, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
124 myplan.forward_plan_xdir =
125 FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDX, (fft_complex *)forcegrid, 0, stride,
126 GRIDz * GRIDX, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
128 myplan.backward_plan_xdir =
129 FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDX, (fft_complex *)forcegrid, 0, stride,
130 GRIDz * GRIDX, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
132 myplan.backward_plan_ydir =
133 FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)rhogrid, 0, stride, GRIDz * GRIDY, (fft_complex *)forcegrid, 0, stride,
134 GRIDz * GRIDY, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
136 myplan.backward_plan_zdir =
FFTW(plan_many_dft_c2r)(1, ndimz, 1, (fft_complex *)rhogrid, 0, 1, GRIDz, forcegrid, 0, 1, GRID2,
137 FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
139 Mem.myfree(forcegrid);
142#ifndef FFT_COLUMN_BASED
144 my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
146 maxfftsize = std::max<int>(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX) * ((size_t)GRID2);
150 my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
152 maxfftsize = myplan.max_datasize;
156#if defined(GRAVITY_TALLBOX)
157 kernel = (fft_real *)
Mem.mymalloc(
"kernel", maxfftsize *
sizeof(fft_real));
158 fft_of_kernel = (fft_complex *)kernel;
160 pmforce_setup_tallbox_kernel();
175#ifdef PM_ZOOM_OPTIMIZED
/* Builds the local mass-density field for the zoom-optimized PM scheme.
 *
 * Visible logic: every active source particle is spread with a CIC
 * (clouds-in-cell) kernel onto its 2x2x2 neighbouring grid cells (hence the
 * factor 8 in the allocations below); the resulting per-cell entries are
 * sorted by global grid index, compacted into a list of unique field points,
 * and exchanged with the MPI tasks that own the corresponding slabs
 * (FFT_COLUMN_BASED off) or columns (FFT_COLUMN_BASED on) of the FFT grid,
 * which finally accumulate them into rhogrid.
 *
 * Parameters (as visible here):
 *   mode     - presumably selects normal vs. power-spectrum folding; the
 *              branches using POWERSPEC_FOLDFAC vs. FOLDFAC*FOLDFAC vs. no
 *              folding suggest this — TODO confirm against the full source.
 *   typelist - per-particle-type mask; entries with typelist[type]==0 are
 *              given zero weight in the mass assignment (see the check below).
 *
 * NOTE(review): the embedded original line numbers (e.g. "184", "197", "230")
 * skip values, i.e. lines were elided when this chunk was extracted
 * (conditions, braces, #else/#endif directives are missing). The comments
 * below describe only the logic that is actually visible.
 */
177void pm_periodic::pmforce_zoom_optimized_prepare_density(
int mode,
int *typelist)
// One part_slab_data entry per particle per CIC corner: 8 * NSource.
184 part = (part_slab_data *)
Mem.mymalloc(
"part", 8 * (NSource *
sizeof(part_slab_data)));
185 large_numpart_type *part_sortindex =
186 (large_numpart_type *)
Mem.mymalloc(
"part_sortindex", 8 * (NSource *
sizeof(large_numpart_type)));
// Column-based decomposition: columns are distributed so the first
// 'tasklastsection' tasks get 'avg' columns each and the rest get avg-1;
// 'pivotcol' is the first column belonging to the shorter section.
188#ifdef FFT_COLUMN_BASED
189 int columns = GRIDX * GRIDY;
190 int avg = (columns - 1) / NTask + 1;
191 int exc = NTask * avg - columns;
192 int tasklastsection = NTask - exc;
193 int pivotcol = tasklastsection * avg;
// First pass over active particles: compute the base grid cell of each
// particle. The three visible variants apply no folding, a single
// POWERSPEC_FOLDFAC, or a squared folding factor to the integer positions —
// presumably selected by 'mode' (the selecting branches are elided here).
197 for(
int idx = 0; idx < NSource; idx++)
199 int i = Sp->get_active_index(idx);
204 int slab_x, slab_y, slab_z;
207 slab_x = (P[i].
IntPos[0] * POWERSPEC_FOLDFAC) / INTCELL;
208 slab_y = (P[i].
IntPos[1] * POWERSPEC_FOLDFAC) / INTCELL;
209 slab_z = (P[i].
IntPos[2] * POWERSPEC_FOLDFAC) / INTCELL;
213 slab_x = (P[i].
IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
214 slab_y = (P[i].
IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
215 slab_z = (P[i].
IntPos[2] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
219 slab_x = P[i].
IntPos[0] / INTCELL;
220 slab_y = P[i].
IntPos[1] / INTCELL;
221 slab_z = P[i].
IntPos[2] / INTCELL;
// Each particle claims 8 consecutive 'part' entries (index = idx << 3),
// one per corner of its CIC cube.
224 large_numpart_type index_on_grid = ((large_numpart_type)idx) << 3;
226 for(
int xx = 0; xx < 2; xx++)
227 for(
int yy = 0; yy < 2; yy++)
228 for(
int zz = 0; zz < 2; zz++)
230 int slab_xx = slab_x + xx;
231 int slab_yy = slab_y + yy;
232 int slab_zz = slab_z + zz;
// FI() maps (x,y,z) to the flat global grid offset; partindex packs the
// particle index with the 3-bit corner id.
241 large_array_offset offset = FI(slab_xx, slab_yy, slab_zz);
243 part[index_on_grid].partindex = (i << 3) + (xx << 2) + (yy << 1) + zz;
244 part[index_on_grid].globalindex = offset;
245 part_sortindex[index_on_grid] = index_on_grid;
// Sort the corner entries by global grid index so duplicates become
// adjacent, then count the unique field points.
252 large_array_offset num_field_points;
253 large_numpart_type num_on_grid = ((large_numpart_type)NSource) << 3;
257 mycxxsort(part_sortindex, part_sortindex + num_on_grid, pm_periodic_sortindex_comparator(part));
260 num_field_points = 1;
262 num_field_points = 0;
265 for(large_numpart_type i = 1; i < num_on_grid; i++)
267 if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
// Compacted per-field-point storage plus per-task send/recv bookkeeping.
272 localfield_globalindex = (large_array_offset *)
Mem.mymalloc_movable(&localfield_globalindex,
"localfield_globalindex",
273 num_field_points *
sizeof(large_array_offset));
274 localfield_data = (fft_real *)
Mem.mymalloc_movable(&localfield_data,
"localfield_data", num_field_points *
sizeof(fft_real));
275 localfield_first = (
size_t *)
Mem.mymalloc_movable(&localfield_first,
"localfield_first", NTask *
sizeof(
size_t));
276 localfield_sendcount = (
size_t *)
Mem.mymalloc_movable(&localfield_sendcount,
"localfield_sendcount", NTask *
sizeof(
size_t));
277 localfield_offset = (
size_t *)
Mem.mymalloc_movable(&localfield_offset,
"localfield_offset", NTask *
sizeof(
size_t));
278 localfield_recvcount = (
size_t *)
Mem.mymalloc_movable(&localfield_recvcount,
"localfield_recvcount", NTask *
sizeof(
size_t));
280 for(
int i = 0; i < NTask; i++)
282 localfield_first[i] = 0;
283 localfield_sendcount[i] = 0;
// Second sweep: assign each unique field point a compact local index and
// determine its owning task (slab owner, or column owner via the avg/pivotcol
// split), tallying how many points go to each task.
289 num_field_points = 0;
290 for(large_numpart_type i = 0; i < num_on_grid; i++)
293 if(part[part_sortindex[i]].globalindex != part[part_sortindex[i - 1]].globalindex)
296 part[part_sortindex[i]].localindex = num_field_points;
299 if(part[part_sortindex[i]].globalindex == part[part_sortindex[i - 1]].globalindex)
302 localfield_globalindex[num_field_points] = part[part_sortindex[i]].globalindex;
304#ifndef FFT_COLUMN_BASED
305 int slab = part[part_sortindex[i]].globalindex / (GRIDY * GRID2);
306 int task = myplan.slab_to_task[slab];
308 int task, column = part[part_sortindex[i]].globalindex / (GRID2);
310 if(column < pivotcol)
313 task = (column - pivotcol) / (avg - 1) + tasklastsection;
316 if(localfield_sendcount[task] == 0)
317 localfield_first[task] = num_field_points;
319 localfield_sendcount[task]++;
// Prefix sums of send counts give per-task offsets into the compacted arrays.
323 localfield_offset[0] = 0;
324 for(
int i = 1; i < NTask; i++)
325 localfield_offset[i] = localfield_offset[i - 1] + localfield_sendcount[i - 1];
327 Mem.myfree_movable(part_sortindex);
328 part_sortindex = NULL;
// Accumulate CIC weights: zero the compacted values, then for every particle
// (stride 8 = one entry per corner) add mass * trilinear weight to the 8
// field points it touches. Fractional cell offsets dx,dy,dz come from the
// integer-position remainders, with the same folding variants as above.
331 for(large_numpart_type i = 0; i < num_field_points; i++)
332 localfield_data[i] = 0;
334 for(large_numpart_type i = 0; i < num_on_grid; i += 8)
336 int pindex = (part[i].partindex >> 3);
341 rmd_x = (P[pindex].
IntPos[0] * POWERSPEC_FOLDFAC) % INTCELL;
342 rmd_y = (P[pindex].
IntPos[1] * POWERSPEC_FOLDFAC) % INTCELL;
343 rmd_z = (P[pindex].
IntPos[2] * POWERSPEC_FOLDFAC) % INTCELL;
347 rmd_x = (P[pindex].
IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
348 rmd_y = (P[pindex].
IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
349 rmd_z = (P[pindex].
IntPos[2] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
353 rmd_x = P[pindex].
IntPos[0] % INTCELL;
354 rmd_y = P[pindex].
IntPos[1] % INTCELL;
355 rmd_z = P[pindex].
IntPos[2] % INTCELL;
358 double dx = rmd_x * (1.0 / INTCELL);
359 double dy = rmd_y * (1.0 / INTCELL);
360 double dz = rmd_z * (1.0 / INTCELL);
362 double weight = P[pindex].
getMass();
// Particles of a type masked out in 'typelist' contribute nothing
// (the weight-zeroing statement itself is among the elided lines).
365 if(typelist[P[pindex].getType()] == 0)
368 localfield_data[part[i + 0].localindex] += weight * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
369 localfield_data[part[i + 1].localindex] += weight * (1.0 - dx) * (1.0 - dy) * dz;
370 localfield_data[part[i + 2].localindex] += weight * (1.0 - dx) * dy * (1.0 - dz);
371 localfield_data[part[i + 3].localindex] += weight * (1.0 - dx) * dy * dz;
372 localfield_data[part[i + 4].localindex] += weight * (dx) * (1.0 - dy) * (1.0 - dz);
373 localfield_data[part[i + 5].localindex] += weight * (dx) * (1.0 - dy) * dz;
374 localfield_data[part[i + 6].localindex] += weight * (dx)*dy * (1.0 - dz);
375 localfield_data[part[i + 7].localindex] += weight * (dx)*dy * dz;
// Exchange phase: allocate the zero-initialized local FFT patch, swap counts
// with all tasks, then pair up tasks via XOR over hypercube levels and
// send/receive (data, globalindex) pairs; ThisTask's own contribution is
// taken directly from the local arrays. Received values are scattered into
// rhogrid at offsets made local by subtracting this task's first slab/column.
378 rhogrid = (fft_real *)
Mem.mymalloc_clear(
"rhogrid", maxfftsize *
sizeof(fft_real));
381 myMPI_Alltoall(localfield_sendcount,
sizeof(
size_t), MPI_BYTE, localfield_recvcount,
sizeof(
size_t), MPI_BYTE, Communicator);
383 for(level = 0; level < (1 << PTask); level++)
385 recvTask = ThisTask ^ level;
391 import_data = (fft_real *)
Mem.mymalloc(
"import_data", localfield_recvcount[recvTask] *
sizeof(fft_real));
392 import_globalindex = (large_array_offset *)
Mem.mymalloc(
"import_globalindex",
393 localfield_recvcount[recvTask] *
sizeof(large_array_offset));
395 if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
397 myMPI_Sendrecv(localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] *
sizeof(fft_real),
398 MPI_BYTE, recvTask,
TAG_NONPERIOD_A, import_data, localfield_recvcount[recvTask] *
sizeof(fft_real),
401 myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
402 localfield_sendcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
TAG_NONPERIOD_B,
403 import_globalindex, localfield_recvcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
409 import_data = localfield_data + localfield_offset[ThisTask];
410 import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
414 for(
size_t i = 0; i < localfield_recvcount[recvTask]; i++)
417#ifndef FFT_COLUMN_BASED
418 large_array_offset offset =
419 import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
421 large_array_offset offset = import_globalindex[i] - myplan.firstcol_XY * ((large_array_offset)GRID2);
423 rhogrid[offset] += import_data[i];
// Free in reverse order of allocation, as the Mem allocator presumably
// requires (stack-like myfree pattern used throughout this file).
428 Mem.myfree(import_globalindex);
429 Mem.myfree(import_data);
/* Reads the force component 'dim' (or, presumably, the potential when dim
 * signals that — the selecting code is elided here) out of the transformed
 * grid and interpolates it back to the active particles, reversing the data
 * flow of pmforce_zoom_optimized_prepare_density(): each peer task sends the
 * global indices of the field points it needs, this task samples 'grid' at
 * those points and ships the values back, and finally every particle gathers
 * its 8 CIC corner values with trilinear weights.
 *
 * Reuses the localfield_* bookkeeping arrays set up by the prepare_density
 * pass (offsets, send/recv counts, localindex mapping in 'part').
 *
 * NOTE(review): original line numbers skip here as well — several statements
 * (the non-TALLBOX 'fac', the rmd_* computation, #else branches, frees) were
 * elided by the extraction; comments cover only what is visible.
 */
438void pm_periodic::pmforce_zoom_optimized_readout_forces_or_potential(fft_real *grid,
int dim)
// Normalization for the inverse FFT (1/N for an unnormalized transform pair).
443#ifdef GRAVITY_TALLBOX
444 double fac = 1.0 / (((double)GRIDX) * GRIDY * GRIDZ);
// Hypercube (XOR) pairing: receive the global indices requested by the peer,
// fill import_data with the grid values at those (task-local) offsets, and
// send them back; the self-exchange uses the local arrays directly.
450 for(
int level = 0; level < (1 << PTask); level++)
452 int recvTask = ThisTask ^ level;
458 import_data = (fft_real *)
Mem.mymalloc(
"import_data", localfield_recvcount[recvTask] *
sizeof(fft_real));
459 import_globalindex = (large_array_offset *)
Mem.mymalloc(
"import_globalindex",
460 localfield_recvcount[recvTask] *
sizeof(large_array_offset));
462 if(localfield_sendcount[recvTask] > 0 || localfield_recvcount[recvTask] > 0)
465 myMPI_Sendrecv(localfield_globalindex + localfield_offset[recvTask],
466 localfield_sendcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
TAG_NONPERIOD_C,
467 import_globalindex, localfield_recvcount[recvTask] *
sizeof(large_array_offset), MPI_BYTE, recvTask,
473 import_data = localfield_data + localfield_offset[ThisTask];
474 import_globalindex = localfield_globalindex + localfield_offset[ThisTask];
// Convert each requested global grid index into this task's local offset
// (subtracting the first owned slab / XY column) and sample the grid.
477 for(
size_t i = 0; i < localfield_recvcount[recvTask]; i++)
479#ifndef FFT_COLUMN_BASED
480 large_array_offset offset =
481 import_globalindex[i] - myplan.first_slab_x_of_task[ThisTask] * GRIDY * ((large_array_offset)GRID2);
483 large_array_offset offset = import_globalindex[i] - myplan.firstcol_XY * ((large_array_offset)GRID2);
485 import_data[i] = grid[offset];
492 localfield_data + localfield_offset[recvTask], localfield_sendcount[recvTask] *
sizeof(fft_real),
495 Mem.myfree(import_globalindex);
496 Mem.myfree(import_data);
// Interpolation back to particles: each active particle reads its 8 corner
// values (entries j..j+7 in 'part') and combines them with trilinear CIC
// weights; dx,dy,dz come from rmd_* remainders computed in elided lines.
502 for(
int idx = 0; idx < NSource; idx++)
504 int i = Sp->get_active_index(idx);
// With tree/PM not time-split, skip particles whose timebin is not
// synchronized at this step.
506#if !defined(HIERARCHICAL_GRAVITY) && defined(TREEPM_NOTIMESPLIT)
507 if(!Sp->TimeBinSynchronized[P[i].TimeBinGrav])
511 large_numpart_type j = (idx << 3);
517 double dx = rmd_x * (1.0 / INTCELL);
518 double dy = rmd_y * (1.0 / INTCELL);
519 double dz = rmd_z * (1.0 / INTCELL);
521 double value = localfield_data[part[j + 0].localindex] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
522 localfield_data[part[j + 1].localindex] * (1.0 - dx) * (1.0 - dy) * dz +
523 localfield_data[part[j + 2].localindex] * (1.0 - dx) * dy * (1.0 - dz) +
524 localfield_data[part[j + 3].localindex] * (1.0 - dx) * dy * dz +
525 localfield_data[part[j + 4].localindex] * (dx) * (1.0 - dy) * (1.0 - dz) +
526 localfield_data[part[j + 5].localindex] * (dx) * (1.0 - dy) * dz +
527 localfield_data[part[j + 6].localindex] * (dx)*dy * (1.0 - dz) +
528 localfield_data[part[j + 7].localindex] * (dx)*dy * dz;
// Accumulate into the PM-specific or the combined field depending on
// whether the tree and PM forces are time-split.
533#if defined(PERIODIC) && !defined(TREEPM_NOTIMESPLIT)
534 P[i].PM_Potential += value * fac;
536 P[i].Potential += value * fac;
542#if defined(PERIODIC) && !defined(TREEPM_NOTIMESPLIT)
543 Sp->P[i].GravPM[dim] += value;
/* Builds the density field for the uniform-optimized PM scheme: instead of
 * compacting unique field points (zoom variant), whole particles are shipped
 * to the task(s) owning the slab(s)/column(s) their CIC cube overlaps, and
 * the receivers do the 2x2x2 CIC binning into their local rhogrid patch.
 *
 * Visible structure:
 *   1. two-pass (rep = 0: count, rep = 1: fill) construction of per-task
 *      particle send buffers,
 *   2. myMPI_Alltoall of counts + prefix sums -> offsets, buffer allocation,
 *      myMPI_Alltoallv of the partbuf records,
 *   3. CIC mass assignment of the imported particles into rhogrid, with the
 *      slab (FI) and column (FCxy) index variants.
 *
 * Parameters: 'mode' presumably selects the power-spectrum folding variants
 * (POWERSPEC_FOLDFAC branches) — TODO confirm; 'typelist' masks particle
 * types out of the assignment (typelist[type]==0 -> skip, see below).
 *
 * NOTE(review): embedded original line numbers skip (e.g. 590→595, 814→824),
 * so branch keywords, braces and some statements are elided by the
 * extraction; comments describe only the visible logic.
 */
557void pm_periodic::pmforce_uniform_optimized_prepare_density(
int mode,
int *typelist)
// Per-task send/recv bookkeeping for the particle exchange.
559 Sndpm_count = (
size_t *)
Mem.mymalloc(
"Sndpm_count", NTask *
sizeof(
size_t));
560 Sndpm_offset = (
size_t *)
Mem.mymalloc(
"Sndpm_offset", NTask *
sizeof(
size_t));
561 Rcvpm_count = (
size_t *)
Mem.mymalloc(
"Rcvpm_count", NTask *
sizeof(
size_t));
562 Rcvpm_offset = (
size_t *)
Mem.mymalloc(
"Rcvpm_offset", NTask *
sizeof(
size_t));
// Column decomposition parameters: first 'tasklastsection' tasks own 'avg'
// XY columns each, the remainder own avg-1; 'pivotcol' splits the sections.
568#ifdef FFT_COLUMN_BASED
569 int columns = GRIDX * GRIDY;
570 int avg = (columns - 1) / NTask + 1;
571 int exc = NTask * avg - columns;
572 int tasklastsection = NTask - exc;
573 int pivotcol = tasklastsection * avg;
// Two passes over the active particles: rep==0 only counts how many partbuf
// records go to each task, rep==1 actually fills partout (the same
// Sndpm_count[task]++ pattern doubles as a running cursor in pass 2).
576 for(
int rep = 0; rep < 2; rep++)
579 for(
int j = 0; j < NTask; j++)
582 for(
int idx = 0; idx < NSource; idx++)
584 int i = Sp->get_active_index(idx);
// Masked-out particle types are skipped entirely.
590 if(typelist[P[i].getType()] == 0)
595 slab_x = (P[i].
IntPos[0] * POWERSPEC_FOLDFAC) / INTCELL;
597 slab_x = (P[i].
IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
599 slab_x = P[i].
IntPos[0] / INTCELL;
601 int slab_xx = slab_x + 1;
// Slab decomposition: the CIC cube can straddle at most two x-slabs, so a
// particle is sent to the owner of slab_x and (if different) slab_xx.
606#ifndef FFT_COLUMN_BASED
609 int task0 = myplan.slab_to_task[slab_x];
610 int task1 = myplan.slab_to_task[slab_xx];
612 Sndpm_count[task0]++;
615 Sndpm_count[task1]++;
619 int task0 = myplan.slab_to_task[slab_x];
620 int task1 = myplan.slab_to_task[slab_xx];
622 size_t ind0 = Sndpm_offset[task0] + Sndpm_count[task0]++;
624 partout[ind0].Mass = P[i].
getMass();
626 for(
int j = 0; j < 3; j++)
627 partout[ind0].IntPos[j] = P[i].IntPos[j];
631 size_t ind1 = Sndpm_offset[task1] + Sndpm_count[task1]++;
633 partout[ind1].Mass = P[i].
getMass();
635 for(
int j = 0; j < 3; j++)
636 partout[ind1].IntPos[j] = P[i].IntPos[j];
// Column decomposition: the CIC cube overlaps up to four XY columns
// (combinations of slab_x/slab_xx with slab_y/slab_yy), hence up to four
// distinct destination tasks; duplicates are filtered by the task!=task
// comparisons so each task receives the particle at most once.
643 slab_y = (P[i].
IntPos[1] * POWERSPEC_FOLDFAC) / INTCELL;
645 slab_y = (P[i].
IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
647 slab_y = P[i].
IntPos[1] / INTCELL;
649 int slab_yy = slab_y + 1;
654 int column0 = slab_x * GRIDY + slab_y;
655 int column1 = slab_x * GRIDY + slab_yy;
656 int column2 = slab_xx * GRIDY + slab_y;
657 int column3 = slab_xx * GRIDY + slab_yy;
659 int task0, task1, task2, task3;
661 if(column0 < pivotcol)
662 task0 = column0 / avg;
664 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
666 if(column1 < pivotcol)
667 task1 = column1 / avg;
669 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
671 if(column2 < pivotcol)
672 task2 = column2 / avg;
674 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
676 if(column3 < pivotcol)
677 task3 = column3 / avg;
679 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
683 Sndpm_count[task0]++;
685 Sndpm_count[task1]++;
686 if(task2 != task1 && task2 != task0)
687 Sndpm_count[task2]++;
688 if(task3 != task0 && task3 != task1 && task3 != task2)
689 Sndpm_count[task3]++;
693 size_t ind0 = Sndpm_offset[task0] + Sndpm_count[task0]++;
695 partout[ind0].Mass = P[i].
getMass();
697 for(
int j = 0; j < 3; j++)
698 partout[ind0].IntPos[j] = P[i].IntPos[j];
702 size_t ind1 = Sndpm_offset[task1] + Sndpm_count[task1]++;
704 partout[ind1].Mass = P[i].
getMass();
706 for(
int j = 0; j < 3; j++)
707 partout[ind1].IntPos[j] = P[i].IntPos[j];
709 if(task2 != task1 && task2 != task0)
711 size_t ind2 = Sndpm_offset[task2] + Sndpm_count[task2]++;
713 partout[ind2].Mass = P[i].
getMass();
715 for(
int j = 0; j < 3; j++)
716 partout[ind2].IntPos[j] = P[i].IntPos[j];
718 if(task3 != task0 && task3 != task1 && task3 != task2)
720 size_t ind3 = Sndpm_offset[task3] + Sndpm_count[task3]++;
722 partout[ind3].Mass = P[i].
getMass();
724 for(
int j = 0; j < 3; j++)
725 partout[ind3].IntPos[j] = P[i].IntPos[j];
// Exchange counts, build prefix-sum offsets, allocate exchange buffers, and
// ship the partbuf records with a (possibly large-count) Alltoallv; flag_big
// signals whether any per-task transfer is big (the size check itself is
// among the elided lines) and is agreed on globally via Allreduce.
733 myMPI_Alltoall(Sndpm_count,
sizeof(
size_t), MPI_BYTE, Rcvpm_count,
sizeof(
size_t), MPI_BYTE, Communicator);
735 nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0;
736 for(
int j = 0; j < NTask; j++)
738 nexport += Sndpm_count[j];
739 nimport += Rcvpm_count[j];
743 Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
744 Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
749 partin = (partbuf *)
Mem.mymalloc_movable(&partin,
"partin", nimport *
sizeof(partbuf));
750 partout = (partbuf *)
Mem.mymalloc(
"partout", nexport *
sizeof(partbuf));
757 int flag_big = 0, flag_big_all;
758 for(
int i = 0; i < NTask; i++)
762 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
765 myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset,
sizeof(partbuf), flag_big_all, Communicator);
// Local FFT patch, zero-initialized; imported particles are binned below.
770 rhogrid = (fft_real *)
Mem.mymalloc_movable_clear(&rhogrid,
"rhogrid", maxfftsize *
sizeof(fft_real));
// Slab-based binning: recompute cell index and remainder for each imported
// particle (same three folding variants), then add the 8 CIC contributions —
// but only for the x-slab(s) this task actually owns.
772#ifndef FFT_COLUMN_BASED
775 for(
size_t i = 0; i < nimport; i++)
777 int slab_x, slab_y, slab_z;
782 slab_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC) / INTCELL;
783 rmd_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC) % INTCELL;
784 slab_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC) / INTCELL;
785 rmd_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC) % INTCELL;
786 slab_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC) / INTCELL;
787 rmd_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC) % INTCELL;
791 slab_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
792 rmd_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
793 slab_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
794 rmd_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
795 slab_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
796 rmd_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
800 slab_x = partin[i].IntPos[0] / INTCELL;
801 rmd_x = partin[i].IntPos[0] % INTCELL;
802 slab_y = partin[i].IntPos[1] / INTCELL;
803 rmd_y = partin[i].IntPos[1] % INTCELL;
804 slab_z = partin[i].IntPos[2] / INTCELL;
805 rmd_z = partin[i].IntPos[2] % INTCELL;
808 double dx = rmd_x * (1.0 / INTCELL);
809 double dy = rmd_y * (1.0 / INTCELL);
810 double dz = rmd_z * (1.0 / INTCELL);
812 int slab_xx = slab_x + 1;
813 int slab_yy = slab_y + 1;
814 int slab_zz = slab_z + 1;
// Equal-mass mode uses the global particle mass instead of per-particle Mass
// (the selecting #ifdef is among the elided lines — presumably LEAN/fixed
// particle mass; TODO confirm against the full source).
824 double mass =
All.PartMass;
826 double mass = partin[i].Mass;
829 if(myplan.slab_to_task[slab_x] == ThisTask)
831 slab_x -= myplan.first_slab_x_of_task[ThisTask];
833 rhogrid[FI(slab_x, slab_y, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
834 rhogrid[FI(slab_x, slab_y, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
836 rhogrid[FI(slab_x, slab_yy, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
837 rhogrid[FI(slab_x, slab_yy, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
840 if(myplan.slab_to_task[slab_xx] == ThisTask)
842 slab_xx -= myplan.first_slab_x_of_task[ThisTask];
844 rhogrid[FI(slab_xx, slab_y, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
845 rhogrid[FI(slab_xx, slab_y, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
847 rhogrid[FI(slab_xx, slab_yy, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
848 rhogrid[FI(slab_xx, slab_yy, slab_zz)] += (mass * (dx) * (dy) * (dz));
// Column-based binning: same CIC weights, but contributions go only to the
// XY columns in [firstcol_XY, lastcol_XY] owned by this task, addressed via
// FCxy(column, z).
854 int first_col = myplan.firstcol_XY;
855 int last_col = myplan.firstcol_XY + myplan.ncol_XY - 1;
857 for(
size_t i = 0; i < nimport; i++)
863 slab_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC) / INTCELL;
864 rmd_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC) % INTCELL;
865 slab_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC) / INTCELL;
866 rmd_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC) % INTCELL;
870 slab_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
871 rmd_x = (partin[i].IntPos[0] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
872 slab_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
873 rmd_y = (partin[i].IntPos[1] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
877 slab_x = partin[i].IntPos[0] / INTCELL;
878 rmd_x = partin[i].IntPos[0] % INTCELL;
879 slab_y = partin[i].IntPos[1] / INTCELL;
880 rmd_y = partin[i].IntPos[1] % INTCELL;
883 double dx = rmd_x * (1.0 / INTCELL);
884 double dy = rmd_y * (1.0 / INTCELL);
886 int slab_xx = slab_x + 1;
887 int slab_yy = slab_y + 1;
895 int col0 = slab_x * GRIDY + slab_y;
896 int col1 = slab_x * GRIDY + slab_yy;
897 int col2 = slab_xx * GRIDY + slab_y;
898 int col3 = slab_xx * GRIDY + slab_yy;
901 double mass =
All.PartMass;
903 double mass = partin[i].Mass;
910 slab_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC) / INTCELL;
911 rmd_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC) % INTCELL;
915 slab_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) / INTCELL;
916 rmd_z = (partin[i].IntPos[2] * POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC) % INTCELL;
920 slab_z = partin[i].IntPos[2] / INTCELL;
921 rmd_z = partin[i].IntPos[2] % INTCELL;
924 double dz = rmd_z * (1.0 / INTCELL);
926 int slab_zz = slab_z + 1;
931 if(col0 >= first_col && col0 <= last_col)
933 rhogrid[FCxy(col0, slab_z)] += (mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz));
934 rhogrid[FCxy(col0, slab_zz)] += (mass * (1.0 - dx) * (1.0 - dy) * (dz));
937 if(col1 >= first_col && col1 <= last_col)
939 rhogrid[FCxy(col1, slab_z)] += (mass * (1.0 - dx) * (dy) * (1.0 - dz));
940 rhogrid[FCxy(col1, slab_zz)] += (mass * (1.0 - dx) * (dy) * (dz));
943 if(col2 >= first_col && col2 <= last_col)
945 rhogrid[FCxy(col2, slab_z)] += (mass * (dx) * (1.0 - dy) * (1.0 - dz));
946 rhogrid[FCxy(col2, slab_zz)] += (mass * (dx) * (1.0 - dy) * (dz));
949 if(col3 >= first_col && col3 <= last_col)
951 rhogrid[FCxy(col3, slab_z)] += (mass * (dx) * (dy) * (1.0 - dz));
952 rhogrid[FCxy(col3, slab_zz)] += (mass * (dx) * (dy) * (dz));
/* Reads force component 'dim' (or the potential — the dim-selecting code is
 * elided here) from 'grid' for the uniform-optimized scheme, using the XY
 * decomposition: for every imported particle record, the grid is sampled
 * with trilinear CIC weights from the slab(s)/column(s) owned locally; the
 * per-particle partial sums (flistin) are returned to the sending tasks via
 * myMPI_Alltoallv (flistout), which then accumulate the value onto their own
 * particles in exactly the same traversal order as the send pass — the
 * Sndpm_count[task]++ cursors replay the original packing order.
 *
 * Relies on partin/nimport and Sndpm_/Rcvpm_ arrays left over from
 * pmforce_uniform_optimized_prepare_density().
 *
 * NOTE(review): embedded original line numbers skip (e.g. 990→996, 1056→1062)
 * — rmd_* computations, #else branches, flistin/flistout allocations, and
 * wrap-around assignments for slab_xx/yy/zz are elided by the extraction.
 */
961void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_xy(fft_real *grid,
int dim)
// Inverse-FFT normalization factor (1/N over all grid cells).
966#ifdef GRAVITY_TALLBOX
967 double fac = 1.0 / (((double)GRIDX) * GRIDY * GRIDZ);
// Column ownership parameters, identical to the prepare_density pass.
976#ifdef FFT_COLUMN_BASED
977 int columns = GRIDX * GRIDY;
978 int avg = (columns - 1) / NTask + 1;
979 int exc = NTask * avg - columns;
980 int tasklastsection = NTask - exc;
981 int pivotcol = tasklastsection * avg;
// Sampling pass over imported particles: locate the CIC cube, then add the
// locally-owned corner contributions into flistin[i].
984 for(
size_t i = 0; i < nimport; i++)
988 int slab_x = partin[i].IntPos[0] / INTCELL;
989 int slab_y = partin[i].IntPos[1] / INTCELL;
990 int slab_z = partin[i].IntPos[2] / INTCELL;
996 double dx = rmd_x * (1.0 / INTCELL);
997 double dy = rmd_y * (1.0 / INTCELL);
998 double dz = rmd_z * (1.0 / INTCELL);
1000 int slab_xx = slab_x + 1;
1001 int slab_yy = slab_y + 1;
1002 int slab_zz = slab_z + 1;
// Periodic wrap of the upper CIC neighbours (the wrap assignment itself is
// in elided lines; presumably slab_xx = 0 etc.).
1004 if(slab_xx >= GRIDX)
1006 if(slab_yy >= GRIDY)
1008 if(slab_zz >= GRIDZ)
// Slab variant: sample the four corners lying in each locally-owned x-slab.
1011#ifndef FFT_COLUMN_BASED
1012 if(myplan.slab_to_task[slab_x] == ThisTask)
1014 slab_x -= myplan.first_slab_x_of_task[ThisTask];
1016 flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
1017 grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
1018 grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
1019 grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
1022 if(myplan.slab_to_task[slab_xx] == ThisTask)
1024 slab_xx -= myplan.first_slab_x_of_task[ThisTask];
1026 flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
1027 grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
1028 grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
1029 grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
// Column variant: sample the two z-corners of each locally-owned XY column.
1032 int column0 = slab_x * GRIDY + slab_y;
1033 int column1 = slab_x * GRIDY + slab_yy;
1034 int column2 = slab_xx * GRIDY + slab_y;
1035 int column3 = slab_xx * GRIDY + slab_yy;
1037 if(column0 >= myplan.firstcol_XY && column0 <= myplan.lastcol_XY)
1039 flistin[i] += grid[FCxy(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
1040 grid[FCxy(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
1042 if(column1 >= myplan.firstcol_XY && column1 <= myplan.lastcol_XY)
1045 grid[FCxy(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FCxy(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
1048 if(column2 >= myplan.firstcol_XY && column2 <= myplan.lastcol_XY)
1051 grid[FCxy(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FCxy(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
1054 if(column3 >= myplan.firstcol_XY && column3 <= myplan.lastcol_XY)
1056 flistin[i] += grid[FCxy(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FCxy(column3, slab_zz)] * (dx) * (dy) * (dz);
// Return the partial sums: counts/offsets are swapped relative to the send
// pass (Rcvpm_* describe the outgoing flistin, Sndpm_* the incoming
// flistout); flag_big handling mirrors the density exchange.
1062 int flag_big = 0, flag_big_all;
1063 for(
int i = 0; i < NTask; i++)
1070 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
1073 myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset,
sizeof(
MyFloat), flag_big_all,
// Gather pass: re-traverse the active particles in the same order as the
// send pass so that the Sndpm_count cursors index the matching flistout
// entries; a particle's value is the sum over the 1, 2 or up-to-4 tasks
// that held parts of its CIC cube.
1077 for(
int j = 0; j < NTask; j++)
1080 for(
int idx = 0; idx < NSource; idx++)
1082 int i = Sp->get_active_index(idx);
1084 int slab_x = P[i].
IntPos[0] / INTCELL;
1085 int slab_xx = slab_x + 1;
1087 if(slab_xx >= GRIDX)
1090#ifndef FFT_COLUMN_BASED
1091 int task0 = myplan.slab_to_task[slab_x];
1092 int task1 = myplan.slab_to_task[slab_xx];
1094 double value = flistout[Sndpm_offset[task0] + Sndpm_count[task0]++];
1097 value += flistout[Sndpm_offset[task1] + Sndpm_count[task1]++];
1099 int slab_y = P[i].
IntPos[1] / INTCELL;
1100 int slab_yy = slab_y + 1;
1102 if(slab_yy >= GRIDY)
1105 int column0 = slab_x * GRIDY + slab_y;
1106 int column1 = slab_x * GRIDY + slab_yy;
1107 int column2 = slab_xx * GRIDY + slab_y;
1108 int column3 = slab_xx * GRIDY + slab_yy;
1110 int task0, task1, task2, task3;
1112 if(column0 < pivotcol)
1113 task0 = column0 / avg;
1115 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1117 if(column1 < pivotcol)
1118 task1 = column1 / avg;
1120 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1122 if(column2 < pivotcol)
1123 task2 = column2 / avg;
1125 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1127 if(column3 < pivotcol)
1128 task3 = column3 / avg;
1130 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
1132 double value = flistout[Sndpm_offset[task0] + Sndpm_count[task0]++];
1135 value += flistout[Sndpm_offset[task1] + Sndpm_count[task1]++];
// Same duplicate-task filtering as the send pass keeps the cursors aligned.
1137 if(task2 != task1 && task2 != task0)
1138 value += flistout[Sndpm_offset[task2] + Sndpm_count[task2]++];
1140 if(task3 != task0 && task3 != task1 && task3 != task2)
1141 value += flistout[Sndpm_offset[task3] + Sndpm_count[task3]++];
// Without time-splitting, only particles synchronized at this step receive
// the PM contribution.
1144#if !defined(HIERARCHICAL_GRAVITY) && defined(TREEPM_NOTIMESPLIT)
1145 if(!Sp->TimeBinSynchronized[Sp->P[i].TimeBinGrav])
// Accumulate into PM-specific or combined potential/acceleration fields,
// depending on whether tree and PM forces are time-split.
1152#if defined(PERIODIC) && !defined(TREEPM_NOTIMESPLIT)
1153 Sp->P[i].PM_Potential += value * fac;
1155 Sp->P[i].Potential += value * fac;
1161#if defined(PERIODIC) && !defined(TREEPM_NOTIMESPLIT)
1162 Sp->P[i].GravPM[dim] += value;
1164 Sp->P[i].GravAccel[dim] += value;
// Free in reverse allocation order (stack-like allocator discipline).
1169 Mem.myfree(flistout);
1170 Mem.myfree(flistin);
1173#ifdef FFT_COLUMN_BASED
1174void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_xz(fft_real *grid,
int dim)
1179 size_t *send_count = (
size_t *)
Mem.mymalloc(
"send_count", NTask *
sizeof(
size_t));
1180 size_t *send_offset = (
size_t *)
Mem.mymalloc(
"send_offset", NTask *
sizeof(
size_t));
1181 size_t *recv_count = (
size_t *)
Mem.mymalloc(
"recv_count", NTask *
sizeof(
size_t));
1182 size_t *recv_offset = (
size_t *)
Mem.mymalloc(
"recv_offset", NTask *
sizeof(
size_t));
1189 partbuf *partin, *partout;
1190 size_t nimport = 0, nexport = 0;
1194 int columns = GRIDX * GRID2;
1195 int avg = (columns - 1) / NTask + 1;
1196 int exc = NTask * avg - columns;
1197 int tasklastsection = NTask - exc;
1198 int pivotcol = tasklastsection * avg;
1201 for(
int rep = 0; rep < 2; rep++)
1203 for(
int j = 0; j < NTask; j++)
1206 for(
int idx = 0; idx < NSource; idx++)
1208 int i = Sp->get_active_index(idx);
1211 Sp->drift_particle(&Sp->P[i], &Sp->SphP[i],
All.
Ti_Current);
1213 int slab_x = P[i].
IntPos[0] / INTCELL;
1214 int slab_xx = slab_x + 1;
1216 if(slab_xx >= GRIDX)
1219 int slab_z = P[i].
IntPos[2] / INTCELL;
1220 int slab_zz = slab_z + 1;
1222 if(slab_zz >= GRIDZ)
1225 int column0 = slab_x * GRID2 + slab_z;
1226 int column1 = slab_x * GRID2 + slab_zz;
1227 int column2 = slab_xx * GRID2 + slab_z;
1228 int column3 = slab_xx * GRID2 + slab_zz;
// NOTE(review): this is the tail of a readout function whose head lies before this
// chunk (the FCxz() indexing suggests the x-z column variant). The extraction has
// dropped braces/else-branches; comments below describe the visible logic only.
//
// Map each of the four CIC columns to the MPI rank that owns it in the column
// decomposition: ranks before 'tasklastsection' own 'avg' columns each, the rest
// own 'avg - 1' columns each ('pivotcol' is the first column of the second section).
1230 int task0, task1, task2, task3;
1232 if(column0 < pivotcol)
1233 task0 = column0 / avg;
1235 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1237 if(column1 < pivotcol)
1238 task1 = column1 / avg;
1240 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1242 if(column2 < pivotcol)
1243 task2 = column2 / avg;
1245 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1247 if(column3 < pivotcol)
1248 task3 = column3 / avg;
1250 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
// First pass: only count how many particle records go to each target task.
// Duplicate targets are sent once (a particle's four columns may share a rank).
1254 send_count[task0]++;
1256 send_count[task1]++;
1257 if(task2 != task1 && task2 != task0)
1258 send_count[task2]++;
1259 if(task3 != task0 && task3 != task1 && task3 != task2)
1260 send_count[task3]++;
// Second pass: fill 'partout' with the integer positions, using send_offset +
// a running send_count as the write cursor per target task.
1264 size_t ind0 = send_offset[task0] + send_count[task0]++;
1265 for(
int j = 0; j < 3; j++)
1266 partout[ind0].IntPos[j] = P[i].IntPos[j];
1270 size_t ind1 = send_offset[task1] + send_count[task1]++;
1271 for(
int j = 0; j < 3; j++)
1272 partout[ind1].IntPos[j] = P[i].IntPos[j];
1274 if(task2 != task1 && task2 != task0)
1276 size_t ind2 = send_offset[task2] + send_count[task2]++;
1278 for(
int j = 0; j < 3; j++)
1279 partout[ind2].IntPos[j] = P[i].IntPos[j];
1281 if(task3 != task0 && task3 != task1 && task3 != task2)
1283 size_t ind3 = send_offset[task3] + send_count[task3]++;
1285 for(
int j = 0; j < 3; j++)
1286 partout[ind3].IntPos[j] = P[i].IntPos[j];
// Exchange the per-task counts so each rank knows how much it will receive.
1293 myMPI_Alltoall(send_count,
sizeof(
size_t), MPI_BYTE, recv_count,
sizeof(
size_t), MPI_BYTE, Communicator);
// Build prefix-sum offsets and totals from the exchanged counts.
1295 nimport = 0, nexport = 0;
1296 recv_offset[0] = send_offset[0] = 0;
1298 for(
int j = 0; j < NTask; j++)
1300 nexport += send_count[j];
1301 nimport += recv_count[j];
1305 send_offset[j] = send_offset[j - 1] + send_count[j - 1];
1306 recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
1311 partin = (partbuf *)
Mem.mymalloc(
"partin", nimport *
sizeof(partbuf));
1312 partout = (partbuf *)
Mem.mymalloc(
"partout", nexport *
sizeof(partbuf));
// flag_big_all signals whether any message exceeds the size limit, so the
// Alltoallv wrapper can fall back to a safe transfer strategy.
1319 int flag_big = 0, flag_big_all;
1320 for(
int i = 0; i < NTask; i++)
1324 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
1327 myMPI_Alltoallv(partout, send_count, send_offset, partin, recv_count, recv_offset,
sizeof(partbuf), flag_big_all, Communicator);
1329 Mem.myfree(partout);
// For every imported particle, CIC-interpolate the locally stored grid columns
// into flistin[i]. Each of the four (x,z) columns contributes only if this rank
// owns it (firstcol_XZ..lastcol_XZ).
1334 for(
size_t i = 0; i < nimport; i++)
1338 int slab_x = partin[i].IntPos[0] / INTCELL;
1339 int slab_y = partin[i].IntPos[1] / INTCELL;
1340 int slab_z = partin[i].IntPos[2] / INTCELL;
// dx,dy,dz are the fractional cell offsets (CIC weights) in [0,1).
1346 double dx = rmd_x * (1.0 / INTCELL);
1347 double dy = rmd_y * (1.0 / INTCELL);
1348 double dz = rmd_z * (1.0 / INTCELL);
1350 int slab_xx = slab_x + 1;
1351 int slab_yy = slab_y + 1;
1352 int slab_zz = slab_z + 1;
// Periodic wrap of the +1 neighbours (wrap statements elided by extraction).
1354 if(slab_xx >= GRIDX)
1356 if(slab_yy >= GRIDY)
1358 if(slab_zz >= GRIDZ)
1361 int column0 = slab_x * GRID2 + slab_z;
1362 int column1 = slab_x * GRID2 + slab_zz;
1363 int column2 = slab_xx * GRID2 + slab_z;
1364 int column3 = slab_xx * GRID2 + slab_zz;
1366 if(column0 >= myplan.firstcol_XZ && column0 <= myplan.lastcol_XZ)
1368 flistin[i] += grid[FCxz(column0, slab_y)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
1369 grid[FCxz(column0, slab_yy)] * (1.0 - dx) * (dy) * (1.0 - dz);
1371 if(column1 >= myplan.firstcol_XZ && column1 <= myplan.lastcol_XZ)
1374 grid[FCxz(column1, slab_y)] * (1.0 - dx) * (1.0 - dy) * (dz) + grid[FCxz(column1, slab_yy)] * (1.0 - dx) * (dy) * (dz);
1377 if(column2 >= myplan.firstcol_XZ && column2 <= myplan.lastcol_XZ)
1380 grid[FCxz(column2, slab_y)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FCxz(column2, slab_yy)] * (dx) * (dy) * (1.0 - dz);
1383 if(column3 >= myplan.firstcol_XZ && column3 <= myplan.lastcol_XZ)
1385 flistin[i] += grid[FCxz(column3, slab_y)] * (dx) * (1.0 - dy) * (dz) + grid[FCxz(column3, slab_yy)] * (dx) * (dy) * (dz);
// Return the interpolated values to the originating ranks (reverse exchange:
// note recv_* and send_* roles are swapped relative to the first Alltoallv).
1393 for(
int i = 0; i < NTask; i++)
1397 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
1400 myMPI_Alltoallv(flistin, recv_count, recv_offset, flistout, send_count, send_offset,
sizeof(
MyFloat), flag_big_all, Communicator);
1402 for(
int j = 0; j < NTask; j++)
// Re-walk the active particles in the same deterministic order as the send
// pass, so the send_count cursors pick the matching entry from flistout.
1406 for(
int idx = 0; idx < NSource; idx++)
1408 int i = Sp->get_active_index(idx);
1410 int slab_x = P[i].
IntPos[0] / INTCELL;
1411 int slab_xx = slab_x + 1;
1413 if(slab_xx >= GRIDX)
1416 int slab_z = P[i].
IntPos[2] / INTCELL;
1417 int slab_zz = slab_z + 1;
1419 if(slab_zz >= GRIDZ)
1422 int column0 = slab_x * GRID2 + slab_z;
1423 int column1 = slab_x * GRID2 + slab_zz;
1424 int column2 = slab_xx * GRID2 + slab_z;
1425 int column3 = slab_xx * GRID2 + slab_zz;
1427 int task0, task1, task2, task3;
1429 if(column0 < pivotcol)
1430 task0 = column0 / avg;
1432 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1434 if(column1 < pivotcol)
1435 task1 = column1 / avg;
1437 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1439 if(column2 < pivotcol)
1440 task2 = column2 / avg;
1442 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1444 if(column3 < pivotcol)
1445 task3 = column3 / avg;
1447 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
// Sum the partial interpolants coming back from the (up to four) owner tasks.
1449 double value = flistout[send_offset[task0] + send_count[task0]++];
1452 value += flistout[send_offset[task1] + send_count[task1]++];
1454 if(task2 != task1 && task2 != task0)
1455 value += flistout[send_offset[task2] + send_count[task2]++];
1457 if(task3 != task0 && task3 != task1 && task3 != task2)
1458 value += flistout[send_offset[task3] + send_count[task3]++];
// Skip particles whose gravity timebin is not synchronized in this mode.
1460#if !defined(HIERARCHICAL_GRAVITY) && defined(TREEPM_NOTIMESPLIT)
1461 if(!Sp->TimeBinSynchronized[Sp->P[i].TimeBinGrav])
// Accumulate into the long-range PM force, or directly into the total
// acceleration when tree and PM forces are not time-split.
1465#if defined(PERIODIC) && !defined(TREEPM_NOTIMESPLIT)
1466 Sp->P[i].GravPM[dim] += value;
1468 Sp->P[i].GravAccel[dim] += value;
// Free buffers in reverse order of allocation (required by the Mem allocator).
1472 Mem.myfree(flistout);
1473 Mem.myfree(flistin);
1475 Mem.myfree(recv_offset);
1476 Mem.myfree(recv_count);
1477 Mem.myfree(send_offset);
1478 Mem.myfree(send_count);
// Read out the force (or potential) stored in 'grid' for all active particles,
// for the z-y column decomposition of the column-based FFT.
//
// Strategy: each particle's CIC stencil touches up to four (z,y) grid columns,
// each owned by some MPI rank. Particle positions are shipped to the owning
// ranks, interpolated there, and the interpolated values are shipped back and
// accumulated into GravPM[dim] (or GravAccel[dim] when not time-split).
//
// Parameters:
//   grid - local portion of the real-space result grid, laid out in ZY columns
//          (indexed through FCzy).
//   dim  - force component being read out (presumably 0=x,1=y,2=z; a negative
//          value elsewhere in this file selects the potential — TODO confirm).
1481void pm_periodic::pmforce_uniform_optimized_readout_forces_or_potential_zy(fft_real *grid,
int dim)
// Per-task communication bookkeeping buffers.
1486 size_t *send_count = (
size_t *)
Mem.mymalloc(
"send_count", NTask *
sizeof(
size_t));
1487 size_t *send_offset = (
size_t *)
Mem.mymalloc(
"send_offset", NTask *
sizeof(
size_t));
1488 size_t *recv_count = (
size_t *)
Mem.mymalloc(
"recv_count", NTask *
sizeof(
size_t));
1489 size_t *recv_offset = (
size_t *)
Mem.mymalloc(
"recv_offset", NTask *
sizeof(
size_t));
1496 partbuf *partin, *partout;
1497 size_t nimport = 0, nexport = 0;
// Column ownership layout: 'columns' total ZY columns are distributed so the
// first 'tasklastsection' ranks get 'avg' columns and the remainder get
// 'avg - 1'; 'pivotcol' is the first column of the second section.
1501 int columns = GRID2 * GRIDY;
1502 int avg = (columns - 1) / NTask + 1;
1503 int exc = NTask * avg - columns;
1504 int tasklastsection = NTask - exc;
1505 int pivotcol = tasklastsection * avg;
// Two passes over the active particles: rep==0 counts records per target task,
// rep==1 fills 'partout' using the prefix-sum offsets computed in between.
1508 for(
int rep = 0; rep < 2; rep++)
1510 for(
int j = 0; j < NTask; j++)
1513 for(
int idx = 0; idx < NSource; idx++)
1515 int i = Sp->get_active_index(idx);
// Drift the particle to the current time before sampling its position.
1518 Sp->drift_particle(&Sp->P[i], &Sp->SphP[i],
All.
Ti_Current);
1520 int slab_z = P[i].
IntPos[2] / INTCELL;
1521 int slab_zz = slab_z + 1;
// Periodic wrap of the +1 neighbour (wrap statement elided by extraction).
1523 if(slab_zz >= GRIDZ)
1526 int slab_y = P[i].
IntPos[1] / INTCELL;
1527 int slab_yy = slab_y + 1;
1529 if(slab_yy >= GRIDY)
// The four ZY columns touched by this particle's CIC stencil.
1532 int column0 = slab_z * GRIDY + slab_y;
1533 int column1 = slab_z * GRIDY + slab_yy;
1534 int column2 = slab_zz * GRIDY + slab_y;
1535 int column3 = slab_zz * GRIDY + slab_yy;
// Map columns to owning ranks via the two-section layout above.
1537 int task0, task1, task2, task3;
1539 if(column0 < pivotcol)
1540 task0 = column0 / avg;
1542 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1544 if(column1 < pivotcol)
1545 task1 = column1 / avg;
1547 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1549 if(column2 < pivotcol)
1550 task2 = column2 / avg;
1552 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1554 if(column3 < pivotcol)
1555 task3 = column3 / avg;
1557 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
// Counting pass: duplicate target ranks are counted only once per particle.
1561 send_count[task0]++;
1563 send_count[task1]++;
1564 if(task2 != task1 && task2 != task0)
1565 send_count[task2]++;
1566 if(task3 != task0 && task3 != task1 && task3 != task2)
1567 send_count[task3]++;
// Fill pass: write the particle's integer position for each distinct target.
1571 size_t ind0 = send_offset[task0] + send_count[task0]++;
1572 for(
int j = 0; j < 3; j++)
1573 partout[ind0].IntPos[j] = P[i].IntPos[j];
1577 size_t ind1 = send_offset[task1] + send_count[task1]++;
1578 for(
int j = 0; j < 3; j++)
1579 partout[ind1].IntPos[j] = P[i].IntPos[j];
1581 if(task2 != task1 && task2 != task0)
1583 size_t ind2 = send_offset[task2] + send_count[task2]++;
1585 for(
int j = 0; j < 3; j++)
1586 partout[ind2].IntPos[j] = P[i].IntPos[j];
1588 if(task3 != task0 && task3 != task1 && task3 != task2)
1590 size_t ind3 = send_offset[task3] + send_count[task3]++;
1592 for(
int j = 0; j < 3; j++)
1593 partout[ind3].IntPos[j] = P[i].IntPos[j];
// Exchange counts, then build offsets/totals (between the two passes).
1600 myMPI_Alltoall(send_count,
sizeof(
size_t), MPI_BYTE, recv_count,
sizeof(
size_t), MPI_BYTE, Communicator);
1602 nimport = 0, nexport = 0;
1603 recv_offset[0] = send_offset[0] = 0;
1605 for(
int j = 0; j < NTask; j++)
1607 nexport += send_count[j];
1608 nimport += recv_count[j];
1612 send_offset[j] = send_offset[j - 1] + send_count[j - 1];
1613 recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1];
1618 partin = (partbuf *)
Mem.mymalloc(
"partin", nimport *
sizeof(partbuf));
1619 partout = (partbuf *)
Mem.mymalloc(
"partout", nexport *
sizeof(partbuf));
// Oversized-message detection for the Alltoallv wrapper.
1626 int flag_big = 0, flag_big_all;
1627 for(
int i = 0; i < NTask; i++)
1631 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
1634 myMPI_Alltoallv(partout, send_count, send_offset, partin, recv_count, recv_offset,
sizeof(partbuf), flag_big_all, Communicator);
1636 Mem.myfree(partout);
// CIC-interpolate the locally owned ZY columns for every imported particle;
// only columns in [firstcol_ZY, lastcol_ZY] are stored on this rank.
1641 for(
size_t i = 0; i < nimport; i++)
1645 int slab_x = partin[i].IntPos[0] / INTCELL;
1646 int slab_y = partin[i].IntPos[1] / INTCELL;
1647 int slab_z = partin[i].IntPos[2] / INTCELL;
// Fractional intra-cell offsets (CIC weights).
1653 double dx = rmd_x * (1.0 / INTCELL);
1654 double dy = rmd_y * (1.0 / INTCELL);
1655 double dz = rmd_z * (1.0 / INTCELL);
1657 int slab_xx = slab_x + 1;
1658 int slab_yy = slab_y + 1;
1659 int slab_zz = slab_z + 1;
1661 if(slab_xx >= GRIDX)
1663 if(slab_yy >= GRIDY)
1665 if(slab_zz >= GRIDZ)
1668 int column0 = slab_z * GRIDY + slab_y;
1669 int column1 = slab_z * GRIDY + slab_yy;
1670 int column2 = slab_zz * GRIDY + slab_y;
1671 int column3 = slab_zz * GRIDY + slab_yy;
// Each owned column contributes two stencil corners (x and x+1 direction).
1673 if(column0 >= myplan.firstcol_ZY && column0 <= myplan.lastcol_ZY)
1675 flistin[i] += grid[FCzy(column0, slab_x)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
1676 grid[FCzy(column0, slab_xx)] * (dx) * (1.0 - dy) * (1.0 - dz);
1678 if(column1 >= myplan.firstcol_ZY && column1 <= myplan.lastcol_ZY)
1681 grid[FCzy(column1, slab_x)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FCzy(column1, slab_xx)] * (dx) * (dy) * (1.0 - dz);
1684 if(column2 >= myplan.firstcol_ZY && column2 <= myplan.lastcol_ZY)
1687 grid[FCzy(column2, slab_x)] * (1.0 - dx) * (1.0 - dy) * (dz) + grid[FCzy(column2, slab_xx)] * (dx) * (1.0 - dy) * (dz);
1690 if(column3 >= myplan.firstcol_ZY && column3 <= myplan.lastcol_ZY)
1692 flistin[i] += grid[FCzy(column3, slab_x)] * (1.0 - dx) * (dy) * (dz) + grid[FCzy(column3, slab_xx)] * (dx) * (dy) * (dz);
// Reverse exchange: send interpolated values back to the originating ranks.
1700 for(
int i = 0; i < NTask; i++)
1704 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
1707 myMPI_Alltoallv(flistin, recv_count, recv_offset, flistout, send_count, send_offset,
sizeof(
MyFloat), flag_big_all, Communicator);
1709 for(
int j = 0; j < NTask; j++)
// Re-walk the active particles in the same order as the send pass so the
// send_count cursors match up with the entries received in flistout.
1713 for(
int idx = 0; idx < NSource; idx++)
1715 int i = Sp->get_active_index(idx);
1717 int slab_z = P[i].
IntPos[2] / INTCELL;
1718 int slab_zz = slab_z + 1;
1720 if(slab_zz >= GRIDZ)
1723 int slab_y = P[i].
IntPos[1] / INTCELL;
1724 int slab_yy = slab_y + 1;
1726 if(slab_yy >= GRIDY)
1729 int column0 = slab_z * GRIDY + slab_y;
1730 int column1 = slab_z * GRIDY + slab_yy;
1731 int column2 = slab_zz * GRIDY + slab_y;
1732 int column3 = slab_zz * GRIDY + slab_yy;
1734 int task0, task1, task2, task3;
1736 if(column0 < pivotcol)
1737 task0 = column0 / avg;
1739 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1741 if(column1 < pivotcol)
1742 task1 = column1 / avg;
1744 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1746 if(column2 < pivotcol)
1747 task2 = column2 / avg;
1749 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1751 if(column3 < pivotcol)
1752 task3 = column3 / avg;
1754 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
// Sum the partial interpolants returned by the distinct owner ranks.
1756 double value = flistout[send_offset[task0] + send_count[task0]++];
1759 value += flistout[send_offset[task1] + send_count[task1]++];
1761 if(task2 != task1 && task2 != task0)
1762 value += flistout[send_offset[task2] + send_count[task2]++];
1764 if(task3 != task0 && task3 != task1 && task3 != task2)
1765 value += flistout[send_offset[task3] + send_count[task3]++];
// Skip particles whose gravity timebin is not synchronized in this mode.
1767#if !defined(HIERARCHICAL_GRAVITY) && defined(TREEPM_NOTIMESPLIT)
1768 if(!Sp->TimeBinSynchronized[Sp->P[i].TimeBinGrav])
// Accumulate into the PM force buffer, or directly into the acceleration
// when tree and PM contributions are not integrated on split timesteps.
1772#if defined(PERIODIC) && !defined(TREEPM_NOTIMESPLIT)
1773 Sp->P[i].GravPM[dim] += value;
1775 Sp->P[i].GravAccel[dim] += value;
// Release buffers in reverse order of allocation (Mem allocator requirement).
1779 Mem.myfree(flistout);
1780 Mem.myfree(flistin);
1782 Mem.myfree(recv_offset);
1783 Mem.myfree(recv_count);
1784 Mem.myfree(send_offset);
1785 Mem.myfree(send_count);
// Main driver of the periodic particle-mesh force calculation.
//
// Pipeline: (1) assign particle mass to the density grid, (2) forward FFT,
// (3) multiply by the long-range Green's function (with CIC deconvolution and
// smoothing), (4) inverse FFT to get the potential, (5) obtain each force
// component by 4th-order finite differencing of the potential and read it out
// at the particle positions.
//
// Parameters:
//   mode     - 0 selects the normal force computation; non-zero values appear
//              to select power-spectrum measurement passes (see
//              pmforce_do_powerspec, which calls with mode 1..3).
//   typelist - per-particle-type flags used by the density / powerspec passes.
1803void pm_periodic::pmforce_periodic(
int mode,
int *typelist)
1810 mpi_printf(
"PM-PERIODIC: Starting periodic PM calculation. (Rcut=%g) presently allocated=%g MB\n", Sp->Rcut[0],
// Number of source particles: only active ones under hierarchical gravity,
// otherwise all local particles.
1813#ifdef HIERARCHICAL_GRAVITY
1814 NSource = Sp->TimeBinsGravity.NActiveParticles;
1816 NSource = Sp->NumPart;
1819#ifndef TREEPM_NOTIMESPLIT
1820 if(NSource != Sp->NumPart)
1821 Terminate(
"unexpected NSource != Sp->NumPart");
// Guard against index overflow: 8*NumPart must fit in 32 bits unless the
// large-particle-number variant is compiled in.
1824#ifndef NUMPART_PER_TASK_LARGE
1825 if((((
long long)Sp->NumPart) << 3) >= (((
long long)1) << 31))
1826 Terminate(
"We are dealing with a too large particle number per MPI rank - enabling NUMPART_PER_TASK_LARGE might help.");
1829 double asmth2 = Sp->Asmth[0] * Sp->Asmth[0];
1831 double dhalf = 0.5 * d;
1833#ifdef GRAVITY_TALLBOX
// Normalization for the FFT pair (1/Ncells).
1834 double fac = 1.0 / (((double)GRIDX) * GRIDY * GRIDZ);
// Step (1): mass assignment, using the layout matching the compile-time mode.
1841#ifdef PM_ZOOM_OPTIMIZED
1842 pmforce_zoom_optimized_prepare_density(mode, typelist);
1844 pmforce_uniform_optimized_prepare_density(mode, typelist);
1853 forcegrid = (fft_real *)
Mem.mymalloc_movable(&forcegrid,
"forcegrid", maxfftsize *
sizeof(fft_real));
1855 workspace = forcegrid;
// Location of the complex transform differs between slab and column layouts.
1857#ifndef FFT_COLUMN_BASED
1858 fft_of_rhogrid = (fft_complex *)&rhogrid[0];
1860 fft_of_rhogrid = (fft_complex *)&workspace[0];
// Step (2): forward FFT of the density field.
1864#ifndef FFT_COLUMN_BASED
1865 my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], 1);
1867 my_column_based_fft(&myplan, rhogrid, workspace, 1);
// Power-spectrum measurement piggybacks on the transformed density.
1872 pmforce_measure_powerspec(mode - 1, typelist);
1874#if defined(FFT_COLUMN_BASED) && !defined(PM_ZOOM_OPTIMIZED)
1875 Mem.myfree_movable(partin);
// Step (3): multiply by the Green's function in k-space.
1883 double kfacx = 2.0 *
M_PI / (GRIDX * d);
1884 double kfacy = 2.0 *
M_PI / (GRIDY * d);
1885 double kfacz = 2.0 *
M_PI / (GRIDZ * d);
// Loop over the locally stored k-space cells; index decoding differs between
// the column-based and slab-based layouts.
1887#ifdef FFT_COLUMN_BASED
1888 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
1890 large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
1891 y = ipcell / (GRIDX * GRIDz);
1892 int yr = ipcell % (GRIDX * GRIDz);
1896 for(x = 0; x < GRIDX; x++)
1897 for(y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
1898 for(z = 0; z < GRIDz; z++)
// Fold indices above Nyquist to negative frequencies (folding statements
// elided by extraction).
1903 if(x >= (GRIDX / 2))
1907 if(y >= (GRIDY / 2))
1911 if(z >= (GRIDZ / 2))
1916 double kx = kfacx * xx;
1917 double ky = kfacy * yy;
1918 double kz = kfacz * zz;
1920 double k2 = kx * kx + ky * ky + kz * kz;
// smth: long-range smoothing kernel -exp(-k^2 Asmth^2)/k^2;
// deconv: CIC deconvolution factor (sinc^-4 per dimension).
1922 double smth = 1.0, deconv = 1.0;
1926 smth = -
exp(-k2 * asmth2) / k2;
1930 double fx = 1, fy = 1, fz = 1;
1948 double ff = 1 / (fx * fy * fz);
1949 deconv = ff * ff * ff * ff;
1954#ifndef FFT_COLUMN_BASED
1955 large_array_offset ip = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
// In the tallbox case the potential comes from a precomputed kernel via a
// complex multiply; otherwise just scale by the smoothing kernel.
1958#ifdef GRAVITY_TALLBOX
1959 double re = fft_of_rhogrid[ip][0] * fft_of_kernel[ip][0] - fft_of_rhogrid[ip][1] * fft_of_kernel[ip][1];
1960 double im = fft_of_rhogrid[ip][0] * fft_of_kernel[ip][1] + fft_of_rhogrid[ip][1] * fft_of_kernel[ip][0];
1962 fft_of_rhogrid[ip][0] = re * deconv *
exp(-k2 * asmth2);
1963 fft_of_rhogrid[ip][1] = im * deconv *
exp(-k2 * asmth2);
1965 fft_of_rhogrid[ip][0] *= smth;
1966 fft_of_rhogrid[ip][1] *= smth;
// Zero the k=0 mode (mean density does not gravitate in a periodic box).
1970#ifndef GRAVITY_TALLBOX
1971#ifdef FFT_COLUMN_BASED
1972 if(myplan.second_transposed_firstcol == 0)
1973 fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0;
1975 if(myplan.slabstart_y == 0)
1976 fft_of_rhogrid[0][0] = fft_of_rhogrid[0][1] = 0.0;
// Step (4): inverse FFT — rhogrid now holds the real-space potential.
1982#ifndef FFT_COLUMN_BASED
1983 my_slab_based_fft(&myplan, &rhogrid[0], &workspace[0], -1);
1985 my_column_based_fft(&myplan, workspace, rhogrid, -1);
// Readout of the potential itself (dim = -1).
1991#ifdef PM_ZOOM_OPTIMIZED
1992 pmforce_zoom_optimized_readout_forces_or_potential(rhogrid, -1);
1994 pmforce_uniform_optimized_readout_forces_or_potential_xy(rhogrid, -1);
// Step (5), slab-based layout: finite-difference the potential along each
// axis in turn with the 4th-order stencil (4/3)(f[-1]-f[+1]) - (1/6)(f[-2]-f[+2]),
// scaled by fac (which presumably absorbs 1/(2d) — TODO confirm upstream).
2006#ifndef FFT_COLUMN_BASED
// z-direction derivative.
2009 for(y = 0; y < GRIDY; y++)
2010 for(x = 0; x < myplan.nslab_x; x++)
2011 for(z = 0; z < GRIDZ; z++)
2013 int zr = z + 1, zl = z - 1, zrr = z + 2, zll = z - 2;
2023 forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, y, zl)] - rhogrid[FI(x, y, zr)]) -
2024 (1.0 / 6) * (rhogrid[FI(x, y, zll)] - rhogrid[FI(x, y, zrr)]));
2027#ifdef PM_ZOOM_OPTIMIZED
2028 pmforce_zoom_optimized_readout_forces_or_potential(forcegrid, 2);
2030 pmforce_uniform_optimized_readout_forces_or_potential_xy(forcegrid, 2);
// y-direction derivative.
2034 for(y = 0; y < GRIDY; y++)
2035 for(x = 0; x < myplan.nslab_x; x++)
2036 for(z = 0; z < GRIDZ; z++)
2038 int yr = y + 1, yl = y - 1, yrr = y + 2, yll = y - 2;
2048 forcegrid[FI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[FI(x, yl, z)] - rhogrid[FI(x, yr, z)]) -
2049 (1.0 / 6) * (rhogrid[FI(x, yll, z)] - rhogrid[FI(x, yrr, z)]));
2052#ifdef PM_ZOOM_OPTIMIZED
2053 pmforce_zoom_optimized_readout_forces_or_potential(forcegrid, 1);
2055 pmforce_uniform_optimized_readout_forces_or_potential_xy(forcegrid, 1);
// x-direction derivative requires a transpose so x becomes locally contiguous.
2059 my_slab_transposeA(&myplan, rhogrid, forcegrid);
2062 for(x = 0; x < GRIDX; x++)
2063 for(y = 0; y < myplan.nslab_y; y++)
2064 for(z = 0; z < GRIDZ; z++)
2066 int xrr = x + 2, xll = x - 2, xr = x + 1, xl = x - 1;
2076 forcegrid[NI(x, y, z)] = fac * ((4.0 / 3) * (rhogrid[NI(xl, y, z)] - rhogrid[NI(xr, y, z)]) -
2077 (1.0 / 6) * (rhogrid[NI(xll, y, z)] - rhogrid[NI(xrr, y, z)]));
2080 my_slab_transposeB(&myplan, forcegrid, rhogrid);
2082#ifdef PM_ZOOM_OPTIMIZED
2083 pmforce_zoom_optimized_readout_forces_or_potential(forcegrid, 0);
2085 pmforce_uniform_optimized_readout_forces_or_potential_xy(forcegrid, 0);
// Column-based layout: same derivatives, but along the direction that is
// contiguous in each column ordering, swapping axes between passes.
// z-derivative (z contiguous in XY columns).
2091 for(large_array_offset i = 0; i < myplan.ncol_XY; i++)
2093 fft_real *forcep = &forcegrid[GRID2 * i];
2094 fft_real *potp = &rhogrid[GRID2 * i];
2096 for(
int z = 0; z < GRIDZ; z++)
2112 forcep[z] = fac * ((4.0 / 3) * (potp[zl] - potp[zr]) - (1.0 / 6) * (potp[zll] - potp[zrr]));
2116#ifdef PM_ZOOM_OPTIMIZED
2117 pmforce_zoom_optimized_readout_forces_or_potential(forcegrid, 2);
2119 pmforce_uniform_optimized_readout_forces_or_potential_xy(forcegrid, 2);
2122 Mem.myfree_movable(partin);
// y-derivative: swap axes 2<->3 so y becomes contiguous, difference, read out.
2127 my_fft_swap23(&myplan, rhogrid, forcegrid);
// Scratch column: the derivative is computed in place per column, so the
// source values are copied out first.
2130 fft_real *column = (fft_real *)
Mem.mymalloc(
"column", GRIDY *
sizeof(fft_real));
2132 for(large_array_offset i = 0; i < myplan.ncol_XZ; i++)
2134 memcpy(column, &forcegrid[GRIDY * i], GRIDY *
sizeof(fft_real));
2136 fft_real *potp = column;
2137 fft_real *forcep = &forcegrid[GRIDY * i];
2139 for(
int y = 0; y < GRIDY; y++)
2155 forcep[y] = fac * ((4.0 / 3) * (potp[yl] - potp[yr]) - (1.0 / 6) * (potp[yll] - potp[yrr]));
2163#ifdef PM_ZOOM_OPTIMIZED
// Zoom-optimized readout expects the original ordering, so swap back first.
2165 fft_real *scratch = (fft_real *)
Mem.mymalloc(
"scratch", myplan.fftsize *
sizeof(fft_real));
2167 my_fft_swap23back(&myplan, forcegrid, scratch);
2168 pmforce_zoom_optimized_readout_forces_or_potential(scratch, 1);
2170 Mem.myfree(scratch);
2172 pmforce_uniform_optimized_readout_forces_or_potential_xz(forcegrid, 1);
// x-derivative: swap axes 1<->3 so x becomes contiguous.
2176 my_fft_swap13(&myplan, rhogrid, forcegrid);
2178 for(large_array_offset i = 0; i < myplan.ncol_ZY; i++)
2180 fft_real *forcep = &rhogrid[GRIDX * i];
2181 fft_real *potp = &forcegrid[GRIDX * i];
2183 for(
int x = 0; x < GRIDX; x++)
2199 forcep[x] = fac * ((4.0 / 3) * (potp[xl] - potp[xr]) - (1.0 / 6) * (potp[xll] - potp[xrr]));
2204#ifdef PM_ZOOM_OPTIMIZED
2205 my_fft_swap13back(&myplan, rhogrid, forcegrid);
2206 pmforce_zoom_optimized_readout_forces_or_potential(forcegrid, 0);
2208 pmforce_uniform_optimized_readout_forces_or_potential_zy(rhogrid, 0);
// Free all grids and bookkeeping, in reverse allocation order.
2216 Mem.myfree(forcegrid);
2217 Mem.myfree(rhogrid);
2219#ifdef PM_ZOOM_OPTIMIZED
2220 Mem.myfree(localfield_recvcount);
2221 Mem.myfree(localfield_offset);
2222 Mem.myfree(localfield_sendcount);
2223 Mem.myfree(localfield_first);
2224 Mem.myfree(localfield_data);
2225 Mem.myfree(localfield_globalindex);
2228#ifndef FFT_COLUMN_BASED
2231 Mem.myfree(Rcvpm_offset);
2232 Mem.myfree(Rcvpm_count);
2233 Mem.myfree(Sndpm_offset);
2234 Mem.myfree(Sndpm_count);
2240 mpi_printf(
"PM-PERIODIC: done. (took %g seconds)\n",
Logs.
timediff(tstart, tend));
2243#ifdef GRAVITY_TALLBOX
// Precompute the real-space long-range Green's function kernel for the
// "tallbox" geometry (periodic in two dimensions only) and transform it to
// k-space, where pmforce_periodic() multiplies it against the density field.
2248void pm_periodic::pmforce_setup_tallbox_kernel(
void)
2252 mpi_printf(
"PM-PERIODIC: Setting up tallbox kernel (GRIDX=%d, GRIDY=%d, GRIDZ=%d)\n", GRIDX, GRIDY, GRIDZ);
// Clear the whole local kernel array first (assignment elided by extraction).
2256 for(
int i = 0; i < maxfftsize; i++)
// Fill the kernel by evaluating the tallbox long-range potential at each grid
// cell; index iteration differs between slab- and column-based layouts.
2259#ifndef FFT_COLUMN_BASED
2260 for(
int i = myplan.slabstart_x; i < (myplan.slabstart_x + myplan.nslab_x); i++)
2261 for(
int j = 0; j < GRIDY; j++)
2264 for(
int c = myplan.firstcol_XY; c < (myplan.firstcol_XY + myplan.ncol_XY); c++)
2270 for(
int k = 0; k < GRIDZ; k++)
// Map grid indices above the Nyquist plane to negative separations
// (wrap statements elided by extraction).
2274 if(i >= (GRIDX / 2))
2278 if(j >= (GRIDY / 2))
2282 if(k >= (GRIDZ / 2))
2291 double pot = pmperiodic_tallbox_long_range_potential(xx, yy, zz);
2293#ifndef FFT_COLUMN_BASED
2294 size_t ip = FI(i - myplan.slabstart_x, j, k);
2296 size_t ip = FCxy(c, k);
2301#ifndef FFT_COLUMN_BASED
// Forward-transform the kernel once; the result is kept for reuse in every
// subsequent PM force calculation.
2309 fft_real *workspc = (fft_real *)
Mem.mymalloc(
"workspc", maxfftsize *
sizeof(fft_real));
2311#ifndef FFT_COLUMN_BASED
2312 my_slab_based_fft(&myplan, kernel, workspc, 1);
2314 my_column_based_fft(&myplan, kernel, workspc, 1);
2315 memcpy(kernel, workspc, maxfftsize *
sizeof(fft_real));
2318 Mem.myfree(workspc);
2320 mpi_printf(
"PM-PERIODIC: Done setting up tallbox kernel\n");
// Evaluate the long-range potential at separation (x,y,z) for a box that is
// periodic in two dimensions (which two is selected by GRAVITY_TALLBOX).
// Uses an Ewald-like split: a real-space image sum (erfc-screened) plus a
// k-space sum over the 2D reciprocal lattice, with splitting scale alpha.
2329 double r =
sqrt(x * x + y * y + z * z);
// The switch presumably permutes coordinates according to which axis is the
// non-periodic one — bodies elided by extraction; TODO confirm.
2332 switch(GRAVITY_TALLBOX)
// alpha ~ 2/sqrt(BOXX*BOXY) balances real- and k-space convergence.
2356 double leff =
sqrt(BOXX * BOXY);
2357 double alpha = 2.0 / leff;
// Truncation radii chosen so the neglected terms are ~exp(-10) small.
2361 int qxmax = (int)(10.0 / (BOXX * alpha) + 0.5);
2362 int qymax = (int)(10.0 / (BOXY * alpha) + 0.5);
2364 int nxmax = (int)(4.0 * alpha * BOXX + 0.5);
2365 int nymax = (int)(4.0 * alpha * BOXY + 0.5);
// Real-space sum over periodic images in the two periodic directions
// (the nx==ny==0 primary image is handled separately below).
2367 for(
int nx = -qxmax; nx <= qxmax; nx++)
2368 for(
int ny = -qymax; ny <= qymax; ny++)
2370 if(nx != 0 || ny != 0)
2372 double dx = x - nx * BOXX;
2373 double dy = y - ny * BOXY;
2374 double r =
sqrt(dx * dx + dy * dy + z * z);
2376 sum1 +=
erfc(alpha * r) / r;
// Primary image: subtract the short-range (PM-smoothed) part so only the
// long-range piece remains; alpha_star is the PM force-split scale.
2381 double alpha_star = 0.5 / (((double)
ASMTH) / PMGRID);
2382 double u = alpha_star * r;
2384 sum1 += (
erfc(alpha * r) -
erfc(u)) / r;
// r -> 0 limit of the bracketed term (keeps the kernel finite at the origin).
2386 sum1 += 2.0 /
sqrt(
M_PI) * (alpha_star - alpha);
2390 double alpha2 = alpha * alpha;
// k-space sum over the 2D reciprocal lattice of the periodic directions;
// the two branches choose the numerically stable form for the sign of z.
2394 for(
int nx = -nxmax; nx <= nxmax; nx++)
2395 for(
int ny = -nymax; ny <= nymax; ny++)
2397 if(nx != 0 || ny != 0)
2399 double kx = (2.0 *
M_PI / BOXX) * nx;
2400 double ky = (2.0 *
M_PI / BOXY) * ny;
2401 double k2 = kx * kx + ky * ky;
2402 double k =
sqrt(k2);
2406 double ex =
exp(-k * z);
2408 sum2 +=
cos(kx * x + ky * y) * (
erfc(k / (2 * alpha) + alpha * z) / ex + ex *
erfc(k / (2 * alpha) - alpha * z)) / k;
2412 double ex =
exp(k * z);
2414 sum2 +=
cos(kx * x + ky * y) * (ex *
erfc(k / (2 * alpha) + alpha * z) +
erfc(k / (2 * alpha) - alpha * z) / ex) / k;
2419 sum2 *=
M_PI / (BOXX * BOXY);
// Combine: constant and z-dependent background terms minus the two sums
// give the long-range potential contribution (returned as psi presumably —
// the return statement was elided by extraction).
2421 double psi = 2.0 * alpha /
sqrt(
M_PI) +
2422 (2 *
sqrt(
M_PI) / (BOXX * BOXY) * (
exp(-alpha2 * z * z) / alpha +
sqrt(
M_PI) * z *
erf(alpha * z))) - (sum1 + sum2);
// Compute and write matter power spectra for output number 'num': one spectrum
// for all particles combined, plus one per particle type when more than one
// type is present.
2432void pm_periodic::calculate_power_spectra(
int num)
// Count particles of each type globally (per-task histogram, then reduced).
2435 long long ntot_type_all[
NTYPES];
2437 for(
int n = 0; n <
NTYPES; n++)
2439 for(
int n = 0; n < Sp->NumPart; n++)
2440 n_type[Sp->P[n].getType()]++;
2446 for(
int i = 0; i <
NTYPES; i++)
// The powerspec path runs through the PM machinery, which needs the gravity
// timebin structures; allocate them temporarily if not already present and
// mark every particle active.
2449#ifdef HIERARCHICAL_GRAVITY
2450 int flag_extra_allocate = 0;
2451 if(Sp->TimeBinsGravity.ActiveParticleList == NULL)
2453 flag_extra_allocate = 1;
2454 Sp->TimeBinsGravity.timebins_allocate();
2457 Sp->TimeBinsGravity.NActiveParticles = 0;
2458 for(
int i = 0; i < Sp->NumPart; i++)
2459 Sp->TimeBinsGravity.ActiveParticleList[Sp->TimeBinsGravity.NActiveParticles++] = i;
// Total spectrum over all types.
2469 sprintf(power_spec_fname,
"%s/powerspecs/powerspec_%03d.txt",
All.
OutputDir, num);
2471 pmforce_do_powerspec(typeflag);
// Per-type spectra are only worth writing when at least two types exist.
2474 int count_types = 0;
2475 for(
int i = 0; i <
NTYPES; i++)
2476 if(ntot_type_all[i] > 0)
2480 for(
int i = 0; i <
NTYPES; i++)
2482 if(ntot_type_all[i] > 0)
// Select only type i (typeflag assignments elided by extraction).
2484 for(
int j = 0; j <
NTYPES; j++)
2489 sprintf(power_spec_fname,
"%s/powerspecs/powerspec_type%d_%03d.txt",
All.
OutputDir, i, num);
2491 pmforce_do_powerspec(typeflag);
// Undo the temporary timebin allocation if we made one above.
2495#ifdef HIERARCHICAL_GRAVITY
2496 if(flag_extra_allocate)
2497 Sp->TimeBinsGravity.timebins_free();
// Measure one power spectrum for the particle types flagged in 'typeflag'.
// Runs the PM pipeline three times (modes 1..3); the extra passes presumably
// correspond to folded measurements that extend the k-range (the folding
// factors appear in pmforce_measure_powerspec) — TODO confirm.
2501void pm_periodic::pmforce_do_powerspec(
int *typeflag)
2503 mpi_printf(
"POWERSPEC: Begin power spectrum. (typeflag=[");
2504 for(
int i = 0; i <
NTYPES; i++)
2505 mpi_printf(
" %d ", typeflag[i]);
2510 pmforce_periodic(1, typeflag);
2512 pmforce_periodic(2, typeflag);
2514 pmforce_periodic(3, typeflag);
2518 mpi_printf(
"POWERSPEC: End power spectrum. took %g seconds\n",
Logs.
timediff(tstart, tend));
// Accumulate the power spectrum from the k-space density field currently held
// in fft_of_rhogrid, bin it logarithmically in k, reduce across tasks, and
// append the result to the power-spectrum output file.
//
// Parameters:
//   flag     - measurement pass: 0 opens the file fresh ("w"), 1 and 2 append;
//              flags 1/2 also rescale k by the folding factor (see below).
//   typeflag - per-type selection flags, used for the shot-noise mass sums.
//
// NOTE(review): the end of this function (file close, delta/shot-noise
// computation) lies beyond this chunk; comments cover the visible part only.
2521void pm_periodic::pmforce_measure_powerspec(
int flag,
int *typeflag)
2525 long long CountModes[BINS_PS];
2526 double SumPowerUncorrected[BINS_PS];
2527 double PowerUncorrected[BINS_PS];
2528 double DeltaUncorrected[BINS_PS];
2529 double ShotLimit[BINS_PS];
2530 double KWeightSum[BINS_PS];
2531 double Kbin[BINS_PS];
// Mass / mass^2 / count sums over the selected types, for normalization and
// the shot-noise estimate (effective particle number = mass^2/mass2).
2533 double mass = 0, mass2 = 0, count = 0;
2534 for(
int i = 0; i < Sp->NumPart; i++)
2535 if(typeflag[P[i].getType()])
2538 mass2 += Sp->P[i].getMass() * Sp->P[i].getMass();
2542 MPI_Allreduce(MPI_IN_PLACE, &mass, 1, MPI_DOUBLE, MPI_SUM, Communicator);
2543 MPI_Allreduce(MPI_IN_PLACE, &mass2, 1, MPI_DOUBLE, MPI_SUM, Communicator);
2544 MPI_Allreduce(MPI_IN_PLACE, &count, 1, MPI_DOUBLE, MPI_SUM, Communicator);
2547 double dhalf = 0.5 * d;
2549 double fac = 1.0 / mass;
// Logarithmic k-binning between K0 (elided) and K1; K1 covers the doubly
// folded range (POWERSPEC_FOLDFAC^2 times the base Nyquist scale).
2552 double K1 = 2 *
M_PI /
All.
BoxSize * (POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC * PMGRID / 2);
2553 double binfac = BINS_PS / (
log(K1) -
log(K0));
2559 for(
int i = 0; i < BINS_PS; i++)
2561 SumPowerUncorrected[i] = 0;
// Walk the locally stored k-space cells (layout-dependent index decode).
2566#ifdef FFT_COLUMN_BASED
2567 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip++)
2569 large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
2570 int y = ipcell / (GRIDX * GRIDz);
2571 int yr = ipcell % (GRIDX * GRIDz);
2575 for(
int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
2576 for(
int x = 0; x < GRIDX; x++)
2577 for(
int z = 0; z < GRIDz; z++)
// The r2c transform stores only half the z-range; this condition (partially
// elided) presumably avoids double-counting the redundant conjugate modes.
2583 z < (GRIDZ + 1) / 2)
// Fold indices above Nyquist to negative frequencies (elided statements).
2590 if(x >= (GRIDX / 2))
2595 if(y >= (GRIDY / 2))
2600 if(z >= (GRIDZ / 2))
2605 double kx = kfacx * xx;
2606 double ky = kfacy * yy;
2607 double kz = kfacz * zz;
2609 double k2 = kx * kx + ky * ky + kz * kz;
// CIC deconvolution: smth = sinc^-4 per dimension, applied to the power.
2614 double fx = 1, fy = 1, fz = 1;
2631 double ff = 1 / (fx * fy * fz);
2632 double smth = ff * ff * ff * ff;
2639#ifndef FFT_COLUMN_BASED
2640 large_array_offset ip = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
// |delta_k|^2, normalized by total mass and deconvolved.
2643 double po = (fft_of_rhogrid[ip][0] * fft_of_rhogrid[ip][0] + fft_of_rhogrid[ip][1] * fft_of_rhogrid[ip][1]);
2645 po *= fac * fac * smth;
2647 double k =
sqrt(k2);
// Folded passes measure the same grid but represent k scaled up by the
// folding factor (once for flag==1, twice for flag==2 presumably).
2650 k *= POWERSPEC_FOLDFAC;
2652 k *= POWERSPEC_FOLDFAC * POWERSPEC_FOLDFAC;
2654 if(k >= K0 && k < K1)
2656 int bin =
log(k / K0) * binfac;
2658 SumPowerUncorrected[bin] += po;
2659 CountModes[bin] += 1;
2660 KWeightSum[bin] +=
log(k);
2664 SumPowerUncorrected[bin] += po;
2665 CountModes[bin] += 1;
2666 KWeightSum[bin] +=
log(k);
// Reduce the binned sums over all tasks.
2672 MPI_Allreduce(MPI_IN_PLACE, SumPowerUncorrected, BINS_PS, MPI_DOUBLE, MPI_SUM, Communicator);
2673 MPI_Allreduce(MPI_IN_PLACE, CountModes, BINS_PS, MPI_LONG_LONG, MPI_SUM, Communicator);
2674 MPI_Allreduce(MPI_IN_PLACE, KWeightSum, BINS_PS, MPI_DOUBLE, MPI_SUM, Communicator);
// Representative k per bin: log-mean of contributing modes when populated,
// otherwise the bin center; power is the mean power of the bin's modes.
2676 int count_non_zero_bins = 0;
2677 for(
int i = 0; i < BINS_PS; i++)
2679 if(CountModes[i] > 0)
2681 Kbin[i] =
exp(KWeightSum[i] / CountModes[i]);
2682 count_non_zero_bins++;
2685 Kbin[i] =
exp((i + 0.5) / binfac +
log(K0));
2687 if(CountModes[i] > 0)
2688 PowerUncorrected[i] = SumPowerUncorrected[i] / CountModes[i];
2690 PowerUncorrected[i] = 0;
// Output: flag 0 starts a new file, later folded passes append to it.
2704 if(!(fd = fopen(power_spec_fname,
"w")))
2705 Terminate(
"can't open file `%s`\n", power_spec_fname);
2707 else if(flag == 1 || flag == 2)
2709 if(!(fd = fopen(power_spec_fname,
"a")))
2710 Terminate(
"can't open file `%s`\n", power_spec_fname);
// Header block: time, number of non-empty bins, grid size.
2715 fprintf(fd,
"%g\n",
All.
Time);
2716 fprintf(fd,
"%d\n", count_non_zero_bins);
2718 fprintf(fd,
"%d\n", (
int)(PMGRID));
// One line per populated bin: k, Delta^2, P(k), mode count, shot-noise limit.
2722 for(
int i = 0; i < BINS_PS; i++)
2723 if(CountModes[i] > 0)
2724 fprintf(fd,
"%g %g %g %g %g\n", Kbin[i], DeltaUncorrected[i], PowerUncorrected[i], (
double)CountModes[i], ShotLimit[i]);
// Trailer: total mass, particle count, effective particle number for the
// shot-noise normalization.
2728 fprintf(fd,
"%g\n", mass);
2729 fprintf(fd,
"%g\n", count);
2730 fprintf(fd,
"%g\n", mass * mass / mass2);
global_data_all_processes All
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
#define MPI_MESSAGE_SIZELIMIT_IN_BYTES
#define MAXLEN_PATH_EXTRA
double mycxxsort(T *begin, T *end, Tcomp comp)
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)
expr pow(half base, half exp)
int ComovingIntegrationOn
char OutputDir[MAXLEN_PATH]
vector< MyFloat > GravAccel