12#include "gadgetconfig.h"
16#include <gsl/gsl_rng.h>
22#include "../data/allvars.h"
23#include "../data/dtypes.h"
24#include "../data/intposconvert.h"
25#include "../data/mymalloc.h"
26#include "../logs/logs.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/mpi_utils.h"
29#include "../mpi_utils/shared_mem_handler.h"
30#include "../ngenic/ngenic.h"
31#include "../pm/pm_mpi_fft.h"
32#include "../system/system.h"
45#define INTCELL ((~((MyIntPosType)0)) / GRIDX + 1)
47#define GRIDz (GRIDZ / 2 + 1)
48#define GRID2 (2 * GRIDz)
50#define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z))
51#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.firstcol_XY) + (z))
54typedef long long large_array_offset;
57typedef unsigned int large_array_offset;
60#ifdef NUMPART_PER_TASK_LARGE
61typedef long long large_numpart_type;
64typedef int large_numpart_type;
67void ngenic::ngenic_displace_particles(
void)
71 mpi_printf(
"NGENIC: computing displacement fields...\n");
84 rnd_generator_conjugate = gsl_rng_alloc(gsl_rng_ranlxd1);
85 rnd_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
86 gsl_rng_set(rnd_generator,
All.NgenicSeed);
88 ngenic_initialize_powerspectrum();
90 ngenic_initialize_ffts();
92 if(!(seedtable = (
unsigned int *)
Mem.mymalloc(
"seedtable", NGENIC * NGENIC *
sizeof(
unsigned int))))
93 Terminate(
"could not allocate seed table");
95 for(
int i = 0; i < NGENIC / 2; i++)
97 for(
int j = 0; j < i; j++)
98 seedtable[i * NGENIC + j] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
100 for(
int j = 0; j < i + 1; j++)
101 seedtable[j * NGENIC + i] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
103 for(
int j = 0; j < i; j++)
104 seedtable[(NGENIC - 1 - i) * NGENIC + j] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
106 for(
int j = 0; j < i + 1; j++)
107 seedtable[(NGENIC - 1 - j) * NGENIC + i] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
109 for(
int j = 0; j < i; j++)
110 seedtable[i * NGENIC + (NGENIC - 1 - j)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
112 for(
int j = 0; j < i + 1; j++)
113 seedtable[j * NGENIC + (NGENIC - 1 - i)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
115 for(
int j = 0; j < i; j++)
116 seedtable[(NGENIC - 1 - i) * NGENIC + (NGENIC - 1 - j)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
118 for(
int j = 0; j < i + 1; j++)
119 seedtable[(NGENIC - 1 - j) * NGENIC + (NGENIC - 1 - i)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
129 size_t tab_len = NGENIC * NGENIC *
sizeof(
unsigned int);
135 Mem.myfree(seedtable);
143 ngenic_distribute_particles();
146 Pdisp = (disp_data *)
Mem.mymalloc_clear(
"disp_data", Sp->NumPart *
sizeof(disp_data));
148#if defined(MULTICOMPONENTGLASSFILE) && defined(DIFFERENT_TRANSFER_FUNC)
149 for(Type = MinType; Type <= MaxType; Type++)
156 for(
int axes = 0; axes < 3; axes++)
157 d2phi1[axes] = (fft_real *)
Mem.mymalloc_clear(
"d2Phi1", maxfftsize *
sizeof(fft_real));
159 for(
int axes = 0; axes < 3; axes++)
161 mpi_printf(
"NGENIC_2LPT: Computing secondary source term, derivatices %d %d\n", axes, axes);
163 fft_real *disp = (fft_real *)
Mem.mymalloc(
"disp", maxfftsize *
sizeof(fft_real));
165 ngenic_setup_modes_in_kspace((fft_complex *)disp);
166 ngenic_get_derivate_from_fourier_field(axes, axes, (fft_complex *)disp);
168 memcpy(d2phi1[axes], disp, maxfftsize *
sizeof(fft_real));
174 fft_real *Phi2 = (fft_real *)
Mem.mymalloc_movable(&Phi2,
"Phi2", maxfftsize *
sizeof(fft_real));
176 for(
size_t n = 0; n < maxfftsize; n++)
177 Phi2[n] = d2phi1[0][n] * d2phi1[1][n] + d2phi1[0][n] * d2phi1[2][n] + d2phi1[1][n] * d2phi1[2][n];
179 for(
int axes = 2; axes >= 0; axes--)
180 Mem.myfree_movable(d2phi1[axes]);
182 for(
int i = 0; i < 3; i++)
183 for(
int j = i + 1; j < 3; j++)
185 mpi_printf(
"NGENIC_2LPT: Computing secondary source term, derivatices %d %d\n", i, j);
187 fft_real *disp = (fft_real *)
Mem.mymalloc(
"disp", maxfftsize *
sizeof(fft_real));
189 ngenic_setup_modes_in_kspace((fft_complex *)disp);
190 ngenic_get_derivate_from_fourier_field(i, j, (fft_complex *)disp);
192 for(
size_t n = 0; n < maxfftsize; n++)
193 Phi2[n] -= disp[n] * disp[n];
198 mpi_printf(
"NGENIC_2LPT: Secondary source term computed in real space\n");
201 ngenic_compute_transform_of_source_potential(Phi2);
203 mpi_printf(
"NGENIC_2LPT: Done transforming it to k-space\n");
205 for(
int axes = 0; axes < 3; axes++)
207 mpi_printf(
"NGENIC_2LPT: Obtaining second order displacements for axes=%d\n", axes);
209 fft_real *disp = (fft_real *)
Mem.mymalloc(
"disp", maxfftsize *
sizeof(fft_real));
211 memcpy(disp, Phi2, maxfftsize *
sizeof(fft_real));
213 ngenic_get_derivate_from_fourier_field(axes, -1, (fft_complex *)disp);
215 ngenic_readout_disp(disp, axes, 3.0 / 7, 3.0 / 7 * vel_prefac2);
224 for(
int axes = 0; axes < 3; axes++)
226 mpi_printf(
"NGENIC_2LPT: Obtaining Zeldovich displacements for axes=%d\n", axes);
228 fft_real *disp = (fft_real *)
Mem.mymalloc(
"disp", maxfftsize *
sizeof(fft_real));
230 ngenic_setup_modes_in_kspace((fft_complex *)disp);
232 ngenic_get_derivate_from_fourier_field(axes, -1, (fft_complex *)disp);
234 ngenic_readout_disp(disp, axes, 1.0, vel_prefac1);
243 for(
int n = 0; n < Sp->NumPart; n++)
245 double posdiff[3] = {Pdisp[n].deltapos[0], Pdisp[n].deltapos[1], Pdisp[n].deltapos[2]};
250 for(
int axes = 0; axes < 3; axes++)
252 Sp->P[n].IntPos[axes] += delta[axes];
254 if(Pdisp[n].deltapos[axes] > maxdisp)
255 maxdisp = Pdisp[n].deltapos[axes];
257 if(Sp->P[n].Vel[axes] > maxvel)
258 maxvel = Sp->P[n].Vel[axes];
262 double max_disp_global, maxvel_global;
263 MPI_Reduce(&maxdisp, &max_disp_global, 1, MPI_DOUBLE, MPI_MAX, 0, Communicator);
264 MPI_Reduce(&maxvel, &maxvel_global, 1, MPI_DOUBLE, MPI_MAX, 0, Communicator);
266 mpi_printf(
"\nNGENIC: Maximum displacement: %g, in units of the part-spacing= %g\n\n", max_disp_global,
268 mpi_printf(
"\nNGENIC: Maximum velocity component: %g\n\n", maxvel_global);
273 Mem.myfree(Rcvpm_offset);
274 Mem.myfree(Rcvpm_count);
275 Mem.myfree(Sndpm_offset);
276 Mem.myfree(Sndpm_count);
283 size_t tab_len = NGENIC * NGENIC *
sizeof(
unsigned int);
289 Mem.myfree(seedtable);
292#ifndef FFT_COLUMN_BASED
293 my_slab_based_fft_free(&myplan);
295 my_column_based_fft_free(&myplan);
298 FFTW(destroy_plan)(myplan.forward_plan_zdir);
299 FFTW(destroy_plan)(myplan.forward_plan_ydir);
300 FFTW(destroy_plan)(myplan.forward_plan_xdir);
302 FFTW(destroy_plan)(myplan.backward_plan_zdir);
303 FFTW(destroy_plan)(myplan.backward_plan_ydir);
304 FFTW(destroy_plan)(myplan.backward_plan_xdir);
308 if(
All.PowerSpectrumType == 2)
311 gsl_rng_free(rnd_generator);
312 gsl_rng_free(rnd_generator_conjugate);
317void ngenic::ngenic_distribute_particles(
void)
319 Sndpm_count = (
size_t *)
Mem.mymalloc(
"Sndpm_count", NTask *
sizeof(
size_t));
320 Sndpm_offset = (
size_t *)
Mem.mymalloc(
"Sndpm_offset", NTask *
sizeof(
size_t));
321 Rcvpm_count = (
size_t *)
Mem.mymalloc(
"Rcvpm_count", NTask *
sizeof(
size_t));
322 Rcvpm_offset = (
size_t *)
Mem.mymalloc(
"Rcvpm_offset", NTask *
sizeof(
size_t));
324#ifdef FFT_COLUMN_BASED
325 int columns = GRIDX * GRIDY;
326 int avg = (columns - 1) / NTask + 1;
327 int exc = NTask * avg - columns;
328 int tasklastsection = NTask - exc;
329 int pivotcol = tasklastsection * avg;
334 size_t *send_count = Sndpm_count;
336 for(
int j = 0; j < NTask; j++)
339 for(
int i = 0; i < Sp->NumPart; i++)
341 int slab_x = Sp->P[i].IntPos[0] / INTCELL;
342 int slab_xx = slab_x + 1;
347#ifndef FFT_COLUMN_BASED
348 int task0 = myplan.slab_to_task[slab_x];
349 int task1 = myplan.slab_to_task[slab_xx];
355 int slab_y = Sp->P[i].IntPos[1] / INTCELL;
356 int slab_yy = slab_y + 1;
361 int column0 = slab_x * GRIDY + slab_y;
362 int column1 = slab_x * GRIDY + slab_yy;
363 int column2 = slab_xx * GRIDY + slab_y;
364 int column3 = slab_xx * GRIDY + slab_yy;
366 int task0, task1, task2, task3;
368 if(column0 < pivotcol)
369 task0 = column0 / avg;
371 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
373 if(column1 < pivotcol)
374 task1 = column1 / avg;
376 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
378 if(column2 < pivotcol)
379 task2 = column2 / avg;
381 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
383 if(column3 < pivotcol)
384 task3 = column3 / avg;
386 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
391 if(task2 != task1 && task2 != task0)
393 if(task3 != task0 && task3 != task1 && task3 != task2)
401 for(
int i = 1; i < NTask; i++)
404 int ind_prev = i - 1;
406 Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
409 myMPI_Alltoall(Sndpm_count,
sizeof(
size_t), MPI_BYTE, Rcvpm_count,
sizeof(
size_t), MPI_BYTE, Communicator);
411 nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0;
412 for(
int j = 0; j < NTask; j++)
414 nexport += Sndpm_count[j];
415 nimport += Rcvpm_count[j];
419 Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
420 Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
425 partin = (partbuf *)
Mem.mymalloc(
"partin", nimport *
sizeof(partbuf));
426 partout = (partbuf *)
Mem.mymalloc(
"partout", nexport *
sizeof(partbuf));
429 size_t *send_count = Sndpm_count;
430 size_t *send_offset = Sndpm_offset;
432 for(
int j = 0; j < NTask; j++)
436 for(
int i = 0; i < Sp->NumPart; i++)
438 int slab_x = Sp->P[i].IntPos[0] / INTCELL;
439 int slab_xx = slab_x + 1;
444#ifndef FFT_COLUMN_BASED
445 int task0 = myplan.slab_to_task[slab_x];
446 int task1 = myplan.slab_to_task[slab_xx];
448 size_t ind0 = send_offset[task0] + send_count[task0]++;
449 for(
int j = 0; j < 3; j++)
450 partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
454 size_t ind1 = send_offset[task1] + send_count[task1]++;
455 for(
int j = 0; j < 3; j++)
456 partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
459 int slab_y = Sp->P[i].IntPos[1] / INTCELL;
460 int slab_yy = slab_y + 1;
465 int column0 = slab_x * GRIDY + slab_y;
466 int column1 = slab_x * GRIDY + slab_yy;
467 int column2 = slab_xx * GRIDY + slab_y;
468 int column3 = slab_xx * GRIDY + slab_yy;
470 int task0, task1, task2, task3;
472 if(column0 < pivotcol)
473 task0 = column0 / avg;
475 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
477 if(column1 < pivotcol)
478 task1 = column1 / avg;
480 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
482 if(column2 < pivotcol)
483 task2 = column2 / avg;
485 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
487 if(column3 < pivotcol)
488 task3 = column3 / avg;
490 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
492 size_t ind0 = send_offset[task0] + send_count[task0]++;
493 for(
int j = 0; j < 3; j++)
494 partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
498 size_t ind1 = send_offset[task1] + send_count[task1]++;
500 for(
int j = 0; j < 3; j++)
501 partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
503 if(task2 != task1 && task2 != task0)
505 size_t ind2 = send_offset[task2] + send_count[task2]++;
507 for(
int j = 0; j < 3; j++)
508 partout[ind2].IntPos[j] = Sp->P[i].IntPos[j];
510 if(task3 != task0 && task3 != task1 && task3 != task2)
512 size_t ind3 = send_offset[task3] + send_count[task3]++;
514 for(
int j = 0; j < 3; j++)
515 partout[ind3].IntPos[j] = Sp->P[i].IntPos[j];
521 int flag_big = 0, flag_big_all;
522 for(
int i = 0; i < NTask; i++)
529 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
532 myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset,
sizeof(partbuf), flag_big_all, Communicator);
537void ngenic::ngenic_compute_transform_of_source_potential(fft_real *pot)
539 fft_real *workspace = (fft_real *)
Mem.mymalloc(
"workspace", maxfftsize *
sizeof(fft_real));
541#ifndef FFT_COLUMN_BASED
542 my_slab_based_fft(&myplan, &pot[0], &workspace[0], +1);
544 my_column_based_fft(&myplan, pot, workspace, +1);
545 memcpy(pot, workspace, maxfftsize *
sizeof(fft_real));
548 Mem.myfree(workspace);
550 double normfac = 1 / (((double)GRIDX) * GRIDY * GRIDZ);
552 for(
size_t n = 0; n < maxfftsize; n++)
561void ngenic::ngenic_get_derivate_from_fourier_field(
int axes1,
int axes2, fft_complex *fft_of_grid)
567#ifdef FFT_COLUMN_BASED
568 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip += GRIDX)
570 large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
571 int y = ipcell / (GRIDX * GRIDz);
572 int yr = ipcell % (GRIDX * GRIDz);
575 Terminate(
"x-column seems incomplete. This is not expected");
577 for(
int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
578 for(
int z = 0; z < GRIDz; z++)
582 for(
int x = 0; x < GRIDX; x++)
600 kvec[0] = kfacx * xx;
601 kvec[1] = kfacy * yy;
602 kvec[2] = kfacz * zz;
604 double kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
612 double fx = 1, fy = 1, fz = 1;
628 double ff = 1 / (fx * fy * fz);
634#ifndef FFT_COLUMN_BASED
635 large_array_offset elem = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
637 large_array_offset elem = ip + x;
640 fft_real re = smth * fft_of_grid[elem][0];
641 fft_real im = smth * fft_of_grid[elem][1];
646 fft_of_grid[elem][0] = (kmag2 > 0.0 ? -kvec[axes1] / kmag2 * im : 0.0);
647 fft_of_grid[elem][1] = (kmag2 > 0.0 ? kvec[axes1] / kmag2 * re : 0.0);
652 fft_of_grid[elem][0] = (kmag2 > 0.0 ? kvec[axes1] * kvec[axes2] / kmag2 * re : 0.0);
653 fft_of_grid[elem][1] = (kmag2 > 0.0 ? kvec[axes1] * kvec[axes2] / kmag2 * im : 0.0);
658#ifdef FFT_COLUMN_BASED
659 if(myplan.second_transposed_firstcol == 0)
660 fft_of_grid[0][0] = fft_of_grid[0][1] = 0.0;
662 if(myplan.slabstart_y == 0)
663 fft_of_grid[0][0] = fft_of_grid[0][1] = 0.0;
667 fft_real *workspace = (fft_real *)
Mem.mymalloc(
"workspace", maxfftsize *
sizeof(fft_real));
669#ifndef FFT_COLUMN_BASED
670 my_slab_based_fft(&myplan, &fft_of_grid[0], &workspace[0], -1);
672 my_column_based_fft(&myplan, fft_of_grid, workspace, -1);
673 memcpy(fft_of_grid, workspace, maxfftsize *
sizeof(fft_real));
676 Mem.myfree(workspace);
679void ngenic::ngenic_setup_modes_in_kspace(fft_complex *fft_of_grid)
684 memset(fft_of_grid, 0, maxfftsize *
sizeof(fft_real));
686 mpi_printf(
"NGENIC: setting up modes in kspace...\n");
692#ifdef FFT_COLUMN_BASED
693 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip += GRIDX)
695 large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
696 int y = ipcell / (GRIDX * GRIDz);
697 int yr = ipcell % (GRIDX * GRIDz);
700 Terminate(
"x-column seems incomplete. This is not expected");
702 for(
int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
703 for(
int z = 0; z < GRIDz; z++)
708 gsl_rng_set(rnd_generator, seedtable[y * NGENIC + z]);
723 gsl_rng_set(rnd_generator_conjugate, seedtable[y_conj * NGENIC + z_conj]);
725#ifndef NGENIC_FIX_MODE_AMPLITUDES
726 double mode_ampl[GRIDX], mode_ampl_conj[GRIDX];
728 double mode_phase[GRIDX], mode_phase_conj[GRIDX];
732 for(
int xoff = 0; xoff < GRIDX / 2; xoff++)
733 for(
int side = 0; side < 2; side++)
739 x = GRIDX - 1 - xoff;
741 double phase = gsl_rng_uniform(rnd_generator) * 2 *
M_PI;
742 double phase_conj = gsl_rng_uniform(rnd_generator_conjugate) * 2 *
M_PI;
744#ifdef NGENIC_MIRROR_PHASES
746 if(phase >= 2 *
M_PI)
750 if(phase_conj >= 2 *
M_PI)
751 phase_conj -= 2 *
M_PI;
753 mode_phase[x] = phase;
754 mode_phase_conj[x] = phase_conj;
756#ifndef NGENIC_FIX_MODE_AMPLITUDES
760 ampl = gsl_rng_uniform(rnd_generator);
767 ampl_conj = gsl_rng_uniform(rnd_generator_conjugate);
769 while(ampl_conj == 0);
773 mode_ampl_conj[x] = ampl_conj;
778 for(
int x = 0; x < GRIDX; x++)
796 kvec[0] = kfacx * xx;
797 kvec[1] = kfacy * yy;
798 kvec[2] = kfacz * zz;
800 double kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
801 double kmag =
sqrt(kmag2);
803 if(
All.SphereMode == 1)
818 double p_of_k = ngenic_power_spec(kmag);
822 int conjugate_flag = 0;
824 if(z == 0 || z == GRIDZ / 2)
826 if(x > GRIDX / 2 && x < GRIDX)
828 else if(x == 0 || x == GRIDX / 2)
830 if(y > GRIDY / 2 && y < GRIDY)
832 else if(y == 0 || y == GRIDX / 2)
847#ifndef NGENIC_FIX_MODE_AMPLITUDES
849 p_of_k *= -
log(mode_ampl_conj[x_conj]);
851 p_of_k *= -
log(mode_ampl[x]);
854 double delta = fac *
sqrt(p_of_k) / Dplus;
856#ifndef FFT_COLUMN_BASED
857 large_array_offset elem = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
859 large_array_offset elem = ip + x;
864 fft_of_grid[elem][0] = delta *
cos(mode_phase_conj[x_conj]);
865 fft_of_grid[elem][1] = -delta *
sin(mode_phase_conj[x_conj]);
869 fft_of_grid[elem][0] = delta *
cos(mode_phase[x]);
870 fft_of_grid[elem][1] = delta *
sin(mode_phase[x]);
876void ngenic::ngenic_readout_disp(fft_real *grid,
int axis,
double pfac,
double vfac)
878#ifdef FFT_COLUMN_BASED
879 int columns = GRIDX * GRIDY;
880 int avg = (columns - 1) / NTask + 1;
881 int exc = NTask * avg - columns;
882 int tasklastsection = NTask - exc;
883 int pivotcol = tasklastsection * avg;
886 double *flistin = (
double *)
Mem.mymalloc(
"flistin", nimport *
sizeof(
double));
887 double *flistout = (
double *)
Mem.mymalloc(
"flistout", nexport *
sizeof(
double));
889 for(
size_t i = 0; i < nimport; i++)
893 int slab_x = partin[i].IntPos[0] / INTCELL;
896 int slab_y = partin[i].IntPos[1] / INTCELL;
899 int slab_z = partin[i].IntPos[2] / INTCELL;
902 double dx = rmd_x * (1.0 / INTCELL);
903 double dy = rmd_y * (1.0 / INTCELL);
904 double dz = rmd_z * (1.0 / INTCELL);
906 int slab_xx = slab_x + 1;
907 int slab_yy = slab_y + 1;
908 int slab_zz = slab_z + 1;
917#ifndef FFT_COLUMN_BASED
918 if(myplan.slab_to_task[slab_x] == ThisTask)
920 slab_x -= myplan.first_slab_x_of_task[ThisTask];
922 flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
923 grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
924 grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
925 grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
928 if(myplan.slab_to_task[slab_xx] == ThisTask)
930 slab_xx -= myplan.first_slab_x_of_task[ThisTask];
932 flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
933 grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
934 grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
935 grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
938 int column0 = slab_x * GRIDY + slab_y;
939 int column1 = slab_x * GRIDY + slab_yy;
940 int column2 = slab_xx * GRIDY + slab_y;
941 int column3 = slab_xx * GRIDY + slab_yy;
943 if(column0 >= myplan.firstcol_XY && column0 <= myplan.lastcol_XY)
945 flistin[i] += grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
946 grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
948 if(column1 >= myplan.firstcol_XY && column1 <= myplan.lastcol_XY)
951 grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
954 if(column2 >= myplan.firstcol_XY && column2 <= myplan.lastcol_XY)
957 grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
960 if(column3 >= myplan.firstcol_XY && column3 <= myplan.lastcol_XY)
962 flistin[i] += grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
968 int flag_big = 0, flag_big_all;
969 for(
int i = 0; i < NTask; i++)
976 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
979 myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset,
sizeof(
double), flag_big_all, Communicator);
982 size_t *send_count = Sndpm_count;
983 size_t *send_offset = Sndpm_offset;
985 for(
int j = 0; j < NTask; j++)
988 for(
int i = 0; i < Sp->NumPart; i++)
990 int slab_x = Sp->P[i].IntPos[0] / INTCELL;
991 int slab_xx = slab_x + 1;
996#ifndef FFT_COLUMN_BASED
997 int task0 = myplan.slab_to_task[slab_x];
998 int task1 = myplan.slab_to_task[slab_xx];
1000 double value = flistout[send_offset[task0] + send_count[task0]++];
1003 value += flistout[send_offset[task1] + send_count[task1]++];
1005 int slab_y = Sp->P[i].IntPos[1] / INTCELL;
1006 int slab_yy = slab_y + 1;
1008 if(slab_yy >= GRIDY)
1011 int column0 = slab_x * GRIDY + slab_y;
1012 int column1 = slab_x * GRIDY + slab_yy;
1013 int column2 = slab_xx * GRIDY + slab_y;
1014 int column3 = slab_xx * GRIDY + slab_yy;
1016 int task0, task1, task2, task3;
1018 if(column0 < pivotcol)
1019 task0 = column0 / avg;
1021 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1023 if(column1 < pivotcol)
1024 task1 = column1 / avg;
1026 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1028 if(column2 < pivotcol)
1029 task2 = column2 / avg;
1031 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1033 if(column3 < pivotcol)
1034 task3 = column3 / avg;
1036 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
1038 double value = flistout[send_offset[task0] + send_count[task0]++];
1041 value += flistout[send_offset[task1] + send_count[task1]++];
1043 if(task2 != task1 && task2 != task0)
1044 value += flistout[send_offset[task2] + send_count[task2]++];
1046 if(task3 != task0 && task3 != task1 && task3 != task2)
1047 value += flistout[send_offset[task3] + send_count[task3]++];
1050 Pdisp[i].deltapos[axis] += pfac * value;
1051 Sp->P[i].Vel[axis] += vfac * value;
1055 Mem.myfree(flistout);
1056 Mem.myfree(flistin);
1059void ngenic::ngenic_initialize_ffts(
void)
1063 Terminate(
"LONG_X must be an integer if used with PMGRID");
1068 Terminate(
"LONG_Y must be an integer if used with PMGRID");
1073 Terminate(
"LONG_Z must be an integer if used with PMGRID");
1077 int ndimx[1] = {GRIDX};
1078 int ndimy[1] = {GRIDY};
1079 int ndimz[1] = {GRIDZ};
1081 int max_GRID2 = 2 * (std::max<int>(std::max<int>(GRIDX, GRIDY), GRIDZ) / 2 + 1);
1084 fft_real *DispGrid = (fft_real *)
Mem.mymalloc(
"DispGrid", max_GRID2 *
sizeof(fft_real));
1085 fft_complex *fft_of_DispGrid = (fft_complex *)
Mem.mymalloc(
"DispGrid", max_GRID2 *
sizeof(fft_real));
1087#ifdef DOUBLEPRECISION_FFTW
1091 int alignflag = FFTW_UNALIGNED;
1094#ifndef FFT_COLUMN_BASED
1100 myplan.backward_plan_xdir =
1101 FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDX, fft_of_DispGrid, 0, stride, GRIDz * GRIDX,
1102 FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1104 myplan.backward_plan_ydir =
1105 FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDY, fft_of_DispGrid, 0, stride, GRIDz * GRIDY,
1106 FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1108 myplan.backward_plan_zdir =
FFTW(plan_many_dft_c2r)(1, ndimz, 1, (fft_complex *)DispGrid, 0, 1, GRIDz, (fft_real *)fft_of_DispGrid,
1109 0, 1, GRID2, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1111 myplan.forward_plan_xdir =
FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDX, fft_of_DispGrid, 0,
1112 stride, GRIDz * GRIDX, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1114 myplan.forward_plan_ydir =
FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDY, fft_of_DispGrid, 0,
1115 stride, GRIDz * GRIDY, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1117 myplan.forward_plan_zdir =
FFTW(plan_many_dft_r2c)(1, ndimz, 1, DispGrid, 0, 1, GRID2, (fft_complex *)fft_of_DispGrid, 0, 1, GRIDz,
1118 FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1120 Mem.myfree(fft_of_DispGrid);
1121 Mem.myfree(DispGrid);
1123#ifndef FFT_COLUMN_BASED
1125 my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
1127 maxfftsize = std::max<int>(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX) * ((size_t)GRID2);
1131 my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
1133 maxfftsize = myplan.max_datasize;
1138void ngenic::print_spec(
void)
1145 FILE *fd = fopen(buf,
"w");
1147 double gf = ngenic_growth_factor(0.001, 1.0) / (1.0 / 0.001);
1157 for(
double k = kstart; k < kend; k *= 1.025)
1159 double po = ngenic_power_spec(k);
1160 double dl = 4.0 *
M_PI * k * k * k * po;
1164 double po2 = ngenic_power_spec(1.001 * k * kf);
1165 double po1 = ngenic_power_spec(k * kf);
1166 double dnl = 0, knl = 0;
1168 if(po != 0 && po1 != 0 && po2 != 0)
1170 double neff = (
log(po2) -
log(po1)) / (
log(1.001 * k * kf) -
log(k * kf));
1172 if(1 + neff / 3 > 0)
1174 double A = 0.482 *
pow(1 + neff / 3, -0.947);
1175 double B = 0.226 *
pow(1 + neff / 3, -1.778);
1176 double alpha = 3.310 *
pow(1 + neff / 3, -0.244);
1177 double beta = 0.862 *
pow(1 + neff / 3, -0.287);
1178 double V = 11.55 *
pow(1 + neff / 3, -0.423) * 1.2;
1180 dnl = fnl(dl, A, B, alpha, beta, V, gf);
1181 knl = k *
pow(1 + dnl, 1.0 / 3);
1185 fprintf(fd,
"%12g %12g %12g %12g\n", k, dl, knl, dnl);
1200 FILE *fd = fopen(buf,
"w");
1202 const int NSTEPS = 100;
1204 for(
int i = 0; i <= NSTEPS; i++)
1208 double d = ngenic_growth_factor(a, 1.0);
1210 fprintf(fd,
"%12g %12g\n", a, 1.0 / d);
1225 FILE *fd = fopen(buf,
"w");
1229 for(
double M = 1.0e5; M <= 1.01e16; M *=
pow(10.0, 1.0 / 16))
1233 double R =
pow(3.0 * Mint / (4 *
M_PI * rhoback), 1.0 / 3);
1235 double sigma2 = ngenic_tophat_sigma2(R);
1237 fprintf(fd,
"%12g %12g %12g %12g\n", M, Mint, sigma2,
sqrt(sigma2));
global_data_all_processes All
void ** SharedMemBaseAddr
#define MPI_MESSAGE_SIZELIMIT_IN_BYTES
int32_t MySignedIntPosType
#define TIMER_START(counter)
Starts the timer counter.
#define TIMER_STOP(counter)
Stops the timer counter.
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)
expr pow(half base, half exp)
char OutputDir[MAXLEN_PATH]
char SnapshotFileBase[MAXLEN_PATH]
void set_cosmo_factors_for_current_time(void)