GADGET-4
ngenic.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#ifdef NGENIC
15
16#include <gsl/gsl_rng.h>
17#include <math.h>
18#include <mpi.h>
19#include <stdlib.h>
20#include <algorithm>
21
22#include "../data/allvars.h"
23#include "../data/dtypes.h"
24#include "../data/intposconvert.h"
25#include "../data/mymalloc.h"
26#include "../logs/logs.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/mpi_utils.h"
29#include "../mpi_utils/shared_mem_handler.h"
30#include "../ngenic/ngenic.h"
31#include "../pm/pm_mpi_fft.h"
32#include "../system/system.h"
33
34#ifdef GRIDX
35#undef GRIDX
36#undef GRIDY
37#undef GRIDZ
38#undef INTCELL
39#endif
40
41#define GRIDX (NGENIC)
42#define GRIDY (NGENIC)
43#define GRIDZ (NGENIC)
44
45#define INTCELL ((~((MyIntPosType)0)) / GRIDX + 1)
46
47#define GRIDz (GRIDZ / 2 + 1)
48#define GRID2 (2 * GRIDz)
49
50#define FI(x, y, z) (((large_array_offset)GRID2) * (GRIDY * (x) + (y)) + (z))
51#define FC(c, z) (((large_array_offset)GRID2) * ((c)-myplan.firstcol_XY) + (z))
52
53#if(GRIDZ > 1024)
54typedef long long large_array_offset; /* use a larger data type in this case so that we can always address all cells of the 3D grid
55 with a single index */
56#else
57typedef unsigned int large_array_offset;
58#endif
59
60#ifdef NUMPART_PER_TASK_LARGE
61typedef long long large_numpart_type; /* if there is a risk that the local particle number times 8 overflows a 32-bit integer, this
62 data type should be used */
63#else
64typedef int large_numpart_type;
65#endif
66
67void ngenic::ngenic_displace_particles(void)
68{
69 TIMER_START(CPU_NGENIC);
70
71 mpi_printf("NGENIC: computing displacement fields...\n");
72
74
75 double vel_prefac1 = All.cf_atime * All.cf_hubble_a * ngenic_f1_omega(All.cf_atime);
76 double vel_prefac2 = All.cf_atime * All.cf_hubble_a * ngenic_f2_omega(All.cf_atime);
77
78 vel_prefac1 /= sqrt(All.cf_atime); /* converts to Gadget velocity */
79 vel_prefac2 /= sqrt(All.cf_atime); /* converts to Gadget velocity */
80
81 mpi_printf("NGENIC: vel_prefac1= %g hubble_a=%g fom1=%g\n", vel_prefac1, All.cf_hubble_a, ngenic_f1_omega(All.cf_atime));
82 mpi_printf("NGENIC: vel_prefac2= %g hubble_a=%g fom2=%g\n", vel_prefac2, All.cf_hubble_a, ngenic_f2_omega(All.cf_atime));
83
84 rnd_generator_conjugate = gsl_rng_alloc(gsl_rng_ranlxd1);
85 rnd_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
86 gsl_rng_set(rnd_generator, All.NgenicSeed);
87
88 ngenic_initialize_powerspectrum();
89
90 ngenic_initialize_ffts();
91
92 if(!(seedtable = (unsigned int *)Mem.mymalloc("seedtable", NGENIC * NGENIC * sizeof(unsigned int))))
93 Terminate("could not allocate seed table");
94
95 for(int i = 0; i < NGENIC / 2; i++)
96 {
97 for(int j = 0; j < i; j++)
98 seedtable[i * NGENIC + j] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
99
100 for(int j = 0; j < i + 1; j++)
101 seedtable[j * NGENIC + i] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
102
103 for(int j = 0; j < i; j++)
104 seedtable[(NGENIC - 1 - i) * NGENIC + j] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
105
106 for(int j = 0; j < i + 1; j++)
107 seedtable[(NGENIC - 1 - j) * NGENIC + i] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
108
109 for(int j = 0; j < i; j++)
110 seedtable[i * NGENIC + (NGENIC - 1 - j)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
111
112 for(int j = 0; j < i + 1; j++)
113 seedtable[j * NGENIC + (NGENIC - 1 - i)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
114
115 for(int j = 0; j < i; j++)
116 seedtable[(NGENIC - 1 - i) * NGENIC + (NGENIC - 1 - j)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
117
118 for(int j = 0; j < i + 1; j++)
119 seedtable[(NGENIC - 1 - j) * NGENIC + (NGENIC - 1 - i)] = 0x7fffffff * gsl_rng_uniform(rnd_generator);
120 }
121
123 {
124 // We actually have multiple shared memory nodes in which we set aside one MPI rank for shared memory communictiona.
125 // In this casem move the seedtable to the communication rank in order to consume this memory only once on the node
126
127 if(Shmem.Island_ThisTask == 0)
128 {
129 size_t tab_len = NGENIC * NGENIC * sizeof(unsigned int);
130
131 MPI_Send(&tab_len, sizeof(tab_len), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_TABLE_ALLOC, MPI_COMM_WORLD);
132 MPI_Send(seedtable, tab_len, MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_DMOM, MPI_COMM_WORLD);
133 }
134
135 Mem.myfree(seedtable);
136
137 ptrdiff_t off;
138 MPI_Bcast(&off, sizeof(ptrdiff_t), MPI_BYTE, Shmem.Island_NTask - 1, Shmem.SharedMemComm);
139
140 seedtable = (unsigned int *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off);
141 }
142
143 ngenic_distribute_particles();
144
145 /* allocate displacement vectors */
146 Pdisp = (disp_data *)Mem.mymalloc_clear("disp_data", Sp->NumPart * sizeof(disp_data));
147
148#if defined(MULTICOMPONENTGLASSFILE) && defined(DIFFERENT_TRANSFER_FUNC)
149 for(Type = MinType; Type <= MaxType; Type++)
150#endif
151 {
152#ifdef NGENIC_2LPT
153
154 /* allocate temporary buffers for second derivatives */
155 fft_real *d2phi1[3];
156 for(int axes = 0; axes < 3; axes++)
157 d2phi1[axes] = (fft_real *)Mem.mymalloc_clear("d2Phi1", maxfftsize * sizeof(fft_real));
158
159 for(int axes = 0; axes < 3; axes++)
160 {
161 mpi_printf("NGENIC_2LPT: Computing secondary source term, derivatices %d %d\n", axes, axes);
162
163 fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
164
165 ngenic_setup_modes_in_kspace((fft_complex *)disp);
166 ngenic_get_derivate_from_fourier_field(axes, axes, (fft_complex *)disp);
167
168 memcpy(d2phi1[axes], disp, maxfftsize * sizeof(fft_real));
169
170 Mem.myfree(disp);
171 }
172
173 /* allocate second source potential */
174 fft_real *Phi2 = (fft_real *)Mem.mymalloc_movable(&Phi2, "Phi2", maxfftsize * sizeof(fft_real));
175
176 for(size_t n = 0; n < maxfftsize; n++)
177 Phi2[n] = d2phi1[0][n] * d2phi1[1][n] + d2phi1[0][n] * d2phi1[2][n] + d2phi1[1][n] * d2phi1[2][n];
178
179 for(int axes = 2; axes >= 0; axes--)
180 Mem.myfree_movable(d2phi1[axes]);
181
182 for(int i = 0; i < 3; i++)
183 for(int j = i + 1; j < 3; j++)
184 {
185 mpi_printf("NGENIC_2LPT: Computing secondary source term, derivatices %d %d\n", i, j);
186
187 fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
188
189 ngenic_setup_modes_in_kspace((fft_complex *)disp);
190 ngenic_get_derivate_from_fourier_field(i, j, (fft_complex *)disp);
191
192 for(size_t n = 0; n < maxfftsize; n++)
193 Phi2[n] -= disp[n] * disp[n];
194
195 Mem.myfree(disp);
196 }
197
198 mpi_printf("NGENIC_2LPT: Secondary source term computed in real space\n");
199
200 /* Do a forward inplace-FFT to get Phi2 in Fourier space */
201 ngenic_compute_transform_of_source_potential(Phi2);
202
203 mpi_printf("NGENIC_2LPT: Done transforming it to k-space\n");
204
205 for(int axes = 0; axes < 3; axes++)
206 {
207 mpi_printf("NGENIC_2LPT: Obtaining second order displacements for axes=%d\n", axes);
208
209 fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
210
211 memcpy(disp, Phi2, maxfftsize * sizeof(fft_real));
212
213 ngenic_get_derivate_from_fourier_field(axes, -1, (fft_complex *)disp);
214
215 ngenic_readout_disp(disp, axes, 3.0 / 7, 3.0 / 7 * vel_prefac2);
216
217 Mem.myfree(disp);
218 }
219
220 Mem.myfree(Phi2);
221#endif
222
223 /* now carry out Zeldovich approximation, yielding first order displacements */
224 for(int axes = 0; axes < 3; axes++)
225 {
226 mpi_printf("NGENIC_2LPT: Obtaining Zeldovich displacements for axes=%d\n", axes);
227
228 fft_real *disp = (fft_real *)Mem.mymalloc("disp", maxfftsize * sizeof(fft_real));
229
230 ngenic_setup_modes_in_kspace((fft_complex *)disp);
231
232 ngenic_get_derivate_from_fourier_field(axes, -1, (fft_complex *)disp);
233
234 ngenic_readout_disp(disp, axes, 1.0, vel_prefac1);
235
236 Mem.myfree(disp);
237 }
238 }
239
240 /* now add displacement to Lagrangian coordinates */
241 double maxdisp = 0;
242 double maxvel = 0;
243 for(int n = 0; n < Sp->NumPart; n++)
244 {
245 double posdiff[3] = {Pdisp[n].deltapos[0], Pdisp[n].deltapos[1], Pdisp[n].deltapos[2]};
246
247 MyIntPosType delta[3];
248 Sp->pos_to_signedintpos(posdiff, (MySignedIntPosType *)delta);
249
250 for(int axes = 0; axes < 3; axes++)
251 {
252 Sp->P[n].IntPos[axes] += delta[axes];
253
254 if(Pdisp[n].deltapos[axes] > maxdisp)
255 maxdisp = Pdisp[n].deltapos[axes];
256
257 if(Sp->P[n].Vel[axes] > maxvel)
258 maxvel = Sp->P[n].Vel[axes];
259 }
260 }
261
262 double max_disp_global, maxvel_global;
263 MPI_Reduce(&maxdisp, &max_disp_global, 1, MPI_DOUBLE, MPI_MAX, 0, Communicator);
264 MPI_Reduce(&maxvel, &maxvel_global, 1, MPI_DOUBLE, MPI_MAX, 0, Communicator);
265
266 mpi_printf("\nNGENIC: Maximum displacement: %g, in units of the part-spacing= %g\n\n", max_disp_global,
267 max_disp_global / (All.BoxSize / NGENIC));
268 mpi_printf("\nNGENIC: Maximum velocity component: %g\n\n", maxvel_global);
269
270 Mem.myfree(Pdisp);
271
272 Mem.myfree(partin);
273 Mem.myfree(Rcvpm_offset);
274 Mem.myfree(Rcvpm_count);
275 Mem.myfree(Sndpm_offset);
276 Mem.myfree(Sndpm_count);
277
279 {
280 if(Shmem.Island_ThisTask == 0)
281 {
282 // need to send this flag to the correct processor rank (our shared memory handler) so that the table is freed there
283 size_t tab_len = NGENIC * NGENIC * sizeof(unsigned int);
284 MPI_Send(&tab_len, sizeof(tab_len), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_TABLE_FREE, MPI_COMM_WORLD);
285 }
286 }
287 else
288 {
289 Mem.myfree(seedtable);
290 }
291
292#ifndef FFT_COLUMN_BASED
293 my_slab_based_fft_free(&myplan);
294#else
295 my_column_based_fft_free(&myplan);
296#endif
297
298 FFTW(destroy_plan)(myplan.forward_plan_zdir);
299 FFTW(destroy_plan)(myplan.forward_plan_ydir);
300 FFTW(destroy_plan)(myplan.forward_plan_xdir);
301
302 FFTW(destroy_plan)(myplan.backward_plan_zdir);
303 FFTW(destroy_plan)(myplan.backward_plan_ydir);
304 FFTW(destroy_plan)(myplan.backward_plan_xdir);
305
306 print_spec();
307
308 if(All.PowerSpectrumType == 2)
309 free_power_table();
310
311 gsl_rng_free(rnd_generator);
312 gsl_rng_free(rnd_generator_conjugate);
313
314 TIMER_STOP(CPU_NGENIC);
315}
316
317void ngenic::ngenic_distribute_particles(void)
318{
319 Sndpm_count = (size_t *)Mem.mymalloc("Sndpm_count", NTask * sizeof(size_t));
320 Sndpm_offset = (size_t *)Mem.mymalloc("Sndpm_offset", NTask * sizeof(size_t));
321 Rcvpm_count = (size_t *)Mem.mymalloc("Rcvpm_count", NTask * sizeof(size_t));
322 Rcvpm_offset = (size_t *)Mem.mymalloc("Rcvpm_offset", NTask * sizeof(size_t));
323
324#ifdef FFT_COLUMN_BASED
325 int columns = GRIDX * GRIDY;
326 int avg = (columns - 1) / NTask + 1;
327 int exc = NTask * avg - columns;
328 int tasklastsection = NTask - exc;
329 int pivotcol = tasklastsection * avg;
330#endif
331
332 /* determine the slabs/columns each particles accesses */
333 {
334 size_t *send_count = Sndpm_count;
335
336 for(int j = 0; j < NTask; j++)
337 send_count[j] = 0;
338
339 for(int i = 0; i < Sp->NumPart; i++)
340 {
341 int slab_x = Sp->P[i].IntPos[0] / INTCELL;
342 int slab_xx = slab_x + 1;
343
344 if(slab_xx >= GRIDX)
345 slab_xx = 0;
346
347#ifndef FFT_COLUMN_BASED
348 int task0 = myplan.slab_to_task[slab_x];
349 int task1 = myplan.slab_to_task[slab_xx];
350
351 send_count[task0]++;
352 if(task0 != task1)
353 send_count[task1]++;
354#else
355 int slab_y = Sp->P[i].IntPos[1] / INTCELL;
356 int slab_yy = slab_y + 1;
357
358 if(slab_yy >= GRIDY)
359 slab_yy = 0;
360
361 int column0 = slab_x * GRIDY + slab_y;
362 int column1 = slab_x * GRIDY + slab_yy;
363 int column2 = slab_xx * GRIDY + slab_y;
364 int column3 = slab_xx * GRIDY + slab_yy;
365
366 int task0, task1, task2, task3;
367
368 if(column0 < pivotcol)
369 task0 = column0 / avg;
370 else
371 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
372
373 if(column1 < pivotcol)
374 task1 = column1 / avg;
375 else
376 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
377
378 if(column2 < pivotcol)
379 task2 = column2 / avg;
380 else
381 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
382
383 if(column3 < pivotcol)
384 task3 = column3 / avg;
385 else
386 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
387
388 send_count[task0]++;
389 if(task1 != task0)
390 send_count[task1]++;
391 if(task2 != task1 && task2 != task0)
392 send_count[task2]++;
393 if(task3 != task0 && task3 != task1 && task3 != task2)
394 send_count[task3]++;
395#endif
396 }
397 }
398
399 /* collect thread-specific offset table and collect the results from the other threads */
400 Sndpm_offset[0] = 0;
401 for(int i = 1; i < NTask; i++)
402 {
403 int ind = i;
404 int ind_prev = i - 1;
405
406 Sndpm_offset[ind] = Sndpm_offset[ind_prev] + Sndpm_count[ind_prev];
407 }
408
409 myMPI_Alltoall(Sndpm_count, sizeof(size_t), MPI_BYTE, Rcvpm_count, sizeof(size_t), MPI_BYTE, Communicator);
410
411 nimport = 0, nexport = 0, Rcvpm_offset[0] = 0, Sndpm_offset[0] = 0;
412 for(int j = 0; j < NTask; j++)
413 {
414 nexport += Sndpm_count[j];
415 nimport += Rcvpm_count[j];
416
417 if(j > 0)
418 {
419 Sndpm_offset[j] = Sndpm_offset[j - 1] + Sndpm_count[j - 1];
420 Rcvpm_offset[j] = Rcvpm_offset[j - 1] + Rcvpm_count[j - 1];
421 }
422 }
423
424 /* allocate import and export buffer */
425 partin = (partbuf *)Mem.mymalloc("partin", nimport * sizeof(partbuf));
426 partout = (partbuf *)Mem.mymalloc("partout", nexport * sizeof(partbuf));
427
428 {
429 size_t *send_count = Sndpm_count;
430 size_t *send_offset = Sndpm_offset;
431
432 for(int j = 0; j < NTask; j++)
433 send_count[j] = 0;
434
435 /* fill export buffer */
436 for(int i = 0; i < Sp->NumPart; i++)
437 {
438 int slab_x = Sp->P[i].IntPos[0] / INTCELL;
439 int slab_xx = slab_x + 1;
440
441 if(slab_xx >= GRIDX)
442 slab_xx = 0;
443
444#ifndef FFT_COLUMN_BASED
445 int task0 = myplan.slab_to_task[slab_x];
446 int task1 = myplan.slab_to_task[slab_xx];
447
448 size_t ind0 = send_offset[task0] + send_count[task0]++;
449 for(int j = 0; j < 3; j++)
450 partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
451
452 if(task0 != task1)
453 {
454 size_t ind1 = send_offset[task1] + send_count[task1]++;
455 for(int j = 0; j < 3; j++)
456 partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
457 }
458#else
459 int slab_y = Sp->P[i].IntPos[1] / INTCELL;
460 int slab_yy = slab_y + 1;
461
462 if(slab_yy >= GRIDY)
463 slab_yy = 0;
464
465 int column0 = slab_x * GRIDY + slab_y;
466 int column1 = slab_x * GRIDY + slab_yy;
467 int column2 = slab_xx * GRIDY + slab_y;
468 int column3 = slab_xx * GRIDY + slab_yy;
469
470 int task0, task1, task2, task3;
471
472 if(column0 < pivotcol)
473 task0 = column0 / avg;
474 else
475 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
476
477 if(column1 < pivotcol)
478 task1 = column1 / avg;
479 else
480 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
481
482 if(column2 < pivotcol)
483 task2 = column2 / avg;
484 else
485 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
486
487 if(column3 < pivotcol)
488 task3 = column3 / avg;
489 else
490 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
491
492 size_t ind0 = send_offset[task0] + send_count[task0]++;
493 for(int j = 0; j < 3; j++)
494 partout[ind0].IntPos[j] = Sp->P[i].IntPos[j];
495
496 if(task1 != task0)
497 {
498 size_t ind1 = send_offset[task1] + send_count[task1]++;
499
500 for(int j = 0; j < 3; j++)
501 partout[ind1].IntPos[j] = Sp->P[i].IntPos[j];
502 }
503 if(task2 != task1 && task2 != task0)
504 {
505 size_t ind2 = send_offset[task2] + send_count[task2]++;
506
507 for(int j = 0; j < 3; j++)
508 partout[ind2].IntPos[j] = Sp->P[i].IntPos[j];
509 }
510 if(task3 != task0 && task3 != task1 && task3 != task2)
511 {
512 size_t ind3 = send_offset[task3] + send_count[task3]++;
513
514 for(int j = 0; j < 3; j++)
515 partout[ind3].IntPos[j] = Sp->P[i].IntPos[j];
516 }
517#endif
518 }
519 }
520
521 int flag_big = 0, flag_big_all;
522 for(int i = 0; i < NTask; i++)
523 if(Sndpm_count[i] * sizeof(partbuf) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
524 flag_big = 1;
525
526 /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
527 * transfer the data in chunks.
528 */
529 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
530
531 /* exchange particle data */
532 myMPI_Alltoallv(partout, Sndpm_count, Sndpm_offset, partin, Rcvpm_count, Rcvpm_offset, sizeof(partbuf), flag_big_all, Communicator);
533
534 Mem.myfree(partout);
535}
536
537void ngenic::ngenic_compute_transform_of_source_potential(fft_real *pot)
538{
539 fft_real *workspace = (fft_real *)Mem.mymalloc("workspace", maxfftsize * sizeof(fft_real));
540
541#ifndef FFT_COLUMN_BASED
542 my_slab_based_fft(&myplan, &pot[0], &workspace[0], +1);
543#else
544 my_column_based_fft(&myplan, pot, workspace, +1); // result is in workspace, not in Phi2
545 memcpy(pot, workspace, maxfftsize * sizeof(fft_real));
546#endif
547
548 Mem.myfree(workspace);
549
550 double normfac = 1 / (((double)GRIDX) * GRIDY * GRIDZ);
551
552 for(size_t n = 0; n < maxfftsize; n++)
553 pot[n] *= normfac;
554}
555
556/* this function returns the component 'axes' (0, 1, or 2) of the gradient of a field phi,
557 * which is the solution of nabla^2 phi = grid.
558 * We input the Fourier transform of grid to the function, and this field is overwritten with
559 * the gradient.
560 */
561void ngenic::ngenic_get_derivate_from_fourier_field(int axes1, int axes2, fft_complex *fft_of_grid)
562{
563 double kfacx = 2.0 * M_PI / All.BoxSize;
564 double kfacy = 2.0 * M_PI / All.BoxSize;
565 double kfacz = 2.0 * M_PI / All.BoxSize;
566
567#ifdef FFT_COLUMN_BASED
568 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip += GRIDX)
569 {
570 large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
571 int y = ipcell / (GRIDX * GRIDz);
572 int yr = ipcell % (GRIDX * GRIDz);
573 int z = yr / GRIDX;
574 if(yr % GRIDX != 0) // Note: check that x-columns are really complete
575 Terminate("x-column seems incomplete. This is not expected");
576#else
577 for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
578 for(int z = 0; z < GRIDz; z++)
579 {
580#endif
581
582 for(int x = 0; x < GRIDX; x++)
583 {
584 int xx, yy, zz;
585
586 if(x >= (GRIDX / 2))
587 xx = x - GRIDX;
588 else
589 xx = x;
590 if(y >= (GRIDY / 2))
591 yy = y - GRIDY;
592 else
593 yy = y;
594 if(z >= (GRIDZ / 2))
595 zz = z - GRIDZ;
596 else
597 zz = z;
598
599 double kvec[3];
600 kvec[0] = kfacx * xx;
601 kvec[1] = kfacy * yy;
602 kvec[2] = kfacz * zz;
603
604 double kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
605
606 double smth = 1;
607
608#ifdef CORRECT_CIC
609 if(axes2 >= 0)
610 {
611 /* do deconvolution of CIC interpolation */
612 double fx = 1, fy = 1, fz = 1;
613 if(kvec[0] != 0)
614 {
615 fx = (kvec[0] * All.BoxSize / 2) / NGENIC;
616 fx = sin(fx) / fx;
617 }
618 if(kvec[1] != 0)
619 {
620 fy = (kvec[1] * All.BoxSize / 2) / NGENIC;
621 fy = sin(fy) / fy;
622 }
623 if(kvec[2] != 0)
624 {
625 fz = (kvec[2] * All.BoxSize / 2) / NGENIC;
626 fz = sin(fz) / fz;
627 }
628 double ff = 1 / (fx * fy * fz);
629 smth = ff * ff;
630 /* end deconvolution */
631 }
632#endif
633
634#ifndef FFT_COLUMN_BASED
635 large_array_offset elem = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
636#else
637 large_array_offset elem = ip + x;
638#endif
639
640 fft_real re = smth * fft_of_grid[elem][0];
641 fft_real im = smth * fft_of_grid[elem][1];
642
643 if(axes2 < 0)
644 {
645 /* first derivative */
646 fft_of_grid[elem][0] = (kmag2 > 0.0 ? -kvec[axes1] / kmag2 * im : 0.0);
647 fft_of_grid[elem][1] = (kmag2 > 0.0 ? kvec[axes1] / kmag2 * re : 0.0);
648 }
649 else
650 {
651 /* second derivative */
652 fft_of_grid[elem][0] = (kmag2 > 0.0 ? kvec[axes1] * kvec[axes2] / kmag2 * re : 0.0);
653 fft_of_grid[elem][1] = (kmag2 > 0.0 ? kvec[axes1] * kvec[axes2] / kmag2 * im : 0.0);
654 }
655 }
656 }
657
658#ifdef FFT_COLUMN_BASED
659 if(myplan.second_transposed_firstcol == 0)
660 fft_of_grid[0][0] = fft_of_grid[0][1] = 0.0;
661#else
662 if(myplan.slabstart_y == 0)
663 fft_of_grid[0][0] = fft_of_grid[0][1] = 0.0;
664#endif
665
666 /* Do the inverse FFT to get the displacement field */
667 fft_real *workspace = (fft_real *)Mem.mymalloc("workspace", maxfftsize * sizeof(fft_real));
668
669#ifndef FFT_COLUMN_BASED
670 my_slab_based_fft(&myplan, &fft_of_grid[0], &workspace[0], -1);
671#else
672 my_column_based_fft(&myplan, fft_of_grid, workspace, -1); // result is in workspace
673 memcpy(fft_of_grid, workspace, maxfftsize * sizeof(fft_real));
674#endif
675
676 Mem.myfree(workspace);
677}
678
679void ngenic::ngenic_setup_modes_in_kspace(fft_complex *fft_of_grid)
680{
681 double fac = pow(2 * M_PI / All.BoxSize, 1.5);
682
683 /* clear local FFT-mesh */
684 memset(fft_of_grid, 0, maxfftsize * sizeof(fft_real));
685
686 mpi_printf("NGENIC: setting up modes in kspace...\n");
687
688 double kfacx = 2.0 * M_PI / All.BoxSize;
689 double kfacy = 2.0 * M_PI / All.BoxSize;
690 double kfacz = 2.0 * M_PI / All.BoxSize;
691
692#ifdef FFT_COLUMN_BASED
693 for(large_array_offset ip = 0; ip < myplan.second_transposed_ncells; ip += GRIDX)
694 {
695 large_array_offset ipcell = ip + ((large_array_offset)myplan.second_transposed_firstcol) * GRIDX;
696 int y = ipcell / (GRIDX * GRIDz);
697 int yr = ipcell % (GRIDX * GRIDz);
698 int z = yr / GRIDX;
699 if(yr % GRIDX != 0) // Note: check that x-columns are really complete
700 Terminate("x-column seems incomplete. This is not expected");
701#else
702 for(int y = myplan.slabstart_y; y < myplan.slabstart_y + myplan.nslab_y; y++)
703 for(int z = 0; z < GRIDz; z++)
704 {
705#endif
706
707 // let's use the y and z plane here, because the x-column is available in full for both FFT schemes
708 gsl_rng_set(rnd_generator, seedtable[y * NGENIC + z]);
709
710 // we also create the modes for the conjugate column so that we can fulfill the reality constraint
711 // by using the conjugate of the corresponding mode if needed
712 int y_conj, z_conj;
713 if(y > 0)
714 y_conj = GRIDY - y;
715 else
716 y_conj = 0;
717
718 if(z > 0)
719 z_conj = GRIDZ - z;
720 else
721 z_conj = 0;
722
723 gsl_rng_set(rnd_generator_conjugate, seedtable[y_conj * NGENIC + z_conj]);
724
725#ifndef NGENIC_FIX_MODE_AMPLITUDES
726 double mode_ampl[GRIDX], mode_ampl_conj[GRIDX];
727#endif
728 double mode_phase[GRIDX], mode_phase_conj[GRIDX];
729
730 // in this loop we precompute the modes for both columns, from low-k to high-k,
731 // so that after an increase of resolution, one gets the same modes plus new ones
732 for(int xoff = 0; xoff < GRIDX / 2; xoff++)
733 for(int side = 0; side < 2; side++)
734 {
735 int x;
736 if(side == 0)
737 x = xoff;
738 else
739 x = GRIDX - 1 - xoff;
740
741 double phase = gsl_rng_uniform(rnd_generator) * 2 * M_PI;
742 double phase_conj = gsl_rng_uniform(rnd_generator_conjugate) * 2 * M_PI;
743
744#ifdef NGENIC_MIRROR_PHASES
745 phase += M_PI;
746 if(phase >= 2 * M_PI)
747 phase -= 2 * M_PI;
748
749 phase_conj += M_PI;
750 if(phase_conj >= 2 * M_PI)
751 phase_conj -= 2 * M_PI;
752#endif
753 mode_phase[x] = phase;
754 mode_phase_conj[x] = phase_conj;
755
756#ifndef NGENIC_FIX_MODE_AMPLITUDES
757 double ampl;
758 do
759 {
760 ampl = gsl_rng_uniform(rnd_generator);
761 }
762 while(ampl == 0);
763
764 double ampl_conj;
765 do
766 {
767 ampl_conj = gsl_rng_uniform(rnd_generator_conjugate);
768 }
769 while(ampl_conj == 0);
770
771 mode_ampl[x] = ampl;
772
773 mode_ampl_conj[x] = ampl_conj;
774#endif
775 }
776
777 // now let's populate the full x-column of modes
778 for(int x = 0; x < GRIDX; x++)
779 {
780 int xx, yy, zz;
781
782 if(x >= (GRIDX / 2))
783 xx = x - GRIDX;
784 else
785 xx = x;
786 if(y >= (GRIDY / 2))
787 yy = y - GRIDY;
788 else
789 yy = y;
790 if(z >= (GRIDZ / 2))
791 zz = z - GRIDZ;
792 else
793 zz = z;
794
795 double kvec[3];
796 kvec[0] = kfacx * xx;
797 kvec[1] = kfacy * yy;
798 kvec[2] = kfacz * zz;
799
800 double kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
801 double kmag = sqrt(kmag2);
802
803 if(All.SphereMode == 1)
804 {
805 if(kmag * All.BoxSize / (2 * M_PI) > All.NSample / 2) /* select a sphere in k-space */
806 continue;
807 }
808 else
809 {
810 if(fabs(kvec[0]) * All.BoxSize / (2 * M_PI) > All.NSample / 2)
811 continue;
812 if(fabs(kvec[1]) * All.BoxSize / (2 * M_PI) > All.NSample / 2)
813 continue;
814 if(fabs(kvec[2]) * All.BoxSize / (2 * M_PI) > All.NSample / 2)
815 continue;
816 }
817
818 double p_of_k = ngenic_power_spec(kmag);
819
820 /* Note: kmag and p_of_k are unaffected by whether or not we use the conjugate mode */
821
822 int conjugate_flag = 0;
823
824 if(z == 0 || z == GRIDZ / 2)
825 {
826 if(x > GRIDX / 2 && x < GRIDX)
827 conjugate_flag = 1;
828 else if(x == 0 || x == GRIDX / 2)
829 {
830 if(y > GRIDY / 2 && y < GRIDY)
831 conjugate_flag = 1;
832 else if(y == 0 || y == GRIDX / 2)
833 {
834 continue;
835 }
836 }
837 }
838
839 // determine location of conjugate mode in x column
840 int x_conj;
841
842 if(x > 0)
843 x_conj = GRIDX - x;
844 else
845 x_conj = 0;
846
847#ifndef NGENIC_FIX_MODE_AMPLITUDES
848 if(conjugate_flag)
849 p_of_k *= -log(mode_ampl_conj[x_conj]);
850 else
851 p_of_k *= -log(mode_ampl[x]);
852#endif
853
854 double delta = fac * sqrt(p_of_k) / Dplus; /* scale back to starting redshift */
855
856#ifndef FFT_COLUMN_BASED
857 large_array_offset elem = ((large_array_offset)GRIDz) * (GRIDX * (y - myplan.slabstart_y) + x) + z;
858#else
859 large_array_offset elem = ip + x;
860#endif
861
862 if(conjugate_flag)
863 {
864 fft_of_grid[elem][0] = delta * cos(mode_phase_conj[x_conj]);
865 fft_of_grid[elem][1] = -delta * sin(mode_phase_conj[x_conj]);
866 }
867 else
868 {
869 fft_of_grid[elem][0] = delta * cos(mode_phase[x]);
870 fft_of_grid[elem][1] = delta * sin(mode_phase[x]);
871 }
872 }
873 }
874}
875
876void ngenic::ngenic_readout_disp(fft_real *grid, int axis, double pfac, double vfac)
877{
878#ifdef FFT_COLUMN_BASED
879 int columns = GRIDX * GRIDY;
880 int avg = (columns - 1) / NTask + 1;
881 int exc = NTask * avg - columns;
882 int tasklastsection = NTask - exc;
883 int pivotcol = tasklastsection * avg;
884#endif
885
886 double *flistin = (double *)Mem.mymalloc("flistin", nimport * sizeof(double));
887 double *flistout = (double *)Mem.mymalloc("flistout", nexport * sizeof(double));
888
889 for(size_t i = 0; i < nimport; i++)
890 {
891 flistin[i] = 0;
892
893 int slab_x = partin[i].IntPos[0] / INTCELL;
894 MyIntPosType rmd_x = partin[i].IntPos[0] % INTCELL;
895
896 int slab_y = partin[i].IntPos[1] / INTCELL;
897 MyIntPosType rmd_y = partin[i].IntPos[1] % INTCELL;
898
899 int slab_z = partin[i].IntPos[2] / INTCELL;
900 MyIntPosType rmd_z = partin[i].IntPos[2] % INTCELL;
901
902 double dx = rmd_x * (1.0 / INTCELL);
903 double dy = rmd_y * (1.0 / INTCELL);
904 double dz = rmd_z * (1.0 / INTCELL);
905
906 int slab_xx = slab_x + 1;
907 int slab_yy = slab_y + 1;
908 int slab_zz = slab_z + 1;
909
910 if(slab_xx >= GRIDX)
911 slab_xx = 0;
912 if(slab_yy >= GRIDY)
913 slab_yy = 0;
914 if(slab_zz >= GRIDZ)
915 slab_zz = 0;
916
917#ifndef FFT_COLUMN_BASED
918 if(myplan.slab_to_task[slab_x] == ThisTask)
919 {
920 slab_x -= myplan.first_slab_x_of_task[ThisTask];
921
922 flistin[i] += grid[FI(slab_x, slab_y, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
923 grid[FI(slab_x, slab_y, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz) +
924 grid[FI(slab_x, slab_yy, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) +
925 grid[FI(slab_x, slab_yy, slab_zz)] * (1.0 - dx) * (dy) * (dz);
926 }
927
928 if(myplan.slab_to_task[slab_xx] == ThisTask)
929 {
930 slab_xx -= myplan.first_slab_x_of_task[ThisTask];
931
932 flistin[i] += grid[FI(slab_xx, slab_y, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) +
933 grid[FI(slab_xx, slab_y, slab_zz)] * (dx) * (1.0 - dy) * (dz) +
934 grid[FI(slab_xx, slab_yy, slab_z)] * (dx) * (dy) * (1.0 - dz) +
935 grid[FI(slab_xx, slab_yy, slab_zz)] * (dx) * (dy) * (dz);
936 }
937#else
938 int column0 = slab_x * GRIDY + slab_y;
939 int column1 = slab_x * GRIDY + slab_yy;
940 int column2 = slab_xx * GRIDY + slab_y;
941 int column3 = slab_xx * GRIDY + slab_yy;
942
943 if(column0 >= myplan.firstcol_XY && column0 <= myplan.lastcol_XY)
944 {
945 flistin[i] += grid[FC(column0, slab_z)] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz) +
946 grid[FC(column0, slab_zz)] * (1.0 - dx) * (1.0 - dy) * (dz);
947 }
948 if(column1 >= myplan.firstcol_XY && column1 <= myplan.lastcol_XY)
949 {
950 flistin[i] +=
951 grid[FC(column1, slab_z)] * (1.0 - dx) * (dy) * (1.0 - dz) + grid[FC(column1, slab_zz)] * (1.0 - dx) * (dy) * (dz);
952 }
953
954 if(column2 >= myplan.firstcol_XY && column2 <= myplan.lastcol_XY)
955 {
956 flistin[i] +=
957 grid[FC(column2, slab_z)] * (dx) * (1.0 - dy) * (1.0 - dz) + grid[FC(column2, slab_zz)] * (dx) * (1.0 - dy) * (dz);
958 }
959
960 if(column3 >= myplan.firstcol_XY && column3 <= myplan.lastcol_XY)
961 {
962 flistin[i] += grid[FC(column3, slab_z)] * (dx) * (dy) * (1.0 - dz) + grid[FC(column3, slab_zz)] * (dx) * (dy) * (dz);
963 }
964#endif
965 }
966
967 /* exchange the potential component data */
968 int flag_big = 0, flag_big_all;
969 for(int i = 0; i < NTask; i++)
970 if(Sndpm_count[i] * sizeof(double) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
971 flag_big = 1;
972
973 /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
974 * transfer the data in chunks.
975 */
976 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
977
978 /* exchange data */
979 myMPI_Alltoallv(flistin, Rcvpm_count, Rcvpm_offset, flistout, Sndpm_count, Sndpm_offset, sizeof(double), flag_big_all, Communicator);
980
981 {
982 size_t *send_count = Sndpm_count;
983 size_t *send_offset = Sndpm_offset;
984
985 for(int j = 0; j < NTask; j++)
986 send_count[j] = 0;
987
988 for(int i = 0; i < Sp->NumPart; i++)
989 {
990 int slab_x = Sp->P[i].IntPos[0] / INTCELL;
991 int slab_xx = slab_x + 1;
992
993 if(slab_xx >= GRIDX)
994 slab_xx = 0;
995
996#ifndef FFT_COLUMN_BASED
997 int task0 = myplan.slab_to_task[slab_x];
998 int task1 = myplan.slab_to_task[slab_xx];
999
1000 double value = flistout[send_offset[task0] + send_count[task0]++];
1001
1002 if(task0 != task1)
1003 value += flistout[send_offset[task1] + send_count[task1]++];
1004#else
1005 int slab_y = Sp->P[i].IntPos[1] / INTCELL;
1006 int slab_yy = slab_y + 1;
1007
1008 if(slab_yy >= GRIDY)
1009 slab_yy = 0;
1010
1011 int column0 = slab_x * GRIDY + slab_y;
1012 int column1 = slab_x * GRIDY + slab_yy;
1013 int column2 = slab_xx * GRIDY + slab_y;
1014 int column3 = slab_xx * GRIDY + slab_yy;
1015
1016 int task0, task1, task2, task3;
1017
1018 if(column0 < pivotcol)
1019 task0 = column0 / avg;
1020 else
1021 task0 = (column0 - pivotcol) / (avg - 1) + tasklastsection;
1022
1023 if(column1 < pivotcol)
1024 task1 = column1 / avg;
1025 else
1026 task1 = (column1 - pivotcol) / (avg - 1) + tasklastsection;
1027
1028 if(column2 < pivotcol)
1029 task2 = column2 / avg;
1030 else
1031 task2 = (column2 - pivotcol) / (avg - 1) + tasklastsection;
1032
1033 if(column3 < pivotcol)
1034 task3 = column3 / avg;
1035 else
1036 task3 = (column3 - pivotcol) / (avg - 1) + tasklastsection;
1037
1038 double value = flistout[send_offset[task0] + send_count[task0]++];
1039
1040 if(task1 != task0)
1041 value += flistout[send_offset[task1] + send_count[task1]++];
1042
1043 if(task2 != task1 && task2 != task0)
1044 value += flistout[send_offset[task2] + send_count[task2]++];
1045
1046 if(task3 != task0 && task3 != task1 && task3 != task2)
1047 value += flistout[send_offset[task3] + send_count[task3]++];
1048#endif
1049
1050 Pdisp[i].deltapos[axis] += pfac * value;
1051 Sp->P[i].Vel[axis] += vfac * value;
1052 }
1053 }
1054
1055 Mem.myfree(flistout);
1056 Mem.myfree(flistin);
1057}
1058
1059void ngenic::ngenic_initialize_ffts(void)
1060{
1061#ifdef LONG_X
1062 if(LONG_X != (int)(LONG_X))
1063 Terminate("LONG_X must be an integer if used with PMGRID");
1064#endif
1065
1066#ifdef LONG_Y
1067 if(LONG_Y != (int)(LONG_Y))
1068 Terminate("LONG_Y must be an integer if used with PMGRID");
1069#endif
1070
1071#ifdef LONG_Z
1072 if(LONG_Z != (int)(LONG_Z))
1073 Terminate("LONG_Z must be an integer if used with PMGRID");
1074#endif
1075
1076 /* Set up the FFTW-3 plan files. */
1077 int ndimx[1] = {GRIDX}; /* dimension of the 1D transforms */
1078 int ndimy[1] = {GRIDY}; /* dimension of the 1D transforms */
1079 int ndimz[1] = {GRIDZ}; /* dimension of the 1D transforms */
1080
1081 int max_GRID2 = 2 * (std::max<int>(std::max<int>(GRIDX, GRIDY), GRIDZ) / 2 + 1);
1082
1083 /* temporarily allocate some arrays to make sure that out-of-place plans are created */
1084 fft_real *DispGrid = (fft_real *)Mem.mymalloc("DispGrid", max_GRID2 * sizeof(fft_real));
1085 fft_complex *fft_of_DispGrid = (fft_complex *)Mem.mymalloc("DispGrid", max_GRID2 * sizeof(fft_real));
1086
1087#ifdef DOUBLEPRECISION_FFTW
1088 int alignflag = 0;
1089#else
1090 /* for single precision, the start of our FFT columns is presently only guaranteed to be 8-byte aligned */
1091 int alignflag = FFTW_UNALIGNED;
1092#endif
1093
1094#ifndef FFT_COLUMN_BASED
1095 int stride = GRIDz;
1096#else
1097 int stride = 1;
1098#endif
1099
1100 myplan.backward_plan_xdir =
1101 FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDX, fft_of_DispGrid, 0, stride, GRIDz * GRIDX,
1102 FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1103
1104 myplan.backward_plan_ydir =
1105 FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDY, fft_of_DispGrid, 0, stride, GRIDz * GRIDY,
1106 FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1107
1108 myplan.backward_plan_zdir = FFTW(plan_many_dft_c2r)(1, ndimz, 1, (fft_complex *)DispGrid, 0, 1, GRIDz, (fft_real *)fft_of_DispGrid,
1109 0, 1, GRID2, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1110
1111 myplan.forward_plan_xdir = FFTW(plan_many_dft)(1, ndimx, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDX, fft_of_DispGrid, 0,
1112 stride, GRIDz * GRIDX, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1113
1114 myplan.forward_plan_ydir = FFTW(plan_many_dft)(1, ndimy, 1, (fft_complex *)DispGrid, 0, stride, GRIDz * GRIDY, fft_of_DispGrid, 0,
1115 stride, GRIDz * GRIDY, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1116
1117 myplan.forward_plan_zdir = FFTW(plan_many_dft_r2c)(1, ndimz, 1, DispGrid, 0, 1, GRID2, (fft_complex *)fft_of_DispGrid, 0, 1, GRIDz,
1118 FFTW_ESTIMATE | FFTW_DESTROY_INPUT | alignflag);
1119
1120 Mem.myfree(fft_of_DispGrid);
1121 Mem.myfree(DispGrid);
1122
1123#ifndef FFT_COLUMN_BASED
1124
1125 my_slab_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
1126
1127 maxfftsize = std::max<int>(myplan.largest_x_slab * GRIDY, myplan.largest_y_slab * GRIDX) * ((size_t)GRID2);
1128
1129#else
1130
1131 my_column_based_fft_init(&myplan, GRIDX, GRIDY, GRIDZ);
1132
1133 maxfftsize = myplan.max_datasize;
1134
1135#endif
1136}
1137
1138void ngenic::print_spec(void)
1139{
1140 if(ThisTask == 0)
1141 {
1142 char buf[3 * MAXLEN_PATH];
1143 sprintf(buf, "%s/inputspec_%s.txt", All.OutputDir, All.SnapshotFileBase);
1144
1145 FILE *fd = fopen(buf, "w");
1146
1147 double gf = ngenic_growth_factor(0.001, 1.0) / (1.0 / 0.001);
1148
1149 double DDD = ngenic_growth_factor(All.cf_atime, 1.0);
1150
1151 fprintf(fd, "%12g %12g\n", All.cf_redshift, DDD); /* print actual starting redshift and
1152 linear growth factor for this cosmology */
1153
1154 double kstart = 2 * M_PI / (1000.0 * (1e6 * PARSEC / All.UnitLength_in_cm)); /* 1000 Mpc/h */
1155 double kend = 2 * M_PI / (0.001 * (3.1e6 * PARSEC / All.UnitLength_in_cm)); /* 0.001 Mpc/h */
1156
1157 for(double k = kstart; k < kend; k *= 1.025)
1158 {
1159 double po = ngenic_power_spec(k);
1160 double dl = 4.0 * M_PI * k * k * k * po;
1161
1162 double kf = 0.5;
1163
1164 double po2 = ngenic_power_spec(1.001 * k * kf);
1165 double po1 = ngenic_power_spec(k * kf);
1166 double dnl = 0, knl = 0;
1167
1168 if(po != 0 && po1 != 0 && po2 != 0)
1169 {
1170 double neff = (log(po2) - log(po1)) / (log(1.001 * k * kf) - log(k * kf));
1171
1172 if(1 + neff / 3 > 0)
1173 {
1174 double A = 0.482 * pow(1 + neff / 3, -0.947);
1175 double B = 0.226 * pow(1 + neff / 3, -1.778);
1176 double alpha = 3.310 * pow(1 + neff / 3, -0.244);
1177 double beta = 0.862 * pow(1 + neff / 3, -0.287);
1178 double V = 11.55 * pow(1 + neff / 3, -0.423) * 1.2;
1179
1180 dnl = fnl(dl, A, B, alpha, beta, V, gf);
1181 knl = k * pow(1 + dnl, 1.0 / 3);
1182 }
1183 }
1184
1185 fprintf(fd, "%12g %12g %12g %12g\n", k, dl, knl, dnl);
1186 }
1187 fclose(fd);
1188 }
1189
1190 /* create an output file with the growth factor as a function of redshift, which
1191 * can be handy in analysis routines
1192 */
1193 if(ThisTask == 0)
1194 {
1195 if(All.cf_atime < 1.0)
1196 {
1197 char buf[3 * MAXLEN_PATH];
1198 sprintf(buf, "%s/growthfac.txt", All.OutputDir);
1199
1200 FILE *fd = fopen(buf, "w");
1201
1202 const int NSTEPS = 100;
1203
1204 for(int i = 0; i <= NSTEPS; i++)
1205 {
1206 double a = exp(log(All.cf_atime) + ((log(1.0) - log(All.cf_atime)) / NSTEPS) * i);
1207
1208 double d = ngenic_growth_factor(a, 1.0);
1209
1210 fprintf(fd, "%12g %12g\n", a, 1.0 / d);
1211 }
1212 fclose(fd);
1213 }
1214 }
1215
1216 /* also create an output file with the sigma(M), i.e. the variance as a function of the mass scale,
1217 * for simplifying analytic mass function plots, such as Press-Schechter
1218 */
1219
1220 if(ThisTask == 0)
1221 {
1222 char buf[3 * MAXLEN_PATH];
1223 sprintf(buf, "%s/variance.txt", All.OutputDir);
1224
1225 FILE *fd = fopen(buf, "w");
1226
1227 double rhoback = All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G);
1228
1229 for(double M = 1.0e5; M <= 1.01e16; M *= pow(10.0, 1.0 / 16)) // mass in solar masses / h
1230 {
1231 double Mint = M * (SOLAR_MASS / All.UnitMass_in_g);
1232
1233 double R = pow(3.0 * Mint / (4 * M_PI * rhoback), 1.0 / 3);
1234
1235 double sigma2 = ngenic_tophat_sigma2(R);
1236
1237 fprintf(fd, "%12g %12g %12g %12g\n", M, Mint, sigma2, sqrt(sigma2));
1238 }
1239
1240 fclose(fd);
1241 }
1242}
1243
1244#endif
global_data_all_processes All
Definition: main.cc:40
MPI_Comm SharedMemComm
int Island_ThisTask
int Island_NTask
void ** SharedMemBaseAddr
int MyShmRankInGlobal
#define SOLAR_MASS
Definition: constants.h:119
#define MPI_MESSAGE_SIZELIMIT_IN_BYTES
Definition: constants.h:53
#define MAXLEN_PATH
Definition: constants.h:300
#define PARSEC
Definition: constants.h:123
#define M_PI
Definition: constants.h:56
#define LONG_Y
Definition: dtypes.h:369
uint32_t MyIntPosType
Definition: dtypes.h:35
int32_t MySignedIntPosType
Definition: dtypes.h:36
#define LONG_X
Definition: dtypes.h:361
#define LONG_Z
Definition: dtypes.h:377
#define TIMER_START(counter)
Starts the timer counter.
Definition: logs.h:197
#define TIMER_STOP(counter)
Stops the timer counter.
Definition: logs.h:220
#define Terminate(...)
Definition: macros.h:15
shmem Shmem
Definition: main.cc:45
#define TAG_TABLE_FREE
Definition: mpi_utils.h:24
#define TAG_DMOM
Definition: mpi_utils.h:30
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
Definition: myalltoall.cc:301
#define TAG_TABLE_ALLOC
Definition: mpi_utils.h:23
void myMPI_Alltoallv(void *sendbuf, size_t *sendcounts, size_t *sdispls, void *recvbuf, size_t *recvcounts, size_t *rdispls, int len, int big_flag, MPI_Comm comm)
Definition: myalltoall.cc:181
#define DDD(x)
Definition: mymalloc.h:24
memory Mem
Definition: main.cc:44
expr exp(half arg)
Definition: half.hpp:2724
expr cos(half arg)
Definition: half.hpp:2823
expr sin(half arg)
Definition: half.hpp:2816
half fabs(half arg)
Definition: half.hpp:2627
expr log(half arg)
Definition: half.hpp:2745
expr pow(half base, half exp)
Definition: half.hpp:2803
expr sqrt(half arg)
Definition: half.hpp:2777
#define FFTW(x)
Definition: pm_mpi_fft.h:22
char OutputDir[MAXLEN_PATH]
Definition: allvars.h:272
char SnapshotFileBase[MAXLEN_PATH]
Definition: allvars.h:272
void set_cosmo_factors_for_current_time(void)
Definition: allvars.cc:20