GADGET-4
ewald.cc
Go to the documentation of this file.
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#include <math.h>
15#include <mpi.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19
20#include "../data/allvars.h"
21#include "../data/dtypes.h"
22#include "../data/mymalloc.h"
23#include "../gravity/ewald.h"
24#include "../gravity/ewaldtensors.h"
25#include "../gravtree/gravtree.h"
26#include "../io/io.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/shared_mem_handler.h"
29#include "../sort/cxxsort.h"
30#include "../system/system.h"
31
70{
71 mpi_printf("EWALD: initialize Ewald correction...\n");
72
76
77 Ewd = (ewald_data *)Mem.mymalloc("Ewd", sizeof(ewald_data) * (ENX + 1) * (ENY + 1) * (ENZ + 1));
78
79 char buf[200];
80 sprintf(buf, "ewald_table_%d-%d-%d_%d-%d-%d_precision%d-order%d.dat", LONG_X, LONG_Y, LONG_Z, ENX, ENY, ENZ, (int)sizeof(MyReal),
82
83 int recomputeflag = 0;
84
85 if(ThisTask == 0)
86 {
87 FILE *fd;
88 if((fd = fopen(buf, "r")))
89 {
90 mpi_printf("\nEWALD: reading Ewald tables from file `%s'\n", buf);
91
92 ewald_header tabh;
93 my_fread(&tabh, sizeof(ewald_header), 1, fd);
94
95#ifndef GRAVITY_TALLBOX
96 int ewaldtype = -1;
97#else
98 int ewaldtype = GRAVITY_TALLBOX + 1;
99#endif
100 if(tabh.resx != ENX || tabh.resy != ENY || tabh.resz != ENZ || tabh.varsize != sizeof(MyFloat) ||
101 tabh.ewaldtype != ewaldtype)
102 {
103 mpi_printf("\nEWALD: something's wrong with this table file. Discarding it.\n");
104 recomputeflag = 1;
105 }
106 else
107 {
108 my_fread(Ewd, sizeof(ewald_data), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
109
110 recomputeflag = 0;
111 }
112 fclose(fd);
113 }
114 else
115 recomputeflag = 1;
116 }
117
118 MPI_Bcast(&recomputeflag, 1, MPI_INT, 0, Communicator);
119
120 if(recomputeflag)
121 {
122 mpi_printf("\nEWALD: No usable Ewald tables in file `%s' found. Recomputing them...\n", buf);
123
124 /* ok, let's recompute things. Actually, we do that in parallel. */
125
126 int size = (ENX + 1) * (ENY + 1) * (ENZ + 1);
127 int first, count;
128
129 subdivide_evenly(size, NTask, ThisTask, &first, &count);
130
131 for(int n = first; n < first + count; n++)
132 {
133 int i = n / ((ENY + 1) * (ENZ + 1));
134 int j = (n - i * (ENY + 1) * (ENZ + 1)) / (ENZ + 1);
135 int k = (n - i * (ENY + 1) * (ENZ + 1) - j * (ENZ + 1));
136
137 if(ThisTask == 0)
138 {
139 if(count > 20)
140 if(((n - first) % (count / 20)) == 0)
141 {
142 printf("%4.1f percent done\n", (n - first) / (count / 100.0));
143 myflush(stdout);
144 }
145 }
146
147 double xx = 0.5 * DBX * (1.0 / LONG_X) * ((double)i) / ENX;
148 double yy = 0.5 * DBY * (1.0 / LONG_Y) * ((double)j) / ENY;
149 double zz = 0.5 * DBZ * (1.0 / LONG_Z) * ((double)k) / ENZ;
150
151 ewald_data *ewdp = Ewd + ewd_offset(i, j, k);
152
153#ifndef GRAVITY_TALLBOX
154 ewdp->D0phi = ewald_D0(xx, yy, zz);
155 ewdp->D1phi = ewald_D1(xx, yy, zz);
156 ewdp->D2phi = ewald_D2(xx, yy, zz);
157 ewdp->D3phi = ewald_D3(xx, yy, zz);
158#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 4
159 ewdp->D4phi = ewald_D4(xx, yy, zz);
160#endif
161#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 5
162 ewdp->D5phi = ewald_D5(xx, yy, zz);
163#endif
164#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 6
165 ewdp->D6phi = ewald_D6(xx, yy, zz);
166#endif
167#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 7
168 ewdp->D7phi = ewald_D7(xx, yy, zz);
169#endif
170#else
171
172 switch(GRAVITY_TALLBOX)
173 {
174 case 0:
175 {
176 ewdp->D0phi = ewald_D0(yy, zz, xx);
177 auto D1phi = ewald_D1(yy, zz, xx);
178 auto D2phi = ewald_D2(yy, zz, xx);
179 auto D3phi = ewald_D3(yy, zz, xx);
180
181 ewdp->D1phi[vX] = D1phi[vZ];
182 ewdp->D1phi[vY] = D1phi[vX];
183 ewdp->D1phi[vZ] = D1phi[vY];
184
185 ewdp->D2phi[qXX] = D2phi[qZZ];
186 ewdp->D2phi[qXY] = D2phi[qZX];
187 ewdp->D2phi[qXZ] = D2phi[qZY];
188 ewdp->D2phi[qYY] = D2phi[qXX];
189 ewdp->D2phi[qYZ] = D2phi[qXY];
190 ewdp->D2phi[qZZ] = D2phi[qYY];
191
192 ewdp->D3phi[dXXX] = D3phi[dZZZ];
193 ewdp->D3phi[dXXY] = D3phi[dZZX];
194 ewdp->D3phi[dXXZ] = D3phi[dZZY];
195 ewdp->D3phi[dXYY] = D3phi[dZXX];
196 ewdp->D3phi[dXYZ] = D3phi[dZXY];
197 ewdp->D3phi[dXZZ] = D3phi[dZYY];
198 ewdp->D3phi[dYYY] = D3phi[dXXX];
199 ewdp->D3phi[dYYZ] = D3phi[dXXY];
200 ewdp->D3phi[dYZZ] = D3phi[dXYY];
201 ewdp->D3phi[dZZZ] = D3phi[dYYY];
202 }
203 break;
204
205 case 1:
206 {
207 ewdp->D0phi = ewald_D0(xx, zz, yy);
208 auto D1phi = ewald_D1(xx, zz, yy);
209 auto D2phi = ewald_D2(xx, zz, yy);
210 auto D3phi = ewald_D3(xx, zz, yy);
211
212 ewdp->D1phi[vX] = D1phi[vX];
213 ewdp->D1phi[vY] = D1phi[vZ];
214 ewdp->D1phi[vZ] = D1phi[vY];
215
216 ewdp->D2phi[qXX] = D2phi[qXX];
217 ewdp->D2phi[qXY] = D2phi[qXZ];
218 ewdp->D2phi[qXZ] = D2phi[qXY];
219 ewdp->D2phi[qYY] = D2phi[qZZ];
220 ewdp->D2phi[qYZ] = D2phi[qZY];
221 ewdp->D2phi[qZZ] = D2phi[qYY];
222
223 ewdp->D3phi[dXXX] = D3phi[dXXX];
224 ewdp->D3phi[dXXY] = D3phi[dXXZ];
225 ewdp->D3phi[dXXZ] = D3phi[dXXY];
226 ewdp->D3phi[dXYY] = D3phi[dXZZ];
227 ewdp->D3phi[dXYZ] = D3phi[dXZY];
228 ewdp->D3phi[dXZZ] = D3phi[dXYY];
229 ewdp->D3phi[dYYY] = D3phi[dZZZ];
230 ewdp->D3phi[dYYZ] = D3phi[dZZY];
231 ewdp->D3phi[dYZZ] = D3phi[dZYY];
232 ewdp->D3phi[dZZZ] = D3phi[dYYY];
233 }
234 break;
235
236 case 2:
237 {
238 ewdp->D0phi = ewald_D0(xx, yy, zz);
239 ewdp->D1phi = ewald_D1(xx, yy, zz);
240 ewdp->D2phi = ewald_D2(xx, yy, zz);
241 ewdp->D3phi = ewald_D3(xx, yy, zz);
242 }
243 break;
244 }
245#endif
246 }
247
248 int *recvcnts = (int *)Mem.mymalloc("recvcnts", NTask * sizeof(int));
249 int *recvoffs = (int *)Mem.mymalloc("recvoffs", NTask * sizeof(int));
250
251 for(int i = 0; i < NTask; i++)
252 {
253 int off, cnt;
254 subdivide_evenly(size, NTask, i, &off, &cnt);
255 recvcnts[i] = cnt * sizeof(ewald_data);
256 recvoffs[i] = off * sizeof(ewald_data);
257 }
258
259 myMPI_Allgatherv(MPI_IN_PLACE, size * sizeof(ewald_data), MPI_BYTE, Ewd, recvcnts, recvoffs, MPI_BYTE, Communicator);
260
261 Mem.myfree(recvoffs);
262 Mem.myfree(recvcnts);
263
264 mpi_printf("\nEWALD: writing Ewald tables to file `%s'\n", buf);
265 if(ThisTask == 0)
266 {
267 FILE *fd;
268 if((fd = fopen(buf, "w")))
269 {
270 ewald_header tabh;
271 tabh.resx = ENX;
272 tabh.resy = ENY;
273 tabh.resz = ENZ;
274 tabh.varsize = sizeof(MyFloat);
275#ifndef GRAVITY_TALLBOX
276 tabh.ewaldtype = -1;
277#else
278 tabh.ewaldtype = GRAVITY_TALLBOX + 1;
279#endif
280 my_fwrite(&tabh, sizeof(ewald_header), 1, fd);
281
282 my_fwrite(Ewd, sizeof(ewald_data), (ENX + 1) * (ENY + 1) * (ENZ + 1), fd);
283 fclose(fd);
284 }
285 else
286 Terminate("can't write to file '%s'\n", buf);
287 }
288 }
289 else
290 {
291 /* here we got them from disk */
292 int len = (ENX + 1) * (ENY + 1) * (ENZ + 1) * sizeof(ewald_data);
293 MPI_Bcast(Ewd, len, MPI_BYTE, 0, Communicator);
294 }
295
296 Ewd_fac_intp[0] = 2.0 * EN * LONG_X / All.BoxSize;
297 Ewd_fac_intp[1] = 2.0 * EN * LONG_Y / All.BoxSize;
298 Ewd_fac_intp[2] = 2.0 * EN * LONG_Z / All.BoxSize;
299
300 /* now scale things to the boxsize that is actually used */
301 for(int i = 0; i <= ENX; i++)
302 for(int j = 0; j <= ENY; j++)
303 for(int k = 0; k <= ENZ; k++)
304 {
305 ewald_data *ewdp = Ewd + ewd_offset(i, j, k);
306
307 ewdp->D0phi *= 1 / All.BoxSize; /* potential */
308 ewdp->D1phi *= 1 / pow(All.BoxSize, 2);
309 ewdp->D2phi *= 1 / pow(All.BoxSize, 3);
310 ewdp->D3phi *= 1 / pow(All.BoxSize, 4);
311#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 4
312 ewdp->D4phi *= 1 / pow(All.BoxSize, 5);
313#endif
314#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 5
315 ewdp->D5phi *= 1 / pow(All.BoxSize, 6);
316#endif
317#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 6
318 ewdp->D6phi *= 1 / pow(All.BoxSize, 7);
319#endif
320#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 7
321 ewdp->D7phi *= 1 / pow(All.BoxSize, 8);
322#endif
323 }
324
325 mpi_printf("EWALD: Initialization of periodic boundaries finished.\n");
326
327 ewald_is_initialized = 1;
328
330 {
331 // We actually have multiple shared memory nodes in which we set aside one MPI rank for shared memory communication.
332 // In this case, move the ewaldtable to the communication rank in order to consume this memory only once on the node
333
334 if(Shmem.Island_ThisTask == 0)
335 {
336 size_t tab_len = sizeof(ewald_data) * (ENX + 1) * (ENY + 1) * (ENZ + 1);
337
338 MPI_Send(&tab_len, sizeof(tab_len), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_EWALD_ALLOC, MPI_COMM_WORLD);
339 MPI_Send(Ewd, tab_len, MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_DMOM, MPI_COMM_WORLD);
340 }
341
342 Mem.myfree(Ewd);
343
344 ptrdiff_t off;
345 MPI_Bcast(&off, sizeof(ptrdiff_t), MPI_BYTE, Shmem.Island_NTask - 1, Shmem.SharedMemComm);
346
347 Ewd = (ewald_data *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off);
348 }
349
350#ifdef EWALD_TEST
351 test_interpolation_accuracy();
352#endif
353}
354
355void ewald::ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag,
356 ewald_data &fper)
357{
358 // we determine the closest available point in our Ewald look-up table
359
360 static MyIntPosType const halflenX = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1) - MAX_LONG_X_BITS);
361 static MyIntPosType const intlenX = halflenX << 1;
362 static MyIntPosType const ewaldmaskX = ~(intlenX - 1);
363
364 static MyIntPosType const halflenY = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1) - MAX_LONG_Y_BITS);
365 static MyIntPosType const intlenY = halflenY << 1;
366 static MyIntPosType const ewaldmaskY = ~(intlenY - 1);
367
368 static MyIntPosType const halflenZ = ((MyIntPosType)1) << ((BITS_FOR_POSITIONS - 1) - (EWLEVEL + 1) - MAX_LONG_Z_BITS);
369 static MyIntPosType const intlenZ = halflenZ << 1;
370 static MyIntPosType const ewaldmaskZ = ~(intlenZ - 1);
371
372 MyIntPosType temppos[3] = {p_intpos[0] - target_intpos[0], p_intpos[1] - target_intpos[1], p_intpos[2] - target_intpos[2]};
373
374 constrain_intpos(temppos);
375
376 MyIntPosType gridpos[3];
377 gridpos[0] = (temppos[0] + halflenX) & ewaldmaskX;
378 gridpos[1] = (temppos[1] + halflenY) & ewaldmaskY;
379 gridpos[2] = (temppos[2] + halflenZ) & ewaldmaskZ;
380
381 vector<double> off;
382 nearest_image_intpos_to_pos(temppos, gridpos, off.da);
383
384 int i = (gridpos[0] >> (BITS_FOR_POSITIONS - (EWLEVEL + 1) - MAX_LONG_X_BITS));
385 int j = (gridpos[1] >> (BITS_FOR_POSITIONS - (EWLEVEL + 1) - MAX_LONG_Y_BITS));
386 int k = (gridpos[2] >> (BITS_FOR_POSITIONS - (EWLEVEL + 1) - MAX_LONG_Z_BITS));
387
388 int signx = 1, signy = 1, signz = 1;
389
390#if defined(GRAVITY_TALLBOX) && (GRAVITY_TALLBOX == 0)
391 if(p_intpos[0] < target_intpos[0])
392 {
393 signx = -1;
394 i = ENX - i;
395 }
396#else
397 if(i > ENX)
398 {
399 i = 2 * ENX - i;
400 signx = -1;
401 }
402 else if(i == ENX && gridpos[0] < temppos[0])
403 signx = -1;
404#endif
405
406#if defined(GRAVITY_TALLBOX) && (GRAVITY_TALLBOX == 1)
407 if(p_intpos[1] < target_intpos[1])
408 {
409 signx = -1;
410 j = ENY - i;
411 }
412#else
413 if(j > ENY)
414 {
415 j = 2 * ENY - j;
416 signy = -1;
417 }
418 else if(j == ENY && gridpos[1] < temppos[1])
419 signy = -1;
420#endif
421
422#if defined(GRAVITY_TALLBOX) && (GRAVITY_TALLBOX == 2)
423 if(p_intpos[2] < target_intpos[2])
424 {
425 signz = -1;
426 k = ENZ - k;
427 }
428#else
429 if(k > ENZ)
430 {
431 k = 2 * ENZ - k;
432 signz = -1;
433 }
434 else if(k == ENZ && gridpos[2] < temppos[2])
435 signz = -1;
436#endif
437
438 fper = Ewd[ewd_offset(i, j, k)];
439
440 /* change signs as needed */
441
442 fper.D1phi[0] *= signx;
443 fper.D1phi[1] *= signy;
444 fper.D1phi[2] *= signz;
445
446 fper.D2phi[qXY] *= signx * signy;
447 fper.D2phi[qXZ] *= signx * signz;
448 fper.D2phi[qYZ] *= signy * signz;
449
450 fper.D3phi[dXXX] *= signx;
451 fper.D3phi[dXXY] *= signy;
452 fper.D3phi[dXXZ] *= signz;
453 fper.D3phi[dXYY] *= signx;
454 fper.D3phi[dXYZ] *= signx * signy * signz;
455 fper.D3phi[dXZZ] *= signx;
456 fper.D3phi[dYYY] *= signy;
457 fper.D3phi[dYYZ] *= signz;
458 fper.D3phi[dYZZ] *= signy;
459 fper.D3phi[dZZZ] *= signz;
460
461#if EWALD_TAYLOR_ORDER == 3
462
463 fper.D4phi[sXXXY] *= signx * signy;
464 fper.D4phi[sXYYY] *= signx * signy;
465 fper.D4phi[sXXXZ] *= signx * signz;
466 fper.D4phi[sXZZZ] *= signx * signz;
467 fper.D4phi[sYYYZ] *= signy * signz;
468 fper.D4phi[sYZZZ] *= signy * signz;
469 fper.D4phi[sXXYZ] *= signy * signz;
470 fper.D4phi[sXYYZ] *= signx * signz;
471 fper.D4phi[sXYZZ] *= signx * signy;
472
473 // now Taylor corrections
474
475 fper.D0phi += fper.D1phi * off + 0.5 * ((fper.D2phi * off) * off) + (1.0 / 6) * (((fper.D3phi * off) * off) * off);
476 fper.D1phi += fper.D2phi * off + 0.5 * ((fper.D3phi * off) * off) + (1.0 / 6) * (((fper.D4phi * off) * off) * off);
477
478 if(flag == POINTMASS)
479 return;
480
481#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 5
482 fper.D5phi[rXXXXX] *= signx;
483 fper.D5phi[rYYYYY] *= signy;
484 fper.D5phi[rZZZZZ] *= signz;
485
486 fper.D5phi[rXXXXY] *= signy;
487 fper.D5phi[rXXXXZ] *= signz;
488 fper.D5phi[rXYYYY] *= signx;
489 fper.D5phi[rXZZZZ] *= signx;
490 fper.D5phi[rYYYYZ] *= signz;
491 fper.D5phi[rYZZZZ] *= signy;
492
493 fper.D5phi[rXXXYY] *= signx;
494 fper.D5phi[rXXXZZ] *= signx;
495 fper.D5phi[rXXYYY] *= signy;
496 fper.D5phi[rXXZZZ] *= signz;
497 fper.D5phi[rYYYZZ] *= signy;
498 fper.D5phi[rYYZZZ] *= signz;
499
500 fper.D5phi[rXXYZZ] *= signy;
501 fper.D5phi[rXXYYZ] *= signz;
502 fper.D5phi[rXYYZZ] *= signx;
503
504 fper.D5phi[rXXXYZ] *= signx * signy * signz;
505 fper.D5phi[rXYYYZ] *= signx * signy * signz;
506 fper.D5phi[rXYZZZ] *= signx * signy * signz;
507
508 fper.D2phi += fper.D3phi * off + 0.5 * ((fper.D4phi * off) * off) + (1.0 / 6) * (((fper.D5phi * off) * off) * off);
509#endif
510
511#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 6
512 fper.D6phi[pXXXXXY] *= signx * signy;
513 fper.D6phi[pXXXXXZ] *= signx * signz;
514 fper.D6phi[pXXXXYZ] *= signy * signz;
515 fper.D6phi[pXXXYYY] *= signx * signy;
516 fper.D6phi[pXXXYYZ] *= signx * signz;
517 fper.D6phi[pXXXYZZ] *= signx * signy;
518 fper.D6phi[pXXXZZZ] *= signx * signz;
519 fper.D6phi[pXXYYYZ] *= signy * signz;
520 fper.D6phi[pXXYZZZ] *= signy * signz;
521 fper.D6phi[pXYYYYY] *= signx * signy;
522 fper.D6phi[pXYYYYZ] *= signx * signz;
523 fper.D6phi[pXYYYZZ] *= signx * signy;
524 fper.D6phi[pXYYZZZ] *= signx * signz;
525 fper.D6phi[pXYZZZZ] *= signx * signy;
526 fper.D6phi[pXZZZZZ] *= signx * signz;
527 fper.D6phi[pYYYYYZ] *= signy * signz;
528 fper.D6phi[pYYYZZZ] *= signy * signz;
529 fper.D6phi[pYZZZZZ] *= signy * signz;
530
531 fper.D3phi += fper.D4phi * off + 0.5 * ((fper.D5phi * off) * off) + (1.0 / 6) * (((fper.D6phi * off) * off) * off);
532#endif
533
534#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 7
535 fper.D7phi[tXXXXXXX] *= signx;
536 fper.D7phi[tXXXXXXY] *= signy;
537 fper.D7phi[tXXXXXXZ] *= signz;
538 fper.D7phi[tXXXXXYY] *= signx;
539 fper.D7phi[tXXXXXYZ] *= signx * signy * signz;
540 fper.D7phi[tXXXXXZZ] *= signx;
541 fper.D7phi[tXXXXYYY] *= signy;
542 fper.D7phi[tXXXXYYZ] *= signz;
543 fper.D7phi[tXXXXYZZ] *= signy;
544 fper.D7phi[tXXXXZZZ] *= signz;
545 fper.D7phi[tXXXYYYY] *= signx;
546 fper.D7phi[tXXXYYYZ] *= signx * signy * signz;
547 fper.D7phi[tXXXYYZZ] *= signx;
548 fper.D7phi[tXXXYZZZ] *= signx * signy * signz;
549 fper.D7phi[tXXXZZZZ] *= signx;
550 fper.D7phi[tXXYYYYY] *= signy;
551 fper.D7phi[tXXYYYYZ] *= signz;
552 fper.D7phi[tXXYYYZZ] *= signy;
553 fper.D7phi[tXXYYZZZ] *= signz;
554 fper.D7phi[tXXYZZZZ] *= signy;
555 fper.D7phi[tXXZZZZZ] *= signz;
556 fper.D7phi[tXYYYYYY] *= signx;
557 fper.D7phi[tXYYYYYZ] *= signx * signy * signz;
558 fper.D7phi[tXYYYYZZ] *= signx;
559 fper.D7phi[tXYYYZZZ] *= signx * signy * signz;
560 fper.D7phi[tXYYZZZZ] *= signx;
561 fper.D7phi[tXYZZZZZ] *= signx * signy * signz;
562 fper.D7phi[tXZZZZZZ] *= signx;
563 fper.D7phi[tYYYYYYY] *= signy;
564 fper.D7phi[tYYYYYYZ] *= signz;
565 fper.D7phi[tYYYYYZZ] *= signy;
566 fper.D7phi[tYYYYZZZ] *= signz;
567 fper.D7phi[tYYYZZZZ] *= signy;
568 fper.D7phi[tYYZZZZZ] *= signz;
569 fper.D7phi[tYZZZZZZ] *= signy;
570 fper.D7phi[tZZZZZZZ] *= signz;
571
572 fper.D4phi += fper.D5phi * off + 0.5 * ((fper.D6phi * off) * off) + (1.0 / 6) * (((fper.D7phi * off) * off) * off);
573 fper.D5phi += fper.D6phi * off + 0.5 * ((fper.D7phi * off) * off);
574#endif
575
576#else
577
578 // only second order Taylor expansion, i.e. EWALD_TAYLOR_ORDER==2
579
580 // now Taylor corrections
581 fper.D0phi += fper.D1phi * off + 0.5 * ((fper.D2phi * off) * off);
582 fper.D1phi += fper.D2phi * off + 0.5 * ((fper.D3phi * off) * off);
583
584 if(flag == POINTMASS)
585 return;
586
587#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 4
588 fper.D4phi[sXXXY] *= signx * signy;
589 fper.D4phi[sXYYY] *= signx * signy;
590 fper.D4phi[sXXXZ] *= signx * signz;
591 fper.D4phi[sXZZZ] *= signx * signz;
592 fper.D4phi[sYYYZ] *= signy * signz;
593 fper.D4phi[sYZZZ] *= signy * signz;
594 fper.D4phi[sXXYZ] *= signy * signz;
595 fper.D4phi[sXYYZ] *= signx * signz;
596 fper.D4phi[sXYZZ] *= signx * signy;
597
598 fper.D2phi += fper.D3phi * off + 0.5 * ((fper.D4phi * off) * off);
599#endif
600
601#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 5
602 fper.D5phi[rXXXXX] *= signx;
603 fper.D5phi[rYYYYY] *= signy;
604 fper.D5phi[rZZZZZ] *= signz;
605
606 fper.D5phi[rXXXXY] *= signy;
607 fper.D5phi[rXXXXZ] *= signz;
608 fper.D5phi[rXYYYY] *= signx;
609 fper.D5phi[rXZZZZ] *= signx;
610 fper.D5phi[rYYYYZ] *= signz;
611 fper.D5phi[rYZZZZ] *= signy;
612
613 fper.D5phi[rXXXYY] *= signx;
614 fper.D5phi[rXXXZZ] *= signx;
615 fper.D5phi[rXXYYY] *= signy;
616 fper.D5phi[rXXZZZ] *= signz;
617 fper.D5phi[rYYYZZ] *= signy;
618 fper.D5phi[rYYZZZ] *= signz;
619
620 fper.D5phi[rXXYZZ] *= signy;
621 fper.D5phi[rXXYYZ] *= signz;
622 fper.D5phi[rXYYZZ] *= signx;
623
624 fper.D5phi[rXXXYZ] *= signx * signy * signz;
625 fper.D5phi[rXYYYZ] *= signx * signy * signz;
626 fper.D5phi[rXYZZZ] *= signx * signy * signz;
627
628 fper.D3phi += fper.D4phi * off + 0.5 * ((fper.D5phi * off) * off);
629#endif
630
631#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 6
632 fper.D6phi[pXXXXXY] *= signx * signy;
633 fper.D6phi[pXXXXXZ] *= signx * signz;
634 fper.D6phi[pXXXXYZ] *= signy * signz;
635 fper.D6phi[pXXXYYY] *= signx * signy;
636 fper.D6phi[pXXXYYZ] *= signx * signz;
637 fper.D6phi[pXXXYZZ] *= signx * signy;
638 fper.D6phi[pXXXZZZ] *= signx * signz;
639 fper.D6phi[pXXYYYZ] *= signy * signz;
640 fper.D6phi[pXXYZZZ] *= signy * signz;
641 fper.D6phi[pXYYYYY] *= signx * signy;
642 fper.D6phi[pXYYYYZ] *= signx * signz;
643 fper.D6phi[pXYYYZZ] *= signx * signy;
644 fper.D6phi[pXYYZZZ] *= signx * signz;
645 fper.D6phi[pXYZZZZ] *= signx * signy;
646 fper.D6phi[pXZZZZZ] *= signx * signz;
647 fper.D6phi[pYYYYYZ] *= signy * signz;
648 fper.D6phi[pYYYZZZ] *= signy * signz;
649 fper.D6phi[pYZZZZZ] *= signy * signz;
650
651 fper.D4phi += fper.D5phi * off + 0.5 * ((fper.D6phi * off) * off);
652#endif
653
654#if(HIGHEST_NEEDEDORDER_EWALD_DPHI + EWALD_TAYLOR_ORDER) >= 7
655 fper.D7phi[tXXXXXXX] *= signx;
656 fper.D7phi[tXXXXXXY] *= signy;
657 fper.D7phi[tXXXXXXZ] *= signz;
658 fper.D7phi[tXXXXXYY] *= signx;
659 fper.D7phi[tXXXXXYZ] *= signx * signy * signz;
660 fper.D7phi[tXXXXXZZ] *= signx;
661 fper.D7phi[tXXXXYYY] *= signy;
662 fper.D7phi[tXXXXYYZ] *= signz;
663 fper.D7phi[tXXXXYZZ] *= signy;
664 fper.D7phi[tXXXXZZZ] *= signz;
665 fper.D7phi[tXXXYYYY] *= signx;
666 fper.D7phi[tXXXYYYZ] *= signx * signy * signz;
667 fper.D7phi[tXXXYYZZ] *= signx;
668 fper.D7phi[tXXXYZZZ] *= signx * signy * signz;
669 fper.D7phi[tXXXZZZZ] *= signx;
670 fper.D7phi[tXXYYYYY] *= signy;
671 fper.D7phi[tXXYYYYZ] *= signz;
672 fper.D7phi[tXXYYYZZ] *= signy;
673 fper.D7phi[tXXYYZZZ] *= signz;
674 fper.D7phi[tXXYZZZZ] *= signy;
675 fper.D7phi[tXXZZZZZ] *= signz;
676 fper.D7phi[tXYYYYYY] *= signx;
677 fper.D7phi[tXYYYYYZ] *= signx * signy * signz;
678 fper.D7phi[tXYYYYZZ] *= signx;
679 fper.D7phi[tXYYYZZZ] *= signx * signy * signz;
680 fper.D7phi[tXYYZZZZ] *= signx;
681 fper.D7phi[tXYZZZZZ] *= signx * signy * signz;
682 fper.D7phi[tXZZZZZZ] *= signx;
683 fper.D7phi[tYYYYYYY] *= signy;
684 fper.D7phi[tYYYYYYZ] *= signz;
685 fper.D7phi[tYYYYYZZ] *= signy;
686 fper.D7phi[tYYYYZZZ] *= signz;
687 fper.D7phi[tYYYZZZZ] *= signy;
688 fper.D7phi[tYYZZZZZ] *= signz;
689 fper.D7phi[tYZZZZZZ] *= signy;
690 fper.D7phi[tZZZZZZZ] *= signz;
691
692 fper.D5phi += fper.D6phi * off + 0.5 * ((fper.D7phi * off) * off);
693#endif
694
695#endif
696}
697
705double ewald::ewald_D0(double x, double y, double z)
706{
707 static int printed = 0;
708
709 double D0 = 0.0;
710
711#ifndef GRAVITY_TALLBOX
712
713 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
714 double alpha = 2.0 / leff;
715 double alpha2 = alpha * alpha;
716
717 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
718 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
719 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
720
721 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
722 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
723 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
724
725 if(printed == 0)
726 {
727 mpi_printf("EWALD: D0 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
728 nzmax);
729 printed = 1;
730 }
731
732 for(int nx = -qxmax; nx <= qxmax; nx++)
733 for(int ny = -qymax; ny <= qymax; ny++)
734 for(int nz = -qzmax; nz <= qzmax; nz++)
735 {
736 double dx = x - nx * (1.0 / LONG_X);
737 double dy = y - ny * (1.0 / LONG_Y);
738 double dz = z - nz * (1.0 / LONG_Z);
739
740 double r2 = dx * dx + dy * dy + dz * dz;
741 double r = sqrt(r2);
742
743 double rinv = (r > 0) ? 1.0 / r : 0.0;
744
745 double g0;
746
747 if(nx != 0 || ny != 0 || nz != 0)
748 {
749 g0 = -erfc(alpha * r) * rinv;
750 }
751 else
752 {
753 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
754 * results at the origin
755 */
756
757 /* for small r:
758 *
759 * [1- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
760 */
761
762 if((alpha * r) < 0.5)
763 {
764 g0 = 2.0 * pow(alpha, 1) / sqrt(M_PI) *
765 (1.0 - pow(alpha * r, 2) / 3.0 + pow(alpha * r, 4) / 10.0 - pow(alpha * r, 6) / 42.0 +
766 pow(alpha * r, 8) / 216.0 - pow(alpha * r, 10) / 1320.0);
767 }
768 else
769 {
770 g0 = erf(alpha * r) * rinv;
771 }
772 }
773
774 D0 += g0;
775 }
776
777 for(int nx = -nxmax; nx <= nxmax; nx++)
778 for(int ny = -nymax; ny <= nymax; ny++)
779 for(int nz = -nzmax; nz <= nzmax; nz++)
780 {
781 if(nx != 0 || ny != 0 || nz != 0)
782 {
783 double kx = (2.0 * M_PI * LONG_X) * nx;
784 double ky = (2.0 * M_PI * LONG_Y) * ny;
785 double kz = (2.0 * M_PI * LONG_Z) * nz;
786 double k2 = kx * kx + ky * ky + kz * kz;
787 double kdotx = (x * kx + y * ky + z * kz);
788
789 D0 += -4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
790 }
791 }
792
793 D0 += M_PI * (LONG_X * LONG_Y * LONG_Z) / (alpha * alpha);
794
795#else
796 /* in the tallbox case, the third dimension, z, is assumed to be the non-periodic one */
797
798 double leff = sqrt(BOXX * BOXY);
799 double alpha = 2.0 / leff;
800
801 int qxmax = (int)(8.0 / (BOXX * alpha) + 0.5);
802 int qymax = (int)(8.0 / (BOXY * alpha) + 0.5);
803
804 int nxmax = (int)(2.0 * alpha * BOXX + 0.5);
805 int nymax = (int)(2.0 * alpha * BOXY + 0.5);
806
807 if(printed == 0)
808 {
809 mpi_printf("EWALD: D0 table: qxmax=%d qymax=%d nxmax=%d nymax=%d\n", qxmax, qymax, nxmax, nymax);
810 printed = 1;
811 }
812
813 for(int nx = -qxmax; nx <= qxmax; nx++)
814 for(int ny = -qymax; ny <= qymax; ny++)
815 {
816 double dx = x - nx * BOXX;
817 double dy = y - ny * BOXY;
818 double r = sqrt(dx * dx + dy * dy + z * z);
819
820 double rinv = (r > 0) ? 1.0 / r : 0.0;
821
822 double g0;
823
824 if(nx != 0 || ny != 0)
825 {
826 g0 = -erfc(alpha * r) * rinv;
827 }
828 else
829 {
830 /* we add the 1/r term here to the (0|0) entry */
831
832 if((alpha * r) < 0.5)
833 {
834 g0 = 2.0 * pow(alpha, 1) / sqrt(M_PI) *
835 (1.0 - pow(alpha * r, 2) / 3.0 + pow(alpha * r, 4) / 10.0 - pow(alpha * r, 6) / 42.0 + pow(alpha * r, 8) / 216.0 -
836 pow(alpha * r, 10) / 1320.0);
837 }
838 else
839 {
840 g0 = erf(alpha * r) * rinv;
841 }
842 }
843
844 D0 += g0;
845 }
846
847 double alpha2 = alpha * alpha;
848
849 for(int nx = -nxmax; nx <= nxmax; nx++)
850 for(int ny = -nymax; ny <= nymax; ny++)
851 {
852 if(nx != 0 || ny != 0)
853 {
854 double kx = (2.0 * M_PI / BOXX) * nx;
855 double ky = (2.0 * M_PI / BOXY) * ny;
856 double k2 = kx * kx + ky * ky;
857 double k = sqrt(k2);
858
859 double ex = exp(-k * z); // note: z positive here
860
861 if(ex > 1.0e-60) // to prevent divisions by zero due to underflows
862 D0 += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha));
863 }
864 }
865
866 D0 += 2.0 * alpha / sqrt(M_PI) + 2 * sqrt(M_PI) / (BOXX * BOXY) * (exp(-alpha2 * z * z) / alpha + sqrt(M_PI) * z * erf(alpha * z));
867
868#endif
869
870 return D0;
871}
872
873double ewald::specerf(double z, double k, double alpha) { return exp(k * z) * erfc(k / (2 * alpha) + alpha * z); }
874
875double ewald::d_specerf(double z, double k, double alpha)
876{
877 return -2 * alpha / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) + k * specerf(z, k, alpha);
878}
879
880double ewald::dd_specerf(double z, double k, double alpha)
881{
882 return +4 * pow(alpha, 3) * z / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) + k * d_specerf(z, k, alpha);
883}
884
885double ewald::ddd_specerf(double z, double k, double alpha)
886{
887 return +4 * pow(alpha, 3) / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) -
888 8 * pow(alpha, 5) * z * z / (sqrt(M_PI) * exp(pow(k / (2 * alpha), 2) + pow(alpha * z, 2))) + k * dd_specerf(z, k, alpha);
889}
890
898vector<double> ewald::ewald_D1(double x, double y, double z)
899{
900 static int printed = 0;
901
902 vector<double> D1 = 0.0;
903
904#ifndef GRAVITY_TALLBOX
905
906 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
907 double alpha = 2.0 / leff;
908 double alpha2 = alpha * alpha;
909
910 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
911 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
912 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
913
914 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
915 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
916 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
917
918 if(printed == 0)
919 {
920 mpi_printf("EWALD: D1 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
921 nzmax);
922 printed = 1;
923 }
924
925 for(int nx = -qxmax; nx <= qxmax; nx++)
926 for(int ny = -qymax; ny <= qymax; ny++)
927 for(int nz = -qzmax; nz <= qzmax; nz++)
928 {
929 double dx = x - nx * (1.0 / LONG_X);
930 double dy = y - ny * (1.0 / LONG_Y);
931 double dz = z - nz * (1.0 / LONG_Z);
932
933 vector<double> dxyz(dx, dy, dz);
934
935 double r2 = dx * dx + dy * dy + dz * dz;
936 double r = sqrt(r2);
937
938 double rinv = (r > 0) ? 1.0 / r : 0.0;
939 double r2inv = rinv * rinv;
940 double r3inv = r2inv * rinv;
941
942 double g1;
943
944 if(nx != 0 || ny != 0 || nz != 0)
945 {
946 g1 = (erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
947 }
948 else
949 {
950 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
951 * results at the origin
952 */
953
954 /* Note, for small r:
955 *
956 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
957 *
958 * Hence for r = 0:
959 *
960 * g0 = 2 * alpha / sqrt(pi)
961 * g1 = -4/3 * alpha^3 / sqrt(pi)
962 * g2 = 8/5 * alpha^5 / sqrt(pi)
963 * g3 = -16/7 * alpha^7 / sqrt(pi)
964 * g4 = 32/9 * alpha^9 / sqrt(pi)
965 * g5 = -64/11 * alpha^11/ sqrt(pi)
966 */
967
968 if((alpha * r) < 0.5)
969 {
970 g1 = 4.0 * pow(alpha, 3) / sqrt(M_PI) *
971 (-1.0 / 3.0 + pow(alpha * r, 2) / 5.0 - pow(alpha * r, 4) / 14.0 + pow(alpha * r, 6) / 54.0 -
972 pow(alpha * r, 8) / 264.0 + pow(alpha * r, 10) / 1560.0);
973 }
974 else
975 {
976 g1 = (-erf(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
977 }
978 }
979
980 D1 += g1 * dxyz;
981 }
982
983 for(int nx = -nxmax; nx <= nxmax; nx++)
984 for(int ny = -nymax; ny <= nymax; ny++)
985 for(int nz = -nzmax; nz <= nzmax; nz++)
986 {
987 double kx = (2.0 * M_PI * LONG_X) * nx;
988 double ky = (2.0 * M_PI * LONG_Y) * ny;
989 double kz = (2.0 * M_PI * LONG_Z) * nz;
990 double k2 = kx * kx + ky * ky + kz * kz;
991
992 if(k2 > 0)
993 {
994 double kdotx = (x * kx + y * ky + z * kz);
995 double val = 4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * sin(kdotx);
996 D1[0] += kx * val;
997 D1[1] += ky * val;
998 D1[2] += kz * val;
999 }
1000 }
1001#else
1002 /* this is the case with periodicity only in two dimensions */
1003
1004 double leff = sqrt(BOXX * BOXY);
1005 double alpha = 2.0 / leff;
1006 double alpha2 = alpha * alpha;
1007
1008 int qxmax = (int)(8.0 / (BOXX * alpha) + 0.5);
1009 int qymax = (int)(8.0 / (BOXY * alpha) + 0.5);
1010
1011 int nxmax = (int)(2.0 * alpha * BOXX + 0.5);
1012 int nymax = (int)(2.0 * alpha * BOXY + 0.5);
1013
1014 if(printed == 0)
1015 {
1016 mpi_printf("EWALD: D1 table: qxmax=%d qymax=%d nxmax=%d nymax=%d\n", qxmax, qymax, nxmax, nymax);
1017 printed = 1;
1018 }
1019
1020 for(int nx = -qxmax; nx <= qxmax; nx++)
1021 for(int ny = -qymax; ny <= qymax; ny++)
1022 {
1023 double dx = x - nx * BOXX;
1024 double dy = y - ny * BOXY;
1025 double dz = z;
1026
1027 vector<double> dxyz(dx, dy, dz);
1028
1029 double r2 = dx * dx + dy * dy + dz * dz;
1030 double r = sqrt(r2);
1031
1032 double rinv = (r > 0) ? 1.0 / r : 0.0;
1033 double r2inv = rinv * rinv;
1034 double r3inv = r2inv * rinv;
1035
1036 double g1;
1037
1038 if(nx != 0 || ny != 0)
1039 {
1040 g1 = (erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
1041 }
1042 else
1043 {
1044 /* we add the 1/r term here to the (0|0) entry */
1045
1046 if((alpha * r) < 0.5)
1047 {
1048 g1 = 4.0 * pow(alpha, 3) / sqrt(M_PI) *
1049 (-1.0 / 3.0 + pow(alpha * r, 2) / 5.0 - pow(alpha * r, 4) / 14.0 + pow(alpha * r, 6) / 54.0 -
1050 pow(alpha * r, 8) / 264.0 + pow(alpha * r, 10) / 1560.0);
1051 }
1052 else
1053 {
1054 g1 = (-erf(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
1055 }
1056 }
1057
1058 D1 += g1 * dxyz;
1059 }
1060
1061 for(int nx = -nxmax; nx <= nxmax; nx++)
1062 for(int ny = -nymax; ny <= nymax; ny++)
1063 {
1064 if(nx != 0 || ny != 0)
1065 {
1066 double kx = (2.0 * M_PI / BOXX) * nx;
1067 double ky = (2.0 * M_PI / BOXY) * ny;
1068 double k2 = kx * kx + ky * ky;
1069 double k = sqrt(k2);
1070
1071 double ex = exp(-k * z); // note: z positive here
1072
1073 if(ex > 1.0e-60) // to prevent divisions by zero due to underflows
1074 {
1075 double val = M_PI / (BOXX * BOXY) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha));
1076
1077 D1[0] += kx * val * sin(kx * x + ky * y);
1078 D1[1] += ky * val * sin(kx * x + ky * y);
1079 D1[2] += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (d_specerf(z, k, alpha) - d_specerf(-z, k, alpha));
1080 }
1081 }
1082 }
1083
1084 D1[2] += 2.0 * M_PI / (BOXX * BOXY) * erf(alpha * z);
1085#endif
1086
1087 return D1; // now in dimensionless form;
1088}
1089
1090symtensor2<double> ewald::ewald_D2(double x, double y, double z)
1091{
1092 static int printed = 0;
1093
1094 symtensor2<double> D2 = 0.0;
1095
1096#ifndef GRAVITY_TALLBOX
1097
1098 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
1099 double alpha = 2.0 / leff;
1100 double alpha2 = alpha * alpha;
1101
1102 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
1103 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
1104 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
1105
1106 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
1107 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
1108 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
1109
1110 if(printed == 0)
1111 {
1112 mpi_printf("EWALD: D2 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
1113 nzmax);
1114 printed = 1;
1115 }
1116
1117 for(int nx = -qxmax; nx <= qxmax; nx++)
1118 for(int ny = -qymax; ny <= qymax; ny++)
1119 for(int nz = -qzmax; nz <= qzmax; nz++)
1120 {
1121 double dx = x - nx * (1.0 / LONG_X);
1122 double dy = y - ny * (1.0 / LONG_Y);
1123 double dz = z - nz * (1.0 / LONG_Z);
1124
1125 vector<double> dxyz(dx, dy, dz);
1126
1127 double r2 = dx * dx + dy * dy + dz * dz;
1128 double r = sqrt(r2);
1129
1130 double rinv = (r > 0) ? 1.0 / r : 0.0;
1131 double r2inv = rinv * rinv;
1132 double r3inv = r2inv * rinv;
1133 double r5inv = r3inv * r2inv;
1134
1135 double g1, g2;
1136
1137 if(nx != 0 || ny != 0 || nz != 0)
1138 {
1139 g1 = (erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
1140
1141 g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1142 }
1143 else
1144 {
1145 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
1146 * results at the origin
1147 */
1148
1149 /* Note, for small r:
1150 *
1151 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
1152 *
1153 * Hence for r = 0:
1154 *
1155 * g0 = 2 * alpha / sqrt(pi)
1156 * g1 = -4/3 * alpha^3 / sqrt(pi)
1157 * g2 = 8/5 * alpha^5 / sqrt(pi)
1158 * g3 = -16/7 * alpha^7 / sqrt(pi)
1159 * g4 = 32/9 * alpha^9 / sqrt(pi)
1160 * g5 = -64/11 * alpha^11/ sqrt(pi)
1161 */
1162
1163 if((alpha * r) < 0.5)
1164 {
1165 g1 = 4.0 * pow(alpha, 3) / sqrt(M_PI) *
1166 (-1.0 / 3.0 + pow(alpha * r, 2) / 5.0 - pow(alpha * r, 4) / 14.0 + pow(alpha * r, 6) / 54.0 -
1167 pow(alpha * r, 8) / 264.0 + pow(alpha * r, 10) / 1560.0);
1168
1169 g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) *
1170 (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 +
1171 pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0);
1172 }
1173 else
1174 {
1175 g1 = (-erf(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
1176
1177 g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1178 }
1179 }
1180
1181 D2 += g2 * (dxyz % dxyz);
1182 D2[qXX] += g1;
1183 D2[qYY] += g1;
1184 D2[qZZ] += g1;
1185 }
1186
1187 for(int nx = -nxmax; nx <= nxmax; nx++)
1188 for(int ny = -nymax; ny <= nymax; ny++)
1189 for(int nz = -nzmax; nz <= nzmax; nz++)
1190 {
1191 if(nx != 0 || ny != 0 || nz != 0)
1192 {
1193 double kx = (2.0 * M_PI * LONG_X) * nx;
1194 double ky = (2.0 * M_PI * LONG_Y) * ny;
1195 double kz = (2.0 * M_PI * LONG_Z) * nz;
1196 double k2 = kx * kx + ky * ky + kz * kz;
1197
1198 double kdotx = (x * kx + y * ky + z * kz);
1199 double val = 4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
1200
1201 vector<double> kxyz(kx, ky, kz);
1202
1203 D2 += (val * kxyz) % kxyz;
1204 }
1205 }
1206#else
1207 /* this is the case with periodicity only in two dimensions */
1208 /* this is the case with periodicity only in two dimensions */
1209
1210 double leff = sqrt(BOXX * BOXY);
1211 double alpha = 2.0 / leff;
1212 double alpha2 = alpha * alpha;
1213
1214 int qxmax = (int)(8.0 / (BOXX * alpha) + 0.5);
1215 int qymax = (int)(8.0 / (BOXY * alpha) + 0.5);
1216
1217 int nxmax = (int)(2.0 * alpha * BOXX + 0.5);
1218 int nymax = (int)(2.0 * alpha * BOXY + 0.5);
1219
1220 if(printed == 0)
1221 {
1222 mpi_printf("EWALD: D2 table: qxmax=%d qymax=%d nxmax=%d nymax=%d\n", qxmax, qymax, nxmax, nymax);
1223 printed = 1;
1224 }
1225
1226 for(int nx = -qxmax; nx <= qxmax; nx++)
1227 for(int ny = -qymax; ny <= qymax; ny++)
1228 {
1229 double dx = x - nx * BOXX;
1230 double dy = y - ny * BOXY;
1231 double dz = z;
1232
1233 vector<double> dxyz(dx, dy, dz);
1234
1235 double r2 = dx * dx + dy * dy + dz * dz;
1236 double r = sqrt(r2);
1237
1238 double rinv = (r > 0) ? 1.0 / r : 0.0;
1239 double r2inv = rinv * rinv;
1240 double r3inv = r2inv * rinv;
1241 double r5inv = r3inv * r2inv;
1242
1243 double g1, g2;
1244
1245 if(nx != 0 || ny != 0)
1246 {
1247 g1 = (erfc(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
1248
1249 g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1250 }
1251 else
1252 {
1253 /* we add the 1/r term here to the (0|0) entry */
1254
1255 if((alpha * r) < 0.5)
1256 {
1257 g1 = 4.0 * pow(alpha, 3) / sqrt(M_PI) *
1258 (-1.0 / 3.0 + pow(alpha * r, 2) / 5.0 - pow(alpha * r, 4) / 14.0 + pow(alpha * r, 6) / 54.0 -
1259 pow(alpha * r, 8) / 264.0 + pow(alpha * r, 10) / 1560.0);
1260
1261 g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) *
1262 (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 +
1263 pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0);
1264 }
1265 else
1266 {
1267 g1 = (-erf(alpha * r) + 2.0 * alpha * r / sqrt(M_PI) * exp(-alpha2 * r2)) * r3inv;
1268
1269 g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1270 }
1271 }
1272
1273 D2 += g2 * (dxyz % dxyz);
1274 D2[qXX] += g1;
1275 D2[qYY] += g1;
1276 D2[qZZ] += g1;
1277 }
1278
1279 for(int nx = -nxmax; nx <= nxmax; nx++)
1280 for(int ny = -nymax; ny <= nymax; ny++)
1281 {
1282 if(nx != 0 || ny != 0)
1283 {
1284 double kx = (2.0 * M_PI / BOXX) * nx;
1285 double ky = (2.0 * M_PI / BOXY) * ny;
1286 double k2 = kx * kx + ky * ky;
1287 double k = sqrt(k2);
1288
1289 double ex = exp(-k * z); // note: z positive here
1290
1291 if(ex > 1.0e-60) // to prevent divisions by zero due to underflows
1292 {
1293 double val = M_PI / (BOXX * BOXY) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha));
1294 double dzval = M_PI / (BOXX * BOXY) / k * (d_specerf(z, k, alpha) - d_specerf(-z, k, alpha));
1295
1296 D2[qXX] += kx * kx * val * cos(kx * x + ky * y);
1297 D2[qXY] += kx * ky * val * cos(kx * x + ky * y);
1298 D2[qXZ] += kx * dzval * sin(kx * x + ky * y);
1299 D2[qYY] += ky * ky * val * cos(kx * x + ky * y);
1300 D2[qYZ] += ky * dzval * sin(kx * x + ky * y);
1301 D2[qZZ] += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (dd_specerf(z, k, alpha) + dd_specerf(-z, k, alpha));
1302 }
1303 }
1304 }
1305
1306 D2[qZZ] += 4.0 * alpha * sqrt(M_PI) / (BOXX * BOXY) * exp(-pow(alpha * z, 2));
1307#endif
1308
1309 return D2;
1310}
1311
1312symtensor3<double> ewald::ewald_D3(double x, double y, double z)
1313{
1314 static int printed = 0;
1315
1316 symtensor3<double> D3 = 0.0;
1317
1318#ifndef GRAVITY_TALLBOX
1319
1320 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
1321 double alpha = 2.0 / leff;
1322 double alpha2 = alpha * alpha;
1323
1324 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
1325 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
1326 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
1327
1328 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
1329 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
1330 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
1331
1332 if(printed == 0)
1333 {
1334 mpi_printf("EWALD: D3 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
1335 nzmax);
1336 printed = 1;
1337 }
1338
1339 for(int nx = -qxmax; nx <= qxmax; nx++)
1340 for(int ny = -qymax; ny <= qymax; ny++)
1341 for(int nz = -qzmax; nz <= qzmax; nz++)
1342 {
1343 double dx = x - nx * (1.0 / LONG_X);
1344 double dy = y - ny * (1.0 / LONG_Y);
1345 double dz = z - nz * (1.0 / LONG_Z);
1346
1347 vector<double> dxyz(dx, dy, dz);
1348
1349 double r2 = dx * dx + dy * dy + dz * dz;
1350 double r = sqrt(r2);
1351
1352 double rinv = (r > 0) ? 1.0 / r : 0.0;
1353 double r2inv = rinv * rinv;
1354 double r3inv = r2inv * rinv;
1355 double r4inv = r2inv * r2inv;
1356 double r5inv = r2inv * r3inv;
1357 double r7inv = r3inv * r4inv;
1358
1359 double g2, g3;
1360
1361 if(nx != 0 || ny != 0 || nz != 0)
1362 {
1363 g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1364
1365 g3 = (15.0 * erfc(alpha * r) +
1366 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1367 r7inv;
1368 }
1369 else
1370 {
1371 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
1372 * results at the origin
1373 */
1374
1375 /* Note, for small r:
1376 *
1377 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
1378 *
1379 * Hence for r = 0:
1380 *
1381 * g0 = 2 * alpha / sqrt(pi)
1382 * g1 = -4/3 * alpha^3 / sqrt(pi)
1383 * g2 = 8/5 * alpha^5 / sqrt(pi)
1384 * g3 = -16/7 * alpha^7 / sqrt(pi)
1385 * g4 = 32/9 * alpha^9 / sqrt(pi)
1386 * g5 = -64/11 * alpha^11/ sqrt(pi)
1387 */
1388 if((alpha * r) < 0.5)
1389 {
1390 g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) *
1391 (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 +
1392 pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0);
1393
1394 g3 = 16.0 * pow(alpha, 7) / sqrt(M_PI) *
1395 (-1.0 / 7.0 + pow(alpha * r, 2) / 9.0 - pow(alpha * r, 4) / 22.0 + pow(alpha * r, 6) / 78.0 -
1396 pow(alpha * r, 8) / 360.0 + pow(alpha * r, 10) / 2040.0);
1397 }
1398 else
1399 {
1400 g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1401
1402 g3 = (-15.0 * erf(alpha * r) +
1403 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1404 r7inv;
1405 }
1406 }
1407
1408 symtensor2<double> aux2 = dxyz % dxyz;
1409 symtensor3<double> aux3;
1410
1411 setup_D3(ADD, D3, dxyz, aux2, aux3, g2, g3);
1412 }
1413
1414 for(int nx = -nxmax; nx <= nxmax; nx++)
1415 for(int ny = -nymax; ny <= nymax; ny++)
1416 for(int nz = -nzmax; nz <= nzmax; nz++)
1417 {
1418 if(nx != 0 || ny != 0 || nz != 0)
1419 {
1420 double kx = (2.0 * M_PI * LONG_X) * nx;
1421 double ky = (2.0 * M_PI * LONG_Y) * ny;
1422 double kz = (2.0 * M_PI * LONG_Z) * nz;
1423 double k2 = kx * kx + ky * ky + kz * kz;
1424
1425 double kdotx = (x * kx + y * ky + z * kz);
1426 double val = -4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * sin(kdotx);
1427
1428 vector<double> kxyz(kx, ky, kz);
1429
1430 D3 += (val * kxyz) % (kxyz % kxyz);
1431 }
1432 }
1433#else
1434 /* this is the case with periodicity only in two dimensions */
1435 /* this is the case with periodicity only in two dimensions */
1436 /* this is the case with periodicity only in two dimensions */
1437
1438 double leff = sqrt(BOXX * BOXY);
1439 double alpha = 2.0 / leff;
1440 double alpha2 = alpha * alpha;
1441
1442 int qxmax = (int)(8.0 / (BOXX * alpha) + 0.5);
1443 int qymax = (int)(8.0 / (BOXY * alpha) + 0.5);
1444
1445 int nxmax = (int)(2.0 * alpha * BOXX + 0.5);
1446 int nymax = (int)(2.0 * alpha * BOXY + 0.5);
1447
1448 if(printed == 0)
1449 {
1450 mpi_printf("EWALD: D2 table: qxmax=%d qymax=%d nxmax=%d nymax=%d\n", qxmax, qymax, nxmax, nymax);
1451 printed = 1;
1452 }
1453
1454 for(int nx = -qxmax; nx <= qxmax; nx++)
1455 for(int ny = -qymax; ny <= qymax; ny++)
1456 {
1457 double dx = x - nx * BOXX;
1458 double dy = y - ny * BOXY;
1459 double dz = z;
1460
1461 vector<double> dxyz(dx, dy, dz);
1462
1463 double r2 = dx * dx + dy * dy + dz * dz;
1464 double r = sqrt(r2);
1465
1466 double rinv = (r > 0) ? 1.0 / r : 0.0;
1467 double r2inv = rinv * rinv;
1468 double r3inv = r2inv * rinv;
1469 double r4inv = r2inv * r2inv;
1470 double r5inv = r2inv * r3inv;
1471 double r7inv = r3inv * r4inv;
1472
1473 double g2, g3;
1474
1475 if(nx != 0 || ny != 0)
1476 {
1477 g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1478
1479 g3 = (15.0 * erfc(alpha * r) +
1480 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1481 r7inv;
1482 }
1483 else
1484 {
1485 if((alpha * r) < 0.5)
1486 {
1487 g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) *
1488 (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 +
1489 pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0);
1490
1491 g3 = 16.0 * pow(alpha, 7) / sqrt(M_PI) *
1492 (-1.0 / 7.0 + pow(alpha * r, 2) / 9.0 - pow(alpha * r, 4) / 22.0 + pow(alpha * r, 6) / 78.0 -
1493 pow(alpha * r, 8) / 360.0 + pow(alpha * r, 10) / 2040.0);
1494 }
1495 else
1496 {
1497 g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1498
1499 g3 = (-15.0 * erf(alpha * r) +
1500 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1501 r7inv;
1502 }
1503 }
1504
1505 symtensor2<double> aux2 = dxyz % dxyz;
1506 symtensor3<double> aux3;
1507
1508 setup_D3(ADD, D3, dxyz, aux2, aux3, g2, g3);
1509 }
1510
1511 for(int nx = -nxmax; nx <= nxmax; nx++)
1512 for(int ny = -nymax; ny <= nymax; ny++)
1513 {
1514 if(nx != 0 || ny != 0)
1515 {
1516 double kx = (2.0 * M_PI / BOXX) * nx;
1517 double ky = (2.0 * M_PI / BOXY) * ny;
1518 double k2 = kx * kx + ky * ky;
1519 double k = sqrt(k2);
1520
1521 double ex = exp(-k * z); // note: z positive here
1522
1523 if(ex > 1.0e-60) // to prevent divisions by zero due to underflows
1524 {
1525 double val = M_PI / (BOXX * BOXY) / k * (specerf(z, k, alpha) + specerf(-z, k, alpha));
1526 double dzval = M_PI / (BOXX * BOXY) / k * (d_specerf(z, k, alpha) - d_specerf(-z, k, alpha));
1527 double dzdzval = M_PI / (BOXX * BOXY) / k * (dd_specerf(z, k, alpha) + dd_specerf(-z, k, alpha));
1528
1529 D3[dXXX] += -kx * kx * kx * val * sin(kx * x + ky * y);
1530 D3[dXXY] += -kx * kx * ky * val * sin(kx * x + ky * y);
1531 D3[dXXZ] += kx * kx * dzval * cos(kx * x + ky * y);
1532 D3[dXYY] += -kx * ky * ky * val * sin(kx * x + ky * y);
1533 D3[dXYZ] += kx * ky * dzval * cos(kx * x + ky * y);
1534 D3[dXZZ] += kx * dzdzval * sin(kx * x + ky * y);
1535 D3[dYYY] += -ky * ky * ky * val * sin(kx * x + ky * y);
1536 D3[dYYZ] += ky * ky * dzval * cos(kx * x + ky * y);
1537 D3[dYZZ] += ky * dzdzval * sin(kx * x + ky * y);
1538 D3[dZZZ] += -M_PI / (BOXX * BOXY) * cos(kx * x + ky * y) / k * (ddd_specerf(z, k, alpha) - ddd_specerf(-z, k, alpha));
1539 }
1540 }
1541 }
1542
1543 D3[dZZZ] += -8.0 * pow(alpha, 3) * z * sqrt(M_PI) / (BOXX * BOXY) * exp(-pow(alpha * z, 2));
1544
1545#endif
1546
1547 return D3;
1548}
1549
1550symtensor4<double> ewald::ewald_D4(double x, double y, double z)
1551{
1552 static int printed = 0;
1553
1554#ifdef GRAVITY_TALLBOX
1555 Terminate("GRAVITY_TALLBOX is not implemented for MULTIPOLE_ORDER >= 4");
1556#endif
1557
1558 symtensor4<double> D4 = 0.0;
1559
1560 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
1561 double alpha = 2.0 / leff;
1562 double alpha2 = alpha * alpha;
1563
1564 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
1565 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
1566 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
1567
1568 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
1569 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
1570 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
1571
1572 if(printed == 0)
1573 {
1574 mpi_printf("EWALD: D4 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
1575 nzmax);
1576 printed = 1;
1577 }
1578
1579 for(int nx = -qxmax; nx <= qxmax; nx++)
1580 for(int ny = -qymax; ny <= qymax; ny++)
1581 for(int nz = -qzmax; nz <= qzmax; nz++)
1582 {
1583 double dx = x - nx * (1.0 / LONG_X);
1584 double dy = y - ny * (1.0 / LONG_Y);
1585 double dz = z - nz * (1.0 / LONG_Z);
1586
1587 vector<double> dxyz(dx, dy, dz);
1588
1589 double r2 = dx * dx + dy * dy + dz * dz;
1590 double r = sqrt(r2);
1591
1592 double rinv = (r > 0) ? 1.0 / r : 0.0;
1593 double r2inv = rinv * rinv;
1594 double r3inv = r2inv * rinv;
1595 double r4inv = r2inv * r2inv;
1596 double r5inv = r2inv * r3inv;
1597 double r7inv = r3inv * r4inv;
1598 double r9inv = r4inv * r5inv;
1599
1600 double g2, g3, g4;
1601
1602 if(nx != 0 || ny != 0 || nz != 0)
1603 {
1604 g2 = -(3.0 * erfc(alpha * r) + (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1605
1606 g3 = (15.0 * erfc(alpha * r) +
1607 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1608 r7inv;
1609
1610 g4 = -(105.0 * erfc(alpha * r) +
1611 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
1612 sqrt(M_PI) * exp(-alpha2 * r2)) *
1613 r9inv;
1614 }
1615 else
1616 {
1617 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
1618 * results at the origin
1619 */
1620
1621 /* Note, for small r:
1622 *
1623 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
1624 *
1625 * Hence for r = 0:
1626 *
1627 * g0 = 2 * alpha / sqrt(pi)
1628 * g1 = -4/3 * alpha^3 / sqrt(pi)
1629 * g2 = 8/5 * alpha^5 / sqrt(pi)
1630 * g3 = -16/7 * alpha^7 / sqrt(pi)
1631 * g4 = 32/9 * alpha^9 / sqrt(pi)
1632 * g5 = -64/11 * alpha^11/ sqrt(pi)
1633 */
1634
1635 if((alpha * r) < 0.5)
1636 {
1637 g2 = 8.0 * pow(alpha, 5) / sqrt(M_PI) *
1638 (1.0 / 5.0 - pow(alpha * r, 2) / 7.0 + pow(alpha * r, 4) / 18.0 - pow(alpha * r, 6) / 66.0 +
1639 pow(alpha * r, 8) / 312.0 - pow(alpha * r, 10) / 1800.0);
1640
1641 g3 = 16.0 * pow(alpha, 7) / sqrt(M_PI) *
1642 (-1.0 / 7.0 + pow(alpha * r, 2) / 9.0 - pow(alpha * r, 4) / 22.0 + pow(alpha * r, 6) / 78.0 -
1643 pow(alpha * r, 8) / 360.0 + pow(alpha * r, 10) / 2040.0);
1644
1645 g4 = 32.0 * pow(alpha, 9) / sqrt(M_PI) *
1646 (1.0 / 9.0 - pow(alpha * r, 2) / 11.0 + pow(alpha * r, 4) / 26.0 - pow(alpha * r, 6) / 90.0 +
1647 pow(alpha * r, 8) / 408.0 - pow(alpha * r, 10) / 2280.0);
1648 }
1649 else
1650 {
1651 g2 = (3.0 * erf(alpha * r) - (6.0 * alpha * r + 4.0 * pow(alpha * r, 3)) / sqrt(M_PI) * exp(-alpha2 * r2)) * r5inv;
1652
1653 g3 = (-15.0 * erf(alpha * r) +
1654 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1655 r7inv;
1656
1657 g4 = (105.0 * erf(alpha * r) -
1658 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
1659 sqrt(M_PI) * exp(-alpha2 * r2)) *
1660 r9inv;
1661 }
1662 }
1663
1664 symtensor2<double> aux2 = dxyz % dxyz;
1665 symtensor3<double> aux3 = dxyz % aux2;
1666 symtensor4<double> aux4;
1667
1668 setup_D4(ADD, D4, dxyz, aux2, aux3, aux4, g2, g3, g4);
1669 }
1670
1671 for(int nx = -nxmax; nx <= nxmax; nx++)
1672 for(int ny = -nymax; ny <= nymax; ny++)
1673 for(int nz = -nzmax; nz <= nzmax; nz++)
1674 {
1675 if(nx != 0 || ny != 0 || nz != 0)
1676 {
1677 double kx = (2.0 * M_PI * LONG_X) * nx;
1678 double ky = (2.0 * M_PI * LONG_Y) * ny;
1679 double kz = (2.0 * M_PI * LONG_Z) * nz;
1680 double k2 = kx * kx + ky * ky + kz * kz;
1681
1682 double kdotx = (x * kx + y * ky + z * kz);
1683 double val = -4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
1684
1685 vector<double> kxyz(kx, ky, kz);
1686
1687 D4 += (val * kxyz) % ((kxyz % (kxyz % kxyz)));
1688 }
1689 }
1690
1691 return D4;
1692}
1693
1694symtensor5<double> ewald::ewald_D5(double x, double y, double z)
1695{
1696 static int printed = 0;
1697
1698#ifdef GRAVITY_TALLBOX
1699 Terminate("GRAVITY_TALLBOX is not implemented for MULTIPOLE_ORDER >= 4");
1700#endif
1701
1702 symtensor5<double> D5 = 0.0;
1703
1704 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
1705 double alpha = 2.0 / leff;
1706 double alpha2 = alpha * alpha;
1707
1708 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
1709 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
1710 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
1711
1712 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
1713 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
1714 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
1715
1716 if(printed == 0)
1717 {
1718 mpi_printf("EWALD: D5 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
1719 nzmax);
1720 printed = 1;
1721 }
1722
1723 for(int nx = -qxmax; nx <= qxmax; nx++)
1724 for(int ny = -qymax; ny <= qymax; ny++)
1725 for(int nz = -qzmax; nz <= qzmax; nz++)
1726 {
1727 double dx = x - nx * (1.0 / LONG_X);
1728 double dy = y - ny * (1.0 / LONG_Y);
1729 double dz = z - nz * (1.0 / LONG_Z);
1730
1731 vector<double> dxyz(dx, dy, dz);
1732
1733 double r2 = dx * dx + dy * dy + dz * dz;
1734 double r = sqrt(r2);
1735
1736 double rinv = (r > 0) ? 1.0 / r : 0.0;
1737 double r2inv = rinv * rinv;
1738 double r3inv = r2inv * rinv;
1739 double r4inv = r2inv * r2inv;
1740 double r5inv = r2inv * r3inv;
1741 double r7inv = r3inv * r4inv;
1742 double r9inv = r4inv * r5inv;
1743 double r11inv = r4inv * r7inv;
1744
1745 double g3, g4, g5;
1746
1747 if(nx != 0 || ny != 0 || nz != 0)
1748 {
1749 g3 = (15.0 * erfc(alpha * r) +
1750 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1751 r7inv;
1752
1753 g4 = -(105.0 * erfc(alpha * r) +
1754 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
1755 sqrt(M_PI) * exp(-alpha2 * r2)) *
1756 r9inv;
1757
1758 g5 = (945.0 * erfc(alpha * r) + (1890.0 * alpha * r + 1260.0 * pow(alpha * r, 3) + 504.0 * pow(alpha * r, 5) +
1759 144.0 * pow(alpha * r, 7) + 32.0 * pow(alpha * r, 9)) /
1760 sqrt(M_PI) * exp(-alpha2 * r2)) *
1761 r11inv;
1762 }
1763 else
1764 {
1765 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
1766 * results at the origin
1767 */
1768
1769 /* Note, for small r:
1770 *
1771 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
1772 *
1773 * Hence for r = 0:
1774 *
1775 * g0 = 2 * alpha / sqrt(pi)
1776 * g1 = -4/3 * alpha^3 / sqrt(pi)
1777 * g2 = 8/5 * alpha^5 / sqrt(pi)
1778 * g3 = -16/7 * alpha^7 / sqrt(pi)
1779 * g4 = 32/9 * alpha^9 / sqrt(pi)
1780 * g5 = -64/11 * alpha^11/ sqrt(pi)
1781 */
1782
1783 if((alpha * r) < 0.5)
1784 {
1785 g3 = 16.0 * pow(alpha, 7) / sqrt(M_PI) *
1786 (-1.0 / 7.0 + pow(alpha * r, 2) / 9.0 - pow(alpha * r, 4) / 22.0 + pow(alpha * r, 6) / 78.0 -
1787 pow(alpha * r, 8) / 360.0 + pow(alpha * r, 10) / 2040.0);
1788
1789 g4 = 32.0 * pow(alpha, 9) / sqrt(M_PI) *
1790 (1.0 / 9.0 - pow(alpha * r, 2) / 11.0 + pow(alpha * r, 4) / 26.0 - pow(alpha * r, 6) / 90.0 +
1791 pow(alpha * r, 8) / 408.0 - pow(alpha * r, 10) / 2280.0);
1792
1793 g5 = 64.0 * pow(alpha, 11) / sqrt(M_PI) *
1794 (-1.0 / 11.0 + pow(alpha * r, 2) / 13.0 - pow(alpha * r, 4) / 30.0 + pow(alpha * r, 6) / 102.0 -
1795 pow(alpha * r, 8) / 456.0 + pow(alpha * r, 10) / 2520.0);
1796 }
1797 else
1798 {
1799 g3 = (-15.0 * erf(alpha * r) +
1800 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1801 r7inv;
1802
1803 g4 = (105.0 * erf(alpha * r) -
1804 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
1805 sqrt(M_PI) * exp(-alpha2 * r2)) *
1806 r9inv;
1807
1808 g5 = (-945.0 * erf(alpha * r) + (1890.0 * alpha * r + 1260.0 * pow(alpha * r, 3) + 504.0 * pow(alpha * r, 5) +
1809 144.0 * pow(alpha * r, 7) + 32.0 * pow(alpha * r, 9)) /
1810 sqrt(M_PI) * exp(-alpha2 * r2)) *
1811 r11inv;
1812 }
1813 }
1814
1815 symtensor3<double> aux3 = dxyz % (dxyz % dxyz);
1816 symtensor4<double> aux4 = dxyz % aux3;
1817 symtensor5<double> aux5;
1818
1819 setup_D5(ADD, D5, dxyz, aux3, aux4, aux5, g3, g4, g5);
1820 }
1821
1822 for(int nx = -nxmax; nx <= nxmax; nx++)
1823 for(int ny = -nymax; ny <= nymax; ny++)
1824 for(int nz = -nzmax; nz <= nzmax; nz++)
1825 {
1826 double kx = (2.0 * M_PI * LONG_X) * nx;
1827 double ky = (2.0 * M_PI * LONG_Y) * ny;
1828 double kz = (2.0 * M_PI * LONG_Z) * nz;
1829 double k2 = kx * kx + ky * ky + kz * kz;
1830
1831 if(k2 > 0)
1832 {
1833 double kdotx = (x * kx + y * ky + z * kz);
1834 double val = 4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * sin(kdotx);
1835
1836 vector<double> kxyz(kx, ky, kz);
1837
1838 D5 += (val * kxyz) % (kxyz % ((kxyz % (kxyz % kxyz))));
1839 }
1840 }
1841
1842 return D5;
1843}
1844
1845symtensor6<double> ewald::ewald_D6(double x, double y, double z)
1846{
1847 static int printed = 0;
1848
1849#ifdef GRAVITY_TALLBOX
1850 Terminate("GRAVITY_TALLBOX is not implemented for MULTIPOLE_ORDER >= 4");
1851#endif
1852
1853 symtensor6<double> D6 = 0.0;
1854
1855 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
1856 double alpha = 2.0 / leff;
1857 double alpha2 = alpha * alpha;
1858
1859 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
1860 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
1861 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
1862
1863 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
1864 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
1865 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
1866
1867 if(printed == 0)
1868 {
1869 mpi_printf("EWALD: D6 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
1870 nzmax);
1871 printed = 1;
1872 }
1873
1874 for(int nx = -qxmax; nx <= qxmax; nx++)
1875 for(int ny = -qymax; ny <= qymax; ny++)
1876 for(int nz = -qzmax; nz <= qzmax; nz++)
1877 {
1878 double dx = x - nx * (1.0 / LONG_X);
1879 double dy = y - ny * (1.0 / LONG_Y);
1880 double dz = z - nz * (1.0 / LONG_Z);
1881
1882 vector<double> dxyz(dx, dy, dz);
1883
1884 double r2 = dx * dx + dy * dy + dz * dz;
1885 double r = sqrt(r2);
1886
1887 double rinv = (r > 0) ? 1.0 / r : 0.0;
1888 double r2inv = rinv * rinv;
1889 double r3inv = r2inv * rinv;
1890 double r4inv = r2inv * r2inv;
1891 double r5inv = r2inv * r3inv;
1892 double r7inv = r3inv * r4inv;
1893 double r9inv = r4inv * r5inv;
1894 double r11inv = r4inv * r7inv;
1895 double r13inv = r4inv * r9inv;
1896
1897 double g3, g4, g5, g6;
1898
1899 if(nx != 0 || ny != 0 || nz != 0)
1900 {
1901 g3 = (15.0 * erfc(alpha * r) +
1902 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1903 r7inv;
1904
1905 g4 = -(105.0 * erfc(alpha * r) +
1906 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
1907 sqrt(M_PI) * exp(-alpha2 * r2)) *
1908 r9inv;
1909
1910 g5 = (945.0 * erfc(alpha * r) + (1890.0 * alpha * r + 1260.0 * pow(alpha * r, 3) + 504.0 * pow(alpha * r, 5) +
1911 144.0 * pow(alpha * r, 7) + 32.0 * pow(alpha * r, 9)) /
1912 sqrt(M_PI) * exp(-alpha2 * r2)) *
1913 r11inv;
1914
1915 g6 = (-10395.0 * erfc(alpha * r) -
1916 2.0 *
1917 (10395.0 * alpha * r + 6930.0 * pow(alpha * r, 3) + 2772.0 * pow(alpha * r, 5) + 792.0 * pow(alpha * r, 7) +
1918 176.0 * pow(alpha * r, 9) + 32.0 * pow(alpha * r, 11)) /
1919 sqrt(M_PI) * exp(-alpha2 * r2)) *
1920 r13inv;
1921 }
1922 else
1923 {
1924 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
1925 * results at the origin
1926 */
1927
1928 /* Note, for small r:
1929 *
1930 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
1931 *
1932 * Hence for r = 0:
1933 *
1934 * g0 = 2 * alpha / sqrt(pi)
1935 * g1 = -4/3 * alpha^3 / sqrt(pi)
1936 * g2 = 8/5 * alpha^5 / sqrt(pi)
1937 * g3 = -16/7 * alpha^7 / sqrt(pi)
1938 * g4 = 32/9 * alpha^9 / sqrt(pi)
1939 * g5 = -64/11 * alpha^11/ sqrt(pi)
1940 */
1941
1942 if((alpha * r) < 0.5)
1943 {
1944 g3 = 16.0 * pow(alpha, 7) / sqrt(M_PI) *
1945 (-1.0 / 7.0 + pow(alpha * r, 2) / 9.0 - pow(alpha * r, 4) / 22.0 + pow(alpha * r, 6) / 78.0 -
1946 pow(alpha * r, 8) / 360.0 + pow(alpha * r, 10) / 2040.0);
1947
1948 g4 = 32.0 * pow(alpha, 9) / sqrt(M_PI) *
1949 (1.0 / 9.0 - pow(alpha * r, 2) / 11.0 + pow(alpha * r, 4) / 26.0 - pow(alpha * r, 6) / 90.0 +
1950 pow(alpha * r, 8) / 408.0 - pow(alpha * r, 10) / 2280.0);
1951
1952 g5 = 64.0 * pow(alpha, 11) / sqrt(M_PI) *
1953 (-1.0 / 11.0 + pow(alpha * r, 2) / 13.0 - pow(alpha * r, 4) / 30.0 + pow(alpha * r, 6) / 102.0 -
1954 pow(alpha * r, 8) / 456.0 + pow(alpha * r, 10) / 2520.0);
1955
1956 g6 = 128.0 * pow(alpha, 13) / sqrt(M_PI) *
1957 (1.0 / 13.0 - pow(alpha * r, 2) / 15.0 + pow(alpha * r, 4) / 34.0 - pow(alpha * r, 6) / 114.0 +
1958 pow(alpha * r, 8) / 504.0 - pow(alpha * r, 10) / 2760.0);
1959 }
1960 else
1961 {
1962 g3 = (-15.0 * erf(alpha * r) +
1963 (30.0 * alpha * r + 20.0 * pow(alpha * r, 3) + 8.0 * pow(alpha * r, 5)) / sqrt(M_PI) * exp(-alpha2 * r2)) *
1964 r7inv;
1965
1966 g4 = (105.0 * erf(alpha * r) -
1967 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
1968 sqrt(M_PI) * exp(-alpha2 * r2)) *
1969 r9inv;
1970
1971 g5 = (-945.0 * erf(alpha * r) + (1890.0 * alpha * r + 1260.0 * pow(alpha * r, 3) + 504.0 * pow(alpha * r, 5) +
1972 144.0 * pow(alpha * r, 7) + 32.0 * pow(alpha * r, 9)) /
1973 sqrt(M_PI) * exp(-alpha2 * r2)) *
1974 r11inv;
1975
1976 g6 = (10395.0 * erf(alpha * r) -
1977 2.0 *
1978 (10395.0 * alpha * r + 6930.0 * pow(alpha * r, 3) + 2772.0 * pow(alpha * r, 5) +
1979 792.0 * pow(alpha * r, 7) + 176.0 * pow(alpha * r, 9) + 32.0 * pow(alpha * r, 11)) /
1980 sqrt(M_PI) * exp(-alpha2 * r2)) *
1981 r13inv;
1982 }
1983 }
1984
1985 setup_D6(ADD, D6, dxyz, g3, g4, g5, g6);
1986 }
1987
1988 for(int nx = -nxmax; nx <= nxmax; nx++)
1989 for(int ny = -nymax; ny <= nymax; ny++)
1990 for(int nz = -nzmax; nz <= nzmax; nz++)
1991 {
1992 double kx = (2.0 * M_PI * LONG_X) * nx;
1993 double ky = (2.0 * M_PI * LONG_Y) * ny;
1994 double kz = (2.0 * M_PI * LONG_Z) * nz;
1995 double k2 = kx * kx + ky * ky + kz * kz;
1996
1997 if(k2 > 0)
1998 {
1999 double kdotx = (x * kx + y * ky + z * kz);
2000 double val = 4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * cos(kdotx);
2001
2002 vector<double> kxyz(kx, ky, kz);
2003
2004 D6 += (val * kxyz) % (kxyz % (kxyz % ((kxyz % (kxyz % kxyz)))));
2005 }
2006 }
2007
2008 return D6;
2009}
2010
2011symtensor7<double> ewald::ewald_D7(double x, double y, double z)
2012{
2013 static int printed = 0;
2014
2015#ifdef GRAVITY_TALLBOX
2016 Terminate("GRAVITY_TALLBOX is not implemented for MULTIPOLE_ORDER >= 4");
2017#endif
2018
2019 symtensor7<double> D7 = 0.0;
2020
2021 double leff = pow((1.0 / LONG_X) * (1.0 / LONG_Y) * (1.0 / LONG_Z), 1.0 / 3);
2022 double alpha = 2.0 / leff;
2023 double alpha2 = alpha * alpha;
2024
2025 int qxmax = (int)(8.0 * LONG_X / alpha + 0.5);
2026 int qymax = (int)(8.0 * LONG_Y / alpha + 0.5);
2027 int qzmax = (int)(8.0 * LONG_Z / alpha + 0.5);
2028
2029 int nxmax = (int)(2.0 * alpha / LONG_X + 0.5);
2030 int nymax = (int)(2.0 * alpha / LONG_Y + 0.5);
2031 int nzmax = (int)(2.0 * alpha / LONG_Z + 0.5);
2032
2033 if(printed == 0)
2034 {
2035 mpi_printf("EWALD: D7 table: qxmax=%d qymax=%d qzmax=%d nxmax=%d nymax=%d nzmax=%d\n", qxmax, qymax, qzmax, nxmax, nymax,
2036 nzmax);
2037 printed = 1;
2038 }
2039
2040 for(int nx = -qxmax; nx <= qxmax; nx++)
2041 for(int ny = -qymax; ny <= qymax; ny++)
2042 for(int nz = -qzmax; nz <= qzmax; nz++)
2043 {
2044 double dx = x - nx * (1.0 / LONG_X);
2045 double dy = y - ny * (1.0 / LONG_Y);
2046 double dz = z - nz * (1.0 / LONG_Z);
2047
2048 vector<double> dxyz(dx, dy, dz);
2049
2050 double r2 = dx * dx + dy * dy + dz * dz;
2051 double r = sqrt(r2);
2052
2053 double rinv = (r > 0) ? 1.0 / r : 0.0;
2054 double r2inv = rinv * rinv;
2055 double r3inv = r2inv * rinv;
2056 double r4inv = r2inv * r2inv;
2057 double r5inv = r2inv * r3inv;
2058 double r7inv = r3inv * r4inv;
2059 double r9inv = r4inv * r5inv;
2060 double r11inv = r4inv * r7inv;
2061 double r13inv = r4inv * r9inv;
2062 double r15inv = r4inv * r11inv;
2063
2064 double g4, g5, g6, g7;
2065
2066 if(nx != 0 || ny != 0 || nz != 0)
2067 {
2068 g4 = -(105.0 * erfc(alpha * r) +
2069 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
2070 sqrt(M_PI) * exp(-alpha2 * r2)) *
2071 r9inv;
2072
2073 g5 = (945.0 * erfc(alpha * r) + (1890.0 * alpha * r + 1260.0 * pow(alpha * r, 3) + 504.0 * pow(alpha * r, 5) +
2074 144.0 * pow(alpha * r, 7) + 32.0 * pow(alpha * r, 9)) /
2075 sqrt(M_PI) * exp(-alpha2 * r2)) *
2076 r11inv;
2077
2078 g6 = (-10395.0 * erfc(alpha * r) -
2079 2.0 *
2080 (10395.0 * alpha * r + 6930.0 * pow(alpha * r, 3) + 2772.0 * pow(alpha * r, 5) + 792.0 * pow(alpha * r, 7) +
2081 176.0 * pow(alpha * r, 9) + 32.0 * pow(alpha * r, 11)) /
2082 sqrt(M_PI) * exp(-alpha2 * r2)) *
2083 r13inv;
2084
2085 g7 =
2086 (135135.0 * erfc(alpha * r) + 2.0 *
2087 (135135.0 * alpha * r + 90090.0 * pow(alpha * r, 3) + 36036.0 * pow(alpha * r, 5) +
2088 10296.0 * pow(alpha * r, 7) + 2288.0 * pow(alpha * r, 9) +
2089 416.0 * pow(alpha * r, 11) + 64.0 * pow(alpha * r, 13)) /
2090 sqrt(M_PI) * exp(-alpha2 * r2)) *
2091 r15inv;
2092 }
2093 else
2094 {
2095 /* we add the 1/r term here to the (0|0|0) entry, followed by differentiation, and the limit r->0 to obtain accurate
2096 * results at the origin
2097 */
2098
2099 /* Note, for small r:
2100 *
2101 * [1/- erfc(a r)]/r = 2 a/sqrt(pi) * [ 1 - (a r)^2/3 + (a r)^4 / 10 - (a r)^6 / 42 + (a r)^8 / 216 - ...]
2102 *
2103 * Hence for r = 0:
2104 *
2105 * g0 = 2 * alpha / sqrt(pi)
2106 * g1 = -4/3 * alpha^3 / sqrt(pi)
2107 * g2 = 8/5 * alpha^5 / sqrt(pi)
2108 * g3 = -16/7 * alpha^7 / sqrt(pi)
2109 * g4 = 32/9 * alpha^9 / sqrt(pi)
2110 * g5 = -64/11 * alpha^11/ sqrt(pi)
2111 */
2112
2113 if((alpha * r) < 0.5)
2114 {
2115 g4 = 32.0 * pow(alpha, 9) / sqrt(M_PI) *
2116 (1.0 / 9.0 - pow(alpha * r, 2) / 11.0 + pow(alpha * r, 4) / 26.0 - pow(alpha * r, 6) / 90.0 +
2117 pow(alpha * r, 8) / 408.0 - pow(alpha * r, 10) / 2280.0);
2118
2119 g5 = 64.0 * pow(alpha, 11) / sqrt(M_PI) *
2120 (-1.0 / 11.0 + pow(alpha * r, 2) / 13.0 - pow(alpha * r, 4) / 30.0 + pow(alpha * r, 6) / 102.0 -
2121 pow(alpha * r, 8) / 456.0 + pow(alpha * r, 10) / 2520.0);
2122
2123 g6 = 128.0 * pow(alpha, 13) / sqrt(M_PI) *
2124 (1.0 / 13.0 - pow(alpha * r, 2) / 15.0 + pow(alpha * r, 4) / 34.0 - pow(alpha * r, 6) / 114.0 +
2125 pow(alpha * r, 8) / 504.0 - pow(alpha * r, 10) / 2760.0);
2126
2127 g7 = 256.0 * pow(alpha, 15) / sqrt(M_PI) *
2128 (-1.0 / 15.0 + pow(alpha * r, 2) / 17.0 - pow(alpha * r, 4) / 38.0 + pow(alpha * r, 6) / 126.0 -
2129 pow(alpha * r, 8) / 552.0 + pow(alpha * r, 10) / 3000.0);
2130 }
2131 else
2132 {
2133 g4 = (105.0 * erf(alpha * r) -
2134 (210.0 * alpha * r + 140.0 * pow(alpha * r, 3) + 56.0 * pow(alpha * r, 5) + 16.0 * pow(alpha * r, 7)) /
2135 sqrt(M_PI) * exp(-alpha2 * r2)) *
2136 r9inv;
2137
2138 g5 = (-945.0 * erf(alpha * r) + (1890.0 * alpha * r + 1260.0 * pow(alpha * r, 3) + 504.0 * pow(alpha * r, 5) +
2139 144.0 * pow(alpha * r, 7) + 32.0 * pow(alpha * r, 9)) /
2140 sqrt(M_PI) * exp(-alpha2 * r2)) *
2141 r11inv;
2142
2143 g6 = (10395.0 * erf(alpha * r) -
2144 2.0 *
2145 (10395.0 * alpha * r + 6930.0 * pow(alpha * r, 3) + 2772.0 * pow(alpha * r, 5) +
2146 792.0 * pow(alpha * r, 7) + 176.0 * pow(alpha * r, 9) + 32.0 * pow(alpha * r, 11)) /
2147 sqrt(M_PI) * exp(-alpha2 * r2)) *
2148 r13inv;
2149
2150 g7 = (-135135.0 * erf(alpha * r) +
2151 2.0 *
2152 (135135.0 * alpha * r + 90090.0 * pow(alpha * r, 3) + 36036.0 * pow(alpha * r, 5) +
2153 10296.0 * pow(alpha * r, 7) + 2288.0 * pow(alpha * r, 9) + 416.0 * pow(alpha * r, 11) +
2154 64.0 * pow(alpha * r, 13)) /
2155 sqrt(M_PI) * exp(-alpha2 * r2)) *
2156 r15inv;
2157 }
2158 }
2159
2160 setup_D7(ADD, D7, dxyz, g4, g5, g6, g7);
2161 }
2162
2163 for(int nx = -nxmax; nx <= nxmax; nx++)
2164 for(int ny = -nymax; ny <= nymax; ny++)
2165 for(int nz = -nzmax; nz <= nzmax; nz++)
2166 {
2167 double kx = (2.0 * M_PI * LONG_X) * nx;
2168 double ky = (2.0 * M_PI * LONG_Y) * ny;
2169 double kz = (2.0 * M_PI * LONG_Z) * nz;
2170 double k2 = kx * kx + ky * ky + kz * kz;
2171
2172 if(k2 > 0)
2173 {
2174 double kdotx = (x * kx + y * ky + z * kz);
2175 double val = -4.0 * M_PI * (LONG_X * LONG_Y * LONG_Z) / k2 * exp(-k2 / (4.0 * alpha2)) * sin(kdotx);
2176
2177 vector<double> kxyz(kx, ky, kz);
2178
2179 D7 += (val * kxyz) % (kxyz % (kxyz % (kxyz % (kxyz % (kxyz % kxyz)))));
2180 }
2181 }
2182
2183 return D7;
2184}
global_data_all_processes All
Definition: main.cc:40
symtensor3< double > ewald_D3(double x, double y, double z)
Definition: ewald.cc:1312
double ewald_D0(double x, double y, double z)
This function computes the potential correction term by means of Ewald summation.
Definition: ewald.cc:705
symtensor4< double > ewald_D4(double x, double y, double z)
Definition: ewald.cc:1550
void ewald_init(void)
This function initializes tables with the correction force and the correction potential due to the pe...
Definition: ewald.cc:69
symtensor6< double > ewald_D6(double x, double y, double z)
Definition: ewald.cc:1845
interpolate_options
Definition: ewald.h:66
@ POINTMASS
Definition: ewald.h:67
symtensor7< double > ewald_D7(double x, double y, double z)
Definition: ewald.cc:2011
symtensor5< double > ewald_D5(double x, double y, double z)
Definition: ewald.cc:1694
vector< double > ewald_D1(double x, double y, double z)
This function computes the force correction term (difference between full force of infinite lattice a...
Definition: ewald.cc:898
symtensor2< double > ewald_D2(double x, double y, double z)
Definition: ewald.cc:1090
void ewald_gridlookup(const MyIntPosType *p_intpos, const MyIntPosType *target_intpos, enum interpolate_options flag, ewald_data &fper)
Definition: ewald.cc:355
MyReal FacCoordToInt
Definition: intposconvert.h:38
void nearest_image_intpos_to_pos(const MyIntPosType *const a, const MyIntPosType *const b, T *posdiff)
MyReal RegionLen
Definition: intposconvert.h:39
MyReal FacIntToCoord
Definition: intposconvert.h:37
void constrain_intpos(MyIntPosType *pos)
Definition: intposconvert.h:68
size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
A wrapper for the fread() function.
size_t my_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
A wrapper for the fwrite() function.
void mpi_printf(const char *fmt,...)
Definition: setcomm.h:55
int ThisTask
Definition: setcomm.h:33
int NTask
Definition: setcomm.h:32
MPI_Comm Communicator
Definition: setcomm.h:31
MPI_Comm SharedMemComm
int Island_ThisTask
int Island_NTask
void ** SharedMemBaseAddr
int MyShmRankInGlobal
T da[3]
Definition: symtensors.h:106
#define M_PI
Definition: constants.h:56
#define MAX_LONG_Z_BITS
Definition: dtypes.h:378
float MyFloat
Definition: dtypes.h:86
#define LONG_Y
Definition: dtypes.h:369
#define DBY
Definition: dtypes.h:420
uint32_t MyIntPosType
Definition: dtypes.h:35
#define MAX_LONG_X_BITS
Definition: dtypes.h:362
double MyReal
Definition: dtypes.h:82
#define MAX_LONG_Y_BITS
Definition: dtypes.h:370
#define LONG_X
Definition: dtypes.h:361
#define BITS_FOR_POSITIONS
Definition: dtypes.h:37
#define DBX
Definition: dtypes.h:419
#define LONG_Z
Definition: dtypes.h:377
#define DBZ
Definition: dtypes.h:421
#define EN
Base dimension of cubical Ewald lookup table, for one octant.
Definition: ewald.h:43
#define ENX
Definition: ewald.h:45
#define ENZ
Definition: ewald.h:47
#define ENY
Definition: ewald.h:46
#define EWLEVEL
Definition: ewald.h:41
#define HIGHEST_NEEDEDORDER_EWALD_DPHI
Definition: gravtree.h:153
#define EWALD_TAYLOR_ORDER
Definition: gravtree.h:179
int myMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
#define Terminate(...)
Definition: macros.h:15
shmem Shmem
Definition: main.cc:45
#define TAG_DMOM
Definition: mpi_utils.h:30
#define TAG_EWALD_ALLOC
Definition: mpi_utils.h:22
memory Mem
Definition: main.cc:44
expr exp(half arg)
Definition: half.hpp:2724
expr cos(half arg)
Definition: half.hpp:2823
expr sin(half arg)
Definition: half.hpp:2816
expr erf(half arg)
Definition: half.hpp:2918
expr pow(half base, half exp)
Definition: half.hpp:2803
expr erfc(half arg)
Definition: half.hpp:2925
expr sqrt(half arg)
Definition: half.hpp:2777
vector< MyReal > D1phi
Definition: gravtree.h:186
symtensor3< MyReal > D3phi
Definition: gravtree.h:188
MyReal D0phi
Definition: gravtree.h:185
symtensor2< MyReal > D2phi
Definition: gravtree.h:187
#define tXXXZZZZ
#define tZZZZZZZ
#define tXYYZZZZ
#define pXYYYZZ
#define rYZZZZ
#define tYYZZZZZ
#define rXXYZZ
#define tXYYYYYZ
#define vZ
#define dYZZ
#define tXXYYYYZ
#define rYYZZZ
#define sXXXY
#define vY
#define vX
#define pXXXXYZ
#define rXXZZZ
#define dXYY
#define rXXXYY
#define tXXXXXXX
#define sXXYZ
#define pXYYYYY
#define tXXZZZZZ
#define tXYYYZZZ
#define pXYYZZZ
#define tYYYZZZZ
#define qXX
#define sXYYZ
#define pXXXYYY
#define dZZZ
#define rXXXYZ
#define tYYYYYZZ
#define dYYY
#define dXYZ
#define pXXYYYZ
#define sYYYZ
#define rXXYYY
#define qZX
#define tXXXYYYZ
#define tXYZZZZZ
#define rXXXXZ
#define sXZZZ
#define qYY
#define rXXXXY
#define tXXYYYYY
#define rYYYYZ
#define pXXXXXZ
#define rXYYYZ
#define tXXXXYZZ
#define tXXXYZZZ
#define pXYZZZZ
#define qYZ
#define tXXXYYZZ
#define dZZY
#define tYYYYYYZ
#define rXYYYY
#define sXYYY
#define dXXY
#define pXXXYYZ
#define tXXXXXYZ
#define tXXXXZZZ
#define pYZZZZZ
#define dZXY
#define dXXX
#define dXXZ
#define rYYYZZ
#define pXYYYYZ
#define dYYZ
#define pYYYZZZ
#define tXXYZZZZ
#define tXXXXXXY
#define rXXXXX
#define tXXXXXZZ
#define pXXXYZZ
#define rXXYYZ
#define dZZX
#define tYZZZZZZ
#define tXXXXYYY
#define sXYZZ
#define qZZ
#define pXXXXXY
#define tXXXXXXZ
#define rZZZZZ
#define rXXXZZ
#define rXYYZZ
#define sXXXZ
#define tXYYYYZZ
#define dXZY
#define dZYY
#define sYZZZ
#define pXXYZZZ
#define tXYYYYYY
#define tXXXYYYY
#define tYYYYYYY
#define pYYYYYZ
#define qXY
#define dZXX
#define qZY
#define rXYZZZ
#define tYYYYZZZ
#define tXXXXXYY
#define rYYYYY
#define tXZZZZZZ
#define pXXXZZZ
#define tXXYYZZZ
#define dXZZ
#define tXXXXYYZ
#define rXZZZZ
#define qXZ
#define tXXYYYZZ
#define pXZZZZZ
void setup_D6(enum setup_options opt, symtensor6< T > &D6, vector< T > &dxyz, TypeGfac g3, TypeGfac g4, TypeGfac g5, TypeGfac g6)
Definition: symtensors.h:1912
void setup_D5(enum setup_options opt, symtensor5< T > &D5, vector< T > &dxyz, symtensor3< T > &aux3, symtensor4< T > &aux4, symtensor5< T > &aux5, TypeGfac g3, TypeGfac g4, TypeGfac g5)
Definition: symtensors.h:1842
void setup_D3(enum setup_options opt, symtensor3< T > &D3, vector< T > &dxyz, symtensor2< T > &aux2, symtensor3< T > &aux3, TypeGfac g2, TypeGfac g3)
Definition: symtensors.h:1775
void setup_D7(enum setup_options opt, symtensor7< T > &D7, vector< T > &dxyz, TypeGfac g4, TypeGfac g5, TypeGfac g6, TypeGfac g7)
Definition: symtensors.h:2028
@ ADD
Definition: symtensors.h:1771
void setup_D4(enum setup_options opt, symtensor4< T > &D4, vector< T > &dxyz, symtensor2< T > &aux2, symtensor3< T > &aux3, symtensor4< T > &aux4, TypeGfac g2, TypeGfac g3, TypeGfac g4)
Definition: symtensors.h:1801
void subdivide_evenly(long long N, int pieces, int index_bin, long long *first, int *count)
Definition: system.cc:51
void myflush(FILE *fstream)
Definition: system.cc:125