GADGET-4
domain_exchange.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../ngbtree/ngbtree.h"
#include "../sort/cxxsort.h"
#include "../system/system.h"

template <typename partset>
void domain<partset>::domain_resize_storage(int count_get_total, int count_get_sph, int option_flag)
{
  int max_load, load = count_get_total;
  int max_sphload, sphload = count_get_sph;
  MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
  MPI_Allreduce(&sphload, &max_sphload, 1, MPI_INT, MPI_MAX, Communicator);

  if(max_load > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPart)
    {
      Tp->reallocate_memory_maxpart(max_load / (1.0 - 2 * ALLOC_TOLERANCE));

      if(option_flag == 1)
        domain_key = (peanokey *)Mem.myrealloc_movable(domain_key, sizeof(peanokey) * Tp->MaxPart);
    }

  if(max_sphload > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPartSph || max_sphload < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPartSph)
    {
      int maxpartsphNew = max_sphload / (1.0 - 2 * ALLOC_TOLERANCE);
      if(option_flag == 2)
        {
          Terminate("need to reactivate this");
        }
      Tp->reallocate_memory_maxpartsph(maxpartsphNew);
    }
}
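
/* A numerical illustration of the reallocation hysteresis above (assuming, for example,
 * ALLOC_TOLERANCE = 0.1): with Tp->MaxPart = 1000, a reallocation is triggered only if the
 * global maximum load rises above 900 or drops below 700, and the new size max_load / 0.8
 * leaves roughly 25% headroom above the current peak, so small fluctuations in the particle
 * load do not force a reallocation on every call.
 */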

template <typename partset>
void domain<partset>::domain_countToGo(int *toGoDM, int *toGoSph)
{
  for(int n = 0; n < NTask; n++)
    {
      toGoDM[n] = toGoSph[n] = 0;
    }

  for(int n = 0; n < Tp->NumPart; n++)
    {
      int no = n_to_no(n);

      if(Tp->P[n].getType() == 0)
        toGoSph[TaskOfLeaf[no]]++;
      else
        toGoDM[TaskOfLeaf[no]]++;
    }
}

template <typename partset>
{
#ifdef SUBFIND
  for(int i = 0; i < Tp->NumPart; i++)
    {
      int task = TaskOfLeaf[n_to_no(i)];

      Tp->PS[i].TargetTask = task;
      Tp->PS[i].TargetIndex = 0; /* unimportant here */
    }
#endif
}

template <typename partset>
void domain<partset>::domain_exchange(void)
{
  double t0 = Logs.second();

  int *toGoDM = (int *)Mem.mymalloc_movable(&toGoDM, "toGoDM", NTask * sizeof(int));
  int *toGoSph = (int *)Mem.mymalloc_movable(&toGoSph, "toGoSph", NTask * sizeof(int));
  int *toGetDM = (int *)Mem.mymalloc_movable(&toGetDM, "toGetDM", NTask * sizeof(int));
  int *toGetSph = (int *)Mem.mymalloc_movable(&toGetSph, "toGetSph", NTask * sizeof(int));

  domain_countToGo(toGoDM, toGoSph);

  int *toGo = (int *)Mem.mymalloc("toGo", 2 * NTask * sizeof(int));
  int *toGet = (int *)Mem.mymalloc("toGet", 2 * NTask * sizeof(int));

  for(int i = 0; i < NTask; ++i)
    {
      toGo[2 * i] = toGoDM[i];
      toGo[2 * i + 1] = toGoSph[i];
    }
  myMPI_Alltoall(toGo, 2, MPI_INT, toGet, 2, MPI_INT, Communicator);
  for(int i = 0; i < NTask; ++i)
    {
      toGetDM[i] = toGet[2 * i];
      toGetSph[i] = toGet[2 * i + 1];
    }
  Mem.myfree(toGet);
  Mem.myfree(toGo);

  int count_togo_dm = 0, count_togo_sph = 0, count_get_dm = 0, count_get_sph = 0;
  for(int i = 0; i < NTask; i++)
    {
      count_togo_dm += toGoDM[i];
      count_togo_sph += toGoSph[i];
      count_get_dm += toGetDM[i];
      count_get_sph += toGetSph[i];
    }

  long long sumtogo = count_togo_dm;
  sumup_longs(1, &sumtogo, &sumtogo, Communicator);

  domain_printf("DOMAIN: exchange of %lld particles\n", sumtogo);

  if(Tp->NumPart != count_togo_dm + count_togo_sph)
    Terminate("NumPart != count_togo");

  int *send_sph_offset = (int *)Mem.mymalloc_movable(&send_sph_offset, "send_sph_offset", NTask * sizeof(int));
  int *send_dm_offset = (int *)Mem.mymalloc_movable(&send_dm_offset, "send_dm_offset", NTask * sizeof(int));
  int *recv_sph_offset = (int *)Mem.mymalloc_movable(&recv_sph_offset, "recv_sph_offset", NTask * sizeof(int));
  int *recv_dm_offset = (int *)Mem.mymalloc_movable(&recv_dm_offset, "recv_dm_offset", NTask * sizeof(int));

  send_sph_offset[0] = send_dm_offset[0] = recv_sph_offset[0] = recv_dm_offset[0] = 0;
  for(int i = 1; i < NTask; i++)
    {
      send_sph_offset[i] = send_sph_offset[i - 1] + toGoSph[i - 1];
      send_dm_offset[i] = send_dm_offset[i - 1] + toGoDM[i - 1];

      recv_sph_offset[i] = recv_sph_offset[i - 1] + toGetSph[i - 1];
      recv_dm_offset[i] = recv_dm_offset[i - 1] + toGetDM[i - 1];
    }

  for(int i = 0; i < NTask; i++)
    {
      send_dm_offset[i] += count_togo_sph;
      recv_dm_offset[i] += count_get_sph;
    }
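
  /* Layout of the send buffers filled below (illustrative example): all SPH particles come
   * first, grouped by target task at send_sph_offset[task], followed by the collisionless
   * particles at send_dm_offset[task], which was shifted by count_togo_sph above. E.g. with
   * two tasks, toGoSph = {2, 1} and toGoDM = {1, 3}, partBuf/keyBuf hold
   * [sph->0, sph->0, sph->1, dm->0, dm->1, dm->1, dm->1].
   */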

  pdata *partBuf =
      (typename partset::pdata *)Mem.mymalloc_movable_clear(&partBuf, "partBuf", (count_togo_dm + count_togo_sph) * sizeof(pdata));
  sph_particle_data *sphBuf =
      (sph_particle_data *)Mem.mymalloc_movable_clear(&sphBuf, "sphBuf", count_togo_sph * sizeof(sph_particle_data));
  peanokey *keyBuf = (peanokey *)Mem.mymalloc_movable_clear(&keyBuf, "keyBuf", (count_togo_dm + count_togo_sph) * sizeof(peanokey));

  for(int i = 0; i < NTask; i++)
    toGoSph[i] = toGoDM[i] = 0;

  for(int n = 0; n < Tp->NumPart; n++)
    {
      int off, num;
      int task = TaskOfLeaf[n_to_no(n)];

      if(Tp->P[n].getType() == 0)
        {
          num = toGoSph[task]++;

          off = send_sph_offset[task] + num;
          sphBuf[off] = Tp->SphP[n];
        }
      else
        {
          num = toGoDM[task]++;

          off = send_dm_offset[task] + num;
        }

      partBuf[off] = Tp->P[n];
      keyBuf[off] = domain_key[n];
    }

  /**** now resize the storage for the P[] and SphP[] arrays if needed ****/
  domain_resize_storage(count_get_dm + count_get_sph, count_get_sph, 1);

  /***** space has been created, now we can do the actual exchange *****/

  /* produce a flag if any of the send sizes is above our transfer limit, in this case we will
   * transfer the data in chunks.
   */

  int flag_big = 0, flag_big_all;
  for(int i = 0; i < NTask; i++)
    {
      if(toGoSph[i] * sizeof(sph_particle_data) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
        flag_big = 1;

      if(std::max<int>(toGoSph[i], toGoDM[i]) * sizeof(typename partset::pdata) > MPI_MESSAGE_SIZELIMIT_IN_BYTES)
        flag_big = 1;
    }

  MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
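
  /* Note: flag_big_all is only consumed by the my_int_MPI_Alltoallv() fallback in the #else
   * branch below, which then transfers the data in chunks rather than in a single message per
   * task pair.
   */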

#if 1
#ifdef USE_MPIALLTOALLV_IN_DOMAINDECOMP
  int method = 0;
#else
#ifndef ISEND_IRECV_IN_DOMAIN /* synchronous communication */
  int method = 1;
#else
  int method = 2; /* asynchronous communication */
#endif
#endif
  MPI_Datatype tp;
  MPI_Type_contiguous(sizeof(typename partset::pdata), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  myMPI_Alltoallv_new(partBuf, toGoSph, send_sph_offset, tp, Tp->P, toGetSph, recv_sph_offset, tp, Communicator, method);
  myMPI_Alltoallv_new(partBuf, toGoDM, send_dm_offset, tp, Tp->P, toGetDM, recv_dm_offset, tp, Communicator, method);
  MPI_Type_free(&tp);
  MPI_Type_contiguous(sizeof(sph_particle_data), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  myMPI_Alltoallv_new(sphBuf, toGoSph, send_sph_offset, tp, Tp->SphP, toGetSph, recv_sph_offset, tp, Communicator, method);
  MPI_Type_free(&tp);
  MPI_Type_contiguous(sizeof(peanokey), MPI_CHAR, &tp);
  MPI_Type_commit(&tp);
  myMPI_Alltoallv_new(keyBuf, toGoSph, send_sph_offset, tp, domain_key, toGetSph, recv_sph_offset, tp, Communicator, method);
  myMPI_Alltoallv_new(keyBuf, toGoDM, send_dm_offset, tp, domain_key, toGetDM, recv_dm_offset, tp, Communicator, method);
  MPI_Type_free(&tp);
#else
  my_int_MPI_Alltoallv(partBuf, toGoSph, send_sph_offset, P, toGetSph, recv_sph_offset, sizeof(pdata), flag_big_all, Communicator);

  my_int_MPI_Alltoallv(sphBuf, toGoSph, send_sph_offset, SphP, toGetSph, recv_sph_offset, sizeof(sph_particle_data), flag_big_all,
                       Communicator);

  my_int_MPI_Alltoallv(keyBuf, toGoSph, send_sph_offset, domain_key, toGetSph, recv_sph_offset, sizeof(peanokey), flag_big_all,
                       Communicator);

  my_int_MPI_Alltoallv(partBuf, toGoDM, send_dm_offset, P, toGetDM, recv_dm_offset, sizeof(pdata), flag_big_all, Communicator);

  my_int_MPI_Alltoallv(keyBuf, toGoDM, send_dm_offset, domain_key, toGetDM, recv_dm_offset, sizeof(peanokey), flag_big_all,
                       Communicator);
#endif

  Tp->NumPart = count_get_dm + count_get_sph;
  Tp->NumGas = count_get_sph;

  Mem.myfree(keyBuf);
  Mem.myfree(sphBuf);
  Mem.myfree(partBuf);

  Mem.myfree(recv_dm_offset);
  Mem.myfree(recv_sph_offset);
  Mem.myfree(send_dm_offset);
  Mem.myfree(send_sph_offset);

  Mem.myfree(toGetSph);
  Mem.myfree(toGetDM);
  Mem.myfree(toGoSph);
  Mem.myfree(toGoDM);

  double t1 = Logs.second();

  domain_printf("DOMAIN: particle exchange done. (took %g sec)\n", Logs.timediff(t0, t1));
}
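
/* The exchange above follows the usual all-to-all pattern: count how many particles go to each
 * task, exchange the counts with myMPI_Alltoall(), turn the send and receive counts into
 * exclusive prefix sums (the *_offset arrays), and finally move the payload with one Alltoallv
 * per array (P[], SphP[] and domain_key[]), all of which reuse the same counts and offsets.
 */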

template <typename partset>
void domain<partset>::peano_hilbert_order(peanokey *key)
{
  mpi_printf("PEANO: Begin Peano-Hilbert order...\n");
  double t0 = Logs.second();

  if(Tp->NumGas)
    {
      peano_hilbert_data *pmp = (peano_hilbert_data *)Mem.mymalloc("pmp", sizeof(peano_hilbert_data) * Tp->NumGas);
      int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * Tp->NumGas);

      for(int i = 0; i < Tp->NumGas; i++)
        {
          pmp[i].index = i;
          pmp[i].key = key[i];
        }

      mycxxsort(pmp, pmp + Tp->NumGas, compare_peano_hilbert_data);

      for(int i = 0; i < Tp->NumGas; i++)
        Id[pmp[i].index] = i;

      reorder_gas(Id);

      Mem.myfree(Id);
      Mem.myfree(pmp);
    }

  if(Tp->NumPart - Tp->NumGas > 0)
    {
      peano_hilbert_data *pmp = (peano_hilbert_data *)Mem.mymalloc("pmp", sizeof(peano_hilbert_data) * (Tp->NumPart - Tp->NumGas));
      int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * (Tp->NumPart - Tp->NumGas));

      for(int i = Tp->NumGas; i < Tp->NumPart; i++)
        {
          pmp[i - Tp->NumGas].index = i;
          pmp[i - Tp->NumGas].key = key[i];
        }

      mycxxsort(pmp, pmp + Tp->NumPart - Tp->NumGas, compare_peano_hilbert_data);

      for(int i = Tp->NumGas; i < Tp->NumPart; i++)
        Id[pmp[i - Tp->NumGas].index - Tp->NumGas] = i;

      reorder_particles(Id - Tp->NumGas, Tp->NumGas, Tp->NumPart);

      Mem.myfree(Id);
      Mem.myfree(pmp);
    }

  mpi_printf("PEANO: done, took %g sec.\n", Logs.timediff(t0, Logs.second()));
}
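
/* Note on the Id[] arrays constructed above: after sorting pmp[] by Peano-Hilbert key,
 * pmp[i].index is the current slot of the particle that should end up at position i, so
 * Id[pmp[i].index] = i records for every particle the position it has to move to;
 * reorder_gas() and reorder_particles() then apply this permutation in place. For the
 * collisionless block, Id is passed as Id - Tp->NumGas so that it can be indexed with the
 * global particle index in [NumGas, NumPart).
 */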

template <typename partset>
void domain<partset>::reorder_gas(int *Id)
{
  for(int i = 0; i < Tp->NumGas; i++)
    {
      if(Id[i] != i)
        {
          pdata Psource = Tp->P[i];
          sph_particle_data SphPsource = Tp->SphP[i];

          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              pdata Psave = Tp->P[dest];
              sph_particle_data SphPsave = Tp->SphP[dest];
              int idsave = Id[dest];

              Tp->P[dest] = Psource;
              Tp->SphP[dest] = SphPsource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              Psource = Psave;
              SphPsource = SphPsave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}
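
/* The loop above applies the permutation in place by following its cycles: starting from a
 * misplaced slot i, the carried element is written to its destination Id[.] and the element
 * found there is picked up, until the cycle closes at i again. For example, for Id = {2, 0, 1}
 * the single cycle 0 -> 2 -> 1 -> 0 is traversed once, and each visited Id[dest] is set to its
 * own index so the slot is never touched again. reorder_particles(), reorder_PS() and
 * reorder_P_and_PS() below use the same scheme for their respective arrays.
 */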

template <typename partset>
void domain<partset>::reorder_particles(int *Id, int Nstart, int N)
{
  for(int i = Nstart; i < N; i++)
    {
      if(Id[i] != i)
        {
          pdata Psource = Tp->P[i];
          int idsource = Id[i];

          int dest = Id[i];

          do
            {
              pdata Psave = Tp->P[dest];
              int idsave = Id[dest];

              Tp->P[dest] = Psource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              Psource = Psave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}

template <typename partset>
void domain<partset>::reorder_PS(int *Id, int Nstart, int N)
{
  for(int i = Nstart; i < N; i++)
    {
      if(Id[i] != i)
        {
          subfind_data PSsource = Tp->PS[i];

          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              subfind_data PSsave = Tp->PS[dest];
              int idsave = Id[dest];

              Tp->PS[dest] = PSsource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              PSsource = PSsave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}

template <typename partset>
void domain<partset>::reorder_P_and_PS(int *Id)
{
  for(int i = 0; i < Tp->NumPart; i++)
    {
      if(Id[i] != i)
        {
          pdata Psource = Tp->P[i];
          subfind_data PSsource = Tp->PS[i];

          int idsource = Id[i];
          int dest = Id[i];

          do
            {
              pdata Psave = Tp->P[dest];
              subfind_data PSsave = Tp->PS[dest];
              int idsave = Id[dest];

              Tp->P[dest] = Psource;
              Tp->PS[dest] = PSsource;
              Id[dest] = idsource;

              if(dest == i)
                break;

              Psource = Psave;
              PSsource = PSsave;
              idsource = idsave;

              dest = idsource;
            }
          while(1);
        }
    }
}

template <typename partset>
void domain<partset>::reorder_P_PS(int loc_numgas, int loc_numpart)
{
  local_sort_data *mp = (local_sort_data *)Mem.mymalloc("mp", sizeof(local_sort_data) * (loc_numpart - loc_numgas));
  mp -= loc_numgas;

  int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * (loc_numpart - loc_numgas));
  Id -= loc_numgas;

  for(int i = loc_numgas; i < loc_numpart; i++)
    {
      mp[i].index = i;
      mp[i].targetindex = Tp->PS[i].TargetIndex;
    }

  mycxxsort(mp + loc_numgas, mp + loc_numpart, compare_local_sort_data_targetindex);

  for(int i = loc_numgas; i < loc_numpart; i++)
    Id[mp[i].index] = i;

  reorder_particles(Id, loc_numgas, loc_numpart);

  for(int i = loc_numgas; i < loc_numpart; i++)
    Id[mp[i].index] = i;

  reorder_PS(Id, loc_numgas, loc_numpart);

  Id += loc_numgas;
  Mem.myfree(Id);
  mp += loc_numgas;
  Mem.myfree(mp);
}
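
/* reorder_P_PS() follows the same sort-and-permute pattern as above, but sorts by
 * PS[].TargetIndex and shifts the mp/Id pointers by loc_numgas so that both arrays can be
 * addressed with the global particle index. Id has to be rebuilt from mp[] before reorder_PS()
 * is called, because reorder_particles() overwrites the entries it visits with their own index
 * (leaving Id as the identity), so the original permutation would otherwise be lost.
 */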

/* This function redistributes the particles according to what is stored in
 * PS[].TargetTask, and PS[].TargetIndex.
 */
template <typename partset>
void domain<partset>::particle_exchange_based_on_PS(MPI_Comm Communicator)
{
  int CommThisTask, CommNTask, CommPTask;
  MPI_Comm_size(Communicator, &CommNTask);
  MPI_Comm_rank(Communicator, &CommThisTask);

  for(CommPTask = 0; CommNTask > (1 << CommPTask); CommPTask++)
    ;
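
  /* CommPTask is the smallest number of bits with (1 << CommPTask) >= CommNTask; e.g. for
   * CommNTask = 5 the loop yields CommPTask = 3. The pairwise exchange loops further down run
   * over ngrp = 1 ... (1 << CommPTask) - 1 and pair this rank with CommThisTask ^ ngrp (a
   * hypercube pattern), skipping partners that fall outside the communicator.
   */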

  int *Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * CommNTask);
  int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * CommNTask);
  int *Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * CommNTask);
  int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * CommNTask);
  int nimport = 0, nexport = 0, nstay = 0, nlocal = 0;

  /* for type_select == 0, we process gas particles, otherwise all other particles */
  for(int type_select = 0; type_select < 2; type_select++)
    {
      /* In order to be able to later distribute the PS[] array, we need to temporarily save the particle type array,
       * and save the old particle number
       */

      unsigned char *Ptype = (unsigned char *)Mem.mymalloc_movable(&Ptype, "Ptype", sizeof(unsigned char) * Tp->NumPart);
      int *Ptask = (int *)Mem.mymalloc_movable(&Ptask, "Ptask", sizeof(int) * Tp->NumPart);

      for(int i = 0; i < Tp->NumPart; i++)
        {
          Ptype[i] = Tp->P[i].getType();
          Ptask[i] = Tp->PS[i].TargetTask;

          if(Ptype[i] == 0 && i >= Tp->NumGas)
            Terminate("Bummer1");

          if(Ptype[i] != 0 && i < Tp->NumGas)
            Terminate("Bummer2");
        }

      int NumPart_saved = Tp->NumPart;

      /* distribute gas particles up front */
      if(type_select == 0)
        {
          sph_particle_data *sphBuf = NULL;

          for(int rep = 0; rep < 2; rep++)
            {
              for(int n = 0; n < CommNTask; n++)
                Send_count[n] = 0;

              nstay = 0;

              for(int n = 0; n < Tp->NumGas; n++)
                {
                  int target = Ptask[n];

                  if(rep == 0)
                    {
                      if(target != CommThisTask)
                        Send_count[target]++;
                      else
                        nstay++;
                    }
                  else
                    {
                      if(target != CommThisTask)
                        sphBuf[Send_offset[target] + Send_count[target]++] = Tp->SphP[n];
                      else
                        Tp->SphP[nstay++] = Tp->SphP[n];
                    }
                }

              if(rep == 0)
                {
                  myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

                  nimport = 0, nexport = 0;
                  Recv_offset[0] = Send_offset[0] = 0;
                  for(int j = 0; j < CommNTask; j++)
                    {
                      nexport += Send_count[j];
                      nimport += Recv_count[j];

                      if(j > 0)
                        {
                          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                        }
                    }

                  sphBuf = (sph_particle_data *)Mem.mymalloc_movable(&sphBuf, "sphBuf", nexport * sizeof(sph_particle_data));
                }
              else
                {
                  Tp->NumGas += (nimport - nexport);

                  int max_loadsph = Tp->NumGas;
                  MPI_Allreduce(MPI_IN_PLACE, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);

                  if(max_loadsph > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPartSph ||
                     max_loadsph < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPartSph)
                    Tp->reallocate_memory_maxpartsph(max_loadsph / (1.0 - 2 * ALLOC_TOLERANCE));

                  for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
                    {
                      int target = CommThisTask ^ ngrp;

                      if(target < CommNTask)
                        {
                          if(Send_count[target] > 0 || Recv_count[target] > 0)
                            {
                              myMPI_Sendrecv(sphBuf + Send_offset[target], Send_count[target] * sizeof(sph_particle_data), MPI_BYTE,
                                             target, TAG_SPHDATA, Tp->SphP + Recv_offset[target] + nstay,
                                             Recv_count[target] * sizeof(sph_particle_data), MPI_BYTE, target, TAG_SPHDATA,
                                             Communicator, MPI_STATUS_IGNORE);
                            }
                        }
                    }
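
                  /* Incoming gas is received at Tp->SphP + Recv_offset[target] + nstay, i.e. directly behind
                   * the nstay particles that stayed on this task, so SphP[] ends up as
                   * [resident gas | imported gas]; the desired local order is only established later via
                   * PS[].TargetIndex.
                   */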

                  Mem.myfree(sphBuf);
                }
            }
        }

      pdata *partBuf = NULL;

      for(int rep = 0; rep < 2; rep++)
        {
          for(int n = 0; n < CommNTask; n++)
            Send_count[n] = 0;

          nstay = 0;
          nlocal = 0;

          for(int n = 0; n < NumPart_saved; n++)
            {
              if(Ptype[n] == type_select || (type_select != 0))
                {
                  int target = Ptask[n];

                  if(rep == 0)
                    {
                      if(target != CommThisTask)
                        Send_count[target]++;
                      else
                        {
                          nstay++;
                          nlocal++;
                        }
                    }
                  else
                    {
                      if(target != CommThisTask)
                        partBuf[Send_offset[target] + Send_count[target]++] = Tp->P[n];
                      else
                        {
                          Tp->P[nstay++] = Tp->P[n];
                          nlocal++;
                        }
                    }
                }
              else
                {
                  // this is only relevant for type_select == 0
                  if(rep == 0)
                    nstay++;
                  else
                    Tp->P[nstay++] = Tp->P[n];
                }
            }

          if(rep == 0)
            {
              myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

              nimport = 0, nexport = 0;
              Recv_offset[0] = Send_offset[0] = 0;
              for(int j = 0; j < CommNTask; j++)
                {
                  nexport += Send_count[j];
                  nimport += Recv_count[j];

                  if(j > 0)
                    {
                      Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                    }
                }

              partBuf = (pdata *)Mem.mymalloc_movable(&partBuf, "partBuf", nexport * sizeof(pdata));
            }
          else
            {
              Tp->NumPart += (nimport - nexport);

              int max_load = Tp->NumPart;
              MPI_Allreduce(MPI_IN_PLACE, &max_load, 1, MPI_INT, MPI_MAX, Communicator);

              if(max_load > (1.0 - ALLOC_TOLERANCE) * Tp->MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * Tp->MaxPart)
                Tp->reallocate_memory_maxpart(max_load / (1.0 - 2 * ALLOC_TOLERANCE));

              if(type_select == 0)
                {
                  // create a gap to place the incoming particles at the end of the already present gas particles
                  memmove(static_cast<void *>(Tp->P + nlocal + nimport), static_cast<void *>(Tp->P + nlocal),
                          (nstay - nlocal) * sizeof(pdata));
                }
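
              /* Sketch of the memmove above for type_select == 0: P[] currently holds
               * [staying gas (nlocal) | staying non-gas (nstay - nlocal)]; the non-gas block is shifted up
               * by nimport slots so that the imported gas received below lands at P + nlocal, keeping all
               * gas particles in front of the collisionless ones.
               */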

              for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
                {
                  int target = CommThisTask ^ ngrp;

                  if(target < CommNTask)
                    {
                      if(Send_count[target] > 0 || Recv_count[target] > 0)
                        {
                          myMPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] * sizeof(pdata), MPI_BYTE, target,
                                         TAG_PDATA, Tp->P + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(pdata), MPI_BYTE,
                                         target, TAG_PDATA, Communicator, MPI_STATUS_IGNORE);
                        }
                    }
                }

              Mem.myfree(partBuf);
            }
        }

      /* now deal with subfind data */

      subfind_data *subBuf = NULL;
      for(int rep = 0; rep < 2; rep++)
        {
          for(int n = 0; n < CommNTask; n++)
            Send_count[n] = 0;

          nstay = 0;
          nlocal = 0;

          for(int n = 0; n < NumPart_saved; n++)
            {
              if(Ptype[n] == type_select || (type_select != 0))
                {
                  int target = Ptask[n];

                  if(rep == 0)
                    {
                      if(target != CommThisTask)
                        Send_count[target]++;
                      else
                        {
                          nstay++;
                          nlocal++;
                        }
                    }
                  else
                    {
                      if(target != CommThisTask)
                        subBuf[Send_offset[target] + Send_count[target]++] = Tp->PS[n];
                      else
                        {
                          Tp->PS[nstay++] = Tp->PS[n];
                          nlocal++;
                        }
                    }
                }
              else
                {
                  // this is only relevant for type_select == 0
                  if(rep == 0)
                    nstay++;
                  else
                    Tp->PS[nstay++] = Tp->PS[n];
                }
            }

          if(rep == 0)
            {
              myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

              nimport = 0, nexport = 0;
              Recv_offset[0] = Send_offset[0] = 0;
              for(int j = 0; j < CommNTask; j++)
                {
                  nexport += Send_count[j];
                  nimport += Recv_count[j];

                  if(j > 0)
                    {
                      Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                    }
                }

              subBuf = (subfind_data *)Mem.mymalloc_movable(&subBuf, "subBuf", nexport * sizeof(subfind_data));
            }
          else
            {
              /* reallocate with new particle number */
              Tp->PS = (subfind_data *)Mem.myrealloc_movable(Tp->PS, Tp->NumPart * sizeof(subfind_data));

              if(type_select == 0)
                {
                  // create a gap to place the incoming particles at the end of the already present gas particles
                  memmove(Tp->PS + nlocal + nimport, Tp->PS + nlocal, (nstay - nlocal) * sizeof(subfind_data));
                }

              for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
                {
                  int target = CommThisTask ^ ngrp;

                  if(target < CommNTask)
                    {
                      if(Send_count[target] > 0 || Recv_count[target] > 0)
                        {
                          myMPI_Sendrecv(subBuf + Send_offset[target], Send_count[target] * sizeof(subfind_data), MPI_BYTE, target,
                                         TAG_KEY, Tp->PS + Recv_offset[target] + nlocal, Recv_count[target] * sizeof(subfind_data),
                                         MPI_BYTE, target, TAG_KEY, Communicator, MPI_STATUS_IGNORE);
                        }
                    }
                }

              Mem.myfree(subBuf);
            }
        }

      Mem.myfree(Ptask);
      Mem.myfree(Ptype);
    }

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  /* finally, let's also address the desired local order according to PS[].TargetIndex */

  if(Tp->NumGas)
    {
      local_sort_data *mp = (local_sort_data *)Mem.mymalloc("mp", sizeof(local_sort_data) * Tp->NumGas);
      int *Id = (int *)Mem.mymalloc("Id", sizeof(int) * Tp->NumGas);

      for(int i = 0; i < Tp->NumGas; i++)
        {
          mp[i].index = i;
          mp[i].targetindex = Tp->PS[i].TargetIndex;
        }

      mycxxsort(mp, mp + Tp->NumGas, compare_local_sort_data_targetindex);

      for(int i = 0; i < Tp->NumGas; i++)
        Id[mp[i].index] = i;

      reorder_gas(Id);

      for(int i = 0; i < Tp->NumGas; i++)
        Id[mp[i].index] = i;

      reorder_PS(Id, 0, Tp->NumGas);

      Mem.myfree(Id);
      Mem.myfree(mp);
    }

  if(Tp->NumPart - Tp->NumGas > 0)
    {
      reorder_P_PS(Tp->NumGas, Tp->NumPart);
    }
}

#include "../data/simparticles.h"
template class domain<simparticles>;

#ifdef LIGHTCONE_PARTICLES
#include "../data/lcparticles.h"
template class domain<lcparticles>;
#endif