12#include "gadgetconfig.h"
21#include "../data/allvars.h"
22#include "../data/dtypes.h"
23#include "../data/mymalloc.h"
24#include "../domain/domain.h"
25#include "../fof/fof.h"
26#include "../logs/timer.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/mpi_utils.h"
29#include "../ngbtree/ngbtree.h"
30#include "../sort/cxxsort.h"
31#include "../system/system.h"
36template <
typename partset>
39 int max_load, load = count_get_total;
40 int max_sphload, sphload = count_get_sph;
41 MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
42 MPI_Allreduce(&sphload, &max_sphload, 1, MPI_INT, MPI_MAX, Communicator);
59 Tp->reallocate_memory_maxpartsph(maxpartsphNew);
67template <
typename partset>
70 for(
int n = 0; n < NTask; n++)
72 toGoDM[n] = toGoSph[n] = 0;
75 for(
int n = 0; n < Tp->NumPart; n++)
79 if(Tp->P[n].getType() == 0)
80 toGoSph[TaskOfLeaf[no]]++;
82 toGoDM[TaskOfLeaf[no]]++;
86template <
typename partset>
90 for(
int i = 0; i < Tp->NumPart; i++)
92 int task = TaskOfLeaf[n_to_no(i)];
94 Tp->PS[i].TargetTask = task;
95 Tp->PS[i].TargetIndex = 0;
100template <
typename partset>
105 int *toGoDM = (
int *)
Mem.mymalloc_movable(&toGoDM,
"toGoDM", NTask *
sizeof(
int));
106 int *toGoSph = (
int *)
Mem.mymalloc_movable(&toGoSph,
"toGoSph", NTask *
sizeof(
int));
107 int *toGetDM = (
int *)
Mem.mymalloc_movable(&toGetDM,
"toGetDM", NTask *
sizeof(
int));
108 int *toGetSph = (
int *)
Mem.mymalloc_movable(&toGetSph,
"toGetSph", NTask *
sizeof(
int));
110 domain_countToGo(toGoDM, toGoSph);
112 int *toGo = (
int *)
Mem.mymalloc(
"toGo", 2 * NTask *
sizeof(
int));
113 int *toGet = (
int *)
Mem.mymalloc(
"toGet", 2 * NTask *
sizeof(
int));
115 for(
int i = 0; i < NTask; ++i)
117 toGo[2 * i] = toGoDM[i];
118 toGo[2 * i + 1] = toGoSph[i];
120 myMPI_Alltoall(toGo, 2, MPI_INT, toGet, 2, MPI_INT, Communicator);
121 for(
int i = 0; i < NTask; ++i)
123 toGetDM[i] = toGet[2 * i];
124 toGetSph[i] = toGet[2 * i + 1];
129 int count_togo_dm = 0, count_togo_sph = 0, count_get_dm = 0, count_get_sph = 0;
130 for(
int i = 0; i < NTask; i++)
132 count_togo_dm += toGoDM[i];
133 count_togo_sph += toGoSph[i];
134 count_get_dm += toGetDM[i];
135 count_get_sph += toGetSph[i];
138 long long sumtogo = count_togo_dm;
141 domain_printf(
"DOMAIN: exchange of %lld particles\n", sumtogo);
143 if(Tp->NumPart != count_togo_dm + count_togo_sph)
146 int *send_sph_offset = (
int *)
Mem.mymalloc_movable(&send_sph_offset,
"send_sph_offset", NTask *
sizeof(
int));
147 int *send_dm_offset = (
int *)
Mem.mymalloc_movable(&send_dm_offset,
"send_dm_offset", NTask *
sizeof(
int));
148 int *recv_sph_offset = (
int *)
Mem.mymalloc_movable(&recv_sph_offset,
"recv_sph_offset", NTask *
sizeof(
int));
149 int *recv_dm_offset = (
int *)
Mem.mymalloc_movable(&recv_dm_offset,
"recv_dm_offset", NTask *
sizeof(
int));
151 send_sph_offset[0] = send_dm_offset[0] = recv_sph_offset[0] = recv_dm_offset[0] = 0;
152 for(
int i = 1; i < NTask; i++)
154 send_sph_offset[i] = send_sph_offset[i - 1] + toGoSph[i - 1];
155 send_dm_offset[i] = send_dm_offset[i - 1] + toGoDM[i - 1];
157 recv_sph_offset[i] = recv_sph_offset[i - 1] + toGetSph[i - 1];
158 recv_dm_offset[i] = recv_dm_offset[i - 1] + toGetDM[i - 1];
161 for(
int i = 0; i < NTask; i++)
163 send_dm_offset[i] += count_togo_sph;
164 recv_dm_offset[i] += count_get_sph;
168 (
typename partset::pdata *)
Mem.mymalloc_movable_clear(&partBuf,
"partBuf", (count_togo_dm + count_togo_sph) *
sizeof(pdata));
173 for(
int i = 0; i < NTask; i++)
174 toGoSph[i] = toGoDM[i] = 0;
176 for(
int n = 0; n < Tp->NumPart; n++)
179 int task = TaskOfLeaf[n_to_no(n)];
181 if(Tp->P[n].getType() == 0)
183 num = toGoSph[task]++;
185 off = send_sph_offset[task] + num;
186 sphBuf[off] = Tp->SphP[n];
190 num = toGoDM[task]++;
192 off = send_dm_offset[task] + num;
195 partBuf[off] = Tp->P[n];
196 keyBuf[off] = domain_key[n];
200 domain_resize_storage(count_get_dm + count_get_sph, count_get_sph, 1);
208 int flag_big = 0, flag_big_all;
209 for(
int i = 0; i < NTask; i++)
218 MPI_Allreduce(&flag_big, &flag_big_all, 1, MPI_INT, MPI_MAX, Communicator);
221#ifdef USE_MPIALLTOALLV_IN_DOMAINDECOMP
224#ifndef ISEND_IRECV_IN_DOMAIN
231 MPI_Type_contiguous(
sizeof(
typename partset::pdata), MPI_CHAR, &tp);
232 MPI_Type_commit(&tp);
233 myMPI_Alltoallv_new(partBuf, toGoSph, send_sph_offset, tp, Tp->P, toGetSph, recv_sph_offset, tp, Communicator, method);
234 myMPI_Alltoallv_new(partBuf, toGoDM, send_dm_offset, tp, Tp->P, toGetDM, recv_dm_offset, tp, Communicator, method);
237 MPI_Type_commit(&tp);
238 myMPI_Alltoallv_new(sphBuf, toGoSph, send_sph_offset, tp, Tp->SphP, toGetSph, recv_sph_offset, tp, Communicator, method);
240 MPI_Type_contiguous(
sizeof(
peanokey), MPI_CHAR, &tp);
241 MPI_Type_commit(&tp);
242 myMPI_Alltoallv_new(keyBuf, toGoSph, send_sph_offset, tp, domain_key, toGetSph, recv_sph_offset, tp, Communicator, method);
243 myMPI_Alltoallv_new(keyBuf, toGoDM, send_dm_offset, tp, domain_key, toGetDM, recv_dm_offset, tp, Communicator, method);
246 my_int_MPI_Alltoallv(partBuf, toGoSph, send_sph_offset, P, toGetSph, recv_sph_offset,
sizeof(pdata), flag_big_all, Communicator);
254 my_int_MPI_Alltoallv(partBuf, toGoDM, send_dm_offset, P, toGetDM, recv_dm_offset,
sizeof(pdata), flag_big_all, Communicator);
260 Tp->NumPart = count_get_dm + count_get_sph;
261 Tp->NumGas = count_get_sph;
267 Mem.myfree(recv_dm_offset);
268 Mem.myfree(recv_sph_offset);
269 Mem.myfree(send_dm_offset);
270 Mem.myfree(send_sph_offset);
272 Mem.myfree(toGetSph);
279 domain_printf(
"DOMAIN: particle exchange done. (took %g sec)\n",
Logs.
timediff(t0, t1));
282template <
typename partset>
285 mpi_printf(
"PEANO: Begin Peano-Hilbert order...\n");
290 peano_hilbert_data *pmp = (peano_hilbert_data *)
Mem.mymalloc(
"pmp",
sizeof(peano_hilbert_data) * Tp->NumGas);
291 int *Id = (
int *)
Mem.mymalloc(
"Id",
sizeof(
int) * Tp->NumGas);
293 for(
int i = 0; i < Tp->NumGas; i++)
299 mycxxsort(pmp, pmp + Tp->NumGas, compare_peano_hilbert_data);
301 for(
int i = 0; i < Tp->NumGas; i++)
302 Id[pmp[i].index] = i;
310 if(Tp->NumPart - Tp->NumGas > 0)
312 peano_hilbert_data *pmp = (peano_hilbert_data *)
Mem.mymalloc(
"pmp",
sizeof(peano_hilbert_data) * (Tp->NumPart - Tp->NumGas));
313 int *Id = (
int *)
Mem.mymalloc(
"Id",
sizeof(
int) * (Tp->NumPart - Tp->NumGas));
315 for(
int i = Tp->NumGas; i < Tp->NumPart; i++)
317 pmp[i - Tp->NumGas].index = i;
318 pmp[i - Tp->NumGas].key = key[i];
321 mycxxsort(pmp, pmp + Tp->NumPart - Tp->NumGas, compare_peano_hilbert_data);
323 for(
int i = Tp->NumGas; i < Tp->NumPart; i++)
324 Id[pmp[i - Tp->NumGas].index - Tp->NumGas] = i;
326 reorder_particles(Id - Tp->NumGas, Tp->NumGas, Tp->NumPart);
335template <
typename partset>
338 for(
int i = 0; i < Tp->NumGas; i++)
342 pdata Psource = Tp->P[i];
345 int idsource = Id[i];
350 pdata Psave = Tp->P[dest];
352 int idsave = Id[dest];
354 Tp->P[dest] = Psource;
355 Tp->SphP[dest] = SphPsource;
362 SphPsource = SphPsave;
372template <
typename partset>
375 for(
int i = Nstart; i < N; i++)
379 pdata Psource = Tp->P[i];
380 int idsource = Id[i];
386 pdata Psave = Tp->P[dest];
387 int idsave = Id[dest];
389 Tp->P[dest] = Psource;
405template <
typename partset>
408 for(
int i = Nstart; i < N; i++)
414 int idsource = Id[i];
420 int idsave = Id[dest];
422 Tp->PS[dest] = PSsource;
438template <
typename partset>
441 for(
int i = 0; i < Tp->NumPart; i++)
445 pdata Psource = Tp->P[i];
448 int idsource = Id[i];
453 pdata Psave = Tp->P[dest];
455 int idsave = Id[dest];
457 Tp->P[dest] = Psource;
458 Tp->PS[dest] = PSsource;
475template <
typename partset>
478 local_sort_data *mp = (local_sort_data *)
Mem.mymalloc(
"mp",
sizeof(local_sort_data) * (loc_numpart - loc_numgas));
481 int *Id = (
int *)
Mem.mymalloc(
"Id",
sizeof(
int) * (loc_numpart - loc_numgas));
484 for(
int i = loc_numgas; i < loc_numpart; i++)
487 mp[i].targetindex = Tp->PS[i].TargetIndex;
490 mycxxsort(mp + loc_numgas, mp + loc_numpart, compare_local_sort_data_targetindex);
492 for(
int i = loc_numgas; i < loc_numpart; i++)
495 reorder_particles(Id, loc_numgas, loc_numpart);
497 for(
int i = loc_numgas; i < loc_numpart; i++)
500 reorder_PS(Id, loc_numgas, loc_numpart);
511template <
typename partset>
514 int CommThisTask, CommNTask, CommPTask;
515 MPI_Comm_size(Communicator, &CommNTask);
516 MPI_Comm_rank(Communicator, &CommThisTask);
518 for(CommPTask = 0; CommNTask > (1 << CommPTask); CommPTask++)
521 int *Send_count = (
int *)
Mem.mymalloc_movable(&Send_count,
"Send_count",
sizeof(
int) * CommNTask);
522 int *Send_offset = (
int *)
Mem.mymalloc_movable(&Send_offset,
"Send_offset",
sizeof(
int) * CommNTask);
523 int *Recv_count = (
int *)
Mem.mymalloc_movable(&Recv_count,
"Recv_count",
sizeof(
int) * CommNTask);
524 int *Recv_offset = (
int *)
Mem.mymalloc_movable(&Recv_offset,
"Recv_offset",
sizeof(
int) * CommNTask);
525 int nimport = 0, nexport = 0, nstay = 0, nlocal = 0;
528 for(
int type_select = 0; type_select < 2; type_select++)
534 unsigned char *Ptype = (
unsigned char *)
Mem.mymalloc_movable(&Ptype,
"Ptype",
sizeof(
unsigned char) * Tp->NumPart);
535 int *Ptask = (
int *)
Mem.mymalloc_movable(&Ptask,
"Ptask",
sizeof(
int) * Tp->NumPart);
537 for(
int i = 0; i < Tp->NumPart; i++)
539 Ptype[i] = Tp->P[i].getType();
540 Ptask[i] = Tp->PS[i].TargetTask;
542 if(Ptype[i] == 0 && i >= Tp->NumGas)
545 if(Ptype[i] != 0 && i < Tp->NumGas)
549 int NumPart_saved = Tp->NumPart;
556 for(
int rep = 0; rep < 2; rep++)
558 for(
int n = 0; n < CommNTask; n++)
563 for(
int n = 0; n < Tp->NumGas; n++)
565 int target = Ptask[n];
569 if(target != CommThisTask)
570 Send_count[target]++;
576 if(target != CommThisTask)
577 sphBuf[Send_offset[target] + Send_count[target]++] = Tp->SphP[n];
579 Tp->SphP[nstay++] = Tp->SphP[n];
585 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
587 nimport = 0, nexport = 0;
588 Recv_offset[0] = Send_offset[0] = 0;
589 for(
int j = 0; j < CommNTask; j++)
591 nexport += Send_count[j];
592 nimport += Recv_count[j];
596 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
597 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
605 Tp->NumGas += (nimport - nexport);
607 int max_loadsph = Tp->NumGas;
608 MPI_Allreduce(MPI_IN_PLACE, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);
612 Tp->reallocate_memory_maxpartsph(max_loadsph / (1.0 - 2 *
ALLOC_TOLERANCE));
614 for(
int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
616 int target = CommThisTask ^ ngrp;
618 if(target < CommNTask)
620 if(Send_count[target] > 0 || Recv_count[target] > 0)
623 target,
TAG_SPHDATA, Tp->SphP + Recv_offset[target] + nstay,
625 Communicator, MPI_STATUS_IGNORE);
635 pdata *partBuf = NULL;
637 for(
int rep = 0; rep < 2; rep++)
639 for(
int n = 0; n < CommNTask; n++)
645 for(
int n = 0; n < NumPart_saved; n++)
647 if(Ptype[n] == type_select || (type_select != 0))
649 int target = Ptask[n];
653 if(target != CommThisTask)
654 Send_count[target]++;
663 if(target != CommThisTask)
664 partBuf[Send_offset[target] + Send_count[target]++] = Tp->P[n];
667 Tp->P[nstay++] = Tp->P[n];
678 Tp->P[nstay++] = Tp->P[n];
684 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
686 nimport = 0, nexport = 0;
687 Recv_offset[0] = Send_offset[0] = 0;
688 for(
int j = 0; j < CommNTask; j++)
690 nexport += Send_count[j];
691 nimport += Recv_count[j];
695 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
696 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
700 partBuf = (
pdata *)
Mem.mymalloc_movable(&partBuf,
"partBuf", nexport *
sizeof(
pdata));
704 Tp->NumPart += (nimport - nexport);
706 int max_load = Tp->NumPart;
707 MPI_Allreduce(MPI_IN_PLACE, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
715 memmove(
static_cast<void *
>(Tp->P + nlocal + nimport),
static_cast<void *
>(Tp->P + nlocal),
716 (nstay - nlocal) *
sizeof(
pdata));
719 for(
int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
721 int target = CommThisTask ^ ngrp;
723 if(target < CommNTask)
725 if(Send_count[target] > 0 || Recv_count[target] > 0)
727 myMPI_Sendrecv(partBuf + Send_offset[target], Send_count[target] *
sizeof(
pdata), MPI_BYTE, target,
728 TAG_PDATA, Tp->P + Recv_offset[target] + nlocal, Recv_count[target] *
sizeof(
pdata), MPI_BYTE,
729 target,
TAG_PDATA, Communicator, MPI_STATUS_IGNORE);
741 for(
int rep = 0; rep < 2; rep++)
743 for(
int n = 0; n < CommNTask; n++)
749 for(
int n = 0; n < NumPart_saved; n++)
751 if(Ptype[n] == type_select || (type_select != 0))
753 int target = Ptask[n];
757 if(target != CommThisTask)
758 Send_count[target]++;
767 if(target != CommThisTask)
768 subBuf[Send_offset[target] + Send_count[target]++] = Tp->PS[n];
771 Tp->PS[nstay++] = Tp->PS[n];
782 Tp->PS[nstay++] = Tp->PS[n];
788 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
790 nimport = 0, nexport = 0;
791 Recv_offset[0] = Send_offset[0] = 0;
792 for(
int j = 0; j < CommNTask; j++)
794 nexport += Send_count[j];
795 nimport += Recv_count[j];
799 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
800 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
814 memmove(Tp->PS + nlocal + nimport, Tp->PS + nlocal, (nstay - nlocal) *
sizeof(
subfind_data));
817 for(
int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
819 int target = CommThisTask ^ ngrp;
821 if(target < CommNTask)
823 if(Send_count[target] > 0 || Recv_count[target] > 0)
827 MPI_BYTE, target,
TAG_KEY, Communicator, MPI_STATUS_IGNORE);
840 Mem.myfree(Recv_offset);
841 Mem.myfree(Recv_count);
842 Mem.myfree(Send_offset);
843 Mem.myfree(Send_count);
849 local_sort_data *mp = (local_sort_data *)
Mem.mymalloc(
"mp",
sizeof(local_sort_data) * Tp->NumGas);
850 int *Id = (
int *)
Mem.mymalloc(
"Id",
sizeof(
int) * Tp->NumGas);
852 for(
int i = 0; i < Tp->NumGas; i++)
855 mp[i].targetindex = Tp->PS[i].TargetIndex;
858 mycxxsort(mp, mp + Tp->NumGas, compare_local_sort_data_targetindex);
860 for(
int i = 0; i < Tp->NumGas; i++)
865 for(
int i = 0; i < Tp->NumGas; i++)
868 reorder_PS(Id, 0, Tp->NumGas);
874 if(Tp->NumPart - Tp->NumGas > 0)
876 reorder_P_PS(Tp->NumGas, Tp->NumPart);
880#include "../data/simparticles.h"
883#ifdef LIGHTCONE_PARTICLES
884#include "../data/lcparticles.h"
void reorder_gas(int *Id)
void reorder_PS(int *Id, int Nstart, int N)
void reorder_particles(int *Id, int Nstart, int N)
void particle_exchange_based_on_PS(MPI_Comm Communicator)
void reorder_P_and_PS(int *Id)
void reorder_P_PS(int NumGas, int NumPart)
void domain_resize_storage(int count_get, int count_get_sph, int option_flag)
double timediff(double t0, double t1)
#define MPI_MESSAGE_SIZELIMIT_IN_BYTES
double mycxxsort(T *begin, T *end, Tcomp comp)
void my_int_MPI_Alltoallv(void *sendb, int *sendcounts, int *sdispls, void *recvb, int *recvcounts, int *rdispls, int len, int big_flag, MPI_Comm comm)
void myMPI_Alltoallv_new(void *sendb, int *sendcounts, int *sdispls, MPI_Datatype sendtype, void *recvb, int *recvcounts, int *rdispls, MPI_Datatype recvtype, MPI_Comm comm, int method)
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
void sumup_longs(int n, long long *src, long long *res, MPI_Comm comm)