#include "gadgetconfig.h"

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../fof/fof_io.h"
#include "../gravtree/gravtree.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mergertree/mergertree.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/cxxsort.h"
#include "../sort/parallel_sort.h"
#include "../subfind/subfind.h"
#include "../system/system.h"
#include "../time_integration/timestep.h"
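
/* This is the driver routine of SUBFIND: it refines the density estimates of all particles,
   decides which FOF groups are processed collectively and which serially, redistributes the
   particles accordingly, runs the substructure finding, computes spherical overdensity
   masses, and finally writes the group and subhalo catalogues. */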
template <typename partset>
void fof<partset>::subfind_find_subhalos(int num, const char *basename, const char *grpcat_dirbasename)
  mpi_printf("\nSUBFIND: We now execute a parallel version of SUBFIND.\n");

  double lensum = 0, partcount = 0;
  for(int i = 0; i < Tp->NumPart; i++)
    {
      if(Tp->P[i].PrevSizeOfSubhalo.get() > 0)
        lensum += Tp->P[i].PrevSizeOfSubhalo.get();

      Tp->PS[i].SizeOfSubhalo.set(0);
    }
  MPI_Allreduce(MPI_IN_PLACE, &partcount, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  MPI_Allreduce(MPI_IN_PLACE, &lensum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  mpi_printf("SUBFIND: Previous subhalo catalogue had approximately a size %g, and the summed squared subhalo size was %g\n",
             partcount, lensum);
  FoFGravTree.treeallocate(Tp->NumPart, Tp, FoFDomain);
  FoFGravTree.treebuild(Tp->NumPart, NULL);

  subfind_density_hsml_guess();

  double cputime = subfind_density();

  mpi_printf("SUBFIND: iteration to correct primary neighbor count and density estimate took %g sec\n", cputime);

  FoFGravTree.treefree();
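
  /* Store the thermal energy of gas cells in the auxiliary PS[] array; all other particle
     types get zero. */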
  for(int i = 0; i < Tp->NumPart; i++)
    if(Tp->P[i].getType() == 0)
      Tp->PS[i].Utherm = Tp->get_utherm_from_entropy(i);
    else
      Tp->PS[i].Utherm = 0;
  double GroupSizeThresholdFactor = 0.55;
  int ncount = 0, nprocs = 0;
  long long seriallen = 0;
  long long sum_seriallen;

  do
    {
      GroupSizeThresholdFactor += 0.05;
      ncount = nprocs = seriallen = 0;

      MaxSerialGroupLen = (int)(GroupSizeThresholdFactor * Tp->TotNumPart / NTask);

      for(int i = 0; i < Ngroups; i++)
        if(Group[i].Len > MaxSerialGroupLen)
          {
            ncount++;
            nprocs += ((Group[i].Len - 1) / MaxSerialGroupLen) + 1;
          }
        else
          seriallen += Group[i].Len;

      MPI_Allreduce(&ncount, &Ncollective, 1, MPI_INT, MPI_SUM, Communicator);
      MPI_Allreduce(&nprocs, &NprocsCollective, 1, MPI_INT, MPI_SUM, Communicator);
      sumup_longs(1, &seriallen, &sum_seriallen, Communicator);
    }
  while(NprocsCollective + ((sum_seriallen > 0) ? 1 : 0) > NTask);
  mpi_printf("SUBFIND: Number of FOF halos treated with collective SubFind algorithm = %d\n", Ncollective);
  mpi_printf("SUBFIND: Number of processors used in different partitions for the collective SubFind code = %d\n", NprocsCollective);
  mpi_printf("SUBFIND: (The adopted size-limit for the collective algorithm was %d particles, for threshold size factor %g)\n",
             MaxSerialGroupLen, GroupSizeThresholdFactor);
  mpi_printf("SUBFIND: The other %lld FOF halos are treated in parallel with serial code\n", TotNgroups - Ncollective);
  ProcAssign = (proc_assign_data *)Mem.mymalloc_movable(&ProcAssign, "ProcAssign", Ncollective * sizeof(proc_assign_data));
  proc_assign_data *locProcAssign = (proc_assign_data *)Mem.mymalloc("locProcAssign", ncount * sizeof(proc_assign_data));

  ncount = 0;
  for(int i = 0; i < Ngroups; i++)
    if(Group[i].Len > MaxSerialGroupLen)
      {
        locProcAssign[ncount].GroupNr = Group[i].GroupNr;
        locProcAssign[ncount].Len     = Group[i].Len;
        ncount++;
      }

  int *recvcounts = (int *)Mem.mymalloc("recvcounts", sizeof(int) * NTask);
  int *bytecounts = (int *)Mem.mymalloc("bytecounts", sizeof(int) * NTask);
  int *byteoffset = (int *)Mem.mymalloc("byteoffset", sizeof(int) * NTask);

  MPI_Allgather(&ncount, 1, MPI_INT, recvcounts, 1, MPI_INT, Communicator);

  for(int task = 0; task < NTask; task++)
    bytecounts[task] = recvcounts[task] * sizeof(proc_assign_data);

  byteoffset[0] = 0;
  for(int task = 1; task < NTask; task++)
    byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1];

  myMPI_Allgatherv(locProcAssign, bytecounts[ThisTask], MPI_BYTE, ProcAssign, bytecounts, byteoffset, MPI_BYTE, Communicator);

  Mem.myfree(byteoffset);
  Mem.myfree(bytecounts);
  Mem.myfree(recvcounts);
  Mem.myfree(locProcAssign);

  /* sort the global table by group number */
  mycxxsort(ProcAssign, ProcAssign + Ncollective, subfind_compare_procassign_GroupNr);
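
  /* Assign a contiguous range of tasks to each collective group, and record for every group
     (collective or serial) the task it should be moved to. */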
  CommSplitColor = ThisTask; /* default color for tasks that only work on serially treated groups */

  nprocs = 0;
  for(int i = 0; i < Ncollective; i++)
    {
      ProcAssign[i].FirstTask = nprocs;
      ProcAssign[i].NTask     = ((ProcAssign[i].Len - 1) / MaxSerialGroupLen) + 1;
      nprocs += ProcAssign[i].NTask;

      if(ThisTask >= ProcAssign[i].FirstTask && ThisTask < (ProcAssign[i].FirstTask + ProcAssign[i].NTask))
        CommSplitColor = i;
    }

  for(int i = 0; i < Ngroups; i++)
    {
      if(Group[i].Len > MaxSerialGroupLen)
        {
          if(Group[i].GroupNr >= Ncollective || Group[i].GroupNr < 0)
            Terminate("unexpected GroupNr=%lld", (long long)Group[i].GroupNr);

          Group[i].TargetTask = ProcAssign[Group[i].GroupNr].FirstTask;
        }
      else
        Group[i].TargetTask = ((Group[i].GroupNr - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;
    }
  subfind_distribute_groups();

  mycxxsort(Group, Group + Ngroups, fof_compare_Group_GroupNr);
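
  /* Next, every particle gets a target task: members of a collective group are spread over
     that group's rank range, members of serially treated groups go to their group's task,
     and particles outside any group stay put for now and are used below to even out the load. */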
  int *count_loc_task = (int *)Mem.mymalloc_clear("count_loc_task", NTask * sizeof(int));
  int *count_task     = (int *)Mem.mymalloc("count_task", NTask * sizeof(int));
  int *count_free     = (int *)Mem.mymalloc("count_free", NTask * sizeof(int));
  int count_loc_free  = 0;

  for(int i = 0; i < Tp->NumPart; i++)
    {
      if(Tp->PS[i].GroupNr.get() < HALONR_MAX) /* particle is in a group (assuming HALONR_MAX marks "no group") */
        {
          if(Tp->PS[i].GroupNr.get() < (MyIDType)Ncollective) /* member of a collectively treated group */
            Tp->PS[i].TargetTask = ProcAssign[Tp->PS[i].GroupNr.get()].FirstTask + (i % ProcAssign[Tp->PS[i].GroupNr.get()].NTask);
          else /* member of a group treated by the serial code */
            Tp->PS[i].TargetTask = ((Tp->PS[i].GroupNr.get() - Ncollective) % (NTask - NprocsCollective)) + NprocsCollective;

          count_loc_task[Tp->PS[i].TargetTask]++;
        }
      else
        {
          Tp->PS[i].TargetTask = ThisTask; /* particles outside groups stay on their task for now */
          count_loc_free++;
        }

      Tp->PS[i].TargetIndex = 0;
    }
  MPI_Allgather(&count_loc_free, 1, MPI_INT, count_free, 1, MPI_INT, Communicator);
  MPI_Allreduce(count_loc_task, count_task, NTask, MPI_INT, MPI_SUM, Communicator);

  long long sum = 0;
  for(int i = 0; i < NTask; i++)
    sum += count_task[i] + count_free[i];

  /* the maximum number of particles any task should end up with */
  int maxload = (sum + NTask - 1) / NTask;
  for(int i = 0; i < NTask; i++)
    {
      count_task[i] = maxload - count_task[i]; /* capacity left on each task */
      if(count_task[i] < 0)
        count_task[i] = 0;
    }

  /* determine how many of the free particles on each task can simply stay there */
  for(int i = 0; i < NTask; i++)
    {
      if(count_free[i] <= count_task[i])
        {
          count_task[i] -= count_free[i];
          count_free[i] = 0;
        }
      else
        {
          count_free[i] -= count_task[i];
          count_task[i] = 0;
        }
    }

  /* skip the capacity that the free particles of lower-ranked tasks will already consume */
  int current_task = 0;
  for(int i = 0; i < ThisTask; i++)
    while(count_free[i] > 0 && current_task < NTask)
      {
        if(count_free[i] < count_task[current_task])
          {
            count_task[current_task] -= count_free[i];
            count_free[i] = 0;
          }
        else
          {
            count_free[i] -= count_task[current_task];
            count_task[current_task] = 0;
            current_task++;
          }
      }

  /* distribute the remaining local free particles onto tasks with spare capacity */
  for(int i = 0; i < Tp->NumPart && count_free[ThisTask] > 0; i++)
    {
      while(count_task[current_task] == 0 && current_task < NTask - 1)
        current_task++;

      Tp->PS[i].TargetTask = current_task;
      count_task[current_task]--;
      count_free[ThisTask]--;
    }

  Mem.myfree(count_free);
  Mem.myfree(count_task);
  Mem.myfree(count_loc_task);
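
  /* With the target tasks fixed, report the expected balance and exchange the particles. */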
  double balance = subfind_get_particle_balance();
  mpi_printf("SUBFIND: particle balance=%g\n", balance);

  double t0 = Logs.second();
  FoFDomain->particle_exchange_based_on_PS(Communicator);
  double t1 = Logs.second();
  mpi_printf("SUBFIND: subfind_exchange() took %g sec\n", Logs.timediff(t0, t1));

  balance = subfind_get_particle_balance();
  mpi_printf("SUBFIND: particle balance for processing=%g\n", balance);
  if(ThisTask < NprocsCollective)
    {
      MaxNsubhalos = (ProcAssign[CommSplitColor].Len / ProcAssign[CommSplitColor].NTask) / All.DesLinkNgb;
    }
  else
    {
      long long nlocid = 0;
      for(int i = 0; i < Ngroups; i++)
        nlocid += Group[i].Len;

      MaxNsubhalos = nlocid / All.DesLinkNgb;
    }

  Mem.myfree(ProcAssign);
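
  /* Allocate the subhalo buffer and split the communicator: the tasks of each collective
     group share one sub-communicator, while every serially working task gets its own. */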
  count_different_decisions = 0;

  Subhalo = (subhalo_properties *)Mem.mymalloc_movable(&Subhalo, "Subhalo", MaxNsubhalos * sizeof(subhalo_properties));

  MPI_Comm_split(Communicator, CommSplitColor, ThisTask, &SubComm);
  MPI_Comm_size(SubComm, &SubNTask);
  MPI_Comm_rank(SubComm, &SubThisTask);

  if(SubDomain.NumNodes != 0)
    Terminate("SubDomain.NumNodes=%d\n", SubDomain.NumNodes);
  if(CommSplitColor < Ncollective) /* this task is part of a collectively processed group */
    {
      sort_as_data *as = (sort_as_data *)Mem.mymalloc_movable(&as, "as", Tp->NumPart * sizeof(sort_as_data));

      for(int i = 0; i < Tp->NumPart; i++)
        {
          as[i].density = Tp->PS[i].u.s.u.DM_Density;
          as[i].origin  = (((long long)SubThisTask) << 32) + i;
        }

      for(int i = 0; i < Tp->NumPart; i++)
        as[i].targettask = SubThisTask;

      for(int i = 0; i < Tp->NumPart; i++)
        Tp->PS[i].TargetTask = as[i].targettask;

      SubDomain.particle_exchange_based_on_PS(SubComm);

      if(SubDomain.NumNodes != 0)
        Terminate("SubDomain.NumNodes=%d\n", SubDomain.NumNodes);
    }
  MPI_Barrier(Communicator);

  mpi_printf("SUBFIND: Processing overall took (total time=%g sec)\n", Logs.timediff(ti0, ti1));

  MPI_Comm_free(&SubComm);

  MPI_Allreduce(MPI_IN_PLACE, &count_decisions, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
  MPI_Allreduce(MPI_IN_PLACE, &count_different_decisions, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
  mpi_printf("SUBFIND: %lld out of %lld decisions, fraction %g, were influenced by previous subhalo size\n",
             count_different_decisions, count_decisions, ((double)count_different_decisions) / count_decisions);
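
  /* Processing is done: restore the original maximum particle numbers and send every
     particle back to the task and array index it came from. */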
  int max_load, max_loadsph;
  MPI_Allreduce(&Tp->MaxPart, &max_load, 1, MPI_INT, MPI_MAX, Communicator);
  MPI_Allreduce(&Tp->MaxPartSph, &max_loadsph, 1, MPI_INT, MPI_MAX, Communicator);

  Tp->reallocate_memory_maxpart(max_load);
  Tp->reallocate_memory_maxpartsph(max_loadsph);

  for(int i = 0; i < Tp->NumPart; i++)
    {
      Tp->PS[i].TargetTask  = Tp->PS[i].OriginTask;
      Tp->PS[i].TargetIndex = Tp->PS[i].OriginIndex;
    }

  FoFDomain->particle_exchange_based_on_PS(Communicator);

  printf("SUBFIND: subfind_exchange() (for return to original CPU) took %g sec\n", Logs.timediff(t0, t1));
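
  /* Determine the spherical overdensity masses of the groups, which needs a fresh tree on
     the restored particle distribution. */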
  FoFGravTree.treeallocate(Tp->NumPart, Tp, FoFDomain);
  FoFGravTree.treebuild(Tp->NumPart, NULL);

  double cputimeso = subfind_overdensity();
  mpi_printf("SUBFIND: determining spherical overdensity masses took %g sec\n", cputimeso);

  FoFGravTree.treefree();

  mycxxsort_parallel(Subhalo, Subhalo + Nsubhalos, subfind_compare_Subhalo_GroupNr_SubRankInGr, Communicator);

  mpi_printf("SUBFIND: assembled and ordered groups and subhalos (took %g sec)\n", Logs.timediff(t0, t1));
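
  /* Collect some statistics on the resulting subhalo catalogue, and zero the binding energy
     of particles that ended up in no subhalo. */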
  long long lenmax = 0, glob_lenmax;
  long long totlen = 0;
  long long totsublength;
  for(int i = 0; i < Nsubhalos; i++)
    {
      totlen += Subhalo[i].Len;

      if(Subhalo[i].Len > lenmax)
        lenmax = Subhalo[i].Len;
    }
  sumup_longs(1, &totlen, &totsublength, Communicator);
  MPI_Reduce(&lenmax, &glob_lenmax, 1, MPI_LONG_LONG, MPI_MAX, 0, Communicator);

  for(int i = 0; i < Tp->NumPart; i++)
    if(Tp->PS[i].SubRankInGr == INT_MAX)
      Tp->PS[i].v.DM_BindingEnergy = 0;
  subfind_save_final(num, basename, grpcat_dirbasename);

  printf("SUBFIND: Finished with SUBFIND. (total time=%g sec)\n", Logs.timediff(tstart, tend));
  printf("SUBFIND: Total number of subhalos with at least %d particles: %lld\n", All.DesLinkNgb, TotNsubhalos);
  printf("SUBFIND: Largest subhalo has %lld particles/cells.\n", glob_lenmax);
  printf("SUBFIND: Total number of particles/cells in subhalos: %lld\n", totsublength);

  Mem.myfree_movable(Subhalo);
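
/* subfind_save_final() establishes the global indexing of the subhalos (each group records
   the index of its first subhalo), assigns the per-type particle offsets, and writes the
   combined group/subhalo catalogue files. */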
template <typename partset>
void fof<partset>::subfind_save_final(int num, const char *basename, const char *grpcat_dirbasename)
  long long totsubs = 0;

  /* fill in the FirstSub values: running index of the first subhalo of each group */
  for(int i = 0; i < Ngroups; i++)
    {
      if(i > 0)
        Group[i].FirstSub = Group[i - 1].FirstSub + Group[i - 1].Nsubs;
      else
        Group[i].FirstSub = 0;

      totsubs += Group[i].Nsubs;
    }

  long long *Send_count  = (long long *)Mem.mymalloc("Send_count", sizeof(long long) * this->NTask);
  long long *Send_offset = (long long *)Mem.mymalloc("Send_offset", sizeof(long long) * this->NTask);

  MPI_Allgather(&totsubs, 1, MPI_LONG_LONG, Send_count, 1, MPI_LONG_LONG, Communicator);

  Send_offset[0] = 0;
  for(int i = 1; i < NTask; i++)
    Send_offset[i] = Send_offset[i - 1] + Send_count[i - 1];

  /* make FirstSub a global index across all tasks; groups without subhalos get -1 */
  for(int i = 0; i < Ngroups; i++)
    {
      if(Group[i].Nsubs > 0)
        Group[i].FirstSub += Send_offset[ThisTask];
      else
        Group[i].FirstSub = -1;
    }

  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  subfind_assign_subhalo_offsettype();

  FoF_IO.fof_subfind_save_groups(num, basename, grpcat_dirbasename);

  mpi_printf("SUBFIND: Subgroup catalogues saved. took = %g sec\n", Logs.timediff(t0, t1));
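
/* subfind_assign_subhalo_offsettype() computes, for every subhalo, its rank within its
   parent group and the cumulative particle offsets per type: first locally, then corrected
   across task boundaries, and finally shifted by the parent group's own type offsets. */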
template <typename partset>
void fof<partset>::subfind_assign_subhalo_offsettype(void)
  int *Send_count  = (int *)Mem.mymalloc("Send_count", sizeof(int) * this->NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * this->NTask);
  int *Recv_count  = (int *)Mem.mymalloc("Recv_count", sizeof(int) * this->NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * this->NTask);
  Subhalo[0].SubRankInGr = 0;
  for(int j = 0; j < NTYPES; j++)
    Subhalo[0].OffsetType[j] = 0;

  /* local pass: rank of each subhalo within its group, and cumulative particle offsets by type */
  for(int i = 1; i < Nsubhalos; i++)
    if(Subhalo[i].GroupNr != Subhalo[i - 1].GroupNr)
      {
        Subhalo[i].SubRankInGr = 0;
        for(int j = 0; j < NTYPES; j++)
          Subhalo[i].OffsetType[j] = 0;
      }
    else
      {
        Subhalo[i].SubRankInGr = Subhalo[i - 1].SubRankInGr + 1;
        for(int j = 0; j < NTYPES; j++)
          Subhalo[i].OffsetType[j] = Subhalo[i - 1].OffsetType[j] + Subhalo[i - 1].LenType[j];
      }
  /* gather what the last subhalo on each task looks like, so that tasks holding the
     continuation of a group can correct their counts and offsets */
  struct subnr_info
  {
    long long grnr;
    int cnt;
    long long stype_cum[NTYPES];
  };
  subnr_info subnr_data;

  if(Nsubhalos > 0)
    {
      subnr_data.grnr = Subhalo[Nsubhalos - 1].GroupNr;
      subnr_data.cnt  = Subhalo[Nsubhalos - 1].SubRankInGr + 1;
      for(int j = 0; j < NTYPES; j++)
        subnr_data.stype_cum[j] = Subhalo[Nsubhalos - 1].OffsetType[j] + Subhalo[Nsubhalos - 1].LenType[j];
    }
  else
    subnr_data.grnr = INT_MAX;

  subnr_info *info_all = (subnr_info *)Mem.mymalloc("info_all", NTask * sizeof(subnr_info));
  MPI_Allgather(&subnr_data, sizeof(subnr_info), MPI_BYTE, info_all, sizeof(subnr_info), MPI_BYTE, Communicator);
  /* if our first subhalo continues a group that starts on a previous task, add the
     counts and type offsets accumulated there */
  int cnt = 0;
  long long stype_cum[NTYPES];
  for(int j = 0; j < NTYPES; j++)
    stype_cum[j] = 0;

  for(int i = ThisTask - 1; i >= 0; i--)
    if(info_all[i].grnr == Subhalo[0].GroupNr)
      {
        cnt += info_all[i].cnt;
        for(int j = 0; j < NTYPES; j++)
          stype_cum[j] += info_all[i].stype_cum[j];
      }

  for(int i = 0; i < Nsubhalos; i++)
    if(Subhalo[i].GroupNr == Subhalo[0].GroupNr)
      {
        Subhalo[i].SubRankInGr += cnt;
        for(int j = 0; j < NTYPES; j++)
          Subhalo[i].OffsetType[j] += stype_cum[j];
      }

  Mem.myfree(info_all);
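
  /* So far the OffsetType values are relative to the start of the parent group. To make them
     global, fetch each group's own OffsetType from the task that stores the group and add it. */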
  int *gcount = (int *)Mem.mymalloc("gcount", NTask * sizeof(int));
  MPI_Allgather(&Ngroups, 1, MPI_INT, gcount, 1, MPI_INT, Communicator);

  int nexport = 0, nimport = 0;

  struct group_info
  {
    int grindex;
    int subindex;
    long long OffsetType[NTYPES];
  };
  group_info *export_group_data = NULL, *import_group_data = NULL;
  for(int mode = 0; mode < 2; mode++)
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int target = 0;
      long long first_gr_in_target = 0;

      for(int i = 0; i < Nsubhalos; i++)
        {
          /* find the task that stores the parent group of this subhalo */
          while(Subhalo[i].GroupNr >= first_gr_in_target + gcount[target])
            {
              if(target >= NTask)
                Terminate("target=%d i=%d Nsubhalos=%d Subhalo[i],GroupNr=%lld\n", target, i, Nsubhalos, Subhalo[i].GroupNr);

              first_gr_in_target += gcount[target];
              target++;
            }

          if(mode == 0)
            Send_count[target]++;
          else
            {
              int off                         = Send_offset[target] + Send_count[target]++;
              export_group_data[off].grindex  = Subhalo[i].GroupNr - first_gr_in_target;
              export_group_data[off].subindex = i;
            }
        }

      if(mode == 0)
        {
          myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
          Recv_offset[0] = Send_offset[0] = 0;
          for(int j = 0; j < NTask; j++)
            {
              nimport += Recv_count[j];
              nexport += Send_count[j];
              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          export_group_data = (group_info *)Mem.mymalloc("export_group_data", nexport * sizeof(group_info));
          import_group_data = (group_info *)Mem.mymalloc("import_group_data", nimport * sizeof(group_info));
        }
    }
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;
      if(recvTask < NTask)
        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
          myMPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask,
                         TAG_DENS_B, &import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE,
                         recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
    }
  for(int i = 0; i < nimport; i++)
    for(int j = 0; j < NTYPES; j++)
      import_group_data[i].OffsetType[j] = Group[import_group_data[i].grindex].OffsetType[j];
  /* send the results back */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;
      if(recvTask < NTask)
        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
          myMPI_Sendrecv(&import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask,
                         TAG_DENS_B, &export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE,
                         recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
    }
  for(int i = 0; i < nexport; i++)
    for(int j = 0; j < NTYPES; j++)
      Subhalo[export_group_data[i].subindex].OffsetType[j] += export_group_data[i].OffsetType[j];

  Mem.myfree(import_group_data);
  Mem.myfree(export_group_data);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);
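
/* subfind_redetermine_groupnr() assigns to every subhalo the global number of its parent
   group, by matching the global subhalo sequence against the per-group subhalo counts. */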
template <typename partset>
void fof<partset>::subfind_redetermine_groupnr(void)
  /* number of groups on each task */
  int *ngroups_all = (int *)Mem.mymalloc("ngroups_all", NTask * sizeof(int));
  MPI_Allgather(&Ngroups, 1, MPI_INT, ngroups_all, 1, MPI_INT, Communicator);

  /* number of subhalos that the local groups expect, according to their Nsubs counts */
  int nsubs_local = 0;
  for(int i = 0; i < Ngroups; i++)
    nsubs_local += Group[i].Nsubs;

  int *nsubs_all = (int *)Mem.mymalloc("nsubs_all", NTask * sizeof(int));
  MPI_Allgather(&nsubs_local, 1, MPI_INT, nsubs_all, 1, MPI_INT, Communicator);

  /* number of subhalos actually stored on each task */
  int *nsubhalos_all = (int *)Mem.mymalloc("nsubhalos_all", NTask * sizeof(int));
  MPI_Allgather(&Nsubhalos, 1, MPI_INT, nsubhalos_all, 1, MPI_INT, Communicator);

  /* global index of our first locally stored subhalo */
  long long subhalo_previous = 0;
  for(int i = 0; i < ThisTask; i++)
    subhalo_previous += nsubhalos_all[i];
  int nexport = 0, nimport = 0;

  struct group_info
  {
    long long subhalonr;
    long long groupnr;
    int subindex;
  };
  group_info *export_group_data = NULL, *import_group_data = NULL;

  int *Send_count  = (int *)Mem.mymalloc("Send_count", sizeof(int) * this->NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * this->NTask);
  int *Recv_count  = (int *)Mem.mymalloc("Recv_count", sizeof(int) * this->NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * this->NTask);
  for(int mode = 0; mode < 2; mode++)
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int target = 0;
      long long nsubs_previous = 0;

      for(int i = 0; i < Nsubhalos; i++)
        {
          long long subhalonr = subhalo_previous + i; /* global rank of this subhalo */

          /* find the task whose groups contain this subhalo rank */
          while(subhalonr >= nsubs_previous + nsubs_all[target])
            {
              nsubs_previous += nsubs_all[target];
              target++;
            }

          if(mode == 0)
            Send_count[target]++;
          else
            {
              int off                          = Send_offset[target] + Send_count[target]++;
              export_group_data[off].subhalonr = subhalonr;
              export_group_data[off].subindex  = i;
            }
        }

      if(mode == 0)
        {
          myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
          Recv_offset[0] = Send_offset[0] = 0;
          for(int j = 0; j < NTask; j++)
            {
              nimport += Recv_count[j];
              nexport += Send_count[j];
              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          export_group_data = (group_info *)Mem.mymalloc("export_group_data", nexport * sizeof(group_info));
          import_group_data = (group_info *)Mem.mymalloc("import_group_data", nimport * sizeof(group_info));
        }
    }
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;
      if(recvTask < NTask)
        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
          myMPI_Sendrecv(&export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask,
                         TAG_DENS_B, &import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE,
                         recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
    }
  /* global rank of the first subhalo expected by our groups, and global number of our first group */
  long long nsubs = 0;
  for(int i = 0; i < ThisTask; i++)
    nsubs += nsubs_all[i];

  long long group_previous = 0;
  for(int i = 0; i < ThisTask; i++)
    group_previous += ngroups_all[i];

  /* walk through the local groups and assign the parent group number to each imported subhalo rank */
  int gr = 0;
  for(int i = 0; i < nimport; i++)
    {
      while(import_group_data[i].subhalonr >= nsubs + Group[gr].Nsubs)
        {
          nsubs += Group[gr].Nsubs;
          gr++;

          if(gr >= Ngroups)
            Terminate("i=%d|%d gr=%d >= Ngroups=%d\n", i, nimport, gr, Ngroups);
        }

      import_group_data[i].groupnr = group_previous + gr;
    }
  /* send the results back */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;
      if(recvTask < NTask)
        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
          myMPI_Sendrecv(&import_group_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(group_info), MPI_BYTE, recvTask,
                         TAG_DENS_B, &export_group_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(group_info), MPI_BYTE,
                         recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
    }
  for(int i = 0; i < nexport; i++)
    Subhalo[export_group_data[i].subindex].GroupNr = export_group_data[i].groupnr;

  Mem.myfree(import_group_data);
  Mem.myfree(export_group_data);
  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  Mem.myfree(nsubhalos_all);
  Mem.myfree(nsubs_all);
  Mem.myfree(ngroups_all);
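
/* subfind_get_particle_balance() returns the ratio of the largest local particle number to
   the average number per task, i.e. a value of 1 means perfect balance. */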
template <typename partset>
double fof<partset>::subfind_get_particle_balance(void)
  int maxpart;
  long long sum;
  MPI_Allreduce(&Tp->NumPart, &maxpart, 1, MPI_INT, MPI_MAX, Communicator);
  sumup_large_ints(1, &Tp->NumPart, &sum, Communicator);
  return maxpart / (((double)sum) / NTask);
/* explicit template instantiations */
#include "../data/simparticles.h"
template class fof<simparticles>;

#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
#include "../data/lcparticles.h"
template class fof<lcparticles>;
#endif