12#include "gadgetconfig.h"
15#if defined(MERGERTREE) && defined(SUBFIND_HBT)
17#include <gsl/gsl_math.h>
25#include "../data/allvars.h"
26#include "../data/dtypes.h"
27#include "../data/intposconvert.h"
28#include "../data/mymalloc.h"
29#include "../domain/domain.h"
30#include "../fof/fof.h"
31#include "../gravtree/gravtree.h"
32#include "../logs/timer.h"
33#include "../main/simulation.h"
34#include "../mpi_utils/mpi_utils.h"
35#include "../sort/cxxsort.h"
36#include "../sort/parallel_sort.h"
37#include "../sort/peano.h"
38#include "../subfind/subfind.h"
39#include "../system/system.h"
41template <
typename partset>
45 long long totgrouplen;
50 hbt_pcand_t *pcand = (hbt_pcand_t *)
Mem.mymalloc_movable(&pcand,
"pcand", (NumPartGroup + 1) *
sizeof(hbt_pcand_t));
52 for(
int k = 0; k < NumPartGroup; k++)
55 Tp->PS[IndexList[k]].SubhaloNr = Tp->P[IndexList[k]].PrevSubhaloNr;
57 pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
58 pcand[k].PrevSizeOfSubhalo = Tp->P[IndexList[k]].PrevSizeOfSubhalo;
63 mycxxsort_parallel(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr, SubComm);
65 int *NumPartGroup_list = (
int *)
Mem.mymalloc_movable(&NumPartGroup_list,
"NumPartGroup_list", SubNTask *
sizeof(
int));
66 MPI_Allgather(&NumPartGroup,
sizeof(
int), MPI_BYTE, NumPartGroup_list,
sizeof(
int), MPI_BYTE, SubComm);
69 hbt_pcand_t *elem_last = (hbt_pcand_t *)
Mem.mymalloc_movable(&elem_last,
"elem_last", SubNTask *
sizeof(hbt_pcand_t));
72 MPI_Allgather(&pcand[NumPartGroup > 0 ? NumPartGroup - 1 : 0],
sizeof(hbt_pcand_t), MPI_BYTE, elem_last,
sizeof(hbt_pcand_t),
77 bool element_before_present =
false;
78 hbt_pcand_t element_before{};
80 for(
int task = SubThisTask - 1; task >= 0; task--)
82 if(NumPartGroup_list[task] > 0)
84 element_before_present =
true;
85 element_before = elem_last[task];
93 for(
int i = 0; i < NumPartGroup; i++)
95 if(i == 0 && !element_before_present)
101 if(i == 0 && element_before_present)
102 prevnr = element_before.SubhaloNr;
104 prevnr = pcand[i - 1].SubhaloNr;
106 if(pcand[i].SubhaloNr != prevnr)
112 hbt_subcand_t *loc_candidates =
113 (hbt_subcand_t *)
Mem.mymalloc_movable(&loc_candidates,
"loc_candidates", count_cand *
sizeof(hbt_subcand_t));
117 for(
int i = 0; i < NumPartGroup; i++)
119 if(i == 0 && !element_before_present)
121 loc_candidates[count_cand].SubhaloNr = pcand[i].SubhaloNr;
128 if(i == 0 && element_before_present)
129 prevnr = element_before.SubhaloNr;
131 prevnr = pcand[i - 1].SubhaloNr;
133 if(pcand[i].SubhaloNr != prevnr)
135 loc_candidates[count_cand].SubhaloNr = pcand[i].SubhaloNr;
145 int nsubhalos_old = Nsubhalos;
147 if(Nsubhalos + totcand + 1 > MaxNsubhalos)
151 MaxNsubhalos = 1.25 * (Nsubhalos + totcand + 1);
153 Subhalo = (subhalo_properties *)
Mem.myrealloc_movable(Subhalo, MaxNsubhalos *
sizeof(subhalo_properties));
157 hbt_subcand_t *all_candidates =
158 (hbt_subcand_t *)
Mem.mymalloc_movable(&all_candidates,
"all_candidates", (totcand + 1) *
sizeof(hbt_subcand_t));
160 int *countlist = (
int *)
Mem.mymalloc_movable(&countlist,
"countlist", SubNTask *
sizeof(
int));
161 int *offset = (
int *)
Mem.mymalloc_movable(&offset,
"offset", SubNTask *
sizeof(
int));
163 int count = count_cand *
sizeof(hbt_subcand_t);
164 MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
167 for(
int i = 1; i < SubNTask; i++)
168 offset[i] = offset[i - 1] + countlist[i - 1];
170 myMPI_Allgatherv(loc_candidates, count, MPI_BYTE, all_candidates, countlist, offset, MPI_BYTE, SubComm);
173 mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);
176 long long *size_list = (
long long *)
Mem.mymalloc_clear(
"size_list", totcand *
sizeof(
long long));
177 long long *summed_prevsize_list = (
long long *)
Mem.mymalloc_clear(
"summed_prevsize_list", totcand *
sizeof(
long long));
180 for(
int k = 0; k < NumPartGroup; k++)
183 Terminate(
"can't be: k=%d j=%d NumPartGroup=%d totcand=%lld\n", k, j, NumPartGroup, totcand);
185 while(j < totcand && pcand[k].SubhaloNr > all_candidates[j].SubhaloNr)
188 if(pcand[k].SubhaloNr != all_candidates[j].SubhaloNr)
189 Terminate(
"can't be: k=%d NumPartGroup=%d pcand[k].SubhaloNr=%lld j=%d all_candidates[j].SubhaloNr=%lld\n", k,
190 NumPartGroup, (
long long)pcand[k].SubhaloNr.get(), j, (
long long)all_candidates[j].SubhaloNr.get());
193 summed_prevsize_list[j] += pcand[k].PrevSizeOfSubhalo.get();
196 MPI_Allreduce(MPI_IN_PLACE, size_list, totcand, MPI_LONG_LONG, MPI_SUM, SubComm);
197 MPI_Allreduce(MPI_IN_PLACE, summed_prevsize_list, totcand, MPI_LONG_LONG, MPI_SUM, SubComm);
199 for(
int i = 0; i < totcand; i++)
201 all_candidates[i].len = size_list[i];
202 all_candidates[i].summedprevlen = summed_prevsize_list[i];
205 Mem.myfree(summed_prevsize_list);
206 Mem.myfree(size_list);
209 long long lensum = 0;
210 for(
int i = 0; i < totcand; i++)
211 lensum += all_candidates[i].len;
213 if(lensum != totgrouplen)
214 Terminate(
"lensum=%lld != totgrouplen=%lld\n", lensum, totgrouplen);
219 for(
int i = 0; i < totcand; i++)
220 if(all_candidates[i].SubhaloNr.get() ==
HALONR_MAX)
222 all_candidates[i] = all_candidates[totcand - 1];
230 mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);
233 for(
int k = 0; k < NumPartGroup; k++)
235 pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
236 pcand[k].index = IndexList[k];
238 mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);
244 for(
int i = 0; i < totcand; i++)
245 if(all_candidates[i].len <
All.DesLinkNgb)
247 while(p < NumPartGroup && pcand[p].SubhaloNr < all_candidates[i].SubhaloNr)
250 while(p < NumPartGroup && pcand[p].SubhaloNr == all_candidates[i].SubhaloNr)
252 if(Tp->PS[pcand[p].index].SubhaloNr != all_candidates[i].SubhaloNr)
254 "we have an issue! p=%d NumPartGroup=%d pcand[p].index=%d pcand[p].SubhaloNr=%lld "
255 "all_candidates[i].SubhaloNr=%lld Tp->P[pcand[p].index].SubhaloNr=%lld",
256 p, NumPartGroup, pcand[p].index, (
long long)pcand[p].SubhaloNr.get(), (
long long)all_candidates[i].SubhaloNr.get(),
257 (
long long)Tp->PS[pcand[p].index].SubhaloNr.get());
260 Tp->PS[pcand[p].index].SubhaloNr.set(
HALONR_MAX);
266 MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_LONG_LONG, MPI_SUM, SubComm);
270 for(
int i = 0; i < totcand; i++)
271 if(all_candidates[i].len <
All.DesLinkNgb)
273 sum2 += all_candidates[i].len;
274 all_candidates[i] = all_candidates[totcand - 1];
280 Terminate(
"consistency check failed sum = %lld sum2 = %lld\n", sum, sum2);
283 mycxxsort_parallel(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr, SubComm);
290 mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_len);
294 mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_summedprevlen);
297 for(
int k = 0; k < NumPartGroup; k++)
298 if(Tp->PS[IndexList[k]].SubhaloNr == all_candidates[totcand].SubhaloNr)
299 Tp->PS[IndexList[k]].SubhaloNr.set(
HALONR_MAX);
303 mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);
305 subfind_collective_printf(
"SUBFIND: root-task=%d: total number of subhalo coll_candidates=%lld\n", ThisTask, totcand);
312 int n_small_cand = 0;
316 int task_scatter = 0;
317 for(
int i = 0; i < totcand; i++)
319 if(all_candidates[i].len < 0.20 * Tp->TotNumPart / NTask)
321 all_candidates[i].DoIt =
true;
322 all_candidates[i].TargetTask = task++;
323 all_candidates[i].TargetIndex = n_small_cand;
328 if(all_candidates[i].len > max_length)
329 max_length = all_candidates[i].len;
335 all_candidates[i].DoIt =
false;
336 all_candidates[i].TargetTask = task_scatter++;
337 all_candidates[i].TargetIndex = INT_MAX;
338 if(task_scatter >= SubNTask)
343 subfind_collective_printf(
344 "SUBFIND: root-task=%d: number of subhalo candidates small enough to be done with one cpu: %d. (Largest size %d)\n", ThisTask,
345 n_small_cand, max_length);
348 for(
int k = 0; k < NumPartGroup; k++)
350 pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
351 pcand[k].index = IndexList[k];
355 mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);
363 for(
int i = 0; i < Tp->NumPart; i++)
365 Tp->PS[i].u.s.origintask = SubThisTask;
366 Tp->PS[i].u.s.originindex = i;
368 Tp->PS[i].TargetTask = SubThisTask;
369 Tp->PS[i].TargetIndex = INT_MAX;
373 for(
int k = 0; k < totcand; k++)
374 if(all_candidates[k].DoIt)
376 while(i < NumPartGroup && pcand[i].SubhaloNr < all_candidates[k].SubhaloNr)
379 while(i < NumPartGroup && pcand[i].SubhaloNr == all_candidates[k].SubhaloNr)
381 Tp->PS[pcand[i].index].TargetTask = all_candidates[k].TargetTask;
382 Tp->PS[pcand[i].index].TargetIndex = all_candidates[k].TargetIndex;
388 subfind_distribute_particles(SubComm);
395 unbind_list = (
int *)
Mem.mymalloc(
"unbind_list", Tp->NumPart *
sizeof(
int));
399 for(
int k = 0; k < totcand; k++)
400 if(all_candidates[k].DoIt)
401 if(all_candidates[k].TargetTask == SubThisTask)
407 while(i < Tp->NumPart && Tp->PS[i].SubhaloNr < all_candidates[k].SubhaloNr)
410 while(i < Tp->NumPart && Tp->PS[i].SubhaloNr == all_candidates[k].SubhaloNr && Tp->PS[i].GroupNr.get() == GroupNr)
412 unbind_list[len] = i;
419 while(i < NumPartGroup && Tp->PS[pcand[i].index].SubhaloNr < all_candidates[k].SubhaloNr)
422 while(i < NumPartGroup && Tp->PS[pcand[i].index].SubhaloNr == all_candidates[k].SubhaloNr &&
423 Tp->PS[pcand[i].index].GroupNr.get() == GroupNr)
425 unbind_list[len] = pcand[i].index;
431 if(len != all_candidates[k].len)
432 Terminate(
"this is unexpected: k=%d len=%lld != all_candidates[k].len=%lld) \n", k, (
long long)len,
433 (
long long)all_candidates[k].len);
436 for(
int n = 0; n < len; n++)
437 Tp->PS[unbind_list[n]].SubhaloNr.set(
HALONR_MAX);
440 len = subfind_unbind(SingleDomain, SingleDomain->
Communicator, unbind_list, len);
442 if(len >=
All.DesLinkNgb)
445 for(
int n = 0; n < len; n++)
447 Tp->PS[unbind_list[n]].SubhaloNr = all_candidates[k].SubhaloNr;
448 Tp->PS[unbind_list[n]].SizeOfSubhalo.set(len);
451 if(Nsubhalos >= MaxNsubhalos)
452 Terminate(
"no storage: Nsubhalos=%d MaxNsubhalos=%d nsubhalos_old=%d totcand=%lld\n", Nsubhalos, MaxNsubhalos,
453 nsubhalos_old, totcand);
456 marked += subfind_determine_sub_halo_properties(unbind_list, len, &Subhalo[Nsubhalos], SingleDomain->
Communicator);
458 Subhalo[Nsubhalos].GroupNr = GroupNr;
459 Subhalo[Nsubhalos].SubParentRank = 0;
460 Subhalo[Nsubhalos].SubhaloNr = all_candidates[k].SubhaloNr.get();
466 Mem.myfree(unbind_list);
472 for(
int i = 0; i < Tp->NumPart; i++)
474 Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
475 Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
478 subfind_distribute_particles(SubComm);
489 all_candidates[totcand].DoIt =
false;
490 all_candidates[totcand].SubhaloNr.set(
HALONR_MAX);
493 for(
int k = 0; k < totcand; k++)
494 if(all_candidates[k].DoIt ==
false)
502 for(
int i = 0; i < Tp->NumPart; i++)
504 Tp->PS[i].u.s.origintask = SubThisTask;
505 Tp->PS[i].u.s.originindex = i;
506 Tp->PS[i].DomainFlag = 0;
510 for(
int i = 0; i < NumPartGroup; i++)
511 if(Tp->PS[IndexList[i]].SubhaloNr == all_candidates[k].SubhaloNr)
512 Tp->PS[IndexList[i]].DomainFlag = 1;
515 subfind_distribute_particles(SubComm);
518 for(
int i = 0; i < Tp->NumPart; i++)
519 if(Tp->PS[i].DomainFlag)
522 unbind_list = (
int *)
Mem.mymalloc_movable(&unbind_list,
"unbind_list", LocalLen *
sizeof(
int));
526 for(
int i = 0; i < Tp->NumPart; i++)
527 if(Tp->PS[i].DomainFlag)
528 unbind_list[LocalLen++] = i;
533 unbind_list = (
int *)
Mem.mymalloc_movable(&unbind_list,
"unbind_list", NumPartGroup *
sizeof(
int));
536 for(
int i = 0; i < NumPartGroup; i++)
537 if(Tp->PS[IndexList[i]].SubhaloNr == all_candidates[k].SubhaloNr)
538 unbind_list[LocalLen++] = IndexList[i];
542 for(
int n = 0; n < LocalLen; n++)
543 Tp->PS[unbind_list[n]].SubhaloNr.set(
HALONR_MAX);
546 LocalLen = subfind_unbind(&SubUnbindDomain, SubComm, unbind_list, LocalLen);
548 LocalLen = subfind_unbind(SingleDomain, SingleDomain->
Communicator, unbind_list, LocalLen);
551 MPI_Allreduce(&LocalLen, &FullLen, 1, MPI_INT, MPI_SUM, SubComm);
553 if(FullLen >=
All.DesLinkNgb)
555 if(all_candidates[k].SubhaloNr.get() ==
HALONR_MAX)
556 all_candidates[k].SubhaloNr.set(
HALONR_MAX - 1);
559 for(
int n = 0; n < LocalLen; n++)
561 Tp->PS[unbind_list[n]].SubhaloNr = all_candidates[k].SubhaloNr;
562#if defined(MERGERTREE)
563 Tp->PS[unbind_list[n]].SizeOfSubhalo.set(FullLen);
567 if(Nsubhalos >= MaxNsubhalos)
568 Terminate(
"no storage: Nsubhalos=%d MaxNsubhalos=%d nsubhalos_old=%d totcand=%lld\n", Nsubhalos, MaxNsubhalos,
569 nsubhalos_old, totcand);
571 marked += subfind_determine_sub_halo_properties(unbind_list, LocalLen, &Subhalo[Nsubhalos], SubComm);
575 Subhalo[Nsubhalos].GroupNr = GroupNr;
576 Subhalo[Nsubhalos].SubParentRank = 0;
577 Subhalo[Nsubhalos].SubhaloNr = all_candidates[k].SubhaloNr.get();
583 Mem.myfree(unbind_list);
587 SubUnbindDomain.domain_free();
589 for(
int i = 0; i < Tp->NumPart; i++)
591 Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
592 Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
596 subfind_distribute_particles(SubComm);
599 subfind_collective_printf(
"SUBFIND: root-task=%d: bringing the independent ones back took %g sec\n", ThisTask,
607 subfind_collective_printf(
"SUBFIND: root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
611 int countloc = Nsubhalos - nsubhalos_old;
614 MPI_Allreduce(&countloc, &countall, 1, MPI_INT, MPI_SUM, SubComm);
616 MPI_Allreduce(MPI_IN_PLACE, &marked, 1, MPI_INT, MPI_SUM, SubComm);
621 Group[gr].Nsubs = countall;
623#if defined(SUBFIND_ORPHAN_TREATMENT)
624 Group[gr].LenPrevMostBnd += marked;
630 hbt_subhalo_t *subhalo_list = (hbt_subhalo_t *)
Mem.mymalloc(
"subhalo_list", countloc *
sizeof(hbt_subhalo_t));
632 for(
int n = 0; n < countloc; n++)
634 subhalo_list[n].Len = Subhalo[n + nsubhalos_old].Len;
635 subhalo_list[n].
ThisTask = SubThisTask;
636 subhalo_list[n].ThisIndex = n;
637 subhalo_list[n].SubhaloNr = Subhalo[n + nsubhalos_old].SubhaloNr;
640 mycxxsort_parallel(subhalo_list, subhalo_list + countloc, subfind_hbt_compare_subhalolist_len, SubComm);
642 int *countloc_list = (
int *)
Mem.mymalloc(
"countloc_list", SubNTask *
sizeof(
int));
643 MPI_Allgather(&countloc, 1, MPI_INT, countloc_list, 1, MPI_INT, SubComm);
644 int npreviousranks = 0;
645 for(
int i = 0; i < SubThisTask; i++)
646 npreviousranks += countloc_list[i];
648 for(
int n = 0; n < countloc; n++)
649 subhalo_list[n].SubRankInGr = n + npreviousranks;
651 mycxxsort_parallel(subhalo_list, subhalo_list + countloc, subfind_hbt_compare_subhalolist_thistask_thisindex, SubComm);
657 for(
int n = 0; n < countloc; n++)
659 Subhalo[n + nsubhalos_old].SubRankInGr = subhalo_list[n].SubRankInGr;
661 if(subhalo_list[n].SubRankInGr == 0)
664 taskmain = SubThisTask;
665 indexmain = n + nsubhalos_old;
673 for(
int k = 0; k < NumPartGroup; k++)
675 pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
676 pcand[k].index = IndexList[k];
680 mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);
682 int sizelocsubhalolist = countloc *
sizeof(hbt_subhalo_t);
683 MPI_Allgather(&sizelocsubhalolist, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
686 for(
int i = 1; i < SubNTask; i++)
687 offset[i] = offset[i - 1] + countlist[i - 1];
689 hbt_subhalo_t *all_subhalo_list = (hbt_subhalo_t *)
Mem.mymalloc(
"all_subhalo_list", countall *
sizeof(hbt_subhalo_t));
691 myMPI_Allgatherv(subhalo_list, sizelocsubhalolist, MPI_BYTE, all_subhalo_list, countlist, offset, MPI_BYTE, SubComm);
694 mycxxsort(all_subhalo_list, all_subhalo_list + countall, subfind_hbt_compare_subhalolist_prevsubhalonr);
698 for(
int k = 0; k < NumPartGroup; k++)
701 Tp->PS[pcand[k].index].SubRankInGr = INT_MAX;
704 while(n < countall && all_subhalo_list[n].SubhaloNr < (
long long)pcand[k].SubhaloNr.get())
708 Terminate(
"unexpected: n=%d countall=%d", n, countall);
710 if(all_subhalo_list[n].SubhaloNr != (
long long)pcand[k].SubhaloNr.get())
711 Terminate(
"also unexpected: k=%d NumPartGroup=%d all_subhalo_list[n].SubhaloNr=%lld != pcand[k].SubhaloNr=%lld\n", k,
712 NumPartGroup, (
long long)all_subhalo_list[n].SubhaloNr, (
long long)pcand[k].SubhaloNr.get());
714 Tp->PS[pcand[k].index].SubRankInGr = all_subhalo_list[n].SubRankInGr;
720 MPI_Allreduce(MPI_IN_PLACE, &taskmain, 1, MPI_INT, MPI_MAX, SubComm);
721 MPI_Allreduce(MPI_IN_PLACE, &indexmain, 1, MPI_INT, MPI_MAX, SubComm);
723 subhalo_properties MainSubhalo;
725 if(taskmain == SubThisTask)
727 MainSubhalo = Subhalo[indexmain];
730 MPI_Send(&MainSubhalo,
sizeof(subhalo_properties), MPI_BYTE, 0,
TAG_N, SubComm);
736 MPI_Recv(&MainSubhalo,
sizeof(subhalo_properties), MPI_BYTE, taskmain,
TAG_N, SubComm, MPI_STATUS_IGNORE);
738 for(
int j = 0; j < 3; j++)
740 Group[gr].Pos[j] = MainSubhalo.Pos[j];
741 Group[gr].IntPos[j] = MainSubhalo.IntPos[j];
746 Mem.myfree(all_subhalo_list);
747 Mem.myfree(countloc_list);
748 Mem.myfree(subhalo_list);
750 Mem.myfree(countlist);
751 Mem.myfree(all_candidates);
752 Mem.myfree(loc_candidates);
753 Mem.myfree(elem_last);
754 Mem.myfree(NumPartGroup_list);
757 subfind_collective_printf(
"SUBFIND: root-task=%d: found %d bound substructures in FoF group of length %lld\n", ThisTask, countall,
762#include "../data/simparticles.h"
763template class fof<simparticles>;
765#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
766#include "../data/lcparticles.h"
767template class fof<lcparticles>;
global_data_all_processes All
void domain_decomposition(domain_options mode)
double timediff(double t0, double t1)
double mycxxsort(T *begin, T *end, Tcomp comp)
int myMPI_Allgatherv(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm)
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm)