12#include "gadgetconfig.h"
17#include <gsl/gsl_math.h>
25#include "../data/allvars.h"
26#include "../data/dtypes.h"
27#include "../data/intposconvert.h"
28#include "../data/mymalloc.h"
29#include "../domain/domain.h"
30#include "../fof/fof.h"
31#include "../gravtree/gravtree.h"
32#include "../logs/timer.h"
33#include "../main/simulation.h"
34#include "../mpi_utils/mpi_utils.h"
35#include "../sort/cxxsort.h"
36#include "../sort/parallel_sort.h"
37#include "../sort/peano.h"
38#include "../subfind/subfind.h"
39#include "../system/system.h"
41#define HIGHBIT (1 << 30)
43template <
typename partset>
48 for(
int i = 0; i < NumPartGroup; i++)
49 Tp->PS[IndexList[i]].InvIndex = i;
52 sd = (sort_density_data *)
Mem.mymalloc_movable(&sd,
"sd", NumPartGroup *
sizeof(sort_density_data));
56 FoFGravTree.treeallocate(Tp->NumPart, Tp, SubDomain);
57 FoFGravTree.treebuild(NumPartGroup, IndexList);
60 subfind_find_linkngb(SubDomain, NumPartGroup, IndexList);
66 Tp->R2Loc = (
typename partset::nearest_r2_data *)
Mem.mymalloc_movable(&Tp->R2Loc,
"R2Loc",
67 Tp->NumPart *
sizeof(
typename partset::nearest_r2_data));
70 subfind_find_nearesttwo(SubDomain, NumPartGroup, IndexList);
73 for(
int i = 0; i < NumPartGroup; i++)
75 if(IndexList[i] >= Tp->NumPart || IndexList[i] < 0)
78 sd[i].density = Tp->PS[IndexList[i]].u.s.u.DM_Density;
79 sd[i].ngbcount = Tp->PS[IndexList[i]].nearest.count;
80 sd[i].index = {SubThisTask, i};
81 sd[i].ngb_index1 = Tp->PS[IndexList[i]].nearest.index[0];
82 sd[i].ngb_index2 = Tp->PS[IndexList[i]].nearest.index[1];
84 sd[i].PrevSizeOfSubhalo = Tp->P[IndexList[i]].PrevSizeOfSubhalo;
86 sd[i].PrevSizeOfSubhalo.set(0);
89 Mem.myfree(Tp->R2Loc);
94 subfind_collective_printf(
"SUBFIND: root-task=%d: parallel sort of 'sd' done.\n", ThisTask);
97 FoFGravTree.treefree();
100 SFHead = (
location *)
Mem.mymalloc_movable(&SFHead,
"SFHead", NumPartGroup *
sizeof(
location));
101 SFNext = (
location *)
Mem.mymalloc_movable(&SFNext,
"SFNext", NumPartGroup *
sizeof(
location));
102 SFTail = (
location *)
Mem.mymalloc_movable(&SFTail,
"SFTail", NumPartGroup *
sizeof(
location));
104 SFPrevLen = (
double *)
Mem.mymalloc_movable(&SFPrevLen,
"SFPrevLen", NumPartGroup *
sizeof(
double));
106 for(
int i = 0; i < NumPartGroup; i++)
108 SFHead[i] = {-1, -1};
109 SFNext[i] = {-1, -1};
110 SFTail[i] = {-1, -1};
116 max_coll_candidates = std::max<int>((NumPartGroup / 50), 200);
118 (coll_cand_dat *)
Mem.mymalloc_movable(&coll_candidates,
"coll_candidates", max_coll_candidates *
sizeof(coll_cand_dat));
124 long long totgrouplen;
128 subfind_col_find_coll_candidates(totgrouplen);
134 subfind_collective_printf(
"SUBFIND: root-task=%d: total number of subhalo coll_candidates=%lld\n", ThisTask, totcand);
136 for(
int i = 0; i < NumPartGroup; i++)
137 SFTail[i] = {-1, -1};
140 for(
int i = 0; i < count_cand; i++)
141 coll_candidates[i].parent = 0;
143 int count_leaves = 0;
144 long long nremaining = totcand;
152 coll_cand_dat *tmp_coll_candidates = 0;
154 tmp_coll_candidates = (coll_cand_dat *)
Mem.mymalloc(
"tmp_coll_candidates", totcand *
sizeof(coll_cand_dat));
156 int *countlist = (
int *)
Mem.mymalloc(
"countlist", SubNTask *
sizeof(
int));
157 int *offset = (
int *)
Mem.mymalloc(
"offset", SubNTask *
sizeof(
int));
159 int count = count_cand *
sizeof(coll_cand_dat);
160 MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
163 for(
int i = 1; i < SubNTask; i++)
164 offset[i] = offset[i - 1] + countlist[i - 1];
167 MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);
171 for(
int k = 0; k < totcand; k++)
173 tmp_coll_candidates[k].nsub = k;
174 tmp_coll_candidates[k].subnr = k;
177 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_rank);
178 for(
int k = 0; k < totcand; k++)
180 if(tmp_coll_candidates[k].parent >= 0)
182 tmp_coll_candidates[k].parent = 0;
184 for(
int j = k + 1; j < totcand; j++)
186 if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
189 if(tmp_coll_candidates[j].parent < 0)
192 if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >=
193 tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
195 tmp_coll_candidates[k].parent++;
199 Terminate(
"k=%d|%lld has rank=%d and len=%d. j=%d has rank=%d and len=%d\n", k, totcand,
200 (
int)tmp_coll_candidates[k].rank, (
int)tmp_coll_candidates[k].len, j,
201 (
int)tmp_coll_candidates[j].rank, (
int)tmp_coll_candidates[j].len);
207 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_subnr);
211 MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);
214 Mem.myfree(countlist);
217 Mem.myfree(tmp_coll_candidates);
221 for(
int i = 0; i < count_cand; i++)
222 if(coll_candidates[i].parent == 0)
225 if(coll_candidates[i].len > 0.20 * Tp->TotNumPart / NTask)
226 coll_candidates[i].parent++;
229 if(coll_candidates[i].len > max_length)
230 max_length = coll_candidates[i].len;
237 MPI_Allreduce(MPI_IN_PLACE, &count_leaves, 1, MPI_INT, MPI_SUM, SubComm);
238 MPI_Allreduce(MPI_IN_PLACE, &max_length, 1, MPI_INT, MPI_MAX, SubComm);
242 subfind_collective_printf(
243 "SUBFIND: root-task=%d: number of subhalo coll_candidates that can be done independently=%d. (Largest size %d, finding took "
245 ThisTask, count_leaves, max_length,
Logs.
timediff(t0, t1));
248 if(count_leaves <= 0)
250 subfind_collective_printf(
"SUBFIND: root-task=%d: too few, let's do the rest of %d collectively\n", ThisTask, nremaining);
255 if(max_length > 0.5 * Tp->TotNumPart / NTask)
257 subfind_collective_printf(
"SUBFIND: root-task=%d: too big coll_candidates, I do the rest collectively\n", ThisTask);
261 nremaining -= count_leaves;
264 for(
int i = 0; i < Tp->NumPart; i++)
266 Tp->PS[i].u.s.origintask = SubThisTask;
267 Tp->PS[i].u.s.originindex = i;
269 Tp->PS[i].TargetTask = SubThisTask;
270 Tp->PS[i].submark = HIGHBIT;
273 for(
int i = 0; i < NumPartGroup; i++)
275 if(SFTail[i].index >= 0)
276 Tp->PS[IndexList[i]].u.s.origintask |= HIGHBIT;
281 for(
int master = 0; master < SubNTask; master++)
283 int ncand = count_cand;
284 MPI_Bcast(&ncand,
sizeof(ncand), MPI_BYTE, master, SubComm);
286 for(
int k = 0; k < ncand; k++)
290 if(SubThisTask == master)
292 len = coll_candidates[k].len;
293 parent = coll_candidates[k].parent;
296 MPI_Bcast(&len,
sizeof(len), MPI_BYTE, master, SubComm);
297 MPI_Bcast(&parent,
sizeof(parent), MPI_BYTE, master, SubComm);
298 MPI_Barrier(SubComm);
302 if(SubThisTask != master)
303 subfind_poll_for_requests();
306 location p = coll_candidates[k].head;
307 for(
MyLenType i = 0; i < coll_candidates[k].len; i++)
309 subfind_distlinklist_mark_particle(p, master, nsubs);
314 p = subfind_distlinklist_get_next(p);
318 for(
int i = 0; i < SubNTask; i++)
320 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
323 MPI_Barrier(SubComm);
333 for(
int i = 0; i < Tp->NumPart; i++)
334 Tp->PS[i].TargetIndex = Tp->PS[i].submark;
337 subfind_distribute_particles(SubComm);
339 PPS = (PPS_data *)
Mem.mymalloc(
"PPS", Tp->NumPart *
sizeof(PPS_data));
341 for(
int i = 0; i < Tp->NumPart; i++)
346 PPS = (PPS_data *)
Mem.mymalloc(
"PPS", Tp->NumPart *
sizeof(PPS_data));
348 for(
int i = 0; i < Tp->NumPart; i++)
350 PPS[i].submark = Tp->PS[i].submark;
354 mycxxsort(PPS, PPS + Tp->NumPart, subfind_compare_PPS);
357 MPI_Barrier(SubComm);
360 subfind_unbind_independent_ones(SingleDomain, count_cand);
362 MPI_Barrier(SubComm);
367 subfind_collective_printf(
"SUBFIND: root-task=%d: unbinding of independent ones took %g sec\n", ThisTask,
Logs.
timediff(ta, tb));
371 for(
int i = 0; i < Tp->NumPart; i++)
373 Tp->PS[i].u.s.origintask &= (HIGHBIT - 1);
374 Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
375 Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
379 subfind_distribute_particles(SubComm);
382 subfind_collective_printf(
"SUBFIND: root-task=%d: bringing the independent ones back took %g sec\n", ThisTask,
389 for(
int i = 0; i < NumPartGroup; i++)
390 if(Tp->PS[IndexList[i]].submark >= 0 && Tp->PS[IndexList[i]].submark < nsubs)
391 SFTail[i].index = Tp->PS[IndexList[i]].submark;
393 for(
int i = 0; i < count_cand; i++)
394 if(coll_candidates[i].parent == 0)
395 coll_candidates[i].parent = -1;
397 while(count_leaves > 0);
406 for(
int master = 0, nr = 0; master < SubNTask; master++)
408 int ncand = count_cand;
409 MPI_Bcast(&ncand,
sizeof(ncand), MPI_BYTE, master, SubComm);
411 for(
int k = 0; k < ncand; k++)
415 if(SubThisTask == master)
417 len = coll_candidates[k].len;
418 nsubs = coll_candidates[k].nsub;
419 parent = coll_candidates[k].parent;
422 MPI_Bcast(&parent,
sizeof(parent), MPI_BYTE, master, SubComm);
423 MPI_Barrier(SubComm);
427 MPI_Bcast(&len,
sizeof(len), MPI_BYTE, master, SubComm);
428 MPI_Bcast(&nsubs,
sizeof(nsubs), MPI_BYTE, master, SubComm);
430 subfind_collective_printf(
"SUBFIND: root-task=%d: collective unbinding of nr=%d (%d) of length=%d\n", ThisTask, nr,
431 nremaining, (
int)len);
439 unbind_list = (
int *)
Mem.mymalloc_movable(&unbind_list,
"unbind_list", NumPartGroup *
sizeof(
int));
441 if(SubThisTask != master)
442 subfind_poll_for_requests();
445 location p = coll_candidates[k].head;
446 for(
int i = 0; i < coll_candidates[k].len; i++)
451 subfind_distlinklist_add_particle(p);
453 p = subfind_distlinklist_get_next(p);
457 for(
int i = 0; i < SubNTask; i++)
459 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
462 if(LocalLen > NumPartGroup)
463 Terminate(
"LocalLen=%d > NumPartGroup=%d", LocalLen, NumPartGroup);
466 for(
int i = 0; i < LocalLen; i++)
468 unbind_list[i] = IndexList[unbind_list[i]];
469 if(unbind_list[i] < 0 || unbind_list[i] >= Tp->NumPart)
470 Terminate(
"bad! unbind_list[i]=%d\n", unbind_list[i]);
474 for(
int i = 0; i < Tp->NumPart; i++)
476 Tp->PS[i].u.s.origintask = SubThisTask;
477 Tp->PS[i].u.s.originindex = i;
478 Tp->PS[i].DomainFlag = 0;
481 for(
int i = 0; i < LocalLen; i++)
482 Tp->PS[unbind_list[i]].DomainFlag = 1;
484 Mem.myfree(unbind_list);
488 if(SubUnbindDomain.NumNodes != 0)
489 Terminate(
"SubDomain.NumNodes=%d\n", SubUnbindDomain.NumNodes);
491 SubUnbindDomain.domain_decomposition(mode);
494 subfind_distribute_particles(SubComm);
498 for(
int i = 0; i < Tp->NumPart; i++)
499 if(Tp->PS[i].DomainFlag)
502 unbind_list = (
int *)
Mem.mymalloc_movable(&unbind_list,
"unbind_list", LocalLen *
sizeof(
int));
506 for(
int i = 0; i < Tp->NumPart; i++)
507 if(Tp->PS[i].DomainFlag)
508 unbind_list[LocalLen++] = i;
510 LocalLen = subfind_unbind(&SubUnbindDomain, SubComm, unbind_list, LocalLen);
512 for(
int i = 0; i < Tp->NumPart; i++)
514 Tp->PS[i].DomainFlag = 0;
515 Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
516 Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
519 for(
int i = 0; i < LocalLen; i++)
520 Tp->PS[unbind_list[i]].DomainFlag = 1;
522 Mem.myfree(unbind_list);
524 SubUnbindDomain.domain_free();
527 subfind_distribute_particles(SubComm);
530 unbind_list = (
int *)
Mem.mymalloc_movable(&unbind_list,
"unbind_list", NumPartGroup *
sizeof(
int));
534 for(
int i = 0; i < Tp->NumPart; i++)
535 if(Tp->PS[i].DomainFlag)
537 if(LocalLen >= NumPartGroup)
538 Terminate(
"LocalLen=%d >= NumPartGroup=%d", LocalLen, NumPartGroup);
539 unbind_list[LocalLen++] = i;
542 subfind_collective_printf(
"SUBFIND: root-task=%d: bringing the collective ones back took %g sec\n", ThisTask,
546 for(
int i = 0; i < LocalLen; i++)
548 unbind_list[i] = Tp->PS[unbind_list[i]].InvIndex;
549 if(unbind_list[i] < 0 || unbind_list[i] >= NumPartGroup)
550 Terminate(
"ups, bad! unbind_list[i]=%d NumPartGroup=%d\n", unbind_list[i], NumPartGroup);
557 MPI_Allreduce(&LocalLen, &len, 1, MPI_INT, MPI_SUM, SubComm);
559 subfind_collective_printf(
560 "SUBFIND: root-task=%d: collective unbinding of nr=%d (%d) of length=%d, bound length=%d took %g sec\n", ThisTask,
561 nr - 1, nremaining, oldlen, (
int)len,
Logs.
timediff(tt0, tt1));
563 if(len >=
All.DesLinkNgb)
566 for(
int i = 0; i < LocalLen; i++)
567 SFTail[unbind_list[i]].index = nsubs;
569 if(SubThisTask == master)
570 coll_candidates[k].bound_length = len;
575 if(SubThisTask == master)
576 coll_candidates[k].bound_length = 0;
579 Mem.myfree(unbind_list);
585 subfind_collective_printf(
"SUBFIND: root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
590 for(
int k = 0; k < count_cand; k++)
591 if(coll_candidates[k].bound_length >=
All.DesLinkNgb)
593 if(coll_candidates[k].len <
All.DesLinkNgb)
594 Terminate(
"coll_candidates[k=%d].len=%lld bound=%lld\n", k, (
long long)coll_candidates[k].len,
595 (
long long)coll_candidates[k].bound_length);
600 MPI_Allreduce(MPI_IN_PLACE, &countall, 1, MPI_INT, MPI_SUM, SubComm);
602 subfind_collective_printf(
"SUBFIND: root-task=%d: found %d bound substructures in FoF group of length %lld\n", ThisTask, countall,
607 mycxxsort_parallel(coll_candidates, coll_candidates + count_cand, subfind_compare_coll_candidates_boundlength, SubComm);
609 coll_cand_dat *tmp_coll_candidates = 0;
612 tmp_coll_candidates = (coll_cand_dat *)
Mem.mymalloc(
"tmp_coll_candidates", totcand *
sizeof(coll_cand_dat));
614 int *countlist = (
int *)
Mem.mymalloc(
"countlist", SubNTask *
sizeof(
int));
615 int *offset = (
int *)
Mem.mymalloc(
"offset", SubNTask *
sizeof(
int));
617 int count_size = count_cand *
sizeof(coll_cand_dat);
618 MPI_Allgather(&count_size, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
621 for(
int i = 1; i < SubNTask; i++)
622 offset[i] = offset[i - 1] + countlist[i - 1];
624 MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);
628 for(
int k = 0; k < totcand; k++)
630 tmp_coll_candidates[k].subnr = k;
631 tmp_coll_candidates[k].parent = 0;
634 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_rank);
636 for(
int k = 0; k < totcand; k++)
638 for(
int j = k + 1; j < totcand; j++)
640 if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
643 if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
645 if(tmp_coll_candidates[k].bound_length >=
All.DesLinkNgb)
646 tmp_coll_candidates[j].parent = tmp_coll_candidates[k].subnr;
650 Terminate(
"k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d bound=%d\n", k, countall,
651 (
int)tmp_coll_candidates[k].rank, (
int)tmp_coll_candidates[k].len,
652 (
int)tmp_coll_candidates[k].bound_length, (
int)tmp_coll_candidates[j].rank,
653 (
int)tmp_coll_candidates[j].len, (
int)tmp_coll_candidates[j].bound_length);
658 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_subnr);
661 MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);
664 Mem.myfree(countlist);
667 Mem.myfree(tmp_coll_candidates);
671 subfind_collective_printf(
"SUBFIND: root-task=%d: determination of parent subhalo took %g sec (presently allocated %g MB)\n",
676 Group[gr].Nsubs = countall;
680 unbind_list = (
int *)
Mem.mymalloc_movable(&unbind_list,
"unbind_list", NumPartGroup *
sizeof(
int));
682 for(
int master = 0, subnr = 0; master < SubNTask; master++)
684 int ncand = count_cand;
685 MPI_Bcast(&ncand,
sizeof(ncand), MPI_BYTE, master, SubComm);
687 for(
int k = 0; k < ncand; k++)
691 if(SubThisTask == master)
693 len = coll_candidates[k].bound_length;
694 nsubs = coll_candidates[k].nsub;
695 parent = coll_candidates[k].parent;
698 MPI_Bcast(&len,
sizeof(len), MPI_BYTE, master, SubComm);
699 MPI_Barrier(SubComm);
703 MPI_Bcast(&nsubs,
sizeof(nsubs), MPI_BYTE, master, SubComm);
704 MPI_Bcast(&parent,
sizeof(parent), MPI_BYTE, master, SubComm);
708 if(SubThisTask != master)
709 subfind_poll_for_requests();
712 location p = coll_candidates[k].head;
713 for(
MyLenType i = 0; i < coll_candidates[k].len; i++)
715 subfind_distlinklist_add_bound_particles(p, nsubs);
716 p = subfind_distlinklist_get_next(p);
720 for(
int i = 0; i < SubNTask; i++)
722 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
726 MPI_Allreduce(&Nsubhalos, &max_nsubhalos, 1, MPI_INT, MPI_MAX, SubComm);
728 if(max_nsubhalos >= MaxNsubhalos)
731 warn(
"Nsubhalos=%d >= MaxNsubhalos=%d", max_nsubhalos, MaxNsubhalos);
733 MaxNsubhalos = 1.25 * max_nsubhalos;
735 Subhalo = (subhalo_properties *)
Mem.myrealloc_movable(Subhalo, MaxNsubhalos *
sizeof(subhalo_properties));
738 for(
int i = 0; i < LocalLen; i++)
740 unbind_list[i] = IndexList[unbind_list[i]];
742 if(unbind_list[i] < 0 || unbind_list[i] >= Tp->NumPart)
746 int marked = subfind_determine_sub_halo_properties(unbind_list, LocalLen, &Subhalo[Nsubhalos], SubComm);
748 for(
int i = 0; i < LocalLen; i++)
750 unbind_list[i] = Tp->PS[unbind_list[i]].InvIndex;
752 if(unbind_list[i] < 0 || unbind_list[i] >= NumPartGroup)
756 MPI_Allreduce(MPI_IN_PLACE, &marked, 1, MPI_INT, MPI_SUM, SubComm);
762 for(
int j = 0; j < 3; j++)
764 Group[gr].Pos[j] = Subhalo[Nsubhalos].Pos[j];
765 Group[gr].IntPos[j] = Subhalo[Nsubhalos].IntPos[j];
769#if defined(SUBFIND_ORPHAN_TREATMENT)
770 Group[gr].LenPrevMostBnd += marked;
773 Subhalo[Nsubhalos].GroupNr = GroupNr;
774 Subhalo[Nsubhalos].SubRankInGr = subnr;
775 Subhalo[Nsubhalos].SubParentRank = parent;
781 for(
int i = 0; i < LocalLen; i++)
783 Tp->PS[IndexList[unbind_list[i]]].SubRankInGr = subnr;
784#if defined(MERGERTREE)
785 Tp->PS[IndexList[unbind_list[i]]].SizeOfSubhalo.set(len);
794 subfind_collective_printf(
"SUBFIND: root-task=%d: determining substructure properties done\n", ThisTask);
796 Mem.myfree(unbind_list);
797 Mem.myfree(coll_candidates);
798 Mem.myfree(SFPrevLen);
809template <
typename partset>
810void fof<partset>::subfind_col_find_coll_candidates(
long long totgrouplen)
812 subfind_collective_printf(
"SUBFIND: root-task=%d: building distributed linked list. (presently allocated %g MB)\n", ThisTask,
822 for(
int master = 0; master < SubNTask; master++)
825 if(SubThisTask != master)
826 subfind_poll_for_requests();
830 for(
int k = 0; k < NumPartGroup; k++)
832 int ngbcount = sd[k].ngbcount;
833 location ngb_index1 = sd[k].ngb_index1;
834 location ngb_index2 = sd[k].ngb_index2;
840 subfind_distlinklist_set_all(sd[k].index, sd[k].index, sd[k].index, 1, {-1, -1}, sd[k].PrevSizeOfSubhalo);
845 if(ngb_index1.
task < 0 || ngb_index1.
task >= SubNTask)
846 Terminate(
"ngb_index1.task=%d SubNTask=%d", ngb_index1.
task, SubNTask);
848 location head = subfind_distlinklist_get_head(ngb_index1);
851 Terminate(
"We have a problem! head=%d for k=%d on task=%d\n", head.
index, k, SubThisTask);
855 subfind_distlinklist_get_tail_set_tail_increaselen(head, tail, sd[k].index, sd[k].PrevSizeOfSubhalo);
858 subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
860 subfind_distlinklist_set_next(tail, sd[k].index);
868 if(ngb_index1.
task < 0 || ngb_index1.
task >= SubNTask)
869 Terminate(
"ngb_index1.task=%d SubNTask=%d", ngb_index1.
task, SubNTask);
871 if(ngb_index2.
task < 0 || ngb_index2.
task >= SubNTask)
872 Terminate(
"ngb_index2.task=%d SubNTask=%d", ngb_index2.
task, SubNTask);
874 if(ngb_index1.
task == ngb_index2.
task)
876 subfind_distlinklist_get_two_heads(ngb_index1, ngb_index2, head, head_attach);
880 head = subfind_distlinklist_get_head(ngb_index1);
881 head_attach = subfind_distlinklist_get_head(ngb_index2);
884 if(head.
index == -1 || head_attach.
index == -1)
885 Terminate(
"We have a problem! head=%d/%d head_attach=%d/%d for k=%d on task=%d\n", head.
task, head.
index,
886 head_attach.
task, head_attach.
index, k, SubThisTask);
888 if(head != head_attach)
892 double prevlen, prevlen_attach;
894 subfind_distlinklist_get_tailandlen(head, tail, len, prevlen);
895 subfind_distlinklist_get_tailandlen(head_attach, tail_attach, len_attach, prevlen_attach);
897 bool swap_len =
false;
898 bool swap_prevlen =
false;
900 if(len_attach > len || (len_attach == len && head_attach < head))
903 if(prevlen > 0 && prevlen_attach > 0 && len >=
All.DesLinkNgb && len_attach >=
All.DesLinkNgb)
905 if(prevlen_attach > prevlen || (prevlen_attach == prevlen && swap_len ==
true))
909 swap_prevlen = swap_len;
926 double tmpprevlen = prevlen;
927 prevlen = prevlen_attach;
928 prevlen_attach = tmpprevlen;
934 if(len_attach >=
All.DesLinkNgb && len >=
All.DesLinkNgb)
938 if(swap_prevlen != swap_len)
941 "SUBFIND: TASK=%d: made a different main trunk decision due to previous length: prevlen=%g "
942 "prevlen_attach=%g len=%g len_attach=%g\n",
943 ThisTask, prevlen / len, prevlen_attach / len_attach, (
double)len, (
double)len_attach);
945 count_different_decisions++;
948 if(count_cand < max_coll_candidates)
950 coll_candidates[count_cand].len = len_attach;
951 coll_candidates[count_cand].head = head_attach;
955 Terminate(
"Task %d: count=%d, max=%d, npartgroup=%d\n", SubThisTask, count_cand, max_coll_candidates,
960 subfind_distlinklist_set_tailandlen(head, tail_attach, len + len_attach, prevlen + prevlen_attach);
961 subfind_distlinklist_set_next(tail, head_attach);
966 ss = subfind_distlinklist_set_head_get_next(ss, head);
968 while(ss.
index >= 0);
974 subfind_distlinklist_get_tail_set_tail_increaselen(head, tail, sd[k].index, sd[k].PrevSizeOfSubhalo);
977 subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
979 subfind_distlinklist_set_next(tail, sd[k].index);
988 for(
int k = 0; k < SubNTask; k++)
990 MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
993 MPI_Barrier(SubComm);
996 subfind_collective_printf(
"SUBFIND: root-task=%d: ma=%d/%d took %g sec\n", ThisTask, master, SubNTask,
Logs.
timediff(tt0, tt1));
1000 subfind_collective_printf(
"SUBFIND: root-task=%d: identification of primary coll_candidates took %g sec\n", ThisTask,
1007 for(
int master = 0; master < SubNTask; master++)
1009 if(SubThisTask != master)
1010 subfind_poll_for_requests();
1013 for(
int i = 0; i < NumPartGroup; i++)
1017 if(SFHead[i] == index)
1022 subfind_distlinklist_get_tailandlen(SFHead[i], tail, len, prevlen);
1023 location next = subfind_distlinklist_get_next(tail);
1025 if(next.
index == -1)
1031 subfind_distlinklist_set_next(prev, index);
1039 for(
int k = 0; k < SubNTask; k++)
1040 if(k != SubThisTask)
1041 MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
1044 MPI_Barrier(SubComm);
1045 MPI_Bcast(&head,
sizeof(head), MPI_BYTE, master, SubComm);
1046 MPI_Bcast(&prev,
sizeof(prev), MPI_BYTE, master, SubComm);
1049 if(SubThisTask == SubNTask - 1)
1051 if(count_cand < max_coll_candidates)
1053 coll_candidates[count_cand].len = totgrouplen;
1054 coll_candidates[count_cand].head = head;
1058 Terminate(
"count_cand=%d >= max_coll_candidates=%d", count_cand, max_coll_candidates);
1061 subfind_collective_printf(
"SUBFIND: root-task=%d: adding background as candidate\n", ThisTask);
1067 int master = head.
task;
1069 if(master < 0 || master >= SubNTask)
1070 Terminate(
"master=%d SubNTask=%d\n", master, SubNTask);
1072 if(SubThisTask != master)
1073 subfind_poll_for_requests();
1081 p = subfind_distlinklist_setrank_and_get_next(p, rank);
1085 for(
int i = 0; i < SubNTask; i++)
1087 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
1090 MPI_Barrier(SubComm);
1093 for(
int master = 0; master < SubNTask; master++)
1095 if(SubThisTask != master)
1096 subfind_poll_for_requests();
1099 for(
int k = 0; k < count_cand; k++)
1100 coll_candidates[k].rank = subfind_distlinklist_get_rank(coll_candidates[k].head);
1103 for(
int i = 0; i < SubNTask; i++)
1104 if(i != SubThisTask)
1105 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
1108 MPI_Barrier(SubComm);
1112 subfind_collective_printf(
1113 "SUBFIND: root-task=%d: establishing of rank order took %g sec (grouplen=%lld) presently allocated %g MB\n", ThisTask,
1117template <
typename partset>
1118void fof<partset>::subfind_unbind_independent_ones(
domain<partset> *SingleDomain,
int count_cand_l)
1120 unbind_list = (
int *)
Mem.mymalloc(
"unbind_list", Tp->NumPart *
sizeof(
int));
1122 mycxxsort(coll_candidates, coll_candidates + count_cand_l, subfind_compare_coll_candidates_nsubs);
1124 for(
int k = 0, ii = 0; k < count_cand_l; k++)
1125 if(coll_candidates[k].parent == 0)
1127 int i = PPS[ii].index;
1129 while(Tp->PS[i].submark < coll_candidates[k].nsub)
1134 if(i >= Tp->NumPart)
1138 if(Tp->PS[i].submark >= 0 && Tp->PS[i].submark < HIGHBIT)
1141 int nsubs = Tp->PS[i].submark;
1143 if(nsubs != coll_candidates[k].nsub)
1144 Terminate(
"TASK=%d i=%d k=%d nsubs=%d coll_candidates[k].nsub=%d\n", SubThisTask, i, k, nsubs, coll_candidates[k].nsub);
1146 while(i < Tp->NumPart)
1148 if(Tp->PS[i].submark == nsubs)
1150 Tp->PS[i].submark = HIGHBIT;
1151 if((Tp->PS[i].u.s.origintask & HIGHBIT) == 0)
1153 unbind_list[len] = i;
1164 len = subfind_unbind(SingleDomain, SingleDomain->
Communicator, unbind_list, len);
1166 if(len >=
All.DesLinkNgb)
1169 coll_candidates[k].bound_length = len;
1171 for(
int j = 0; j < len; j++)
1172 Tp->PS[unbind_list[j]].submark = nsubs;
1175 coll_candidates[k].bound_length = 0;
1179 Mem.myfree(unbind_list);
1233template <
typename partset>
1234void fof<partset>::subfind_poll_for_requests(
void)
1240 MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, SubComm, &status);
1242 int source = status.MPI_SOURCE;
1243 tag = status.MPI_TAG;
1248 case TAG_GET_TWOHEADS:
1251 MPI_Recv(ibuf, 2, MPI_INT, source, TAG_GET_TWOHEADS, SubComm, MPI_STATUS_IGNORE);
1253 buf[0] = SFHead[ibuf[0]];
1254 buf[1] = SFHead[ibuf[1]];
1255 MPI_Send(buf, 2 *
sizeof(
location), MPI_BYTE, source, TAG_GET_TWOHEADS_DATA, SubComm);
1259 case TAG_SET_NEWTAIL:
1262 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SET_NEWTAIL, SubComm, MPI_STATUS_IGNORE);
1264 int index = data.index;
1267 SFTail[index] = newtail;
1269 SFPrevLen[index] += data.prevlen.get();
1271 if(newtail.
task == SubThisTask)
1273 SFHead[newtail.
index] = {SubThisTask, index};
1274 SFNext[newtail.
index] = {-1, -1};
1277 if(oldtail.
task == SubThisTask)
1279 SFNext[oldtail.
index] = newtail;
1282 MPI_Send(&oldtail,
sizeof(
location), MPI_BYTE, source, TAG_GET_OLDTAIL, SubComm);
1289 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SET_ALL, SubComm, MPI_STATUS_IGNORE);
1290 int index = data.index;
1291 SFLen[index] = data.len;
1292 SFHead[index] = data.head;
1293 SFTail[index] = data.tail;
1294 SFNext[index] = data.next;
1295 SFPrevLen[index] = data.prevlen.get();
1299 case TAG_GET_TAILANDLEN:
1302 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1303 loc_compound2 data = {SFTail[index], SFLen[index], SFPrevLen[index]};
1304 MPI_Send(&data,
sizeof(data), MPI_BYTE, source, TAG_GET_TAILANDLEN_DATA, SubComm);
1308 case TAG_SET_TAILANDLEN:
1311 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SET_TAILANDLEN, SubComm, MPI_STATUS_IGNORE);
1312 int index = data.index;
1313 SFTail[index] = data.tail;
1314 SFLen[index] = data.len;
1315 SFPrevLen[index] = data.prevlen;
1319 case TAG_SET_HEADANDNEXT:
1322 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SET_HEADANDNEXT, SubComm, MPI_STATUS_IGNORE);
1323 int index = data.index;
1324 SFHead[index] = data.head;
1325 SFNext[index] = data.next;
1332 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SET_NEXT, SubComm, MPI_STATUS_IGNORE);
1333 int index = data.index;
1334 SFNext[index] = data.loc;
1338 case TAG_SETHEADGETNEXT:
1341 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SETHEADGETNEXT, SubComm, MPI_STATUS_IGNORE);
1342 int index = data.index;
1348 SFHead[index] = head;
1349 next = SFNext[index];
1353 while(next.
index >= 0 && task == SubThisTask);
1354 MPI_Send(&next,
sizeof(
location), MPI_BYTE, source, TAG_SETHEADGETNEXT_DATA, SubComm);
1361 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1362 MPI_Send(&SFNext[index],
sizeof(
location), MPI_BYTE, source, TAG_GET_NEXT_DATA, SubComm);
1369 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1370 MPI_Send(&SFHead[index],
sizeof(
location), MPI_BYTE, source, TAG_GET_HEAD_DATA, SubComm);
1374 case TAG_ADD_PARTICLE:
1377 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1378 if(SFTail[index].index < 0)
1380 unbind_list[LocalLen] = index;
1381 if(index >= NumPartGroup)
1382 Terminate(
"What: index=%d NumPartGroup=%d\n", index, NumPartGroup);
1388 case TAG_MARK_PARTICLE:
1391 MPI_Recv(ibuf, 3, MPI_INT, source, TAG_MARK_PARTICLE, SubComm, MPI_STATUS_IGNORE);
1392 int index = ibuf[0];
1393 int target = ibuf[1];
1394 int submark = ibuf[2];
1396 if(Tp->PS[IndexList[index]].submark != HIGHBIT)
1397 Terminate(
"TasK=%d i=%d P[i].submark=%d?\n", SubThisTask, IndexList[index], Tp->PS[IndexList[index]].submark);
1399 Tp->PS[IndexList[index]].TargetTask = target;
1400 Tp->PS[IndexList[index]].submark = submark;
1407 MPI_Recv(ibuf, 2, MPI_INT, source, TAG_ADDBOUND, SubComm, &status);
1408 int index = ibuf[0];
1410 if(SFTail[index].index == nsub)
1412 unbind_list[LocalLen] = index;
1421 MPI_Recv(&data,
sizeof(data), MPI_BYTE, source, TAG_SETRANK, SubComm, MPI_STATUS_IGNORE);
1422 int index = data.loc.index;
1427 SFLen[index] = rank++;
1428 next = SFNext[index];
1433 while(next.
task == SubThisTask);
1436 MPI_Send(&data,
sizeof(data), MPI_BYTE, source, TAG_SETRANK_OUT, SubComm);
1443 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1445 MPI_Send(&rank,
sizeof(
MyLenType), MPI_BYTE, source, TAG_GET_RANK_DATA, SubComm);
1449 case TAG_POLLING_DONE:
1452 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1457 Terminate(
"tag not present in the switch");
1461 while(tag != TAG_POLLING_DONE);
1464template <
typename partset>
1467 int task = loc.
task;
1472 if(SubThisTask == task)
1479 loc_compound6 data = {loc, rank};
1480 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SETRANK, SubComm);
1481 MPI_Recv(&data,
sizeof(data), MPI_BYTE, task, TAG_SETRANK_OUT, SubComm, MPI_STATUS_IGNORE);
1488template <
typename partset>
1491 int task = loc.
task;
1496 if(SubThisTask == task)
1503 loc_compound1 data = {i, head};
1504 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SETHEADGETNEXT, SubComm);
1505 MPI_Recv(&next,
sizeof(
location), MPI_BYTE, task, TAG_SETHEADGETNEXT_DATA, SubComm, MPI_STATUS_IGNORE);
1511template <
typename partset>
1512void fof<partset>::subfind_distlinklist_set_next(
location loc,
location next)
1514 int task = loc.
task;
1517 if(SubThisTask == task)
1523 loc_compound1 data = {i, next};
1524 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SET_NEXT, SubComm);
1528template <
typename partset>
1529void fof<partset>::subfind_distlinklist_add_particle(
location loc)
1531 int task = loc.
task;
1534 if(SubThisTask == task)
1536 if(SFTail[i].index < 0)
1538 if(i >= NumPartGroup)
1539 Terminate(
"What: index=%d NumPartGroup=%d\n", i, NumPartGroup);
1541 unbind_list[LocalLen] = i;
1547 MPI_Send(&i, 1, MPI_INT, task, TAG_ADD_PARTICLE, SubComm);
1551template <
typename partset>
1552void fof<partset>::subfind_distlinklist_mark_particle(
location loc,
int target,
int submark)
1554 int task = loc.
task;
1557 if(SubThisTask == task)
1559 if(Tp->PS[IndexList[i]].submark != HIGHBIT)
1560 Terminate(
"Tas=%d i=%d PS[i].submark=%d?\n", SubThisTask, i, Tp->PS[IndexList[i]].submark);
1562 Tp->PS[IndexList[i]].TargetTask = target;
1563 Tp->PS[IndexList[i]].submark = submark;
1567 int ibuf[3] = {i, target, submark};
1568 MPI_Send(ibuf, 3, MPI_INT, task, TAG_MARK_PARTICLE, SubComm);
1572template <
typename partset>
1573void fof<partset>::subfind_distlinklist_add_bound_particles(
location loc,
int nsub)
1575 int task = loc.
task;
1578 if(SubThisTask == task)
1580 if(SFTail[i].index == nsub)
1582 unbind_list[LocalLen] = i;
1588 int ibuf[2] = {i, nsub};
1589 MPI_Send(ibuf, 2, MPI_INT, task, TAG_ADDBOUND, SubComm);
1593template <
typename partset>
1596 int task = loc.
task;
1601 if(SubThisTask == task)
1607 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_NEXT, SubComm);
1608 MPI_Recv(&next,
sizeof(
location), MPI_BYTE, task, TAG_GET_NEXT_DATA, SubComm, MPI_STATUS_IGNORE);
1614template <
typename partset>
1617 int task = loc.
task;
1622 if(SubThisTask == task)
1628 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_RANK, SubComm);
1629 MPI_Recv(&rank,
sizeof(
MyLenType), MPI_BYTE, task, TAG_GET_RANK_DATA, SubComm, MPI_STATUS_IGNORE);
1635template <
typename partset>
1638 int task = loc.
task;
1643 if(SubThisTask == task)
1649 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_HEAD, SubComm);
1650 MPI_Recv(&head,
sizeof(
location), MPI_BYTE, task, TAG_GET_HEAD_DATA, SubComm, MPI_STATUS_IGNORE);
1656template <
typename partset>
1659 if(ngb_index1.
task != ngb_index2.
task)
1660 Terminate(
"ngb_index1.task != ngb_index2.task");
1662 int task = ngb_index1.
task;
1663 int i1 = ngb_index1.
index;
1664 int i2 = ngb_index2.
index;
1666 if(SubThisTask == task)
1669 head_attach = SFHead[i2];
1673 int ibuf[2] = {i1, i2};
1674 MPI_Send(ibuf, 2, MPI_INT, task, TAG_GET_TWOHEADS, SubComm);
1676 MPI_Recv(buf, 2 *
sizeof(
location), MPI_BYTE, task, TAG_GET_TWOHEADS_DATA, SubComm, MPI_STATUS_IGNORE);
1678 head_attach = buf[1];
1682template <
typename partset>
1685 int task = loc.
task;
1688 if(SubThisTask == task)
1695 loc_compound4 data = {i, head, next};
1696 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SET_HEADANDNEXT, SubComm);
1700template <
typename partset>
1703 int task = loc.
task;
1708 if(SubThisTask == task)
1711 SFTail[i] = newtail;
1713 SFPrevLen[i] += prevlen.
get();
1716 if(newtail.
task == SubThisTask)
1718 SFHead[newtail.
index] = loc;
1719 SFNext[newtail.
index] = {-1, -1};
1723 if(oldtail.
task == SubThisTask)
1725 SFNext[oldtail.
index] = newtail;
1731 loc_compound0 data = {i, newtail, prevlen};
1732 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SET_NEWTAIL, SubComm);
1734 MPI_Recv(&oldtail,
sizeof(
location), MPI_BYTE, task, TAG_GET_OLDTAIL, SubComm, MPI_STATUS_IGNORE);
1737 if(newtail.
task == task)
1739 if(oldtail.
task == task)
1746template <
typename partset>
1749 int task = loc.
task;
1752 if(SubThisTask == task)
1756 SFPrevLen[i] = prevlen;
1760 loc_compound3 data = {i, len, tail, prevlen};
1761 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SET_TAILANDLEN, SubComm);
1765template <
typename partset>
1768 int task = loc.
task;
1771 if(SubThisTask == task)
1775 prevlen = SFPrevLen[i];
1779 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_TAILANDLEN, SubComm);
1782 MPI_Recv(&data,
sizeof(data), MPI_BYTE, task, TAG_GET_TAILANDLEN_DATA, SubComm, MPI_STATUS_IGNORE);
1785 prevlen = data.prevlen;
1789template <
typename partset>
1793 int task = loc.
task;
1796 if(SubThisTask == task)
1802 SFPrevLen[i] = prevlen.
get();
1806 loc_compound5 data = {i, len, head, tail, next, prevlen};
1807 MPI_Send(&data,
sizeof(data), MPI_BYTE, task, TAG_SET_ALL, SubComm);
1812#include "../data/simparticles.h"
1813template class fof<simparticles>;
1815#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
1816#include "../data/lcparticles.h"
1817template class fof<lcparticles>;
global_data_all_processes All
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
double mycxxsort(T *begin, T *end, Tcomp comp)
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm)
void myflush(FILE *fstream)