subfind_history.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#ifdef SUBFIND
#if defined(MERGERTREE) && defined(SUBFIND_HBT)

#include <gsl/gsl_math.h>
#include <mpi.h>
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/intposconvert.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../fof/fof.h"
#include "../gravtree/gravtree.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/cxxsort.h"
#include "../sort/parallel_sort.h"
#include "../sort/peano.h"
#include "../subfind/subfind.h"
#include "../system/system.h"

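/* This routine processes a single FoF group with the hierarchical bound tracing (HBT) approach:
 * subhalo candidates are seeded from the subhalo membership of the previous output (PrevSubhaloNr),
 * their sizes are determined, candidates that are too small or that correspond to previously
 * unbound material are dropped, the largest one is set aside as the background halo, and the
 * remaining candidates are gravitationally unbound either serially on single tasks or
 * collectively. The surviving subhalos are then stored and ranked within the group.
 */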
template <typename partset>
void fof<partset>::subfind_hbt_single_group(domain<partset> *SubDomain, domain<partset> *SingleDomain, domain_options mode, int gr)
{
  /* get total group length */
  long long totgrouplen;
  sumup_large_ints(1, &NumPartGroup, &totgrouplen, SubComm);

  /******************* determine subhalo candidates based on previous subhalo catalogue ***************/

  hbt_pcand_t *pcand = (hbt_pcand_t *)Mem.mymalloc_movable(&pcand, "pcand", (NumPartGroup + 1) * sizeof(hbt_pcand_t));

  for(int k = 0; k < NumPartGroup; k++)
    {
      /* provisionally assign a new subhalo number based on the previous group catalogue - this will be modified in the following */
      Tp->PS[IndexList[k]].SubhaloNr = Tp->P[IndexList[k]].PrevSubhaloNr;

      pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
      pcand[k].PrevSizeOfSubhalo = Tp->P[IndexList[k]].PrevSizeOfSubhalo;
      pcand[k].index = -1;  // just put here to signal that this is invalid at this stage (note that we'll do a parallel sort)
    }

  /* sort according to subhalonr */
  mycxxsort_parallel(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr, SubComm);

  int *NumPartGroup_list = (int *)Mem.mymalloc_movable(&NumPartGroup_list, "NumPartGroup_list", SubNTask * sizeof(int));
  MPI_Allgather(&NumPartGroup, sizeof(int), MPI_BYTE, NumPartGroup_list, sizeof(int), MPI_BYTE, SubComm);

  /* get the last element of each task */
  hbt_pcand_t *elem_last = (hbt_pcand_t *)Mem.mymalloc_movable(&elem_last, "elem_last", SubNTask * sizeof(hbt_pcand_t));

  /* note: the 0th element is guaranteed to be allocated even on ranks with zero NumPartGroup */
  MPI_Allgather(&pcand[NumPartGroup > 0 ? NumPartGroup - 1 : 0], sizeof(hbt_pcand_t), MPI_BYTE, elem_last, sizeof(hbt_pcand_t),
                MPI_BYTE, SubComm);

  /* if a new section begins on the current processor, we register it on this processor as a candidate */
  /* initialize the number of current candidates */
  bool element_before_present = false;
  hbt_pcand_t element_before{};

  for(int task = SubThisTask - 1; task >= 0; task--)
    {
      if(NumPartGroup_list[task] > 0)
        {
          element_before_present = true;
          element_before = elem_last[task];
          break;
        }
    }

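  /* count how many sections with a distinct previous subhalo number start on this task;
   * each such section becomes one local subhalo candidate */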
  int marked = 0;
  count_cand = 0;

  for(int i = 0; i < NumPartGroup; i++)
    {
      if(i == 0 && !element_before_present)
        count_cand++;
      else
        {
          MyHaloNrType prevnr;

          if(i == 0 && element_before_present)
            prevnr = element_before.SubhaloNr;
          else
            prevnr = pcand[i - 1].SubhaloNr;

          if(pcand[i].SubhaloNr != prevnr)
            count_cand++;
        }
    }

  /* allocate a list to store local subhalo candidates for this group */
  hbt_subcand_t *loc_candidates =
      (hbt_subcand_t *)Mem.mymalloc_movable(&loc_candidates, "loc_candidates", count_cand * sizeof(hbt_subcand_t));

  count_cand = 0;

  for(int i = 0; i < NumPartGroup; i++)
    {
      if(i == 0 && !element_before_present)
        {
          loc_candidates[count_cand].SubhaloNr = pcand[i].SubhaloNr;
          count_cand++;
        }
      else
        {
          MyHaloNrType prevnr;

          if(i == 0 && element_before_present)
            prevnr = element_before.SubhaloNr;
          else
            prevnr = pcand[i - 1].SubhaloNr;

          if(pcand[i].SubhaloNr != prevnr)
            {
              loc_candidates[count_cand].SubhaloNr = pcand[i].SubhaloNr;
              count_cand++;
            }
        }
    }

  /* establish total number of candidates */
  long long totcand;
  sumup_large_ints(1, &count_cand, &totcand, SubComm);

  int nsubhalos_old = Nsubhalos;

  if(Nsubhalos + totcand + 1 > MaxNsubhalos)
    {
      // warn("Nsubhalos=%d + totcand=%lld >= MaxNsubhalos=%d", Nsubhalos, totcand, MaxNsubhalos);

      MaxNsubhalos = 1.25 * (Nsubhalos + totcand + 1);

      Subhalo = (subhalo_properties *)Mem.myrealloc_movable(Subhalo, MaxNsubhalos * sizeof(subhalo_properties));
    }

  /* assemble a list of the candidates on all tasks */
  hbt_subcand_t *all_candidates =
      (hbt_subcand_t *)Mem.mymalloc_movable(&all_candidates, "all_candidates", (totcand + 1) * sizeof(hbt_subcand_t));

  int *countlist = (int *)Mem.mymalloc_movable(&countlist, "countlist", SubNTask * sizeof(int));
  int *offset = (int *)Mem.mymalloc_movable(&offset, "offset", SubNTask * sizeof(int));

  int count = count_cand * sizeof(hbt_subcand_t); /* length in bytes */
  MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);

  offset[0] = 0;
  for(int i = 1; i < SubNTask; i++)
    offset[i] = offset[i - 1] + countlist[i - 1];

  myMPI_Allgatherv(loc_candidates, count, MPI_BYTE, all_candidates, countlist, offset, MPI_BYTE, SubComm);

  /* sort the candidates by subhalonr */
  mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);

  /* now determine the size of the candidates */
  long long *size_list = (long long *)Mem.mymalloc_clear("size_list", totcand * sizeof(long long));
  long long *summed_prevsize_list = (long long *)Mem.mymalloc_clear("summed_prevsize_list", totcand * sizeof(long long));

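  /* walk the locally sorted particle list in lockstep with the globally sorted candidate list and
   * accumulate for every candidate its current particle number and the summed size of the previous
   * subhalo its particles came from */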
  int j = 0;
  for(int k = 0; k < NumPartGroup; k++)
    {
      if(j >= totcand)
        Terminate("can't be: k=%d j=%d NumPartGroup=%d totcand=%lld\n", k, j, NumPartGroup, totcand);

      while(j < totcand && pcand[k].SubhaloNr > all_candidates[j].SubhaloNr)
        j++;

      if(pcand[k].SubhaloNr != all_candidates[j].SubhaloNr)
        Terminate("can't be: k=%d NumPartGroup=%d pcand[k].SubhaloNr=%lld j=%d all_candidates[j].SubhaloNr=%lld\n", k,
                  NumPartGroup, (long long)pcand[k].SubhaloNr.get(), j, (long long)all_candidates[j].SubhaloNr.get());

      size_list[j]++;
      summed_prevsize_list[j] += pcand[k].PrevSizeOfSubhalo.get();
    }

  MPI_Allreduce(MPI_IN_PLACE, size_list, totcand, MPI_LONG_LONG, MPI_SUM, SubComm);
  MPI_Allreduce(MPI_IN_PLACE, summed_prevsize_list, totcand, MPI_LONG_LONG, MPI_SUM, SubComm);

  for(int i = 0; i < totcand; i++)
    {
      all_candidates[i].len = size_list[i];
      all_candidates[i].summedprevlen = summed_prevsize_list[i];
    }

  Mem.myfree(summed_prevsize_list);
  Mem.myfree(size_list);

  /* do a sanity test */
  long long lensum = 0;
  for(int i = 0; i < totcand; i++)
    lensum += all_candidates[i].len;

  if(lensum != totgrouplen)
    Terminate("lensum=%lld != totgrouplen=%lld\n", lensum, totgrouplen);

  /*******************************************/

  /* find the group of previously unbound ones, and if this candidate exists, eliminate it */
  for(int i = 0; i < totcand; i++)
    if(all_candidates[i].SubhaloNr.get() == HALONR_MAX)
      {
        all_candidates[i] = all_candidates[totcand - 1];
        totcand--;
        break;
      }

  /* let's now eliminate small groups and flag the corresponding particles as unbound */
  {
    /* sort the candidates according to previous subhalonr */
    mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);

    /* reestablish a locally sorted pcand */
    for(int k = 0; k < NumPartGroup; k++)
      {
        pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
        pcand[k].index = IndexList[k];
      }
    mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);

    long long sum = 0;

    int p = 0;

    for(int i = 0; i < totcand; i++)
      if(all_candidates[i].len < All.DesLinkNgb)
        {
          while(p < NumPartGroup && pcand[p].SubhaloNr < all_candidates[i].SubhaloNr)
            p++;

          while(p < NumPartGroup && pcand[p].SubhaloNr == all_candidates[i].SubhaloNr)
            {
              if(Tp->PS[pcand[p].index].SubhaloNr != all_candidates[i].SubhaloNr)
                Terminate(
                    "we have an issue! p=%d NumPartGroup=%d pcand[p].index=%d pcand[p].SubhaloNr=%lld "
                    "all_candidates[i].SubhaloNr=%lld Tp->P[pcand[p].index].SubhaloNr=%lld",
                    p, NumPartGroup, pcand[p].index, (long long)pcand[p].SubhaloNr.get(), (long long)all_candidates[i].SubhaloNr.get(),
                    (long long)Tp->PS[pcand[p].index].SubhaloNr.get());

              pcand[p].SubhaloNr.set(HALONR_MAX);
              Tp->PS[pcand[p].index].SubhaloNr.set(HALONR_MAX);
              p++;
              sum++;
            }
        }

    MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_LONG_LONG, MPI_SUM, SubComm);

    long long sum2 = 0;

    for(int i = 0; i < totcand; i++)
      if(all_candidates[i].len < All.DesLinkNgb)
        {
          sum2 += all_candidates[i].len;
          all_candidates[i] = all_candidates[totcand - 1];
          totcand--;
          i--;
        }

    if(sum != sum2)
      Terminate("consistency check failed sum = %lld sum2 = %lld\n", sum, sum2);

    /* sort according to subhalonr */
    mycxxsort_parallel(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr, SubComm);
  }

  /* if a largest one exists, eliminate it, because this is the one we will treat as background halo */
  if(totcand > 0)
    {
      /* sort the candidates by size */
      mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_len);

      /* sort the candidates by summed previous length, as this is arguably a more robust decision of which one should be the largest
       */
      mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_summedprevlen);

      totcand--;
      for(int k = 0; k < NumPartGroup; k++)
        if(Tp->PS[IndexList[k]].SubhaloNr == all_candidates[totcand].SubhaloNr)
          Tp->PS[IndexList[k]].SubhaloNr.set(HALONR_MAX);
    }

  /* sort the candidates according to previous subhalonr */
  mycxxsort(all_candidates, all_candidates + totcand, subfind_hbt_compare_subcand_subhalonr);

  subfind_collective_printf("SUBFIND: root-task=%d: total number of subhalo coll_candidates=%lld\n", ThisTask, totcand);

  /*******************************************/
  /* Let's now see which candidates can be treated with serial CPUs, which is more efficient than doing them all collectively.
   * We identify them with those candidates that are sufficiently small, which should be most of them by number.
   */

  int n_small_cand = 0;
  int max_length = 0;

  int task = 0;
  int task_scatter = 0;
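
  /* assign each sufficiently small candidate to a single task in round-robin fashion for serial
   * treatment, while the large ones are flagged with DoIt = false for later collective unbinding */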
  for(int i = 0; i < totcand; i++)
    {
      if(all_candidates[i].len < 0.20 * Tp->TotNumPart / NTask)  // small enough
        {
          all_candidates[i].DoIt = true;
          all_candidates[i].TargetTask = task++;
          all_candidates[i].TargetIndex = n_small_cand;

          if(task >= SubNTask)
            task = 0;

          if(all_candidates[i].len > max_length)
            max_length = all_candidates[i].len;

          n_small_cand++;
        }
      else
        {
          all_candidates[i].DoIt = false;
          all_candidates[i].TargetTask = task_scatter++;
          all_candidates[i].TargetIndex = INT_MAX;
          if(task_scatter >= SubNTask)
            task_scatter = 0;
        }
    }

  subfind_collective_printf(
      "SUBFIND: root-task=%d: number of subhalo candidates small enough to be done with one cpu: %d. (Largest size %d)\n", ThisTask,
      n_small_cand, max_length);

  /* now get target information to particles */
  for(int k = 0; k < NumPartGroup; k++)
    {
      pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
      pcand[k].index = IndexList[k];
    }

  // note: a local serial sort is sufficient here
  mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);

  if(SubNTask > 1)
    {
      /* we only need to redistribute the particles if we are processing groups collectively */
      /* Note: the setting of TargetIndex makes sure that the particles are grouped together on the target task */

      /* set default values for current particle distribution */
      for(int i = 0; i < Tp->NumPart; i++)
        {
          Tp->PS[i].u.s.origintask = SubThisTask;
          Tp->PS[i].u.s.originindex = i;

          Tp->PS[i].TargetTask = SubThisTask;
          Tp->PS[i].TargetIndex = INT_MAX;
        }

      int i = 0;
      for(int k = 0; k < totcand; k++)
        if(all_candidates[k].DoIt)
          {
            while(i < NumPartGroup && pcand[i].SubhaloNr < all_candidates[k].SubhaloNr)
              i++;

            while(i < NumPartGroup && pcand[i].SubhaloNr == all_candidates[k].SubhaloNr)
              {
                Tp->PS[pcand[i].index].TargetTask = all_candidates[k].TargetTask;
                Tp->PS[pcand[i].index].TargetIndex = all_candidates[k].TargetIndex;
                i++;
              }
          }

      /* assemble the particles on individual processors (note: IndexList[] becomes temporarily meaningless) */
      subfind_distribute_particles(SubComm);
    }

  /* now do the serial unbinding */
  /*----------------------------------------------------*/

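  /* each task now walks the particles assigned to it (sorted by SubhaloNr), unbinds the small
   * candidates it is responsible for, and registers every survivor with at least DesLinkNgb bound
   * particles as a new subhalo */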
  {
    unbind_list = (int *)Mem.mymalloc("unbind_list", Tp->NumPart * sizeof(int));

    int i = 0;  // particle index

    for(int k = 0; k < totcand; k++)
      if(all_candidates[k].DoIt)
        if(all_candidates[k].TargetTask == SubThisTask)
          {
            int len = 0;

            if(SubNTask > 1)
              {
                while(i < Tp->NumPart && Tp->PS[i].SubhaloNr < all_candidates[k].SubhaloNr)
                  i++;

                while(i < Tp->NumPart && Tp->PS[i].SubhaloNr == all_candidates[k].SubhaloNr && Tp->PS[i].GroupNr.get() == GroupNr)
                  {
                    unbind_list[len] = i;
                    len++;
                    i++;
                  }
              }
            else
              {
                while(i < NumPartGroup && Tp->PS[pcand[i].index].SubhaloNr < all_candidates[k].SubhaloNr)
                  i++;

                while(i < NumPartGroup && Tp->PS[pcand[i].index].SubhaloNr == all_candidates[k].SubhaloNr &&
                      Tp->PS[pcand[i].index].GroupNr.get() == GroupNr)
                  {
                    unbind_list[len] = pcand[i].index;
                    len++;
                    i++;
                  }
              }

            if(len != all_candidates[k].len)
              Terminate("this is unexpected: k=%d len=%lld != all_candidates[k].len=%lld) \n", k, (long long)len,
                        (long long)all_candidates[k].len);

            /* default is that all particles end up unbound */
            for(int n = 0; n < len; n++)
              Tp->PS[unbind_list[n]].SubhaloNr.set(HALONR_MAX);

            /* call the serial unbind function */
            len = subfind_unbind(SingleDomain, SingleDomain->Communicator, unbind_list, len);

            if(len >= All.DesLinkNgb)
              {
                /* set as provisional group number the previous group number */
                for(int n = 0; n < len; n++)
                  {
                    Tp->PS[unbind_list[n]].SubhaloNr = all_candidates[k].SubhaloNr;
                    Tp->PS[unbind_list[n]].SizeOfSubhalo.set(len);
                  }

                if(Nsubhalos >= MaxNsubhalos)
                  Terminate("no storage: Nsubhalos=%d MaxNsubhalos=%d nsubhalos_old=%d totcand=%lld\n", Nsubhalos, MaxNsubhalos,
                            nsubhalos_old, totcand);

                /* ok, we found a substructure */
                marked += subfind_determine_sub_halo_properties(unbind_list, len, &Subhalo[Nsubhalos], SingleDomain->Communicator);

                Subhalo[Nsubhalos].GroupNr = GroupNr;
                Subhalo[Nsubhalos].SubParentRank = 0;
                Subhalo[Nsubhalos].SubhaloNr = all_candidates[k].SubhaloNr.get();

                Nsubhalos++;
              }
          }

    Mem.myfree(unbind_list);
  }

  if(SubNTask > 1)
    {
      /* bring them back to their original processor */
      for(int i = 0; i < Tp->NumPart; i++)
        {
          Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
          Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
        }

      subfind_distribute_particles(SubComm);
    }

  double t0 = Logs.second();

  /**************************************************/
  /**************************************************/
  /******* now do remaining ones collectively *****/

  /* first, add a fiducial candidate which will be our background halo, swallowing all unbound particles */

  all_candidates[totcand].DoIt = false; /* marks collective ones */
  all_candidates[totcand].SubhaloNr.set(HALONR_MAX);
  totcand++;

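  /* loop over the remaining (collective) candidates: flag the particles of each one, build a
   * domain decomposition restricted to them if needed, unbind either with the full SubComm
   * (COLL_SUBFIND mode) or within the single-task domain, and register sufficiently large
   * survivors as subhalos */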
  for(int k = 0; k < totcand; k++)
    if(all_candidates[k].DoIt == false)
      {
        domain<partset> SubUnbindDomain{SubComm, Tp};

        int *unbind_list;

        if(mode == COLL_SUBFIND)
          {
            for(int i = 0; i < Tp->NumPart; i++)
              {
                Tp->PS[i].u.s.origintask = SubThisTask;
                Tp->PS[i].u.s.originindex = i;
                Tp->PS[i].DomainFlag = 0;
              }

            /* mark the one to be unbound in PS[] */
            for(int i = 0; i < NumPartGroup; i++)
              if(Tp->PS[IndexList[i]].SubhaloNr == all_candidates[k].SubhaloNr)
                Tp->PS[IndexList[i]].DomainFlag = 1;

            SubUnbindDomain.domain_decomposition(mode);
            subfind_distribute_particles(SubComm);

            LocalLen = 0;
            for(int i = 0; i < Tp->NumPart; i++)
              if(Tp->PS[i].DomainFlag)
                LocalLen++;

            unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", LocalLen * sizeof(int));

            /* refill unbind_list */
            LocalLen = 0;
            for(int i = 0; i < Tp->NumPart; i++)
              if(Tp->PS[i].DomainFlag)
                unbind_list[LocalLen++] = i;
          }

        else
          {
            unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));

            LocalLen = 0;
            for(int i = 0; i < NumPartGroup; i++)
              if(Tp->PS[IndexList[i]].SubhaloNr == all_candidates[k].SubhaloNr)
                unbind_list[LocalLen++] = IndexList[i];
          }

        /* default is that all particles end up unbound */
        for(int n = 0; n < LocalLen; n++)
          Tp->PS[unbind_list[n]].SubhaloNr.set(HALONR_MAX);

        if(mode == COLL_SUBFIND)
          LocalLen = subfind_unbind(&SubUnbindDomain, SubComm, unbind_list, LocalLen);
        else
          LocalLen = subfind_unbind(SingleDomain, SingleDomain->Communicator, unbind_list, LocalLen);

        int FullLen;
        MPI_Allreduce(&LocalLen, &FullLen, 1, MPI_INT, MPI_SUM, SubComm);

        if(FullLen >= All.DesLinkNgb)
          {
            if(all_candidates[k].SubhaloNr.get() == HALONR_MAX)
              all_candidates[k].SubhaloNr.set(HALONR_MAX - 1);

            /* set as provisional group number the previous group number */
            for(int n = 0; n < LocalLen; n++)
              {
                Tp->PS[unbind_list[n]].SubhaloNr = all_candidates[k].SubhaloNr;
#if defined(MERGERTREE)
                Tp->PS[unbind_list[n]].SizeOfSubhalo.set(FullLen);
#endif
              }

            if(Nsubhalos >= MaxNsubhalos)
              Terminate("no storage: Nsubhalos=%d MaxNsubhalos=%d nsubhalos_old=%d totcand=%lld\n", Nsubhalos, MaxNsubhalos,
                        nsubhalos_old, totcand);

            marked += subfind_determine_sub_halo_properties(unbind_list, LocalLen, &Subhalo[Nsubhalos], SubComm);

            if(SubThisTask == 0)
              {
                Subhalo[Nsubhalos].GroupNr = GroupNr;
                Subhalo[Nsubhalos].SubParentRank = 0;
                Subhalo[Nsubhalos].SubhaloNr = all_candidates[k].SubhaloNr.get();

                Nsubhalos++;
              }
          }

        Mem.myfree(unbind_list);

        if(mode == COLL_SUBFIND)
          {
            SubUnbindDomain.domain_free();

            for(int i = 0; i < Tp->NumPart; i++)
              {
                Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
                Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
              }

            double t0 = Logs.second();
            subfind_distribute_particles(SubComm); /* bring them back to their original processor */
            double t1 = Logs.second();

            subfind_collective_printf("SUBFIND: root-task=%d: bringing the independent ones back took %g sec\n", ThisTask,
                                      Logs.timediff(t0, t1));

            /* Since we reestablished the original order, we can use IndexList[] again */
          }
      }

  double t1 = Logs.second();
  subfind_collective_printf("SUBFIND: root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
                            Logs.timediff(t0, t1));

  /* get the total substructure count */
  int countloc = Nsubhalos - nsubhalos_old;
  int countall;

  MPI_Allreduce(&countloc, &countall, 1, MPI_INT, MPI_SUM, SubComm);

  MPI_Allreduce(MPI_IN_PLACE, &marked, 1, MPI_INT, MPI_SUM, SubComm);

  /* Now let's save some properties of the substructures */
  if(SubThisTask == 0)
    {
      Group[gr].Nsubs = countall;

#if defined(SUBFIND_ORPHAN_TREATMENT)
      Group[gr].LenPrevMostBnd += marked;
#endif
    }

  /* also need to set the field SubRankInGr in all found Subhalos */

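  /* gather the subhalos found for this group from all tasks and sort them by length; the rank in
   * this global ordering becomes SubRankInGr, with rank 0 identifying the main subhalo */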
  hbt_subhalo_t *subhalo_list = (hbt_subhalo_t *)Mem.mymalloc("subhalo_list", countloc * sizeof(hbt_subhalo_t));

  for(int n = 0; n < countloc; n++)
    {
      subhalo_list[n].Len = Subhalo[n + nsubhalos_old].Len;
      subhalo_list[n].ThisTask = SubThisTask;
      subhalo_list[n].ThisIndex = n;
      subhalo_list[n].SubhaloNr = Subhalo[n + nsubhalos_old].SubhaloNr;
    }

  mycxxsort_parallel(subhalo_list, subhalo_list + countloc, subfind_hbt_compare_subhalolist_len, SubComm);

  int *countloc_list = (int *)Mem.mymalloc("countloc_list", SubNTask * sizeof(int));
  MPI_Allgather(&countloc, 1, MPI_INT, countloc_list, 1, MPI_INT, SubComm);
  int npreviousranks = 0;
  for(int i = 0; i < SubThisTask; i++)
    npreviousranks += countloc_list[i];

  for(int n = 0; n < countloc; n++)
    subhalo_list[n].SubRankInGr = n + npreviousranks;

  mycxxsort_parallel(subhalo_list, subhalo_list + countloc, subfind_hbt_compare_subhalolist_thistask_thisindex, SubComm);

  /* rank and index of main subhalo */
  int taskmain = 0;
  int indexmain = 0;

  for(int n = 0; n < countloc; n++)
    {
      Subhalo[n + nsubhalos_old].SubRankInGr = subhalo_list[n].SubRankInGr;

      if(subhalo_list[n].SubRankInGr == 0)
        {
          /* here we have the main subhalo */
          taskmain = SubThisTask;
          indexmain = n + nsubhalos_old;
        }
    }

  /* now we need to fill in SubRankInGr into Tp->PS[] so that the particles can be sorted in subhalo order;
   * we use subhalo_list[] as the translation table
   */

  for(int k = 0; k < NumPartGroup; k++)
    {
      pcand[k].SubhaloNr = Tp->PS[IndexList[k]].SubhaloNr;
      pcand[k].index = IndexList[k];
    }

  /* sort locally */
  mycxxsort(pcand, pcand + NumPartGroup, subfind_hbt_compare_pcand_subhalonr);

  int sizelocsubhalolist = countloc * sizeof(hbt_subhalo_t); /* length in bytes */
  MPI_Allgather(&sizelocsubhalolist, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);

  offset[0] = 0;
  for(int i = 1; i < SubNTask; i++)
    offset[i] = offset[i - 1] + countlist[i - 1];

  hbt_subhalo_t *all_subhalo_list = (hbt_subhalo_t *)Mem.mymalloc("all_subhalo_list", countall * sizeof(hbt_subhalo_t));

  myMPI_Allgatherv(subhalo_list, sizelocsubhalolist, MPI_BYTE, all_subhalo_list, countlist, offset, MPI_BYTE, SubComm);

  /* sort locally */
  mycxxsort(all_subhalo_list, all_subhalo_list + countall, subfind_hbt_compare_subhalolist_prevsubhalonr);

  int n = 0;

  for(int k = 0; k < NumPartGroup; k++)
    {
      if(pcand[k].SubhaloNr.get() == HALONR_MAX)
        Tp->PS[pcand[k].index].SubRankInGr = INT_MAX;
      else
        {
          while(n < countall && all_subhalo_list[n].SubhaloNr < (long long)pcand[k].SubhaloNr.get())
            n++;

          if(n >= countall)
            Terminate("unexpected: n=%d countall=%d", n, countall);

          if(all_subhalo_list[n].SubhaloNr != (long long)pcand[k].SubhaloNr.get())
            Terminate("also unexpected: k=%d NumPartGroup=%d all_subhalo_list[n].SubhaloNr=%lld != pcand[k].SubhaloNr=%lld\n", k,
                      NumPartGroup, (long long)all_subhalo_list[n].SubhaloNr, (long long)pcand[k].SubhaloNr.get());

          Tp->PS[pcand[k].index].SubRankInGr = all_subhalo_list[n].SubRankInGr;
        }
    }

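  /* communicate the properties of the main subhalo (SubRankInGr == 0) to task 0, which adopts its
   * position as the position of the group */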
  if(countall > 0)
    {
      MPI_Allreduce(MPI_IN_PLACE, &taskmain, 1, MPI_INT, MPI_MAX, SubComm);
      MPI_Allreduce(MPI_IN_PLACE, &indexmain, 1, MPI_INT, MPI_MAX, SubComm);

      subhalo_properties MainSubhalo;

      if(taskmain == SubThisTask)
        {
          MainSubhalo = Subhalo[indexmain];

          if(taskmain != 0)
            MPI_Send(&MainSubhalo, sizeof(subhalo_properties), MPI_BYTE, 0, TAG_N, SubComm);
        }

      if(SubThisTask == 0)
        {
          if(taskmain != 0)
            MPI_Recv(&MainSubhalo, sizeof(subhalo_properties), MPI_BYTE, taskmain, TAG_N, SubComm, MPI_STATUS_IGNORE);

          for(int j = 0; j < 3; j++)
            {
              Group[gr].Pos[j] = MainSubhalo.Pos[j];
              Group[gr].IntPos[j] = MainSubhalo.IntPos[j];
            }
        }
    }

  Mem.myfree(all_subhalo_list);
  Mem.myfree(countloc_list);
  Mem.myfree(subhalo_list);
  Mem.myfree(offset);
  Mem.myfree(countlist);
  Mem.myfree(all_candidates);
  Mem.myfree(loc_candidates);
  Mem.myfree(elem_last);
  Mem.myfree(NumPartGroup_list);
  Mem.myfree(pcand);

  subfind_collective_printf("SUBFIND: root-task=%d: found %d bound substructures in FoF group of length %lld\n", ThisTask, countall,
                            totgrouplen);
}

/* now make sure that the following classes are really instantiated, otherwise we may get a linking problem */
#include "../data/simparticles.h"
template class fof<simparticles>;

#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
#include "../data/lcparticles.h"
template class fof<lcparticles>;
#endif

#endif
#endif