GADGET-4
subfind_excursionset.cc
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#ifdef SUBFIND
15#ifndef SUBFIND_HBT
16
17#include <gsl/gsl_math.h>
18#include <mpi.h>
19#include <algorithm>
20#include <cmath>
21#include <cstdio>
22#include <cstdlib>
23#include <cstring>
24
25#include "../data/allvars.h"
26#include "../data/dtypes.h"
27#include "../data/intposconvert.h"
28#include "../data/mymalloc.h"
29#include "../domain/domain.h"
30#include "../fof/fof.h"
31#include "../gravtree/gravtree.h"
32#include "../logs/timer.h"
33#include "../main/simulation.h"
34#include "../mpi_utils/mpi_utils.h"
35#include "../sort/cxxsort.h"
36#include "../sort/parallel_sort.h"
37#include "../sort/peano.h"
38#include "../subfind/subfind.h"
39#include "../system/system.h"
40
41#define HIGHBIT (1 << 30)
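/* Bit 30 serves as a temporary tag during substructure processing: PS[].submark is set to HIGHBIT to
 * mean "not assigned to any candidate in the current round", and the same bit is OR-ed into
 * PS[].u.s.origintask to flag particles that are already bound to a substructure (it is cleared again
 * with HIGHBIT - 1 further down). */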
42
43template <typename partset>
44void fof<partset>::subfind_process_single_group(domain<partset> *SubDomain, domain<partset> *SingleDomain, domain_options mode, int gr)
45{
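/* Outline of this routine: (1) build a tree for the group's particles and find, for each particle, its
 * two nearest denser neighbours; (2) walk the particles in order of decreasing density and grow a
 * distributed linked list of structures, registering a subhalo candidate whenever two sufficiently
 * large structures meet at a saddle point; (3) subject the candidates to gravitational unbinding,
 * small independent ones in parallel and nested/large ones collectively; (4) determine and store the
 * properties of the surviving substructures. */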
46 /* set up an inverse look-up of the position of local particles in the index list, for later use
47 */
48 for(int i = 0; i < NumPartGroup; i++)
49 Tp->PS[IndexList[i]].InvIndex = i;
50
51 /* allocated storage for an auxiliary array needed for sorting */
52 sd = (sort_density_data *)Mem.mymalloc_movable(&sd, "sd", NumPartGroup * sizeof(sort_density_data));
53
54 /* construct a tree for all particles in the halo */
55
56 FoFGravTree.treeallocate(Tp->NumPart, Tp, SubDomain);
57 FoFGravTree.treebuild(NumPartGroup, IndexList);
58
59 /* determine the radius that encloses a certain number of link particles */
60 subfind_find_linkngb(SubDomain, NumPartGroup, IndexList);
61
62 /* now determine the indices of the nearest two denser neighbours within this link region */
63
64 /* first, allocate some auxiliary arrays for storing this info */
65
66 Tp->R2Loc = (typename partset::nearest_r2_data *)Mem.mymalloc_movable(&Tp->R2Loc, "R2Loc",
67 Tp->NumPart * sizeof(typename partset::nearest_r2_data));
68
69 /* find the nearest two denser particles around each point in the group */
70 subfind_find_nearesttwo(SubDomain, NumPartGroup, IndexList);
71
72 /* create an array that we can conveniently sort according to density in group subsets */
73 for(int i = 0; i < NumPartGroup; i++)
74 {
75 if(IndexList[i] >= Tp->NumPart || IndexList[i] < 0)
76 Terminate("Really?");
77
78 sd[i].density = Tp->PS[IndexList[i]].u.s.u.DM_Density;
79 sd[i].ngbcount = Tp->PS[IndexList[i]].nearest.count;
80 sd[i].index = {SubThisTask, i};
81 sd[i].ngb_index1 = Tp->PS[IndexList[i]].nearest.index[0];
82 sd[i].ngb_index2 = Tp->PS[IndexList[i]].nearest.index[1];
83#ifdef MERGERTREE
84 sd[i].PrevSizeOfSubhalo = Tp->P[IndexList[i]].PrevSizeOfSubhalo;
85#else
86 sd[i].PrevSizeOfSubhalo.set(0);
87#endif
88 }
89 Mem.myfree(Tp->R2Loc);
90
91 /* sort sd according to densities (in parallel if needed) */
92 mycxxsort_parallel(sd, sd + NumPartGroup, subfind_compare_densities, SubComm);
93
94 subfind_collective_printf("SUBFIND: root-task=%d: parallel sort of 'sd' done.\n", ThisTask);
95
96 /* can now release the tree */
97 FoFGravTree.treefree();
98
99 /* allocate and initialize distributed link list for storing subhalo connectivity */
100 SFHead = (location *)Mem.mymalloc_movable(&SFHead, "SFHead", NumPartGroup * sizeof(location));
101 SFNext = (location *)Mem.mymalloc_movable(&SFNext, "SFNext", NumPartGroup * sizeof(location));
102 SFTail = (location *)Mem.mymalloc_movable(&SFTail, "SFTail", NumPartGroup * sizeof(location));
103 SFLen = (MyLenType *)Mem.mymalloc_movable(&SFLen, "SFLen", NumPartGroup * sizeof(MyLenType));
104 SFPrevLen = (double *)Mem.mymalloc_movable(&SFPrevLen, "SFPrevLen", NumPartGroup * sizeof(double));
105
106 for(int i = 0; i < NumPartGroup; i++)
107 {
108 SFHead[i] = {-1, -1};
109 SFNext[i] = {-1, -1};
110 SFTail[i] = {-1, -1};
111 SFLen[i] = 0;
112 SFPrevLen[i] = 0;
113 }
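/* SFHead/SFNext/SFTail form a distributed linked list over the group's particles: each entry is a
 * 'location', i.e. a (task, index) pair into the per-task group index space, with {-1, -1} meaning
 * "unset". SFLen holds the current length of each chain (and is later reused to store a rank), while
 * SFPrevLen accumulates the previous subhalo sizes that enter the main-trunk decision. */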
114
115 /* allocate a list to store subhalo candidates */
116 max_coll_candidates = std::max<int>((NumPartGroup / 50), 200);
117 coll_candidates =
118 (coll_cand_dat *)Mem.mymalloc_movable(&coll_candidates, "coll_candidates", max_coll_candidates * sizeof(coll_cand_dat));
119
120 /* initialize the number of current candidates */
121 count_cand = 0;
122
123 /* get total group length */
124 long long totgrouplen;
125 sumup_large_ints(1, &NumPartGroup, &totgrouplen, SubComm);
126
127 /* determine subhalo candidates */
128 subfind_col_find_coll_candidates(totgrouplen);
129
130 /* establish total number of candidates */
131 long long totcand;
132 sumup_large_ints(1, &count_cand, &totcand, SubComm);
133
134 subfind_collective_printf("SUBFIND: root-task=%d: total number of subhalo coll_candidates=%lld\n", ThisTask, totcand);
135
136 for(int i = 0; i < NumPartGroup; i++)
137 SFTail[i] = {-1, -1};
138
139 /* default is to be not nested */
140 for(int i = 0; i < count_cand; i++)
141 coll_candidates[i].parent = 0;
142
143 int count_leaves = 0;
144 long long nremaining = totcand;
145
146 do
147 {
148 /* Let's see which coll_candidates can be unbound independently of each other.
149 * We identify them as those candidates that have no other candidate embedded within them, which is most of them by number.
150 */
151 double t0 = Logs.second();
152 coll_cand_dat *tmp_coll_candidates = 0;
153 if(SubThisTask == 0)
154 tmp_coll_candidates = (coll_cand_dat *)Mem.mymalloc("tmp_coll_candidates", totcand * sizeof(coll_cand_dat));
155
156 int *countlist = (int *)Mem.mymalloc("countlist", SubNTask * sizeof(int));
157 int *offset = (int *)Mem.mymalloc("offset", SubNTask * sizeof(int));
158
159 int count = count_cand * sizeof(coll_cand_dat);
160 MPI_Allgather(&count, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
161
162 offset[0] = 0;
163 for(int i = 1; i < SubNTask; i++)
164 offset[i] = offset[i - 1] + countlist[i - 1];
165
166 /* assemble a list of the candidates on subtask 0 */
167 MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);
168
169 if(SubThisTask == 0)
170 {
171 for(int k = 0; k < totcand; k++)
172 {
173 tmp_coll_candidates[k].nsub = k;
174 tmp_coll_candidates[k].subnr = k;
175 }
176
177 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_rank);
178 for(int k = 0; k < totcand; k++)
179 {
180 if(tmp_coll_candidates[k].parent >= 0)
181 {
182 tmp_coll_candidates[k].parent = 0;
183
184 for(int j = k + 1; j < totcand; j++)
185 {
186 if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
187 break;
188
189 if(tmp_coll_candidates[j].parent < 0) /* ignore these */
190 continue;
191
192 if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >=
193 tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
194 {
195 tmp_coll_candidates[k].parent++; /* we here count the number of subhalos that are enclosed */
196 }
197 else
198 {
199 Terminate("k=%d|%lld has rank=%d and len=%d. j=%d has rank=%d and len=%d\n", k, totcand,
200 (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len, j,
201 (int)tmp_coll_candidates[j].rank, (int)tmp_coll_candidates[j].len);
202 }
203 }
204 }
205 }
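/* After sorting by rank, candidate j is fully contained in candidate k exactly if its rank interval
 * [rank_j, rank_j + len_j) lies inside [rank_k, rank_k + len_k). Each such hit increments 'parent', so
 * 'parent' temporarily counts the number of enclosed candidates, and only candidates with parent == 0
 * are treated as independent leaves below. */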
206
207 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_subnr);
208 }
209
210 /* send the stuff back */
211 MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);
212
213 Mem.myfree(offset);
214 Mem.myfree(countlist);
215
216 if(SubThisTask == 0)
217 Mem.myfree(tmp_coll_candidates);
218
219 count_leaves = 0;
220 int max_length = 0;
221 for(int i = 0; i < count_cand; i++)
222 if(coll_candidates[i].parent == 0) /* if it's not a nested one, the candidate is eligible for independent/parallel unbinding */
223 {
224 /* if it seems large (heuristic criterion), let's rather do it collectively */
225 if(coll_candidates[i].len > 0.20 * Tp->TotNumPart / NTask)
226 coll_candidates[i].parent++; /* this trick will ensure that it is not considered in this round */
227 else
228 {
229 if(coll_candidates[i].len > max_length)
230 max_length = coll_candidates[i].len;
231
232 count_leaves++;
233 }
234 }
235
236 /* get total count of these eligible subhalos, and their maximum length */
237 MPI_Allreduce(MPI_IN_PLACE, &count_leaves, 1, MPI_INT, MPI_SUM, SubComm);
238 MPI_Allreduce(MPI_IN_PLACE, &max_length, 1, MPI_INT, MPI_MAX, SubComm);
239
240 double t1 = Logs.second();
241
242 subfind_collective_printf(
243 "SUBFIND: root-task=%d: number of subhalo coll_candidates that can be done independently=%d. (Largest size %d, finding took "
244 "%g sec)\n",
245 ThisTask, count_leaves, max_length, Logs.timediff(t0, t1));
246
247 /* if there are none left, we break and do the rest collectively */
248 if(count_leaves <= 0)
249 {
250 subfind_collective_printf("SUBFIND: root-task=%d: too few, let's do the rest of %lld collectively\n", ThisTask, nremaining);
251 break;
252 }
253
254 /* seems large, let's rather do it collectively */
255 if(max_length > 0.5 * Tp->TotNumPart / NTask)
256 {
257 subfind_collective_printf("SUBFIND: root-task=%d: too big coll_candidates, I do the rest collectively\n", ThisTask);
258 break;
259 }
260
261 nremaining -= count_leaves;
262
263 /* set default values */
264 for(int i = 0; i < Tp->NumPart; i++)
265 {
266 Tp->PS[i].u.s.origintask = SubThisTask;
267 Tp->PS[i].u.s.originindex = i;
268
269 Tp->PS[i].TargetTask = SubThisTask;
270 Tp->PS[i].submark = HIGHBIT;
271 }
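/* origintask/originindex record where each particle currently resides so that it can be sent back
 * after the temporary redistribution below; submark = HIGHBIT marks particles that do not belong to
 * any of the candidates processed in this round. */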
272
273 for(int i = 0; i < NumPartGroup; i++)
274 {
275 if(SFTail[i].index >= 0) /* this means this particle is already bound to a substructure */
276 Tp->PS[IndexList[i]].u.s.origintask |= HIGHBIT;
277 }
278
279 /* we now mark the particles that are in subhalo candidates that can be processed independently in parallel */
280 int nsubs = 0;
281 for(int master = 0; master < SubNTask; master++)
282 {
283 int ncand = count_cand;
284 MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);
285
286 for(int k = 0; k < ncand; k++)
287 {
288 MyLenType len;
289 int parent;
290 if(SubThisTask == master)
291 {
292 len = coll_candidates[k].len;
293 parent = coll_candidates[k].parent; /* at this point, this actually holds the daughter count */
294 }
295
296 MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
297 MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
298 MPI_Barrier(SubComm);
299
300 if(parent == 0)
301 {
302 if(SubThisTask != master)
303 subfind_poll_for_requests();
304 else
305 {
306 location p = coll_candidates[k].head;
307 for(MyLenType i = 0; i < coll_candidates[k].len; i++)
308 {
309 subfind_distlinklist_mark_particle(p, master, nsubs);
310
311 if(p.index < 0)
312 Terminate("Bummer\n");
313
314 p = subfind_distlinklist_get_next(p);
315 }
316
317 /* now tell the others to stop polling */
318 for(int i = 0; i < SubNTask; i++)
319 if(i != SubThisTask)
320 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
321 }
322
323 MPI_Barrier(SubComm);
324 }
325
326 nsubs++;
327 }
328 }
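/* Every task runs through the same sequence of (master, candidate) pairs, so the running counter
 * 'nsubs' stays synchronized across SubComm and provides a globally consistent submark value for each
 * candidate that is unbound independently. */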
329
330 if(mode == COLL_SUBFIND)
331 {
332 /* this will make sure that the particles are grouped by submark on the target task */
333 for(int i = 0; i < Tp->NumPart; i++)
334 Tp->PS[i].TargetIndex = Tp->PS[i].submark;
335
336 /* assemble the particles on individual processors (note: IndexList[] becomes temporarily meaningless) */
337 subfind_distribute_particles(SubComm);
338
339 PPS = (PPS_data *)Mem.mymalloc("PPS", Tp->NumPart * sizeof(PPS_data));
340
341 for(int i = 0; i < Tp->NumPart; i++)
342 PPS[i].index = i;
343 }
344 else
345 {
346 PPS = (PPS_data *)Mem.mymalloc("PPS", Tp->NumPart * sizeof(PPS_data));
347
348 for(int i = 0; i < Tp->NumPart; i++)
349 {
350 PPS[i].submark = Tp->PS[i].submark;
351 PPS[i].index = i;
352 }
353
354 mycxxsort(PPS, PPS + Tp->NumPart, subfind_compare_PPS);
355 }
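/* At this point the members of each independently processed candidate can be visited contiguously:
 * in COLL_SUBFIND mode the particles were physically regrouped by submark on their target task,
 * otherwise PPS[] has been sorted by submark. subfind_unbind_independent_ones() relies on this
 * ordering when it walks PPS[] candidate by candidate. */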
356
357 MPI_Barrier(SubComm);
358 double ta = Logs.second();
359
360 subfind_unbind_independent_ones(SingleDomain, count_cand);
361
362 MPI_Barrier(SubComm);
363 double tb = Logs.second();
364
365 Mem.myfree(PPS);
366
367 subfind_collective_printf("SUBFIND: root-task=%d: unbinding of independent ones took %g sec\n", ThisTask, Logs.timediff(ta, tb));
368
369 if(mode == COLL_SUBFIND)
370 {
371 for(int i = 0; i < Tp->NumPart; i++)
372 {
373 Tp->PS[i].u.s.origintask &= (HIGHBIT - 1); /* clear high bit if set */
374 Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
375 Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
376 }
377
378 t0 = Logs.second();
379 subfind_distribute_particles(SubComm); /* bring them back to their original processor */
380 t1 = Logs.second();
381
382 subfind_collective_printf("SUBFIND: root-task=%d: bringing the independent ones back took %g sec\n", ThisTask,
383 Logs.timediff(t0, t1));
384
385 /* Since we reestablished the original order, we can use IndexList[] again */
386 }
387
388 /* now mark the bound particles */
389 for(int i = 0; i < NumPartGroup; i++)
390 if(Tp->PS[IndexList[i]].submark >= 0 && Tp->PS[IndexList[i]].submark < nsubs)
391 SFTail[i].index = Tp->PS[IndexList[i]].submark; /* we use this to flag bound parts of substructures */
392
393 for(int i = 0; i < count_cand; i++)
394 if(coll_candidates[i].parent == 0)
395 coll_candidates[i].parent = -1;
396 }
397 while(count_leaves > 0);
398
399 /*
400 * Now we do the unbinding of the subhalo candidates that contain other subhalo candidates.
401 * This will be done with several CPUs if needed.
402 */
403
404 double t0 = Logs.second();
405
406 for(int master = 0, nr = 0; master < SubNTask; master++)
407 {
408 int ncand = count_cand;
409 MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);
410
411 for(int k = 0; k < ncand; k++)
412 {
413 MyLenType len;
414 int parent, nsubs;
415 if(SubThisTask == master)
416 {
417 len = coll_candidates[k].len;
418 nsubs = coll_candidates[k].nsub;
419 parent = coll_candidates[k].parent; /* at this point, this actually holds the daughter count */
420 }
421
422 MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
423 MPI_Barrier(SubComm);
424
425 if(parent >= 0)
426 {
427 MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
428 MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm);
429
430 subfind_collective_printf("SUBFIND: root-task=%d: collective unbinding of nr=%d (%lld) of length=%d\n", ThisTask, nr,
431 nremaining, (int)len);
432
433 nr++;
434
435 LocalLen = 0;
436
437 double tt0 = Logs.second();
438
439 unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));
440
441 if(SubThisTask != master)
442 subfind_poll_for_requests();
443 else
444 {
445 location p = coll_candidates[k].head;
446 for(int i = 0; i < coll_candidates[k].len; i++)
447 {
448 if(p.index < 0)
449 Terminate("Bummer i=%d \n", i);
450
451 subfind_distlinklist_add_particle(p);
452
453 p = subfind_distlinklist_get_next(p);
454 }
455
456 /* now tell the others to stop polling */
457 for(int i = 0; i < SubNTask; i++)
458 if(i != SubThisTask)
459 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
460 }
461
462 if(LocalLen > NumPartGroup)
463 Terminate("LocalLen=%d > NumPartGroup=%d", LocalLen, NumPartGroup);
464
465 /* rewrite list of group indices to particle indices */
466 for(int i = 0; i < LocalLen; i++)
467 {
468 unbind_list[i] = IndexList[unbind_list[i]];
469 if(unbind_list[i] < 0 || unbind_list[i] >= Tp->NumPart)
470 Terminate("bad! unbind_list[i]=%d\n", unbind_list[i]);
471 }
472
473 /* mark the ones to be unbound in PS[] */
474 for(int i = 0; i < Tp->NumPart; i++)
475 {
476 Tp->PS[i].u.s.origintask = SubThisTask;
477 Tp->PS[i].u.s.originindex = i;
478 Tp->PS[i].DomainFlag = 0;
479 }
480
481 for(int i = 0; i < LocalLen; i++)
482 Tp->PS[unbind_list[i]].DomainFlag = 1;
483
484 Mem.myfree(unbind_list);
485
486 domain<partset> SubUnbindDomain{SubComm, Tp};
487
488 if(SubUnbindDomain.NumNodes != 0)
489 Terminate("SubUnbindDomain.NumNodes=%d\n", SubUnbindDomain.NumNodes);
490
491 SubUnbindDomain.domain_decomposition(mode);
492
493 if(mode == COLL_SUBFIND)
494 subfind_distribute_particles(SubComm);
495
496 /* count the particles that will go into unbind_list */
497 LocalLen = 0;
498 for(int i = 0; i < Tp->NumPart; i++)
499 if(Tp->PS[i].DomainFlag)
500 LocalLen++;
501
502 unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", LocalLen * sizeof(int));
503
504 /* refill unbind_list */
505 LocalLen = 0;
506 for(int i = 0; i < Tp->NumPart; i++)
507 if(Tp->PS[i].DomainFlag)
508 unbind_list[LocalLen++] = i;
509
510 LocalLen = subfind_unbind(&SubUnbindDomain, SubComm, unbind_list, LocalLen);
511
512 for(int i = 0; i < Tp->NumPart; i++)
513 {
514 Tp->PS[i].DomainFlag = 0;
515 Tp->PS[i].TargetTask = Tp->PS[i].u.s.origintask;
516 Tp->PS[i].TargetIndex = Tp->PS[i].u.s.originindex;
517 }
518
519 for(int i = 0; i < LocalLen; i++)
520 Tp->PS[unbind_list[i]].DomainFlag = 1;
521
522 Mem.myfree(unbind_list);
523
524 SubUnbindDomain.domain_free();
525
526 double ta = Logs.second();
527 subfind_distribute_particles(SubComm); /* bring them back to their original processor */
528 double tb = Logs.second();
529
530 unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));
531
532 /* refill unbind_list */
533 LocalLen = 0;
534 for(int i = 0; i < Tp->NumPart; i++)
535 if(Tp->PS[i].DomainFlag)
536 {
537 if(LocalLen >= NumPartGroup)
538 Terminate("LocalLen=%d >= NumPartGroup=%d", LocalLen, NumPartGroup);
539 unbind_list[LocalLen++] = i;
540 }
541
542 subfind_collective_printf("SUBFIND: root-task=%d: bringing the collective ones back took %g sec\n", ThisTask,
543 Logs.timediff(ta, tb));
544
545 /* go from particle indices back to group indices */
546 for(int i = 0; i < LocalLen; i++)
547 {
548 unbind_list[i] = Tp->PS[unbind_list[i]].InvIndex;
549 if(unbind_list[i] < 0 || unbind_list[i] >= NumPartGroup)
550 Terminate("oops, bad! unbind_list[i]=%d NumPartGroup=%d\n", unbind_list[i], NumPartGroup);
551 }
552
553 double tt1 = Logs.second();
554
555 int oldlen = len;
556
557 MPI_Allreduce(&LocalLen, &len, 1, MPI_INT, MPI_SUM, SubComm);
558
559 subfind_collective_printf(
560 "SUBFIND: root-task=%d: collective unbinding of nr=%d (%lld) of length=%d, bound length=%d took %g sec\n", ThisTask,
561 nr - 1, nremaining, oldlen, (int)len, Logs.timediff(tt0, tt1));
562
563 if(len >= All.DesLinkNgb)
564 {
565 /* ok, we found a substructure */
566 for(int i = 0; i < LocalLen; i++)
567 SFTail[unbind_list[i]].index = nsubs; /* we use this to flag the substructures */
568
569 if(SubThisTask == master)
570 coll_candidates[k].bound_length = len;
571 }
572 else
573 {
574 /* bound particle count too low or zero */
575 if(SubThisTask == master)
576 coll_candidates[k].bound_length = 0;
577 }
578
579 Mem.myfree(unbind_list);
580 }
581 }
582 }
583 double t1 = Logs.second();
584
585 subfind_collective_printf("SUBFIND: root-task=%d: the collective unbinding of remaining halos took %g sec\n", ThisTask,
586 Logs.timediff(t0, t1));
587
588 /* get the total substructure count */
589 int countall = 0;
590 for(int k = 0; k < count_cand; k++)
591 if(coll_candidates[k].bound_length >= All.DesLinkNgb)
592 {
593 if(coll_candidates[k].len < All.DesLinkNgb)
594 Terminate("coll_candidates[k=%d].len=%lld bound=%lld\n", k, (long long)coll_candidates[k].len,
595 (long long)coll_candidates[k].bound_length);
596
597 countall++;
598 }
599
600 MPI_Allreduce(MPI_IN_PLACE, &countall, 1, MPI_INT, MPI_SUM, SubComm);
601
602 subfind_collective_printf("SUBFIND: root-task=%d: found %d bound substructures in FoF group of length %lld\n", ThisTask, countall,
603 totgrouplen);
604
605 /* now determine the parent subhalo for each candidate */
606 t0 = Logs.second();
607 mycxxsort_parallel(coll_candidates, coll_candidates + count_cand, subfind_compare_coll_candidates_boundlength, SubComm);
608
609 coll_cand_dat *tmp_coll_candidates = 0;
610
611 if(SubThisTask == 0)
612 tmp_coll_candidates = (coll_cand_dat *)Mem.mymalloc("tmp_coll_candidates", totcand * sizeof(coll_cand_dat));
613
614 int *countlist = (int *)Mem.mymalloc("countlist", SubNTask * sizeof(int));
615 int *offset = (int *)Mem.mymalloc("offset", SubNTask * sizeof(int));
616
617 int count_size = count_cand * sizeof(coll_cand_dat);
618 MPI_Allgather(&count_size, 1, MPI_INT, countlist, 1, MPI_INT, SubComm);
619
620 offset[0] = 0;
621 for(int i = 1; i < SubNTask; i++)
622 offset[i] = offset[i - 1] + countlist[i - 1];
623
624 MPI_Gatherv(coll_candidates, countlist[SubThisTask], MPI_BYTE, tmp_coll_candidates, countlist, offset, MPI_BYTE, 0, SubComm);
625
626 if(SubThisTask == 0)
627 {
628 for(int k = 0; k < totcand; k++)
629 {
630 tmp_coll_candidates[k].subnr = k;
631 tmp_coll_candidates[k].parent = 0;
632 }
633
634 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_rank);
635
636 for(int k = 0; k < totcand; k++)
637 {
638 for(int j = k + 1; j < totcand; j++)
639 {
640 if(tmp_coll_candidates[j].rank > tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len)
641 break;
642
643 if(tmp_coll_candidates[k].rank + tmp_coll_candidates[k].len >= tmp_coll_candidates[j].rank + tmp_coll_candidates[j].len)
644 {
645 if(tmp_coll_candidates[k].bound_length >= All.DesLinkNgb)
646 tmp_coll_candidates[j].parent = tmp_coll_candidates[k].subnr;
647 }
648 else
649 {
650 Terminate("k=%d|%d has rank=%d and len=%d. j=%d has rank=%d and len=%d bound=%d\n", k, countall,
651 (int)tmp_coll_candidates[k].rank, (int)tmp_coll_candidates[k].len,
652 (int)tmp_coll_candidates[k].bound_length, (int)tmp_coll_candidates[j].rank,
653 (int)tmp_coll_candidates[j].len, (int)tmp_coll_candidates[j].bound_length);
654 }
655 }
656 }
657
658 mycxxsort(tmp_coll_candidates, tmp_coll_candidates + totcand, subfind_compare_coll_candidates_subnr);
659 }
660
661 MPI_Scatterv(tmp_coll_candidates, countlist, offset, MPI_BYTE, coll_candidates, countlist[SubThisTask], MPI_BYTE, 0, SubComm);
662
663 Mem.myfree(offset);
664 Mem.myfree(countlist);
665
666 if(SubThisTask == 0)
667 Mem.myfree(tmp_coll_candidates);
668
669 t1 = Logs.second();
670
671 subfind_collective_printf("SUBFIND: root-task=%d: determination of parent subhalo took %g sec (presently allocated %g MB)\n",
672 ThisTask, Logs.timediff(t0, t1), Mem.getAllocatedBytesInMB());
673
674 /* Now let's save some properties of the substructures */
675 if(SubThisTask == 0)
676 Group[gr].Nsubs = countall;
677
678 t0 = Logs.second();
679
680 unbind_list = (int *)Mem.mymalloc_movable(&unbind_list, "unbind_list", NumPartGroup * sizeof(int));
681
682 for(int master = 0, subnr = 0; master < SubNTask; master++)
683 {
684 int ncand = count_cand;
685 MPI_Bcast(&ncand, sizeof(ncand), MPI_BYTE, master, SubComm);
686
687 for(int k = 0; k < ncand; k++)
688 {
689 MyLenType len;
690 int parent, nsubs;
691 if(SubThisTask == master)
692 {
693 len = coll_candidates[k].bound_length;
694 nsubs = coll_candidates[k].nsub;
695 parent = coll_candidates[k].parent;
696 }
697
698 MPI_Bcast(&len, sizeof(len), MPI_BYTE, master, SubComm);
699 MPI_Barrier(SubComm);
700
701 if(len > 0)
702 {
703 MPI_Bcast(&nsubs, sizeof(nsubs), MPI_BYTE, master, SubComm);
704 MPI_Bcast(&parent, sizeof(parent), MPI_BYTE, master, SubComm);
705
706 LocalLen = 0;
707
708 if(SubThisTask != master)
709 subfind_poll_for_requests();
710 else
711 {
712 location p = coll_candidates[k].head;
713 for(MyLenType i = 0; i < coll_candidates[k].len; i++)
714 {
715 subfind_distlinklist_add_bound_particles(p, nsubs);
716 p = subfind_distlinklist_get_next(p);
717 }
718
719 /* now tell the others to stop polling */
720 for(int i = 0; i < SubNTask; i++)
721 if(i != SubThisTask)
722 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
723 }
724
725 int max_nsubhalos;
726 MPI_Allreduce(&Nsubhalos, &max_nsubhalos, 1, MPI_INT, MPI_MAX, SubComm);
727
728 if(max_nsubhalos >= MaxNsubhalos)
729 {
730 if(ThisTask == 0)
731 warn("Nsubhalos=%d >= MaxNsubhalos=%d", max_nsubhalos, MaxNsubhalos);
732
733 MaxNsubhalos = 1.25 * max_nsubhalos;
734
735 Subhalo = (subhalo_properties *)Mem.myrealloc_movable(Subhalo, MaxNsubhalos * sizeof(subhalo_properties));
736 }
737
738 for(int i = 0; i < LocalLen; i++)
739 {
740 unbind_list[i] = IndexList[unbind_list[i]]; /* move to particle index list */
741
742 if(unbind_list[i] < 0 || unbind_list[i] >= Tp->NumPart)
743 Terminate("strange");
744 }
745
746 int marked = subfind_determine_sub_halo_properties(unbind_list, LocalLen, &Subhalo[Nsubhalos], SubComm);
747
748 for(int i = 0; i < LocalLen; i++)
749 {
750 unbind_list[i] = Tp->PS[unbind_list[i]].InvIndex; /* move back to group index list */
751
752 if(unbind_list[i] < 0 || unbind_list[i] >= NumPartGroup)
753 Terminate("also strange");
754 }
755
756 MPI_Allreduce(MPI_IN_PLACE, &marked, 1, MPI_INT, MPI_SUM, SubComm);
757
758 if(SubThisTask == 0)
759 {
760 if(subnr == 0)
761 {
762 for(int j = 0; j < 3; j++)
763 {
764 Group[gr].Pos[j] = Subhalo[Nsubhalos].Pos[j];
765 Group[gr].IntPos[j] = Subhalo[Nsubhalos].IntPos[j];
766 }
767 }
768
769#if defined(SUBFIND_ORPHAN_TREATMENT)
770 Group[gr].LenPrevMostBnd += marked;
771#endif
772
773 Subhalo[Nsubhalos].GroupNr = GroupNr;
774 Subhalo[Nsubhalos].SubRankInGr = subnr;
775 Subhalo[Nsubhalos].SubParentRank = parent;
776
777 Nsubhalos++;
778 }
779
780 /* Let's now assign the subhalo number within the group */
781 for(int i = 0; i < LocalLen; i++)
782 {
783 Tp->PS[IndexList[unbind_list[i]]].SubRankInGr = subnr;
784#if defined(MERGERTREE)
785 Tp->PS[IndexList[unbind_list[i]]].SizeOfSubhalo.set(len);
786#endif
787 }
788
789 subnr++;
790 }
791 }
792 }
793
794 subfind_collective_printf("SUBFIND: root-task=%d: determining substructure properties done\n", ThisTask);
795
796 Mem.myfree(unbind_list);
797 Mem.myfree(coll_candidates);
798 Mem.myfree(SFPrevLen);
799 Mem.myfree(SFLen);
800 Mem.myfree(SFTail);
801 Mem.myfree(SFNext);
802 Mem.myfree(SFHead);
803 Mem.myfree(sd);
804}
805
806/* This function finds the subhalo candidates (i.e. locally overdense structures bounded by a saddle point).
807 * They can be nested inside each other, and will later be subjected to an unbinding procedure.
808 */
809template <typename partset>
810void fof<partset>::subfind_col_find_coll_candidates(long long totgrouplen)
811{
812 subfind_collective_printf("SUBFIND: root-task=%d: building distributed linked list. (presently allocated %g MB)\n", ThisTask,
813 Mem.getAllocatedBytesInMB());
814
815 double t0 = Logs.second();
816
817 /* Now find the subhalo candidates by building up-link lists for them.
818 * We go through the processors in the group in sequence, starting with the one holding the densest particles according to the
819 * sd[] array. In each iteration, the processor we currently deal with is called 'master', while the others listen for incoming
820 * requests for information.
821 */
822 for(int master = 0; master < SubNTask; master++)
823 {
824 double tt0 = Logs.second();
825 if(SubThisTask != master)
826 subfind_poll_for_requests(); /* if we are not the master task, we react to incoming requests for information */
827 else
828 {
829 /* we go through the sd[] indices stored on the master task, which means we start with the densest particle */
830 for(int k = 0; k < NumPartGroup; k++)
831 {
832 int ngbcount = sd[k].ngbcount;
833 location ngb_index1 = sd[k].ngb_index1;
834 location ngb_index2 = sd[k].ngb_index2;
835
836 switch(ngbcount)
837 /* treat the different possible cases */
838 {
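/* ngbcount is the number of denser neighbours found for this particle:
 *   0 -> the particle is a local density maximum and starts a new structure;
 *   1 -> it gets attached to the structure of its single denser neighbour;
 *   2 -> it is a saddle point joining two structures, in which case the smaller one is registered as a
 *        subhalo candidate (if long enough) and the two structures are merged. */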
839 case 0: /* this appears to be a lonely maximum -> new group */
840 subfind_distlinklist_set_all(sd[k].index, sd[k].index, sd[k].index, 1, {-1, -1}, sd[k].PrevSizeOfSubhalo);
841 break;
842
843 case 1: /* the particle is attached to exactly one group */
844 {
845 if(ngb_index1.task < 0 || ngb_index1.task >= SubNTask)
846 Terminate("ngb_index1.task=%d SubNTask=%d", ngb_index1.task, SubNTask);
847
848 location head = subfind_distlinklist_get_head(ngb_index1);
849
850 if(head.index == -1)
851 Terminate("We have a problem! head=%d for k=%d on task=%d\n", head.index, k, SubThisTask);
852
853 location tail;
854 int retcode =
855 subfind_distlinklist_get_tail_set_tail_increaselen(head, tail, sd[k].index, sd[k].PrevSizeOfSubhalo);
856
857 if(!(retcode & 1))
858 subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
859 if(!(retcode & 2))
860 subfind_distlinklist_set_next(tail, sd[k].index);
861 }
862 break;
863
864 case 2: /* the particle merges two groups together */
865 {
866 location head, head_attach;
867
868 if(ngb_index1.task < 0 || ngb_index1.task >= SubNTask)
869 Terminate("ngb_index1.task=%d SubNTask=%d", ngb_index1.task, SubNTask);
870
871 if(ngb_index2.task < 0 || ngb_index2.task >= SubNTask)
872 Terminate("ngb_index2.task=%d SubNTask=%d", ngb_index2.task, SubNTask);
873
874 if(ngb_index1.task == ngb_index2.task)
875 {
876 subfind_distlinklist_get_two_heads(ngb_index1, ngb_index2, head, head_attach);
877 }
878 else
879 {
880 head = subfind_distlinklist_get_head(ngb_index1);
881 head_attach = subfind_distlinklist_get_head(ngb_index2);
882 }
883
884 if(head.index == -1 || head_attach.index == -1)
885 Terminate("We have a problem! head=%d/%d head_attach=%d/%d for k=%d on task=%d\n", head.task, head.index,
886 head_attach.task, head_attach.index, k, SubThisTask);
887
888 if(head != head_attach)
889 {
890 location tail, tail_attach;
891 MyLenType len, len_attach;
892 double prevlen, prevlen_attach;
893
894 subfind_distlinklist_get_tailandlen(head, tail, len, prevlen);
895 subfind_distlinklist_get_tailandlen(head_attach, tail_attach, len_attach, prevlen_attach);
896
897 bool swap_len = false;
898 bool swap_prevlen = false;
899
900 if(len_attach > len || (len_attach == len && head_attach < head))
901 swap_len = true;
902
903 if(prevlen > 0 && prevlen_attach > 0 && len >= All.DesLinkNgb && len_attach >= All.DesLinkNgb)
904 {
905 if(prevlen_attach > prevlen || (prevlen_attach == prevlen && swap_len == true))
906 swap_prevlen = true;
907 }
908 else
909 swap_prevlen = swap_len;
910
911 /* if other group is longer, swap */
912 if(swap_prevlen)
913 {
914 location tmp = head;
915 head = head_attach;
916 head_attach = tmp;
917
918 tmp = tail;
919 tail = tail_attach;
920 tail_attach = tmp;
921
922 MyLenType tmplen = len;
923 len = len_attach;
924 len_attach = tmplen;
925
926 double tmpprevlen = prevlen;
927 prevlen = prevlen_attach;
928 prevlen_attach = tmpprevlen;
929 }
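/* After the (possible) swap, 'head' refers to the structure kept as the main trunk, normally the
 * longer one; if previous subhalo sizes are available (MERGERTREE) and both pieces are large enough,
 * the piece with the larger previous size wins instead. 'head_attach' is the piece that may be
 * registered as a subhalo candidate. */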
930
931 /* only if the attached group is long enough do we bother to register it
932 as a subhalo candidate */
933
934 if(len_attach >= All.DesLinkNgb && len >= All.DesLinkNgb)
935 {
936 count_decisions++;
937
938 if(swap_prevlen != swap_len)
939 {
940 printf(
941 "SUBFIND: TASK=%d: made a different main trunk decision due to previous length: prevlen=%g "
942 "prevlen_attach=%g len=%g len_attach=%g\n",
943 ThisTask, prevlen / len, prevlen_attach / len_attach, (double)len, (double)len_attach);
944 fflush(stdout);
945 count_different_decisions++;
946 }
947
948 if(count_cand < max_coll_candidates)
949 {
950 coll_candidates[count_cand].len = len_attach;
951 coll_candidates[count_cand].head = head_attach;
952 count_cand++;
953 }
954 else
955 Terminate("Task %d: count=%d, max=%d, npartgroup=%d\n", SubThisTask, count_cand, max_coll_candidates,
956 NumPartGroup);
957 }
958
959 /* now join the two groups */
960 subfind_distlinklist_set_tailandlen(head, tail_attach, len + len_attach, prevlen + prevlen_attach);
961 subfind_distlinklist_set_next(tail, head_attach);
962
963 location ss = head_attach;
964 do
965 {
966 ss = subfind_distlinklist_set_head_get_next(ss, head);
967 }
968 while(ss.index >= 0);
969 }
970
971 /* finally, attach the particle to 'head' */
972 location tail;
973 int retcode =
974 subfind_distlinklist_get_tail_set_tail_increaselen(head, tail, sd[k].index, sd[k].PrevSizeOfSubhalo);
975
976 if(!(retcode & 1))
977 subfind_distlinklist_set_headandnext(sd[k].index, head, {-1, -1});
978 if(!(retcode & 2))
979 subfind_distlinklist_set_next(tail, sd[k].index);
980 }
981 break;
982 }
983 }
984
985 myflush(stdout);
986
987 /* now tell the others to stop polling */
988 for(int k = 0; k < SubNTask; k++)
989 if(k != SubThisTask)
990 MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
991 }
992
993 MPI_Barrier(SubComm);
994 double tt1 = Logs.second();
995
996 subfind_collective_printf("SUBFIND: root-task=%d: ma=%d/%d took %g sec\n", ThisTask, master, SubNTask, Logs.timediff(tt0, tt1));
997 }
998 double t1 = Logs.second();
999
1000 subfind_collective_printf("SUBFIND: root-task=%d: identification of primary coll_candidates took %g sec\n", ThisTask,
1001 Logs.timediff(t0, t1));
1002
1003 /* Add the full group as the final subhalo candidate.
1004 */
1005 location head = {-1, -1};
1006 location prev = {-1, -1};
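/* Stitch all remaining chains (those whose tail has no successor) together into one long chain that
 * covers the entire FoF group; its head is broadcast to all tasks so that the whole group can be
 * appended as the final ("background") candidate below. */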
1007 for(int master = 0; master < SubNTask; master++)
1008 {
1009 if(SubThisTask != master)
1010 subfind_poll_for_requests();
1011 else
1012 {
1013 for(int i = 0; i < NumPartGroup; i++)
1014 {
1015 location index = {SubThisTask, i};
1016
1017 if(SFHead[i] == index)
1018 {
1019 location tail;
1020 MyLenType len;
1021 double prevlen;
1022 subfind_distlinklist_get_tailandlen(SFHead[i], tail, len, prevlen);
1023 location next = subfind_distlinklist_get_next(tail);
1024
1025 if(next.index == -1)
1026 {
1027 if(prev.index < 0)
1028 head = index;
1029
1030 if(prev.index >= 0)
1031 subfind_distlinklist_set_next(prev, index);
1032
1033 prev = tail;
1034 }
1035 }
1036 }
1037
1038 /* now tell the others to stop polling */
1039 for(int k = 0; k < SubNTask; k++)
1040 if(k != SubThisTask)
1041 MPI_Send(&k, 1, MPI_INT, k, TAG_POLLING_DONE, SubComm);
1042 }
1043
1044 MPI_Barrier(SubComm);
1045 MPI_Bcast(&head, sizeof(head), MPI_BYTE, master, SubComm);
1046 MPI_Bcast(&prev, sizeof(prev), MPI_BYTE, master, SubComm);
1047 }
1048
1049 if(SubThisTask == SubNTask - 1)
1050 {
1051 if(count_cand < max_coll_candidates)
1052 {
1053 coll_candidates[count_cand].len = totgrouplen;
1054 coll_candidates[count_cand].head = head;
1055 count_cand++;
1056 }
1057 else
1058 Terminate("count_cand=%d >= max_coll_candidates=%d", count_cand, max_coll_candidates);
1059 }
1060
1061 subfind_collective_printf("SUBFIND: root-task=%d: adding background as candidate\n", ThisTask);
1062
1063 /* go through the whole chain once to establish a rank order. For the rank we use SFLen[]
1064 */
1065 double ta = Logs.second();
1066
1067 int master = head.task;
1068
1069 if(master < 0 || master >= SubNTask)
1070 Terminate("master=%d SubNTask=%d\n", master, SubNTask);
1071
1072 if(SubThisTask != master)
1073 subfind_poll_for_requests();
1074 else
1075 {
1076 location p = head;
1077 MyLenType rank = 0;
1078
1079 while(p.index >= 0)
1080 {
1081 p = subfind_distlinklist_setrank_and_get_next(p, rank);
1082 }
1083
1084 /* now tell the others to stop polling */
1085 for(int i = 0; i < SubNTask; i++)
1086 if(i != master)
1087 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
1088 }
1089
1090 MPI_Barrier(SubComm);
1091
1092 /* for each candidate, we now pull out the rank of its head */
1093 for(int master = 0; master < SubNTask; master++)
1094 {
1095 if(SubThisTask != master)
1096 subfind_poll_for_requests();
1097 else
1098 {
1099 for(int k = 0; k < count_cand; k++)
1100 coll_candidates[k].rank = subfind_distlinklist_get_rank(coll_candidates[k].head);
1101
1102 /* now tell the others to stop polling */
1103 for(int i = 0; i < SubNTask; i++)
1104 if(i != SubThisTask)
1105 MPI_Send(&i, 1, MPI_INT, i, TAG_POLLING_DONE, SubComm);
1106 }
1107 }
1108 MPI_Barrier(SubComm);
1109
1110 double tb = Logs.second();
1111
1112 subfind_collective_printf(
1113 "SUBFIND: root-task=%d: establishing the rank order took %g sec (grouplen=%lld) presently allocated %g MB\n", ThisTask,
1114 Logs.timediff(ta, tb), totgrouplen, Mem.getAllocatedBytesInMB());
1115}
1116
1117template <typename partset>
1118void fof<partset>::subfind_unbind_independent_ones(domain<partset> *SingleDomain, int count_cand_l)
1119{
1120 unbind_list = (int *)Mem.mymalloc("unbind_list", Tp->NumPart * sizeof(int));
1121
1122 mycxxsort(coll_candidates, coll_candidates + count_cand_l, subfind_compare_coll_candidates_nsubs);
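/* coll_candidates is now ordered by 'nsub', and PPS[] enumerates the particles in order of increasing
 * submark (either because they were physically regrouped in COLL_SUBFIND mode or because PPS[] was
 * sorted explicitly); since each independent candidate was marked with submark equal to its nsub
 * value, both lists can be traversed in lockstep with the running index ii. */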
1123
1124 for(int k = 0, ii = 0; k < count_cand_l; k++)
1125 if(coll_candidates[k].parent == 0)
1126 {
1127 int i = PPS[ii].index;
1128
1129 while(Tp->PS[i].submark < coll_candidates[k].nsub)
1130 {
1131 ii++;
1132 i = PPS[ii].index;
1133
1134 if(i >= Tp->NumPart)
1135 Terminate("i >= NumPart");
1136 }
1137
1138 if(Tp->PS[i].submark >= 0 && Tp->PS[i].submark < HIGHBIT)
1139 {
1140 int len = 0;
1141 int nsubs = Tp->PS[i].submark;
1142
1143 if(nsubs != coll_candidates[k].nsub)
1144 Terminate("TASK=%d i=%d k=%d nsubs=%d coll_candidates[k].nsub=%d\n", SubThisTask, i, k, nsubs, coll_candidates[k].nsub);
1145
1146 while(i < Tp->NumPart)
1147 {
1148 if(Tp->PS[i].submark == nsubs)
1149 {
1150 Tp->PS[i].submark = HIGHBIT;
1151 if((Tp->PS[i].u.s.origintask & HIGHBIT) == 0)
1152 {
1153 unbind_list[len] = i;
1154 len++;
1155 }
1156 ii++;
1157 i = PPS[ii].index;
1158 }
1159 else
1160 break;
1161 }
1162
1163 /* call the serial unbind function */
1164 len = subfind_unbind(SingleDomain, SingleDomain->Communicator, unbind_list, len);
1165
1166 if(len >= All.DesLinkNgb)
1167 {
1168 /* ok, we found a substructure */
1169 coll_candidates[k].bound_length = len;
1170
1171 for(int j = 0; j < len; j++)
1172 Tp->PS[unbind_list[j]].submark = nsubs; /* we use this to flag the substructures */
1173 }
1174 else
1175 coll_candidates[k].bound_length = 0;
1176 }
1177 }
1178
1179 Mem.myfree(unbind_list);
1180}
1181
1182struct loc_compound0
1183{
1184 int index;
1185 location loc;
1186 approxlen prevlen;
1187};
1188
1189struct loc_compound1
1190{
1191 int index;
1192 location loc;
1193};
1194
1195struct loc_compound2
1196{
1197 location loc;
1198 MyLenType len;
1199 double prevlen;
1200};
1201
1202struct loc_compound3
1203{
1204 int index;
1205 MyLenType len;
1206 location tail;
1207 double prevlen;
1208};
1209
1210struct loc_compound4
1211{
1212 int index;
1213 location head;
1214 location next;
1215};
1216
1217struct loc_compound5
1218{
1219 int index;
1220 MyLenType len;
1221 location head;
1222 location tail;
1223 location next;
1224 approxlen prevlen;
1225};
1226
1227struct loc_compound6
1228{
1229 location loc;
1230 MyLenType len;
1231};
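/* The loc_compound* structures above are the fixed-layout message payloads of the point-to-point
 * protocol implemented below: they are shipped as raw bytes (MPI_BYTE) between the task that owns a
 * given piece of the distributed linked list and the task that wants to read or modify it. */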
1232
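/* Service loop executed by every task that is not the 'master' of the current linked-list operation:
 * it answers remote read/update requests for the locally stored SFHead/SFNext/SFTail/SFLen/SFPrevLen
 * entries (and for additions to unbind_list) until a TAG_POLLING_DONE message arrives. */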
1233template <typename partset>
1234void fof<partset>::subfind_poll_for_requests(void)
1235{
1236 int tag;
1237 do
1238 {
1239 MPI_Status status;
1240 MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, SubComm, &status);
1241
1242 int source = status.MPI_SOURCE;
1243 tag = status.MPI_TAG;
1244
1245 /* MPI_Get_count(&status, MPI_BYTE, &count); */
1246 switch(tag)
1247 {
1248 case TAG_GET_TWOHEADS:
1249 {
1250 int ibuf[2];
1251 MPI_Recv(ibuf, 2, MPI_INT, source, TAG_GET_TWOHEADS, SubComm, MPI_STATUS_IGNORE);
1252 location buf[2];
1253 buf[0] = SFHead[ibuf[0]];
1254 buf[1] = SFHead[ibuf[1]];
1255 MPI_Send(buf, 2 * sizeof(location), MPI_BYTE, source, TAG_GET_TWOHEADS_DATA, SubComm);
1256 }
1257 break;
1258
1259 case TAG_SET_NEWTAIL:
1260 {
1261 loc_compound0 data;
1262 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_NEWTAIL, SubComm, MPI_STATUS_IGNORE);
1263
1264 int index = data.index;
1265 location newtail = data.loc;
1266 location oldtail = SFTail[index]; /* return old tail */
1267 SFTail[index] = newtail;
1268 SFLen[index]++;
1269 SFPrevLen[index] += data.prevlen.get();
1270
1271 if(newtail.task == SubThisTask)
1272 {
1273 SFHead[newtail.index] = {SubThisTask, index};
1274 SFNext[newtail.index] = {-1, -1};
1275 }
1276
1277 if(oldtail.task == SubThisTask)
1278 {
1279 SFNext[oldtail.index] = newtail;
1280 }
1281
1282 MPI_Send(&oldtail, sizeof(location), MPI_BYTE, source, TAG_GET_OLDTAIL, SubComm);
1283 }
1284 break;
1285
1286 case TAG_SET_ALL:
1287 {
1288 loc_compound5 data;
1289 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_ALL, SubComm, MPI_STATUS_IGNORE);
1290 int index = data.index;
1291 SFLen[index] = data.len;
1292 SFHead[index] = data.head;
1293 SFTail[index] = data.tail;
1294 SFNext[index] = data.next;
1295 SFPrevLen[index] = data.prevlen.get();
1296 }
1297 break;
1298
1299 case TAG_GET_TAILANDLEN:
1300 {
1301 int index;
1302 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1303 loc_compound2 data = {SFTail[index], SFLen[index], SFPrevLen[index]};
1304 MPI_Send(&data, sizeof(data), MPI_BYTE, source, TAG_GET_TAILANDLEN_DATA, SubComm);
1305 }
1306 break;
1307
1308 case TAG_SET_TAILANDLEN:
1309 {
1310 loc_compound3 data;
1311 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_TAILANDLEN, SubComm, MPI_STATUS_IGNORE);
1312 int index = data.index;
1313 SFTail[index] = data.tail;
1314 SFLen[index] = data.len;
1315 SFPrevLen[index] = data.prevlen;
1316 }
1317 break;
1318
1319 case TAG_SET_HEADANDNEXT:
1320 {
1321 loc_compound4 data;
1322 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_HEADANDNEXT, SubComm, MPI_STATUS_IGNORE);
1323 int index = data.index;
1324 SFHead[index] = data.head;
1325 SFNext[index] = data.next;
1326 }
1327 break;
1328
1329 case TAG_SET_NEXT:
1330 {
1331 loc_compound1 data;
1332 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SET_NEXT, SubComm, MPI_STATUS_IGNORE);
1333 int index = data.index;
1334 SFNext[index] = data.loc;
1335 }
1336 break;
1337
1338 case TAG_SETHEADGETNEXT:
1339 {
1340 loc_compound1 data;
1341 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SETHEADGETNEXT, SubComm, MPI_STATUS_IGNORE);
1342 int index = data.index;
1343 location head = data.loc;
1344 location next;
1345 int task;
1346 do
1347 {
1348 SFHead[index] = head;
1349 next = SFNext[index];
1350 task = next.task;
1351 index = next.index;
1352 }
1353 while(next.index >= 0 && task == SubThisTask);
1354 MPI_Send(&next, sizeof(location), MPI_BYTE, source, TAG_SETHEADGETNEXT_DATA, SubComm);
1355 }
1356 break;
1357
1358 case TAG_GET_NEXT:
1359 {
1360 int index;
1361 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1362 MPI_Send(&SFNext[index], sizeof(location), MPI_BYTE, source, TAG_GET_NEXT_DATA, SubComm);
1363 }
1364 break;
1365
1366 case TAG_GET_HEAD:
1367 {
1368 int index;
1369 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1370 MPI_Send(&SFHead[index], sizeof(location), MPI_BYTE, source, TAG_GET_HEAD_DATA, SubComm);
1371 }
1372 break;
1373
1374 case TAG_ADD_PARTICLE:
1375 {
1376 int index;
1377 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1378 if(SFTail[index].index < 0) /* consider only particles not already in substructures */
1379 {
1380 unbind_list[LocalLen] = index;
1381 if(index >= NumPartGroup)
1382 Terminate("What: index=%d NumPartGroup=%d\n", index, NumPartGroup);
1383 LocalLen++;
1384 }
1385 }
1386 break;
1387
1388 case TAG_MARK_PARTICLE:
1389 {
1390 int ibuf[3];
1391 MPI_Recv(ibuf, 3, MPI_INT, source, TAG_MARK_PARTICLE, SubComm, MPI_STATUS_IGNORE);
1392 int index = ibuf[0];
1393 int target = ibuf[1];
1394 int submark = ibuf[2];
1395
1396 if(Tp->PS[IndexList[index]].submark != HIGHBIT)
1397 Terminate("Task=%d i=%d PS[i].submark=%d?\n", SubThisTask, IndexList[index], Tp->PS[IndexList[index]].submark);
1398
1399 Tp->PS[IndexList[index]].TargetTask = target;
1400 Tp->PS[IndexList[index]].submark = submark;
1401 }
1402 break;
1403
1404 case TAG_ADDBOUND:
1405 {
1406 int ibuf[2];
1407 MPI_Recv(ibuf, 2, MPI_INT, source, TAG_ADDBOUND, SubComm, &status);
1408 int index = ibuf[0];
1409 int nsub = ibuf[1];
1410 if(SFTail[index].index == nsub) /* consider only particles in this substructure */
1411 {
1412 unbind_list[LocalLen] = index;
1413 LocalLen++;
1414 }
1415 }
1416 break;
1417
1418 case TAG_SETRANK:
1419 {
1420 loc_compound6 data;
1421 MPI_Recv(&data, sizeof(data), MPI_BYTE, source, TAG_SETRANK, SubComm, MPI_STATUS_IGNORE);
1422 int index = data.loc.index;
1423 MyLenType rank = data.len;
1424 location next;
1425 do
1426 {
1427 SFLen[index] = rank++;
1428 next = SFNext[index];
1429 if(next.index < 0)
1430 break;
1431 index = next.index;
1432 }
1433 while(next.task == SubThisTask);
1434 data.loc = next;
1435 data.len = rank;
1436 MPI_Send(&data, sizeof(data), MPI_BYTE, source, TAG_SETRANK_OUT, SubComm);
1437 }
1438 break;
1439
1440 case TAG_GET_RANK:
1441 {
1442 int index;
1443 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1444 MyLenType rank = SFLen[index];
1445 MPI_Send(&rank, sizeof(MyLenType), MPI_BYTE, source, TAG_GET_RANK_DATA, SubComm);
1446 }
1447 break;
1448
1449 case TAG_POLLING_DONE:
1450 {
1451 int index;
1452 MPI_Recv(&index, 1, MPI_INT, source, tag, SubComm, &status);
1453 }
1454 break;
1455
1456 default:
1457 Terminate("tag not present in the switch");
1458 break;
1459 }
1460 }
1461 while(tag != TAG_POLLING_DONE);
1462}
1463
1464template <typename partset>
1465location fof<partset>::subfind_distlinklist_setrank_and_get_next(location loc, MyLenType &rank)
1466{
1467 int task = loc.task;
1468 int i = loc.index;
1469
1470 location next;
1471
1472 if(SubThisTask == task)
1473 {
1474 SFLen[i] = rank++;
1475 next = SFNext[i];
1476 }
1477 else
1478 {
1479 loc_compound6 data = {loc, rank};
1480 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SETRANK, SubComm);
1481 MPI_Recv(&data, sizeof(data), MPI_BYTE, task, TAG_SETRANK_OUT, SubComm, MPI_STATUS_IGNORE);
1482 next = data.loc;
1483 rank = data.len;
1484 }
1485 return next;
1486}
1487
1488template <typename partset>
1489location fof<partset>::subfind_distlinklist_set_head_get_next(location loc, location head)
1490{
1491 int task = loc.task;
1492 int i = loc.index;
1493
1494 location next;
1495
1496 if(SubThisTask == task)
1497 {
1498 SFHead[i] = head;
1499 next = SFNext[i];
1500 }
1501 else
1502 {
1503 loc_compound1 data = {i, head};
1504 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SETHEADGETNEXT, SubComm);
1505 MPI_Recv(&next, sizeof(location), MPI_BYTE, task, TAG_SETHEADGETNEXT_DATA, SubComm, MPI_STATUS_IGNORE);
1506 }
1507
1508 return next;
1509}
1510
1511template <typename partset>
1512void fof<partset>::subfind_distlinklist_set_next(location loc, location next)
1513{
1514 int task = loc.task;
1515 int i = loc.index;
1516
1517 if(SubThisTask == task)
1518 {
1519 SFNext[i] = next;
1520 }
1521 else
1522 {
1523 loc_compound1 data = {i, next};
1524 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_NEXT, SubComm);
1525 }
1526}
1527
1528template <typename partset>
1529void fof<partset>::subfind_distlinklist_add_particle(location loc)
1530{
1531 int task = loc.task;
1532 int i = loc.index;
1533
1534 if(SubThisTask == task)
1535 {
1536 if(SFTail[i].index < 0) /* consider only particles not already in substructures */
1537 {
1538 if(i >= NumPartGroup)
1539 Terminate("What: index=%d NumPartGroup=%d\n", i, NumPartGroup);
1540
1541 unbind_list[LocalLen] = i;
1542 LocalLen++;
1543 }
1544 }
1545 else
1546 {
1547 MPI_Send(&i, 1, MPI_INT, task, TAG_ADD_PARTICLE, SubComm);
1548 }
1549}
1550
1551template <typename partset>
1552void fof<partset>::subfind_distlinklist_mark_particle(location loc, int target, int submark)
1553{
1554 int task = loc.task;
1555 int i = loc.index;
1556
1557 if(SubThisTask == task)
1558 {
1559 if(Tp->PS[IndexList[i]].submark != HIGHBIT)
1560 Terminate("Task=%d i=%d PS[i].submark=%d?\n", SubThisTask, i, Tp->PS[IndexList[i]].submark);
1561
1562 Tp->PS[IndexList[i]].TargetTask = target;
1563 Tp->PS[IndexList[i]].submark = submark;
1564 }
1565 else
1566 {
1567 int ibuf[3] = {i, target, submark};
1568 MPI_Send(ibuf, 3, MPI_INT, task, TAG_MARK_PARTICLE, SubComm);
1569 }
1570}
1571
1572template <typename partset>
1573void fof<partset>::subfind_distlinklist_add_bound_particles(location loc, int nsub)
1574{
1575 int task = loc.task;
1576 int i = loc.index;
1577
1578 if(SubThisTask == task)
1579 {
1580 if(SFTail[i].index == nsub) /* consider only particles in this substructure */
1581 {
1582 unbind_list[LocalLen] = i;
1583 LocalLen++;
1584 }
1585 }
1586 else
1587 {
1588 int ibuf[2] = {i, nsub};
1589 MPI_Send(ibuf, 2, MPI_INT, task, TAG_ADDBOUND, SubComm);
1590 }
1591}
1592
1593template <typename partset>
1594location fof<partset>::subfind_distlinklist_get_next(location loc)
1595{
1596 int task = loc.task;
1597 int i = loc.index;
1598
1599 location next;
1600
1601 if(SubThisTask == task)
1602 {
1603 next = SFNext[i];
1604 }
1605 else
1606 {
1607 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_NEXT, SubComm);
1608 MPI_Recv(&next, sizeof(location), MPI_BYTE, task, TAG_GET_NEXT_DATA, SubComm, MPI_STATUS_IGNORE);
1609 }
1610
1611 return next;
1612}
1613
1614template <typename partset>
1615MyLenType fof<partset>::subfind_distlinklist_get_rank(location loc)
1616{
1617 int task = loc.task;
1618 int i = loc.index;
1619
1620 MyLenType rank;
1621
1622 if(SubThisTask == task)
1623 {
1624 rank = SFLen[i];
1625 }
1626 else
1627 {
1628 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_RANK, SubComm);
1629 MPI_Recv(&rank, sizeof(MyLenType), MPI_BYTE, task, TAG_GET_RANK_DATA, SubComm, MPI_STATUS_IGNORE);
1630 }
1631
1632 return rank;
1633}
1634
1635template <typename partset>
1636location fof<partset>::subfind_distlinklist_get_head(location loc)
1637{
1638 int task = loc.task;
1639 int i = loc.index;
1640
1641 location head;
1642
1643 if(SubThisTask == task)
1644 {
1645 head = SFHead[i];
1646 }
1647 else
1648 {
1649 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_HEAD, SubComm);
1650 MPI_Recv(&head, sizeof(location), MPI_BYTE, task, TAG_GET_HEAD_DATA, SubComm, MPI_STATUS_IGNORE);
1651 }
1652
1653 return head;
1654}
1655
1656template <typename partset>
1657void fof<partset>::subfind_distlinklist_get_two_heads(location ngb_index1, location ngb_index2, location &head, location &head_attach)
1658{
1659 if(ngb_index1.task != ngb_index2.task)
1660 Terminate("ngb_index1.task != ngb_index2.task");
1661
1662 int task = ngb_index1.task;
1663 int i1 = ngb_index1.index;
1664 int i2 = ngb_index2.index;
1665
1666 if(SubThisTask == task)
1667 {
1668 head = SFHead[i1];
1669 head_attach = SFHead[i2];
1670 }
1671 else
1672 {
1673 int ibuf[2] = {i1, i2};
1674 MPI_Send(ibuf, 2, MPI_INT, task, TAG_GET_TWOHEADS, SubComm);
1675 location buf[2];
1676 MPI_Recv(buf, 2 * sizeof(location), MPI_BYTE, task, TAG_GET_TWOHEADS_DATA, SubComm, MPI_STATUS_IGNORE);
1677 head = buf[0];
1678 head_attach = buf[1];
1679 }
1680}
1681
1682template <typename partset>
1683void fof<partset>::subfind_distlinklist_set_headandnext(location loc, location head, location next)
1684{
1685 int task = loc.task;
1686 int i = loc.index;
1687
1688 if(SubThisTask == task)
1689 {
1690 SFHead[i] = head;
1691 SFNext[i] = next;
1692 }
1693 else
1694 {
1695 loc_compound4 data = {i, head, next};
1696 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_HEADANDNEXT, SubComm);
1697 }
1698}
1699
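/* Appends 'newtail' to the chain whose entry is stored at 'loc', increments the chain's length, and
 * returns the previous tail in 'tail'. The return code records which pointer updates were already done
 * on the task owning 'loc': bit 0 is set if the new tail lives there (its head/next were initialized),
 * bit 1 is set if the old tail lives there (its next pointer was updated). The caller performs whatever
 * is left, see the retcode handling in subfind_col_find_coll_candidates(). */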
1700template <typename partset>
1701int fof<partset>::subfind_distlinklist_get_tail_set_tail_increaselen(location loc, location &tail, location newtail, approxlen prevlen)
1702{
1703 int task = loc.task;
1704 int i = loc.index;
1705
1706 int retcode = 0;
1707
1708 if(SubThisTask == task)
1709 {
1710 location oldtail = SFTail[i];
1711 SFTail[i] = newtail;
1712 SFLen[i]++;
1713 SFPrevLen[i] += prevlen.get();
1714 tail = oldtail;
1715
1716 if(newtail.task == SubThisTask)
1717 {
1718 SFHead[newtail.index] = loc;
1719 SFNext[newtail.index] = {-1, -1};
1720 retcode |= 1;
1721 }
1722
1723 if(oldtail.task == SubThisTask)
1724 {
1725 SFNext[oldtail.index] = newtail;
1726 retcode |= 2;
1727 }
1728 }
1729 else
1730 {
1731 loc_compound0 data = {i, newtail, prevlen};
1732 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_NEWTAIL, SubComm);
1733 location oldtail;
1734 MPI_Recv(&oldtail, sizeof(location), MPI_BYTE, task, TAG_GET_OLDTAIL, SubComm, MPI_STATUS_IGNORE);
1735 tail = oldtail;
1736
1737 if(newtail.task == task)
1738 retcode |= 1;
1739 if(oldtail.task == task)
1740 retcode |= 2;
1741 }
1742
1743 return retcode;
1744}
1745
1746template <typename partset>
1747void fof<partset>::subfind_distlinklist_set_tailandlen(location loc, location tail, MyLenType len, double prevlen)
1748{
1749 int task = loc.task;
1750 int i = loc.index;
1751
1752 if(SubThisTask == task)
1753 {
1754 SFTail[i] = tail;
1755 SFLen[i] = len;
1756 SFPrevLen[i] = prevlen;
1757 }
1758 else
1759 {
1760 loc_compound3 data = {i, len, tail, prevlen};
1761 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_TAILANDLEN, SubComm);
1762 }
1763}
1764
1765template <typename partset>
1766void fof<partset>::subfind_distlinklist_get_tailandlen(location loc, location &tail, MyLenType &len, double &prevlen)
1767{
1768 int task = loc.task;
1769 int i = loc.index;
1770
1771 if(SubThisTask == task)
1772 {
1773 tail = SFTail[i];
1774 len = SFLen[i];
1775 prevlen = SFPrevLen[i];
1776 }
1777 else
1778 {
1779 MPI_Send(&i, 1, MPI_INT, task, TAG_GET_TAILANDLEN, SubComm);
1780
1781 loc_compound2 data;
1782 MPI_Recv(&data, sizeof(data), MPI_BYTE, task, TAG_GET_TAILANDLEN_DATA, SubComm, MPI_STATUS_IGNORE);
1783 tail = data.loc;
1784 len = data.len;
1785 prevlen = data.prevlen;
1786 }
1787}
1788
1789template <typename partset>
1790void fof<partset>::subfind_distlinklist_set_all(location loc, location head, location tail, MyLenType len, location next,
1791 approxlen prevlen)
1792{
1793 int task = loc.task;
1794 int i = loc.index;
1795
1796 if(SubThisTask == task)
1797 {
1798 SFHead[i] = head;
1799 SFTail[i] = tail;
1800 SFNext[i] = next;
1801 SFLen[i] = len;
1802 SFPrevLen[i] = prevlen.get();
1803 }
1804 else
1805 {
1806 loc_compound5 data = {i, len, head, tail, next, prevlen};
1807 MPI_Send(&data, sizeof(data), MPI_BYTE, task, TAG_SET_ALL, SubComm);
1808 }
1809}
1810
1811/* now make sure that the following classes are really instantiated, otherwise we may get a linking problem */
1812#include "../data/simparticles.h"
1813template class fof<simparticles>;
1814
1815#if defined(LIGHTCONE) && defined(LIGHTCONE_PARTICLES_GROUPS)
1816#include "../data/lcparticles.h"
1817template class fof<lcparticles>;
1818#endif
1819
1820#endif
1821#endif