GADGET-4
halotrees.cc
1/*******************************************************************************
2 * \copyright This file is part of the GADGET4 N-body/SPH code developed
3 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
4 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
5 *******************************************************************************/
6
12#include "gadgetconfig.h"
13
14#ifdef MERGERTREE
15
16#include <gsl/gsl_rng.h>
17#include <hdf5.h>
18#include <math.h>
19#include <mpi.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25#include <unistd.h>
26
27#include "../data/allvars.h"
28#include "../data/dtypes.h"
29#include "../data/mymalloc.h"
30#include "../fof/fof.h"
31#include "../fof/fof_io.h"
32#include "../io/hdf5_util.h"
33#include "../io/io.h"
34#include "../logs/timer.h"
35#include "../main/main.h"
36#include "../main/simulation.h"
37#include "../mergertree/io_descendant.h"
38#include "../mergertree/io_halotrees.h"
39#include "../mergertree/io_progenitors.h"
40#include "../mergertree/io_treelinks.h"
41#include "../mergertree/mergertree.h"
42#include "../mpi_utils/mpi_utils.h"
43#include "../sort/parallel_sort.h"
44#include "../subfind/subfind.h"
45#include "../system/system.h"
46
47/* This is the main function for constructing the halo trees.
48 */
49void mergertree::halotrees_construct(int lastsnapnr)
50{
51 domain<simparticles> Domain{Communicator, Sp};
52 fof<simparticles> FoF{Communicator, Sp, &Domain};
53
54 LastSnapShotNr = lastsnapnr;
55
56 Cats = (halo_catalogue *)Mem.mymalloc_clear("Cats", sizeof(halo_catalogue) * (LastSnapShotNr + 1));
57 CatTimes = (times_catalogue *)Mem.mymalloc("CatTimes", sizeof(times_catalogue) * (LastSnapShotNr + 1));
58
59 /* Let's load all catalogs. */
60 halotrees_load_catalogues(&FoF);
61 mpi_printf("\nMERGERTREE: Catalogues loaded\n");
62
63 /* As preliminary work, let everybody know the number of subhalos on each rank, and assign a global SubhaloNr to each. */
64 halotrees_assign_global_subhalonr_and_groupnr();
65 mpi_printf("\nMERGERTREE: SubhaloNr assigned\n");
66
67 /* Assign the halos to disjoint trees - initially, each halo is in its own tree, which will then be linked up to form bigger trees.
68 * To get a reasonably memory-balanced distribution, we assign random tasks to them initially. */
69 halotrees_initial_treeassignment();
70 mpi_printf("\nMERGERTREE: Initial tree assignment\n");
71
72 /* We now proceed by linking the halos according to the descendant information */
73 halotrees_link_trees();
74 mpi_printf("\nMERGERTREE: Halo trees linked\n");
75
76 /* because the previously selected first progenitor is not necessarily the most significant branch, we make a new choice
77 * by sorting the progenitors by their significance, and then redetermine the first progenitor among the set of progenitors */
78 halotrees_determine_mainprogenitor();
79 mpi_printf("\nMERGERTREE: Determined the main progenitor\n");
80
81 /* Now the trees are linked, i.e. all halos belonging to the same tree have a common tree-id. We proceed by assigning new indices to
82 * all halos within the same treeid. */
83 halotrees_assign_new_treeindices();
84 mpi_printf("\nMERGERTREE: New tree indices assigned\n");
85
86 /* next, we need to remap the global descendant and first/next progenitor pointers to another set of such pointers that use the local
87 * tree ids */
88 halotrees_remap_treepointers();
89 mpi_printf("\nMERGERTREE: Remapping done\n");
90
91 /* now we can go ahead and move the halos belonging to the same tree together, and set up a table with the trees */
92 halotrees_collect_treehalos();
93 mpi_printf("\nMERGERTREE: Treehalos collected\n\n");
94
95 /* finally, output the trees to files */
96 halotrees_io TreesIO{this, this->Communicator, All.SnapFormat};
97 TreesIO.halotrees_save_trees();
98
99 mpi_printf("\nMERGERTREE: Trees saved: A total number of %lld subhalos have been assembled in %lld trees.", TotNhalos, TotNtrees);
100 mpi_printf("\nMERGERTREE: Largest tree contains %d halos.\n\n", LargestHaloCount);
101
102 /* now also output links from the subhalo catalogues to the trees */
103 halotrees_save_subhalo_treelinks();
104 mpi_printf("\nMERGERTREE: Tree links saved\n\n");
105}
106
107/* This function is in charge of loading all the group catalogues.
108 */
109void mergertree::halotrees_load_catalogues(fof<simparticles> *FoF)
110{
111 /* let's first get the catalogs */
112 for(int num = 0; num <= LastSnapShotNr; num++)
113 {
114 /* load the group catalog */
115 fof_io<simparticles> FoF_IO{FoF, Communicator, All.SnapFormat};
116 FoF_IO.fof_subfind_load_groups(num);
117
118 /* bring all groups and subhalos back into the order in which they were stored in the files, because the
119 * parallel load routine may have scrambled the order.
120 */
121 mycxxsort_parallel(FoF->Group, FoF->Group + FoF->Ngroups, compare_Group_FileOffset, Communicator);
122 mycxxsort_parallel(FoF->Subhalo, FoF->Subhalo + FoF->Nsubhalos, compare_Subhalo_FileOffset, Communicator);
123
124 if(FoF_IO.LegacyFormat)
125 {
126 mpi_printf("\nFOF/SUBFIND: Legacy format from Arepo detected, trying to adjust for this.\n");
127 FoF->subfind_redetermine_groupnr();
128 FoF->fof_assign_group_offset();
129 FoF->subfind_assign_subhalo_offsettype();
130 }
131
132 /* save the groups info for later use */
133 Cats[num].Ngroups = FoF->Ngroups;
134 Cats[num].TotNgroups = FoF->TotNgroups;
135
136 Cats[num].Nsubhalos = FoF->Nsubhalos;
137 Cats[num].TotNsubhalos = FoF->TotNsubhalos;
138
139 CatTimes[num].Time = FoF->Time;
140 CatTimes[num].Redshift = FoF->Redshift;
141
142 // we copy the group catalogue data here. Simply setting the pointer would violate the rule not to
143 // copy a (movable) memory pointer into another variable, and would later cause corruption once the IO object is destroyed.
144 Cats[num].Group = (fof<simparticles>::group_properties *)Mem.mymalloc_movable(
145 &Cats[num].Group, "Cats[num].Group", FoF->Ngroups * sizeof(fof<simparticles>::group_properties));
146 memcpy(Cats[num].Group, FoF->Group, FoF->Ngroups * sizeof(fof<simparticles>::group_properties));
147 Mem.myfree_movable(FoF->Group);
148
149 Cats[num].Subhalo = (fof<simparticles>::subhalo_properties *)Mem.mymalloc_movable(
150 &Cats[num].Subhalo, "Subhalo", Cats[num].Nsubhalos * sizeof(fof<simparticles>::subhalo_properties));
151 memcpy(Cats[num].Subhalo, FoF->Subhalo, Cats[num].Nsubhalos * sizeof(fof<simparticles>::subhalo_properties));
152 Mem.myfree_movable(FoF->Subhalo);
153
154 /* allocate some storage for extra subhalo info */
155 Cats[num].SubExt =
156 (subhalo_extension *)Mem.mymalloc_movable(&Cats[num].SubExt, "Cats[num].SubExt", FoF->Nsubhalos * sizeof(subhalo_extension));
157 }
158
159 /* now let's load the descendant tree information (except for the last snapshot) */
160 for(int num = 0; num < LastSnapShotNr; num++)
161 {
162 /* fetch the descendants for the catalogs */
163 descendant_io DescIO{this, this->Communicator, All.SnapFormat};
164 DescIO.mergertree_read_descendants(num);
165
166 /* if needed, restore proper order as in files */
167 mycxxsort_parallel(Descendants, Descendants + DescIO.Nsubhalos, compare_Desc_FileOffset, Communicator);
168
169 if(DescIO.TotNsubhalos != Cats[num].TotNsubhalos)
170 Terminate("inconsistency: DescIO.TotNsubhalos=%lld != Cats[num].TotNsubhalos=%lld", DescIO.TotNsubhalos,
171 Cats[num].TotNsubhalos);
172
173 /* it's possible that the local number of descendants, Nsubhalos, does not match Cats[num].Nsubhalos,
174 * implying that we need to reshuffle how things are distributed over processors.
175 */
176 halotrees_reshuffle((char **)&Descendants, sizeof(desc_list), DescIO.Nsubhalos, Cats[num].Nsubhalos);
177
178 /* retain the info for later use.
179 * We copy the data here. Simply setting the pointer would violate the rule not to
180 * copy a (movable) memory pointer into another variable, and would later cause corruption once the IO object is destroyed.
181 */
182 Cats[num].Descendants =
183 (desc_list *)Mem.mymalloc_movable(&Cats[num].Descendants, "Cats[num].Descendants", Cats[num].Nsubhalos * sizeof(desc_list));
184 memcpy(Cats[num].Descendants, Descendants, Cats[num].Nsubhalos * sizeof(desc_list));
185 Mem.myfree_movable(Descendants);
186 }
187
188 /* load the progenitor tree information (except for the first snapshot) */
189 for(int num = 1; num <= LastSnapShotNr; num++)
190 {
191 /* fetch from files */
192 progenitors_io ProgIO{this, this->Communicator, All.SnapFormat};
193 ProgIO.mergertree_read_progenitors(num);
194
195 /* if needed, restore proper order as in files */
196 mycxxsort_parallel(Progenitors, Progenitors + ProgIO.Nsubhalos, compare_Prog_FileOffset, Communicator);
197
198 if(ProgIO.TotNsubhalos != Cats[num].TotNsubhalos)
199 Terminate("inconsistency: ProgIO.TotNsubhalos=%lld != Cats[num].TotNsubhalos=%lld", ProgIO.TotNsubhalos,
200 Cats[num].TotNsubhalos);
201
202 /* it's possible that the local number of progenitors, Nsubhalos, does not match Cats[num].Nsubhalos,
203 * implying that we need to reshuffle how things are distributed over processors.
204 */
205 halotrees_reshuffle((char **)&Progenitors, sizeof(prog_list), ProgIO.Nsubhalos, Cats[num].Nsubhalos);
206
207 /* retain the info for later use.
208 * We copy the data here. Simply setting the pointer would violate the rule not to
209 * copy a (movable) memory pointer into another variable, and would later cause corruption once the IO object is destroyed.
210 */
211 Cats[num].Progenitors =
212 (prog_list *)Mem.mymalloc_movable(&Cats[num].Progenitors, "Cats[num].Progenitors", Cats[num].Nsubhalos * sizeof(prog_list));
213 memcpy(Cats[num].Progenitors, Progenitors, Cats[num].Nsubhalos * sizeof(prog_list));
214 Mem.myfree_movable(Progenitors);
215 }
216}
217
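/* This function writes out, for every snapshot catalogue, the (TreeID, TreeIndex) pair of each subhalo, so that the
 * subhalo catalogues can later be cross-matched with the merger tree files.
 */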
218void mergertree::halotrees_save_subhalo_treelinks(void)
219{
220 for(int num = 0; num <= LastSnapShotNr; num++)
221 {
222 treelinks_io TreeLinkIO{this, this->Communicator, All.SnapFormat};
223
224 TreeLinkIO.Nsubhalos = Cats[num].Nsubhalos;
225 TreeLinkIO.TotNsubhalos = Cats[num].TotNsubhalos;
226
227 TreeLink = (treelink_data *)Mem.mymalloc("TreeLink", TreeLinkIO.Nsubhalos * sizeof(treelink_data));
228 for(int i = 0; i < TreeLinkIO.Nsubhalos; i++)
229 {
230 TreeLink[i].TreeID = Cats[num].Subhalo[i].TreeID;
231 TreeLink[i].TreeIndex = Cats[num].Subhalo[i].TreeIndex;
232 }
233
234 /* save the tree-link info */
235 TreeLinkIO.treelinks_save(num);
236
237 Mem.myfree(TreeLink);
238 }
239}
240
241/* Here we collect the group and subhalo numbers stored on each processor for each snapshot, for communication purposes.
242 * We also assign to the subhalos of each catalogue a running global subhalo number, which then serves as a unique identifier within this
243 * snapshot's catalogue.
244 */
245void mergertree::halotrees_assign_global_subhalonr_and_groupnr(void)
246{
247 long long grprev = 0;
248
249 for(int num = 0; num <= LastSnapShotNr; num++)
250 {
251 Cats[num].TabNgroups = (int *)Mem.mymalloc("Cats[num].TabNgroups", NTask * sizeof(int));
252 MPI_Allgather(&Cats[num].Ngroups, 1, MPI_INT, Cats[num].TabNgroups, 1, MPI_INT, Communicator);
253
254 Cats[num].TabNsubhalos = (int *)Mem.mymalloc("Cats[num].TabNsubhalos", NTask * sizeof(int));
255 MPI_Allgather(&Cats[num].Nsubhalos, 1, MPI_INT, Cats[num].TabNsubhalos, 1, MPI_INT, Communicator);
256
257 long long subprev = 0;
258
259 for(int i = 0; i < ThisTask; i++)
260 subprev += Cats[num].TabNsubhalos[i];
261
262 for(int i = 0; i < Cats[num].Nsubhalos; i++)
263 Cats[num].Subhalo[i].SubhaloNr = subprev + i;
264
265 /* Note: SubhaloNr is now the quantity to which Descendant/FirstProgenitor/NextProgenitor refer.
266 */
267
268 /* also set the Group[].GroupNr field (it was not among the fields that were read in).
269 * In contrast, Subhalo[].GroupNr was read in. */
270 long long nbefore = 0;
271
272 for(int i = 0; i < ThisTask; i++)
273 nbefore += Cats[num].TabNgroups[i];
274
275 for(int i = 0; i < Cats[num].Ngroups; i++)
276 Cats[num].Group[i].GroupNr = nbefore + i;
277
278 /* define a UniqueGroupNr field that gives a globally unique group number across all snapshot catalogues */
279 for(int i = 0; i < Cats[num].Nsubhalos; i++)
280 Cats[num].Subhalo[i].UniqueGroupNr = Cats[num].Subhalo[i].GroupNr + grprev;
281
282 grprev += Cats[num].TotNgroups;
283
284 /* assign some special properties to the subhalos in the tree which come from the FOF group catalogue (like M200, etc.) */
285
286 for(int i = 0; i < Cats[num].Nsubhalos; i++)
287 Cats[num].Subhalo[i].M_Crit200 = 0;
288
289 int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
290 int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
291 int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
292 int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
293
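/* exchange buffer: for each central subhalo we send its GroupNr to the task that stores the corresponding
 * group and receive the group's M_Crit200 in return; loc_index remembers the local subhalo index */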
294 struct exch_data
295 {
296 long long GroupNr;
297 MyFloat M_Crit200;
298 int loc_index;
299 };
300
301 exch_data *export_data = NULL, *import_data = NULL;
302 int nimport = 0, nexport = 0;
303
304 /* for communication bookkeeping reasons, we traverse the counting pattern twice */
305 for(int mode = 0; mode < 2; mode++)
306 {
307 for(int i = 0; i < NTask; i++)
308 Send_count[i] = 0;
309
310 int target = 0;
311 long long ngroup_previous = 0;
312
313 for(int i = 0; i < Cats[num].Nsubhalos; i++)
314 {
315 /* select only the main subhalos */
316 if(Cats[num].Subhalo[i].SubRankInGr == 0)
317 {
318 while(target < NTask - 1 && Cats[num].Subhalo[i].GroupNr >= (ngroup_previous + Cats[num].TabNgroups[target]))
319 {
320 ngroup_previous += Cats[num].TabNgroups[target];
321 target++;
322 }
323
324 if(mode == 0)
325 Send_count[target]++;
326 else
327 {
328 int off = Send_offset[target] + Send_count[target]++;
329
330 export_data[off].loc_index = i;
331 export_data[off].GroupNr = Cats[num].Subhalo[i].GroupNr;
332 }
333 }
334 }
335
336 if(mode == 0)
337 {
338 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
339 Recv_offset[0] = Send_offset[0] = 0;
340 for(int j = 0; j < NTask; j++)
341 {
342 nimport += Recv_count[j];
343 nexport += Send_count[j];
344 if(j > 0)
345 {
346 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
347 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
348 }
349 }
350
351 export_data = (exch_data *)Mem.mymalloc("export_data", nexport * sizeof(exch_data));
352 import_data = (exch_data *)Mem.mymalloc("import_data", nimport * sizeof(exch_data));
353 }
354 }
355
356 /* send data to target processors */
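/* pairwise exchange pattern: in round ngrp, each task communicates with the partner obtained by XOR-ing its own
 * rank with ngrp (for ngrp=0 this is a transfer from each task to itself) */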
357 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
358 {
359 int recvTask = ThisTask ^ ngrp;
360 if(recvTask < NTask)
361 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
362 myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask,
363 TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE,
364 recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
365 }
366
367 long long firstgrnr = 0;
368 for(int i = 0; i < ThisTask; i++)
369 firstgrnr += Cats[num].TabNgroups[i];
370
371 /* now read out the information we want from the groups */
372
373 for(int i = 0; i < nimport; i++)
374 {
375 int index = import_data[i].GroupNr - firstgrnr;
376
377 if(Cats[num].Group[index].GroupNr != import_data[i].GroupNr)
378 Terminate(
379 "bummer: num=%d i=%d Cats[num].Ngroups=%d nimport=%d index=%d Cats[num].Group[index].GroupNr=%lld "
380 "import_data[i].GroupNr=%lld\n",
381 num, i, Cats[num].Ngroups, nimport, index, Cats[num].Group[index].GroupNr, import_data[i].GroupNr);
382
383 import_data[i].M_Crit200 = Cats[num].Group[index].M_Crit200;
384 }
385
386 /* send the results back */
387 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
388 {
389 int recvTask = ThisTask ^ ngrp;
390 if(recvTask < NTask)
391 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
392 myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(exch_data), MPI_BYTE, recvTask,
393 TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(exch_data), MPI_BYTE,
394 recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
395 }
396
397 /* now read it out and assign the data */
398 for(int i = 0; i < nexport; i++)
399 Cats[num].Subhalo[export_data[i].loc_index].M_Crit200 = export_data[i].M_Crit200;
400
401 Mem.myfree(import_data);
402 Mem.myfree(export_data);
403
404 Mem.myfree(Recv_offset);
405 Mem.myfree(Recv_count);
406 Mem.myfree(Send_offset);
407 Mem.myfree(Send_count);
408 }
409}
410
411/* Initially give each subhalo its own unique TreeID, and assign it to a randomly chosen processor.
412 */
413void mergertree::halotrees_initial_treeassignment(void)
414{
415 long long previous = 0;
416 for(int num = LastSnapShotNr; num >= 0; num--)
417 {
418 long long prevtask = 0;
419 for(int i = 0; i < ThisTask; i++)
420 prevtask += Cats[num].TabNsubhalos[i];
421
422 for(int i = 0; i < Cats[num].Nsubhalos; i++)
423 {
424 Cats[num].Subhalo[i].TreeTask = (int)(get_random_number() * NTask);
425 Cats[num].Subhalo[i].TreeID = previous + prevtask + i;
426 }
427
428 previous += Cats[num].TotNsubhalos;
429 }
430}
431
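/* This function sweeps forward and backward over the locally stored, sorted tlink array and propagates the smallest
 * NewTreeID/NewTreeTask between neighbouring entries that share the same UniqueGroupNr (mode=0) or the same TreeID (mode=1).
 */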
432void mergertree::halotrees_select_interior_min_newtreeid(int mode, tlink *treehalos, long long totnsubs)
433{
434 for(int backforth = -1; backforth <= 1; backforth += 2)
435 {
436 long long i;
437
438 if(backforth == -1)
439 i = 0;
440 else
441 i = totnsubs - 2;
442
443 while(i >= 0 && i <= totnsubs - 2)
444 {
445 if((mode == 0 && treehalos[i].UniqueGroupNr == treehalos[i + 1].UniqueGroupNr) ||
446 (mode == 1 && treehalos[i].TreeID == treehalos[i + 1].TreeID))
447 {
448 if(treehalos[i].NewTreeID > treehalos[i + 1].NewTreeID)
449 {
450 treehalos[i].NewTreeID = treehalos[i + 1].NewTreeID;
451 treehalos[i].NewTreeTask = treehalos[i + 1].NewTreeTask;
452 }
453 else if(treehalos[i].NewTreeID < treehalos[i + 1].NewTreeID)
454 {
455 treehalos[i + 1].NewTreeID = treehalos[i].NewTreeID;
456 treehalos[i + 1].NewTreeTask = treehalos[i].NewTreeTask;
457 }
458 }
459 if(backforth == -1)
460 i++;
461 else
462 i--;
463 }
464 }
465}
466
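/* This function joins trees that are bridged either by membership in a common FOF group (mode=0) or by sharing the
 * same most bound particle ID (mode=1), which is temporarily stored in the UniqueGroupNr field of the tlink structure.
 * The return value is the number of subhalos whose TreeID changed, so that the caller can iterate until no further
 * links are established.
 */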
467long long mergertree::halotrees_join_trees_via_fof_or_mostboundid_bridges(int mode)
468{
469 long long totnsubs = 0;
470
471 for(int num = 0; num <= LastSnapShotNr; num++)
472 totnsubs += Cats[num].Nsubhalos;
473
474 tlink *treehalos = (tlink *)Mem.mymalloc("treehalos", (totnsubs + 1) * sizeof(tlink));
475
476 long long count = 0;
477
478 for(int num = 0; num <= LastSnapShotNr; num++)
479 for(int i = 0; i < Cats[num].Nsubhalos; i++)
480 {
481 treehalos[count].TreeID = Cats[num].Subhalo[i].TreeID;
482 treehalos[count].TreeTask = Cats[num].Subhalo[i].TreeTask;
483
484 treehalos[count].NewTreeID = Cats[num].Subhalo[i].TreeID;
485 treehalos[count].NewTreeTask = Cats[num].Subhalo[i].TreeTask;
486
487 treehalos[count].OrigTask = ThisTask;
488 treehalos[count].OrderIndex = count;
489
490 if(mode == 0)
491 treehalos[count].UniqueGroupNr = Cats[num].Subhalo[i].UniqueGroupNr;
492 else
493 treehalos[count].UniqueGroupNr = Cats[num].Subhalo[i].SubMostBoundID;
494
495 count++;
496 }
497
498 long long *count_list = (long long *)Mem.mymalloc("count_list", NTask * sizeof(long long));
499 MPI_Allgather(&totnsubs, sizeof(long long), MPI_BYTE, count_list, sizeof(long long), MPI_BYTE, Communicator);
500
501 int next_task = -1;
502 for(int i = ThisTask + 1; i < NTask; i++)
503 if(count_list[i] > 0)
504 {
505 next_task = i;
506 break;
507 }
508
509 int prev_task = -1;
510 for(int i = ThisTask - 1; i >= 0; i--)
511 if(count_list[i] > 0)
512 {
513 prev_task = i;
514 break;
515 }
516
517 tlink *elem_first = (tlink *)Mem.mymalloc("elem_first", NTask * sizeof(tlink));
518 tlink *elem_last = (tlink *)Mem.mymalloc("elem_last", NTask * sizeof(tlink));
519
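/* two passes: first the halos are sorted by the bridge quantity stored in UniqueGroupNr so that bridged halos become
 * neighbours, then by TreeID so that halos of the same tree become neighbours; in each pass the minimum NewTreeID is
 * propagated locally and across task boundaries until no further changes occur anywhere */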
520 for(int mode = 0; mode < 2; mode++)
521 {
522 if(mode == 0)
523 {
524 /* bring halos in the same group together */
525 mycxxsort_parallel(treehalos, treehalos + totnsubs, compare_tlink_GroupNr, Communicator);
526 halotrees_select_interior_min_newtreeid(mode, treehalos, totnsubs);
527 }
528 else
529 {
530 /* bring halos in the same tree together */
531 mycxxsort_parallel(treehalos, treehalos + totnsubs, compare_tlink_TreeID, Communicator);
532 halotrees_select_interior_min_newtreeid(mode, treehalos, totnsubs);
533 }
534
535 long long totchanges = 0;
536 int iter = 0;
537 do
538 {
539 int changes = 0;
540
541 /* note: the 0th element is guaranteed to be allocated even on ranks with zero totnsubs */
542 MPI_Allgather(&treehalos[0], sizeof(tlink), MPI_BYTE, elem_first, sizeof(tlink), MPI_BYTE, Communicator);
543 MPI_Allgather(&treehalos[totnsubs > 0 ? totnsubs - 1 : 0], sizeof(tlink), MPI_BYTE, elem_last, sizeof(tlink), MPI_BYTE,
544 Communicator);
545
546 if(prev_task >= 0 && totnsubs > 0 &&
547 ((mode == 0 && elem_last[prev_task].UniqueGroupNr == treehalos[0].UniqueGroupNr) ||
548 (mode == 1 && elem_last[prev_task].TreeID == treehalos[0].TreeID)) &&
549 elem_last[prev_task].NewTreeID < treehalos[0].NewTreeID)
550 {
551 treehalos[0].NewTreeID = elem_last[prev_task].NewTreeID;
552 treehalos[0].NewTreeTask = elem_last[prev_task].NewTreeTask;
553 changes++;
554 }
555
556 if(next_task >= 0 && totnsubs > 0 &&
557 ((mode == 0 && elem_first[next_task].UniqueGroupNr == treehalos[totnsubs - 1].UniqueGroupNr) ||
558 (mode == 1 && elem_first[next_task].TreeID == treehalos[totnsubs - 1].TreeID)) &&
559 elem_first[next_task].NewTreeID < treehalos[totnsubs - 1].NewTreeID)
560 {
561 treehalos[totnsubs - 1].NewTreeID = elem_first[next_task].NewTreeID;
562 treehalos[totnsubs - 1].NewTreeTask = elem_first[next_task].NewTreeTask;
563 changes++;
564 }
565
566 halotrees_select_interior_min_newtreeid(mode, treehalos, totnsubs);
567
568 sumup_large_ints(1, &changes, &totchanges, Communicator);
569
570 if(++iter > MAXITER)
571 Terminate("too many iterations. mode=%d changes=%d", mode, changes);
572 }
573 while(totchanges > 0);
574 }
575
576 Mem.myfree(elem_last);
577 Mem.myfree(elem_first);
578 Mem.myfree(count_list);
579
580 /* now bring halos into original order */
581 mycxxsort_parallel(treehalos, treehalos + totnsubs, compare_tlink_OrigTask_OrderIndex, Communicator);
582
583 /* transfer new TreeIDs back */
584 count = 0;
585
586 long long flips = 0;
587
588 for(int num = 0; num <= LastSnapShotNr; num++)
589 for(int i = 0; i < Cats[num].Nsubhalos; i++)
590 {
591 if(treehalos[count].NewTreeID != treehalos[count].TreeID)
592 {
593 Cats[num].Subhalo[i].TreeID = treehalos[count].NewTreeID;
594 Cats[num].Subhalo[i].TreeTask = treehalos[count].NewTreeTask;
595
596 flips++;
597 }
598
599 count++;
600 }
601
602 Mem.myfree(treehalos);
603
604 return flips;
605}
606
607/* This function is in charge of linking the halos according to the descendant information.
608 */
609void mergertree::halotrees_link_trees(void)
610{
611 /* we will build up the trees by assigning any two subhalos which are in the same tree the same TreeID, and the same TreeTask */
612
613 long long changesA = 0, changesB = 0;
614 int iter = 0;
615
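/* iterate until no TreeID changes any more; the direction in which the snapshots are swept alternates between
 * iterations (iter & 1) so that tree-ids can propagate both forwards and backwards in time */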
616 do
617 {
618 changesA = 0;
619
620 /* propagate the TreeIDs from level num to level num-1, via the descendant relationship from num-1 to num */
621 if(iter & 1)
622 for(int num = 1; num <= LastSnapShotNr; num++)
623 changesA += halotrees_join_via_descendants(num);
624 else
625 for(int num = LastSnapShotNr; num > 0; num--)
626 changesA += halotrees_join_via_descendants(num);
627
628 changesB = 0;
629
630 /* propagate the TreeIDs from level num to level num+1, via the progenitor relationship from num+1 to num */
631 if(iter & 1)
632 for(int num = LastSnapShotNr - 1; num >= 0; num--)
633 changesB += halotrees_join_via_progenitors(num);
634 else
635 for(int num = 0; num < LastSnapShotNr; num++)
636 changesB += halotrees_join_via_progenitors(num);
637
638 MPI_Allreduce(MPI_IN_PLACE, &changesA, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
639 MPI_Allreduce(MPI_IN_PLACE, &changesB, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
640
641 mpi_printf("MERGERTREE: iteration %d of joining trees via descendants and progenitors, %lld %lld links\n", iter, changesA,
642 changesB);
643
644 if(++iter > MAXITER)
645 Terminate("too many iterations");
646 }
647 while(changesA + changesB > 0);
648
649 /* Now also link subhalos that are in a common FOF halo, or that have their most bound particle IDs in common,
650 * so that they appear in the same tree */
651 for(int mode = 0; mode < 2; mode++)
652 {
653 long long totlinks = 0;
654 int iter = 0;
655 do
656 {
657 long long links = halotrees_join_trees_via_fof_or_mostboundid_bridges(mode);
658
659 MPI_Allreduce(&links, &totlinks, 1, MPI_LONG_LONG, MPI_SUM, Communicator);
660
661 if(mode == 0)
662 mpi_printf("MERGERTREE: iteration %d of joining trees via common FOF bridges yielded %lld links\n", iter, totlinks);
663 else
664 mpi_printf("MERGERTREE: iteration %d of joining trees via common MostboundID bridges yielded %lld links\n", iter,
665 totlinks);
666
667 if(++iter > MAXITER)
668 Terminate("too many iterations");
669 }
670 while(totlinks > 0);
671 }
672}
673
674/* This function brings the halos of the trees together according to their TreeID, so that they are ready for output to file.
675 * We also assign new TreeIDs to get a contiguous numbering, and we set up a table that gives the number of
676 * halos in each tree, and a cumulative offset into the output file where each tree starts.
677 */
678void mergertree::halotrees_collect_treehalos(void)
679{
680 Nhalos = 0;
681
682 for(int num = 0; num <= LastSnapShotNr; num++)
683 Nhalos += Cats[num].Nsubhalos;
684
685 /* get the total number of all subhalos in all trees */
686 sumup_large_ints(1, &Nhalos, &TotNhalos, Communicator);
687
688 /* we now release some memory that is not needed any more in order to reduce peak memory usage */
689 for(int num = LastSnapShotNr; num >= 0; num--)
690 {
691 Mem.myfree(Cats[num].TabNsubhalos);
692 Cats[num].TabNsubhalos = NULL;
693
694 Mem.myfree(Cats[num].TabNgroups);
695 Cats[num].TabNgroups = NULL;
696 }
697
698 for(int num = LastSnapShotNr; num >= 1; num--)
699 {
700 Mem.myfree(Cats[num].Progenitors);
701 Cats[num].Progenitors = NULL;
702 }
703
704 for(int num = LastSnapShotNr - 1; num >= 0; num--)
705 {
706 Mem.myfree(Cats[num].Descendants);
707 Cats[num].Descendants = NULL;
708 }
709
710 for(int num = LastSnapShotNr; num >= 0; num--)
711 {
712 Mem.myfree_movable(Cats[num].Group);
713 Cats[num].Group = NULL;
714 }
715
716 Halos = (treehalo_type *)Mem.mymalloc_movable(&Halos, "Halos", (Nhalos + 1) * sizeof(treehalo_type));
717
718 /* set up the halo data for the tree output */
719 long long off = 0;
720 for(int num = 0; num <= LastSnapShotNr; num++)
721 for(int i = 0; i < Cats[num].Nsubhalos; i++)
722 {
723 Halos[off].TreeID = Cats[num].Subhalo[i].TreeID;
724 Halos[off].TreeIndex = Cats[num].Subhalo[i].TreeIndex;
725
726 Halos[off].TreeMainProgenitor = Cats[num].SubExt[i].TreeMainProgenitor;
727 Halos[off].TreeFirstProgenitor = Cats[num].SubExt[i].TreeFirstProgenitor;
728 Halos[off].TreeNextProgenitor = Cats[num].SubExt[i].TreeNextProgenitor;
729 Halos[off].TreeDescendant = Cats[num].SubExt[i].TreeDescendant;
730
731 Halos[off].TreeFirstDescendant = Cats[num].SubExt[i].TreeFirstDescendant;
732 Halos[off].TreeNextDescendant = Cats[num].SubExt[i].TreeNextDescendant;
733 Halos[off].TreeProgenitor = Cats[num].SubExt[i].TreeProgenitor;
734
735 Halos[off].SnapNum = num;
736 Halos[off].SubhaloNr = Cats[num].Subhalo[i].SubhaloNr;
737 Halos[off].GroupNr = Cats[num].Subhalo[i].GroupNr;
738 Halos[off].UniqueGroupNr = Cats[num].Subhalo[i].UniqueGroupNr;
739
740 Halos[off].SubProp = Cats[num].Subhalo[i];
741
742 off++;
743 }
744
745 /* release some memory to reduce peak memory usage */
746 for(int num = LastSnapShotNr; num >= 0; num--)
747 {
748 Mem.myfree_movable(Cats[num].SubExt);
749 Cats[num].SubExt = NULL;
750 }
751
752 long long *count_list = (long long *)Mem.mymalloc("count_list", NTask * sizeof(long long));
753 long long totnsubs = Nhalos;
754 MPI_Allgather(&totnsubs, sizeof(long long), MPI_BYTE, count_list, sizeof(long long), MPI_BYTE, Communicator);
755
756 /* first set the fields TreeFirstHaloInFOFgroup and TreeNextHaloInFOFgroup */
757 mycxxsort_parallel(Halos, Halos + Nhalos, compare_Halos_UniqueGroupNr_SubhaloNr, Communicator);
758
759 long long previous_UniqueGroupNr = HALONR_MAX;
760 long long new_treeindex = HALONR_MAX;
761 for(int i = 0; i < Nhalos; i++)
762 {
763 if(Halos[i].UniqueGroupNr != previous_UniqueGroupNr)
764 {
765 previous_UniqueGroupNr = Halos[i].UniqueGroupNr;
766 new_treeindex = Halos[i].TreeIndex;
767 }
768
769 Halos[i].TreeFirstHaloInFOFgroup = new_treeindex;
770
771 if(i < Nhalos - 1 && Halos[i].UniqueGroupNr == Halos[i + 1].UniqueGroupNr)
772 Halos[i].TreeNextHaloInFOFgroup = Halos[i + 1].TreeIndex;
773 else
774 Halos[i].TreeNextHaloInFOFgroup = -1;
775 }
776
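/* a FOF group may straddle a task boundary, so we exchange the first and last halo of every task and patch up
 * TreeNextHaloInFOFgroup and TreeFirstHaloInFOFgroup across these boundaries */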
777 treehalo_type *efirst = (treehalo_type *)Mem.mymalloc("efirst", NTask * sizeof(treehalo_type));
778 treehalo_type *elast = (treehalo_type *)Mem.mymalloc("elast", NTask * sizeof(treehalo_type));
779 /* note: the 0th element is guaranteed to be allocated even on ranks with zero totnsubs */
780 MPI_Allgather(&Halos[0], sizeof(treehalo_type), MPI_BYTE, efirst, sizeof(treehalo_type), MPI_BYTE, Communicator);
781 MPI_Allgather(&Halos[Nhalos > 0 ? Nhalos - 1 : 0], sizeof(treehalo_type), MPI_BYTE, elast, sizeof(treehalo_type), MPI_BYTE,
782 Communicator);
783
784 if(Nhalos > 0)
785 for(int task = ThisTask + 1; task < NTask; task++)
786 if(count_list[task] > 0)
787 {
788 if(Halos[Nhalos - 1].UniqueGroupNr == efirst[task].UniqueGroupNr)
789 Halos[Nhalos - 1].TreeNextHaloInFOFgroup = efirst[task].TreeIndex;
790
791 break;
792 }
793
794 if(Nhalos > 0)
795 {
796 long long previous_UniqueGroupNr = HALONR_MAX;
797 long long new_treeindex = HALONR_MAX;
798
799 for(int task = ThisTask - 1; task >= 0; task--)
800 {
801 if(count_list[task] > 0)
802 {
803 if(elast[task].UniqueGroupNr == Halos[0].UniqueGroupNr)
804 {
805 previous_UniqueGroupNr = elast[task].UniqueGroupNr;
806 new_treeindex = elast[task].TreeFirstHaloInFOFgroup;
807 }
808 }
809 }
810
811 for(int i = 0; i < Nhalos; i++)
812 {
813 if(Halos[i].UniqueGroupNr != previous_UniqueGroupNr)
814 {
815 previous_UniqueGroupNr = Halos[i].UniqueGroupNr;
816 new_treeindex = Halos[i].TreeIndex;
817 break;
818 }
819
820 Halos[i].TreeFirstHaloInFOFgroup = new_treeindex;
821 }
822 }
823
824 Mem.myfree(elast);
825 Mem.myfree(efirst);
826
827 /* parallel sort according to treeid and treeindex - this brings the tree halos together, in ascending order */
828 mycxxsort_parallel(Halos, Halos + Nhalos, compare_Halos_TreeID_TreeIndex, Communicator);
829
830 treehalo_type *elem_last = (treehalo_type *)Mem.mymalloc("elem_last", NTask * sizeof(treehalo_type));
831 /* now count the trees and overwrite the TreeIDs with new continuous IDs starting from zero */
832 MPI_Allgather(&Halos[Nhalos > 0 ? Nhalos - 1 : 0], sizeof(treehalo_type), MPI_BYTE, elem_last, sizeof(treehalo_type), MPI_BYTE,
833 Communicator);
834
835 long long treeid_previous = -1;
836 for(int task = ThisTask - 1; task >= 0; task--)
837 {
838 if(count_list[task] > 0)
839 {
840 treeid_previous = elem_last[task].TreeID;
841 break;
842 }
843 }
844
845 Ntrees = 0;
846 for(int i = 0; i < Nhalos; i++)
847 {
848 if(Halos[i].TreeID != treeid_previous)
849 Ntrees++;
850
851 treeid_previous = Halos[i].TreeID;
852 Halos[i].TreeID = Ntrees - 1;
853 }
854
855 int *ntrees_list = (int *)Mem.mymalloc("ntrees_list", NTask * sizeof(int));
856 MPI_Allgather(&Ntrees, 1, MPI_INT, ntrees_list, 1, MPI_INT, Communicator);
857
858 TotNtrees = 0;
859 long long ntrees_before = 0;
860 for(int i = 0; i < NTask; i++)
861 {
862 TotNtrees += ntrees_list[i];
863 if(i < ThisTask)
864 ntrees_before += ntrees_list[i];
865 }
866
867 for(int i = 0; i < Nhalos; i++)
868 Halos[i].TreeID += ntrees_before;
869
870 /* let's now allocate the table of trees, with an extra [-1] element */
871 TreeTable = (halotrees_table *)Mem.mymalloc_movable(&TreeTable, "TreeTable", (Ntrees + 1) * sizeof(halotrees_table));
872 memset(TreeTable, 0, (Ntrees + 1) * sizeof(halotrees_table));
873 TreeTable += 1;
874
875 /* update what we have stored for the last element */
876 MPI_Allgather(&Halos[Nhalos > 0 ? Nhalos - 1 : 0], sizeof(treehalo_type), MPI_BYTE, elem_last, sizeof(treehalo_type), MPI_BYTE,
877 Communicator);
878
879 treeid_previous = -1;
880 for(int task = ThisTask - 1; task >= 0; task--)
881 {
882 if(count_list[task] > 0)
883 {
884 treeid_previous = elem_last[task].TreeID;
885 break;
886 }
887 }
888
889 Ntrees = 0;
890 for(int i = 0; i < Nhalos; i++)
891 {
892 if(Halos[i].TreeID != treeid_previous)
893 Ntrees++;
894
895 treeid_previous = Halos[i].TreeID;
896 TreeTable[Ntrees - 1].HaloCount++;
897 TreeTable[Ntrees - 1].TreeID = Halos[i].TreeID;
898 }
899
900 /* in the TreeTable[-1] element we have counted segments of trees that do not start on the local processor */
901
902 halotrees_table *elem_first = (halotrees_table *)Mem.mymalloc("elem_first", NTask * sizeof(halotrees_table));
903
904 MPI_Allgather(&TreeTable[-1], sizeof(halotrees_table), MPI_BYTE, elem_first, sizeof(halotrees_table), MPI_BYTE, Communicator);
905
906 if(Ntrees > 0)
907 for(int task = ThisTask + 1; task < NTask; task++)
908 {
909 if(TreeTable[Ntrees - 1].TreeID == elem_first[task].TreeID)
910 TreeTable[Ntrees - 1].HaloCount += elem_first[task].HaloCount;
911 else
912 break;
913 }
914
915 Mem.myfree(elem_first);
916
917 long long sumhalos = 0;
918 LargestHaloCount = 0;
919 for(int i = 0; i < Ntrees; i++)
920 {
921 sumhalos += TreeTable[i].HaloCount;
922 if(TreeTable[i].HaloCount > LargestHaloCount)
923 LargestHaloCount = TreeTable[i].HaloCount;
924 }
925 MPI_Allreduce(MPI_IN_PLACE, &LargestHaloCount, 1, MPI_INT, MPI_MAX, Communicator);
926
927 long long *list_sumhalos = (long long *)Mem.mymalloc("list_sumhalos", NTask * sizeof(long long));
928 MPI_Allgather(&sumhalos, sizeof(long long), MPI_BYTE, list_sumhalos, sizeof(long long), MPI_BYTE, Communicator);
929
930 long long sum_check = 0;
931 for(int i = 0; i < NTask; i++)
932 sum_check += list_sumhalos[i];
933
934 if(sum_check != TotNhalos)
935 Terminate("TotNTrees=%lld sum_check=%lld != TotNhalos=%lld", (long long)TotNtrees, sum_check, (long long)TotNhalos);
936
937 if(Ntrees > 0)
938 {
939 TreeTable[0].FirstHalo = 0;
940 for(int task = 0; task < ThisTask; task++)
941 TreeTable[0].FirstHalo += list_sumhalos[task];
942 }
943
944 Mem.myfree(list_sumhalos);
945
946 for(int i = 1; i < Ntrees; i++)
947 TreeTable[i].FirstHalo = TreeTable[i - 1].FirstHalo + TreeTable[i - 1].HaloCount;
948
949 Mem.myfree_movable(ntrees_list);
950 Mem.myfree_movable(elem_last);
951 Mem.myfree_movable(count_list);
952}
953
954/*--------------------------------------------------------------------------------------------------------------*/
955
956/* This function redistributes a subdivided global array with local pieces stored in an address pointed to by ptr.
957 * On the local processor, there are currently 'ncurrent' elements, but we want 'ntarget' elements. The size of
958 * one element is given by 'len'. The buffer, whose address is stored in *ptr, must be resizable and will be
959 * reallocated to the new size of ntarget*len bytes.
960 */
961void mergertree::halotrees_reshuffle(char **ptr, size_t len, int ncurrent, int ntarget)
962{
963 int *Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * NTask);
964 int *Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * NTask);
965 int *Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * NTask);
966 int *Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * NTask);
967
968 /* copy current data to an auxiliary buffer */
969 char *buf = (char *)Mem.mymalloc_movable(&buf, "buf", ncurrent * len);
970 memcpy(buf, *ptr, ncurrent * len);
971
972 /* now resize the source buffer so that it can accommodate the data of the desired target size */
973 *ptr = (char *)Mem.myrealloc_movable(*ptr, ntarget * len);
974
975 /* collect the current and target layout of the array */
976 int *tab_ncurrent = (int *)Mem.mymalloc("tab_ncurrent", NTask * sizeof(int));
977 MPI_Allgather(&ncurrent, 1, MPI_INT, tab_ncurrent, 1, MPI_INT, Communicator);
978
979 int *tab_ntarget = (int *)Mem.mymalloc("tab_ntarget", NTask * sizeof(int));
980 MPI_Allgather(&ntarget, 1, MPI_INT, tab_ntarget, 1, MPI_INT, Communicator);
981
982 /* now work out where our local data should go */
983 int nexport = 0, nimport = 0;
984
985 for(int i = 0; i < NTask; i++)
986 Send_count[i] = 0;
987
988 int nbefore = 0;
989 for(int i = 0; i < ThisTask; i++)
990 nbefore += tab_ncurrent[i];
991
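/* the element with global index (nbefore + i) is assigned to the task whose target range contains this index,
 * so that the new layout matches the requested ntarget counts on all tasks */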
992 int target = 0, ncum = 0;
993 for(int i = 0; i < ncurrent; i++)
994 {
995 while(target < NTask - 1 && nbefore + i >= ncum + tab_ntarget[target])
996 ncum += tab_ntarget[target++];
997
998 Send_count[target]++;
999 }
1000
1001 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1002 Recv_offset[0] = Send_offset[0] = 0;
1003
1004 for(int j = 0; j < NTask; j++)
1005 {
1006 nimport += Recv_count[j];
1007 nexport += Send_count[j];
1008
1009 if(j > 0)
1010 {
1011 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1012 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1013 }
1014 }
1015
1016 if(nimport != ntarget)
1017 Terminate("nimport != ntarget");
1018
1019 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1020 {
1021 int recvTask = ThisTask ^ ngrp;
1022 if(recvTask < NTask)
1023 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1024 myMPI_Sendrecv(&buf[Send_offset[recvTask] * len], Send_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B,
1025 *ptr + Recv_offset[recvTask] * len, Recv_count[recvTask] * len, MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1026 MPI_STATUS_IGNORE);
1027 }
1028
1029 Mem.myfree(tab_ntarget);
1030 Mem.myfree(tab_ncurrent);
1031 Mem.myfree(buf);
1032
1033 Mem.myfree(Recv_offset);
1034 Mem.myfree(Recv_count);
1035 Mem.myfree(Send_offset);
1036 Mem.myfree(Send_count);
1037}
1038
1039/* This function sets up new pointers for navigating within a given tree based on TreeIndex.
1040 * This is done by translating the old pointers to the new ones within each tree.
1041 */
1042void mergertree::halotrees_remap_treepointers(void)
1043{
1044 int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1045 int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1046 int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1047 int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1048
1049 /* initialize new pointers to default values */
1050 for(int num = 0; num <= LastSnapShotNr; num++)
1051 for(int i = 0; i < Cats[num].Nsubhalos; i++)
1052 {
1053 Cats[num].SubExt[i].TreeMainProgenitor = -1;
1054 Cats[num].SubExt[i].TreeFirstProgenitor = -1;
1055 Cats[num].SubExt[i].TreeNextProgenitor = -1;
1056 Cats[num].SubExt[i].TreeDescendant = -1;
1057 Cats[num].SubExt[i].TreeFirstDescendant = -1;
1058 Cats[num].SubExt[i].TreeNextDescendant = -1;
1059 Cats[num].SubExt[i].TreeProgenitor = -1;
1060 }
1061
1062 /* the pointers we have to set either point one snapshot back (e.g. FirstProgenitor, delta=-1),
1063 * stay within the same snapshot (NextProgenitor, delta=0), or point one snapshot into the future (Descendant, delta=+1)
1064 */
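/* set=0 remaps FirstProgenitor/NextProgenitor/Descendant, set=1 remaps Progenitor/NextDescendant/FirstDescendant,
 * and set=2 remaps only the MainProgenitor pointer (which exists just for delta=-1) */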
1065 for(int set = 0; set < 3; set++)
1066 for(int delta = 1; delta >= -1; delta--)
1067 {
1068 if(set == 2 && delta != -1)
1069 continue;
1070
1071 /* note that we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr */
1072
1073 /* in snapshot number 'base' we want to remap the pointers */
1074 for(int base = 0; base <= LastSnapShotNr; base++)
1075 {
1076 /* this is the snapshot we are pointing to */
1077 int num = base + delta;
1078
1079 if(set == 1 && delta != 1 && num == 0)
1080 continue;
1081
1082 if((delta == -1 && num >= 0) || (delta >= 0 && base < LastSnapShotNr))
1083 {
1084 /* let's get the minimum and maximum subhalo numbers for the target snapshot number as a function of the task */
1085 long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1086 long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1087
1088 long long min_subhalonr =
1089 Cats[num].Nsubhalos > 0 ? Cats[num].Subhalo[0].SubhaloNr : HALONR_MAX; // HALONR_MAX means empty here
1090 long long max_subhalonr = Cats[num].Nsubhalos > 0 ? Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr
1091 : HALONR_MAX; // HALONR_MAX means empty here
1092
1093 MPI_Allgather(&min_subhalonr, sizeof(long long), MPI_BYTE, list_min_subhalonr, sizeof(long long), MPI_BYTE,
1094 Communicator);
1095 MPI_Allgather(&max_subhalonr, sizeof(long long), MPI_BYTE, list_max_subhalonr, sizeof(long long), MPI_BYTE,
1096 Communicator);
1097
1098 /* prepare a list of the current pointer values on the local processor */
1099 data_list *list = (data_list *)Mem.mymalloc("list", Cats[base].Nsubhalos * sizeof(data_list));
1100 int count = 0;
1101 for(int i = 0; i < Cats[base].Nsubhalos; i++)
1102 {
1103 if(set == 0)
1104 switch(delta)
1105 {
1106 case -1:
1107 list[count].targetsubhalonr = Cats[base].Progenitors[i].FirstProgSubhaloNr;
1108 break;
1109 case 0:
1110 list[count].targetsubhalonr = Cats[base].Descendants[i].NextProgSubhaloNr;
1111 break;
1112 case +1:
1113 list[count].targetsubhalonr = Cats[base].Descendants[i].DescSubhaloNr;
1114 break;
1115 }
1116 else if(set == 1)
1117 switch(delta)
1118 {
1119 case -1:
1120 list[count].targetsubhalonr = Cats[base].Progenitors[i].ProgSubhaloNr;
1121 break;
1122 case 0:
1123 list[count].targetsubhalonr = Cats[base].Progenitors[i].NextDescSubhaloNr;
1124 break;
1125 case +1:
1126 list[count].targetsubhalonr = Cats[base].Descendants[i].FirstDescSubhaloNr;
1127 break;
1128 }
1129 else
1130 switch(delta)
1131 {
1132 case -1:
1133 list[count].targetsubhalonr = Cats[base].Progenitors[i].MainProgSubhaloNr;
1134 break;
1135 }
1136
1137 list[count].originsubhalonr = Cats[base].Subhalo[i].SubhaloNr;
1138
1139 if(list[count].targetsubhalonr >= 0 &&
1140 list[count].targetsubhalonr < (long long)HALONR_MAX) // need to only process existing pointers
1141 {
1142 if(list[count].targetsubhalonr >= Cats[num].TotNsubhalos)
1143 Terminate("set=%d delta=%d num=%d base=%d list[count].targetsubhalonr=%lld >= Cats[num].TotNsubhalos=%lld\n",
1144 set, delta, num, base, list[count].targetsubhalonr, Cats[num].TotNsubhalos);
1145
1146 list[count].origin = i;
1147 list[count].intreeid = Cats[base].Subhalo[i].TreeID;
1148 count++;
1149 }
1150 }
1151
1152 /* sort it by current pointer value (which is the subhalonr) */
1153 mycxxsort(list, list + count, compare_data_list_subhalonnr);
1154
1155 /* we now need to send them to the target processors that may hold the corresponding subhalos */
1156
1157 int nexport = 0, nimport = 0;
1158
1159 remap_data *import_data = NULL, *export_data = NULL;
1160
1161 for(int mode = 0; mode < 2; mode++) // go through this twice to simplify bookkeeping
1162 {
1163 for(int i = 0; i < NTask; i++)
1164 Send_count[i] = 0;
1165
1166 int target = 0;
1167
1168 for(int i = 0; i < count; i++)
1169 {
1170 while(target < NTask - 1 &&
1171 (list_min_subhalonr[target] == HALONR_MAX || list[i].targetsubhalonr > list_max_subhalonr[target]))
1172 target++;
1173
1174 if(list_min_subhalonr[target] != HALONR_MAX && list[i].targetsubhalonr >= list_min_subhalonr[target] &&
1175 list[i].targetsubhalonr <= list_max_subhalonr[target])
1176 {
1177 if(mode == 0)
1178 Send_count[target]++;
1179 else
1180 {
1181 int off = Send_offset[target] + Send_count[target]++;
1182
1183 export_data[off].loc_index = i;
1184 export_data[off].targetsubhalonr = list[i].targetsubhalonr;
1185 export_data[off].originsubhalonr = list[i].originsubhalonr;
1186 export_data[off].intreeid = list[i].intreeid;
1187 }
1188 }
1189 else
1190 Terminate(
1191 "this shouldn't happen: set=%d delta=%d num=%d base=%d delta=%d i=%d|count=%d "
1192 "list[i].targetsubhalonr=%lld "
1193 "target=%d "
1194 "list_min_subhalonr[target]=%lld list_max_subhalonr[target]=%lld\n",
1195 set, delta, num, base, delta, i, count, (long long)list[i].targetsubhalonr, target,
1196 list_min_subhalonr[target], list_max_subhalonr[target]);
1197 }
1198
1199 if(mode == 0) // prepare offset tables
1200 {
1201 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1202 Recv_offset[0] = Send_offset[0] = 0;
1203 for(int j = 0; j < NTask; j++)
1204 {
1205 nimport += Recv_count[j];
1206 nexport += Send_count[j];
1207 if(j > 0)
1208 {
1209 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1210 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1211 }
1212 }
1213
1214 export_data = (remap_data *)Mem.mymalloc("export_data", nexport * sizeof(remap_data));
1215 import_data = (remap_data *)Mem.mymalloc("import_data", nimport * sizeof(remap_data));
1216 }
1217 }
1218
1219 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1220 {
1221 /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1222 int recvTask = ThisTask ^ ngrp;
1223 if(recvTask < NTask)
1224 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1225 myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(remap_data), MPI_BYTE,
1226 recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]],
1227 Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1228 MPI_STATUS_IGNORE);
1229 }
1230
1231 /* the incoming data is not necessarily sorted according to subhalonr, which is why we need to sort it now */
1232 for(int i = 0; i < nimport; i++)
1233 import_data[i].orig_index = i;
1234
1235 mycxxsort(import_data, import_data + nimport, compare_remap_data_subhalonr);
1236
1237 /* now we check the incoming data, and prepare new values for the pointers */
1238 for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1239 {
1240 if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].targetsubhalonr)
1241 i++;
1242 else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].targetsubhalonr)
1243 {
1244 Terminate("Can't find targetsubhalonr=%lld Cats[num].Subhalo[i].SubhaloNr=%lld i=%d|%d j=%d|%d",
1245 import_data[j].targetsubhalonr, Cats[num].Subhalo[i].SubhaloNr, i, Cats[num].Nsubhalos, j, nimport);
1246 }
1247 else // we have a match
1248 {
1249 import_data[j].new_treeindexptr = Cats[num].Subhalo[i].TreeIndex;
1250 import_data[j].treeid = Cats[num].Subhalo[i].TreeID;
1251
1252 if(Cats[num].Subhalo[i].TreeID != import_data[j].intreeid)
1253 Terminate(
1254 "\nWe are not in the same tree, which shouldn't be the case: set=%d delta=%d num=%d\n"
1255 "Cats[num].Subhalo[i].SubhaloNr=%lld \nCats[num].Subhalo[i].DescSubhaloNr=%lld\n"
1256 "Cats[num].Subhalo[i].NextProgSubhalor=%lld\nCats[num].Subhalo[i].FirstProgSubhalor=%lld\n"
1257 "Cats[num].Subhalo[i].NextDescSubhalor=%lld\nCats[num].Subhalo[i].FirstDescSubhalor=%lld\n"
1258 "Cats[num].Subhalo[i].ProgSubhalorNr=%lld\n"
1259 "originsubgalonr=%lld targetsubgalonr=%lld Cats[num].Subhalo[i].TreeID=%lld "
1260 "import_data[j].intreeid=%lld i=%d|%d j=%d|%d num=%d",
1261 set, delta, num, Cats[num].Subhalo[i].SubhaloNr, Cats[num].Descendants[i].DescSubhaloNr,
1262 Cats[num].Descendants[i].NextProgSubhaloNr, Cats[num].Progenitors[i].FirstProgSubhaloNr,
1263 Cats[num].Progenitors[i].NextDescSubhaloNr, Cats[num].Descendants[i].FirstDescSubhaloNr,
1264 Cats[num].Progenitors[i].ProgSubhaloNr, import_data[j].originsubhalonr, import_data[j].targetsubhalonr,
1265 Cats[num].Subhalo[i].TreeID, import_data[j].intreeid, i, Cats[num].Nsubhalos, j, nimport, num);
1266
1267 j++;
1268 }
1269 }
1270
1271 mycxxsort(import_data, import_data + nimport, compare_remap_data_orig_index);
1272
1273 /* send the results back */
1274 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1275 {
1276 /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1277 int recvTask = ThisTask ^ ngrp;
1278 if(recvTask < NTask)
1279 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1280 myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(remap_data), MPI_BYTE,
1281 recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]],
1282 Send_count[recvTask] * sizeof(remap_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1283 MPI_STATUS_IGNORE);
1284 }
1285
1286 for(int i = 0; i < nexport; i++)
1287 {
1288 int idx = list[export_data[i].loc_index].origin;
1289
1290 if(Cats[base].Subhalo[idx].TreeID != export_data[i].treeid)
1291 Terminate("something is wrong: delta=%d i=%d|%d idx=%d|%d %lld %lld %lld", delta, i, nexport, idx,
1292 Cats[base].Nsubhalos, Cats[base].Subhalo[idx].TreeID, export_data[i].treeid, export_data[i].intreeid);
1293
1294 if(set == 0)
1295 switch(delta)
1296 {
1297 case -1:
1298 Cats[base].SubExt[idx].TreeFirstProgenitor = export_data[i].new_treeindexptr;
1299 break;
1300
1301 case 0:
1302 Cats[base].SubExt[idx].TreeNextProgenitor = export_data[i].new_treeindexptr;
1303 break;
1304
1305 case +1:
1306 Cats[base].SubExt[idx].TreeDescendant = export_data[i].new_treeindexptr;
1307 break;
1308 }
1309 else if(set == 1)
1310 switch(delta)
1311 {
1312 case -1:
1313 Cats[base].SubExt[idx].TreeProgenitor = export_data[i].new_treeindexptr;
1314 break;
1315
1316 case 0:
1317 Cats[base].SubExt[idx].TreeNextDescendant = export_data[i].new_treeindexptr;
1318 break;
1319
1320 case +1:
1321 Cats[base].SubExt[idx].TreeFirstDescendant = export_data[i].new_treeindexptr;
1322 break;
1323 }
1324 else
1325 switch(delta)
1326 {
1327 case -1:
1328 Cats[base].SubExt[idx].TreeMainProgenitor = export_data[i].new_treeindexptr;
1329 break;
1330 }
1331 }
1332
1333 Mem.myfree(import_data);
1334 Mem.myfree(export_data);
1335 Mem.myfree(list);
1336 Mem.myfree(list_max_subhalonr);
1337 Mem.myfree(list_min_subhalonr);
1338 }
1339 }
1340 }
1341
1342 Mem.myfree(Recv_offset);
1343 Mem.myfree(Recv_count);
1344 Mem.myfree(Send_offset);
1345 Mem.myfree(Send_count);
1346}
1347
1348/*--------------------------------------------------------------------------------------------------------------*/
1349
1350/* This function creates a new subhalo numbering within each tree, as identified by the set of subhalos with the same treeid.
1351 */
1352void mergertree::halotrees_assign_new_treeindices(void)
1353{
1354 long long totnsubs = 0;
1355
1356 for(int num = 0; num <= LastSnapShotNr; num++)
1357 totnsubs += Cats[num].Nsubhalos;
1358
1359 assign_data *a_data = (assign_data *)Mem.mymalloc("a_data", (totnsubs + 1) * sizeof(assign_data));
1360
1361 long long count = 0;
1362 for(int num = 0; num <= LastSnapShotNr; num++)
1363 for(int i = 0; i < Cats[num].Nsubhalos; i++)
1364 {
1365 a_data[count].origin_task = ThisTask;
1366 a_data[count].origin_num = num;
1367 a_data[count].origin_index = i;
1368 a_data[count].treeid = Cats[num].Subhalo[i].TreeID;
1369 count++;
1370 }
1371
1372 mycxxsort_parallel(a_data, a_data + totnsubs, compare_assign_data_treeid_origin_num_origin_task_origin_index, Communicator);
1373
1374 long long newid = 0;
1375
1376 for(long long i = 0, treeindex = 0; i < totnsubs; i++)
1377 {
1378 if(i == 0 || a_data[i].treeid != a_data[i - 1].treeid)
1379 treeindex = 0;
1380
1381 a_data[i].treeindex = treeindex++;
1382
1383 if(i > 0 && a_data[i].treeid != a_data[i - 1].treeid)
1384 newid++;
1385
1386 a_data[i].newtreeid = newid;
1387 }
1388
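/* the tree indices and new tree-ids assigned above are only correct within the local array segment; we now gather
 * the boundary elements of all tasks to add the proper offsets for trees that straddle task boundaries and to make
 * the new tree-ids globally consecutive */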
1389 long long *count_list = (long long *)Mem.mymalloc("count_list", NTask * sizeof(long long));
1390 long long *newid_list = (long long *)Mem.mymalloc("newid_list", NTask * sizeof(long long));
1391 MPI_Allgather(&totnsubs, sizeof(long long), MPI_BYTE, count_list, sizeof(long long), MPI_BYTE, Communicator);
1392 MPI_Allgather(&newid, sizeof(long long), MPI_BYTE, newid_list, sizeof(long long), MPI_BYTE, Communicator);
1393
1394 assign_data *elem_last = (assign_data *)Mem.mymalloc("elem_last", NTask * sizeof(assign_data));
1395 assign_data *elem_first = (assign_data *)Mem.mymalloc("elem_first", NTask * sizeof(assign_data));
1396
1397 /* note: the 0th element is guaranteed to be allocated even on ranks with zero totnsubs */
1398 MPI_Allgather(&a_data[totnsubs > 0 ? totnsubs - 1 : 0], sizeof(assign_data), MPI_BYTE, elem_last, sizeof(assign_data), MPI_BYTE,
1399 Communicator);
1400 MPI_Allgather(&a_data[0], sizeof(assign_data), MPI_BYTE, elem_first, sizeof(assign_data), MPI_BYTE, Communicator);
1401
1402 if(count_list[ThisTask] > 0)
1403 {
1404 long long count_before = 0;
1405 for(int task = 0; task < ThisTask; task++)
1406 if(count_list[task] > 0)
1407 {
1408 if(a_data[0].treeid == elem_last[task].treeid)
1409 count_before += (elem_last[task].treeindex + 1);
1410 }
1411 if(count_before > 0)
1412 {
1413 for(long long i = 0; i < totnsubs; i++)
1414 {
1415 if(a_data[i].treeid != a_data[0].treeid)
1416 break;
1417
1418 a_data[i].treeindex += count_before;
1419 }
1420 }
1421
1422 long long newidoff = 0;
1423 for(int task = 0; task < ThisTask; task++)
1424 newidoff += newid_list[task];
1425
1426 /* we now also need to check whether there are treeid changes that line up with task boundaries. Be aware that some tasks could
1427 * be empty */
1428
1429 int taleft = 0;
1430
1431 do
1432 {
1433 while(taleft < ThisTask && count_list[taleft] == 0)
1434 taleft++;
1435
1436 if(taleft < ThisTask)
1437 {
1438 int taright = taleft + 1;
1439
1440 while(count_list[taright] == 0)
1441 taright++;
1442
1443 // taright may be at most equal to ThisTask here
1444
1445 if(elem_last[taleft].treeid != elem_first[taright].treeid)
1446 newidoff++;
1447
1448 taleft = taright;
1449 }
1450 }
1451 while(taleft < ThisTask);
1452
1453 /* now assign new TreeIDs that form consecutive numbers without gaps */
1454 for(long long i = 0; i < totnsubs; i++)
1455 a_data[i].newtreeid += newidoff;
1456 }
1457
1458 Mem.myfree(elem_first);
1459 Mem.myfree(elem_last);
1460 Mem.myfree(newid_list);
1461 Mem.myfree(count_list);
1462
1463 mycxxsort_parallel(a_data, a_data + totnsubs, compare_assign_data_origin_task_origin_num_origin_index, Communicator);
1464
1465 count = 0;
1466 for(int num = 0; num <= LastSnapShotNr; num++)
1467 for(int i = 0; i < Cats[num].Nsubhalos; i++)
1468 {
1469 Cats[num].Subhalo[i].TreeIndex = a_data[count].treeindex;
1470 Cats[num].Subhalo[i].TreeID = a_data[count].newtreeid;
1471 count++;
1472 }
1473
1474 Mem.myfree(a_data);
1475}
1476
1477/*--------------------------------------------------------------------------------------------------------------*/
1478
1479/* This function puts subhalos from snapshot "num-1" and their descendants in snapshot "num" into the same tree
1480 * by unifying their tree-ids.
1481 */
1482int mergertree::halotrees_join_via_descendants(int num)
1483{
1484 int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1485 int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1486 int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1487 int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1488
1489 /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
1490
1491 /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
1492 long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1493 long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1494
1495 /* this value flags that there are no subhalos on the corresponding processor */
1496 long long empty = HALONR_MAX;
1497
1498 MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE, list_min_subhalonr,
1499 sizeof(long long), MPI_BYTE, Communicator);
1500 MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
1501 list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
1502
1503 int nexport = 0, nimport = 0;
1504
1505 halotrees_data *import_data = NULL, *export_data = NULL;
1506
1507 /* for efficiency reasons, we need to traverse the local descendant pointers in increasing order of their target subhalo number, so
1508 * let's create an auxiliary list to facilitate this.
1509 */
1510 descnr_data *sorted_list = (descnr_data *)Mem.mymalloc("sorted_list", Cats[num - 1].Nsubhalos * sizeof(descnr_data));
1511
1512 for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1513 {
1514 sorted_list[i].DescSubhaloNr = Cats[num - 1].Descendants[i].DescSubhaloNr;
1515 sorted_list[i].TreeID = Cats[num - 1].Subhalo[i].TreeID;
1516 sorted_list[i].TreeTask = Cats[num - 1].Subhalo[i].TreeTask;
1517 sorted_list[i].orig_index = i;
1518 }
1519
1520 mycxxsort(sorted_list, sorted_list + Cats[num - 1].Nsubhalos, compare_sorted_list_descsubhalonr);
1521
1522 /* for communication bookkeeping reasons, we traverse the counting pattern twice */
1523 for(int mode = 0; mode < 2; mode++)
1524 {
1525 for(int i = 0; i < NTask; i++)
1526 Send_count[i] = 0;
1527
1528 int target = 0;
1529
1530 for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1531 {
1532 while(target < NTask - 1 &&
1533 (list_min_subhalonr[target] == empty || sorted_list[i].DescSubhaloNr > list_max_subhalonr[target]))
1534 target++;
1535
1536 if(list_min_subhalonr[target] != empty && sorted_list[i].DescSubhaloNr >= list_min_subhalonr[target] &&
1537 sorted_list[i].DescSubhaloNr <= list_max_subhalonr[target])
1538 {
1539 if(mode == 0)
1540 Send_count[target]++;
1541 else
1542 {
1543 int off = Send_offset[target] + Send_count[target]++;
1544
1545 export_data[off].loc_index = sorted_list[i].orig_index;
1546 export_data[off].descendantnr = sorted_list[i].DescSubhaloNr;
1547 export_data[off].treeid = sorted_list[i].TreeID;
1548 export_data[off].treetask = sorted_list[i].TreeTask;
1549 }
1550 }
1551 }
1552
1553 if(mode == 0)
1554 {
1555 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1556 Recv_offset[0] = Send_offset[0] = 0;
1557 for(int j = 0; j < NTask; j++)
1558 {
1559 nimport += Recv_count[j];
1560 nexport += Send_count[j];
1561 if(j > 0)
1562 {
1563 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1564 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1565 }
1566 }
1567
1568 export_data = (halotrees_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_data));
1569 import_data = (halotrees_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_data));
1570 }
1571 }
1572
1573 /* send data to those target processors that hold the descendant subhalos in order to fetch their tree-ids */
1574 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1575 {
1576 int recvTask = ThisTask ^ ngrp;
1577 if(recvTask < NTask)
1578 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1579 myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1580 TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1581 recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1582 }
1583
1584 /* the collection of incoming data is not necessarily sorted according to descendantnr, so we need to sort it for efficient matching
1585 */
1586 for(int i = 0; i < nimport; i++)
1587 import_data[i].orig_order = i;
1588
1589 mycxxsort(import_data, import_data + nimport, compare_halotrees_data_descendantnr);
1590
1591 int changes = 0;
1592
1593 /* now do the matching */
1594 for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1595 {
1596 if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].descendantnr)
1597 i++;
1598 else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].descendantnr)
1599 j++;
1600 else
1601 {
1602 if(import_data[j].treeid > Cats[num].Subhalo[i].TreeID)
1603 {
1604 import_data[j].treeid = Cats[num].Subhalo[i].TreeID;
1605 import_data[j].treetask = Cats[num].Subhalo[i].TreeTask;
1606 changes++;
1607 }
1608 else if(import_data[j].treeid < Cats[num].Subhalo[i].TreeID)
1609 {
1610 Cats[num].Subhalo[i].TreeID = import_data[j].treeid;
1611 Cats[num].Subhalo[i].TreeTask = import_data[j].treetask;
1612 changes++;
1613 }
1614 j++;
1615 }
1616 }
1617
1618 /* reestablish original order */
1619 mycxxsort(import_data, import_data + nimport, compare_halotrees_data_orig_order);
1620
1621 /* send the results back */
1622 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1623 {
1624 int recvTask = ThisTask ^ ngrp;
1625 if(recvTask < NTask)
1626 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1627 myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1628 TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1629 recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1630 }
1631
1632 /* now read it out and assign the new treeid/treetask value to the halos in the previous output (which are the progenitors) */
1633 for(int i = 0; i < nexport; i++)
1634 {
1635 if(export_data[i].treeid != HALONR_MAX)
1636 {
1637 Cats[num - 1].Subhalo[export_data[i].loc_index].TreeID = export_data[i].treeid;
1638 Cats[num - 1].Subhalo[export_data[i].loc_index].TreeTask = export_data[i].treetask;
1639 }
1640 }
1641
1642 Mem.myfree(import_data);
1643 Mem.myfree(export_data);
1644 Mem.myfree(sorted_list);
1645 Mem.myfree(list_max_subhalonr);
1646 Mem.myfree(list_min_subhalonr);
1647
1648 Mem.myfree(Recv_offset);
1649 Mem.myfree(Recv_count);
1650 Mem.myfree(Send_offset);
1651 Mem.myfree(Send_count);
1652
1653 return changes;
1654}
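
The matching loop above exploits that both sequences are sorted by the key being compared (the local Subhalo list by SubhaloNr, the imported requests by descendantnr), so a single two-pointer pass finds every pair, and whichever side carries the larger tree-id adopts the smaller one. A stand-alone sketch of this pattern, with illustrative names and data (not GADGET-4 code):

#include <cstdio>
#include <vector>

struct entry
{
  long long key;    /* SubhaloNr on the local side, descendantnr on the imported side */
  long long treeid; /* provisional tree-id */
};

int match_and_unify(std::vector<entry> &local, std::vector<entry> &imported)
{
  int changes = 0;

  for(std::size_t i = 0, j = 0; i < local.size() && j < imported.size();)
    {
      if(local[i].key < imported[j].key)
        i++;
      else if(local[i].key > imported[j].key)
        j++;
      else
        {
          if(imported[j].treeid > local[i].treeid)
            {
              imported[j].treeid = local[i].treeid;
              changes++;
            }
          else if(imported[j].treeid < local[i].treeid)
            {
              local[i].treeid = imported[j].treeid;
              changes++;
            }
          j++; /* several imported entries may target the same local subhalo */
        }
    }

  return changes; /* the caller keeps iterating the linking until no task reports changes */
}

int main()
{
  std::vector<entry> local    = {{3, 10}, {5, 4}, {9, 7}};
  std::vector<entry> imported = {{5, 2}, {5, 6}, {9, 11}};

  std::printf("changes = %d\n", match_and_unify(local, imported));
  return 0;
}
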
1655
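/* This function is the counterpart of halotrees_join_via_descendants(): it unifies the tree-ids of subhalos in snapshot
 * "num" with those of the subhalos in snapshot "num+1" that reference them through their progenitor links.
 */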
1656int mergertree::halotrees_join_via_progenitors(int num)
1657{
1658 int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1659 int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1660 int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1661 int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1662
1663 /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
1664
1665 /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
1666 long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1667 long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1668
1669 /* this value flags that there are no subhalos on the corresponding processor */
1670 long long empty = HALONR_MAX;
1671
1672 MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE, list_min_subhalonr,
1673 sizeof(long long), MPI_BYTE, Communicator);
1674 MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
1675 list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
1676
1677 int nexport = 0, nimport = 0;
1678
1679 halotrees_data *import_data = NULL, *export_data = NULL;
1680
1681 /* for efficiency reasons, we need to traverse the local progenitor pointers in increasing order of their target subhalo number, so
1682 * let's create an auxiliary list for facilitating this.
1683 */
1684 prognr_data *sorted_list = (prognr_data *)Mem.mymalloc("sorted_list", Cats[num + 1].Nsubhalos * sizeof(prognr_data));
1685
1686 for(int i = 0; i < Cats[num + 1].Nsubhalos; i++)
1687 {
1688 sorted_list[i].ProgSubhaloNr = Cats[num + 1].Progenitors[i].ProgSubhaloNr;
1689 sorted_list[i].TreeID = Cats[num + 1].Subhalo[i].TreeID;
1690 sorted_list[i].TreeTask = Cats[num + 1].Subhalo[i].TreeTask;
1691 sorted_list[i].orig_index = i;
1692 }
1693
1694 mycxxsort(sorted_list, sorted_list + Cats[num + 1].Nsubhalos, compare_sorted_list_progsubhalonr);
1695
1696 /* for communication bookkeeping reasons, we traverse the counting pattern twice */
1697 for(int mode = 0; mode < 2; mode++)
1698 {
1699 for(int i = 0; i < NTask; i++)
1700 Send_count[i] = 0;
1701
1702 int target = 0;
1703
1704 for(int i = 0; i < Cats[num + 1].Nsubhalos; i++)
1705 {
1706 while(target < NTask - 1 &&
1707 (list_min_subhalonr[target] == empty || sorted_list[i].ProgSubhaloNr > list_max_subhalonr[target]))
1708 target++;
1709
1710 if(list_min_subhalonr[target] != empty && sorted_list[i].ProgSubhaloNr >= list_min_subhalonr[target] &&
1711 sorted_list[i].ProgSubhaloNr <= list_max_subhalonr[target])
1712 {
1713 if(mode == 0)
1714 Send_count[target]++;
1715 else
1716 {
1717 int off = Send_offset[target] + Send_count[target]++;
1718
1719 export_data[off].loc_index = sorted_list[i].orig_index;
1720 export_data[off].progenitornr = sorted_list[i].ProgSubhaloNr;
1721 export_data[off].treeid = sorted_list[i].TreeID;
1722 export_data[off].treetask = sorted_list[i].TreeTask;
1723 }
1724 }
1725 }
1726
1727 if(mode == 0)
1728 {
1729 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1730 Recv_offset[0] = Send_offset[0] = 0;
1731 for(int j = 0; j < NTask; j++)
1732 {
1733 nimport += Recv_count[j];
1734 nexport += Send_count[j];
1735 if(j > 0)
1736 {
1737 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1738 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1739 }
1740 }
1741
1742 export_data = (halotrees_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_data));
1743 import_data = (halotrees_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_data));
1744 }
1745 }
1746
1747  /* send data to those target processors that hold the referenced progenitor subhalos in order to fetch their tree-ids */
1748 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1749 {
1750 int recvTask = ThisTask ^ ngrp;
1751 if(recvTask < NTask)
1752 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1753 myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1754 TAG_DENS_B, &import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1755 recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1756 }
1757
1758  /* the collection of incoming data is not necessarily sorted according to progenitornr, so we need to sort it for efficient
1759 * matching
1760 */
1761 for(int i = 0; i < nimport; i++)
1762 import_data[i].orig_order = i;
1763
1764 mycxxsort(import_data, import_data + nimport, compare_halotrees_data_progenitornr);
1765
1766 int changes = 0;
1767
1768 /* now do the matching */
1769 for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1770 {
1771 if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].progenitornr)
1772 i++;
1773 else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].progenitornr)
1774 j++;
1775 else
1776 {
1777 if(import_data[j].treeid > Cats[num].Subhalo[i].TreeID)
1778 {
1779 import_data[j].treeid = Cats[num].Subhalo[i].TreeID;
1780 import_data[j].treetask = Cats[num].Subhalo[i].TreeTask;
1781 changes++;
1782 }
1783 else if(import_data[j].treeid < Cats[num].Subhalo[i].TreeID)
1784 {
1785 Cats[num].Subhalo[i].TreeID = import_data[j].treeid;
1786 Cats[num].Subhalo[i].TreeTask = import_data[j].treetask;
1787 changes++;
1788 }
1789 j++;
1790 }
1791 }
1792
1793 /* reestablish original order */
1794 mycxxsort(import_data, import_data + nimport, compare_halotrees_data_orig_order);
1795
1796 /* send the results back */
1797 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++) /* note: here we also have a transfer from each task to itself (for ngrp=0) */
1798 {
1799 int recvTask = ThisTask ^ ngrp;
1800 if(recvTask < NTask)
1801 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1802 myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_data), MPI_BYTE, recvTask,
1803 TAG_DENS_B, &export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_data), MPI_BYTE,
1804 recvTask, TAG_DENS_B, Communicator, MPI_STATUS_IGNORE);
1805 }
1806
1807  /* now read it out and assign the new treeid/treetask value to the halos in the subsequent output (which hold the progenitor pointers) */
1808 for(int i = 0; i < nexport; i++)
1809 {
1810 if(export_data[i].treeid != HALONR_MAX)
1811 {
1812 Cats[num + 1].Subhalo[export_data[i].loc_index].TreeID = export_data[i].treeid;
1813 Cats[num + 1].Subhalo[export_data[i].loc_index].TreeTask = export_data[i].treetask;
1814 }
1815 }
1816
1817 Mem.myfree(import_data);
1818 Mem.myfree(export_data);
1819 Mem.myfree(sorted_list);
1820 Mem.myfree(list_max_subhalonr);
1821 Mem.myfree(list_min_subhalonr);
1822
1823 Mem.myfree(Recv_offset);
1824 Mem.myfree(Recv_count);
1825 Mem.myfree(Send_offset);
1826 Mem.myfree(Send_count);
1827
1828 return changes;
1829}
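
Both join functions carry out their data exchange with the same hypercube-style pairing: in round ngrp every task talks to partner = ThisTask ^ ngrp, so each pair of tasks meets in exactly one of the (1 << PTask) rounds, and round ngrp = 0 is the self-transfer noted in the comments above. A minimal MPI sketch of this pairing (illustrative, not GADGET-4 code):

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int rank, ntask;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &ntask);

  /* smallest power of two >= ntask, playing the role of (1 << PTask) above */
  int ptask = 0;
  while((1 << ptask) < ntask)
    ptask++;

  std::vector<int> received(ntask, -1);

  for(int ngrp = 0; ngrp < (1 << ptask); ngrp++)
    {
      int partner = rank ^ ngrp; /* every pair of tasks meets in exactly one round */

      if(partner < ntask)
        {
          int payload = rank; /* stands in for the export_data slice destined for this partner */

          MPI_Sendrecv(&payload, 1, MPI_INT, partner, 0, &received[partner], 1, MPI_INT, partner, 0, MPI_COMM_WORLD,
                       MPI_STATUS_IGNORE);
        }
    }

  std::printf("task %d has received data from all %d tasks (including itself)\n", rank, ntask);

  MPI_Finalize();
  return 0;
}
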
1830
1831void mergertree::halotrees_determine_mainprogenitor(void)
1832{
1833 for(int num = 1; num <= LastSnapShotNr; num++)
1834 for(int i = 0; i < Cats[num].Nsubhalos; i++)
1835 Cats[num].Progenitors[i].MainProgSubhaloNr = HALONR_MAX;
1836
1837 /* initialize the maximum branch length field */
1838 for(int num = 0; num <= LastSnapShotNr; num++)
1839 for(int i = 0; i < Cats[num].Nsubhalos; i++)
1840 Cats[num].SubExt[i].MaxLenProgBranch = Cats[num].Subhalo[i].Len;
1841
1842 /* propagate the branch length of subhalos from snapshot "num-1" to those in snapshot "num" via the
1843 * descendant information */
1844 for(int num = 1; num <= LastSnapShotNr; num++)
1845 halotrees_propagate_max_branch_length_descendants(num);
1846
1847 /* propagate the branch length of subhalos from snapshot "num-1" to those in snapshot "num" via the
1848 * progenitor information */
1849 /* we disable this as it could cause a spurious jump to a larger progenitor that shed a few particles to a newly formed group */
1850 // for(int num = 1; num <= LastSnapShotNr; num++)
1851 // halotrees_propagate_max_branch_length_progenitors(num);
1852
1853 mpi_printf("MERGERTREE: determination of main progenitor branch done\n");
1854}
1855
1856/* This function propagates the maximum branch size of subhalos from snapshot "num-1" to those in snapshot "num" via the
1857 * descendant information
1858 */
1859void mergertree::halotrees_propagate_max_branch_length_descendants(int num)
1860{
1861 int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
1862 int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
1863 int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
1864 int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
1865
1866 /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
1867
1868 /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
1869 long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
1870 long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
1871
1872 /* this value flags that there are no subhalos on the corresponding processor */
1873 long long empty = HALONR_MAX;
1874
1875 MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE, list_min_subhalonr,
1876 sizeof(long long), MPI_BYTE, Communicator);
1877 MPI_Allgather(Cats[num].Nsubhalos > 0 ? &Cats[num].Subhalo[Cats[num].Nsubhalos - 1].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
1878 list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
1879
1880 int nexport = 0, nimport = 0;
1881
1882 halotrees_propagate_data *import_data = NULL, *export_data = NULL;
1883
1884 /* for efficiency reasons, we need to traverse the local descendant pointers in increasing order of their target subhalo number, so
1885 * let's create an auxiliary list for facilitating this.
1886 */
1887 halotrees_propagate_data *sorted_list =
1888 (halotrees_propagate_data *)Mem.mymalloc("sorted_list", Cats[num - 1].Nsubhalos * sizeof(halotrees_propagate_data));
1889
1890 for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1891 {
1892 sorted_list[i].DescSubhaloNr = Cats[num - 1].Descendants[i].DescSubhaloNr;
1893 sorted_list[i].SubhaloNr = Cats[num - 1].Descendants[i].PrevSubhaloNr;
1894 sorted_list[i].MaxLenProgBranch = Cats[num - 1].SubExt[i].MaxLenProgBranch;
1895 }
1896
1897 mycxxsort(sorted_list, sorted_list + Cats[num - 1].Nsubhalos, compare_halotrees_propagate_data_DescSubhaloNr);
1898
1899 /* for communication bookkeeping reasons, we traverse the counting pattern twice */
1900 for(int mode = 0; mode < 2; mode++)
1901 {
1902 for(int i = 0; i < NTask; i++)
1903 Send_count[i] = 0;
1904
1905 int target = 0;
1906
1907 for(int i = 0; i < Cats[num - 1].Nsubhalos; i++)
1908 {
1909 while(target < NTask - 1 &&
1910 (list_min_subhalonr[target] == empty || sorted_list[i].DescSubhaloNr > list_max_subhalonr[target]))
1911 target++;
1912
1913 if(list_min_subhalonr[target] != empty && sorted_list[i].DescSubhaloNr >= list_min_subhalonr[target] &&
1914 sorted_list[i].DescSubhaloNr <= list_max_subhalonr[target])
1915 {
1916 if(mode == 0)
1917 Send_count[target]++;
1918 else
1919 {
1920 int off = Send_offset[target] + Send_count[target]++;
1921
1922 export_data[off].DescSubhaloNr = sorted_list[i].DescSubhaloNr;
1923 export_data[off].SubhaloNr = sorted_list[i].SubhaloNr;
1924 export_data[off].MaxLenProgBranch = sorted_list[i].MaxLenProgBranch;
1925 }
1926 }
1927 }
1928
1929 if(mode == 0)
1930 {
1931 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
1932 Recv_offset[0] = Send_offset[0] = 0;
1933 for(int j = 0; j < NTask; j++)
1934 {
1935 nimport += Recv_count[j];
1936 nexport += Send_count[j];
1937 if(j > 0)
1938 {
1939 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
1940 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
1941 }
1942 }
1943
1944 export_data = (halotrees_propagate_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_propagate_data));
1945 import_data = (halotrees_propagate_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_propagate_data));
1946 }
1947 }
1948
1949  /* send data to those target processors that hold the descendant subhalos in order to update their progenitor branch lengths */
1950 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
1951 {
1952 int recvTask = ThisTask ^ ngrp;
1953 if(recvTask < NTask)
1954 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
1955 myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE,
1956 recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]],
1957 Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
1958 MPI_STATUS_IGNORE);
1959 }
1960
1961 /* the collection of incoming data is not necessarily sorted according to DescSubhaloNr, so we need to sort it for efficient
1962 * matching
1963 */
1964 mycxxsort(import_data, import_data + nimport, compare_halotrees_propagate_data_DescSubhaloNr);
1965
1966 /* now do the matching */
1967 for(int i = 0, j = 0; i < Cats[num].Nsubhalos && j < nimport;)
1968 {
1969 if(Cats[num].Subhalo[i].SubhaloNr < import_data[j].DescSubhaloNr)
1970 i++;
1971 else if(Cats[num].Subhalo[i].SubhaloNr > import_data[j].DescSubhaloNr)
1972 j++;
1973 else
1974 {
1975 if(import_data[j].MaxLenProgBranch > Cats[num].SubExt[i].MaxLenProgBranch - Cats[num].Subhalo[i].Len)
1976 {
1977 Cats[num].SubExt[i].MaxLenProgBranch = import_data[j].MaxLenProgBranch + Cats[num].Subhalo[i].Len;
1978 Cats[num].Progenitors[i].MainProgSubhaloNr = import_data[j].SubhaloNr;
1979 }
1980
1981 j++;
1982 }
1983 }
1984
1985 Mem.myfree(import_data);
1986 Mem.myfree(export_data);
1987 Mem.myfree(sorted_list);
1988 Mem.myfree(list_max_subhalonr);
1989 Mem.myfree(list_min_subhalonr);
1990
1991 Mem.myfree(Recv_offset);
1992 Mem.myfree(Recv_count);
1993 Mem.myfree(Send_offset);
1994 Mem.myfree(Send_count);
1995}
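
The update in the matching loop above implements the recurrence MaxLenProgBranch(descendant) = Len(descendant) + max over its progenitors of MaxLenProgBranch(progenitor). Since the descendant's field is initialised to its own Len and always keeps that contribution included, an incoming candidate is compared against (MaxLenProgBranch - Len), i.e. against the best progenitor branch found so far. A serial stand-alone sketch of this update rule (illustrative, not GADGET-4 code):

#include <cstdio>

struct descendant
{
  long long Len;               /* particle number of the descendant subhalo itself */
  long long MaxLenProgBranch;  /* initialised to Len, then grown by the best progenitor branch */
  long long MainProgSubhaloNr; /* -1 stands in for HALONR_MAX, i.e. "no progenitor yet" */
};

static void update(descendant &d, long long prog_branchlen, long long prog_subhalonr)
{
  /* accept the candidate only if its branch beats the best progenitor contribution so far */
  if(prog_branchlen > d.MaxLenProgBranch - d.Len)
    {
      d.MaxLenProgBranch  = prog_branchlen + d.Len;
      d.MainProgSubhaloNr = prog_subhalonr;
    }
}

int main()
{
  descendant d = {1000, 1000, -1};

  update(d, 300, 7);  /* first candidate: a branch of 300 particles        */
  update(d, 4500, 9); /* a larger branch takes over as the main progenitor */
  update(d, 4000, 3); /* smaller than the current best, so no change       */

  std::printf("MaxLenProgBranch=%lld  MainProgSubhaloNr=%lld\n", d.MaxLenProgBranch, d.MainProgSubhaloNr);
  return 0;
}
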
1996
1997/* This function propagates the maximum branch size of subhalos from snapshot "num-1" to those in snapshot "num" via the
1998 * progenitor information
1999 */
2000void mergertree::halotrees_propagate_max_branch_length_progenitors(int num)
2001{
2002 int *Send_count = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
2003 int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
2004 int *Recv_count = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
2005 int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);
2006
2007 /* note: we take advantage of the fact that the Subgroups are ordered according to their global SubhaloNr for each output number */
2008
2009 /* the following lists store the minimum and maximum subhalo number to be found on a given processor */
2010 long long *list_min_subhalonr = (long long *)Mem.mymalloc("list_min_subhalonr", NTask * sizeof(long long));
2011 long long *list_max_subhalonr = (long long *)Mem.mymalloc("list_max_subhalonr", NTask * sizeof(long long));
2012
2013 /* this value flags that there are no subhalos on the corresponding processor */
2014 long long empty = HALONR_MAX;
2015
2016 MPI_Allgather(Cats[num - 1].Nsubhalos > 0 ? &Cats[num - 1].Subhalo[0].SubhaloNr : &empty, sizeof(long long), MPI_BYTE,
2017 list_min_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
2018 MPI_Allgather(Cats[num - 1].Nsubhalos > 0 ? &Cats[num - 1].Subhalo[Cats[num - 1].Nsubhalos - 1].SubhaloNr : &empty,
2019 sizeof(long long), MPI_BYTE, list_max_subhalonr, sizeof(long long), MPI_BYTE, Communicator);
2020
2021 int nexport = 0, nimport = 0;
2022
2023 halotrees_propagate_data *import_data = NULL, *export_data = NULL;
2024
2025  /* for efficiency reasons, we need to traverse the local progenitor pointers in increasing order of their target subhalo number, so
2026 * let's create an auxiliary list for facilitating this.
2027 */
2028 halotrees_propagate_data *sorted_list =
2029 (halotrees_propagate_data *)Mem.mymalloc("sorted_list", Cats[num].Nsubhalos * sizeof(halotrees_propagate_data));
2030
2031 for(int i = 0; i < Cats[num].Nsubhalos; i++)
2032 {
2033 sorted_list[i].ProgSubhaloNr = Cats[num].Progenitors[i].ProgSubhaloNr;
2034 sorted_list[i].index = i;
2035 }
2036
2037 mycxxsort(sorted_list, sorted_list + Cats[num].Nsubhalos, compare_halotrees_propagate_data_ProgSubhaloNr);
2038
2039 /* for communication bookkeeping reasons, we traverse the counting pattern twice */
2040 for(int mode = 0; mode < 2; mode++)
2041 {
2042 for(int i = 0; i < NTask; i++)
2043 Send_count[i] = 0;
2044
2045 int target = 0;
2046
2047 for(int i = 0; i < Cats[num].Nsubhalos; i++)
2048 {
2049 while(target < NTask - 1 &&
2050 (list_min_subhalonr[target] == empty || sorted_list[i].ProgSubhaloNr > list_max_subhalonr[target]))
2051 target++;
2052
2053 if(list_min_subhalonr[target] != empty && sorted_list[i].ProgSubhaloNr >= list_min_subhalonr[target] &&
2054 sorted_list[i].ProgSubhaloNr <= list_max_subhalonr[target])
2055 {
2056 if(mode == 0)
2057 Send_count[target]++;
2058 else
2059 {
2060 int off = Send_offset[target] + Send_count[target]++;
2061
2062 export_data[off].ProgSubhaloNr = sorted_list[i].ProgSubhaloNr;
2063 export_data[off].index = sorted_list[i].index;
2064 }
2065 }
2066 }
2067
2068 if(mode == 0)
2069 {
2070 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
2071 Recv_offset[0] = Send_offset[0] = 0;
2072 for(int j = 0; j < NTask; j++)
2073 {
2074 nimport += Recv_count[j];
2075 nexport += Send_count[j];
2076 if(j > 0)
2077 {
2078 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
2079 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
2080 }
2081 }
2082
2083 export_data = (halotrees_propagate_data *)Mem.mymalloc("export_data", nexport * sizeof(halotrees_propagate_data));
2084 import_data = (halotrees_propagate_data *)Mem.mymalloc("import_data", nimport * sizeof(halotrees_propagate_data));
2085 }
2086 }
2087
2088  /* send data to those target processors that hold the referenced progenitor subhalos in order to fetch their branch lengths */
2089 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
2090 {
2091 int recvTask = ThisTask ^ ngrp;
2092 if(recvTask < NTask)
2093 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
2094 myMPI_Sendrecv(&export_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE,
2095 recvTask, TAG_DENS_B, &import_data[Recv_offset[recvTask]],
2096 Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
2097 MPI_STATUS_IGNORE);
2098 }
2099
2100 for(int i = 0; i < nimport; i++)
2101 import_data[i].orig_order = i;
2102
2103 /* the collection of incoming data is not necessarily sorted according to ProgSubhaloNr, so we need to sort it for efficient
2104 * matching
2105 */
2106 mycxxsort(import_data, import_data + nimport, compare_halotrees_propagate_data_ProgSubhaloNr);
2107
2108 /* now do the matching */
2109 for(int i = 0, j = 0; i < Cats[num - 1].Nsubhalos && j < nimport;)
2110 {
2111 if(Cats[num - 1].Subhalo[i].SubhaloNr < import_data[j].ProgSubhaloNr)
2112 i++;
2113 else if(Cats[num - 1].Subhalo[i].SubhaloNr > import_data[j].ProgSubhaloNr)
2114 j++;
2115 else
2116 {
2117 import_data[j].MaxLenProgBranch = Cats[num - 1].SubExt[i].MaxLenProgBranch;
2118 import_data[j].SubhaloNr = Cats[num - 1].Subhalo[i].SubhaloNr;
2119
2120 if(Cats[num - 1].Subhalo[i].SubhaloNr != import_data[j].ProgSubhaloNr)
2121          Terminate("Cats[num - 1].Subhalo[i].SubhaloNr != import_data[j].ProgSubhaloNr");
2122
2123 j++;
2124 }
2125 }
2126
2127 /* reestablish original order */
2128 mycxxsort(import_data, import_data + nimport, compare_halotrees_propagate_data_orig_order);
2129
2130 /* send data back */
2131 for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
2132 {
2133 int recvTask = ThisTask ^ ngrp;
2134 if(recvTask < NTask)
2135 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
2136 myMPI_Sendrecv(&import_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE,
2137 recvTask, TAG_DENS_B, &export_data[Send_offset[recvTask]],
2138 Send_count[recvTask] * sizeof(halotrees_propagate_data), MPI_BYTE, recvTask, TAG_DENS_B, Communicator,
2139 MPI_STATUS_IGNORE);
2140 }
2141
2142  /* now read it out and update the maximum progenitor branch length and main progenitor of the subhalos in snapshot "num" */
2143 for(int i = 0; i < nexport; i++)
2144 {
2145 int q = export_data[i].index;
2146
2147 if(export_data[i].MaxLenProgBranch > Cats[num].SubExt[q].MaxLenProgBranch - Cats[num].Subhalo[q].Len)
2148 {
2149 Cats[num].SubExt[q].MaxLenProgBranch = export_data[i].MaxLenProgBranch + Cats[num].Subhalo[q].Len;
2150 Cats[num].Progenitors[q].MainProgSubhaloNr = export_data[i].SubhaloNr;
2151 }
2152 }
2153
2154 Mem.myfree(import_data);
2155 Mem.myfree(export_data);
2156 Mem.myfree(sorted_list);
2157 Mem.myfree(list_max_subhalonr);
2158 Mem.myfree(list_min_subhalonr);
2159
2160 Mem.myfree(Recv_offset);
2161 Mem.myfree(Recv_count);
2162 Mem.myfree(Send_offset);
2163 Mem.myfree(Send_count);
2164}
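
All four exchange routines above locate the task that owns a given subhalo number in the same way: the gathered list_min_subhalonr/list_max_subhalonr arrays describe the disjoint, ordered SubhaloNr range held by each task, and because the requests are processed in increasing order of the targeted subhalo number, a single forward-moving target index is enough instead of a per-request binary search. A serial sketch of this routing (illustrative, not GADGET-4 code; EMPTY plays the role of HALONR_MAX):

#include <cstdio>
#include <vector>

int main()
{
  const long long EMPTY = -1; /* marks a task that holds no subhalos */

  /* per-task [min, max] SubhaloNr ranges, as gathered with MPI_Allgather above */
  std::vector<long long> list_min = {0, EMPTY, 40, 90};
  std::vector<long long> list_max = {30, EMPTY, 80, 120};
  const int ntask = (int)list_min.size();

  /* requested subhalo numbers, already sorted in increasing order */
  std::vector<long long> request = {5, 12, 44, 95, 200};

  int target = 0;

  for(long long nr : request)
    {
      /* skip empty tasks and tasks whose range lies entirely below nr */
      while(target < ntask - 1 && (list_min[target] == EMPTY || nr > list_max[target]))
        target++;

      if(list_min[target] != EMPTY && nr >= list_min[target] && nr <= list_max[target])
        std::printf("subhalo %lld is held by task %d\n", nr, target);
      else
        std::printf("subhalo %lld is held by no task\n", nr);
    }

  return 0;
}
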
2165
2166#endif
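
Finally, the "counting pattern twice" bookkeeping that every exchange in this file relies on: the first pass only counts how many entries go to each task, a prefix sum over the counts turns them into offsets, and the second pass writes each entry directly into its final slot of the export buffer, reusing the count array as a running cursor. A serial sketch of the idea (illustrative, not GADGET-4 code):

#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
  const int ntask = 4;
  std::vector<int> target = {2, 0, 2, 1, 2, 0}; /* destination task of each item */

  std::vector<int> send_count(ntask, 0), send_offset(ntask, 0);

  /* pass 0: count how many items each task receives */
  for(int t : target)
    send_count[t]++;

  /* the prefix sum of the counts gives the offsets */
  for(int j = 1; j < ntask; j++)
    send_offset[j] = send_offset[j - 1] + send_count[j - 1];

  /* pass 1: fill the export buffer, reusing send_count as a running cursor */
  std::vector<int> export_buf(target.size());
  std::fill(send_count.begin(), send_count.end(), 0);

  for(std::size_t i = 0; i < target.size(); i++)
    {
      int off         = send_offset[target[i]] + send_count[target[i]]++;
      export_buf[off] = (int)i; /* the payload here is just the item index */
    }

  for(std::size_t i = 0; i < export_buf.size(); i++)
    std::printf("slot %zu -> item %d\n", i, export_buf[i]);

  return 0;
}
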