GADGET-4
descendant.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#ifdef MERGERTREE

#include <gsl/gsl_rng.h>
#include <hdf5.h>
#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../fof/fof.h"
#include "../io/hdf5_util.h"
#include "../io/io.h"
#include "../logs/timer.h"
#include "../main/main.h"
#include "../main/simulation.h"
#include "../mergertree/io_descendant.h"
#include "../mergertree/io_progenitors.h"
#include "../mergertree/mergertree.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/parallel_sort.h"
#include "../subfind/subfind.h"
#include "../system/system.h"

/* This function allocates and fills the "Descendants" array, which gives the number of the descendant subhalo in the newly created
 * subhalo catalogue for every subhalo from the previous catalogue.
 */
void mergertree::mergertree_determine_descendants_postproc(int num)
{
  Descendants = (desc_list *)Mem.mymalloc_movable(&Descendants, "Descendants", (PrevNsubhalos + 1) * sizeof(desc_list));
  Progenitors = (prog_list *)Mem.mymalloc_movable(&Progenitors, "Progenitors", (CurrNsubhalos + 1) * sizeof(prog_list));

  Sp->NumPart = MtrP_NumPart;
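  /* Note: in this postprocessing path the per-particle subhalo membership is taken from the MtrP array,
   * whereas the on-the-fly variant below reads it directly from the live Sp->P and Sp->PS arrays. */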

  /* allocate a work structure */
  desc = (desc_partdata *)Mem.mymalloc_movable(&desc, "desc", sizeof(desc_partdata) * Sp->NumPart);

  /* let's fill in some relevant data into the work-structure */
  for(int i = 0; i < Sp->NumPart; i++)
    {
      desc[i].CurrSubhaloNr.set(MtrP[i].SubhaloNr);
      desc[i].CurrRankInSubhalo.set(MtrP[i].RankInSubhalo);

      desc[i].PrevSubhaloNr.set(MtrP[i].PrevSubhaloNr);
      desc[i].PrevRankInSubhalo.set(MtrP[i].PrevRankInSubhalo);

      if(desc[i].PrevSubhaloNr.get() >= PrevTotNsubhalos && desc[i].PrevSubhaloNr.get() != HALONR_MAX)
        Terminate("strange: i=%d desc[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld", i, (long long)desc[i].PrevSubhaloNr.get(),
                  (long long)PrevTotNsubhalos);
    }

  mergertree_determine_descendants(num);

  mpi_printf("DONE!\n");
}

/* This function allocates and fills the "Descendants" array, which gives the number of the descendant subhalo in the newly created
 * subhalo catalogue for every subhalo from the previous catalogue.
 */
void mergertree::mergertree_determine_descendants_on_the_fly(int num)
{
  if(num == 0)  // for the first output, we don't yet have anything to link
    return;

  Descendants = (desc_list *)Mem.mymalloc_movable(&Descendants, "Descendants", (PrevNsubhalos + 1) * sizeof(desc_list));
  Progenitors = (prog_list *)Mem.mymalloc_movable(&Progenitors, "Progenitors", (CurrNsubhalos + 1) * sizeof(prog_list));

  /* allocate a work structure */
  desc = (desc_partdata *)Mem.mymalloc_movable(&desc, "desc", sizeof(desc_partdata) * Sp->NumPart);

  /* Let's fill in some relevant data into a work-structure defined for every particle.
   * For each particle, we know the new and the old subhalo number, as well as its rank in the previous subhalo.
   */
  for(int i = 0; i < Sp->NumPart; i++)
    {
      desc[i].CurrSubhaloNr     = Sp->PS[i].SubhaloNr;
      desc[i].CurrRankInSubhalo = Sp->PS[i].RankInSubhalo;

      desc[i].PrevSubhaloNr     = Sp->P[i].PrevSubhaloNr;
      desc[i].PrevRankInSubhalo = Sp->P[i].PrevRankInSubhalo;

      if(desc[i].PrevSubhaloNr.get() >= PrevTotNsubhalos && desc[i].PrevSubhaloNr.get() != HALONR_MAX)
        Terminate("strange: i=%d desc[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld", i, (long long)desc[i].PrevSubhaloNr.get(),
                  (long long)PrevTotNsubhalos);
    }

  mergertree_determine_descendants(num);

  Mem.myfree(desc);
  Mem.myfree(Progenitors);
  Mem.myfree(Descendants);
}

void mergertree::mergertree_determine_descendants(int num)
{
  /* determine matching pieces of subhalos and their mutual scores */
  int nmatch = mergertree_find_matching_segments_and_scores();

  /* select the main descendants */
  mergertree_select_maximum_score_descendants(nmatch);

  /* select the main progenitors */
  mergertree_select_maximum_score_progenitors(nmatch);

  /* let's determine the next-progenitor fields, which chain up those subhalos that have the same descendant */
  mergertree_chain_up_progenitors_with_same_descendant();

  /* set first progenitor field for chaining up those subhalos that have the same descendant */
  mergertree_set_first_progenitor_with_same_descendant();

  /* let's determine the next-descendant fields, which chain up those subhalos that have the same progenitor */
  mergertree_chain_up_descendants_with_same_progenitor();

  /* set first descendant field for chaining up those subhalos that have the same progenitor */
  mergertree_set_first_descendant_with_same_progenitor();

  /**** write stuff to files *****/

  descendant_io Desc(this, this->Communicator, All.SnapFormat); /* get an I/O object */
  Desc.mergertree_save_descendants(num);

  progenitors_io Prog(this, this->Communicator, All.SnapFormat);
  Prog.mergertree_save_progenitors(num);

  /* calculate and output some statistics to characterize linking */

  int count_links = 0;
  for(int i = 0; i < PrevNsubhalos; i++)
    if(Descendants[i].DescSubhaloNr != HALONR_MAX)
      count_links++;

  long long tot_count_links;
  sumup_large_ints(1, &count_links, &tot_count_links, Communicator);

  mpi_printf("MERGERTREE: Was able to identify descendants for %lld out of %lld subhalos, i.e. for a fraction of %g\n",
             tot_count_links, (long long)PrevTotNsubhalos, tot_count_links / (PrevTotNsubhalos + SMALLNUM));

  int count_splits = 0;
  for(int i = 0; i < CurrNsubhalos; i++)
    if(Progenitors[i].NextDescSubhaloNr != HALONR_MAX)
      count_splits++;

  long long tot_count_splits;
  sumup_large_ints(1, &count_splits, &tot_count_splits, Communicator);

  mpi_printf("MERGERTREE: We have found secondary descendants for %lld halos out of %lld subhalos, i.e. for a fraction of %g\n",
             tot_count_splits, (long long)CurrTotNsubhalos, tot_count_splits / (CurrTotNsubhalos + SMALLNUM));
}

int mergertree::mergertree_find_matching_segments_and_scores(void)
{
  /* let's eliminate unbound particles from the work list */
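  /* (particles whose CurrSubhaloNr or PrevSubhaloNr equals HALONR_MAX are not part of a subhalo in the
   * corresponding catalogue and cannot contribute to a match; they are removed by overwriting them with the
   * current last list element) */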
  int nmatch = Sp->NumPart;

  for(int i = 0; i < nmatch; i++)
    if(desc[i].CurrSubhaloNr.get() == HALONR_MAX || desc[i].PrevSubhaloNr.get() == HALONR_MAX)
      {
        desc[i] = desc[nmatch - 1];
        nmatch--;
        i--;
      }

  /* let's do the scoring */
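  /* Each remaining particle contributes a weight 1 / (1 + sqrt(rank)) to the candidate link between its previous
   * and its current subhalo, where 'rank' is the particle's rank within the respective subhalo (rank 0 being the
   * most bound particle). Only the NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING lowest ranks contribute, so the
   * links are dominated by the most tightly bound material. DescScore uses the rank in the previous subhalo,
   * ProgScore the rank in the current one. */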
  for(int i = 0; i < nmatch; i++)
    {
      if(desc[i].PrevRankInSubhalo.get() < NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING)
        desc[i].DescScore = 1.0 / (1 + pow(desc[i].PrevRankInSubhalo.get(), 0.5));
      else
        desc[i].DescScore = 0;

      if(desc[i].CurrRankInSubhalo.get() < NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING)
        desc[i].ProgScore = 1.0 / (1 + pow(desc[i].CurrRankInSubhalo.get(), 0.5));
      else
        desc[i].ProgScore = 0;
    }

  /* Now we sort the list such that the old subhalos are grouped together, and the new subhalo numbers are consecutive within them
   */
  mycxxsort_parallel(desc, desc + nmatch, mergertree_compare_PrevSubNr_NewSubNr, Communicator);

  /* eliminate duplicate matched pairs on the local processor and sum up the scores
   */
  int start = 0;
  int count = 0;

  if(nmatch > 0)
    count = 1;

  for(int i = 1; i < nmatch; i++)
    if(desc[i].PrevSubhaloNr == desc[start].PrevSubhaloNr && desc[i].CurrSubhaloNr == desc[start].CurrSubhaloNr)
      {
        desc[start].DescScore += desc[i].DescScore;
        desc[start].ProgScore += desc[i].ProgScore;
      }
    else
      {
        desc[count] = desc[i];
        start       = count;
        count++;
      }

  nmatch = count;

  /* now we consolidate duplicate matched pairs on different processors. The list is still ordered, but there could be gaps
   * (i.e. processors with only one or potentially zero entries)
   */

  /* obtain last and first element of each processor, and the counts from each processor */
  desc_partdata *desc_first = (desc_partdata *)Mem.mymalloc("desc_first", NTask * sizeof(desc_partdata));
  desc_partdata *desc_last  = (desc_partdata *)Mem.mymalloc("desc_last", NTask * sizeof(desc_partdata));
  int *nmatch_list          = (int *)Mem.mymalloc("nmatch_list", NTask * sizeof(int));

  MPI_Allgather(&desc[0], sizeof(desc_partdata), MPI_BYTE, desc_first, sizeof(desc_partdata), MPI_BYTE,
                Communicator); /* note: the 0th element is guaranteed to be allocated */
  MPI_Allgather(&desc[nmatch > 0 ? nmatch - 1 : 0], sizeof(desc_partdata), MPI_BYTE, desc_last, sizeof(desc_partdata), MPI_BYTE,
                Communicator);
  MPI_Allgather(&nmatch, 1, MPI_INT, nmatch_list, 1, MPI_INT, Communicator);

  /* We go from the back through the tasks, and for the first element in each case, we move it ahead in the list as far as possible.
   * Eliminated items are marked by setting DescScore = -1, and are then filtered out later.
   */
  for(int i = NTask - 1; i > 0; i--)
    if(nmatch_list[i] > 0)
      {
        int target = -1;

        for(int j = i - 1; j >= 0; j--)
          {
            if(nmatch_list[j] > 0)
              {
                if(nmatch_list[j] > 1)
                  {
                    if(desc_last[j].PrevSubhaloNr == desc_first[i].PrevSubhaloNr &&
                       desc_last[j].CurrSubhaloNr == desc_first[i].CurrSubhaloNr)
                      target = j;
                  }
                else /* nmatch_list[j] == 1 */
                  {
                    if(desc_first[j].PrevSubhaloNr == desc_first[i].PrevSubhaloNr &&
                       desc_first[j].CurrSubhaloNr == desc_first[i].CurrSubhaloNr)
                      target = j;
                  }

                break;
              }
          }

        if(target >= 0)
          {
            if(nmatch_list[target] > 1)
              {
                desc_last[target].DescScore += desc_first[i].DescScore;
                desc_last[target].ProgScore += desc_first[i].ProgScore;

                if(ThisTask == target)
                  {
                    desc[nmatch - 1].DescScore += desc_first[i].DescScore;
                    desc[nmatch - 1].ProgScore += desc_first[i].ProgScore;
                  }
              }
            else
              {
                desc_first[target].DescScore += desc_first[i].DescScore;
                desc_first[target].ProgScore += desc_first[i].ProgScore;

                if(ThisTask == target)
                  {
                    desc[0].DescScore += desc_first[i].DescScore;
                    desc[0].ProgScore += desc_first[i].ProgScore;
                  }
              }

            desc_first[i].DescScore = -1;
            desc_first[i].ProgScore = -1;

            if(ThisTask == i)
              {
                desc[0].DescScore = -1;
                desc[0].ProgScore = -1;
              }
          }
      }

  Mem.myfree(nmatch_list);
  Mem.myfree(desc_last);
  Mem.myfree(desc_first);

  /* now eliminate the ones with negative score
   */
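  /* (only the first local element can have been absorbed by a preceding task in the loop above, so at most one
   * entry needs to be dropped here) */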
  if(nmatch > 0 && desc[0].DescScore < 0)
    {
      nmatch--;
      memmove(desc, desc + 1, nmatch * sizeof(desc_partdata));
    }

  return nmatch;
}

void mergertree::mergertree_chain_up_descendants_with_same_progenitor(void)
{
  /* sort by progenitor to bring equal ones next to each other */
  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_ProgSubhaloNr, Communicator);
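  /* After this global sort, all current subhalos that share the same progenitor are stored consecutively, possibly
   * across task boundaries. Below, each entry's NextDescSubhaloNr is set to the SubhaloNr of the following entry
   * with the same progenitor, and FirstDescFlag marks the first entry of each such group; the gathered first/last
   * elements of every task are needed to establish the links across task boundaries. */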

  prog_list *elem_first = (prog_list *)Mem.mymalloc("elem_first", NTask * sizeof(prog_list));
  prog_list *elem_last  = (prog_list *)Mem.mymalloc("elem_last", NTask * sizeof(prog_list));

  /* note: the 0th element is guaranteed to be allocated even on ranks with zero CurrNsubhalos */
  MPI_Allgather(&Progenitors[0], sizeof(prog_list), MPI_BYTE, elem_first, sizeof(prog_list), MPI_BYTE, Communicator);
  MPI_Allgather(&Progenitors[CurrNsubhalos > 0 ? CurrNsubhalos - 1 : 0], sizeof(prog_list), MPI_BYTE, elem_last, sizeof(prog_list),
                MPI_BYTE, Communicator);

  /* get list of the subhalo count on each processor, and the cumulative number stored before */
  int *tab_CurrNsubhalos = (int *)Mem.mymalloc("tab_CurrNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);

  int next_task = -1;
  for(int i = ThisTask + 1; i < NTask; i++)
    if(tab_CurrNsubhalos[i] > 0)
      {
        next_task = i;
        break;
      }

  int prev_task = -1;
  for(int i = ThisTask - 1; i >= 0; i--)
    if(tab_CurrNsubhalos[i] > 0)
      {
        prev_task = i;
        break;
      }

  for(int i = 0; i < CurrNsubhalos; i++)
    {
      if(i < CurrNsubhalos - 1)
        {
          if(Progenitors[i].ProgSubhaloNr == Progenitors[i + 1].ProgSubhaloNr && Progenitors[i].ProgSubhaloNr != HALONR_MAX)
            Progenitors[i].NextDescSubhaloNr = Progenitors[i + 1].SubhaloNr;
          else
            Progenitors[i].NextDescSubhaloNr = HALONR_MAX;
        }
      else
        {
          if(next_task >= 0 && Progenitors[i].ProgSubhaloNr == elem_first[next_task].ProgSubhaloNr &&
             Progenitors[i].ProgSubhaloNr != HALONR_MAX)
            Progenitors[i].NextDescSubhaloNr = elem_first[next_task].SubhaloNr;
          else
            Progenitors[i].NextDescSubhaloNr = HALONR_MAX;
        }

      if(i > 0)
        {
          if(Progenitors[i].ProgSubhaloNr != Progenitors[i - 1].ProgSubhaloNr && Progenitors[i].ProgSubhaloNr != HALONR_MAX)
            Progenitors[i].FirstDescFlag = 1; /* flags the first descendants */
          else
            Progenitors[i].FirstDescFlag = 0;
        }
      else
        {
          if(Progenitors[i].ProgSubhaloNr != HALONR_MAX &&
             (ThisTask == 0 || (prev_task >= 0 && Progenitors[i].ProgSubhaloNr != elem_last[prev_task].ProgSubhaloNr)))
            Progenitors[i].FirstDescFlag = 1; /* flags the first descendants */
          else
            Progenitors[i].FirstDescFlag = 0;
        }
    }

  Mem.myfree(tab_CurrNsubhalos);
  Mem.myfree(elem_last);
  Mem.myfree(elem_first);

  /* bring back into original order */
  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_SubhaloNr, Communicator);
}

/* This function determines the next progenitor field, which chains up those subhalos that have the same descendant */
void mergertree::mergertree_chain_up_progenitors_with_same_descendant(void)
{
  /* sort by descendant to bring equal ones next to each other */
  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_DescSubhaloNr, Communicator);
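  /* The resulting NextProgSubhaloNr entries form, for every descendant, a chain through all previous subhalos that
   * point to it; FirstProgFlag marks the head of each chain, so that the descendant can later be assigned its
   * first progenitor. */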

  desc_list *elem_first = (desc_list *)Mem.mymalloc("elem_first", NTask * sizeof(desc_list));
  desc_list *elem_last  = (desc_list *)Mem.mymalloc("elem_last", NTask * sizeof(desc_list));

  /* note: the 0th element is guaranteed to be allocated even on ranks with zero PrevNsubhalos */
  MPI_Allgather(&Descendants[0], sizeof(desc_list), MPI_BYTE, elem_first, sizeof(desc_list), MPI_BYTE, Communicator);
  MPI_Allgather(&Descendants[PrevNsubhalos > 0 ? PrevNsubhalos - 1 : 0], sizeof(desc_list), MPI_BYTE, elem_last, sizeof(desc_list),
                MPI_BYTE, Communicator);

  /* get list of the subhalo count on each processor, and the cumulative number stored before */
  int *tab_PrevNsubhalos = (int *)Mem.mymalloc("tab_PrevNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);

  int next_task = -1;
  for(int i = ThisTask + 1; i < NTask; i++)
    if(tab_PrevNsubhalos[i] > 0)
      {
        next_task = i;
        break;
      }

  int prev_task = -1;
  for(int i = ThisTask - 1; i >= 0; i--)
    if(tab_PrevNsubhalos[i] > 0)
      {
        prev_task = i;
        break;
      }

  for(int i = 0; i < PrevNsubhalos; i++)
    {
      if(i < PrevNsubhalos - 1)
        {
          if(Descendants[i].DescSubhaloNr == Descendants[i + 1].DescSubhaloNr && Descendants[i].DescSubhaloNr != HALONR_MAX)
            Descendants[i].NextProgSubhaloNr = Descendants[i + 1].PrevSubhaloNr;
          else
            Descendants[i].NextProgSubhaloNr = HALONR_MAX;
        }
      else
        {
          if(next_task >= 0 && Descendants[i].DescSubhaloNr == elem_first[next_task].DescSubhaloNr &&
             Descendants[i].DescSubhaloNr != HALONR_MAX)
            Descendants[i].NextProgSubhaloNr = elem_first[next_task].PrevSubhaloNr;
          else
            Descendants[i].NextProgSubhaloNr = HALONR_MAX;
        }

      if(i > 0)
        {
          if(Descendants[i].DescSubhaloNr != Descendants[i - 1].DescSubhaloNr && Descendants[i].DescSubhaloNr != HALONR_MAX)
            Descendants[i].FirstProgFlag = 1; /* flags the first progenitors */
          else
            Descendants[i].FirstProgFlag = 0;
        }
      else
        {
          if(Descendants[i].DescSubhaloNr != HALONR_MAX &&
             (ThisTask == 0 || (prev_task >= 0 && Descendants[i].DescSubhaloNr != elem_last[prev_task].DescSubhaloNr)))
            Descendants[i].FirstProgFlag = 1; /* flags the first progenitors */
          else
            Descendants[i].FirstProgFlag = 0;
        }
    }

  Mem.myfree(tab_PrevNsubhalos);
  Mem.myfree(elem_last);
  Mem.myfree(elem_first);

  /* bring back into original order */
  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_PrevSubhaloNr, Communicator);
}

/******** set first progenitor field ****/
void mergertree::mergertree_set_first_progenitor_with_same_descendant(void)
{
  /* sort by descendant to bring equal ones next to each other */
  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_DescSubhaloNr, Communicator);

  int *Send_count  = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count  = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  int *tab_CurrNsubhalos = (int *)Mem.mymalloc("tab_CurrNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);

  long long cumul_currnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_currnsubhalos += tab_CurrNsubhalos[i];

  struct pair_data
  {
    long long subhalonr;
    long long firstprognr;
  };
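  /* Each (descendant, first progenitor) pair found below is sent to the task that stores that descendant subhalo;
   * the owning task is identified by walking the cumulative tab_CurrNsubhalos counts, since subhalo numbers are
   * laid out consecutively across the tasks. */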

  pair_data *send_data = NULL;
  pair_data *recv_data = NULL;
  int nexport = 0, nimport = 0;
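  /* Two passes over the same loop: in the first pass (mode == 0) only the per-task send counts are accumulated,
   * which then yield the offset tables and the sizes of the send/receive buffers; in the second pass the send
   * buffer is actually filled. */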
  for(int mode = 0; mode < 2; mode++)  // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task        = 0;
      long long first = 0;

      for(int i = 0; i < PrevNsubhalos; i++)
        {
          if(Descendants[i].FirstProgFlag && Descendants[i].DescSubhaloNr != HALONR_MAX)
            {
              while(task < NTask - 1 && Descendants[i].DescSubhaloNr >= first + tab_CurrNsubhalos[task])
                {
                  first += tab_CurrNsubhalos[task];
                  task++;
                }

              if(mode == 0)
                Send_count[task]++;
              else
                {
                  int off = Send_offset[task] + Send_count[task]++;

                  send_data[off].subhalonr   = Descendants[i].DescSubhaloNr;
                  send_data[off].firstprognr = Descendants[i].PrevSubhaloNr;
                }
            }
        }

      if(mode == 0)  // prepare offset tables
        {
          myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (pair_data *)Mem.mymalloc("pair_data", nexport * sizeof(pair_data));
          recv_data = (pair_data *)Mem.mymalloc("pair_data", nimport * sizeof(pair_data));
        }
    }

  /* exchange data */
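  /* Pairwise exchange: in step ngrp every task communicates with the partner ThisTask ^ ngrp, so each pair of
   * tasks exchanges its data exactly once; partners with recvTask >= NTask are skipped. */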
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                           &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                           Communicator, MPI_STATUS_IGNORE);
        }
    }

  for(int i = 0; i < CurrNsubhalos; i++)
    Progenitors[i].FirstProgSubhaloNr = HALONR_MAX;

  for(int i = 0; i < nimport; i++)
    {
      int off = recv_data[i].subhalonr - cumul_currnsubhalos;

      if(off < 0 || off >= CurrNsubhalos)
        Terminate("off = %d CurrNsubhalos = %d", off, CurrNsubhalos);

      Progenitors[off].FirstProgSubhaloNr = recv_data[i].firstprognr;
    }

  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_CurrNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  /* bring back into original order */
  mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_PrevSubhaloNr, Communicator);
}

/********** pick the progenitor with the maximum score *****/
void mergertree::mergertree_select_maximum_score_progenitors(int nmatch)
{
  mycxxsort_parallel(desc, desc + nmatch, mergertree_compare_NewSubNr_PrevSubNr, Communicator);

  int *Send_count  = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count  = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  int *tab_CurrNsubhalos = (int *)Mem.mymalloc("tab_CurrNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);

  long long cumul_currnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_currnsubhalos += tab_CurrNsubhalos[i];

  desc_partdata *send_data = NULL;
  desc_partdata *recv_data = NULL;
  int nexport = 0, nimport = 0;
  for(int mode = 0; mode < 2; mode++)  // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task                 = 0;
      unsigned long long first = 0;
      for(int i = 0; i < nmatch; i++)
        {
          while(task < NTask - 1 && desc[i].CurrSubhaloNr.get() >= first + tab_CurrNsubhalos[task])
            {
              first += tab_CurrNsubhalos[task];
              task++;
            }

          if(mode == 0)
            Send_count[task]++;
          else
            {
              int off = Send_offset[task] + Send_count[task]++;

              send_data[off] = desc[i];
            }
        }

      if(mode == 0)  // prepare offset tables
        {
          myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (desc_partdata *)Mem.mymalloc("send_data", nexport * sizeof(desc_partdata));
          recv_data = (desc_partdata *)Mem.mymalloc("recv_data", nimport * sizeof(desc_partdata));
        }
    }

  /* exchange data */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask,
                           TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(desc_partdata), MPI_BYTE,
                           recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
        }
    }

  for(int i = 0; i < CurrNsubhalos; i++)
    {
      Progenitors[i].SubhaloNr     = cumul_currnsubhalos + i;
      Progenitors[i].ProgSubhaloNr = HALONR_MAX;
      Progenitors[i].MaxScore      = 0;
    }
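  /* Every imported matched pair belongs to a current subhalo stored on this task; among all previous subhalos
   * matched to a given current subhalo, the one with the largest accumulated ProgScore is chosen as its main
   * progenitor. */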
  for(int i = 0; i < nimport; i++)
    {
      int index = recv_data[i].CurrSubhaloNr.get() - cumul_currnsubhalos;

      if(index < 0 || index >= CurrNsubhalos)
        Terminate("index=%d CurrNsubhalos=%d", index, CurrNsubhalos);

      if(recv_data[i].ProgScore > Progenitors[index].MaxScore)
        {
          Progenitors[index].MaxScore      = recv_data[i].ProgScore;
          Progenitors[index].ProgSubhaloNr = recv_data[i].PrevSubhaloNr.get();
        }
    }

  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_CurrNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);
}

/********** determine the descendant with the maximum score *****/
void mergertree::mergertree_select_maximum_score_descendants(int nmatch)
{
  int *Send_count  = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count  = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  /* get lists of the subhalo count on each processor, and the cumulative number stored before */
  int *tab_PrevNsubhalos = (int *)Mem.mymalloc("tab_PrevNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);

  long long cumul_prevnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_prevnsubhalos += tab_PrevNsubhalos[i];

  desc_partdata *send_data = NULL;
  desc_partdata *recv_data = NULL;
  int nexport = 0, nimport = 0;
  for(int mode = 0; mode < 2; mode++)  // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task                 = 0;
      unsigned long long first = 0;
      for(int i = 0; i < nmatch; i++)
        {
          if(PrevTotNsubhalos < 1)
            Terminate("PrevTotNsubhalos = %lld", PrevTotNsubhalos);

          while(task < NTask - 1 && desc[i].PrevSubhaloNr.get() >= first + tab_PrevNsubhalos[task])
            {
              first += tab_PrevNsubhalos[task];
              task++;
            }

          if(mode == 0)
            Send_count[task]++;
          else
            {
              int off = Send_offset[task] + Send_count[task]++;

              send_data[off] = desc[i];
            }
        }

      if(mode == 0)  // prepare offset tables
        {
          myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (desc_partdata *)Mem.mymalloc("send_data", nexport * sizeof(desc_partdata));
          recv_data = (desc_partdata *)Mem.mymalloc("recv_data", nimport * sizeof(desc_partdata));
        }
    }

  /* exchange data */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(desc_partdata), MPI_BYTE, recvTask,
                           TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(desc_partdata), MPI_BYTE,
                           recvTask, TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
        }
    }

  for(int i = 0; i < PrevNsubhalos; i++)
    {
      Descendants[i].PrevSubhaloNr = cumul_prevnsubhalos + i;
      Descendants[i].DescSubhaloNr = HALONR_MAX;
      Descendants[i].MaxScore      = 0;
    }

  for(int i = 0; i < nimport; i++)
    {
      int index = recv_data[i].PrevSubhaloNr.get() - cumul_prevnsubhalos;

      if(index < 0 || index >= PrevNsubhalos)
        Terminate(
            "index=%d i=%d nimport=%d PrevNsubhalos=%d recv_data[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld "
            "cumul_prevnsubhalos=%lld",
            index, i, nimport, PrevNsubhalos, (long long)recv_data[i].PrevSubhaloNr.get(), PrevTotNsubhalos, cumul_prevnsubhalos);

      if(recv_data[i].DescScore > Descendants[index].MaxScore)
        {
          Descendants[index].MaxScore      = recv_data[i].DescScore;
          Descendants[index].DescSubhaloNr = recv_data[i].CurrSubhaloNr.get();
        }
    }

  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_PrevNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);
}

void mergertree::mergertree_set_first_descendant_with_same_progenitor(void)
{
  /* sort by progenitor to bring equal ones next to each other */
  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_ProgSubhaloNr, Communicator);

  int *Send_count  = (int *)Mem.mymalloc("Send_count", sizeof(int) * NTask);
  int *Send_offset = (int *)Mem.mymalloc("Send_offset", sizeof(int) * NTask);
  int *Recv_count  = (int *)Mem.mymalloc("Recv_count", sizeof(int) * NTask);
  int *Recv_offset = (int *)Mem.mymalloc("Recv_offset", sizeof(int) * NTask);

  int *tab_PrevNsubhalos = (int *)Mem.mymalloc("tab_PrevNsubhalos", sizeof(int) * NTask);
  MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);

  long long cumul_prevnsubhalos = 0;
  for(int i = 0; i < ThisTask; i++)
    cumul_prevnsubhalos += tab_PrevNsubhalos[i];

  struct pair_data
  {
    long long subhalonr;
    long long firstdescnr;
  };
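  /* Analogous to the first-progenitor pass above: each (progenitor, first descendant) pair is sent to the task
   * that stores the progenitor subhalo, located via the cumulative tab_PrevNsubhalos counts. */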

  pair_data *send_data = NULL;
  pair_data *recv_data = NULL;
  int nexport = 0, nimport = 0;

  for(int mode = 0; mode < 2; mode++)  // go through this twice to simplify bookkeeping
    {
      for(int i = 0; i < NTask; i++)
        Send_count[i] = 0;

      int task        = 0;
      long long first = 0;

      for(int i = 0; i < CurrNsubhalos; i++)
        {
          if(Progenitors[i].FirstDescFlag && Progenitors[i].ProgSubhaloNr != HALONR_MAX)
            {
              while(task < NTask - 1 && Progenitors[i].ProgSubhaloNr >= first + tab_PrevNsubhalos[task])
                {
                  first += tab_PrevNsubhalos[task];
                  task++;
                }

              if(mode == 0)
                Send_count[task]++;
              else
                {
                  int off = Send_offset[task] + Send_count[task]++;

                  send_data[off].subhalonr   = Progenitors[i].ProgSubhaloNr;
                  send_data[off].firstdescnr = Progenitors[i].SubhaloNr;
                }
            }
        }

      if(mode == 0)  // prepare offset tables
        {
          myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);

          Recv_offset[0] = 0;
          Send_offset[0] = 0;

          for(int j = 0; j < NTask; j++)
            {
              nexport += Send_count[j];
              nimport += Recv_count[j];

              if(j > 0)
                {
                  Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
                  Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
                }
            }

          send_data = (pair_data *)Mem.mymalloc("pair_data", nexport * sizeof(pair_data));
          recv_data = (pair_data *)Mem.mymalloc("pair_data", nimport * sizeof(pair_data));
        }
    }

  /* exchange data */
  for(int ngrp = 0; ngrp < (1 << PTask); ngrp++)
    {
      int recvTask = ThisTask ^ ngrp;

      if(recvTask < NTask)
        {
          if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
            myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                           &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(pair_data), MPI_BYTE, recvTask, TAG_DENS_A,
                           Communicator, MPI_STATUS_IGNORE);
        }
    }

  for(int i = 0; i < PrevNsubhalos; i++)
    Descendants[i].FirstDescSubhaloNr = HALONR_MAX;

  for(int i = 0; i < nimport; i++)
    {
      int off = recv_data[i].subhalonr - cumul_prevnsubhalos;

      if(off < 0 || off >= PrevNsubhalos)
        Terminate("off = %d PrevNsubhalos = %d", off, PrevNsubhalos);

      Descendants[off].FirstDescSubhaloNr = recv_data[i].firstdescnr;
    }

  Mem.myfree(recv_data);
  Mem.myfree(send_data);

  Mem.myfree(tab_PrevNsubhalos);

  Mem.myfree(Recv_offset);
  Mem.myfree(Recv_count);
  Mem.myfree(Send_offset);
  Mem.myfree(Send_count);

  /* bring back into original order */
  mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_SubhaloNr, Communicator);
}

#endif