12#include "gadgetconfig.h"
16#include <gsl/gsl_rng.h>
27#include "../data/allvars.h"
28#include "../data/dtypes.h"
29#include "../data/mymalloc.h"
30#include "../fof/fof.h"
31#include "../io/hdf5_util.h"
33#include "../logs/timer.h"
34#include "../main/main.h"
35#include "../main/simulation.h"
36#include "../mergertree/io_descendant.h"
37#include "../mergertree/io_progenitors.h"
38#include "../mergertree/mergertree.h"
39#include "../mpi_utils/mpi_utils.h"
40#include "../sort/parallel_sort.h"
41#include "../subfind/subfind.h"
42#include "../system/system.h"
47void mergertree::mergertree_determine_descendants_postproc(
int num)
49 Descendants = (desc_list *)
Mem.mymalloc_movable(&Descendants,
"Descendants", (PrevNsubhalos + 1) *
sizeof(desc_list));
50 Progenitors = (prog_list *)
Mem.mymalloc_movable(&Progenitors,
"Progenitors", (CurrNsubhalos + 1) *
sizeof(prog_list));
52 Sp->NumPart = MtrP_NumPart;
55 desc = (desc_partdata *)
Mem.mymalloc_movable(&desc,
"desc",
sizeof(desc_partdata) * Sp->NumPart);
58 for(
int i = 0; i < Sp->NumPart; i++)
60 desc[i].CurrSubhaloNr.set(MtrP[i].SubhaloNr);
61 desc[i].CurrRankInSubhalo.set(MtrP[i].RankInSubhalo);
63 desc[i].PrevSubhaloNr.set(MtrP[i].PrevSubhaloNr);
64 desc[i].PrevRankInSubhalo.set(MtrP[i].PrevRankInSubhalo);
66 if(desc[i].PrevSubhaloNr.get() >= PrevTotNsubhalos && desc[i].PrevSubhaloNr.get() !=
HALONR_MAX)
67 Terminate(
"strange: i=%d desc[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld", i, (
long long)desc[i].PrevSubhaloNr.get(),
68 (
long long)PrevTotNsubhalos);
71 mergertree_determine_descendants(num);
73 mpi_printf(
"DONE!\n");
79void mergertree::mergertree_determine_descendants_on_the_fly(
int num)
84 Descendants = (desc_list *)
Mem.mymalloc_movable(&Descendants,
"Descendants", (PrevNsubhalos + 1) *
sizeof(desc_list));
85 Progenitors = (prog_list *)
Mem.mymalloc_movable(&Progenitors,
"Progenitors", (CurrNsubhalos + 1) *
sizeof(prog_list));
88 desc = (desc_partdata *)
Mem.mymalloc_movable(&desc,
"desc",
sizeof(desc_partdata) * Sp->NumPart);
93 for(
int i = 0; i < Sp->NumPart; i++)
95 desc[i].CurrSubhaloNr = Sp->PS[i].SubhaloNr;
96 desc[i].CurrRankInSubhalo = Sp->PS[i].RankInSubhalo;
98 desc[i].PrevSubhaloNr = Sp->P[i].PrevSubhaloNr;
99 desc[i].PrevRankInSubhalo = Sp->P[i].PrevRankInSubhalo;
101 if(desc[i].PrevSubhaloNr.get() >= PrevTotNsubhalos && desc[i].PrevSubhaloNr.get() !=
HALONR_MAX)
102 Terminate(
"strange: i=%d desc[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld", i, (
long long)desc[i].PrevSubhaloNr.get(),
103 (
long long)PrevTotNsubhalos);
106 mergertree_determine_descendants(num);
109 Mem.myfree(Progenitors);
110 Mem.myfree(Descendants);
113void mergertree::mergertree_determine_descendants(
int num)
116 int nmatch = mergertree_find_matching_segments_and_scores();
119 mergertree_select_maximum_score_descendants(nmatch);
122 mergertree_select_maximum_score_progenitors(nmatch);
125 mergertree_chain_up_progenitors_with_same_descendant();
128 mergertree_set_first_progenitor_with_same_descendant();
131 mergertree_chain_up_descendants_with_same_progenitor();
134 mergertree_set_first_descendant_with_same_progenitor();
138 descendant_io Desc(
this, this->Communicator,
All.
SnapFormat);
139 Desc.mergertree_save_descendants(num);
141 progenitors_io Prog(
this, this->Communicator,
All.
SnapFormat);
142 Prog.mergertree_save_progenitors(num);
147 for(
int i = 0; i < PrevNsubhalos; i++)
148 if(Descendants[i].DescSubhaloNr !=
HALONR_MAX)
151 long long tot_count_links;
154 mpi_printf(
"MERGERTREE: Was able to identify descendants for %lld out of %lld subhalos, i.e. for a fraction of %g\n",
155 tot_count_links, (
long long)PrevTotNsubhalos, tot_count_links / (PrevTotNsubhalos +
SMALLNUM));
157 int count_splits = 0;
158 for(
int i = 0; i < CurrNsubhalos; i++)
159 if(Progenitors[i].NextDescSubhaloNr !=
HALONR_MAX)
162 long long tot_count_splits;
165 mpi_printf(
"MERGERTREE: We have found secondary descendants for %lld halos out of %lld subhalos, i.e. for a fraction of %g\n",
166 tot_count_splits, (
long long)CurrTotNsubhalos, tot_count_splits / (CurrTotNsubhalos +
SMALLNUM));
169int mergertree::mergertree_find_matching_segments_and_scores(
void)
172 int nmatch = Sp->NumPart;
174 for(
int i = 0; i < nmatch; i++)
177 desc[i] = desc[nmatch - 1];
183 for(
int i = 0; i < nmatch; i++)
185 if(desc[i].PrevRankInSubhalo.get() < NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING)
186 desc[i].DescScore = 1.0 / (1 +
pow(desc[i].PrevRankInSubhalo.get(), 0.5));
188 desc[i].DescScore = 0;
190 if(desc[i].CurrRankInSubhalo.get() < NUM_MOST_BOUND_PARTICLES_USED_FOR_TRACKING)
191 desc[i].ProgScore = 1.0 / (1 +
pow(desc[i].CurrRankInSubhalo.get(), 0.5));
193 desc[i].ProgScore = 0;
198 mycxxsort_parallel(desc, desc + nmatch, mergertree_compare_PrevSubNr_NewSubNr, Communicator);
208 for(
int i = 1; i < nmatch; i++)
209 if(desc[i].PrevSubhaloNr == desc[start].PrevSubhaloNr && desc[i].CurrSubhaloNr == desc[start].CurrSubhaloNr)
211 desc[start].DescScore += desc[i].DescScore;
212 desc[start].ProgScore += desc[i].ProgScore;
216 desc[count] = desc[i];
228 desc_partdata *desc_first = (desc_partdata *)
Mem.mymalloc(
"desc_first", NTask *
sizeof(desc_partdata));
229 desc_partdata *desc_last = (desc_partdata *)
Mem.mymalloc(
"desc_last", NTask *
sizeof(desc_partdata));
230 int *nmatch_list = (
int *)
Mem.mymalloc(
"nmatch_list", NTask *
sizeof(
int));
232 MPI_Allgather(&desc[0],
sizeof(desc_partdata), MPI_BYTE, desc_first,
sizeof(desc_partdata), MPI_BYTE,
234 MPI_Allgather(&desc[nmatch > 0 ? nmatch - 1 : 0],
sizeof(desc_partdata), MPI_BYTE, desc_last,
sizeof(desc_partdata), MPI_BYTE,
236 MPI_Allgather(&nmatch, 1, MPI_INT, nmatch_list, 1, MPI_INT, Communicator);
241 for(
int i = NTask - 1; i > 0; i--)
242 if(nmatch_list[i] > 0)
246 for(
int j = i - 1; j >= 0; j--)
248 if(nmatch_list[j] > 0)
250 if(nmatch_list[j] > 1)
252 if(desc_last[j].PrevSubhaloNr == desc_first[i].PrevSubhaloNr &&
253 desc_last[j].CurrSubhaloNr == desc_first[i].CurrSubhaloNr)
258 if(desc_first[j].PrevSubhaloNr == desc_first[i].PrevSubhaloNr &&
259 desc_first[j].CurrSubhaloNr == desc_first[i].CurrSubhaloNr)
269 if(nmatch_list[target] > 1)
271 desc_last[target].DescScore += desc_first[i].DescScore;
272 desc_last[target].ProgScore += desc_first[i].ProgScore;
274 if(ThisTask == target)
276 desc[nmatch - 1].DescScore += desc_first[i].DescScore;
277 desc[nmatch - 1].ProgScore += desc_first[i].ProgScore;
282 desc_first[target].DescScore += desc_first[i].DescScore;
283 desc_first[target].ProgScore += desc_first[i].ProgScore;
285 if(ThisTask == target)
287 desc[0].DescScore += desc_first[i].DescScore;
288 desc[0].ProgScore += desc_first[i].ProgScore;
292 desc_first[i].DescScore = -1;
293 desc_first[i].ProgScore = -1;
297 desc[0].DescScore = -1;
298 desc[0].ProgScore = -1;
303 Mem.myfree(nmatch_list);
304 Mem.myfree(desc_last);
305 Mem.myfree(desc_first);
309 if(nmatch > 0 && desc[0].DescScore < 0)
312 memmove(desc, desc + 1, nmatch *
sizeof(desc_partdata));
318void mergertree::mergertree_chain_up_descendants_with_same_progenitor(
void)
321 mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_ProgSubhaloNr, Communicator);
323 prog_list *elem_first = (prog_list *)
Mem.mymalloc(
"elem_first", NTask *
sizeof(prog_list));
324 prog_list *elem_last = (prog_list *)
Mem.mymalloc(
"elem_last", NTask *
sizeof(prog_list));
327 MPI_Allgather(&Progenitors[0],
sizeof(prog_list), MPI_BYTE, elem_first,
sizeof(prog_list), MPI_BYTE, Communicator);
328 MPI_Allgather(&Progenitors[CurrNsubhalos > 0 ? CurrNsubhalos - 1 : 0],
sizeof(prog_list), MPI_BYTE, elem_last,
sizeof(prog_list),
329 MPI_BYTE, Communicator);
332 int *tab_CurrNsubhalos = (
int *)
Mem.mymalloc(
"tab_CurrNsubhalos",
sizeof(
int) * NTask);
333 MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);
336 for(
int i = ThisTask + 1; i < NTask; i++)
337 if(tab_CurrNsubhalos[i] > 0)
344 for(
int i = ThisTask - 1; i >= 0; i--)
345 if(tab_CurrNsubhalos[i] > 0)
351 for(
int i = 0; i < CurrNsubhalos; i++)
353 if(i < CurrNsubhalos - 1)
355 if(Progenitors[i].ProgSubhaloNr == Progenitors[i + 1].ProgSubhaloNr && Progenitors[i].ProgSubhaloNr !=
HALONR_MAX)
356 Progenitors[i].NextDescSubhaloNr = Progenitors[i + 1].SubhaloNr;
358 Progenitors[i].NextDescSubhaloNr =
HALONR_MAX;
362 if(next_task >= 0 && Progenitors[i].ProgSubhaloNr == elem_first[next_task].ProgSubhaloNr &&
364 Progenitors[i].NextDescSubhaloNr = elem_first[next_task].SubhaloNr;
366 Progenitors[i].NextDescSubhaloNr =
HALONR_MAX;
371 if(Progenitors[i].ProgSubhaloNr != Progenitors[i - 1].ProgSubhaloNr && Progenitors[i].ProgSubhaloNr !=
HALONR_MAX)
372 Progenitors[i].FirstDescFlag = 1;
374 Progenitors[i].FirstDescFlag = 0;
378 if(Progenitors[i].ProgSubhaloNr !=
HALONR_MAX &&
379 (ThisTask == 0 || (prev_task >= 0 && Progenitors[i].ProgSubhaloNr != elem_last[prev_task].ProgSubhaloNr)))
380 Progenitors[i].FirstDescFlag = 1;
382 Progenitors[i].FirstDescFlag = 0;
386 Mem.myfree(tab_CurrNsubhalos);
387 Mem.myfree(elem_last);
388 Mem.myfree(elem_first);
391 mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_SubhaloNr, Communicator);
395void mergertree::mergertree_chain_up_progenitors_with_same_descendant(
void)
398 mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_DescSubhaloNr, Communicator);
400 desc_list *elem_first = (desc_list *)
Mem.mymalloc(
"elem_first", NTask *
sizeof(desc_list));
401 desc_list *elem_last = (desc_list *)
Mem.mymalloc(
"elem_last", NTask *
sizeof(desc_list));
404 MPI_Allgather(&Descendants[0],
sizeof(desc_list), MPI_BYTE, elem_first,
sizeof(desc_list), MPI_BYTE, Communicator);
405 MPI_Allgather(&Descendants[PrevNsubhalos > 0 ? PrevNsubhalos - 1 : 0],
sizeof(desc_list), MPI_BYTE, elem_last,
sizeof(desc_list),
406 MPI_BYTE, Communicator);
409 int *tab_PrevNsubhalos = (
int *)
Mem.mymalloc(
"tab_PrevNsubhalos",
sizeof(
int) * NTask);
410 MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);
413 for(
int i = ThisTask + 1; i < NTask; i++)
414 if(tab_PrevNsubhalos[i] > 0)
421 for(
int i = ThisTask - 1; i >= 0; i--)
422 if(tab_PrevNsubhalos[i] > 0)
428 for(
int i = 0; i < PrevNsubhalos; i++)
430 if(i < PrevNsubhalos - 1)
432 if(Descendants[i].DescSubhaloNr == Descendants[i + 1].DescSubhaloNr && Descendants[i].DescSubhaloNr !=
HALONR_MAX)
433 Descendants[i].NextProgSubhaloNr = Descendants[i + 1].PrevSubhaloNr;
435 Descendants[i].NextProgSubhaloNr =
HALONR_MAX;
439 if(next_task >= 0 && Descendants[i].DescSubhaloNr == elem_first[next_task].DescSubhaloNr &&
441 Descendants[i].NextProgSubhaloNr = elem_first[next_task].PrevSubhaloNr;
443 Descendants[i].NextProgSubhaloNr =
HALONR_MAX;
448 if(Descendants[i].DescSubhaloNr != Descendants[i - 1].DescSubhaloNr && Descendants[i].DescSubhaloNr !=
HALONR_MAX)
449 Descendants[i].FirstProgFlag = 1;
451 Descendants[i].FirstProgFlag = 0;
455 if(Descendants[i].DescSubhaloNr !=
HALONR_MAX &&
456 (ThisTask == 0 || (prev_task >= 0 && Descendants[i].DescSubhaloNr != elem_last[prev_task].DescSubhaloNr)))
457 Descendants[i].FirstProgFlag = 1;
459 Descendants[i].FirstProgFlag = 0;
463 Mem.myfree(tab_PrevNsubhalos);
464 Mem.myfree(elem_last);
465 Mem.myfree(elem_first);
468 mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_PrevSubhaloNr, Communicator);
472void mergertree::mergertree_set_first_progenitor_with_same_descendant(
void)
475 mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_DescSubhaloNr, Communicator);
477 int *Send_count = (
int *)
Mem.mymalloc(
"Send_count",
sizeof(
int) * NTask);
478 int *Send_offset = (
int *)
Mem.mymalloc(
"Send_offset",
sizeof(
int) * NTask);
479 int *Recv_count = (
int *)
Mem.mymalloc(
"Recv_count",
sizeof(
int) * NTask);
480 int *Recv_offset = (
int *)
Mem.mymalloc(
"Recv_offset",
sizeof(
int) * NTask);
482 int *tab_CurrNsubhalos = (
int *)
Mem.mymalloc(
"tab_CurrNsubhalos",
sizeof(
int) * NTask);
483 MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);
485 long long cumul_currnsubhalos = 0;
486 for(
int i = 0; i < ThisTask; i++)
487 cumul_currnsubhalos += tab_CurrNsubhalos[i];
492 long long firstprognr;
495 pair_data *send_data = NULL;
496 pair_data *recv_data = NULL;
497 int nexport = 0, nimport = 0;
499 for(
int mode = 0; mode < 2; mode++)
501 for(
int i = 0; i < NTask; i++)
507 for(
int i = 0; i < PrevNsubhalos; i++)
509 if(Descendants[i].FirstProgFlag && Descendants[i].DescSubhaloNr !=
HALONR_MAX)
511 while(task < NTask - 1 && Descendants[i].DescSubhaloNr >= first + tab_CurrNsubhalos[task])
513 first += tab_CurrNsubhalos[task];
521 int off = Send_offset[task] + Send_count[task]++;
523 send_data[off].subhalonr = Descendants[i].DescSubhaloNr;
524 send_data[off].firstprognr = Descendants[i].PrevSubhaloNr;
531 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
536 for(
int j = 0; j < NTask; j++)
538 nexport += Send_count[j];
539 nimport += Recv_count[j];
543 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
544 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
548 send_data = (pair_data *)
Mem.mymalloc(
"pair_data", nexport *
sizeof(pair_data));
549 recv_data = (pair_data *)
Mem.mymalloc(
"pair_data", nimport *
sizeof(pair_data));
554 for(
int ngrp = 0; ngrp < (1 << PTask); ngrp++)
556 int recvTask = ThisTask ^ ngrp;
560 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
561 myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] *
sizeof(pair_data), MPI_BYTE, recvTask,
TAG_DENS_A,
562 &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] *
sizeof(pair_data), MPI_BYTE, recvTask,
TAG_DENS_A,
563 Communicator, MPI_STATUS_IGNORE);
567 for(
int i = 0; i < CurrNsubhalos; i++)
568 Progenitors[i].FirstProgSubhaloNr =
HALONR_MAX;
570 for(
int i = 0; i < nimport; i++)
572 int off = recv_data[i].subhalonr - cumul_currnsubhalos;
574 if(off < 0 || off >= CurrNsubhalos)
575 Terminate(
"off = %d CurrNsubhalos = %d", off, CurrNsubhalos);
577 Progenitors[off].FirstProgSubhaloNr = recv_data[i].firstprognr;
580 Mem.myfree(recv_data);
581 Mem.myfree(send_data);
583 Mem.myfree(tab_CurrNsubhalos);
585 Mem.myfree(Recv_offset);
586 Mem.myfree(Recv_count);
587 Mem.myfree(Send_offset);
588 Mem.myfree(Send_count);
591 mycxxsort_parallel(Descendants, Descendants + PrevNsubhalos, mergertree_compare_PrevSubhaloNr, Communicator);
595void mergertree::mergertree_select_maximum_score_progenitors(
int nmatch)
597 mycxxsort_parallel(desc, desc + nmatch, mergertree_compare_NewSubNr_PrevSubNr, Communicator);
599 int *Send_count = (
int *)
Mem.mymalloc(
"Send_count",
sizeof(
int) * NTask);
600 int *Send_offset = (
int *)
Mem.mymalloc(
"Send_offset",
sizeof(
int) * NTask);
601 int *Recv_count = (
int *)
Mem.mymalloc(
"Recv_count",
sizeof(
int) * NTask);
602 int *Recv_offset = (
int *)
Mem.mymalloc(
"Recv_offset",
sizeof(
int) * NTask);
604 int *tab_CurrNsubhalos = (
int *)
Mem.mymalloc(
"tab_CurrNsubhalos",
sizeof(
int) * NTask);
605 MPI_Allgather(&CurrNsubhalos, 1, MPI_INT, tab_CurrNsubhalos, 1, MPI_INT, Communicator);
607 long long cumul_currnsubhalos = 0;
608 for(
int i = 0; i < ThisTask; i++)
609 cumul_currnsubhalos += tab_CurrNsubhalos[i];
611 desc_partdata *send_data = NULL;
612 desc_partdata *recv_data = NULL;
613 int nexport = 0, nimport = 0;
614 for(
int mode = 0; mode < 2; mode++)
616 for(
int i = 0; i < NTask; i++)
620 unsigned long long first = 0;
621 for(
int i = 0; i < nmatch; i++)
623 while(task < NTask - 1 && desc[i].CurrSubhaloNr.get() >= first + tab_CurrNsubhalos[task])
625 first += tab_CurrNsubhalos[task];
633 int off = Send_offset[task] + Send_count[task]++;
635 send_data[off] = desc[i];
641 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
646 for(
int j = 0; j < NTask; j++)
648 nexport += Send_count[j];
649 nimport += Recv_count[j];
653 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
654 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
658 send_data = (desc_partdata *)
Mem.mymalloc(
"send_data", nexport *
sizeof(desc_partdata));
659 recv_data = (desc_partdata *)
Mem.mymalloc(
"recv_data", nimport *
sizeof(desc_partdata));
664 for(
int ngrp = 0; ngrp < (1 << PTask); ngrp++)
666 int recvTask = ThisTask ^ ngrp;
670 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
671 myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] *
sizeof(desc_partdata), MPI_BYTE, recvTask,
672 TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] *
sizeof(desc_partdata), MPI_BYTE,
673 recvTask,
TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
677 for(
int i = 0; i < CurrNsubhalos; i++)
679 Progenitors[i].SubhaloNr = cumul_currnsubhalos + i;
681 Progenitors[i].MaxScore = 0;
684 for(
int i = 0; i < nimport; i++)
686 int index = recv_data[i].CurrSubhaloNr.get() - cumul_currnsubhalos;
688 if(index < 0 || index >= CurrNsubhalos)
689 Terminate(
"index=%d CurrNsubhalos=%d", index, CurrNsubhalos);
691 if(recv_data[i].ProgScore > Progenitors[index].MaxScore)
693 Progenitors[index].MaxScore = recv_data[i].ProgScore;
694 Progenitors[index].ProgSubhaloNr = recv_data[i].PrevSubhaloNr.get();
698 Mem.myfree(recv_data);
699 Mem.myfree(send_data);
701 Mem.myfree(tab_CurrNsubhalos);
703 Mem.myfree(Recv_offset);
704 Mem.myfree(Recv_count);
705 Mem.myfree(Send_offset);
706 Mem.myfree(Send_count);
710void mergertree::mergertree_select_maximum_score_descendants(
int nmatch)
712 int *Send_count = (
int *)
Mem.mymalloc(
"Send_count",
sizeof(
int) * NTask);
713 int *Send_offset = (
int *)
Mem.mymalloc(
"Send_offset",
sizeof(
int) * NTask);
714 int *Recv_count = (
int *)
Mem.mymalloc(
"Recv_count",
sizeof(
int) * NTask);
715 int *Recv_offset = (
int *)
Mem.mymalloc(
"Recv_offset",
sizeof(
int) * NTask);
718 int *tab_PrevNsubhalos = (
int *)
Mem.mymalloc(
"tab_PrevNsubhalos",
sizeof(
int) * NTask);
719 MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);
721 long long cumul_prevnsubhalos = 0;
722 for(
int i = 0; i < ThisTask; i++)
723 cumul_prevnsubhalos += tab_PrevNsubhalos[i];
725 desc_partdata *send_data = NULL;
726 desc_partdata *recv_data = NULL;
727 int nexport = 0, nimport = 0;
728 for(
int mode = 0; mode < 2; mode++)
730 for(
int i = 0; i < NTask; i++)
734 unsigned long long first = 0;
735 for(
int i = 0; i < nmatch; i++)
737 if(PrevTotNsubhalos < 1)
738 Terminate(
"PrevTotNsubhalos = %lld", PrevTotNsubhalos);
740 while(task < NTask - 1 && desc[i].PrevSubhaloNr.get() >= first + tab_PrevNsubhalos[task])
742 first += tab_PrevNsubhalos[task];
750 int off = Send_offset[task] + Send_count[task]++;
752 send_data[off] = desc[i];
758 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
763 for(
int j = 0; j < NTask; j++)
765 nexport += Send_count[j];
766 nimport += Recv_count[j];
770 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
771 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
775 send_data = (desc_partdata *)
Mem.mymalloc(
"send_data", nexport *
sizeof(desc_partdata));
776 recv_data = (desc_partdata *)
Mem.mymalloc(
"recv_data", nimport *
sizeof(desc_partdata));
781 for(
int ngrp = 0; ngrp < (1 << PTask); ngrp++)
783 int recvTask = ThisTask ^ ngrp;
787 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
788 myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] *
sizeof(desc_partdata), MPI_BYTE, recvTask,
789 TAG_DENS_A, &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] *
sizeof(desc_partdata), MPI_BYTE,
790 recvTask,
TAG_DENS_A, Communicator, MPI_STATUS_IGNORE);
794 for(
int i = 0; i < PrevNsubhalos; i++)
796 Descendants[i].PrevSubhaloNr = cumul_prevnsubhalos + i;
798 Descendants[i].MaxScore = 0;
801 for(
int i = 0; i < nimport; i++)
803 int index = recv_data[i].PrevSubhaloNr.get() - cumul_prevnsubhalos;
805 if(index < 0 || index >= PrevNsubhalos)
807 "index=%d i=%d nimport=%d PrevNsubhalos=%d recv_data[i].PrevSubhaloNr=%lld PrevTotNsubhalos=%lld "
808 "cumul_prevnsubhalos=%lld",
809 index, i, nimport, PrevNsubhalos, (
long long)recv_data[i].PrevSubhaloNr.get(), PrevTotNsubhalos, cumul_prevnsubhalos);
811 if(recv_data[i].DescScore > Descendants[index].MaxScore)
813 Descendants[index].MaxScore = recv_data[i].DescScore;
814 Descendants[index].DescSubhaloNr = recv_data[i].CurrSubhaloNr.get();
818 Mem.myfree(recv_data);
819 Mem.myfree(send_data);
821 Mem.myfree(tab_PrevNsubhalos);
823 Mem.myfree(Recv_offset);
824 Mem.myfree(Recv_count);
825 Mem.myfree(Send_offset);
826 Mem.myfree(Send_count);
829void mergertree::mergertree_set_first_descendant_with_same_progenitor(
void)
832 mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_ProgSubhaloNr, Communicator);
834 int *Send_count = (
int *)
Mem.mymalloc(
"Send_count",
sizeof(
int) * NTask);
835 int *Send_offset = (
int *)
Mem.mymalloc(
"Send_offset",
sizeof(
int) * NTask);
836 int *Recv_count = (
int *)
Mem.mymalloc(
"Recv_count",
sizeof(
int) * NTask);
837 int *Recv_offset = (
int *)
Mem.mymalloc(
"Recv_offset",
sizeof(
int) * NTask);
839 int *tab_PrevNsubhalos = (
int *)
Mem.mymalloc(
"tab_PrevNsubhalos",
sizeof(
int) * NTask);
840 MPI_Allgather(&PrevNsubhalos, 1, MPI_INT, tab_PrevNsubhalos, 1, MPI_INT, Communicator);
842 long long cumul_prevnsubhalos = 0;
843 for(
int i = 0; i < ThisTask; i++)
844 cumul_prevnsubhalos += tab_PrevNsubhalos[i];
849 long long firstdescnr;
852 pair_data *send_data = NULL;
853 pair_data *recv_data = NULL;
854 int nexport = 0, nimport = 0;
856 for(
int mode = 0; mode < 2; mode++)
858 for(
int i = 0; i < NTask; i++)
864 for(
int i = 0; i < CurrNsubhalos; i++)
866 if(Progenitors[i].FirstDescFlag && Progenitors[i].ProgSubhaloNr !=
HALONR_MAX)
868 while(task < NTask - 1 && Progenitors[i].ProgSubhaloNr >= first + tab_PrevNsubhalos[task])
870 first += tab_PrevNsubhalos[task];
878 int off = Send_offset[task] + Send_count[task]++;
880 send_data[off].subhalonr = Progenitors[i].ProgSubhaloNr;
881 send_data[off].firstdescnr = Progenitors[i].SubhaloNr;
888 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, Communicator);
893 for(
int j = 0; j < NTask; j++)
895 nexport += Send_count[j];
896 nimport += Recv_count[j];
900 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
901 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
905 send_data = (pair_data *)
Mem.mymalloc(
"pair_data", nexport *
sizeof(pair_data));
906 recv_data = (pair_data *)
Mem.mymalloc(
"pair_data", nimport *
sizeof(pair_data));
911 for(
int ngrp = 0; ngrp < (1 << PTask); ngrp++)
913 int recvTask = ThisTask ^ ngrp;
917 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
918 myMPI_Sendrecv(&send_data[Send_offset[recvTask]], Send_count[recvTask] *
sizeof(pair_data), MPI_BYTE, recvTask,
TAG_DENS_A,
919 &recv_data[Recv_offset[recvTask]], Recv_count[recvTask] *
sizeof(pair_data), MPI_BYTE, recvTask,
TAG_DENS_A,
920 Communicator, MPI_STATUS_IGNORE);
924 for(
int i = 0; i < PrevNsubhalos; i++)
925 Descendants[i].FirstDescSubhaloNr =
HALONR_MAX;
927 for(
int i = 0; i < nimport; i++)
929 int off = recv_data[i].subhalonr - cumul_prevnsubhalos;
931 if(off < 0 || off >= PrevNsubhalos)
932 Terminate(
"off = %d PrevNsubhalos = %d", off, PrevNsubhalos);
934 Descendants[off].FirstDescSubhaloNr = recv_data[i].firstdescnr;
937 Mem.myfree(recv_data);
938 Mem.myfree(send_data);
940 Mem.myfree(tab_PrevNsubhalos);
942 Mem.myfree(Recv_offset);
943 Mem.myfree(Recv_count);
944 Mem.myfree(Send_offset);
945 Mem.myfree(Send_count);
948 mycxxsort_parallel(Progenitors, Progenitors + CurrNsubhalos, mergertree_compare_SubhaloNr, Communicator);
global_data_all_processes All
void sumup_large_ints(int n, int *src, long long *res, MPI_Comm comm)
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
expr pow(half base, half exp)
double mycxxsort_parallel(T *begin, T *end, Comp comp, MPI_Comm comm)