12#include "gadgetconfig.h"
20#include "../data/allvars.h"
21#include "../data/dtypes.h"
22#include "../data/intposconvert.h"
23#include "../data/mymalloc.h"
24#include "../domain/domain.h"
25#include "../logs/logs.h"
26#include "../logs/timer.h"
27#include "../main/simulation.h"
28#include "../mpi_utils/mpi_utils.h"
29#include "../sort/cxxsort.h"
30#include "../sort/peano.h"
31#include "../system/system.h"
32#include "../time_integration/timestep.h"
33#include "../tree/tree.h"
51template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
54 if(MaxPart == 0 && ninsert == 0)
57 if(MaxPart == 0 && ninsert > 0)
58 Terminate(
"Strange, we'll try to construct a tree for %d particles, but it appears not be allocated\n", ninsert);
60 if(ninsert == Tp->NumPart)
61 D->mpi_printf(
"TREE: Full tree construction for all particles. (presently allocated=%g MB)\n",
Mem.
getAllocatedBytesInMB());
64 IndexList = indexlist;
75 int flag_single = treebuild_construct();
79 MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, D->Communicator);
87 treeallocate(Tp->NumPart, Tp, D);
93 MPI_Allreduce(&NumNodes, &max_numnodes, 1, MPI_INT, MPI_MAX, D->Communicator);
95 if((MaxNodes - D->NTopnodes) > 1.5 * (max_numnodes - D->NTopnodes))
98 double newvalue = std::max<double>(1.1 * (max_numnodes - D->NTopnodes) / (MaxPart +
BASENUMBER), 0.02);
100 if(newvalue < oldvalue)
106 treeallocate(Tp->NumPart, Tp, D);
121 int ntopleaves = D->NumTopleafOfTask[D->ThisTask];
122 int *list = D->ListOfTopleaves + D->FirstTopleafOfTask[D->ThisTask];
124 for(
int i = 0; i < ntopleaves; i++)
126 int no = NodeIndex[list[i]];
127 int sib = NodeSibling[list[i]];
134 exchange_topleafdata();
137 if(TreeSharedMem_ThisTask == 0)
140 for(
int k = D->NTopnodes; k < NumNodes; k++)
142 int index = MaxPart + k;
143 Nodes[index].OriginTask = D->ThisTask;
144 Nodes[index].OriginNode = index;
145 Nodes[index].access.clear();
146 Nodes[index].flag_already_fetched = 0;
149 if(TreeSharedMem_ThisTask == 0)
151 for(
int k = 0; k < D->NTopnodes; k++)
153 int index = MaxPart + k;
154 TopNodes[index].OriginTask = D->ThisTask;
155 TopNodes[index].OriginNode = index;
156 TopNodes[index].access.clear();
157 TopNodes[index].flag_already_fetched = 0;
161 tree_initialize_leaf_node_access_info();
166 report_log_message();
173template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
176 if(TreeSharedMem_ThisTask == 0)
178 for(
int k = 0; k < D->NTopleaves; k++)
180 int index = NodeIndex[k];
183 TopNodes[index].cannot_be_opened_locally = 0;
186 TopNodes[index].cannot_be_opened_locally = 1;
187 TopNodes[index].flag_already_fetched = 0;
188 TopNodes[index].nextnode = MaxPart + MaxNodes + k;
189 TopNodes[index].nextnode_shmrank = TreeSharedMem_ThisTask;
191 TopNodes[index].OriginTask = D->TaskOfLeaf[k];
192 TopNodes[index].OriginNode = index;
214template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
217 if(TreeSharedMem_ThisTask == 0)
220 NextFreeNode = MaxPart;
222 node *nfreep = &TopNodes[NextFreeNode];
223 nfreep->nextnode = -1;
224 nfreep->sibling = -1;
227 nfreep->sibling_shmrank = TreeSharedMem_ThisTask;
228 nfreep->nextnode_shmrank = TreeSharedMem_ThisTask;
230 for(
int j = 0; j < 3; j++)
233 NodeIndex[0] = NextFreeNode;
245 if(create_empty_nodes(MaxPart, 1, 0, 1, -1, 0, 0, 0) < 0)
249 MPI_Bcast(&NumNodes, 1, MPI_INT, 0, D->Communicator);
250 MPI_Bcast(&NextFreeNode, 1, MPI_INT, 0, D->Communicator);
252 if(NumNodes != D->NTopnodes)
253 Terminate(
"NumNodes=%d != D->NTopnodes=%d", NumNodes, D->NTopnodes);
255 FirstNonTopLevelNode = NextFreeNode;
257 if(MaxPart + D->NTopnodes != FirstNonTopLevelNode)
260 for(
int j = 0; j < D->NTask; j++)
264 int *node_list = (
int *)
Mem.mymalloc_movable(&node_list,
"node_list", Tp->NumPart *
sizeof(
int));
265 int *task_list = (
int *)
Mem.mymalloc_movable(&task_list,
"task_list", Tp->NumPart *
sizeof(
int));
269 for(
int idx = 0; idx < Ninsert; idx++)
282 tree_get_node_and_task(i, no, task);
289 if(task != D->ThisTask)
293 myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator);
300 for(
int j = 0; j < D->NTask; j++)
302 NumPartImported += Recv_count[j];
303 NumPartExported += Send_count[j];
306 Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
307 Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
311 Points = (point_data *)
Mem.mymalloc_movable(&Points,
"Points", NumPartImported *
sizeof(point_data));
312 Nextnode = (
int *)
Mem.mymalloc_movable(&Nextnode,
"Nextnode", (MaxPart + D->NTopleaves + NumPartImported) *
sizeof(int));
313 Father = (
int *)
Mem.mymalloc_movable(&Father,
"Father", (MaxPart + NumPartImported) *
sizeof(int));
316 for(
int i = 0; i < D->NTopleaves; i++)
318 int index = NodeIndex[i];
320 if(TreeSharedMem_ThisTask == 0)
321 TopNodes[index].nextnode = MaxPart + MaxNodes + i;
324 Nextnode[MaxPart + i] = TopNodes[index].sibling;
327 point_data *export_Points = (point_data *)
Mem.mymalloc(
"export_Points", NumPartExported *
sizeof(point_data));
329 for(
int j = 0; j < D->NTask; j++)
332 for(
int idx = 0; idx < Ninsert; idx++)
342 tree_get_node_and_task(i, no, task);
344 int task = task_list[i];
345 int no = node_list[i];
348 if(task != D->ThisTask)
350 int n = Send_offset[task] + Send_count[task]++;
352 fill_in_export_points(&export_Points[n], i, no);
357 for(
int ngrp = 1; ngrp < (1 << D->PTask); ngrp++)
359 int recvTask = D->ThisTask ^ ngrp;
360 if(recvTask < D->NTask)
361 if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
362 myMPI_Sendrecv(&export_Points[Send_offset[recvTask]], Send_count[recvTask] *
sizeof(point_data), MPI_BYTE, recvTask,
363 TAG_DENS_A, &Points[Recv_offset[recvTask]], Recv_count[recvTask] *
sizeof(point_data), MPI_BYTE, recvTask,
364 TAG_DENS_A, D->Communicator, MPI_STATUS_IGNORE);
367 Mem.myfree(export_Points);
369 ImportedNodeOffset = MaxPart + MaxNodes + D->NTopleaves;
371 MPI_Allreduce(&NumPartImported, &EndOfTreePoints, 1, MPI_INT, MPI_MAX, D->Communicator);
372 EndOfTreePoints += ImportedNodeOffset;
373 EndOfForeignNodes = EndOfTreePoints + (INT_MAX - EndOfTreePoints) / 2;
376 index_data *index_list =
377 (index_data *)
Mem.mymalloc_movable(&index_list,
"index_list", (Ninsert + NumPartImported) *
sizeof(index_data));
380 for(
int idx = 0; idx < Ninsert; idx++)
390 tree_get_node_and_task(i, no, task);
392 int task = task_list[i];
393 int no = node_list[i];
396 if(task == D->ThisTask)
398 index_list[count].p = i;
399 index_list[count].subnode = no;
404 for(
int i = 0; i < NumPartImported; i++)
406 index_list[count].p = i + ImportedNodeOffset;
407 index_list[count].subnode = Points[i].no;
412 Mem.myfree_movable(task_list);
413 Mem.myfree_movable(node_list);
417 mycxxsort(index_list, index_list + count, compare_index_data_subnode);
420 int ntopleaves = D->NumTopleafOfTask[D->ThisTask];
423 for(
int n = 0; n < ntopleaves; n++)
425 int no = D->ListOfTopleaves[D->FirstTopleafOfTask[D->ThisTask] + n];
426 int th = NodeIndex[no];
427 unsigned char level = NodeLevel[no];
428 int sibling = NodeSibling[no];
430 while(start < count && index_list[start].subnode < no)
436 while(last < count && index_list[last].subnode == no)
439 int num = last - start;
441 if(treebuild_insert_group_of_points(num, &index_list[start], th, level, sibling))
451 if((NumNodes = NextFreeNode - MaxPart) >= MaxNodes)
455 Tp->dump_particles();
457 "task %d: looks like a serious problem, stopping with particle dump. NumNodes=%d MaxNodes=%d NumPartImported=%d "
459 D->ThisTask, NumNodes, MaxNodes, NumPartImported, Tp->NumPart);
463 Mem.myfree(index_list);
479template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
481 unsigned char level,
int sibling)
485 "It appears we have reached the bottom of the tree because there are more than TREE_NUM_BEFORE_NODESPLIT=%d particles in the "
486 "smallest tree node representable for BITS_FOR_POSITIONS=%d.\n"
487 "Either eliminate the particles at (nearly) indentical coordinates, increase the setting for TREE_NUM_BEFORE_NODESPLIT, or "
488 "possibly enlarge BITS_FOR_POSITIONS if you have really not enough dynamic range\n",
497 int subcount[8] = {0, 0, 0, 0, 0, 0, 0, 0}, subnode[8];
500 for(
int i = 0; i < num; i++)
504 int p = index_list[i].p;
507 intpos = Tp->P[p].IntPos;
511 int n = p - ImportedNodeOffset;
512 intpos = Points[n].IntPos;
515 unsigned char subnode = (((
unsigned char)((intpos[0] & mask) >> shiftx)) | ((
unsigned char)((intpos[1] & mask) >> shifty)) |
516 ((
unsigned char)((intpos[2] & mask) >> shiftz)));
522 subintpos[subnode] = intpos;
524 index_list[i].subnode = subnode;
528 mycxxsort(index_list, index_list + num, compare_index_data_subnode);
534 node *nfreep_last = NULL;
537 for(
int i = 0; i < 8; i++)
543 thnew = NextFreeNode++;
545 if(thnew - MaxPart >= MaxNodes)
552 nfreep_last->sibling = thnew;
553 nfreep_last->nextnode = thnew;
555 nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
556 nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
560 get_nodep(th)->nextnode = thnew;
561 get_nodep(th)->nextnode_shmrank = TreeSharedMem_ThisTask;
564 if(thnew < MaxPart + D->NTopnodes)
565 Terminate(
"thnew = %d < MaxPart=%d + D->NTopnodes=%d", thnew, MaxPart, D->NTopnodes);
567 node *nfreep = get_nodep(thnew);
570 nfreep->level = level + 1;
572 nfreep->center[0] = ((subintpos[i][0] & centermask) | mask);
573 nfreep->center[1] = ((subintpos[i][1] & centermask) | mask);
574 nfreep->center[2] = ((subintpos[i][2] & centermask) | mask);
576 nfreep->nextnode_shmrank = TreeSharedMem_ThisTask;
577 nfreep->sibling_shmrank = TreeSharedMem_ThisTask;
579 nfreep_last = nfreep;
585 int p_last = -1, p_first = -1;
588 for(
int i = 0, off = 0; i < 8; i++)
593 for(
int j = 0; j < subcount[i]; j++)
595 int p = index_list[off + j].p;
597 if(nfreep_last == NULL && p_first == -1)
599 get_nodep(th)->nextnode = p;
600 get_nodep(th)->nextnode_shmrank = TreeSharedMem_ThisTask;
606 Father[p - MaxNodes - D->NTopleaves] = th;
611 Nextnode[p_last] = p;
613 Nextnode[p_last - MaxNodes] = p;
629 Nextnode[p_last] = sibling;
631 Nextnode[p_last - MaxNodes] = sibling;
638 nfreep_last->sibling = p_first;
639 nfreep_last->nextnode = p_first;
641 nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
642 nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
646 nfreep_last->sibling = sibling;
647 nfreep_last->nextnode = sibling;
649 nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
650 nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
654 for(
int i = 0, off = 0; i < 8; i++)
658 if(subnode[i] < MaxPart + D->NTopnodes)
659 Terminate(
"subnode[i]=%d < MaxPart=%d + D->NTopnodes=%d", subnode[i], MaxPart, D->NTopnodes);
662 treebuild_insert_group_of_points(subcount[i], &index_list[off], subnode[i], level + 1, get_nodep(subnode[i])->sibling);
683template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
685 int no_parent ,
int level ,
687 int bits ,
int sibling,
692 if(D->TopNodes[topnode].Daughter >= 0)
695 int nostart = NextFreeNode;
698 for(
int i = 0; i < 2; i++)
699 for(
int j = 0; j < 2; j++)
700 for(
int k = 0; k < 2; k++)
702 if(NumNodes >= MaxNodes)
707 sprintf(buf,
"task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", D->ThisTask,
709 Tp->dump_particles();
715 int no = NextFreeNode++;
720 TopNodes[no_parent].nextnode = no;
724 TopNodes[no].father = no_parent;
726 if(i + 2 * j + 4 * k == 7)
727 TopNodes[no].sibling = sibling;
729 TopNodes[no].sibling = no + 1;
731 TopNodes[no].nextnode = TopNodes[no].sibling;
733 TopNodes[no].level = level;
736 TopNodes[no].center[0] = TopNodes[no_parent].center[0] + (2 * i - 1) * lenhalf;
737 TopNodes[no].center[1] = TopNodes[no_parent].center[1] + (2 * j - 1) * lenhalf;
738 TopNodes[no].center[2] = TopNodes[no_parent].center[2] + (2 * k - 1) * lenhalf;
740 TopNodes[no].sibling_shmrank = TreeSharedMem_ThisTask;
741 TopNodes[no].nextnode_shmrank = TreeSharedMem_ThisTask;
745 for(
int i = 0; i < 2; i++)
746 for(
int j = 0; j < 2; j++)
747 for(
int k = 0; k < 2; k++)
752 int sub = 7 & key.
ls;
754 if(D->TopNodes[D->TopNodes[topnode].Daughter + sub].Daughter == -1)
756 NodeIndex[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = no;
757 NodeLevel[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = level;
758 NodeSibling[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = TopNodes[no].sibling;
762 if(create_empty_nodes(no, level + 1, D->TopNodes[topnode].Daughter + sub, bits + 1, TopNodes[no].sibling, 2 * x + i,
763 2 * y + j, 2 * z + k) < 0)
775template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
782 if(max_partindex != -1)
785 if(max_partindex != -1)
786 MPI_Allreduce(&max_partindex, &MaxPart, 1, MPI_INT, MPI_MAX, D->Communicator);
794 Send_count = (
int *)
Mem.mymalloc_movable(&Send_count,
"Send_count",
sizeof(
int) * D->NTask);
795 Send_offset = (
int *)
Mem.mymalloc_movable(&Send_offset,
"Send_offset",
sizeof(
int) * D->NTask);
796 Recv_count = (
int *)
Mem.mymalloc_movable(&Recv_count,
"Recv_count",
sizeof(
int) * D->NTask);
797 Recv_offset = (
int *)
Mem.mymalloc_movable(&Recv_offset,
"Recv_offset",
sizeof(
int) * D->NTask);
799 if(max_partindex != -1)
801 MPI_Allreduce(MPI_IN_PLACE, &
All.
TreeAllocFactor, 1, MPI_DOUBLE, MPI_MAX, D->Communicator);
806 MPI_Allreduce(&MaxNodes, &max_nodes, 1, MPI_INT, MPI_MAX, D->Communicator);
808 if(max_nodes != MaxNodes)
809 Terminate(
"Strange: different maxnodes detected: %d %d", max_nodes, MaxNodes);
812 MPI_Allreduce(&D->NTopleaves, &max_leaves, 1, MPI_INT, MPI_MAX, D->Communicator);
813 if(max_leaves != D->NTopleaves)
814 Terminate(
"Strange: different maxnodes detected: %d %d", max_leaves, D->NTopleaves);
818 max_partindex = MaxPart;
821 MPI_Comm_rank(TreeSharedMemComm, &TreeSharedMem_ThisTask);
822 MPI_Comm_size(TreeSharedMemComm, &TreeSharedMem_NTask);
824 TreeNodes_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreeNodes_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
825 TreePoints_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreePoints_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
826 TreeNextnode_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreeNextnode_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
827 TreeForeign_Nodes_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreeForeign_Nodes_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
828 TreeForeign_Points_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreeForeign_Points_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
829 TreeP_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreeP_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
830 TreeSphP_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreeSphP_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
831 TreePS_offsets = (ptrdiff_t *)
Mem.mymalloc(
"TreePS_offsets", TreeSharedMem_NTask *
sizeof(ptrdiff_t));
833 TreeSharedMemBaseAddr = (
void **)
Mem.mymalloc(
"TreeSharedMemBaseAddr", TreeSharedMem_NTask *
sizeof(
void *));
835 for(
int i = 0; i < TreeSharedMem_NTask; i++)
846 if(TreeSharedMem_ThisTask == 0)
851 NodeLevel = (
unsigned char *)
Mem.mymalloc(
"NodeLevel", D->NTopleaves *
sizeof(
unsigned char));
852 NodeSibling = (
int *)
Mem.mymalloc(
"NodeSibling", D->NTopleaves *
sizeof(
int));
853 NodeIndex = (
int *)
Mem.mymalloc(
"NodeIndex", D->NTopleaves *
sizeof(
int));
855 TopNodes = (node *)
Mem.mymalloc(
"TopNodes", D->NTopnodes *
sizeof(node));
863 size_t tab_len[4] = {D->NTopleaves *
sizeof(
unsigned char), D->NTopleaves *
sizeof(
int), D->NTopleaves *
sizeof(int),
864 D->NTopnodes *
sizeof(node)};
866 MPI_Send(tab_len, 4 *
sizeof(tab_len), MPI_BYTE, ghost_rank,
TAG_TOPNODE_ALLOC, MPI_COMM_WORLD);
869 MPI_Recv(off, 4 *
sizeof(ptrdiff_t), MPI_BYTE, ghost_rank,
TAG_TOPNODE_OFFSET, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
878 MPI_Recv(&TreeInfoHandle, 1, MPI_INT, ghost_rank,
TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
882 Nodes = (node *)
Mem.mymalloc_movable(&Nodes,
"Nodes", (MaxNodes - D->NTopnodes + 1) *
sizeof(node));
883 Nodes -= (MaxPart + D->NTopnodes);
885 if(max_partindex != -1)
886 treeallocate_share_topnode_addresses();
889template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
892 MPI_Bcast(&TreeInfoHandle, 1, MPI_INT, 0, TreeSharedMemComm);
894 ptrdiff_t off[4] = {((
char *)NodeLevel -
Mem.
Base), ((
char *)NodeSibling -
Mem.
Base), ((
char *)NodeIndex -
Mem.
Base),
895 ((
char *)TopNodes -
Mem.
Base)};
897 MPI_Bcast(off, 4 *
sizeof(ptrdiff_t), MPI_BYTE, 0, TreeSharedMemComm);
900 MPI_Bcast(&shmrank, 1, MPI_INT, 0, TreeSharedMemComm);
908template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
912 Terminate(
"This version of the tree communication algorithm only works with the full simulation partition");
915 off = (
char *)Nodes -
Mem.
Base;
916 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreeNodes_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
917 off = (
char *)Points -
Mem.
Base;
918 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreePoints_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
919 off = (
char *)Nextnode -
Mem.
Base;
920 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreeNextnode_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
921 off = (
char *)Foreign_Nodes -
Mem.
Base;
922 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreeForeign_Nodes_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
923 off = (
char *)Foreign_Points -
Mem.
Base;
924 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreeForeign_Points_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
925 off = (
char *)Tp->P -
Mem.
Base;
926 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreeP_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
927 off = (
char *)Tp->SphP -
Mem.
Base;
928 MPI_Allgather(&off,
sizeof(ptrdiff_t), MPI_BYTE, TreeSphP_offsets,
sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
940 if(TreeSharedMem_ThisTask == 0)
965 MPI_Barrier(D->Communicator);
969template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
974 if(TreeSharedMem_ThisTask == 0)
982template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
986 int *CountFetch = (
int *)
Mem.mymalloc_movable_clear(&CountFetch,
"CountFetch",
Shmem.
World_NTask *
sizeof(
int));
987 int *OffsetFetch = (
int *)
Mem.mymalloc_movable(&OffsetFetch,
"OffsetFetch",
Shmem.
World_NTask *
sizeof(
int));
989 for(
int i = 0; i < NumOnFetchStack; i++)
990 CountFetch[StackToFetch[i].GhostRank]++;
994 OffsetFetch[i] = OffsetFetch[i - 1] + CountFetch[i - 1];
996 mycxxsort(StackToFetch, StackToFetch + NumOnFetchStack, compare_ghostrank);
1004 for(
int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
1010 if(CountFetch[ghost_rank] > 0)
1013 (
node_req *)
Mem.mymalloc_movable(&node_req_send,
"node_req_send", CountFetch[ghost_rank] *
sizeof(
node_req));
1015 for(
int i = 0; i < CountFetch[ghost_rank]; i++)
1017 int k = OffsetFetch[ghost_rank] + i;
1019 node *nop = get_nodep(StackToFetch[k].NodeToOpen, StackToFetch[k].ShmRank);
1027 if(fetch_type == FETCH_GRAVTREE)
1029 else if(fetch_type == FETCH_SPH_DENSITY)
1031 else if(fetch_type == FETCH_SPH_HYDRO)
1033 else if(fetch_type == FETCH_SPH_TREETIMESTEP)
1041 MPI_Send(node_req_send, CountFetch[ghost_rank] *
sizeof(
node_req), MPI_BYTE, ghost_rank, tag, MPI_COMM_WORLD);
1042 Mem.myfree(node_req_send);
1046 &node_info_send,
"node_info_send", CountFetch[ghost_rank] *
sizeof(
node_count_info));
1048 MPI_Recv(node_info_send, CountFetch[ghost_rank] *
sizeof(
node_count_info), MPI_BYTE, ghost_rank,
TAG_N, MPI_COMM_WORLD,
1052 int n_sendpoints = 0;
1053 int n_sendnodes = 0;
1055 for(
int i = 0; i < CountFetch[ghost_rank]; i++)
1061 foreign_point_data *buf_foreignpoints =
1062 (foreign_point_data *)
Mem.mymalloc(
"buf_foreignpoints", n_sendpoints *
sizeof(foreign_point_data));
1064 node *buf_foreignnodes = (node *)
Mem.mymalloc(
"buf_foreignnodes", n_sendnodes *
sizeof(node));
1067 if(n_sendpoints > 0)
1068 MPI_Recv(buf_foreignpoints, n_sendpoints *
sizeof(foreign_point_data), MPI_BYTE, ghost_rank,
TAG_PDATA, MPI_COMM_WORLD,
1072 MPI_Recv(buf_foreignnodes, n_sendnodes *
sizeof(node), MPI_BYTE, ghost_rank,
TAG_SPHDATA, MPI_COMM_WORLD,
1077 int n_nodes_used = 0;
1078 int n_parts_used = 0;
1080 for(
int i = 0; i < CountFetch[ghost_rank]; i++)
1082 int k = OffsetFetch[ghost_rank] + i;
1084 int no = StackToFetch[k].NodeToOpen;
1085 int shmrank = StackToFetch[k].ShmRank;
1087 node *nop = get_nodep(no, shmrank);
1092 while(nop->access.test_and_set(std::memory_order_acquire))
1097 if(nop->cannot_be_opened_locally)
1102 for(
int j = 0; j < n_nodes; j++)
1104 if(NumForeignNodes >= MaxForeignNodes)
1106 "We are out of storage for foreign nodes: NumForeignNodes=%d MaxForeignNodes=%d j=%d n_parts=%d",
1107 NumForeignNodes, MaxForeignNodes, j, n_parts);
1110 int p = EndOfTreePoints + NumForeignNodes++;
1114 memcpy(
static_cast<void *
>(&Foreign_Nodes[p - EndOfTreePoints]),
1115 static_cast<void *
>(&buf_foreignnodes[n_nodes_used++]),
sizeof(node));
1117 Foreign_Nodes[p - EndOfTreePoints].access.clear();
1118 Foreign_Nodes[p - EndOfTreePoints].flag_already_fetched = 0;
1120 Foreign_Nodes[p - EndOfTreePoints].nextnode = -1;
1124 if(plast < EndOfForeignNodes)
1126 Foreign_Nodes[plast - EndOfTreePoints].sibling = p;
1131 Foreign_Points[plast - EndOfForeignNodes].Nextnode = p;
1142 for(
int j = 0; j < n_parts; j++)
1144 if(NumForeignPoints >= MaxForeignPoints)
1146 "We are out of storage for foreign points: NumForeignPoints=%d MaxForeignPoints=%d j=%d n_parts=%d",
1147 NumForeignPoints, MaxForeignPoints, j, n_parts);
1150 int p = EndOfForeignNodes + NumForeignPoints++;
1151 Foreign_Points[p - EndOfForeignNodes] = buf_foreignpoints[n_parts_used++];
1155 if(plast < EndOfForeignNodes)
1157 Foreign_Nodes[plast - EndOfTreePoints].sibling = p;
1162 Foreign_Points[plast - EndOfForeignNodes].Nextnode = p;
1173 if(plast < 0 || pfirst < 0)
1174 Terminate(
"plast=%d < 0 || pfirst=%d < 0 n_nodes=%d n_parts=%d", plast, pfirst, n_nodes, n_parts);
1178 if(plast < EndOfForeignNodes)
1180 Foreign_Nodes[plast - EndOfTreePoints].sibling = nop->sibling;
1181 Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = nop->sibling_shmrank;
1183 if(Foreign_Nodes[plast - EndOfTreePoints].cannot_be_opened_locally == 0)
1184 if(D->ThisTask == 0)
1185 Terminate(
"what? plast - EndOfTreePoints=%d", plast - EndOfTreePoints);
1189 Foreign_Points[plast - EndOfForeignNodes].Nextnode = nop->sibling;
1190 Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = nop->sibling_shmrank;
1194 nop->nextnode = pfirst;
1197 nop->cannot_be_opened_locally.store(0, std::memory_order_release);
1199 sum_NumForeignNodes += n_nodes;
1200 sum_NumForeignPoints += n_parts;
1205 n_nodes_used += n_nodes;
1206 n_parts_used += n_parts;
1209 nop->access.clear(std::memory_order_release);
1212 if(n_sendpoints != n_parts_used || n_sendnodes != n_nodes_used)
1213 Terminate(
"n_sendpoints != n_parts_used || n_sendnodes != n_nodes_used");
1215 Mem.myfree(buf_foreignnodes);
1216 Mem.myfree(buf_foreignpoints);
1217 Mem.myfree(node_info_send);
1222 NumOnFetchStack = 0;
1224 Mem.myfree(OffsetFetch);
1225 Mem.myfree(CountFetch);
1231template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
1234 MPI_Comm_free(&TreeSharedMemComm);
1243 Mem.myfree_movable(Father);
1244 Mem.myfree_movable(Nextnode);
1248 Mem.myfree_movable(Points);
1251 Mem.myfree_movable(Nodes + MaxPart + D->NTopnodes);
1253 if(TreeSharedMem_ThisTask == 0)
1257 Mem.myfree_movable(TopNodes + MaxPart);
1258 Mem.myfree_movable(NodeIndex);
1259 Mem.myfree_movable(NodeSibling);
1260 Mem.myfree_movable(NodeLevel);
1267 MPI_Send(&TreeInfoHandle, 1, MPI_INT, ghost_rank,
TAG_TOPNODE_FREE, MPI_COMM_WORLD);
1271 Mem.myfree(TreeSharedMemBaseAddr);
1273 Mem.myfree(TreePS_offsets);
1274 Mem.myfree(TreeSphP_offsets);
1275 Mem.myfree(TreeP_offsets);
1276 Mem.myfree(TreeForeign_Points_offsets);
1277 Mem.myfree(TreeForeign_Nodes_offsets);
1278 Mem.myfree(TreeNextnode_offsets);
1279 Mem.myfree(TreePoints_offsets);
1280 Mem.myfree(TreeNodes_offsets);
1282 Mem.myfree_movable(Recv_offset);
1283 Mem.myfree_movable(Recv_count);
1284 Mem.myfree_movable(Send_offset);
1285 Mem.myfree_movable(Send_count);
1297 Terminate(
"trying to free the tree even though it's not allocated");
1300template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
1304 int task = D->TaskOfLeaf[no - (MaxPart + MaxNodes)];
1305 int nodeindex = NodeIndex[no - (MaxPart + MaxNodes)];
1307 tree_export_node_threads_by_task_and_node(task, nodeindex, i, thread, off);
1310template <
typename node,
typename partset,
typename po
int_data,
typename foreign_po
int_data>
1315 if(task < 0 || task >= D->NTask)
1316 Terminate(
"task < 0 || task >= D->NTask");
1318 if(task != D->ThisTask)
1332 nodelist[nexp].
Task = task;
1333 nodelist[nexp].
Index = i;
1339#include "../ngbtree/ngbtree.h"
1342#include "../gravtree/gravtree.h"
1346#include "../fof/foftree.h"
1348#if defined(LIGHTCONE) && (defined(LIGHTCONE_PARTICLES_GROUPS) || defined(LIGHTCONE_IMAGE_COMP_HSML_VELDISP))
1350#include "../data/lcparticles.h"
global_data_all_processes All
double timediff(double t0, double t1)
double getAllocatedBytesInMB(void)
int * GetNodeIDForSimulCommRank
void inform_offset_table(void *p)
void ** SharedMemBaseAddr
int * GetShmRankForSimulCommRank
tree_storage_info tree_info[MAX_TREE_INFOS]
int Island_Smallest_WorldTask
int * GetGhostRankForSimulCommRank
void tree_export_node_threads(int no, int i, thread_data *thread, offset_tuple off=0)
void treeallocate_share_topnode_addresses(void)
void cleanup_shared_memory_access(void)
void treeallocate(int max_partindex, partset *Pptr, domain< partset > *Dptr)
void tree_fetch_foreign_nodes(enum ftype fetch_type)
void prepare_shared_memory_access(void)
int treebuild(int ninsert, int *indexlist)
void tree_initialize_leaf_node_access_info(void)
void tree_export_node_threads_by_task_and_node(int task, int nodeindex, int i, thread_data *thread, offset_tuple off=0)
double mycxxsort(T *begin, T *end, Tcomp comp)
#define BITS_FOR_POSITIONS
#define MAX_TREE_ALLOC_FACTOR
#define TIMER_START(counter)
Starts the timer counter.
#define TIMER_STOP(counter)
Stops the timer counter.
#define TIMER_STOPSTART(stop, start)
Stops the timer 'stop' and starts the timer 'start'.
#define TAG_FETCH_SPH_DENSITY
#define TAG_FETCH_SPH_TREETIMESTEP
#define TAG_FETCH_GRAVTREE
#define TAG_TOPNODE_ALLOC
int myMPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm)
int myMPI_Sendrecv(void *sendbuf, size_t sendcount, MPI_Datatype sendtype, int dest, int sendtag, void *recvbuf, size_t recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status *status)
#define TAG_FETCH_SPH_HYDRO
#define TAG_TOPNODE_OFFSET
peanokey peano_hilbert_key(MyIntPosType x, MyIntPosType y, MyIntPosType z, int bits)
#define TREE_NUM_BEFORE_NODESPLIT
#define TREE_MODE_TOPLEVEL