GADGET-4
tree.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/intposconvert.h"
#include "../data/mymalloc.h"
#include "../domain/domain.h"
#include "../logs/logs.h"
#include "../logs/timer.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../sort/cxxsort.h"
#include "../sort/peano.h"
#include "../system/system.h"
#include "../time_integration/timestep.h"
#include "../tree/tree.h"

template <typename node, typename partset, typename point_data, typename foreign_point_data>
int tree<node, partset, point_data, foreign_point_data>::treebuild(int ninsert, int *indexlist)
{
  if(MaxPart == 0 && ninsert == 0)
    return 0;  // nothing to be done

  if(MaxPart == 0 && ninsert > 0)
    Terminate("Strange, we'll try to construct a tree for %d particles, but it appears not to be allocated\n", ninsert);

  if(ninsert == Tp->NumPart)
    D->mpi_printf("TREE: Full tree construction for all particles. (presently allocated=%g MB)\n", Mem.getAllocatedBytesInMB());

  Ninsert = ninsert;
  IndexList = indexlist;

  TIMER_START(CPU_TREEBUILD);

  double t0 = Logs.second();

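  /* The construction below is retried until it succeeds within the allocated
   * storage: on failure the tree is freed, All.TreeAllocFactor is raised by 15%
   * and the build repeated; if, conversely, clearly too many nodes were
   * allocated, the factor is lowered and the build redone. Illustrative growth
   * sequence (not from the original code): a factor f becomes 1.15*f, then
   * 1.3225*f, ... until the build fits or TREE_MAX_ITER is exceeded. */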
  int flag, iter = 0;
  do /* try constructing tree until successful in terms of storage allocation */
    {
      TIMER_START(CPU_TREEBUILD_INSERT);

      int flag_single = treebuild_construct();

      TIMER_STOP(CPU_TREEBUILD_INSERT);

      MPI_Allreduce(&flag_single, &flag, 1, MPI_INT, MPI_MIN, D->Communicator);

      if(flag < 0)
        {
          /* tree construction was not successful and needs to be repeated */
          treefree();
          All.TreeAllocFactor *= 1.15;
          // D->mpi_printf("TREE: Increasing TreeAllocFactor, new value=%g\n", All.TreeAllocFactor);
          treeallocate(Tp->NumPart, Tp, D);
        }
      else
        {
          /* treebuild was successful, but let's check whether we allocated clearly too much storage, and if so repeat it */
          int max_numnodes;
          MPI_Allreduce(&NumNodes, &max_numnodes, 1, MPI_INT, MPI_MAX, D->Communicator);

          if((MaxNodes - D->NTopnodes) > 1.5 * (max_numnodes - D->NTopnodes))
            {
              double oldvalue = All.TreeAllocFactor;
              double newvalue = std::max<double>(1.1 * (max_numnodes - D->NTopnodes) / (MaxPart + BASENUMBER), 0.02);

              if(newvalue < oldvalue)
                {
                  treefree();

                  All.TreeAllocFactor = newvalue;
                  flag = -1;
                  treeallocate(Tp->NumPart, Tp, D);
                }
            }
        }
      iter++;

      if(iter > TREE_MAX_ITER)
        Terminate("tree construction failed\n");
    }
  while(flag < 0);

  TIMER_STOPSTART(CPU_TREEBUILD, CPU_TREEBUILD_BRANCHES);

  /* first, construct the properties of the tree branches below the top leaves */

  int ntopleaves = D->NumTopleafOfTask[D->ThisTask];
  int *list = D->ListOfTopleaves + D->FirstTopleafOfTask[D->ThisTask];

  for(int i = 0; i < ntopleaves; i++)
    {
      int no = NodeIndex[list[i]];
      int sib = NodeSibling[list[i]];

      update_node_recursive(no, sib, TREE_MODE_BRANCH);
    }

  TIMER_STOPSTART(CPU_TREEBUILD_BRANCHES, CPU_TREEBUILD_TOPLEVEL);

  exchange_topleafdata();

  /* now update the top-level tree nodes */
  if(TreeSharedMem_ThisTask == 0)
    update_node_recursive(MaxPart, -1, TREE_MODE_TOPLEVEL);

  for(int k = D->NTopnodes; k < NumNodes; k++)
    {
      int index = MaxPart + k;
      Nodes[index].OriginTask = D->ThisTask;
      Nodes[index].OriginNode = index;
      Nodes[index].access.clear();
      Nodes[index].flag_already_fetched = 0;
    }

  if(TreeSharedMem_ThisTask == 0)
    {
      for(int k = 0; k < D->NTopnodes; k++)
        {
          int index = MaxPart + k;
          TopNodes[index].OriginTask = D->ThisTask;
          TopNodes[index].OriginNode = index;
          TopNodes[index].access.clear();
          TopNodes[index].flag_already_fetched = 0;
        }
    }

  tree_initialize_leaf_node_access_info();

  double t1 = Logs.second();
  Buildtime = Logs.timediff(t0, t1);

  report_log_message();

  TIMER_STOP(CPU_TREEBUILD_TOPLEVEL);

  return NumNodes;
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::tree_initialize_leaf_node_access_info(void)
{
  if(TreeSharedMem_ThisTask == 0)
    {
      for(int k = 0; k < D->NTopleaves; k++)
        {
          int index = NodeIndex[k];

          if(Shmem.GetNodeIDForSimulCommRank[D->TaskOfLeaf[k]] == Shmem.GetNodeIDForSimulCommRank[D->ThisTask])
            TopNodes[index].cannot_be_opened_locally = 0;
          else
            {
              TopNodes[index].cannot_be_opened_locally = 1;
              TopNodes[index].flag_already_fetched = 0;
              TopNodes[index].nextnode = MaxPart + MaxNodes + k;
              TopNodes[index].nextnode_shmrank = TreeSharedMem_ThisTask;
            }
          TopNodes[index].OriginTask = D->TaskOfLeaf[k];
          TopNodes[index].OriginNode = index;
        }
    }
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
int tree<node, partset, point_data, foreign_point_data>::treebuild_construct(void)
{
  if(TreeSharedMem_ThisTask == 0)
    {
      /* create an empty root node */
      NextFreeNode = MaxPart; /* index of first free node */

      node *nfreep = &TopNodes[NextFreeNode]; /* select first node */
      nfreep->nextnode = -1;
      nfreep->sibling = -1;
      nfreep->father = -1;
      nfreep->level = 0;
      nfreep->sibling_shmrank = TreeSharedMem_ThisTask;
      nfreep->nextnode_shmrank = TreeSharedMem_ThisTask;

      for(int j = 0; j < 3; j++)
        nfreep->center[j] = ((MyIntPosType)1) << (BITS_FOR_POSITIONS - 1);
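
      /* The root node spans the full integer coordinate range [0, 2^BITS_FOR_POSITIONS)
       * per dimension, so the center set above is the midpoint 2^(BITS_FOR_POSITIONS-1),
       * e.g. (illustration only) 0x80000000 in the 32-bit MyIntPosType case. */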
232
233 NodeIndex[0] = NextFreeNode;
234 NodeLevel[0] = 0;
235 NodeSibling[0] = -1;
236 NumNodes = 1;
237 NextFreeNode++;
238
239 /* create a set of empty nodes corresponding to the top-level domain
240 * grid. We need to generate these nodes first to make sure that we have a
241 * complete top-level tree which allows the easy insertion of the
242 * pseudo-particles at the right place.
243 */
244
245 if(create_empty_nodes(MaxPart, 1, 0, 1, -1, 0, 0, 0) < 0)
246 return -2;
247 }
248
249 MPI_Bcast(&NumNodes, 1, MPI_INT, 0, D->Communicator);
250 MPI_Bcast(&NextFreeNode, 1, MPI_INT, 0, D->Communicator);
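
  /* Note: only the first task of each shared-memory group constructed the top-level
   * tree above; broadcasting NumNodes and NextFreeNode from task 0 ensures that all
   * tasks in the communicator continue with consistent bookkeeping. */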

  if(NumNodes != D->NTopnodes)
    Terminate("NumNodes=%d != D->NTopnodes=%d", NumNodes, D->NTopnodes);

  FirstNonTopLevelNode = NextFreeNode;

  if(MaxPart + D->NTopnodes != FirstNonTopLevelNode)
    Terminate("unexpected");

  for(int j = 0; j < D->NTask; j++)
    Send_count[j] = 0;

#ifndef LEAN
  int *node_list = (int *)Mem.mymalloc_movable(&node_list, "node_list", Tp->NumPart * sizeof(int));
  int *task_list = (int *)Mem.mymalloc_movable(&task_list, "task_list", Tp->NumPart * sizeof(int));
#endif

  /* now we determine for each point the insertion top-level node, and the task on which this lies */
  for(int idx = 0; idx < Ninsert; idx++)
    {
      int i;
      if(IndexList)
        i = IndexList[idx];
      else
        i = idx;

      if(Tp->P[i].get_Ti_Current() != All.Ti_Current)
        Tp->drift_particle(&Tp->P[i], &Tp->SphP[i], All.Ti_Current);

      int no;
      int task;
      tree_get_node_and_task(i, no, task);

#ifndef LEAN
      node_list[i] = no;
      task_list[i] = task;
#endif

      if(task != D->ThisTask)
        Send_count[task]++;
    }

  myMPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, D->Communicator);

  NumPartImported = 0;
  NumPartExported = 0;
  Recv_offset[0] = 0;
  Send_offset[0] = 0;

  for(int j = 0; j < D->NTask; j++)
    {
      NumPartImported += Recv_count[j];
      NumPartExported += Send_count[j];
      if(j > 0)
        {
          Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
          Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
        }
    }

  Points = (point_data *)Mem.mymalloc_movable(&Points, "Points", NumPartImported * sizeof(point_data));
  Nextnode = (int *)Mem.mymalloc_movable(&Nextnode, "Nextnode", (MaxPart + D->NTopleaves + NumPartImported) * sizeof(int));
  Father = (int *)Mem.mymalloc_movable(&Father, "Father", (MaxPart + NumPartImported) * sizeof(int));

  /* now put in markers ("pseudo" particles) in top-leaf nodes to indicate on which task the branch lies */
  for(int i = 0; i < D->NTopleaves; i++)
    {
      int index = NodeIndex[i];

      if(TreeSharedMem_ThisTask == 0)
        TopNodes[index].nextnode = MaxPart + MaxNodes + i;

      /* set nextnode for pseudo-particle (Nextnode exists on all ranks) */
      Nextnode[MaxPart + i] = TopNodes[index].sibling;
    }

  point_data *export_Points = (point_data *)Mem.mymalloc("export_Points", NumPartExported * sizeof(point_data));

  for(int j = 0; j < D->NTask; j++)
    Send_count[j] = 0;

  for(int idx = 0; idx < Ninsert; idx++) /* prepare particle data to be copied to other tasks */
    {
      int i;
      if(IndexList)
        i = IndexList[idx];
      else
        i = idx;

#ifdef LEAN
      int no, task;
      tree_get_node_and_task(i, no, task);
#else
      int task = task_list[i];
      int no = node_list[i];
#endif

      if(task != D->ThisTask)
        {
          int n = Send_offset[task] + Send_count[task]++;

          fill_in_export_points(&export_Points[n], i, no);
        }
    }

  /* exchange data */
  for(int ngrp = 1; ngrp < (1 << D->PTask); ngrp++)
    {
      int recvTask = D->ThisTask ^ ngrp;
      if(recvTask < D->NTask)
        if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
          myMPI_Sendrecv(&export_Points[Send_offset[recvTask]], Send_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask,
                         TAG_DENS_A, &Points[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(point_data), MPI_BYTE, recvTask,
                         TAG_DENS_A, D->Communicator, MPI_STATUS_IGNORE);
    }

  Mem.myfree(export_Points);

  ImportedNodeOffset = MaxPart + MaxNodes + D->NTopleaves;

  MPI_Allreduce(&NumPartImported, &EndOfTreePoints, 1, MPI_INT, MPI_MAX, D->Communicator);
  EndOfTreePoints += ImportedNodeOffset;
  EndOfForeignNodes = EndOfTreePoints + (INT_MAX - EndOfTreePoints) / 2;
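
  /* At this point the global index space used in tree walks is laid out as follows
   * (a summary added for orientation, derived from the assignments above):
   *   [0, MaxPart)                              local particles
   *   [MaxPart, MaxPart + MaxNodes)             tree nodes
   *   [MaxPart + MaxNodes, ImportedNodeOffset)  pseudo-particles marking top leaves
   *   [ImportedNodeOffset, EndOfTreePoints)     imported points
   *   [EndOfTreePoints, EndOfForeignNodes)      foreign nodes fetched on demand
   *   [EndOfForeignNodes, INT_MAX)              foreign points fetched on demand */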

  /* make a list that holds the particles that belong to a certain node */
  index_data *index_list =
      (index_data *)Mem.mymalloc_movable(&index_list, "index_list", (Ninsert + NumPartImported) * sizeof(index_data));
  int count = 0;

  for(int idx = 0; idx < Ninsert; idx++)
    {
      int i;
      if(IndexList)
        i = IndexList[idx];
      else
        i = idx;

#ifdef LEAN
      int no, task;
      tree_get_node_and_task(i, no, task);
#else
      int task = task_list[i];
      int no = node_list[i];
#endif

      if(task == D->ThisTask)
        {
          index_list[count].p = i;
          index_list[count].subnode = no;
          count++;
        }
    }

  for(int i = 0; i < NumPartImported; i++)
    {
      index_list[count].p = i + ImportedNodeOffset;
      index_list[count].subnode = Points[i].no;
      count++;
    }

#ifndef LEAN
  Mem.myfree_movable(task_list);
  Mem.myfree_movable(node_list);
#endif

  /* sort according to node so that particle indices in the same node are grouped together */
  mycxxsort(index_list, index_list + count, compare_index_data_subnode);

  int full_flag = 0;
  int ntopleaves = D->NumTopleafOfTask[D->ThisTask];
  int start = 0;

  for(int n = 0; n < ntopleaves; n++)
    {
      int no = D->ListOfTopleaves[D->FirstTopleafOfTask[D->ThisTask] + n];
      int th = NodeIndex[no];
      unsigned char level = NodeLevel[no];
      int sibling = NodeSibling[no];

      while(start < count && index_list[start].subnode < no)
        start++;

      if(start < count)
        {
          int last = start;
          while(last < count && index_list[last].subnode == no)
            last++;

          int num = last - start;

          if(treebuild_insert_group_of_points(num, &index_list[start], th, level, sibling))
            {
              full_flag = 1;
              break;
            }

          start += num;
        }
    }

  if((NumNodes = NextFreeNode - MaxPart) >= MaxNodes)
    {
      if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
        {
          Tp->dump_particles();
          Terminate(
              "task %d: looks like a serious problem, stopping with particle dump. NumNodes=%d MaxNodes=%d NumPartImported=%d "
              "NumPart=%d\n",
              D->ThisTask, NumNodes, MaxNodes, NumPartImported, Tp->NumPart);
        }
    }

  Mem.myfree(index_list);

  if(full_flag)
    return -1;

  return NumNodes;
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
int tree<node, partset, point_data, foreign_point_data>::treebuild_insert_group_of_points(int num, index_data *index_list, int th,
                                                                                          unsigned char level, int sibling)
{
  if(level >= BITS_FOR_POSITIONS)
    Terminate(
        "It appears we have reached the bottom of the tree because there are more than TREE_NUM_BEFORE_NODESPLIT=%d particles in the "
        "smallest tree node representable for BITS_FOR_POSITIONS=%d.\n"
        "Either eliminate the particles at (nearly) identical coordinates, increase the setting for TREE_NUM_BEFORE_NODESPLIT, or "
        "possibly enlarge BITS_FOR_POSITIONS if you have really not enough dynamic range\n",
        TREE_NUM_BEFORE_NODESPLIT, BITS_FOR_POSITIONS);

  MyIntPosType mask = (((MyIntPosType)1) << (BITS_FOR_POSITIONS - 1 - level));
  unsigned char shiftx = (BITS_FOR_POSITIONS - 1 - level);
  unsigned char shifty = (BITS_FOR_POSITIONS - 2 - level);
  unsigned char shiftz = (BITS_FOR_POSITIONS - 3 - level);
  MyIntPosType centermask = ~(~((MyIntPosType)0) >> level);

  int subcount[8] = {0, 0, 0, 0, 0, 0, 0, 0}, subnode[8];
  MyIntPosType *subintpos[8];

  for(int i = 0; i < num; i++)
    {
      MyIntPosType *intpos;

      int p = index_list[i].p;
      if(p < MaxPart)
        {
          intpos = Tp->P[p].IntPos;
        }
      else
        {
          int n = p - ImportedNodeOffset;
          intpos = Points[n].IntPos;
        }

      unsigned char subnode = (((unsigned char)((intpos[0] & mask) >> shiftx)) | ((unsigned char)((intpos[1] & mask) >> shifty)) |
                               ((unsigned char)((intpos[2] & mask) >> shiftz)));
      if(subnode > 7)
        Terminate("stop: subnode > 7");

      subcount[subnode]++;

      subintpos[subnode] = intpos;

      index_list[i].subnode = subnode;
    }
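
  /* The expression above extracts, for each coordinate, the single bit that decides
   * on which side of this node's center the particle lies, and packs the three bits
   * into an octant index: bit 0 from x, bit 1 from y, bit 2 from z, so that
   * subnode = x_bit + 2*y_bit + 4*z_bit lies in [0,7]. Worked example (illustrative
   * numbers only): at level=0 with BITS_FOR_POSITIONS=32, mask=0x80000000 and
   * shiftx=31, shifty=30, shiftz=29; a particle whose top bits are x=1, y=0, z=1
   * yields subnode = 1 | 0 | 4 = 5. */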

  /* sort */
  mycxxsort(index_list, index_list + num, compare_index_data_subnode);

  centermask >>= 1;
  centermask |= ~(~((MyIntPosType)0) >> 1); /* this sets the MSB */
  mask >>= 1;
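
  /* After these shifts, centermask selects the top (level+1) bits that all particles
   * of a given daughter octant share, while mask is the half-width of a daughter node.
   * A child center below is thus obtained by keeping the shared high bits of any member
   * particle and setting the next lower bit, i.e. center = (intpos & centermask) | mask
   * (a restatement of the assignments that follow, added for clarity). */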

  node *nfreep_last = NULL;

  /* create the daughter nodes */
  for(int i = 0; i < 8; i++)
    {
      if(subcount[i] > TREE_NUM_BEFORE_NODESPLIT)
        {
          int thnew;

          thnew = NextFreeNode++;

          if(thnew - MaxPart >= MaxNodes)
            return 1; /* we are out of space */

          subnode[i] = thnew;

          if(nfreep_last)
            {
              nfreep_last->sibling = thnew;
              nfreep_last->nextnode = thnew;

              nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
              nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
            }
          else
            {
              get_nodep(th)->nextnode = thnew;
              get_nodep(th)->nextnode_shmrank = TreeSharedMem_ThisTask;
            }

          if(thnew < MaxPart + D->NTopnodes)
            Terminate("thnew = %d < MaxPart=%d + D->NTopnodes=%d", thnew, MaxPart, D->NTopnodes);

          node *nfreep = get_nodep(thnew);

          nfreep->father = th;
          nfreep->level = level + 1;

          nfreep->center[0] = ((subintpos[i][0] & centermask) | mask);
          nfreep->center[1] = ((subintpos[i][1] & centermask) | mask);
          nfreep->center[2] = ((subintpos[i][2] & centermask) | mask);

          nfreep->nextnode_shmrank = TreeSharedMem_ThisTask;
          nfreep->sibling_shmrank = TreeSharedMem_ThisTask;

          nfreep_last = nfreep;
        }
    }

  /* now insert the particles that are chained to the node */

  int p_last = -1, p_first = -1;

  /* now insert the particle groups into the created daughter nodes, or chain them to the node */
  for(int i = 0, off = 0; i < 8; i++)
    {
      if(subcount[i] <= TREE_NUM_BEFORE_NODESPLIT)
        {
          /* put the particles into the node as a chain */
          for(int j = 0; j < subcount[i]; j++)
            {
              int p = index_list[off + j].p;

              if(nfreep_last == NULL && p_first == -1)
                {
                  get_nodep(th)->nextnode = p;
                  get_nodep(th)->nextnode_shmrank = TreeSharedMem_ThisTask;
                }

              if(p < MaxPart)
                Father[p] = th;
              else
                Father[p - MaxNodes - D->NTopleaves] = th;

              if(p_last >= 0)
                {
                  if(p_last < MaxPart)
                    Nextnode[p_last] = p;
                  else
                    Nextnode[p_last - MaxNodes] = p; /* imported point */
                }

              p_last = p;

              if(p_first == -1)
                p_first = p;
            }
        }

      off += subcount[i];
    }

  if(p_last >= 0)
    {
      if(p_last < MaxPart)
        Nextnode[p_last] = sibling;
      else
        Nextnode[p_last - MaxNodes] = sibling; /* imported point */
    }

  if(nfreep_last)
    {
      if(p_first >= 0)
        {
          nfreep_last->sibling = p_first;
          nfreep_last->nextnode = p_first;

          nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
          nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
        }
      else
        {
          nfreep_last->sibling = sibling;
          nfreep_last->nextnode = sibling;

          nfreep_last->sibling_shmrank = TreeSharedMem_ThisTask;
          nfreep_last->nextnode_shmrank = TreeSharedMem_ThisTask;
        }
    }

  for(int i = 0, off = 0; i < 8; i++)
    {
      if(subcount[i] > TREE_NUM_BEFORE_NODESPLIT)
        {
          if(subnode[i] < MaxPart + D->NTopnodes)
            Terminate("subnode[i]=%d < MaxPart=%d + D->NTopnodes=%d", subnode[i], MaxPart, D->NTopnodes);

          int out_of_space =
              treebuild_insert_group_of_points(subcount[i], &index_list[off], subnode[i], level + 1, get_nodep(subnode[i])->sibling);
          if(out_of_space)
            return out_of_space;
        }

      off += subcount[i];
    }

  return 0; /* success */
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
int tree<node, partset, point_data, foreign_point_data>::create_empty_nodes(int no_parent, int level, int topnode, int bits,
                                                                            int sibling, MyIntPosType x, MyIntPosType y,
                                                                            MyIntPosType z)
{
  if(D->TopNodes[topnode].Daughter >= 0)
    {
      int firstflag = 0;
      int nostart = NextFreeNode;

      /* loop over daughter nodes */
      for(int i = 0; i < 2; i++)
        for(int j = 0; j < 2; j++)
          for(int k = 0; k < 2; k++)
            {
              if(NumNodes >= MaxNodes)
                {
                  if(All.TreeAllocFactor > MAX_TREE_ALLOC_FACTOR)
                    {
                      char buf[MAXLEN_PATH];
                      sprintf(buf, "task %d: looks like a serious problem (NTopnodes=%d), stopping with particle dump.\n", D->ThisTask,
                              D->NTopnodes);
                      Tp->dump_particles();
                      Terminate(buf);
                    }
                  return -1;
                }

              int no = NextFreeNode++;
              NumNodes++;

              if(firstflag == 0)
                {
                  TopNodes[no_parent].nextnode = no;
                  firstflag = 1;
                }

              TopNodes[no].father = no_parent;

              if(i + 2 * j + 4 * k == 7)
                TopNodes[no].sibling = sibling;
              else
                TopNodes[no].sibling = no + 1;

              TopNodes[no].nextnode = TopNodes[no].sibling;

              TopNodes[no].level = level;

              MyIntPosType lenhalf = ((MyIntPosType)1) << (BITS_FOR_POSITIONS - level - 1);
              TopNodes[no].center[0] = TopNodes[no_parent].center[0] + (2 * i - 1) * lenhalf;
              TopNodes[no].center[1] = TopNodes[no_parent].center[1] + (2 * j - 1) * lenhalf;
              TopNodes[no].center[2] = TopNodes[no_parent].center[2] + (2 * k - 1) * lenhalf;

              TopNodes[no].sibling_shmrank = TreeSharedMem_ThisTask;
              TopNodes[no].nextnode_shmrank = TreeSharedMem_ThisTask;
            }

      /* loop over daughter nodes */
      for(int i = 0; i < 2; i++)
        for(int j = 0; j < 2; j++)
          for(int k = 0; k < 2; k++)
            {
              int no = nostart++;

              peanokey key = peano_hilbert_key((x << 1) + i, (y << 1) + j, (z << 1) + k, bits);
              int sub = 7 & key.ls;
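
              /* The daughters were created above in plain (i,j,k) order, but the domain
               * decomposition enumerates top-level nodes along the Peano-Hilbert curve;
               * the low three bits of the key therefore give the position 'sub' of this
               * octant among the daughters of the corresponding domain top node. */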

              if(D->TopNodes[D->TopNodes[topnode].Daughter + sub].Daughter == -1)
                {
                  NodeIndex[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = no;
                  NodeLevel[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = level;
                  NodeSibling[D->TopNodes[D->TopNodes[topnode].Daughter + sub].Leaf] = TopNodes[no].sibling;
                }

              /* create grand daughter nodes for current daughter node */
              if(create_empty_nodes(no, level + 1, D->TopNodes[topnode].Daughter + sub, bits + 1, TopNodes[no].sibling, 2 * x + i,
                                    2 * y + j, 2 * z + k) < 0)
                return -1;
            }
    }

  return 0;
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::treeallocate(int max_partindex, partset *Tp_ptr, domain<partset> *Dptr)
{
  D = Dptr;
  Tp = Tp_ptr;

  /* split up the communicator into pieces that overlap with different shared memory regions */
  if(max_partindex != -1)
    MPI_Comm_split(D->Communicator, Shmem.Island_Smallest_WorldTask, 0, &TreeSharedMemComm);

  if(max_partindex != -1)
    MPI_Allreduce(&max_partindex, &MaxPart, 1, MPI_INT, MPI_MAX, D->Communicator);

  if(MaxPart == 0)
    return;  // nothing to be done

  if(Nodes)
    Terminate("Nodes already allocated");

  Send_count = (int *)Mem.mymalloc_movable(&Send_count, "Send_count", sizeof(int) * D->NTask);
  Send_offset = (int *)Mem.mymalloc_movable(&Send_offset, "Send_offset", sizeof(int) * D->NTask);
  Recv_count = (int *)Mem.mymalloc_movable(&Recv_count, "Recv_count", sizeof(int) * D->NTask);
  Recv_offset = (int *)Mem.mymalloc_movable(&Recv_offset, "Recv_offset", sizeof(int) * D->NTask);

  if(max_partindex != -1)
    {
      MPI_Allreduce(MPI_IN_PLACE, &All.TreeAllocFactor, 1, MPI_DOUBLE, MPI_MAX, D->Communicator);

      MaxNodes = (int)(All.TreeAllocFactor * (MaxPart + BASENUMBER)) + D->NTopnodes;

      int max_nodes;
      MPI_Allreduce(&MaxNodes, &max_nodes, 1, MPI_INT, MPI_MAX, D->Communicator);

      if(max_nodes != MaxNodes)
        Terminate("Strange: different maxnodes detected: %d %d", max_nodes, MaxNodes);

      int max_leaves;
      MPI_Allreduce(&D->NTopleaves, &max_leaves, 1, MPI_INT, MPI_MAX, D->Communicator);
      if(max_leaves != D->NTopleaves)
        Terminate("Strange: different number of top leaves detected: %d %d", max_leaves, D->NTopleaves);
    }
  else
    {
      max_partindex = MaxPart;
    }

  MPI_Comm_rank(TreeSharedMemComm, &TreeSharedMem_ThisTask);
  MPI_Comm_size(TreeSharedMemComm, &TreeSharedMem_NTask);

  TreeNodes_offsets = (ptrdiff_t *)Mem.mymalloc("TreeNodes_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreePoints_offsets = (ptrdiff_t *)Mem.mymalloc("TreePoints_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreeNextnode_offsets = (ptrdiff_t *)Mem.mymalloc("TreeNextnode_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreeForeign_Nodes_offsets = (ptrdiff_t *)Mem.mymalloc("TreeForeign_Nodes_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreeForeign_Points_offsets = (ptrdiff_t *)Mem.mymalloc("TreeForeign_Points_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreeP_offsets = (ptrdiff_t *)Mem.mymalloc("TreeP_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreeSphP_offsets = (ptrdiff_t *)Mem.mymalloc("TreeSphP_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));
  TreePS_offsets = (ptrdiff_t *)Mem.mymalloc("TreePS_offsets", TreeSharedMem_NTask * sizeof(ptrdiff_t));

  TreeSharedMemBaseAddr = (void **)Mem.mymalloc("TreeSharedMemBaseAddr", TreeSharedMem_NTask * sizeof(void *));

  for(int i = 0; i < TreeSharedMem_NTask; i++)
    {
      int island_rank = Shmem.Island_ThisTask + (i - TreeSharedMem_ThisTask);

      if(island_rank < 0 || island_rank >= Shmem.Island_NTask)
        Terminate("island_rank=%d < 0 || island_rank >= Shmem.Island_NTask=%d", island_rank, Shmem.Island_NTask);

      TreeSharedMemBaseAddr[i] = Shmem.SharedMemBaseAddr[island_rank];
    }

  /* allocate the top-level tree only once per shared-memory section in the communicator */
  if(TreeSharedMem_ThisTask == 0)
    {
      /* if we have a single shared memory node, or we are building a tree on a subcommunicator, allocate locally */
      if(TreeSharedMem_NTask == Shmem.World_NTask || D->NTask < Shmem.Sim_NTask)
        {
          NodeLevel = (unsigned char *)Mem.mymalloc("NodeLevel", D->NTopleaves * sizeof(unsigned char));
          NodeSibling = (int *)Mem.mymalloc("NodeSibling", D->NTopleaves * sizeof(int));
          NodeIndex = (int *)Mem.mymalloc("NodeIndex", D->NTopleaves * sizeof(int));

          TopNodes = (node *)Mem.mymalloc("TopNodes", D->NTopnodes * sizeof(node));
          TopNodes -= MaxPart;
        }
      else /* otherwise, allocate the storage on the ghost processor, and get the address in our space from it */
        {
          int ghost_rank = Shmem.GetGhostRankForSimulCommRank[D->ThisTask];

          /* request the storage from the responsible ghost rank, and map it into the local address space */
          size_t tab_len[4] = {D->NTopleaves * sizeof(unsigned char), D->NTopleaves * sizeof(int), D->NTopleaves * sizeof(int),
                               D->NTopnodes * sizeof(node)};

          MPI_Send(tab_len, 4 * sizeof(size_t), MPI_BYTE, ghost_rank, TAG_TOPNODE_ALLOC, MPI_COMM_WORLD);

          ptrdiff_t off[4];
          MPI_Recv(off, 4 * sizeof(ptrdiff_t), MPI_BYTE, ghost_rank, TAG_TOPNODE_OFFSET, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

          NodeLevel = (unsigned char *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[0]);
          NodeSibling = (int *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[1]);
          NodeIndex = (int *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[2]);

          TopNodes = (node *)((char *)Shmem.SharedMemBaseAddr[Shmem.Island_NTask - 1] + off[3]);
          TopNodes -= MaxPart;

          MPI_Recv(&TreeInfoHandle, 1, MPI_INT, ghost_rank, TAG_N, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        }
881
882 Nodes = (node *)Mem.mymalloc_movable(&Nodes, "Nodes", (MaxNodes - D->NTopnodes + 1) * sizeof(node));
883 Nodes -= (MaxPart + D->NTopnodes);
884
885 if(max_partindex != -1)
886 treeallocate_share_topnode_addresses();
887}
888
template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::treeallocate_share_topnode_addresses(void)
{
  MPI_Bcast(&TreeInfoHandle, 1, MPI_INT, 0, TreeSharedMemComm);

  ptrdiff_t off[4] = {((char *)NodeLevel - Mem.Base), ((char *)NodeSibling - Mem.Base), ((char *)NodeIndex - Mem.Base),
                      ((char *)TopNodes - Mem.Base)};

  MPI_Bcast(off, 4 * sizeof(ptrdiff_t), MPI_BYTE, 0, TreeSharedMemComm);

  int shmrank = Shmem.GetShmRankForSimulCommRank[D->ThisTask]; /* shared-memory rank holding the top-level arrays */
  MPI_Bcast(&shmrank, 1, MPI_INT, 0, TreeSharedMemComm);
901
902 NodeLevel = (unsigned char *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[0]);
903 NodeSibling = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[1]);
904 NodeIndex = (int *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[2]);
905 TopNodes = (node *)((char *)Shmem.SharedMemBaseAddr[shmrank] + off[3]);
906}
907
template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::prepare_shared_memory_access(void)
{
  if(D->NTask != Shmem.Sim_NTask)
    Terminate("This version of the tree communication algorithm only works with the full simulation partition");

  ptrdiff_t off;
  off = (char *)Nodes - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeNodes_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
  off = (char *)Points - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreePoints_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
  off = (char *)Nextnode - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeNextnode_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
  off = (char *)Foreign_Nodes - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeForeign_Nodes_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
  off = (char *)Foreign_Points - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeForeign_Points_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
  off = (char *)Tp->P - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeP_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);
  off = (char *)Tp->SphP - Mem.Base;
  MPI_Allgather(&off, sizeof(ptrdiff_t), MPI_BYTE, TreeSphP_offsets, sizeof(ptrdiff_t), MPI_BYTE, TreeSharedMemComm);

  if(Shmem.Sim_NTask < Shmem.World_NTask)
    {
      Shmem.tree_info[TreeInfoHandle].Bd.MaxPart = MaxPart;
      Shmem.tree_info[TreeInfoHandle].Bd.MaxNodes = MaxNodes;
      Shmem.tree_info[TreeInfoHandle].Bd.NTopnodes = D->NTopnodes;
      Shmem.tree_info[TreeInfoHandle].Bd.ImportedNodeOffset = ImportedNodeOffset;
      Shmem.tree_info[TreeInfoHandle].Bd.EndOfTreePoints = EndOfTreePoints;
      Shmem.tree_info[TreeInfoHandle].Bd.EndOfForeignNodes = EndOfForeignNodes;

      // we also need to inform our shared-memory ghost processor
      if(TreeSharedMem_ThisTask == 0)
        {
          MPI_Send(&TreeInfoHandle, 1, MPI_INT, Shmem.MyShmRankInGlobal, TAG_METDATA, MPI_COMM_WORLD);
          MPI_Send(&All.Ti_Current, sizeof(All.Ti_Current), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_METDATA + 1, MPI_COMM_WORLD);
          MPI_Send(&Shmem.tree_info[TreeInfoHandle].Bd, sizeof(Shmem.tree_info[TreeInfoHandle].Bd), MPI_BYTE, Shmem.MyShmRankInGlobal,
                   TAG_METDATA + 2, MPI_COMM_WORLD);

          intposconvert *convfac = Tp;
          MPI_Send(convfac, sizeof(intposconvert), MPI_BYTE, Shmem.MyShmRankInGlobal, TAG_METDATA + 3, MPI_COMM_WORLD);
        }

      Shmem.inform_offset_table(TopNodes);
      Shmem.inform_offset_table(Nodes);
      Shmem.inform_offset_table(Nextnode);
      Shmem.inform_offset_table(Points);
      Shmem.inform_offset_table(Tp->P);
      Shmem.inform_offset_table(Tp->SphP);
      Shmem.inform_offset_table(Foreign_Nodes);
      Shmem.inform_offset_table(Foreign_Points);

      MPI_Barrier(Shmem.SharedMemComm);  // this barrier is in principle superfluous, but on some systems,
                                         // the MPI_Gather in prepare_offset_table() can return prematurely
                                         // on the target rank before all data has arrived

      /* the following is needed to make sure that the shared memory handler on different nodes is already properly initialized */
      MPI_Barrier(D->Communicator);
    }
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::cleanup_shared_memory_access(void)
{
  if(Shmem.Sim_NTask < Shmem.World_NTask)
    {
      if(TreeSharedMem_ThisTask == 0)
        {
          // need to send this flag to the responsible ghost rank in the global communicator
          MPI_Send(&TreeInfoHandle, 1, MPI_INT, Shmem.MyShmRankInGlobal, TAG_HEADER, MPI_COMM_WORLD);
        }
    }
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::tree_fetch_foreign_nodes(enum ftype fetch_type)
{
  // find out how many we have to fetch from each node
  int *CountFetch = (int *)Mem.mymalloc_movable_clear(&CountFetch, "CountFetch", Shmem.World_NTask * sizeof(int));
  int *OffsetFetch = (int *)Mem.mymalloc_movable(&OffsetFetch, "OffsetFetch", Shmem.World_NTask * sizeof(int));

  for(int i = 0; i < NumOnFetchStack; i++)
    CountFetch[StackToFetch[i].GhostRank]++;

  OffsetFetch[0] = 0;
  for(int i = 1; i < Shmem.World_NTask; i++)
    OffsetFetch[i] = OffsetFetch[i - 1] + CountFetch[i - 1];

  mycxxsort(StackToFetch, StackToFetch + NumOnFetchStack, compare_ghostrank);

  /* now go through each rank in turn, and import from it the requested nodes */
  int CommPTask;
  for(CommPTask = 0; Shmem.World_NTask > (1 << CommPTask); CommPTask++)
    ;
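
  /* CommPTask computed above is the smallest exponent with 2^CommPTask >= World_NTask;
   * the loop below sweeps ngrp = 1 ... 2^CommPTask - 1 and pairs ranks via XOR (a
   * hypercube pattern): for a fixed ngrp the mapping partner = rank ^ ngrp is a perfect
   * matching, so each pair communicates at most once per sweep without collisions.
   * (Summary comment; e.g. with 4 ranks and ngrp=1 the pairs are (0,1) and (2,3).) */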

  for(int ngrp = 1; ngrp < (1 << CommPTask); ngrp++)
    {
      int ghost_rank = Shmem.World_ThisTask ^ ngrp;  // this is already the responsible ghost rank

      if(ghost_rank < Shmem.World_NTask)
        {
          if(CountFetch[ghost_rank] > 0)
            {
              node_req *node_req_send =
                  (node_req *)Mem.mymalloc_movable(&node_req_send, "node_req_send", CountFetch[ghost_rank] * sizeof(node_req));

              for(int i = 0; i < CountFetch[ghost_rank]; i++)
                {
                  int k = OffsetFetch[ghost_rank] + i;

                  node *nop = get_nodep(StackToFetch[k].NodeToOpen, StackToFetch[k].ShmRank);

                  node_req_send[i].foreigntask = nop->OriginTask;
                  node_req_send[i].foreignnode = nop->OriginNode;
                }

              /* now make the node request */
              int tag;
              if(fetch_type == FETCH_GRAVTREE)
                tag = TAG_FETCH_GRAVTREE + TreeInfoHandle;
              else if(fetch_type == FETCH_SPH_DENSITY)
                tag = TAG_FETCH_SPH_DENSITY + TreeInfoHandle;
              else if(fetch_type == FETCH_SPH_HYDRO)
                tag = TAG_FETCH_SPH_HYDRO + TreeInfoHandle;
              else if(fetch_type == FETCH_SPH_TREETIMESTEP)
                tag = TAG_FETCH_SPH_TREETIMESTEP + TreeInfoHandle;
              else
                {
                  tag = 0;
                  Terminate("tag undefined");
                }

              MPI_Send(node_req_send, CountFetch[ghost_rank] * sizeof(node_req), MPI_BYTE, ghost_rank, tag, MPI_COMM_WORLD);
              Mem.myfree(node_req_send);

              // get the information about how many nodes and particles hang below each of the nodes
              node_count_info *node_info_send = (node_count_info *)Mem.mymalloc_movable(
                  &node_info_send, "node_info_send", CountFetch[ghost_rank] * sizeof(node_count_info));

              MPI_Recv(node_info_send, CountFetch[ghost_rank] * sizeof(node_count_info), MPI_BYTE, ghost_rank, TAG_N, MPI_COMM_WORLD,
                       MPI_STATUS_IGNORE);

              /* now find out how many nodes and points we want to import in total */
              int n_sendpoints = 0;
              int n_sendnodes = 0;

              for(int i = 0; i < CountFetch[ghost_rank]; i++)
                {
                  n_sendpoints += node_info_send[i].count_parts;
                  n_sendnodes += node_info_send[i].count_nodes;
                }

              foreign_point_data *buf_foreignpoints =
                  (foreign_point_data *)Mem.mymalloc("buf_foreignpoints", n_sendpoints * sizeof(foreign_point_data));

              node *buf_foreignnodes = (node *)Mem.mymalloc("buf_foreignnodes", n_sendnodes * sizeof(node));

              /* now receive the points and nodes */
              if(n_sendpoints > 0)
                MPI_Recv(buf_foreignpoints, n_sendpoints * sizeof(foreign_point_data), MPI_BYTE, ghost_rank, TAG_PDATA,
                         MPI_COMM_WORLD, MPI_STATUS_IGNORE);

              if(n_sendnodes > 0)
                MPI_Recv(buf_foreignnodes, n_sendnodes * sizeof(node), MPI_BYTE, ghost_rank, TAG_SPHDATA, MPI_COMM_WORLD,
                         MPI_STATUS_IGNORE);

              /* now we have to link the nodes and particles into the tree */

              int n_nodes_used = 0;
              int n_parts_used = 0;

              for(int i = 0; i < CountFetch[ghost_rank]; i++)
                {
                  int k = OffsetFetch[ghost_rank] + i;

                  int no = StackToFetch[k].NodeToOpen;
                  int shmrank = StackToFetch[k].ShmRank;

                  node *nop = get_nodep(no, shmrank);

                  int n_nodes = node_info_send[i].count_nodes;
                  int n_parts = node_info_send[i].count_parts;

                  while(nop->access.test_and_set(std::memory_order_acquire))
                    {
                      // acquire spin lock
                    }
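
                  /* The test_and_set loop above acts as a simple per-node spin lock: the
                   * acquire ordering pairs with the access.clear(std::memory_order_release)
                   * below, so all writes that link the fetched data into the tree become
                   * visible to the next task that acquires this node. */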

                  if(nop->cannot_be_opened_locally)  // make sure the node hasn't been inserted by another task yet
                    {
                      int pfirst = -1;
                      int plast = -1;

                      for(int j = 0; j < n_nodes; j++)
                        {
                          if(NumForeignNodes >= MaxForeignNodes)
                            Terminate(
                                "We are out of storage for foreign nodes: NumForeignNodes=%d MaxForeignNodes=%d j=%d n_parts=%d",
                                NumForeignNodes, MaxForeignNodes, j, n_parts);

                          /* tree index of this node */
                          int p = EndOfTreePoints + NumForeignNodes++;

                          // we cannot simply do Foreign_Nodes[p - EndOfTreePoints] = buf_foreignnodes[n_nodes_used++];
                          // because our std::atomic_flag member has a deleted copy assignment operator
                          memcpy(static_cast<void *>(&Foreign_Nodes[p - EndOfTreePoints]),
                                 static_cast<void *>(&buf_foreignnodes[n_nodes_used++]), sizeof(node));

                          Foreign_Nodes[p - EndOfTreePoints].access.clear();
                          Foreign_Nodes[p - EndOfTreePoints].flag_already_fetched = 0;

                          Foreign_Nodes[p - EndOfTreePoints].nextnode = -1;

                          if(plast >= 0) /* plast here still refers to the previous element */
                            {
                              if(plast < EndOfForeignNodes) /* we have a foreign node */
                                {
                                  Foreign_Nodes[plast - EndOfTreePoints].sibling = p;
                                  Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = Shmem.Island_ThisTask;
                                }
                              else
                                {
                                  Foreign_Points[plast - EndOfForeignNodes].Nextnode = p;
                                  Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = Shmem.Island_ThisTask;
                                }
                            }

                          if(pfirst < 0)
                            pfirst = p;

                          plast = p;
                        }

                      for(int j = 0; j < n_parts; j++)
                        {
                          if(NumForeignPoints >= MaxForeignPoints)
                            Terminate(
                                "We are out of storage for foreign points: NumForeignPoints=%d MaxForeignPoints=%d j=%d n_parts=%d",
                                NumForeignPoints, MaxForeignPoints, j, n_parts);

                          /* global tree index of this foreign point */
                          int p = EndOfForeignNodes + NumForeignPoints++;
                          Foreign_Points[p - EndOfForeignNodes] = buf_foreignpoints[n_parts_used++];

                          if(plast >= 0) /* plast here still refers to the previous element */
                            {
                              if(plast < EndOfForeignNodes) /* we have a foreign node */
                                {
                                  Foreign_Nodes[plast - EndOfTreePoints].sibling = p;
                                  Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = Shmem.Island_ThisTask;
                                }
                              else
                                {
                                  Foreign_Points[plast - EndOfForeignNodes].Nextnode = p;
                                  Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = Shmem.Island_ThisTask;
                                }
                            }

                          if(pfirst < 0)
                            pfirst = p;

                          plast = p;
                        }

                      if(plast < 0 || pfirst < 0)
                        Terminate("plast=%d < 0 || pfirst=%d < 0 n_nodes=%d n_parts=%d", plast, pfirst, n_nodes, n_parts);

                      if(plast >= 0) /* attach the end of the new chain to the node's sibling */
                        {
                          if(plast < EndOfForeignNodes) /* we have a foreign node */
                            {
                              Foreign_Nodes[plast - EndOfTreePoints].sibling = nop->sibling;
                              Foreign_Nodes[plast - EndOfTreePoints].sibling_shmrank = nop->sibling_shmrank;

                              if(Foreign_Nodes[plast - EndOfTreePoints].cannot_be_opened_locally == 0)
                                if(D->ThisTask == 0)
                                  Terminate("what? plast - EndOfTreePoints=%d", plast - EndOfTreePoints);
                            }
                          else
                            {
                              Foreign_Points[plast - EndOfForeignNodes].Nextnode = nop->sibling;
                              Foreign_Points[plast - EndOfForeignNodes].Nextnode_shmrank = nop->sibling_shmrank;
                            }
                        }

                      nop->nextnode = pfirst;
                      nop->nextnode_shmrank = Shmem.Island_ThisTask;

                      nop->cannot_be_opened_locally.store(0, std::memory_order_release);

                      sum_NumForeignNodes += n_nodes;
                      sum_NumForeignPoints += n_parts;
                    }
                  else
                    {
                      // skip this node, because apparently it was fetched in the meantime by another MPI task
                      n_nodes_used += n_nodes;
                      n_parts_used += n_parts;
                    }

                  nop->access.clear(std::memory_order_release);
                }

              if(n_sendpoints != n_parts_used || n_sendnodes != n_nodes_used)
                Terminate("n_sendpoints != n_parts_used || n_sendnodes != n_nodes_used");

              Mem.myfree(buf_foreignnodes);
              Mem.myfree(buf_foreignpoints);
              Mem.myfree(node_info_send);
            }
        }
    }

  NumOnFetchStack = 0;

  Mem.myfree(OffsetFetch);
  Mem.myfree(CountFetch);
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::treefree(void)
{
  MPI_Comm_free(&TreeSharedMemComm);

  if(MaxPart == 0)
    return;  // nothing to be done

  if(Nodes)
    {
      if(Father)
        {
          Mem.myfree_movable(Father);
          Mem.myfree_movable(Nextnode);
        }
      if(Points)
        {
          Mem.myfree_movable(Points);
        }

      Mem.myfree_movable(Nodes + MaxPart + D->NTopnodes);

      if(TreeSharedMem_ThisTask == 0)
        {
          if(TreeSharedMem_NTask == Shmem.World_NTask || D->NTask < Shmem.Sim_NTask)
            {
              Mem.myfree_movable(TopNodes + MaxPart);
              Mem.myfree_movable(NodeIndex);
              Mem.myfree_movable(NodeSibling);
              Mem.myfree_movable(NodeLevel);
            }
          else
            {
              int ghost_rank = Shmem.GetGhostRankForSimulCommRank[D->ThisTask];

              // tell the ghost rank to free the storage
              MPI_Send(&TreeInfoHandle, 1, MPI_INT, ghost_rank, TAG_TOPNODE_FREE, MPI_COMM_WORLD);
            }
        }

      Mem.myfree(TreeSharedMemBaseAddr);

      Mem.myfree(TreePS_offsets);
      Mem.myfree(TreeSphP_offsets);
      Mem.myfree(TreeP_offsets);
      Mem.myfree(TreeForeign_Points_offsets);
      Mem.myfree(TreeForeign_Nodes_offsets);
      Mem.myfree(TreeNextnode_offsets);
      Mem.myfree(TreePoints_offsets);
      Mem.myfree(TreeNodes_offsets);

      Mem.myfree_movable(Recv_offset);
      Mem.myfree_movable(Recv_count);
      Mem.myfree_movable(Send_offset);
      Mem.myfree_movable(Send_count);

      Nodes = NULL;
      TopNodes = NULL;
      NodeIndex = NULL;
      NodeSibling = NULL;
      NodeLevel = NULL;
      Points = NULL;
      Nextnode = NULL;
      Father = NULL;
    }
  else
    Terminate("trying to free the tree even though it's not allocated");
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::tree_export_node_threads(int no, int i, thread_data *thread,
                                                                                   offset_tuple off)
{
  int task = D->TaskOfLeaf[no - (MaxPart + MaxNodes)];
  int nodeindex = NodeIndex[no - (MaxPart + MaxNodes)];

  tree_export_node_threads_by_task_and_node(task, nodeindex, i, thread, off);
}

template <typename node, typename partset, typename point_data, typename foreign_point_data>
void tree<node, partset, point_data, foreign_point_data>::tree_export_node_threads_by_task_and_node(int task, int nodeindex, int i,
                                                                                                    thread_data *thread,
                                                                                                    offset_tuple off)
{
  if(task < 0 || task >= D->NTask)
    Terminate("task < 0 || task >= D->NTask");

  if(task != D->ThisTask)
    {
      if(thread->Exportflag[task] != i)
        {
          thread->Exportflag[task] = i;
          int nexp = thread->Nexport++;
          thread->PartList[nexp].Task = task;
          thread->PartList[nexp].Index = i;
          thread->ExportSpace -= thread->ItemSize;
        }

      int nexp = thread->NexportNodes++;
      nexp = -1 - nexp;
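      /* Note on the indexing trick above: particle entries fill the export buffer from
       * the front, while node entries are addressed with negative indices (nexp = -1 - nexp)
       * relative to the end of the same buffer, so the two lists grow toward each other
       * until ExportSpace is exhausted. */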
      data_nodelist *nodelist = (data_nodelist *)(((char *)thread->PartList) + thread->InitialSpace);
      nodelist[nexp].Task = task;
      nodelist[nexp].Index = i;
      nodelist[nexp].NodeInfo.Node = nodeindex;
      thread->ExportSpace -= (sizeof(data_nodelist) + sizeof(int));
    }
}

#include "../ngbtree/ngbtree.h"

#include "../gravtree/gravtree.h"

#ifdef FOF
#include "../fof/foftree.h"
#if defined(LIGHTCONE) && (defined(LIGHTCONE_PARTICLES_GROUPS) || defined(LIGHTCONE_IMAGE_COMP_HSML_VELDISP))
/* make sure that we instantiate the template */
#include "../data/lcparticles.h"
#endif
#endif