GADGET-4
healthtest.cc
/*******************************************************************************
 * \copyright This file is part of the GADGET4 N-body/SPH code developed
 * \copyright by Volker Springel. Copyright (C) 2014-2020 by Volker Springel
 * \copyright (vspringel@mpa-garching.mpg.de) and all contributing authors.
 *******************************************************************************/

#include "gadgetconfig.h"

#include <math.h>
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../data/allvars.h"
#include "../data/dtypes.h"
#include "../data/mymalloc.h"
#include "../logs/logs.h"
#include "../main/simulation.h"
#include "../mpi_utils/mpi_utils.h"
#include "../system/system.h"

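// Tuning knobs for the individual tests: roughly the total data volume each task exchanges
// over the full hypercube test, the iteration count of the CPU busy loop, and the number of
// MPI_Iprobe calls that are timed.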
#define TEST_PACKET_SIZE_IN_MB 5
#define WORK_LOOP_COUNTER 50000000
#define WORK_NUMBER_OF_IPROBE_TESTS 1000000

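// Relative spread between the slowest and fastest rank (normalized by the average) that is
// still considered acceptable; larger values trigger a warning below. The #ifndef guard
// allows the value to be overridden at compile time.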
#ifndef MAX_VARIATION_TOLERANCE
#define MAX_VARIATION_TOLERANCE 0.5
#endif

void sim::healthtest(void)
{
  mpi_printf("\n");

  measure_cpu_performance(Communicator);

  // Let's take a look at the communication speed in a global all-to-all data exchange realized through pairwise exchanges along a
  // hypercube
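  // (e.g. for 4 ranks: stage 1 pairs 0<->1 and 2<->3, stage 2 pairs 0<->2 and 1<->3,
  //  stage 3 pairs 0<->3 and 1<->2, so every pair communicates exactly once)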
  if(NTask > 1)
    measure_hyper_cube_speed("Full hypercube:", Communicator);

  // Let's take a look at inter-node communication speed
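  // Split off a communicator that contains exactly one rank per node (color 0); only these
  // ranks run the hypercube test, so the measured traffic has to cross the network between
  // nodes. The remaining ranks (color 1) merely take part in the split and then free it.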
  if(NumNodes > 1)
    {
      int CommSplitColor;

      if(RankInThisNode == 0)
        CommSplitColor = 0;
      else
        CommSplitColor = 1;

      MPI_Comm comm;
      MPI_Comm_split(Communicator, CommSplitColor, ThisTask, &comm);

      if(RankInThisNode == 0)
        measure_hyper_cube_speed("Internode cube:", comm);

      MPI_Comm_free(&comm);
    }

  // Now look at intra-node communication speed
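  // Here every node gets its own communicator (color = node index), so the exchanged data
  // stays within a node. All nodes run the test, but only the output of the node containing
  // task 0 is reported (mpi_printf() writes on task 0 only), hence the "1st node" label.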
  if(NumNodes < NTask)
    {
      int CommSplitColor = ThisNode;
      MPI_Comm comm;
      MPI_Comm_split(Communicator, CommSplitColor, ThisTask, &comm);

      measure_hyper_cube_speed("Intranode cube, 1st node:", comm);

      MPI_Comm_free(&comm);
    }

  measure_iprobe_performance("Iprobe for any message:");

  mpi_printf("\n");
}

double sim::measure_cpu_performance(MPI_Comm Communicator)
{
  int loc_ntask, loc_thistask, loc_ptask;

  double ta = Logs.second();

  MPI_Comm_rank(Communicator, &loc_thistask);
  MPI_Comm_size(Communicator, &loc_ntask);

  for(loc_ptask = 0; loc_ntask > (1 << loc_ptask); loc_ptask++)
    ;

  double sum = 0;

  MPI_Barrier(Communicator);

  double t0 = Logs.second();

  // do some computationally intense (but useless) work for a while
  for(int i = 0; i < WORK_LOOP_COUNTER; i++)
    sum += sin((i + 0.1) / WORK_LOOP_COUNTER) / (2.0 + cos(i - 0.1) / WORK_LOOP_COUNTER);

  double t1 = Logs.second();

  double tperf = Logs.timediff(t0, t1), tperfsum;

  MPI_Allreduce(&tperf, &tperfsum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  double tavg = tperfsum / loc_ntask;

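  // Pair each timing with a rank identifier and use MPI_MINLOC/MPI_MAXLOC reductions on the
  // MPI_DOUBLE_INT type to obtain not just the extreme timings but also which task (and which
  // node, via the second struct) produced them.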
  struct
  {
    double t;
    int rank;
  } local = {tperf, ThisTask}, localnode = {tperf, ThisNode}, min_time, max_time, min_timenode, max_timenode;

  MPI_Allreduce(&local, &min_time, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&local, &max_time, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  MPI_Allreduce(&localnode, &min_timenode, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&localnode, &max_timenode, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  double variation = (max_time.t - min_time.t) / tavg;

  double tb = Logs.second();

129 "HEALTHTEST: %25s %8.3f sec %7.3f%% variation | Best=%g on Task=%d/Node=%d, Worst=%g on Task=%d/Node=%d, test "
130 "took %g sec\n",
131 "CPU performance:", tavg, 100.0 * variation, min_time.t, min_time.rank, min_timenode.rank, max_time.t, max_time.rank,
132 max_timenode.rank, Logs.timediff(ta, tb));
133
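  // If the spread is too large, find out on which host the slowest rank lives, dump a process
  // listing from that host (useful for spotting competing processes), and issue a warning.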
  if(variation >= MAX_VARIATION_TOLERANCE)
    {
      char name_maxnode[MPI_MAX_PROCESSOR_NAME];
      int len;
      if(ThisTask == max_time.rank)
        MPI_Get_processor_name(name_maxnode, &len);

      MPI_Bcast(name_maxnode, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, max_time.rank, Communicator);

      char buf[1000 + MPI_MAX_PROCESSOR_NAME];
      sprintf(buf, "processes_%s.txt", name_maxnode);

      mpi_printf("HEALTHTEST: We are dumping a process list to the file '%s'\n", buf);

      if(ThisTask == max_time.rank)
        {
          char cmd[10000 + MPI_MAX_PROCESSOR_NAME];
          sprintf(cmd, "ps -ef >& %s", buf);
          system(cmd);
        }

      MPI_Barrier(Communicator);

      // only issue a warning for now instead of terminating the code
      warn(
          "\n\nHEALTHTEST: We issue a warning because the performance variation=%g of the CPUs lies above the prescribed tolerance "
          "MAX_VARIATION_TOLERANCE=%g, possibly indicating a machine problem. (sum=%g)\n",
          variation, MAX_VARIATION_TOLERANCE, sum);
    }

  return sum;
}

double sim::measure_hyper_cube_speed(const char *tag, MPI_Comm Communicator)
{
  double ta = Logs.second();

  int loc_ntask, loc_thistask, loc_ptask;

  MPI_Comm_rank(Communicator, &loc_thistask);
  MPI_Comm_size(Communicator, &loc_ntask);

  for(loc_ptask = 0; loc_ntask > (1 << loc_ptask); loc_ptask++)
    ;
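  // loc_ptask is now the smallest exponent with (1 << loc_ptask) >= loc_ntask, i.e. the
  // dimension of the enclosing hypercube used for the pairwise exchange loop below.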

  int bytecount = (TEST_PACKET_SIZE_IN_MB * 1024L * 1024L) / loc_ntask;
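  // Each pairwise message shrinks with the number of ranks, so the aggregate volume a task
  // sends over the whole hypercube stays roughly at TEST_PACKET_SIZE_IN_MB.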

  double tall = 0;
  int count = 0;

  char *sendbuf = (char *)Mem.mymalloc_clear("send", bytecount * sizeof(char));
  char *recvbuf = (char *)Mem.mymalloc_clear("recv", bytecount * sizeof(char));

  /* exchange the test data */
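  // At stage ngrp, each rank is paired with the rank whose index differs in exactly the bits
  // set in ngrp (bitwise XOR), which yields a conflict-free schedule covering all pairs.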
  for(int ngrp = 1; ngrp < (1 << loc_ptask); ngrp++)
    {
      int recvTask = loc_thistask ^ ngrp;

      MPI_Barrier(Communicator);

      if(recvTask < loc_ntask)
        {
          double t0 = Logs.second();
          myMPI_Sendrecv(sendbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A, recvbuf, bytecount, MPI_BYTE, recvTask, TAG_DENS_A,
                         Communicator, MPI_STATUS_IGNORE);
          double t1 = Logs.second();

          tall += Logs.timediff(t0, t1);
          count++;
        }
    }

  Mem.myfree(recvbuf);
  Mem.myfree(sendbuf);

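  // Average time per pairwise exchange; the factor 0.5 presumably attributes half of each
  // Sendrecv's wall-clock time to a single transfer direction of 'bytecount' bytes, which is
  // what the MB/s figure below is based on.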
  double tperf = 0.5 * tall / count, tperfsum;

  MPI_Allreduce(&tperf, &tperfsum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  double tavg = tperfsum / loc_ntask;

  struct
  {
    double t;
    int rank;
  } local = {tperf, ThisTask}, localnode = {tperf, ThisNode}, min_time, max_time, min_timenode, max_timenode;

  MPI_Allreduce(&local, &min_time, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&local, &max_time, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  MPI_Allreduce(&localnode, &min_timenode, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&localnode, &max_timenode, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  double tb = Logs.second();

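  // The variation is computed on the effective per-pair bandwidths (bytecount / time) rather
  // than on the raw times, again normalized by the average.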
  double variation = (bytecount / min_time.t - bytecount / max_time.t) / (bytecount / tavg);

231 "HEALTHTEST: %25s %8.1f MB/s per pair %7.3f%% variation | Best=%g on Task=%d/Node=%d, Worst=%g on Task=%d/Node=%d, test "
232 "took %g sec\n",
233 tag, bytecount / tavg * TO_MBYTE_FAC, 100.0 * variation, bytecount / min_time.t * TO_MBYTE_FAC, min_time.rank, min_timenode.rank,
234 bytecount / max_time.t * TO_MBYTE_FAC, max_time.rank, max_timenode.rank, Logs.timediff(ta, tb));

  if(variation > MAX_VARIATION_TOLERANCE && ThisTask == 0)
    warn(
        "\nThe performance variation=%g of the communication speed lies above the prescribed tolerance MAX_VARIATION_TOLERANCE=%g, "
        "possibly indicating a machine problem.\n",
        variation, MAX_VARIATION_TOLERANCE);

  return tavg;
}

void sim::measure_iprobe_performance(const char *tag)
{
  double ta = Logs.second();

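  // Time a large number of non-blocking probes for any pending message. Normally nothing is
  // in flight at this point, so this measures the bare polling overhead of MPI_Iprobe().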
  for(int i = 0; i < WORK_NUMBER_OF_IPROBE_TESTS; i++)
    {
      int flag;
      MPI_Status status;

      MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, Communicator, &flag, &status);
    }

  double tb = Logs.second();

  double tperf = Logs.timediff(ta, tb) / WORK_NUMBER_OF_IPROBE_TESTS;

  struct
  {
    double t;
    int rank;
  } local = {tperf, ThisTask}, min_time, max_time;

  MPI_Allreduce(&local, &min_time, 1, MPI_DOUBLE_INT, MPI_MINLOC, Communicator);
  MPI_Allreduce(&local, &max_time, 1, MPI_DOUBLE_INT, MPI_MAXLOC, Communicator);

  double tperfsum;
  MPI_Allreduce(&tperf, &tperfsum, 1, MPI_DOUBLE, MPI_SUM, Communicator);
  double tavg = tperfsum / NTask;

  char name_minnode[MPI_MAX_PROCESSOR_NAME];
  char name_maxnode[MPI_MAX_PROCESSOR_NAME];

  int len;
  if(ThisTask == min_time.rank)
    MPI_Get_processor_name(name_minnode, &len);
  if(ThisTask == max_time.rank)
    MPI_Get_processor_name(name_maxnode, &len);

  MPI_Bcast(name_minnode, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, min_time.rank, Communicator);
  MPI_Bcast(name_maxnode, MPI_MAX_PROCESSOR_NAME, MPI_BYTE, max_time.rank, Communicator);

  double variation = (max_time.t - min_time.t) / tavg;

289 "HEALTHTEST: %25s %g s per MPI_Ip%7.3f%% variation | Best=%g on Task=%d/Node=%s, Worst=%g on Task=%d/Node=%s, test took %g "
290 "sec\n",
291 tag, tavg, 100.0 * variation, min_time.t, min_time.rank, name_minnode, max_time.t, max_time.rank, name_maxnode,
292 Logs.timediff(ta, tb));
}