CoMD
A Mini-app for Co-Design of Classical Molecular Dynamics.
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
performanceTimers.c
Go to the documentation of this file.
1 /// \file
2 /// Performance timer functions.
3 ///
4 /// Use the timer functionality to collect timing and number of calls
5 /// information for chosen computations (such as force calls) and
6 /// communication (such as sends, receives, reductions). Timing results
7 /// are reported at the end of the run showing overall timings and
8 /// statistics of timings across ranks.
9 ///
10 /// A new timer can be added as follows:
11 /// -# add new handle to the TimerHandle in performanceTimers.h
12 /// -# provide a corresponding name in timerName
13 ///
14 /// Note that the order of the handles and names must be the
15 /// same. This order also determines the order in which the timers are
16 /// printed. Names can contain leading spaces to show a hierarchical
17 /// ordering. Timers with zero calls are omitted from the report.
18 ///
19 /// Raw timer data is obtained from the getTime() and getTick()
20 /// functions. The supplied portable versions of these functions can be
21 /// replaced with platform specific versions for improved accuracy or
22 /// lower latency.
23 /// \see TimerHandle
24 /// \see getTime
25 /// \see getTick
26 ///
27 
28 
29 #include "performanceTimers.h"
30 
31 #include <stdio.h>
32 #include <sys/time.h>
33 #include <string.h>
34 #include <stdint.h>
35 #include <inttypes.h>
36 #include <math.h>
37 
38 #include "performanceTimers.h"
39 #include "mytype.h"
40 #include "parallel.h"
41 #include "yamlOutput.h"
42 
43 static uint64_t getTime(void);
44 static double getTick(void);
45 static void timerStats(void);
46 
47 /// You must add timer name in same order as enum in .h file.
48 /// Leading spaces can be specified to show a hierarchy of timers.
50  "total",
51  "loop",
52  "timestep",
53  " position",
54  " velocity",
55  " redistribute",
56  " atomHalo",
57  " force",
58  " eamHalo",
59  "commHalo",
60  "commReduce"
61 };
62 
63 /// Timer data collected. Also facilitates computing averages and
64 /// statistics.
65 typedef struct TimersSt
66 {
67  uint64_t start; //!< call start time
68  uint64_t total; //!< current total time
69  uint64_t count; //!< current call count
70  uint64_t elapsed; //!< lap time
71 
72  int minRank; //!< rank with min value
73  int maxRank; //!< rank with max value
74 
75  double minValue; //!< min over ranks
76  double maxValue; //!< max over ranks
77  double average; //!< average over ranks
78  double stdev; //!< stdev across ranks
79 } Timers;
80 
82 
83 void profileStart(const enum TimerHandle handle)
84 {
85  perfTimer[handle].start = getTime();
86 }
87 
88 void profileStop(const enum TimerHandle handle)
89 {
90  perfTimer[handle].count += 1;
91  uint64_t delta = getTime() - perfTimer[handle].start;
92  perfTimer[handle].total += delta;
93  perfTimer[handle].elapsed += delta;
94 }
95 
96 /// \details
97 /// Return elapsed time (in seconds) since last call with this handle
98 /// and clear for next lap.
99 double getElapsedTime(const enum TimerHandle handle)
100 {
101  double etime = getTick() * (double)perfTimer[handle].elapsed;
102  perfTimer[handle].elapsed = 0;
103 
104  return etime;
105 }
106 
107 /// \details
108 /// The report contains two blocks. The upper block is performance
109 /// information for the printRank. The lower block is statistical
110 /// information over all ranks.
111 void printPerformanceResults(int nGlobalAtoms)
112 {
113  // Collect timer statistics overall and across ranks
114  timerStats();
115 
116  if (!printRank())
117  return;
118 
119  // only print timers with non-zero values.
120  double tick = getTick();
121  double loopTime = perfTimer[loopTimer].total*tick;
122 
123  fprintf(screenOut, "\n\nTimings for Rank %d\n", getMyRank());
124  fprintf(screenOut, " Timer # Calls Avg/Call (s) Total (s) %% Loop\n");
125  fprintf(screenOut, "___________________________________________________________________\n");
126  for (int ii=0; ii<numberOfTimers; ++ii)
127  {
128  double totalTime = perfTimer[ii].total*tick;
129  if (perfTimer[ii].count > 0)
130  fprintf(screenOut, "%-16s%12"PRIu64" %8.4f %8.4f %8.2f\n",
131  timerName[ii],
132  perfTimer[ii].count,
133  totalTime/(double)perfTimer[ii].count,
134  totalTime,
135  totalTime/loopTime*100.0);
136  }
137 
138  fprintf(screenOut, "\nTiming Statistics Across %d Ranks:\n", getNRanks());
139  fprintf(screenOut, " Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)\n");
140  fprintf(screenOut, "_____________________________________________________________________________\n");
141 
142  for (int ii = 0; ii < numberOfTimers; ++ii)
143  {
144  if (perfTimer[ii].count > 0)
145  fprintf(screenOut, "%-16s%6d:%10.4f %6d:%10.4f %10.4f %10.4f\n",
146  timerName[ii],
147  perfTimer[ii].minRank, perfTimer[ii].minValue*tick,
148  perfTimer[ii].maxRank, perfTimer[ii].maxValue*tick,
149  perfTimer[ii].average*tick, perfTimer[ii].stdev*tick);
150  }
151  real_t atomsPerTask = nGlobalAtoms/(real_t)getNRanks();
152  real_t atomRate = perfTimer[computeForceTimer].average * tick * 1e6 /
153  (atomsPerTask * perfTimer[computeForceTimer].count);
154  fprintf(screenOut, "\n---------------------------------------------------\n");
155  fprintf(screenOut, " Average atom update rate: %6.2f us/atom/task\n", atomRate);
156  fprintf(screenOut, "---------------------------------------------------\n\n");
157 }
158 
160 {
161  if (! printRank())
162  return;
163 
164  double tick = getTick();
165  double loopTime = perfTimer[loopTimer].total*tick;
166 
167  fprintf(file,"\nPerformance Results:\n");
168  fprintf(file, " TotalRanks: %d\n", getNRanks());
169  fprintf(file, " ReportingTimeUnits: seconds\n");
170  fprintf(file, "Performance Results For Rank %d:\n", getMyRank());
171  for (int ii = 0; ii < numberOfTimers; ii++)
172  {
173  if (perfTimer[ii].count > 0)
174  {
175  double totalTime = perfTimer[ii].total*tick;
176  fprintf(file, " Timer: %s\n", timerName[ii]);
177  fprintf(file, " CallCount: %"PRIu64"\n", perfTimer[ii].count);
178  fprintf(file, " AvgPerCall: %8.4f\n", totalTime/(double)perfTimer[ii].count);
179  fprintf(file, " Total: %8.4f\n", totalTime);
180  fprintf(file, " PercentLoop: %8.2f\n", totalTime/loopTime*100);
181  }
182  }
183 
184  fprintf(file, "Performance Results Across Ranks:\n");
185  for (int ii = 0; ii < numberOfTimers; ii++)
186  {
187  if (perfTimer[ii].count > 0)
188  {
189  fprintf(file, " Timer: %s\n", timerName[ii]);
190  fprintf(file, " MinRank: %d\n", perfTimer[ii].minRank);
191  fprintf(file, " MinTime: %8.4f\n", perfTimer[ii].minValue*tick);
192  fprintf(file, " MaxRank: %d\n", perfTimer[ii].maxRank);
193  fprintf(file, " MaxTime: %8.4f\n", perfTimer[ii].maxValue*tick);
194  fprintf(file, " AvgTime: %8.4f\n", perfTimer[ii].average*tick);
195  fprintf(file, " StdevTime: %8.4f\n", perfTimer[ii].stdev*tick);
196  }
197  }
198 
199  fprintf(file, "\n");
200 }
201 
202 /// Returns current time as a 64-bit integer. This portable version
203 /// returns the number of microseconds since mindight, Jamuary 1, 1970.
204 /// Hence, timing data will have a resolution of 1 microsecond.
205 /// Platforms with access to calls with lower latency or higher
206 /// resolution (such as a cycle counter) may wish to replace this
207 /// implementation and change the conversion factor in getTick as
208 /// appropriate.
209 /// \see getTick for the conversion factor between the integer time
210 /// units of this function and seconds.
211 static uint64_t getTime(void)
212 {
213  struct timeval ptime;
214  uint64_t t = 0;
215  gettimeofday(&ptime, (struct timezone *)NULL);
216  t = ((uint64_t)1000000)*(uint64_t)ptime.tv_sec + (uint64_t)ptime.tv_usec;
217 
218  return t;
219 }
220 
221 /// Returns the factor for converting the integer time reported by
222 /// getTime into seconds. The portable getTime returns values in units
223 /// of microseconds so the conversion is simply 1e-6.
224 /// \see getTime
225 static double getTick(void)
226 {
227  double seconds_per_cycle = 1.0e-6;
228  return seconds_per_cycle;
229 }
230 
231 /// Collect timer statistics across ranks.
232 void timerStats(void)
233 {
234  double sendBuf[numberOfTimers], recvBuf[numberOfTimers];
235 
236  // Determine average of each timer across ranks
237  for (int ii = 0; ii < numberOfTimers; ii++)
238  sendBuf[ii] = (double)perfTimer[ii].total;
239  addDoubleParallel(sendBuf, recvBuf, numberOfTimers);
240 
241  for (int ii = 0; ii < numberOfTimers; ii++)
242  perfTimer[ii].average = recvBuf[ii] / (double)getNRanks();
243 
244 
245  // Determine min and max across ranks and which rank
246  RankReduceData reduceSendBuf[numberOfTimers], reduceRecvBuf[numberOfTimers];
247  for (int ii = 0; ii < numberOfTimers; ii++)
248  {
249  reduceSendBuf[ii].val = (double)perfTimer[ii].total;
250  reduceSendBuf[ii].rank = getMyRank();
251  }
252  minRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers);
253  for (int ii = 0; ii < numberOfTimers; ii++)
254  {
255  perfTimer[ii].minValue = reduceRecvBuf[ii].val;
256  perfTimer[ii].minRank = reduceRecvBuf[ii].rank;
257  }
258  maxRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers);
259  for (int ii = 0; ii < numberOfTimers; ii++)
260  {
261  perfTimer[ii].maxValue = reduceRecvBuf[ii].val;
262  perfTimer[ii].maxRank = reduceRecvBuf[ii].rank;
263  }
264 
265  // Determine standard deviation
266  for (int ii = 0; ii < numberOfTimers; ii++)
267  {
268  double temp = (double)perfTimer[ii].total - perfTimer[ii].average;
269  sendBuf[ii] = temp * temp;
270  }
271  addDoubleParallel(sendBuf, recvBuf, numberOfTimers);
272  for (int ii = 0; ii < numberOfTimers; ii++)
273  {
274  perfTimer[ii].stdev = sqrt(recvBuf[ii] / (double) getNRanks());
275  }
276 }
277