CoMD
A Mini-app for Co-Design of Classical Molecular Dynamics.
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
haloExchange.c
Go to the documentation of this file.
1 /// \file
2 /// Communicate halo data such as "ghost" atoms with neighboring tasks.
3 /// In addition to ghost atoms, the EAM potential also needs to exchange
4 /// some force information. Hence this file implements both an atom
5 /// exchange and a force exchange, each with slightly different
6 /// properties due to their different roles.
7 ///
8 /// The halo exchange in CoMD 1.1 takes advantage of the Cartesian domain
9 /// decomposition as well as the link cell structure to quickly
10 /// determine what data needs to be sent.
11 ///
12 /// This halo exchange implementation is able to send data to all 26
13 /// neighboring tasks using only 6 messages. This is accomplished by
14 /// sending data across the x-faces, then the y-faces, and finally
15 /// across the z-faces. Some of the data that was received from the
16 /// x-faces is included in the y-face sends and so on. This
17 /// accumulation of data allows data to reach edge neighbors and corner
18 /// neighbors by a two or three step process.
19 ///
20 /// The advantage of this type of structured halo exchange is that it
21 /// minimizes the number of MPI messages to send, and maximizes the size
22 /// of those messages.
23 ///
24 /// The disadvantage of this halo exchange is that it serializes message
25 /// traffic. Only two messages can be in flight at once. The x-axis
26 /// messages must be received and processed before the y-axis messages
27 /// can begin. Architectures with low message latency and many off node
28 /// network links would likely benefit from alternate halo exchange
29 /// strategies that send independent messages to each neighbor task.
30 
31 #include "haloExchange.h"
32 
33 #include <assert.h>
34 
35 #include "CoMDTypes.h"
36 #include "decomposition.h"
37 #include "parallel.h"
38 #include "linkCells.h"
39 #include "eam.h"
40 #include "memUtils.h"
41 #include "performanceTimers.h"
42 
43 #define MAX(A,B) ((A) > (B) ? (A) : (B))
44 
45 /// Don't change the order of the faces in this enum.
49 
50 /// Don't change the order of the axes in this enum.
52 
53 /// Extra data members that are needed for the exchange of atom data.
54 /// For an atom exchange, the HaloExchangeSt::parms will point to a
55 /// structure of this type.
56 typedef struct AtomExchangeParmsSt
57 {
58  int nCells[6]; //!< Number of cells in cellList for each face.
59  int* cellList[6]; //!< List of link cells from which to load data for each face.
60  real_t* pbcFactor[6]; //!< Whether this face is a periodic boundary.
61 }
63 
64 /// Extra data members that are needed for the exchange of force data.
65 /// For an force exchange, the HaloExchangeSt::parms will point to a
66 /// structure of this type.
67 typedef struct ForceExchangeParmsSt
68 {
69  int nCells[6]; //!< Number of cells to send/recv for each face.
70  int* sendCells[6]; //!< List of link cells to send for each face.
71  int* recvCells[6]; //!< List of link cells to recv for each face.
72 }
74 
75 /// A structure to package data for a single atom to pack into a
76 /// send/recv buffer. Also used for sorting atoms within link cells.
77 typedef struct AtomMsgSt
78 {
79  int gid;
80  int type;
83 }
84 AtomMsg;
85 
86 /// Package data for the force exchange.
87 typedef struct ForceMsgSt
88 {
90 }
91 ForceMsg;
92 
93 static HaloExchange* initHaloExchange(Domain* domain);
94 static void exchangeData(HaloExchange* haloExchange, void* data, int iAxis);
95 
96 static int* mkAtomCellList(LinkCell* boxes, enum HaloFaceOrder iFace, const int nCells);
97 static int loadAtomsBuffer(void* vparms, void* data, int face, char* charBuf);
98 static void unloadAtomsBuffer(void* vparms, void* data, int face, int bufSize, char* charBuf);
99 static void destroyAtomsExchange(void* vparms);
100 
101 static int* mkForceSendCellList(LinkCell* boxes, int face, int nCells);
102 static int* mkForceRecvCellList(LinkCell* boxes, int face, int nCells);
103 static int loadForceBuffer(void* vparms, void* data, int face, char* charBuf);
104 static void unloadForceBuffer(void* vparms, void* data, int face, int bufSize, char* charBuf);
105 static void destroyForceExchange(void* vparms);
106 static int sortAtomsById(const void* a, const void* b);
107 
108 /// \details
109 /// When called in proper sequence by redistributeAtoms, the atom halo
110 /// exchange helps serve three purposes:
111 /// - Send ghost atom data to neighbor tasks.
112 /// - Shift atom coordinates by the global simulation size when they cross
113 /// periodic boundaries. This shift is performed in loadAtomsBuffer.
114 /// - Transfer ownership of atoms between tasks as the atoms move across
115 /// spatial domain boundaries. This transfer of ownership occurs in
116 /// two places. The former owner gives up ownership when
117 /// updateLinkCells moves a formerly local atom into a halo link cell.
118 /// The new owner accepts ownership when unloadAtomsBuffer calls
119 /// putAtomInBox to place a received atom into a local link cell.
120 ///
121 /// This constructor does the following:
122 ///
123 /// - Sets the bufCapacity to hold the largest possible number of atoms
124 /// that can be sent across a face.
125 /// - Initialize function pointers to the atom-specific versions
126 /// - Sets the number of link cells to send across each face.
127 /// - Builds the list of link cells to send across each face. As
128 /// explained in the comments for mkAtomCellList, this list must
129 /// include any link cell, local or halo, that could possibly contain
130 /// an atom that needs to be sent across the face. Atoms that need to
131 /// be sent include "ghost atoms" that are located in local link
132 /// cells that correspond to halo link cells on receiving tasks as well as
133 /// formerly local atoms that have just moved into halo link cells and
134 /// need to be sent to the rank that owns the spatial domain the atom
135 /// has moved into.
136 /// - Sets a coordinate shift factor for each face to account for
137 /// periodic boundary conditions. For most faces the factor is zero.
138 /// For faces on the +x, +y, or +z face of the simulation domain
139 /// the factor is -1.0 (to shift the coordinates by -1 times the
140 /// simulation domain size). For -x, -y, and -z faces of the
141 /// simulation domain, the factor is +1.0.
142 ///
143 /// \see redistributeAtoms
145 {
146  HaloExchange* hh = initHaloExchange(domain);
147 
148  int size0 = (boxes->gridSize[1]+2)*(boxes->gridSize[2]+2);
149  int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]+2);
150  int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
151  int maxSize = MAX(size0, size1);
152  maxSize = MAX(size1, size2);
153  hh->bufCapacity = maxSize*2*MAXATOMS*sizeof(AtomMsg);
154 
158 
160 
161  parms->nCells[HALO_X_MINUS] = 2*(boxes->gridSize[1]+2)*(boxes->gridSize[2]+2);
162  parms->nCells[HALO_Y_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[2]+2);
163  parms->nCells[HALO_Z_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
164  parms->nCells[HALO_X_PLUS] = parms->nCells[HALO_X_MINUS];
165  parms->nCells[HALO_Y_PLUS] = parms->nCells[HALO_Y_MINUS];
166  parms->nCells[HALO_Z_PLUS] = parms->nCells[HALO_Z_MINUS];
167 
168  for (int ii=0; ii<6; ++ii)
169  parms->cellList[ii] = mkAtomCellList(boxes, ii, parms->nCells[ii]);
170 
171  for (int ii=0; ii<6; ++ii)
172  {
173  parms->pbcFactor[ii] = comdMalloc(3*sizeof(real_t));
174  for (int jj=0; jj<3; ++jj)
175  parms->pbcFactor[ii][jj] = 0.0;
176  }
177  int* procCoord = domain->procCoord; //alias
178  int* procGrid = domain->procGrid; //alias
179  if (procCoord[HALO_X_AXIS] == 0) parms->pbcFactor[HALO_X_MINUS][HALO_X_AXIS] = +1.0;
180  if (procCoord[HALO_X_AXIS] == procGrid[HALO_X_AXIS]-1) parms->pbcFactor[HALO_X_PLUS][HALO_X_AXIS] = -1.0;
181  if (procCoord[HALO_Y_AXIS] == 0) parms->pbcFactor[HALO_Y_MINUS][HALO_Y_AXIS] = +1.0;
182  if (procCoord[HALO_Y_AXIS] == procGrid[HALO_Y_AXIS]-1) parms->pbcFactor[HALO_Y_PLUS][HALO_Y_AXIS] = -1.0;
183  if (procCoord[HALO_Z_AXIS] == 0) parms->pbcFactor[HALO_Z_MINUS][HALO_Z_AXIS] = +1.0;
184  if (procCoord[HALO_Z_AXIS] == procGrid[HALO_Z_AXIS]-1) parms->pbcFactor[HALO_Z_PLUS][HALO_Z_AXIS] = -1.0;
185 
186  hh->parms = parms;
187  return hh;
188 }
189 
190 /// The force exchange is considerably simpler than the atom exchange.
191 /// In the force case we only need to exchange data that is needed to
192 /// complete the force calculation. Since the atoms have not moved we
193 /// only need to send data from local link cells and we are guaranteed
194 /// that the same atoms exist in the same order in corresponding halo
195 /// cells on remote tasks. The only tricky part is the size of the
196 /// plane of local cells that needs to be sent grows in each direction.
197 /// This is because the y-axis send must send some of the data that was
198 /// received from the x-axis send, and the z-axis must send some data
199 /// from the y-axis send. This accumulation of data to send is
200 /// responsible for data reaching neighbor cells that share only edges
201 /// or corners.
202 ///
203 /// \see eam.c for an explanation of the requirement to exchange
204 /// force data.
206 {
207  HaloExchange* hh = initHaloExchange(domain);
208 
212 
213  int size0 = (boxes->gridSize[1])*(boxes->gridSize[2]);
214  int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]);
215  int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
216  int maxSize = MAX(size0, size1);
217  maxSize = MAX(size1, size2);
218  hh->bufCapacity = (maxSize)*MAXATOMS*sizeof(ForceMsg);
219 
221 
222  parms->nCells[HALO_X_MINUS] = (boxes->gridSize[1] )*(boxes->gridSize[2] );
223  parms->nCells[HALO_Y_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[2] );
224  parms->nCells[HALO_Z_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
225  parms->nCells[HALO_X_PLUS] = parms->nCells[HALO_X_MINUS];
226  parms->nCells[HALO_Y_PLUS] = parms->nCells[HALO_Y_MINUS];
227  parms->nCells[HALO_Z_PLUS] = parms->nCells[HALO_Z_MINUS];
228 
229  for (int ii=0; ii<6; ++ii)
230  {
231  parms->sendCells[ii] = mkForceSendCellList(boxes, ii, parms->nCells[ii]);
232  parms->recvCells[ii] = mkForceRecvCellList(boxes, ii, parms->nCells[ii]);
233  }
234 
235  hh->parms = parms;
236  return hh;
237 }
238 
240 {
241  (*haloExchange)->destroy((*haloExchange)->parms);
242  free(*haloExchange);
243  *haloExchange = NULL;
244 }
245 
247 {
248  for (int iAxis=0; iAxis<3; ++iAxis)
249  exchangeData(haloExchange, data, iAxis);
250 }
251 
252 /// Base class constructor.
254 {
255  HaloExchange* hh = comdMalloc(sizeof(HaloExchange));
256 
257  // Rank of neighbor task for each face.
258  hh->nbrRank[HALO_X_MINUS] = processorNum(domain, -1, 0, 0);
259  hh->nbrRank[HALO_X_PLUS] = processorNum(domain, +1, 0, 0);
260  hh->nbrRank[HALO_Y_MINUS] = processorNum(domain, 0, -1, 0);
261  hh->nbrRank[HALO_Y_PLUS] = processorNum(domain, 0, +1, 0);
262  hh->nbrRank[HALO_Z_MINUS] = processorNum(domain, 0, 0, -1);
263  hh->nbrRank[HALO_Z_PLUS] = processorNum(domain, 0, 0, +1);
264  hh->bufCapacity = 0; // will be set by sub-class.
265 
266  return hh;
267 }
268 
269 /// This is the function that does the heavy lifting for the
270 /// communication of halo data. It is called once for each axis and
271 /// sends and receives two message. Loading and unloading of the
272 /// buffers is in the hands of the sub-class virtual functions.
273 ///
274 /// \param [in] iAxis Axis index.
275 /// \param [in, out] data Pointer to data that will be passed to the load and
276 /// unload functions
277 void exchangeData(HaloExchange* haloExchange, void* data, int iAxis)
278 {
279  enum HaloFaceOrder faceM = 2*iAxis;
280  enum HaloFaceOrder faceP = faceM+1;
281 
282  char* sendBufM = comdMalloc(haloExchange->bufCapacity);
283  char* sendBufP = comdMalloc(haloExchange->bufCapacity);
284  char* recvBufM = comdMalloc(haloExchange->bufCapacity);
285  char* recvBufP = comdMalloc(haloExchange->bufCapacity);
286 
287  int nSendM = haloExchange->loadBuffer(haloExchange->parms, data, faceM, sendBufM);
288  int nSendP = haloExchange->loadBuffer(haloExchange->parms, data, faceP, sendBufP);
289 
290  int nbrRankM = haloExchange->nbrRank[faceM];
291  int nbrRankP = haloExchange->nbrRank[faceP];
292 
293  int nRecvM, nRecvP;
294 
296  nRecvP = sendReceiveParallel(sendBufM, nSendM, nbrRankM, recvBufP, haloExchange->bufCapacity, nbrRankP);
297  nRecvM = sendReceiveParallel(sendBufP, nSendP, nbrRankP, recvBufM, haloExchange->bufCapacity, nbrRankM);
299 
300  haloExchange->unloadBuffer(haloExchange->parms, data, faceM, nRecvM, recvBufM);
301  haloExchange->unloadBuffer(haloExchange->parms, data, faceP, nRecvP, recvBufP);
302  comdFree(recvBufP);
303  comdFree(recvBufM);
304  comdFree(sendBufP);
305  comdFree(sendBufM);
306 }
307 
308 /// Make a list of link cells that need to be sent across the specified
309 /// face. For each face, the list must include all cells, local and
310 /// halo, in the first two planes of link cells. Halo cells must be
311 /// included in the list of link cells to send since local atoms may
312 /// have moved from local cells into halo cells on this time step.
313 /// (Actual remote atoms should have been deleted, so the halo cells
314 /// should contain only these few atoms that have just crossed.)
315 /// Sending these atoms will allow them to be reassigned to the task
316 /// that covers the spatial domain they have moved into.
317 ///
318 /// Note that link cell grid coordinates range from -1 to gridSize[iAxis].
319 /// \see initLinkCells for an explanation link cell grid coordinates.
320 ///
321 /// \param [in] boxes Link cell information.
322 /// \param [in] iFace Index of the face data will be sent across.
323 /// \param [in] nCells Number of cells to send. This is used for a
324 /// consistency check.
325 /// \return The list of cells to send. Caller is responsible to free
326 /// the list.
327 int* mkAtomCellList(LinkCell* boxes, enum HaloFaceOrder iFace, const int nCells)
328 {
329  int* list = comdMalloc(nCells*sizeof(int));
330  int xBegin = -1;
331  int xEnd = boxes->gridSize[0]+1;
332  int yBegin = -1;
333  int yEnd = boxes->gridSize[1]+1;
334  int zBegin = -1;
335  int zEnd = boxes->gridSize[2]+1;
336 
337  if (iFace == HALO_X_MINUS) xEnd = xBegin+2;
338  if (iFace == HALO_X_PLUS) xBegin = xEnd-2;
339  if (iFace == HALO_Y_MINUS) yEnd = yBegin+2;
340  if (iFace == HALO_Y_PLUS) yBegin = yEnd-2;
341  if (iFace == HALO_Z_MINUS) zEnd = zBegin+2;
342  if (iFace == HALO_Z_PLUS) zBegin = zEnd-2;
343 
344  int count = 0;
345  for (int ix=xBegin; ix<xEnd; ++ix)
346  for (int iy=yBegin; iy<yEnd; ++iy)
347  for (int iz=zBegin; iz<zEnd; ++iz)
348  list[count++] = getBoxFromTuple(boxes, ix, iy, iz);
349  assert(count == nCells);
350  return list;
351 }
352 
353 /// The loadBuffer function for a halo exchange of atom data. Iterates
354 /// link cells in the cellList and load any atoms into the send buffer.
355 /// This function also shifts coordinates of the atoms by an appropriate
356 /// factor if they are being sent across a periodic boundary.
357 ///
358 /// \see HaloExchangeSt::loadBuffer for an explanation of the loadBuffer
359 /// parameters.
360 int loadAtomsBuffer(void* vparms, void* data, int face, char* charBuf)
361 {
362  AtomExchangeParms* parms = (AtomExchangeParms*) vparms;
363  SimFlat* s = (SimFlat*) data;
364  AtomMsg* buf = (AtomMsg*) charBuf;
365 
366  real_t* pbcFactor = parms->pbcFactor[face];
367  real3 shift;
368  shift[0] = pbcFactor[0] * s->domain->globalExtent[0];
369  shift[1] = pbcFactor[1] * s->domain->globalExtent[1];
370  shift[2] = pbcFactor[2] * s->domain->globalExtent[2];
371 
372  int nCells = parms->nCells[face];
373  int* cellList = parms->cellList[face];
374  int nBuf = 0;
375  for (int iCell=0; iCell<nCells; ++iCell)
376  {
377  int iBox = cellList[iCell];
378  int iOff = iBox*MAXATOMS;
379  for (int ii=iOff; ii<iOff+s->boxes->nAtoms[iBox]; ++ii)
380  {
381  buf[nBuf].gid = s->atoms->gid[ii];
382  buf[nBuf].type = s->atoms->iSpecies[ii];
383  buf[nBuf].rx = s->atoms->r[ii][0] + shift[0];
384  buf[nBuf].ry = s->atoms->r[ii][1] + shift[1];
385  buf[nBuf].rz = s->atoms->r[ii][2] + shift[2];
386  buf[nBuf].px = s->atoms->p[ii][0];
387  buf[nBuf].py = s->atoms->p[ii][1];
388  buf[nBuf].pz = s->atoms->p[ii][2];
389  ++nBuf;
390  }
391  }
392  return nBuf*sizeof(AtomMsg);
393 }
394 
395 /// The unloadBuffer function for a halo exchange of atom data.
396 /// Iterates the receive buffer and places each atom that was received
397 /// into the link cell that corresponds to the atom coordinate. Note
398 /// that this naturally accomplishes transfer of ownership of atoms that
399 /// have moved from one spatial domain to another. Atoms with
400 /// coordinates in local link cells automatically become local
401 /// particles. Atoms that are owned by other ranks are automatically
402 /// placed in halo kink cells.
403 /// \see HaloExchangeSt::unloadBuffer for an explanation of the
404 /// unloadBuffer parameters.
405 void unloadAtomsBuffer(void* vparms, void* data, int face, int bufSize, char* charBuf)
406 {
407  AtomExchangeParms* parms = (AtomExchangeParms*) vparms;
408  SimFlat* s = (SimFlat*) data;
409  AtomMsg* buf = (AtomMsg*) charBuf;
410  int nBuf = bufSize / sizeof(AtomMsg);
411  assert(bufSize % sizeof(AtomMsg) == 0);
412 
413  for (int ii=0; ii<nBuf; ++ii)
414  {
415  int gid = buf[ii].gid;
416  int type = buf[ii].type;
417  real_t rx = buf[ii].rx;
418  real_t ry = buf[ii].ry;
419  real_t rz = buf[ii].rz;
420  real_t px = buf[ii].px;
421  real_t py = buf[ii].py;
422  real_t pz = buf[ii].pz;
423  putAtomInBox(s->boxes, s->atoms, gid, type, rx, ry, rz, px, py, pz);
424  }
425 }
426 
427 void destroyAtomsExchange(void* vparms)
428 {
429  AtomExchangeParms* parms = (AtomExchangeParms*) vparms;
430 
431  for (int ii=0; ii<6; ++ii)
432  {
433  free(parms->pbcFactor[ii]);
434  free(parms->cellList[ii]);
435  }
436 }
437 
438 /// Make a list of link cells that need to send data across the
439 /// specified face. Note that this list must be compatible with the
440 /// corresponding recv list to ensure that the data goes to the correct
441 /// atoms.
442 ///
443 /// \see initLinkCells for information about the conventions for grid
444 /// coordinates of link cells.
445 int* mkForceSendCellList(LinkCell* boxes, int face, int nCells)
446 {
447  int* list = comdMalloc(nCells*sizeof(int));
448  int xBegin, xEnd, yBegin, yEnd, zBegin, zEnd;
449 
450  int nx = boxes->gridSize[0];
451  int ny = boxes->gridSize[1];
452  int nz = boxes->gridSize[2];
453  switch(face)
454  {
455  case HALO_X_MINUS:
456  xBegin=0; xEnd=1; yBegin=0; yEnd=ny; zBegin=0; zEnd=nz;
457  break;
458  case HALO_X_PLUS:
459  xBegin=nx-1; xEnd=nx; yBegin=0; yEnd=ny; zBegin=0; zEnd=nz;
460  break;
461  case HALO_Y_MINUS:
462  xBegin=-1; xEnd=nx+1; yBegin=0; yEnd=1; zBegin=0; zEnd=nz;
463  break;
464  case HALO_Y_PLUS:
465  xBegin=-1; xEnd=nx+1; yBegin=ny-1; yEnd=ny; zBegin=0; zEnd=nz;
466  break;
467  case HALO_Z_MINUS:
468  xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=ny+1; zBegin=0; zEnd=1;
469  break;
470  case HALO_Z_PLUS:
471  xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=ny+1; zBegin=nz-1; zEnd=nz;
472  break;
473  default:
474  assert(1==0);
475  }
476 
477  int count = 0;
478  for (int ix=xBegin; ix<xEnd; ++ix)
479  for (int iy=yBegin; iy<yEnd; ++iy)
480  for (int iz=zBegin; iz<zEnd; ++iz)
481  list[count++] = getBoxFromTuple(boxes, ix, iy, iz);
482 
483  assert(count == nCells);
484  return list;
485 }
486 
487 /// Make a list of link cells that need to receive data across the
488 /// specified face. Note that this list must be compatible with the
489 /// corresponding send list to ensure that the data goes to the correct
490 /// atoms.
491 ///
492 /// \see initLinkCells for information about the conventions for grid
493 /// coordinates of link cells.
494 int* mkForceRecvCellList(LinkCell* boxes, int face, int nCells)
495 {
496  int* list = comdMalloc(nCells*sizeof(int));
497  int xBegin, xEnd, yBegin, yEnd, zBegin, zEnd;
498 
499  int nx = boxes->gridSize[0];
500  int ny = boxes->gridSize[1];
501  int nz = boxes->gridSize[2];
502  switch(face)
503  {
504  case HALO_X_MINUS:
505  xBegin=-1; xEnd=0; yBegin=0; yEnd=ny; zBegin=0; zEnd=nz;
506  break;
507  case HALO_X_PLUS:
508  xBegin=nx; xEnd=nx+1; yBegin=0; yEnd=ny; zBegin=0; zEnd=nz;
509  break;
510  case HALO_Y_MINUS:
511  xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=0; zBegin=0; zEnd=nz;
512  break;
513  case HALO_Y_PLUS:
514  xBegin=-1; xEnd=nx+1; yBegin=ny; yEnd=ny+1; zBegin=0; zEnd=nz;
515  break;
516  case HALO_Z_MINUS:
517  xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=ny+1; zBegin=-1; zEnd=0;
518  break;
519  case HALO_Z_PLUS:
520  xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=ny+1; zBegin=nz; zEnd=nz+1;
521  break;
522  default:
523  assert(1==0);
524  }
525 
526  int count = 0;
527  for (int ix=xBegin; ix<xEnd; ++ix)
528  for (int iy=yBegin; iy<yEnd; ++iy)
529  for (int iz=zBegin; iz<zEnd; ++iz)
530  list[count++] = getBoxFromTuple(boxes, ix, iy, iz);
531 
532  assert(count == nCells);
533  return list;
534 }
535 
536 /// The loadBuffer function for a force exchange.
537 /// Iterate the send list and load the derivative of the embedding
538 /// energy with respect to the local density into the send buffer.
539 ///
540 /// \see HaloExchangeSt::loadBuffer for an explanation of the loadBuffer
541 /// parameters.
542 int loadForceBuffer(void* vparms, void* vdata, int face, char* charBuf)
543 {
544  ForceExchangeParms* parms = (ForceExchangeParms*) vparms;
545  ForceExchangeData* data = (ForceExchangeData*) vdata;
546  ForceMsg* buf = (ForceMsg*) charBuf;
547 
548  int nCells = parms->nCells[face];
549  int* cellList = parms->sendCells[face];
550  int nBuf = 0;
551  for (int iCell=0; iCell<nCells; ++iCell)
552  {
553  int iBox = cellList[iCell];
554  int iOff = iBox*MAXATOMS;
555  for (int ii=iOff; ii<iOff+data->boxes->nAtoms[iBox]; ++ii)
556  {
557  buf[nBuf].dfEmbed = data->dfEmbed[ii];
558  ++nBuf;
559  }
560  }
561  return nBuf*sizeof(ForceMsg);
562 }
563 
564 /// The unloadBuffer function for a force exchange.
565 /// Data is received in an order that naturally aligns with the atom
566 /// storage so it is simple to put the data where it belongs.
567 ///
568 /// \see HaloExchangeSt::unloadBuffer for an explanation of the
569 /// unloadBuffer parameters.
570 void unloadForceBuffer(void* vparms, void* vdata, int face, int bufSize, char* charBuf)
571 {
572  ForceExchangeParms* parms = (ForceExchangeParms*) vparms;
573  ForceExchangeData* data = (ForceExchangeData*) vdata;
574  ForceMsg* buf = (ForceMsg*) charBuf;
575  assert(bufSize % sizeof(ForceMsg) == 0);
576 
577  int nCells = parms->nCells[face];
578  int* cellList = parms->recvCells[face];
579  int iBuf = 0;
580  for (int iCell=0; iCell<nCells; ++iCell)
581  {
582  int iBox = cellList[iCell];
583  int iOff = iBox*MAXATOMS;
584  for (int ii=iOff; ii<iOff+data->boxes->nAtoms[iBox]; ++ii)
585  {
586  data->dfEmbed[ii] = buf[iBuf].dfEmbed;
587  ++iBuf;
588  }
589  }
590  assert(iBuf == bufSize/ sizeof(ForceMsg));
591 }
592 
593 void destroyForceExchange(void* vparms)
594 {
595  ForceExchangeParms* parms = (ForceExchangeParms*) vparms;
596 
597  for (int ii=0; ii<6; ++ii)
598  {
599  free(parms->sendCells[ii]);
600  free(parms->recvCells[ii]);
601  }
602 }
603 
604 /// \details
605 /// The force exchange assumes that the atoms are in the same order in
606 /// both a given local link cell and the corresponding remote cell(s).
607 /// However, the atom exchange does not guarantee this property,
608 /// especially when atoms cross a domain decomposition boundary and move
609 /// from one task to another. Trying to maintain the atom order during
610 /// the atom exchange would immensely complicate that code. Instead, we
611 /// just sort the atoms after the atom exchange.
612 void sortAtomsInCell(Atoms* atoms, LinkCell* boxes, int iBox)
613 {
614  int nAtoms = boxes->nAtoms[iBox];
615 
616  AtomMsg tmp[nAtoms];
617 
618  int begin = iBox*MAXATOMS;
619  int end = begin + nAtoms;
620  for (int ii=begin, iTmp=0; ii<end; ++ii, ++iTmp)
621  {
622  tmp[iTmp].gid = atoms->gid[ii];
623  tmp[iTmp].type = atoms->iSpecies[ii];
624  tmp[iTmp].rx = atoms->r[ii][0];
625  tmp[iTmp].ry = atoms->r[ii][1];
626  tmp[iTmp].rz = atoms->r[ii][2];
627  tmp[iTmp].px = atoms->p[ii][0];
628  tmp[iTmp].py = atoms->p[ii][1];
629  tmp[iTmp].pz = atoms->p[ii][2];
630  }
631  qsort(&tmp, nAtoms, sizeof(AtomMsg), sortAtomsById);
632  for (int ii=begin, iTmp=0; ii<end; ++ii, ++iTmp)
633  {
634  atoms->gid[ii] = tmp[iTmp].gid;
635  atoms->iSpecies[ii] = tmp[iTmp].type;
636  atoms->r[ii][0] = tmp[iTmp].rx;
637  atoms->r[ii][1] = tmp[iTmp].ry;
638  atoms->r[ii][2] = tmp[iTmp].rz;
639  atoms->p[ii][0] = tmp[iTmp].px;
640  atoms->p[ii][1] = tmp[iTmp].py;
641  atoms->p[ii][2] = tmp[iTmp].pz;
642  }
643 
644 }
645 
646 /// A function suitable for passing to qsort to sort atoms by gid.
647 /// Because every atom in the simulation is supposed to have a unique
648 /// id, this function checks that the atoms have different gids. If
649 /// that assertion ever fails it is a sign that something has gone
650 /// wrong elsewhere in the code.
651 int sortAtomsById(const void* a, const void* b)
652 {
653  int aId = ((AtomMsg*) a)->gid;
654  int bId = ((AtomMsg*) b)->gid;
655  assert(aId != bId);
656 
657  if (aId < bId)
658  return -1;
659  return 1;
660 }
661