Cabana 0.8.0-dev
 
Cabana_Halo_Mpi.hpp
/****************************************************************************
 * Copyright (c) 2018-2023 by the Cabana authors                            *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of the Cabana library. Cabana is distributed under a   *
 * BSD 3-clause license. For the licensing terms see the LICENSE file in    *
 * the top-level directory.                                                  *
 *                                                                           *
 * SPDX-License-Identifier: BSD-3-Clause                                     *
 ****************************************************************************/

#ifndef CABANA_HALO_MPI_HPP
#define CABANA_HALO_MPI_HPP

#include <Cabana_AoSoA.hpp>
#include <Cabana_Slice.hpp>

#include <Kokkos_Core.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>

#include <mpi.h>

#include <exception>
#include <vector>

namespace Cabana
{

template <class HaloType, class AoSoAType>
template <class ExecutionSpace, class CommSpaceType>
std::enable_if_t<std::is_same<CommSpaceType, Mpi>::value, void>
Gather<HaloType, AoSoAType,
       typename std::enable_if<is_aosoa<AoSoAType>::value>::type>::
    applyImpl( ExecutionSpace, CommSpaceType )
{
    Kokkos::Profiling::ScopedRegion region( "Cabana::gather" );

    // Get the buffers and particle data (local copies for lambdas below).
    auto send_buffer = this->getSendBuffer();
    auto recv_buffer = this->getReceiveBuffer();
    auto aosoa = this->getData();

    // Get the steering vector for the sends.
    auto steering = _comm_plan.getExportSteering();
    // Gather from the local data into a tuple-contiguous send buffer.
    auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
    {
        send_buffer( i ) = aosoa.getTuple( steering( i ) );
    };
    Kokkos::RangePolicy<ExecutionSpace> send_policy( 0, _send_size );
    Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", send_policy,
                          gather_send_buffer_func );
    Kokkos::fence();

    // The halo has its own communication space so choose any MPI tag.
    const int mpi_tag = 2345;

    // Post non-blocking receives.
    int num_n = _comm_plan.numNeighbor();
    std::vector<MPI_Request> requests( num_n );
    std::pair<std::size_t, std::size_t> recv_range = { 0, 0 };
    for ( int n = 0; n < num_n; ++n )
    {
        recv_range.second = recv_range.first + _comm_plan.numImport( n );

        auto recv_subview = Kokkos::subview( recv_buffer, recv_range );

        MPI_Irecv( recv_subview.data(),
                   recv_subview.size() * sizeof( data_type ), MPI_BYTE,
                   _comm_plan.neighborRank( n ), mpi_tag, _comm_plan.comm(),
                   &( requests[n] ) );

        recv_range.first = recv_range.second;
    }

    // Do blocking sends.
    std::pair<std::size_t, std::size_t> send_range = { 0, 0 };
    for ( int n = 0; n < num_n; ++n )
    {
        send_range.second = send_range.first + _comm_plan.numExport( n );

        auto send_subview = Kokkos::subview( send_buffer, send_range );

        MPI_Send( send_subview.data(),
                  send_subview.size() * sizeof( data_type ), MPI_BYTE,
                  _comm_plan.neighborRank( n ), mpi_tag, _comm_plan.comm() );

        send_range.first = send_range.second;
    }

    // Wait on non-blocking receives.
    std::vector<MPI_Status> status( num_n );
    const int ec =
        MPI_Waitall( requests.size(), requests.data(), status.data() );
    if ( MPI_SUCCESS != ec )
        throw std::logic_error(
            "Cabana::Gather::apply: Failed MPI Communication" );

    // Extract the receive buffer into the ghosted elements.
    std::size_t num_local = _comm_plan.numLocal();
    auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
    {
        std::size_t ghost_idx = i + num_local;
        aosoa.setTuple( ghost_idx, recv_buffer( i ) );
    };
    Kokkos::RangePolicy<ExecutionSpace> recv_policy( 0, _recv_size );
    Kokkos::parallel_for( "Cabana::gather::apply::extract_recv_buffer",
                          recv_policy, extract_recv_buffer_func );
    Kokkos::fence();

    // Barrier before completing to ensure synchronization.
    MPI_Barrier( _comm_plan.comm() );
}
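
// Example usage (a minimal sketch, not part of this header, assuming the
// Cabana::gather() wrapper declared in Cabana_Halo.hpp and a previously
// constructed halo): gather owned AoSoA data into the ghost slots on each
// rank. The names `halo`, `particles`, and the member layout below are
// illustrative assumptions only.
//
//   using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
//   using Members = Cabana::MemberTypes<double[3], double[3]>; // position, force
//   Cabana::AoSoA<Members, MemorySpace> particles( "particles" );
//   // ... build a Cabana::Halo<MemorySpace> named halo, then make room for
//   // the ghosts behind the locally owned elements ...
//   particles.resize( halo.numLocal() + halo.numGhost() );
//   Cabana::gather( halo, particles ); // fills the ghost tuples over MPI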

template <class HaloType, class SliceType>
template <class ExecutionSpace, class CommSpaceType>
std::enable_if_t<std::is_same<CommSpaceType, Mpi>::value, void>
Gather<HaloType, SliceType,
       typename std::enable_if<is_slice<SliceType>::value>::type>::
    applyImpl( ExecutionSpace, CommSpaceType )
{
    Kokkos::Profiling::ScopedRegion region( "Cabana::gather" );

    // Get the buffers (local copies for lambdas below).
    auto send_buffer = this->getSendBuffer();
    auto recv_buffer = this->getReceiveBuffer();
    auto slice = this->getData();

    // Get the number of components in the slice.
    std::size_t num_comp = this->getSliceComponents();

    // Get the raw slice data.
    auto slice_data = slice.data();

    // Get the steering vector for the sends.
    auto steering = _comm_plan.getExportSteering();

    // Gather from the local data into a tuple-contiguous send buffer.
    auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
    {
        auto s = SliceType::index_type::s( steering( i ) );
        auto a = SliceType::index_type::a( steering( i ) );
        std::size_t slice_offset = s * slice.stride( 0 ) + a;
        for ( std::size_t n = 0; n < num_comp; ++n )
            send_buffer( i, n ) =
                slice_data[slice_offset + n * SliceType::vector_length];
    };
    Kokkos::RangePolicy<ExecutionSpace> send_policy( 0, _send_size );
    Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", send_policy,
                          gather_send_buffer_func );
    Kokkos::fence();

    // The halo has its own communication space so choose any MPI tag.
    const int mpi_tag = 2345;

    // Post non-blocking receives.
    int num_n = _comm_plan.numNeighbor();
    std::vector<MPI_Request> requests( num_n );
    std::pair<std::size_t, std::size_t> recv_range = { 0, 0 };
    for ( int n = 0; n < num_n; ++n )
    {
        recv_range.second = recv_range.first + _comm_plan.numImport( n );

        auto recv_subview =
            Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL );

        MPI_Irecv( recv_subview.data(),
                   recv_subview.size() * sizeof( data_type ), MPI_BYTE,
                   _comm_plan.neighborRank( n ), mpi_tag, _comm_plan.comm(),
                   &( requests[n] ) );

        recv_range.first = recv_range.second;
    }

    // Do blocking sends.
    std::pair<std::size_t, std::size_t> send_range = { 0, 0 };
    for ( int n = 0; n < num_n; ++n )
    {
        send_range.second = send_range.first + _comm_plan.numExport( n );

        auto send_subview =
            Kokkos::subview( send_buffer, send_range, Kokkos::ALL );

        MPI_Send( send_subview.data(),
                  send_subview.size() * sizeof( data_type ), MPI_BYTE,
                  _comm_plan.neighborRank( n ), mpi_tag, _comm_plan.comm() );

        send_range.first = send_range.second;
    }

    // Wait on non-blocking receives.
    std::vector<MPI_Status> status( num_n );
    const int ec =
        MPI_Waitall( requests.size(), requests.data(), status.data() );
    if ( MPI_SUCCESS != ec )
        throw std::logic_error(
            "Cabana::gather::apply (SliceType): Failed MPI Communication" );

    // Extract the receive buffer into the ghosted elements.
    std::size_t num_local = _comm_plan.numLocal();
    auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
    {
        std::size_t ghost_idx = i + num_local;
        auto s = SliceType::index_type::s( ghost_idx );
        auto a = SliceType::index_type::a( ghost_idx );
        std::size_t slice_offset = s * slice.stride( 0 ) + a;
        for ( std::size_t n = 0; n < num_comp; ++n )
            slice_data[slice_offset + SliceType::vector_length * n] =
                recv_buffer( i, n );
    };
    Kokkos::RangePolicy<ExecutionSpace> recv_policy( 0, _recv_size );
    Kokkos::parallel_for( "Cabana::gather::extract_recv_buffer", recv_policy,
                          extract_recv_buffer_func );
    Kokkos::fence();

    // Barrier before completing to ensure synchronization.
    MPI_Barrier( _comm_plan.comm() );
}
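
// Example usage (a minimal sketch, not part of this header, assuming the
// `halo` and `particles` objects from the sketch above): gather a single
// particle property instead of the whole AoSoA. Cabana::slice() is declared
// in Cabana_AoSoA.hpp and the slice overload of Cabana::gather() in
// Cabana_Halo.hpp; the member index and label are illustrative assumptions.
//
//   auto position = Cabana::slice<0>( particles, "position" );
//   Cabana::gather( halo, position ); // communicates only this property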

/**********
 * SCATTER *
 **********/

template <class HaloType, class SliceType>
template <class ExecutionSpace, class CommSpaceType>
std::enable_if_t<std::is_same<CommSpaceType, Mpi>::value, void>
Scatter<HaloType, SliceType>::applyImpl( ExecutionSpace, CommSpaceType )
{
    Kokkos::Profiling::ScopedRegion region( "Cabana::scatter" );

    // Get the buffers (local copies for lambdas below).
    auto send_buffer = this->getSendBuffer();
    auto recv_buffer = this->getReceiveBuffer();
    auto slice = this->getData();

    // Get the number of components in the slice.
    std::size_t num_comp = this->getSliceComponents();

    // Get the raw slice data. Wrap in a 1D Kokkos View so we can unroll the
    // components of each slice element.
    Kokkos::View<data_type*, memory_space,
                 Kokkos::MemoryTraits<Kokkos::Unmanaged>>
        slice_data( slice.data(), slice.numSoA() * slice.stride( 0 ) );

    // Extract the send buffer from the ghosted elements.
    std::size_t num_local = _comm_plan.numLocal();
    auto extract_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
    {
        std::size_t ghost_idx = i + num_local;
        auto s = SliceType::index_type::s( ghost_idx );
        auto a = SliceType::index_type::a( ghost_idx );
        std::size_t slice_offset = s * slice.stride( 0 ) + a;
        for ( std::size_t n = 0; n < num_comp; ++n )
            send_buffer( i, n ) =
                slice_data( slice_offset + SliceType::vector_length * n );
    };
    Kokkos::RangePolicy<ExecutionSpace> send_policy( 0, _send_size );
    Kokkos::parallel_for( "Cabana::scatter::extract_send_buffer", send_policy,
                          extract_send_buffer_func );
    Kokkos::fence();

    // The halo has its own communication space so choose any MPI tag.
    const int mpi_tag = 2345;

    // Post non-blocking receives.
    int num_n = _comm_plan.numNeighbor();
    std::vector<MPI_Request> requests( num_n );
    std::pair<std::size_t, std::size_t> recv_range = { 0, 0 };
    for ( int n = 0; n < num_n; ++n )
    {
        recv_range.second = recv_range.first + _comm_plan.numExport( n );

        auto recv_subview =
            Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL );

        MPI_Irecv( recv_subview.data(),
                   recv_subview.size() * sizeof( data_type ), MPI_BYTE,
                   _comm_plan.neighborRank( n ), mpi_tag, _comm_plan.comm(),
                   &( requests[n] ) );

        recv_range.first = recv_range.second;
    }

    // Do blocking sends.
    std::pair<std::size_t, std::size_t> send_range = { 0, 0 };
    for ( int n = 0; n < num_n; ++n )
    {
        send_range.second = send_range.first + _comm_plan.numImport( n );

        auto send_subview =
            Kokkos::subview( send_buffer, send_range, Kokkos::ALL );

        MPI_Send( send_subview.data(),
                  send_subview.size() * sizeof( data_type ), MPI_BYTE,
                  _comm_plan.neighborRank( n ), mpi_tag, _comm_plan.comm() );

        send_range.first = send_range.second;
    }

    // Wait on non-blocking receives.
    std::vector<MPI_Status> status( num_n );
    const int ec =
        MPI_Waitall( requests.size(), requests.data(), status.data() );
    if ( MPI_SUCCESS != ec )
        throw std::logic_error( "Cabana::scatter::apply (SliceType): "
                                "Failed MPI Communication" );

    // Get the steering vector for the sends.
    auto steering = _comm_plan.getExportSteering();

    // Scatter the ghosts in the receive buffer into the local values.
    auto scatter_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
    {
        auto s = SliceType::index_type::s( steering( i ) );
        auto a = SliceType::index_type::a( steering( i ) );
        std::size_t slice_offset = s * slice.stride( 0 ) + a;
        for ( std::size_t n = 0; n < num_comp; ++n )
            Kokkos::atomic_add(
                &slice_data( slice_offset + SliceType::vector_length * n ),
                recv_buffer( i, n ) );
    };
    Kokkos::RangePolicy<ExecutionSpace> recv_policy( 0, _recv_size );
    Kokkos::parallel_for( "Cabana::scatter::apply::scatter_recv_buffer",
                          recv_policy, scatter_recv_buffer_func );
    Kokkos::fence();

    // Barrier before completing to ensure synchronization.
    MPI_Barrier( _comm_plan.comm() );
}
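
// Example usage (a minimal sketch, not part of this header, again assuming
// the `halo` and `particles` objects from the sketches above): scatter ghost
// contributions back to their owning ranks and sum them into the local
// values. The Cabana::scatter() wrapper is declared in Cabana_Halo.hpp; the
// member index and label are illustrative assumptions.
//
//   auto force = Cabana::slice<1>( particles, "force" );
//   Cabana::scatter( halo, force ); // atomically adds ghost values onto owners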

} // end namespace Cabana

#endif // end CABANA_HALO_MPI_HPP