Cabana_CommunicationPlan.hpp
1/****************************************************************************
2 * Copyright (c) 2018-2023 by the Cabana authors *
3 * All rights reserved. *
4 * *
5 * This file is part of the Cabana library. Cabana is distributed under a *
6 * BSD 3-clause license. For the licensing terms see the LICENSE file in *
7 * the top-level directory. *
8 * *
9 * SPDX-License-Identifier: BSD-3-Clause *
10 ****************************************************************************/
11
16#ifndef CABANA_COMMUNICATIONPLAN_HPP
17#define CABANA_COMMUNICATIONPLAN_HPP
18
19#include <Cabana_Utils.hpp>
20
21#include <Kokkos_Core.hpp>
22#include <Kokkos_ScatterView.hpp>
23
24#include <mpi.h>
25
26#include <algorithm>
27#include <exception>
28#include <memory>
29#include <numeric>
30#include <type_traits>
31#include <vector>
32
33namespace Cabana
34{
35//---------------------------------------------------------------------------//
36// Communication driver construction type tags.
37//---------------------------------------------------------------------------//
41struct Export
42{
43};
44
48struct Import
49{
50};
51//---------------------------------------------------------------------------//
52
53namespace Impl
54{
56//---------------------------------------------------------------------------//
57// Count sends and create steering algorithm tags.
58struct CountSendsAndCreateSteeringDuplicated
59{
60};
61struct CountSendsAndCreateSteeringAtomic
62{
63};
64
65//---------------------------------------------------------------------------//
66// Count sends and create steering algorithm selector.
67template <class ExecutionSpace>
68struct CountSendsAndCreateSteeringAlgorithm;
69
70// GPU backends (CUDA, HIP, SYCL, and OpenMPTarget) use atomics.
71#ifdef KOKKOS_ENABLE_CUDA
72template <>
73struct CountSendsAndCreateSteeringAlgorithm<Kokkos::Cuda>
74{
75 using type = CountSendsAndCreateSteeringAtomic;
76};
77#endif // end KOKKOS_ENABLE_CUDA
78#ifdef KOKKOS_ENABLE_HIP
79template <>
80struct CountSendsAndCreateSteeringAlgorithm<Kokkos::Experimental::HIP>
81{
82 using type = CountSendsAndCreateSteeringAtomic;
83};
84#endif // end KOKKOS_ENABLE_HIP
85#ifdef KOKKOS_ENABLE_SYCL
86template <>
87struct CountSendsAndCreateSteeringAlgorithm<Kokkos::Experimental::SYCL>
88{
89 using type = CountSendsAndCreateSteeringAtomic;
90};
91#endif // end KOKKOS_ENABLE_SYCL
92#ifdef KOKKOS_ENABLE_OPENMPTARGET
93template <>
94struct CountSendsAndCreateSteeringAlgorithm<Kokkos::Experimental::OpenMPTarget>
95{
96 using type = CountSendsAndCreateSteeringAtomic;
97};
98#endif // end KOKKOS_ENABLE_OPENMPTARGET
99
100// The default is to use duplication.
101template <class ExecutionSpace>
102struct CountSendsAndCreateSteeringAlgorithm
103{
104 using type = CountSendsAndCreateSteeringDuplicated;
105};
106
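// Illustrative note (not part of the original source): the selector above
// resolves to one of the two algorithm tags at compile time. For example,
// assuming the Kokkos::Serial backend is enabled, it falls through to the
// unspecialized default:
//
//   static_assert(
//       std::is_same<
//           Cabana::Impl::CountSendsAndCreateSteeringAlgorithm<
//               Kokkos::Serial>::type,
//           Cabana::Impl::CountSendsAndCreateSteeringDuplicated>::value,
//       "Host backends fall through to the duplicated algorithm" );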
107//---------------------------------------------------------------------------//
108// Count sends and generate the steering vector. Atomic version.
109template <class ExecutionSpace, class ExportRankView>
110auto countSendsAndCreateSteering( ExecutionSpace,
111 const ExportRankView element_export_ranks,
112 const int comm_size,
113 CountSendsAndCreateSteeringAtomic )
114 -> std::pair<Kokkos::View<int*, typename ExportRankView::memory_space>,
115 Kokkos::View<typename ExportRankView::size_type*,
116 typename ExportRankView::memory_space>>
117{
118 using memory_space = typename ExportRankView::memory_space;
119 using size_type = typename ExportRankView::size_type;
120
121 // Create views.
122 Kokkos::View<int*, memory_space> neighbor_counts( "neighbor_counts",
123 comm_size );
124 Kokkos::View<size_type*, memory_space> neighbor_ids(
125 Kokkos::ViewAllocateWithoutInitializing( "neighbor_ids" ),
126 element_export_ranks.size() );
127
128 // Count the sends and create the steering vector.
129
130 // For smaller values of comm_size, use the optimized scratch memory path.
131 // The cutoff of 64 is somewhat arbitrary; it should be at least 27 to cover 3D halos.
132 if ( comm_size <= 64 )
133 {
134 constexpr int team_size = 256;
135 Kokkos::TeamPolicy<ExecutionSpace> team_policy(
136 ( element_export_ranks.size() + team_size - 1 ) / team_size,
137 team_size );
138 team_policy = team_policy.set_scratch_size(
139 0,
140 Kokkos::PerTeam( sizeof( int ) * ( team_size + 2 * comm_size ) ) );
141 Kokkos::parallel_for(
142 "Cabana::CommunicationPlan::countSendsAndCreateSteeringShared",
143 team_policy,
144 KOKKOS_LAMBDA(
145 const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type&
146 team ) {
147 // NOTE: `get_shmem` returns shared memory pointers *aligned to
148 // 8 bytes*, so if `comm_size` is odd we can get erroneously
149 // padded offsets if we call `get_shmem` separately for each
150 // shared memory intermediary. Acquiring all the needed scratch
151 // memory at once then computing pointer offsets by hand avoids
152 // this issue.
153 int* scratch = (int*)team.team_shmem().get_shmem(
154 ( team.team_size() + 2 * comm_size ) * sizeof( int ), 0 );
155
156 // local neighbor_ids, gives the local offset relative to
157 // calculated global offset. Size team.team_size() * sizeof(int)
158 int* local_neighbor_ids = scratch;
159 // local histogram, `comm_size` in size
160 int* histo = local_neighbor_ids + team.team_size();
161 // offset into global array, `comm_size` in size
162 int* global_offset = histo + comm_size;
163 // overall element `tid`
164 const auto tid =
165 team.team_rank() + team.league_rank() * team.team_size();
166 // local element
167 const int local_id = team.team_rank();
168 // total number of elements, for convenience
169 const int num_elements = element_export_ranks.size();
170 // my element export rank
171 const int my_element_export_rank =
172 ( tid < num_elements ? element_export_ranks( tid ) : -1 );
173
174 // cannot outright terminate early because all threads in the team share work
175 // if ( tid >= num_elements ) return;
176 const bool in_bounds =
177 tid < num_elements && my_element_export_rank >= 0;
178
179 Kokkos::parallel_for(
180 Kokkos::TeamThreadRange( team, comm_size ),
181 [&]( const int i ) { histo[i] = 0; } );
182
183 // synchronize zeroing
184 team.team_barrier();
185
186 // build local histogram, need to encode num_elements check here
187 if ( in_bounds )
188 {
189 // shared memory atomic add, accumulate into local offset
190 local_neighbor_ids[local_id] = Kokkos::atomic_fetch_add(
191 &histo[my_element_export_rank], 1 );
192 }
193
194 // synchronize local histogram build
195 team.team_barrier();
196
197 // reserve space in global array via a loop over neighbor counts
198 Kokkos::parallel_for(
199 Kokkos::TeamThreadRange( team, comm_size ),
200 [&]( const int i )
201 {
202 // global memory atomic add, reserves space
203 global_offset[i] = Kokkos::atomic_fetch_add(
204 &neighbor_counts( i ), histo[i] );
205 } );
206
207 // synchronize block-stride loop
208 team.team_barrier();
209
210 // and now store to my location
211 if ( in_bounds )
212 {
213 neighbor_ids( tid ) =
214 global_offset[my_element_export_rank] +
215 local_neighbor_ids[local_id];
216 }
217 } );
218 }
219 else
220 {
221 // For larger numbers of export ranks (e.g., migration) we use the
222 // global memory version, though this is a point of future optimization
223
224 Kokkos::parallel_for(
225 "Cabana::CommunicationPlan::countSendsAndCreateSteering",
226 Kokkos::RangePolicy<ExecutionSpace>( 0,
227 element_export_ranks.size() ),
228 KOKKOS_LAMBDA( const size_type i ) {
229 if ( element_export_ranks( i ) >= 0 )
230 neighbor_ids( i ) = Kokkos::atomic_fetch_add(
231 &neighbor_counts( element_export_ranks( i ) ), 1 );
232 } );
233 }
234 Kokkos::fence();
235
236 // Return the counts and ids.
237 return std::make_pair( neighbor_counts, neighbor_ids );
238}
239
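// Illustrative worked example (not part of the original source): for
// comm_size == 2 and element_export_ranks == {1, 0, 1, -1}, the returned
// neighbor_counts are {1, 2} (one element bound for rank 0, two for rank 1),
// and each valid element gets a neighbor_ids entry giving its slot within its
// destination rank's group (element 1 gets slot 0 of rank 0; elements 0 and 2
// get slots {0, 1} of rank 1 in an unspecified order, since slots are claimed
// atomically). The element with export rank -1 is skipped.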
240//---------------------------------------------------------------------------//
241// Count sends and generate the steering vector. Duplicated version.
242template <class ExecutionSpace, class ExportRankView>
243auto countSendsAndCreateSteering( ExecutionSpace,
244 const ExportRankView element_export_ranks,
245 const int comm_size,
246 CountSendsAndCreateSteeringDuplicated )
247 -> std::pair<Kokkos::View<int*, typename ExportRankView::memory_space>,
248 Kokkos::View<typename ExportRankView::size_type*,
249 typename ExportRankView::memory_space>>
250{
251 using memory_space = typename ExportRankView::memory_space;
252 using size_type = typename ExportRankView::size_type;
253
254 // Create a unique thread token.
255 Kokkos::Experimental::UniqueToken<
256 ExecutionSpace, Kokkos::Experimental::UniqueTokenScope::Global>
257 unique_token;
258
259 // Create views.
260 Kokkos::View<int*, memory_space> neighbor_counts(
261 Kokkos::ViewAllocateWithoutInitializing( "neighbor_counts" ),
262 comm_size );
263 Kokkos::View<size_type*, memory_space> neighbor_ids(
264 Kokkos::ViewAllocateWithoutInitializing( "neighbor_ids" ),
265 element_export_ranks.size() );
266 Kokkos::View<int**, memory_space> neighbor_counts_dup(
267 "neighbor_counts", unique_token.size(), comm_size );
268 Kokkos::View<size_type**, memory_space> neighbor_ids_dup(
269 "neighbor_ids", unique_token.size(), element_export_ranks.size() );
270
271 // Compute initial duplicated sends and steering.
272 Kokkos::parallel_for(
273 "Cabana::CommunicationPlan::intialCount",
274 Kokkos::RangePolicy<ExecutionSpace>( 0, element_export_ranks.size() ),
275 KOKKOS_LAMBDA( const size_type i ) {
276 if ( element_export_ranks( i ) >= 0 )
277 {
278 // Get the thread id.
279 auto thread_id = unique_token.acquire();
280
281 // Do the duplicated fetch-add. If this is a valid element id,
282 // increment the send count for this rank and store the
283 // incremented count as the thread-local neighbor id. This
284 // value is too big by one (we use the prefix increment
285 // operator), but a non-zero value is needed so we can later
286 // find which thread this element was counted on. The extra
287 // one is subtracted later when the final steering vector is
288 // assembled.
289 neighbor_ids_dup( thread_id, i ) = ++neighbor_counts_dup(
290 thread_id, element_export_ranks( i ) );
291
292 // Release the thread id.
293 unique_token.release( thread_id );
294 }
295 } );
296 Kokkos::fence();
297
298 // Team policy
299 using team_policy =
300 Kokkos::TeamPolicy<ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic>>;
301 using index_type = typename team_policy::index_type;
302
303 // Compute the send counts for each neighbor rank by reducing across
304 // the thread duplicates.
305 Kokkos::parallel_for(
306 "Cabana::CommunicationPlan::finalCount",
307 team_policy( neighbor_counts.extent( 0 ), Kokkos::AUTO ),
308 KOKKOS_LAMBDA( const typename team_policy::member_type& team ) {
309 // Get the element id.
310 auto i = team.league_rank();
311
312 // Add the thread results.
313 int thread_counts = 0;
314 Kokkos::parallel_reduce(
315 Kokkos::TeamThreadRange( team,
316 neighbor_counts_dup.extent( 0 ) ),
317 [&]( const index_type thread_id, int& result )
318 { result += neighbor_counts_dup( thread_id, i ); },
319 thread_counts );
320 neighbor_counts( i ) = thread_counts;
321 } );
322 Kokkos::fence();
323
324 // Compute the location of each export element in the send buffer of
325 // its destination rank.
326 Kokkos::parallel_for(
327 "Cabana::CommunicationPlan::createSteering",
328 team_policy( element_export_ranks.size(), Kokkos::AUTO ),
329 KOKKOS_LAMBDA( const typename team_policy::member_type& team ) {
330 // Get the element id.
331 auto i = team.league_rank();
332
333 // Only operate on valid elements
334 if ( element_export_ranks( i ) >= 0 )
335 {
336 // Compute the thread id in which we located the element
337 // during the count phase. Only the thread in which we
338 // located the element will contribute to the reduction.
339 index_type dup_thread = 0;
340 Kokkos::parallel_reduce(
341 Kokkos::TeamThreadRange( team,
342 neighbor_ids_dup.extent( 0 ) ),
343 [&]( const index_type thread_id, index_type& result )
344 {
345 if ( neighbor_ids_dup( thread_id, i ) > 0 )
346 result += thread_id;
347 },
348 dup_thread );
349
350 // Compute the offset of this element in the steering
351 // vector for its destination rank. Loop through the
352 // threads up to the thread that found this element in the
353 // count stage. All thread counts prior to that thread
354 // will contribute to the offset.
355 size_type thread_offset = 0;
356 Kokkos::parallel_reduce(
357 Kokkos::TeamThreadRange( team, dup_thread ),
358 [&]( const index_type thread_id, size_type& result ) {
359 result += neighbor_counts_dup(
360 thread_id, element_export_ranks( i ) );
361 },
362 thread_offset );
363
364 // Add the thread-local value to the offset where we subtract
365 // the 1 that we added artificially when we were first
366 // counting.
367 neighbor_ids( i ) =
368 thread_offset + neighbor_ids_dup( dup_thread, i ) - 1;
369 }
370 } );
371 Kokkos::fence();
372
373 // Return the counts and ids.
374 return std::make_pair( neighbor_counts, neighbor_ids );
375}
376
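// Illustrative usage sketch (not part of the original source), assuming a
// host-accessible view of export ranks; the algorithm tag is normally chosen
// through the selector, so host backends take this duplicated path:
//
//   using exec = Kokkos::DefaultHostExecutionSpace;
//   Kokkos::View<int*, Kokkos::HostSpace> ranks( "export_ranks", 4 );
//   ranks( 0 ) = 1; ranks( 1 ) = 0; ranks( 2 ) = 1; ranks( 3 ) = -1;
//   auto counts_and_ids = Cabana::Impl::countSendsAndCreateSteering(
//       exec{}, ranks, 2,
//       Cabana::Impl::CountSendsAndCreateSteeringAlgorithm<exec>::type{} );
//   // counts_and_ids.first holds the per-rank send counts {1, 2};
//   // counts_and_ids.second holds each element's slot within its rank's group.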
377//---------------------------------------------------------------------------//
379} // end namespace Impl
380
381//---------------------------------------------------------------------------//
388inline std::vector<int> getUniqueTopology( MPI_Comm comm,
389 std::vector<int> topology )
390{
391 auto remove_end = std::remove( topology.begin(), topology.end(), -1 );
392 std::sort( topology.begin(), remove_end );
393 auto unique_end = std::unique( topology.begin(), remove_end );
394 topology.resize( std::distance( topology.begin(), unique_end ) );
395
396 // Put this rank first.
397 int my_rank = -1;
398 MPI_Comm_rank( comm, &my_rank );
399 for ( auto& n : topology )
400 {
401 if ( n == my_rank )
402 {
403 std::swap( n, topology[0] );
404 break;
405 }
406 }
407 return topology;
408}
409
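// Illustrative usage (not part of the original source), assuming the call is
// made on MPI rank 2 of MPI_COMM_WORLD:
//
//   std::vector<int> topology = { 5, 2, -1, 5, 0 };
//   auto unique = Cabana::getUniqueTopology( MPI_COMM_WORLD, topology );
//   // unique == { 2, 0, 5 }: the -1 sentinel and the duplicate are removed,
//   // the remaining ranks are sorted, and the calling rank is moved up front.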
410//---------------------------------------------------------------------------//
436template <class MemorySpace>
437class CommunicationPlan
438{
439 public:
441 using memory_space = MemorySpace;
442 static_assert( Kokkos::is_memory_space<MemorySpace>() );
443
445 using execution_space = typename memory_space::execution_space;
446
447 // FIXME: extracting the self type for backwards compatibility with previous
448 // template on DeviceType. Should simply be memory_space::size_type after
449 // next release.
451 using size_type = typename memory_space::memory_space::size_type;
452
458 CommunicationPlan( MPI_Comm comm )
459 {
460 _comm_ptr.reset(
461 // Duplicate the communicator and store in a std::shared_ptr so that
462 // all copies point to the same object
463 [comm]()
464 {
465 auto p = std::make_unique<MPI_Comm>();
466 MPI_Comm_dup( comm, p.get() );
467 return p.release();
468 }(),
469 // Custom deleter to mark the communicator for deallocation
470 []( MPI_Comm* p )
471 {
472 MPI_Comm_free( p );
473 delete p;
474 } );
475 }
476
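// Illustrative note (not part of the original source): because the
// constructor duplicates the communicator, the plan does not depend on the
// lifetime of the handle passed in. A minimal sketch, assuming MPI and
// Kokkos have been initialized:
//
//   Cabana::CommunicationPlan<Kokkos::HostSpace> plan( MPI_COMM_WORLD );
//   // plan.comm() is a private duplicate; MPI_COMM_WORLD is unaffected.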
480 MPI_Comm comm() const { return *_comm_ptr; }
481
487 int numNeighbor() const { return _neighbors.size(); }
488
494 int neighborRank( const int neighbor ) const
495 {
496 return _neighbors[neighbor];
497 }
498
507 std::size_t numExport( const int neighbor ) const
508 {
509 return _num_export[neighbor];
510 }
511
517 std::size_t totalNumExport() const { return _total_num_export; }
518
527 std::size_t numImport( const int neighbor ) const
528 {
529 return _num_import[neighbor];
530 }
531
537 std::size_t totalNumImport() const { return _total_num_import; }
538
549 std::size_t exportSize() const { return _num_export_element; }
550
560 Kokkos::View<std::size_t*, memory_space> getExportSteering() const
561 {
562 return _export_steering;
563 }
564
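// Illustrative sketch (not part of the original source) of inspecting a plan
// after one of the create functions below has been called; assumes <cstdio>:
//
//   for ( int n = 0; n < plan.numNeighbor(); ++n )
//       std::printf( "neighbor %d: send %zu recv %zu\n",
//                    plan.neighborRank( n ), plan.numExport( n ),
//                    plan.numImport( n ) );
//   // plan.totalNumExport() and plan.totalNumImport() give the totals, and
//   // plan.getExportSteering() maps send-buffer slots to local element ids.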
565 // The functions in the public block below would normally be protected but
566 // we make them public to allow using private class data in CUDA kernels
567 // with lambda functions.
568 public:
606 template <class ExecutionSpace, class RankViewType>
607 Kokkos::View<size_type*, memory_space>
608 createWithTopology( ExecutionSpace exec_space, Export,
609 const RankViewType& element_export_ranks,
610 const std::vector<int>& neighbor_ranks )
611 {
613
614 // Store the number of export elements.
615 _num_export_element = element_export_ranks.size();
616
617 // Store the unique neighbors (this rank first).
618 _neighbors = getUniqueTopology( comm(), neighbor_ranks );
619 int num_n = _neighbors.size();
620
621 // Get the size of this communicator.
622 int comm_size = -1;
623 MPI_Comm_size( comm(), &comm_size );
624
625 // Get the MPI rank we are currently on.
626 int my_rank = -1;
627 MPI_Comm_rank( comm(), &my_rank );
628
629 // Pick an MPI tag for communication. This object has its own
630 // communication space so any MPI tag will do.
631 const int mpi_tag = 1221;
632
633 // Initialize import/export sizes.
634 _num_export.assign( num_n, 0 );
635 _num_import.assign( num_n, 0 );
636
637 // Count the number of sends this rank will do to other ranks. Keep
638 // track of which slot we get in our neighbor's send buffer.
639 auto counts_and_ids = Impl::countSendsAndCreateSteering(
640 exec_space, element_export_ranks, comm_size,
641 typename Impl::CountSendsAndCreateSteeringAlgorithm<
642 ExecutionSpace>::type() );
643
644 // Copy the counts to the host.
645 auto neighbor_counts_host = Kokkos::create_mirror_view_and_copy(
646 Kokkos::HostSpace(), counts_and_ids.first );
647
648 // Get the export counts.
649 for ( int n = 0; n < num_n; ++n )
650 _num_export[n] = neighbor_counts_host( _neighbors[n] );
651
652 // Post receives for the number of imports we will get.
653 std::vector<MPI_Request> requests;
654 requests.reserve( num_n );
655 for ( int n = 0; n < num_n; ++n )
656 if ( my_rank != _neighbors[n] )
657 {
658 requests.push_back( MPI_Request() );
659 MPI_Irecv( &_num_import[n], 1, MPI_UNSIGNED_LONG, _neighbors[n],
660 mpi_tag, comm(), &( requests.back() ) );
661 }
662 else
663 _num_import[n] = _num_export[n];
664
665 // Send the number of exports to each of our neighbors.
666 for ( int n = 0; n < num_n; ++n )
667 if ( my_rank != _neighbors[n] )
668 MPI_Send( &_num_export[n], 1, MPI_UNSIGNED_LONG, _neighbors[n],
669 mpi_tag, comm() );
670
671 // Wait on receives.
672 std::vector<MPI_Status> status( requests.size() );
673 const int ec =
674 MPI_Waitall( requests.size(), requests.data(), status.data() );
675 if ( MPI_SUCCESS != ec )
676 throw std::logic_error(
677 "Cabana::CommunicationPlan::createFromExportsAndTopology: "
678 "Failed MPI Communication" );
679
680 // Get the total number of imports/exports.
681 _total_num_export = std::accumulate(
682 _num_export.begin(), _num_export.end(), std::size_t{ 0u } );
683 _total_num_import = std::accumulate(
684 _num_import.begin(), _num_import.end(), std::size_t{ 0u } );
685
686 // No barrier is needed because all ranks know who they are receiving
687 // and sending to.
688
689 // Return the neighbor ids.
690 return counts_and_ids.second;
691 }
692
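// Illustrative usage sketch (not part of the original source). The names
// num_send, my_rank, left_rank, and right_rank are assumptions for the
// example:
//
//   using Plan = Cabana::CommunicationPlan<Kokkos::HostSpace>;
//   Plan plan( MPI_COMM_WORLD );
//   Kokkos::View<int*, Plan::memory_space> export_ranks( "export_ranks",
//                                                        num_send );
//   // ... fill each entry with a destination rank from the topology, or -1
//   // for elements that are not sent ...
//   std::vector<int> topology = { my_rank, left_rank, right_rank };
//   auto neighbor_ids = plan.createWithTopology(
//       Plan::execution_space{}, Cabana::Export(), export_ranks, topology );
//   plan.createExportSteering( neighbor_ids, export_ranks );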
728 template <class RankViewType>
729 Kokkos::View<size_type*, memory_space>
730 createWithTopology( Export, const RankViewType& element_export_ranks,
731 const std::vector<int>& neighbor_ranks )
732 {
733 // Use the default execution space.
734 return createWithTopology( execution_space{}, Export(),
735 element_export_ranks, neighbor_ranks );
736 }
737
768 template <class ExecutionSpace, class RankViewType>
769 Kokkos::View<size_type*, memory_space>
770 createWithoutTopology( ExecutionSpace exec_space, Export,
771 const RankViewType& element_export_ranks )
772 {
774
775 // Store the number of export elements.
776 _num_export_element = element_export_ranks.size();
777
778 // Get the size of this communicator.
779 int comm_size = -1;
780 MPI_Comm_size( comm(), &comm_size );
781
782 // Get the MPI rank we are currently on.
783 int my_rank = -1;
784 MPI_Comm_rank( comm(), &my_rank );
785
786 // Pick an MPI tag for communication. This object has its own
787 // communication space so any MPI tag will do.
788 const int mpi_tag = 1221;
789
790 // Count the number of sends this rank will do to other ranks. Keep
791 // track of which slot we get in our neighbor's send buffer.
792 auto counts_and_ids = Impl::countSendsAndCreateSteering(
793 exec_space, element_export_ranks, comm_size,
794 typename Impl::CountSendsAndCreateSteeringAlgorithm<
795 ExecutionSpace>::type() );
796
797 // Copy the counts to the host.
798 auto neighbor_counts_host = Kokkos::create_mirror_view_and_copy(
799 Kokkos::HostSpace(), counts_and_ids.first );
800
801 // Extract the export ranks and number of exports and then flag the
802 // send ranks.
803 _neighbors.clear();
804 _num_export.clear();
805 _total_num_export = 0;
806 for ( int r = 0; r < comm_size; ++r )
807 if ( neighbor_counts_host( r ) > 0 )
808 {
809 _neighbors.push_back( r );
810 _num_export.push_back( neighbor_counts_host( r ) );
811 _total_num_export += neighbor_counts_host( r );
812 neighbor_counts_host( r ) = 1;
813 }
814
815 // Get the number of export ranks and initially allocate the import
816 // sizes.
817 int num_export_rank = _neighbors.size();
818 _num_import.assign( num_export_rank, 0 );
819
820 // If we are sending to ourself put that one first in the neighbor
821 // list and assign the number of imports to be the number of exports.
822 bool self_send = false;
823 for ( int n = 0; n < num_export_rank; ++n )
824 if ( _neighbors[n] == my_rank )
825 {
826 std::swap( _neighbors[n], _neighbors[0] );
827 std::swap( _num_export[n], _num_export[0] );
828 _num_import[0] = _num_export[0];
829 self_send = true;
830 break;
831 }
832
833 // Determine how many total import ranks each neighbor has.
834 int num_import_rank = -1;
835 std::vector<int> recv_counts( comm_size, 1 );
836 MPI_Reduce_scatter( neighbor_counts_host.data(), &num_import_rank,
837 recv_counts.data(), MPI_INT, MPI_SUM, comm() );
838 if ( self_send )
839 --num_import_rank;
840
841 // Post the expected number of receives and indicate we might get them
842 // from any rank.
843 std::vector<std::size_t> import_sizes( num_import_rank );
844 std::vector<MPI_Request> requests( num_import_rank );
845 for ( int n = 0; n < num_import_rank; ++n )
846 MPI_Irecv( &import_sizes[n], 1, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE,
847 mpi_tag, comm(), &requests[n] );
848
849 // Do blocking sends. Don't do any self sends.
850 int self_offset = ( self_send ) ? 1 : 0;
851 for ( int n = self_offset; n < num_export_rank; ++n )
852 MPI_Send( &_num_export[n], 1, MPI_UNSIGNED_LONG, _neighbors[n],
853 mpi_tag, comm() );
854
855 // Wait on non-blocking receives.
856 std::vector<MPI_Status> status( requests.size() );
857 const int ec =
858 MPI_Waitall( requests.size(), requests.data(), status.data() );
859 if ( MPI_SUCCESS != ec )
860 throw std::logic_error(
861 "Cabana::CommunicationPlan::createFromExportsOnly: Failed MPI "
862 "Communication" );
863
864 // Compute the total number of imports.
865 _total_num_import =
866 std::accumulate( import_sizes.begin(), import_sizes.end(),
867 ( self_send ) ? _num_import[0] : 0 );
868
869 // Extract the imports. If we did self sends we already know what
870 // imports we got from that.
871 for ( int i = 0; i < num_import_rank; ++i )
872 {
873 // Get the message source.
874 const auto source = status[i].MPI_SOURCE;
875
876 // See if the neighbor we received stuff from was someone we also
877 // sent stuff to.
878 auto found_neighbor =
879 std::find( _neighbors.begin(), _neighbors.end(), source );
880
881 // If this is a new neighbor (i.e. someone we didn't send anything
882 // to) record this.
883 if ( found_neighbor == std::end( _neighbors ) )
884 {
885 _neighbors.push_back( source );
886 _num_import.push_back( import_sizes[i] );
887 _num_export.push_back( 0 );
888 }
889
890 // Otherwise if we already sent something to this neighbor that
891 // means we already have a neighbor/export entry. Just assign the
892 // import entry for that neighbor.
893 else
894 {
895 auto n = std::distance( _neighbors.begin(), found_neighbor );
896 _num_import[n] = import_sizes[i];
897 }
898 }
899
900 // A barrier is needed because of the use of wildcard receives. This
901 // avoids successive calls interfering with each other.
902 MPI_Barrier( this->comm() );
903
904 // Return the neighbor ids.
905 return counts_and_ids.second;
906 }
907
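// Illustrative note (not part of the original source): the call mirrors the
// sketch above except that no neighbor list is supplied; the set of ranks
// that will send to us is discovered internally via MPI_Reduce_scatter and
// wildcard receives:
//
//   auto neighbor_ids = plan.createWithoutTopology(
//       Plan::execution_space{}, Cabana::Export(), export_ranks );
//   plan.createExportSteering( neighbor_ids, export_ranks );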
936 template <class RankViewType>
937 Kokkos::View<size_type*, memory_space>
938 createWithoutTopology( Export, const RankViewType& element_export_ranks )
939 {
940 // Use the default execution space.
941 return createWithoutTopology( execution_space{}, Export(),
942 element_export_ranks );
943 }
944
984 template <class ExecutionSpace, class RankViewType, class IdViewType>
985 auto createWithTopology( ExecutionSpace exec_space, Import,
986 const RankViewType& element_import_ranks,
987 const IdViewType& element_import_ids,
988 const std::vector<int>& neighbor_ranks )
989 -> std::tuple<Kokkos::View<typename RankViewType::size_type*,
990 typename RankViewType::memory_space>,
991 Kokkos::View<int*, typename RankViewType::memory_space>,
992 Kokkos::View<int*, typename IdViewType::memory_space>>
993 {
995
996 if ( element_import_ids.size() != element_import_ranks.size() )
997 throw std::runtime_error( "Export ids and ranks different sizes!" );
998
999 // Store the unique neighbors (this rank first).
1000 _neighbors = getUniqueTopology( comm(), neighbor_ranks );
1001 std::size_t num_n = _neighbors.size();
1002
1003 // Get the size of this communicator.
1004 int comm_size = -1;
1005 MPI_Comm_size( comm(), &comm_size );
1006
1007 // Get the MPI rank we are currently on.
1008 int my_rank = -1;
1009 MPI_Comm_rank( comm(), &my_rank );
1010
1011 // Pick an MPI tag for communication. This object has its own
1012 // communication space so any MPI tag will do.
1013 const int mpi_tag = 1221;
1014
1015 // Initialize import/export sizes.
1016 _num_export.assign( num_n, 0 );
1017 _num_import.assign( num_n, 0 );
1018
1019 // Count the number of imports this rank needs from other ranks. Keep
1020 // track of which slot we get in our neighbor's send buffer.
1021 auto counts_and_ids = Impl::countSendsAndCreateSteering(
1022 exec_space, element_import_ranks, comm_size,
1023 typename Impl::CountSendsAndCreateSteeringAlgorithm<
1024 ExecutionSpace>::type() );
1025
1026 // Copy the counts to the host.
1027 auto neighbor_counts_host = Kokkos::create_mirror_view_and_copy(
1028 Kokkos::HostSpace(), counts_and_ids.first );
1029
1030 // Get the import counts.
1031 for ( std::size_t n = 0; n < num_n; ++n )
1032 _num_import[n] = neighbor_counts_host( _neighbors[n] );
1033
1034 // Post receives to get the number of indices each neighbor will request
1035 // from this rank, i.e. the number of elements we will need to export to
1036 // that neighbor.
1037 std::vector<MPI_Request> requests;
1038 requests.reserve( num_n * 2 );
1039 for ( std::size_t n = 0; n < num_n; ++n )
1040 if ( my_rank != _neighbors[n] )
1041 {
1042 requests.push_back( MPI_Request() );
1043 MPI_Irecv( &_num_export[n], 1, MPI_UNSIGNED_LONG, _neighbors[n],
1044 mpi_tag, comm(), &( requests.back() ) );
1045 }
1046 else // Self import
1047 {
1048 _num_export[n] = _num_import[n];
1049 }
1050
1051 // Send the number of imports to each of our neighbors.
1052 for ( std::size_t n = 0; n < num_n; ++n )
1053 if ( my_rank != _neighbors[n] )
1054 {
1055 requests.push_back( MPI_Request() );
1056 MPI_Isend( &_num_import[n], 1, MPI_UNSIGNED_LONG, _neighbors[n],
1057 mpi_tag, comm(), &( requests.back() ) );
1058 }
1059
1060 // Wait on messages.
1061 std::vector<MPI_Status> status( requests.size() );
1062 const int ec =
1063 MPI_Waitall( requests.size(), requests.data(), status.data() );
1064 if ( MPI_SUCCESS != ec )
1065 throw std::logic_error( "Failed MPI Communication" );
1066
1067 // Get the total number of imports/exports.
1068 _total_num_export =
1069 std::accumulate( _num_export.begin(), _num_export.end(), 0 );
1070 _total_num_import =
1071 std::accumulate( _num_import.begin(), _num_import.end(), 0 );
1072 _num_export_element = _total_num_export;
1073
1074 // Post receives to get the indices other processes are requesting
1075 // i.e. our export indices
1076 Kokkos::View<int*, memory_space> export_indices( "export_indices",
1077 _total_num_export );
1078 std::size_t idx = 0;
1079 int num_messages = _total_num_export + element_import_ranks.extent( 0 );
1080 std::vector<MPI_Request> mpi_requests( num_messages );
1081 std::vector<MPI_Status> mpi_statuses( num_messages );
1082
1083 // Use an incremented mpi_tag for this round of messages so they are not
1084 // confused with any lingering messages from the previous round of Isends
1085 // and Irecvs.
1086 for ( std::size_t i = 0; i < num_n; i++ )
1087 {
1088 for ( std::size_t j = 0; j < _num_export[i]; j++ )
1089 {
1090 MPI_Irecv( export_indices.data() + idx, 1, MPI_INT,
1091 _neighbors[i], mpi_tag + 1, comm(),
1092 &mpi_requests[idx] );
1093 idx++;
1094 }
1095 }
1096
1097 // Send the indices we need
1098 for ( std::size_t i = 0; i < element_import_ranks.extent( 0 ); i++ )
1099 {
1100 MPI_Isend( element_import_ids.data() + i, 1, MPI_INT,
1101 *( element_import_ranks.data() + i ), mpi_tag + 1,
1102 comm(), &mpi_requests[idx++] );
1103 }
1104
1105 // Wait for all count exchanges to complete
1106 const int ec1 = MPI_Waitall( num_messages, mpi_requests.data(),
1107 mpi_statuses.data() );
1108 if ( MPI_SUCCESS != ec1 )
1109 throw std::logic_error( "Failed MPI Communication" );
1110
1111 // Now, build the export steering
1112 // Export rank in mpi_statuses[i].MPI_SOURCE
1113 // Export ID in export_indices(i)
1114 Kokkos::View<int*, Kokkos::HostSpace> element_export_ranks_h(
1115 "element_export_ranks_h", _total_num_export );
1116 for ( std::size_t i = 0; i < _total_num_export; i++ )
1117 {
1118 element_export_ranks_h[i] = mpi_statuses[i].MPI_SOURCE;
1119 }
1120 auto element_export_ranks = Kokkos::create_mirror_view_and_copy(
1121 memory_space(), element_export_ranks_h );
1122
1123 auto counts_and_ids2 = Impl::countSendsAndCreateSteering(
1124 exec_space, element_export_ranks, comm_size,
1125 typename Impl::CountSendsAndCreateSteeringAlgorithm<
1126 ExecutionSpace>::type() );
1127
1128 // No barrier is needed because all ranks know who they are receiving
1129 // from and sending to.
1130
1131 // Return the neighbor ids, export ranks, and export indices
1132 return std::tuple{ counts_and_ids2.second, element_export_ranks,
1133 export_indices };
1134 }
1135
1173 template <class RankViewType, class IdViewType>
1174 auto createWithTopology( Import, const RankViewType& element_import_ranks,
1175 const IdViewType& element_import_ids,
1176 const std::vector<int>& neighbor_ranks )
1177 {
1178 // Use the default execution space.
1179 return createWithTopology( execution_space{}, Import(),
1180 element_import_ranks, element_import_ids,
1181 neighbor_ranks );
1182 }
1183
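// Illustrative usage sketch (not part of the original source), assuming
// import_ranks (the rank each element is requested from) and import_ids (its
// local id on that rank) are device views defined analogously to the Export
// sketch above:
//
//   auto [neighbor_ids, export_ranks, export_ids] = plan.createWithTopology(
//       Cabana::Import(), import_ranks, import_ids, topology );
//   plan.createExportSteering( neighbor_ids, export_ranks, export_ids );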
1216 template <class ExecutionSpace, class RankViewType, class IdViewType>
1217 auto createWithoutTopology( ExecutionSpace exec_space, Import,
1218 const RankViewType& element_import_ranks,
1219 const IdViewType& element_import_ids )
1220 -> std::tuple<Kokkos::View<typename RankViewType::size_type*,
1221 typename RankViewType::memory_space>,
1222 Kokkos::View<int*, typename RankViewType::memory_space>,
1223 Kokkos::View<int*, typename IdViewType::memory_space>>
1224 {
1226
1227 if ( element_import_ids.size() != element_import_ranks.size() )
1228 throw std::runtime_error( "Export ids and ranks different sizes!" );
1229
1230 // Get the size of this communicator.
1231 int comm_size = -1;
1232 MPI_Comm_size( comm(), &comm_size );
1233
1234 // Get the MPI rank we are currently on.
1235 int rank = -1;
1236 MPI_Comm_rank( comm(), &rank );
1237
1238 // Pick an MPI tag for communication. This object has its own
1239 // communication space so any MPI tag will do.
1240 const int mpi_tag = 1221;
1241
1242 // Store which ranks I need to receive from (i.e. ranks that send data to me).
1243 Kokkos::View<int*, memory_space> importing_ranks( "importing_ranks",
1244 comm_size );
1245 Kokkos::deep_copy( importing_ranks, 0 );
1246 Kokkos::parallel_for(
1247 "Cabana::storeImportRanks",
1248 Kokkos::RangePolicy<ExecutionSpace>(
1249 0, element_import_ranks.extent( 0 ) ),
1250 KOKKOS_LAMBDA( const int i ) {
1251 int import_rank = element_import_ranks( i );
1252 Kokkos::atomic_store( &importing_ranks( import_rank ), 1 );
1253 } );
1254 Kokkos::fence();
1255 auto importing_ranks_h = Kokkos::create_mirror_view_and_copy(
1256 Kokkos::HostSpace(), importing_ranks );
1257
1258 // Allreduce to count number of ranks I am communicating with
1259 Kokkos::View<int*, Kokkos::HostSpace> num_ranks_communicate(
1260 "num_ranks_communicate", comm_size );
1261 MPI_Allreduce( importing_ranks_h.data(), num_ranks_communicate.data(),
1262 comm_size, MPI_INT, MPI_SUM, comm() );
1263
1264 // Post that many wildcard receives to get the number of indices I will
1265 // send to each rank. Allocate buffers based on num_ranks_communicate.
1266 int num_recvs = num_ranks_communicate( rank );
1267 Kokkos::View<int*, Kokkos::HostSpace> send_counts( "send_counts",
1268 num_recvs );
1269 Kokkos::View<int*, Kokkos::HostSpace> send_to( "send_to", num_recvs );
1270
1271 std::vector<MPI_Request> mpi_requests( num_recvs );
1272 std::vector<MPI_Status> mpi_statuses( num_recvs );
1273
1274 // Receive counts for indices this process will send
1275 for ( int i = 0; i < num_recvs; i++ )
1276 {
1277 MPI_Irecv( &send_counts( i ), 1, MPI_INT, MPI_ANY_SOURCE, mpi_tag,
1278 comm(), &mpi_requests[i] );
1279 }
1280
1281 // Count the number of imports this rank needs from other ranks. Keep
1282 // track of which slot we get in our neighbor's send buffer.
1283 auto counts_and_ids = Impl::countSendsAndCreateSteering(
1284 exec_space, element_import_ranks, comm_size,
1285 typename Impl::CountSendsAndCreateSteeringAlgorithm<
1286 ExecutionSpace>::type() );
1287
1288 // Copy the counts to the host.
1289 auto neighbor_counts_host = Kokkos::create_mirror_view_and_copy(
1290 Kokkos::HostSpace(), counts_and_ids.first );
1291
1292 // Clear vectors before we use them
1293 _neighbors.clear();
1294 _num_export.clear();
1295 _num_import.clear();
1296
1297 for ( std::size_t i = 0; i < neighbor_counts_host.extent( 0 ); i++ )
1298 {
1299 if ( neighbor_counts_host( i ) != 0 )
1300 {
1301 // Send counts of needed indices
1302 MPI_Send( &neighbor_counts_host( i ), 1, MPI_INT, i, mpi_tag,
1303 comm() );
1304
1305 // Record that we are importing this count from this rank.
1306 _neighbors.push_back( i );
1307 _num_import.push_back( neighbor_counts_host( i ) );
1308 }
1309 }
1310 // Assign all exports to zero
1311 _num_export.assign( _num_import.size(), 0 );
1312
1313 // Wait for all count exchanges to complete
1314 const int ec0 =
1315 MPI_Waitall( num_recvs, mpi_requests.data(), mpi_statuses.data() );
1316 if ( MPI_SUCCESS != ec0 )
1317 throw std::logic_error( "Failed MPI Communication" );
1318
1319 // Save ranks we got messages from and track total messages to size
1320 // buffers
1321 _total_num_export = 0;
1322 for ( int i = 0; i < num_recvs; i++ )
1323 {
1324 send_to( i ) = mpi_statuses[i].MPI_SOURCE;
1325 _total_num_export += send_counts( i );
1326 }
1327
1328 // Extract the export ranks and number of exports and then flag the
1329 // send ranks.
1330 for ( int r = 0; r < num_recvs; ++r )
1331 {
1332 int export_to = send_to( r );
1333 if ( export_to > -1 )
1334 {
1335 // See if the neighbor we are exporting to is someone we are
1336 // also importing from
1337 auto found_neighbor = std::find( _neighbors.begin(),
1338 _neighbors.end(), export_to );
1339
1340 // If this is a new neighbor (i.e. someone we are not importing
1341 // from) record this.
1342 if ( found_neighbor == std::end( _neighbors ) )
1343 {
1344 _neighbors.push_back( export_to );
1345 _num_import.push_back( 0 );
1346 _num_export.push_back( send_counts( r ) );
1347 }
1348
1349 // Otherwise if we are already importing from this neighbor that
1350 // means we already have a neighbor/import entry. Just assign
1351 // the export entry for that neighbor.
1352 else
1353 {
1354 auto n =
1355 std::distance( _neighbors.begin(), found_neighbor );
1356 _num_export[n] = send_counts( r );
1357 }
1358 }
1359 else
1360 {
1361 // This block should never be reached as
1362 // mpi_statuses[i].MPI_SOURCE will never be less than 0.
1363 throw std::runtime_error(
1364 "CommunicationPlan::createFromImportsOnly: "
1365 "mpi_statuses[i].MPI_SOURCE returned a value >= -1" );
1366 }
1367 }
1368 // If we are sending to ourself put that one first in the neighbor
1369 // list and assign the number of exports to be the number of imports.
1370 for ( std::size_t n = 0; n < _neighbors.size(); ++n )
1371 if ( _neighbors[n] == rank )
1372 {
1373 std::swap( _neighbors[n], _neighbors[0] );
1374 std::swap( _num_export[n], _num_export[0] );
1375 std::swap( _num_import[n], _num_import[0] );
1376 _num_export[0] = _num_import[0];
1377 break;
1378 }
1379
1380 // Total number of imports and exports are now known
1381 _total_num_import = element_import_ranks.extent( 0 );
1382 _num_export_element = _total_num_export;
1383
1384 // Post receives to get the indices other processes are requesting
1385 // i.e. our export indices
1386 Kokkos::View<int*, memory_space> export_indices( "export_indices",
1387 _total_num_export );
1388 std::size_t idx = 0;
1389 mpi_requests.clear();
1390 mpi_statuses.clear();
1391 int num_messages = _total_num_export + element_import_ranks.extent( 0 );
1392 mpi_requests.resize( num_messages );
1393 mpi_statuses.resize( num_messages );
1394
1395 // Use an incremented mpi_tag for this round of messages so they are not
1396 // confused with any lingering messages from the previous round of Isends
1397 // and Irecvs.
1398 for ( int i = 0; i < num_recvs; i++ )
1399 {
1400 for ( int j = 0; j < send_counts( i ); j++ )
1401 {
1402 MPI_Irecv( export_indices.data() + idx, 1, MPI_INT,
1403 send_to( i ), mpi_tag + 1, comm(),
1404 &mpi_requests[idx] );
1405 idx++;
1406 }
1407 }
1408
1409 // Send the indices we need
1410 for ( std::size_t i = 0; i < element_import_ranks.extent( 0 ); i++ )
1411 {
1412 MPI_Isend( element_import_ids.data() + i, 1, MPI_INT,
1413 *( element_import_ranks.data() + i ), mpi_tag + 1,
1414 comm(), &mpi_requests[idx++] );
1415 }
1416
1417 // Wait for all count exchanges to complete
1418 const int ec1 = MPI_Waitall( num_messages, mpi_requests.data(),
1419 mpi_statuses.data() );
1420 if ( MPI_SUCCESS != ec1 )
1421 throw std::logic_error( "Failed MPI Communication" );
1422
1423 // Now, build the export steering
1424 // Export rank in mpi_statuses[i].MPI_SOURCE
1425 // Export ID in export_indices(i)
1426 Kokkos::View<int*, Kokkos::HostSpace> element_export_ranks_h(
1427 "element_export_ranks_h", _total_num_export );
1428 for ( std::size_t i = 0; i < _total_num_export; i++ )
1429 {
1430 element_export_ranks_h[i] = mpi_statuses[i].MPI_SOURCE;
1431 }
1432 auto element_export_ranks = Kokkos::create_mirror_view_and_copy(
1433 memory_space(), element_export_ranks_h );
1434
1435 auto counts_and_ids2 = Impl::countSendsAndCreateSteering(
1436 exec_space, element_export_ranks, comm_size,
1437 typename Impl::CountSendsAndCreateSteeringAlgorithm<
1438 ExecutionSpace>::type() );
1439
1440 // A barrier is needed because of the use of wildcard receives. This
1441 // avoids successive calls interfering with each other.
1442 MPI_Barrier( this->comm() );
1443
1444 return std::tuple{ counts_and_ids2.second, element_export_ranks,
1445 export_indices };
1446 }
1447
1478 template <class RankViewType, class IdViewType>
1479 auto createWithoutTopology( Import,
1480 const RankViewType& element_import_ranks,
1481 const IdViewType& element_import_ids )
1482 {
1483 // Use the default execution space.
1484 return createWithoutTopology( execution_space{}, Import(),
1485 element_import_ranks,
1486 element_import_ids );
1487 }
1488
1503 template <class PackViewType, class RankViewType>
1504 void createExportSteering( const PackViewType& neighbor_ids,
1505 const RankViewType& element_export_ranks )
1506 {
1507 // passing in element_export_ranks here as a dummy argument.
1508 createSteering( true, neighbor_ids, element_export_ranks,
1509 element_export_ranks );
1510 }
1511
1531 template <class PackViewType, class RankViewType, class IdViewType>
1532 void createExportSteering( const PackViewType& neighbor_ids,
1533 const RankViewType& element_export_ranks,
1534 const IdViewType& element_export_ids )
1535 {
1536 createSteering( false, neighbor_ids, element_export_ranks,
1537 element_export_ids );
1538 }
1539
1541 // Create the export steering vector.
1542 template <class ExecutionSpace, class PackViewType, class RankViewType,
1543 class IdViewType>
1544 void createSteering( ExecutionSpace, const bool use_iota,
1545 const PackViewType& neighbor_ids,
1546 const RankViewType& element_export_ranks,
1547 const IdViewType& element_export_ids )
1548 {
1550
1551 if ( !use_iota &&
1552 ( element_export_ids.size() != element_export_ranks.size() ) )
1553 throw std::runtime_error(
1554 "Cabana::CommunicationPlan::createSteering: Export ids and "
1555 "ranks different sizes!" );
1556
1557 // Get the size of this communicator.
1558 int comm_size = -1;
1559 MPI_Comm_size( *_comm_ptr, &comm_size );
1560
1561 // Calculate the steering offsets via exclusive prefix sum for the
1562 // exports.
1563 int num_n = _neighbors.size();
1564 std::vector<std::size_t> offsets( num_n, 0 );
1565 for ( int n = 1; n < num_n; ++n )
1566 offsets[n] = offsets[n - 1] + _num_export[n - 1];
1567
1568 // Map the offsets to the device.
1569 Kokkos::View<std::size_t*, Kokkos::HostSpace> rank_offsets_host(
1570 Kokkos::ViewAllocateWithoutInitializing( "rank_map" ), comm_size );
1571 for ( int n = 0; n < num_n; ++n )
1572 rank_offsets_host( _neighbors[n] ) = offsets[n];
1573 auto rank_offsets = Kokkos::create_mirror_view_and_copy(
1574 memory_space(), rank_offsets_host );
1575
1576 // Create the export steering vector for writing local elements into
1577 // the send buffer. Note we create a local, shallow copy - this is a
1578 // CUDA workaround for handling class private data.
1579 _export_steering = Kokkos::View<std::size_t*, memory_space>(
1580 Kokkos::ViewAllocateWithoutInitializing( "export_steering" ),
1581 _total_num_export );
1582 auto steer_vec = _export_steering;
1583 Kokkos::parallel_for(
1584 "Cabana::CommunicationPlan::createSteering",
1585 Kokkos::RangePolicy<ExecutionSpace>( 0, _num_export_element ),
1586 KOKKOS_LAMBDA( const int i ) {
1587 if ( element_export_ranks( i ) >= 0 )
1588 steer_vec( rank_offsets( element_export_ranks( i ) ) +
1589 neighbor_ids( i ) ) =
1590 ( use_iota ) ? i : element_export_ids( i );
1591 } );
1592 Kokkos::fence();
1593 }
1594
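// Illustrative worked example (not part of the original source): with two
// neighbors and _num_export == {3, 2}, the exclusive prefix sum gives
// offsets == {0, 3}. An element destined for the second neighbor whose
// neighbor_id is 1 is therefore written to slot 3 + 1 = 4 of the steering
// vector, and (with use_iota == true) the value stored there is the
// element's own local index.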
1595 template <class PackViewType, class RankViewType, class IdViewType>
1596 void createSteering( const bool use_iota, const PackViewType& neighbor_ids,
1597 const RankViewType& element_export_ranks,
1598 const IdViewType& element_export_ids )
1599 {
1600 // Use the default execution space.
1601 createSteering( execution_space{}, use_iota, neighbor_ids,
1602 element_export_ranks, element_export_ids );
1603 }
1605
1606 private:
1607 std::shared_ptr<MPI_Comm> _comm_ptr;
1608 std::vector<int> _neighbors;
1609 std::size_t _total_num_export;
1610 std::size_t _total_num_import;
1611 std::vector<std::size_t> _num_export;
1612 std::vector<std::size_t> _num_import;
1613 std::size_t _num_export_element;
1614 Kokkos::View<std::size_t*, memory_space> _export_steering;
1615};
1616
1617//---------------------------------------------------------------------------//
1621template <class AoSoAType>
1622struct CommunicationDataAoSoA
1623{
1624 static_assert( is_aosoa<AoSoAType>::value, "" );
1625
1627 using particle_data_type = AoSoAType;
1629 using memory_space = typename particle_data_type::memory_space;
1631 using data_type = typename particle_data_type::tuple_type;
1633 using buffer_type = typename Kokkos::View<data_type*, memory_space>;
1634
1639 CommunicationDataAoSoA( particle_data_type particles )
1640 : _particles( particles )
1641 {
1643 Kokkos::ViewAllocateWithoutInitializing( "send_buffer" ), 0 );
1645 Kokkos::ViewAllocateWithoutInitializing( "recv_buffer" ), 0 );
1646 }
1647
1649 void reallocateSend( const std::size_t num_send )
1650 {
1651 Kokkos::realloc( _send_buffer, num_send );
1652 }
1653
1654 void reallocateReceive( const std::size_t num_recv )
1655 {
1656 Kokkos::realloc( _recv_buffer, num_recv );
1657 }
1658
1660 buffer_type _send_buffer;
1662 buffer_type _recv_buffer;
1664 particle_data_type _particles;
1666 std::size_t _num_comp = 0;
1667};
1668
1672template <class SliceType>
1673struct CommunicationDataSlice
1674{
1675 static_assert( is_slice<SliceType>::value, "" );
1676
1678 using particle_data_type = SliceType;
1680 using memory_space = typename particle_data_type::memory_space;
1682 using data_type = typename particle_data_type::value_type;
1684 using buffer_type =
1685 typename Kokkos::View<data_type**, Kokkos::LayoutRight, memory_space>;
1686
1691 CommunicationDataSlice( particle_data_type particles )
1692 : _particles( particles )
1693 {
1694 setSliceComponents();
1695
1696 _send_buffer = buffer_type(
1697 Kokkos::ViewAllocateWithoutInitializing( "send_buffer" ), 0, 0 );
1698 _recv_buffer = buffer_type(
1699 Kokkos::ViewAllocateWithoutInitializing( "recv_buffer" ), 0, 0 );
1700 }
1701
1703 void reallocateSend( const std::size_t num_send )
1704 {
1705 Kokkos::realloc( _send_buffer, num_send, _num_comp );
1706 }
1707
1708 void reallocateReceive( const std::size_t num_recv )
1709 {
1710 Kokkos::realloc( _recv_buffer, num_recv, _num_comp );
1711 }
1712
1715 {
1716 _num_comp = 1;
1717 for ( std::size_t d = 2; d < _particles.viewRank(); ++d )
1718 _num_comp *= _particles.extent( d );
1719 }
1720
1722 buffer_type _send_buffer;
1724 buffer_type _recv_buffer;
1726 particle_data_type _particles;
1728 std::size_t _num_comp;
1729};
1730//---------------------------------------------------------------------------//
1731
1735template <class CommPlanType, class CommDataType>
1736class CommunicationData
1737{
1738 public:
1740 using plan_type = CommPlanType;
1742 using execution_space = typename plan_type::execution_space;
1744 using policy_type = Kokkos::RangePolicy<execution_space>;
1746 using comm_data_type = CommDataType;
1748 using particle_data_type = typename comm_data_type::particle_data_type;
1750 using memory_space = typename comm_data_type::memory_space;
1752 using data_type = typename comm_data_type::data_type;
1754 using buffer_type = typename comm_data_type::buffer_type;
1755
1762 CommunicationData( const CommPlanType& comm_plan,
1763 const particle_data_type& particles,
1764 const double overallocation = 1.0 )
1765 : _comm_plan( comm_plan )
1766 , _comm_data( CommDataType( particles ) )
1767 , _overallocation( overallocation )
1768 {
1769 }
1770
1772 buffer_type getSendBuffer() const { return _comm_data._send_buffer; }
1774 buffer_type getReceiveBuffer() const { return _comm_data._recv_buffer; }
1775
1777 particle_data_type getData() const { return _comm_data._particles; }
1779 void setData( const particle_data_type& particles )
1780 {
1781 _comm_data._particles = particles;
1782 }
1783
1789 auto sendSize() { return _send_size; }
1795 auto receiveSize() { return _recv_size; }
1797 auto sendCapacity() { return _comm_data._send_buffer.extent( 0 ); }
1799 auto receiveCapacity() { return _comm_data._recv_buffer.extent( 0 ); }
1805 void shrinkToFit( const bool use_overallocation = false )
1806 {
1807 auto shrunk_send_size = _send_size;
1808 auto shrunk_recv_size = _recv_size;
1809 if ( use_overallocation )
1810 {
1811 shrunk_send_size *= _overallocation;
1812 shrunk_recv_size *= _overallocation;
1813 }
1814 _comm_data.reallocateSend( shrunk_send_size );
1815 _comm_data.reallocateReceive( shrunk_recv_size );
1816 }
1817
1819 virtual void apply() = 0;
1820
1822 template <class ExecutionSpace>
1823 void apply( ExecutionSpace );
1824
1826 void reserveImpl( const CommPlanType& comm_plan,
1827 const particle_data_type particles,
1828 const std::size_t total_send,
1829 const std::size_t total_recv,
1830 const double overallocation )
1831 {
1832 if ( overallocation < 1.0 )
1833 throw std::runtime_error( "Cabana::CommunicationPlan: "
1834 "Cannot allocate buffers with less space "
1835 "than data to communicate!" );
1836 _overallocation = overallocation;
1837
1838 reserveImpl( comm_plan, particles, total_send, total_recv );
1839 }
1840 void reserveImpl( const CommPlanType& comm_plan,
1841 const particle_data_type particles,
1842 const std::size_t total_send,
1843 const std::size_t total_recv )
1844 {
1845 _comm_plan = comm_plan;
1846 setData( particles );
1847
1848 auto send_capacity = sendCapacity();
1849 auto new_send_size = static_cast<std::size_t>(
1850 static_cast<double>( total_send ) * _overallocation );
1851 if ( new_send_size > send_capacity )
1852 _comm_data.reallocateSend( new_send_size );
1853
1854 auto recv_capacity = receiveCapacity();
1855 auto new_recv_size = static_cast<std::size_t>(
1856 static_cast<double>( total_recv ) * _overallocation );
1857 if ( new_recv_size > recv_capacity )
1858 _comm_data.reallocateReceive( new_recv_size );
1859
1860 _send_size = total_send;
1861 _recv_size = total_recv;
1862 }
1864
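// Illustrative note (not part of the original source): with an
// overallocation factor of 1.5 and total_send == 100, reserveImpl() grows the
// send buffer to a capacity of 150 while sendSize() still reports 100. A
// later reserve whose padded size (1.5x the new total) still fits within 150
// reuses the existing allocation, and shrinkToFit( true ) trims the capacity
// back to 1.5x the current send/receive sizes.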
1865 protected:
1867 auto getSliceComponents() { return _comm_data._num_comp; };
1868
1870 plan_type _comm_plan;
1872 comm_data_type _comm_data;
1874 double _overallocation;
1876 std::size_t _send_size;
1878 std::size_t _recv_size;
1879};
1880
1881} // end namespace Cabana
1882
1883#endif // end CABANA_COMMUNICATIONPLAN_HPP