Cabana 0.8.0-dev
 
Loading...
Searching...
No Matches
Cabana_Grid_SparseDimPartitioner.hpp
Go to the documentation of this file.
1/****************************************************************************
2 * Copyright (c) 2018-2023 by the Cabana authors *
3 * All rights reserved. *
4 * *
5 * This file is part of the Cabana library. Cabana is distributed under a *
6 * BSD 3-clause license. For the licensing terms see the LICENSE file in *
7 * the top-level directory. *
8 * *
9 * SPDX-License-Identifier: BSD-3-Clause *
10 ****************************************************************************/
11
16#ifndef CABANA_GRID_SPARSEDIMPARTITIONER_HPP
17#define CABANA_GRID_SPARSEDIMPARTITIONER_HPP
18
21#include <Cabana_Utils.hpp> // FIXME: remove after next release.
22
23#include <Kokkos_Core.hpp>
24
25#include <array>
26#include <vector>
27
28#include <mpi.h>
29
30namespace Cabana
31{
32namespace Grid
33{
34//---------------------------------------------------------------------------//
41template <typename MemorySpace, unsigned long long CellPerTileDim = 4,
42 std::size_t NumSpaceDim = 3>
43class SparseDimPartitioner : public BlockPartitioner<NumSpaceDim>
44{
45 public:
47 static constexpr std::size_t num_space_dim = NumSpaceDim;
48
49 // FIXME: extracting the self type for backwards compatibility with previous
50 // template on DeviceType. Should simply be MemorySpace after next release.
52 using memory_space = typename MemorySpace::memory_space;
53 // FIXME: replace warning with memory space assert after next release.
54 static_assert(
55 Cabana::Impl::deprecated( Kokkos::is_device<MemorySpace>() ) );
56
58 using device_type [[deprecated]] = typename memory_space::device_type;
60 using execution_space = typename memory_space::execution_space;
61
63 using workload_view = Kokkos::View<int***, memory_space>;
65 using partition_view = Kokkos::View<int* [num_space_dim], memory_space>;
68 Kokkos::View<int***, typename execution_space::array_layout,
69 Kokkos::HostSpace>;
72 Kokkos::View<int* [num_space_dim],
73 typename execution_space::array_layout, Kokkos::HostSpace>;
74
76 static constexpr unsigned long long cell_bits_per_tile_dim =
77 bitCount( CellPerTileDim );
80 static constexpr unsigned long long cell_num_per_tile_dim =
82
96 MPI_Comm comm, float max_workload_coeff, int workload_num,
97 int num_step_rebalance,
98 const std::array<int, num_space_dim>& global_cells_per_dim,
99 int max_optimize_iteration = 10 )
100 : _workload_threshold(
101 static_cast<int>( max_workload_coeff * workload_num ) )
102 , _num_step_rebalance( num_step_rebalance )
103 , _max_optimize_iteration( max_optimize_iteration )
104 {
105 // compute the ranks_per_dim from MPI communicator
106 allocate( global_cells_per_dim );
107 ranksPerDimension( comm );
108 }
109
125 MPI_Comm comm, float max_workload_coeff, int workload_num,
126 int num_step_rebalance,
127 const std::array<int, num_space_dim>& ranks_per_dim,
128 const std::array<int, num_space_dim>& global_cells_per_dim,
129 int max_optimize_iteration = 10 )
130 : _workload_threshold(
131 static_cast<int>( max_workload_coeff * workload_num ) )
132 , _num_step_rebalance( num_step_rebalance )
133 , _max_optimize_iteration( max_optimize_iteration )
134 {
135 allocate( global_cells_per_dim );
136 std::copy( ranks_per_dim.begin(), ranks_per_dim.end(),
137 _ranks_per_dim.data() );
138
139 // init MPI topology
140 int comm_size;
141 MPI_Comm_size( comm, &comm_size );
142 MPI_Dims_create( comm_size, num_space_dim, _ranks_per_dim.data() );
143 }
144
150 std::array<int, num_space_dim> ranksPerDimension( MPI_Comm comm )
151 {
152 int comm_size;
153 MPI_Comm_size( comm, &comm_size );
154
155 std::array<int, num_space_dim> ranks_per_dim;
156 for ( std::size_t d = 0; d < num_space_dim; ++d )
157 ranks_per_dim[d] = 0;
158 MPI_Dims_create( comm_size, num_space_dim, ranks_per_dim.data() );
159
160 std::copy( ranks_per_dim.begin(), ranks_per_dim.end(),
161 _ranks_per_dim.data() );
162
163 return ranks_per_dim;
164 }
165
171 std::array<int, num_space_dim>
172 ranksPerDimension( MPI_Comm comm,
173 const std::array<int, num_space_dim>& ) const override
174 {
175 std::array<int, num_space_dim> ranks_per_dim = {
176 _ranks_per_dim[0], _ranks_per_dim[1], _ranks_per_dim[2] };
177 int comm_size;
178 MPI_Comm_size( comm, &comm_size );
179 int nrank = 1;
180 for ( std::size_t d = 0; d < num_space_dim; ++d )
181 nrank *= _ranks_per_dim[d];
182 if ( comm_size != nrank )
183 throw std::runtime_error(
184 "Cabana::Grid::SparseDimPartitioner::ranksPerDimension: "
185 "SparseDimPartitioner ranks do not match comm size" );
186 return ranks_per_dim;
187 }
188
193 std::array<int, num_space_dim>
194 ownedTilesPerDimension( MPI_Comm cart_comm ) const
195 {
196 // Get the Cartesian topology index of this rank.
197 std::array<int, num_space_dim> cart_rank;
198 int linear_rank;
199 MPI_Comm_rank( cart_comm, &linear_rank );
200 MPI_Cart_coords( cart_comm, linear_rank, num_space_dim,
201 cart_rank.data() );
202
203 // Get the tiles per dimension and the remainder.
204 std::array<int, num_space_dim> tiles_per_dim;
205 auto rec_mirror = Kokkos::create_mirror_view_and_copy(
206 Kokkos::HostSpace(), _rectangle_partition_dev );
207 for ( std::size_t d = 0; d < num_space_dim; ++d )
208 tiles_per_dim[d] = rec_mirror( cart_rank[d] + 1, d ) -
209 rec_mirror( cart_rank[d], d );
210 return tiles_per_dim;
211 }
212
217 std::array<int, num_space_dim>
218 ownedCellsPerDimension( MPI_Comm cart_comm ) const
219 {
220 auto tiles_per_dim = ownedTilesPerDimension( cart_comm );
221 for ( std::size_t d = 0; d < num_space_dim; ++d )
222 {
223 // compute cells_per_dim from tiles_per_dim
224 tiles_per_dim[d] <<= cell_bits_per_tile_dim;
225 }
226 return tiles_per_dim;
227 }
228
238 void
239 ownedTileInfo( MPI_Comm cart_comm,
240 std::array<int, num_space_dim>& owned_num_tile,
241 std::array<int, num_space_dim>& global_tile_offset ) const
242 {
243 // Get the Cartesian topology index of this rank.
244 std::array<int, num_space_dim> cart_rank;
245 int linear_rank;
246 MPI_Comm_rank( cart_comm, &linear_rank );
247 MPI_Cart_coords( cart_comm, linear_rank, num_space_dim,
248 cart_rank.data() );
249
250 // Get the tiles per dimension and the remainder.
251 auto rec_mirror = Kokkos::create_mirror_view_and_copy(
252 Kokkos::HostSpace(), _rectangle_partition_dev );
253 for ( std::size_t d = 0; d < num_space_dim; ++d )
254 {
255 owned_num_tile[d] = rec_mirror( cart_rank[d] + 1, d ) -
256 rec_mirror( cart_rank[d], d );
257 global_tile_offset[d] = rec_mirror( cart_rank[d], d );
258 }
259 }
260
271 MPI_Comm cart_comm, const std::array<int, num_space_dim>&,
272 std::array<int, num_space_dim>& owned_num_cell,
273 std::array<int, num_space_dim>& global_cell_offset ) const override
274 {
275 ownedTileInfo( cart_comm, owned_num_cell, global_cell_offset );
276 for ( std::size_t d = 0; d < num_space_dim; ++d )
277 {
278 // compute cells_per_dim from tiles_per_dim
279 owned_num_cell[d] <<= cell_bits_per_tile_dim;
280 global_cell_offset[d] <<= cell_bits_per_tile_dim;
281 }
282 }
283
292 void initializeRecPartition( std::vector<int>& rec_partition_i,
293 std::vector<int>& rec_partition_j,
294 std::vector<int>& rec_partition_k )
295 {
296 int max_size = 0;
297 for ( std::size_t d = 0; d < num_space_dim; ++d )
298 max_size =
299 max_size < _ranks_per_dim[d] ? _ranks_per_dim[d] : max_size;
300
301 typedef typename execution_space::array_layout layout;
302 Kokkos::View<int* [num_space_dim], layout, Kokkos::HostSpace>
303 rectangle_partition( "rectangle_partition_host", max_size + 1 );
304
305 for ( int id = 0; id < _ranks_per_dim[0] + 1; ++id )
306 rectangle_partition( id, 0 ) = rec_partition_i[id];
307
308 for ( int id = 0; id < _ranks_per_dim[1] + 1; ++id )
309 rectangle_partition( id, 1 ) = rec_partition_j[id];
310
311 for ( int id = 0; id < _ranks_per_dim[2] + 1; ++id )
312 rectangle_partition( id, 2 ) = rec_partition_k[id];
313
314 _rectangle_partition_dev = Kokkos::create_mirror_view_and_copy(
315 memory_space(), rectangle_partition );
316 }
317
322 std::array<std::vector<int>, num_space_dim> getCurrentPartition()
323 {
324 std::array<std::vector<int>, num_space_dim> rec_part;
325 auto rec_mirror = Kokkos::create_mirror_view_and_copy(
326 Kokkos::HostSpace(), _rectangle_partition_dev );
327 for ( std::size_t d = 0; d < num_space_dim; ++d )
328 {
329 rec_part[d].resize( _ranks_per_dim[d] + 1 );
330 for ( int id = 0; id < _ranks_per_dim[d] + 1; ++id )
331 {
332 rec_part[d][id] = rec_mirror( id, d );
333 }
334 }
335 return rec_part;
336 }
337
343 {
344 Kokkos::deep_copy( _workload_per_tile, 0 );
345 Kokkos::deep_copy( _workload_prefix_sum, 0 );
346 }
347
357 template <class ParticlePosViewType, typename ArrayType, typename CellUnit>
358 void computeLocalWorkLoad( const ParticlePosViewType& view,
359 int particle_num,
360 const ArrayType& global_lower_corner,
361 const CellUnit dx )
362 {
364 // make a local copy
365 auto workload = _workload_per_tile;
366 Kokkos::Array<CellUnit, num_space_dim> lower_corner;
367 for ( std::size_t d = 0; d < num_space_dim; ++d )
368 {
369 lower_corner[d] = global_lower_corner[d];
370 }
371
372 Kokkos::parallel_for(
373 "Cabana::Grid::SparseDimPartitioner::computeLocalWorkLoadPosition",
374 Kokkos::RangePolicy<execution_space>( 0, particle_num ),
375 KOKKOS_LAMBDA( const int i ) {
376 int ti = static_cast<int>(
377 ( view( i, 0 ) - lower_corner[0] ) / dx - 0.5 ) >>
379 int tj = static_cast<int>(
380 ( view( i, 1 ) - lower_corner[1] ) / dx - 0.5 ) >>
382 int tz = static_cast<int>(
383 ( view( i, 2 ) - lower_corner[2] ) / dx - 0.5 ) >>
385 Kokkos::atomic_inc( &workload( ti + 1, tj + 1, tz + 1 ) );
386 } );
387 Kokkos::fence();
388 }
389
395 template <class SparseMapType>
396 void computeLocalWorkLoad( const SparseMapType& sparseMap )
397 {
399 // make a local copy
400 auto workload = _workload_per_tile;
401 Kokkos::parallel_for(
402 "Cabana::Grid::SparseDimPartitioner::computeLocalWorkLoadSparseMap",
403 Kokkos::RangePolicy<execution_space>( 0, sparseMap.capacity() ),
404 KOKKOS_LAMBDA( uint32_t i ) {
405 if ( sparseMap.valid_at( i ) )
406 {
407 auto key = sparseMap.key_at( i );
408 int ti, tj, tk;
409 sparseMap.key2ijk( key, ti, tj, tk );
410 Kokkos::atomic_inc( &workload( ti + 1, tj + 1, tk + 1 ) );
411 }
412 } );
413 Kokkos::fence();
414 }
415
421 void computeFullPrefixSum( MPI_Comm comm )
422 {
423 // local copy
424 auto workload = _workload_per_tile;
425 auto prefix_sum = _workload_prefix_sum;
426 int total_size = _workload_per_tile.size();
427
428 // MPI all reduce: compute workload in all MPI ranks from the local
429 // workload matrix, save the results in _workload_prefix_sum
430 MPI_Allreduce( workload.data(), prefix_sum.data(), total_size, MPI_INT,
431 MPI_SUM, comm );
432 MPI_Barrier( comm );
433
434 // compute the prefix sum (in three dimensions)
435 // prefix sum in the dimension 0
436 for ( int j = 0;
437 j < static_cast<int>( _workload_prefix_sum.extent( 1 ) ); ++j )
438 for ( int k = 0;
439 k < static_cast<int>( _workload_prefix_sum.extent( 2 ) );
440 ++k )
441 Kokkos::parallel_scan(
442 "scan_prefix_sum_dim0",
443 Kokkos::RangePolicy<execution_space>(
444 0, _workload_prefix_sum.extent( 0 ) ),
445 KOKKOS_LAMBDA( const int i, int& update,
446 const bool final ) {
447 const float val_i = prefix_sum( i, j, k );
448 update += val_i;
449 if ( final )
450 {
451 prefix_sum( i, j, k ) = update;
452 }
453 } );
454 Kokkos::fence();
455
456 // prefix sum in the dimension 1
457 for ( int i = 0;
458 i < static_cast<int>( _workload_prefix_sum.extent( 0 ) ); ++i )
459 for ( int k = 0;
460 k < static_cast<int>( _workload_prefix_sum.extent( 2 ) );
461 ++k )
462 Kokkos::parallel_scan(
463 "scan_prefix_sum_dim1",
464 Kokkos::RangePolicy<execution_space>(
465 0, _workload_prefix_sum.extent( 1 ) ),
466 KOKKOS_LAMBDA( const int j, int& update,
467 const bool final ) {
468 const float val_i = prefix_sum( i, j, k );
469 update += val_i;
470 if ( final )
471 {
472 prefix_sum( i, j, k ) = update;
473 }
474 } );
475 Kokkos::fence();
476
477 // prefix sum in the dimension 2
478 for ( int i = 0;
479 i < static_cast<int>( _workload_prefix_sum.extent( 0 ) ); ++i )
480 for ( int j = 0;
481 j < static_cast<int>( _workload_prefix_sum.extent( 1 ) );
482 ++j )
483 Kokkos::parallel_scan(
484 "scan_prefix_sum_dim2",
485 Kokkos::RangePolicy<execution_space>(
486 0, _workload_prefix_sum.extent( 2 ) ),
487 KOKKOS_LAMBDA( const int k, int& update,
488 const bool final ) {
489 const float val_i = prefix_sum( i, j, k );
490 update += val_i;
491 if ( final )
492 {
493 prefix_sum( i, j, k ) = update;
494 }
495 } );
496 Kokkos::fence();
497 }
498
509 template <class ParticlePosViewType, typename ArrayType, typename CellUnit>
510 int optimizePartition( const ParticlePosViewType& view, int particle_num,
511 const ArrayType& global_lower_corner,
512 const CellUnit dx, MPI_Comm comm )
513 {
514 computeLocalWorkLoad( view, particle_num, global_lower_corner, dx );
515 MPI_Barrier( comm );
516
517 computeFullPrefixSum( comm );
518 MPI_Barrier( comm );
519
520 // each iteration covers partitioner optization in all three dimensions
521 // (with a random dim sequence)
522 for ( int i = 0; i < _max_optimize_iteration; ++i )
523 {
524 bool is_changed = false; // record changes in current iteration
525 bool dim_covered[3] = { false, false, false };
526 for ( int d = 0; d < 3; ++d )
527 {
528 int random_dim_id = std::rand() % num_space_dim;
529 while ( dim_covered[random_dim_id] )
530 random_dim_id = std::rand() % num_space_dim;
531
532 bool is_dim_changed = false; // record changes in current dim
533 optimizePartition( is_dim_changed, random_dim_id );
534
535 // update control info
536 is_changed = is_changed || is_dim_changed;
537 dim_covered[random_dim_id] = true;
538 }
539 // return if the current partition is optimal
540 if ( !is_changed )
541 return i;
542 }
543 return _max_optimize_iteration;
544 }
545
552 template <class SparseMapType>
553 int optimizePartition( const SparseMapType& sparseMap, MPI_Comm comm )
554 {
555 computeLocalWorkLoad( sparseMap );
556 MPI_Barrier( comm );
557
558 computeFullPrefixSum( comm );
559 MPI_Barrier( comm );
560
561 for ( int i = 0; i < _max_optimize_iteration; ++i )
562 {
563 bool is_changed = false; // record changes in current iteration
564 bool dim_covered[3] = { false, false, false };
565 for ( int d = 0; d < 3; ++d )
566 {
567 int random_dim_id = std::rand() % num_space_dim;
568 while ( dim_covered[random_dim_id] )
569 random_dim_id = std::rand() % num_space_dim;
570
571 bool is_dim_changed = false; // record changes in current dim
572 optimizePartition( is_dim_changed, random_dim_id );
573
574 // update control info
575 is_changed = is_changed || is_dim_changed;
576 dim_covered[random_dim_id] = true;
577 }
578 // return if the current partition is optimal
579 if ( !is_changed )
580 return i;
581 }
582 return _max_optimize_iteration;
583 }
584
591 void optimizePartition( bool& is_changed, int iter_seed )
592 {
593 is_changed = false;
594 // loop over three dimensions, optimize the partition in dimension di
595 for ( int iter_id = iter_seed;
596 iter_id < iter_seed + static_cast<int>( num_space_dim );
597 ++iter_id )
598 {
599 int di = iter_id % num_space_dim;
600 // compute the dimensions that should be fixed (dj and dk)
601 int dj = ( di + 1 ) % num_space_dim;
602 int dk = ( di + 2 ) % num_space_dim;
603 auto rank_k = _ranks_per_dim[dk];
604
605 auto rank = _ranks_per_dim[di];
606 auto rec_mirror = Kokkos::create_mirror_view_and_copy(
607 Kokkos::HostSpace(), _rectangle_partition_dev );
608 auto rec_partition = _rectangle_partition_dev;
609
611 compute_sub_workload( _rectangle_partition_dev,
612 _workload_prefix_sum );
613
614 // compute average workload in the dimension di
615 Kokkos::View<int*, memory_space> ave_workload(
616 "ave_workload", _ranks_per_dim[dj] * _ranks_per_dim[dk] );
617 Kokkos::parallel_for(
618 "Cabana::Grid::SparseDimPartitioner::computeAverageWorkLoad",
619 Kokkos::RangePolicy<execution_space>(
620 0, _ranks_per_dim[dj] * _ranks_per_dim[dk] ),
621 KOKKOS_LAMBDA( uint32_t jnk ) {
622 // compute rank_id in the fixed dimensions
623 int j = static_cast<int>( jnk / rank_k );
624 int k = static_cast<int>( jnk % rank_k );
625 // compute the average workload with the partition of the
626 // two fixed dimensions
627 ave_workload( jnk ) =
628 compute_sub_workload( di, 0, rec_partition( rank, di ),
629 dj, j, dk, k ) /
630 rank;
631 } );
632 Kokkos::fence();
633
634 // point_i: current partition position
635 int point_i = 1;
636 // equal_start_point: register the beginning pos of potentially
637 // equivalent partitions
638 int equal_start_point = 1;
639 // last_point: the optimized position for the lask partition
640 int last_point = 0;
641 // current_workload: the workload between [last_point, point_i)
642 Kokkos::View<int*, memory_space> current_workload(
643 "current_workload", _ranks_per_dim[dj] * _ranks_per_dim[dk] );
644 for ( int current_rank = 1; current_rank < rank; current_rank++ )
645 {
646 int last_diff = __INT_MAX__;
647 while ( true )
648 {
649 // compute current workload between [last_point, point_i)
650 Kokkos::parallel_for(
651 "Cabana::Grid::SparseDimPartitioner::"
652 "computeCurrentWorkLoad",
653 Kokkos::RangePolicy<execution_space>(
654 0, _ranks_per_dim[dj] * _ranks_per_dim[dk] ),
655 KOKKOS_LAMBDA( uint32_t jnk ) {
656 int j = static_cast<int>( jnk / rank_k );
657 int k = static_cast<int>( jnk % rank_k );
658 current_workload( jnk ) = compute_sub_workload(
659 di, last_point, point_i, dj, j, dk, k );
660 } );
661 Kokkos::fence();
662
663 // compute the (w_jk^ave - w_jk^{last_point:point_i})
664 Kokkos::parallel_for(
665 "Cabana::Grid::SparseDimPartitioner::computeDifference",
666 Kokkos::RangePolicy<execution_space>(
667 0, _ranks_per_dim[dj] * _ranks_per_dim[dk] ),
668 KOKKOS_LAMBDA( uint32_t jnk ) {
669 auto wl =
670 current_workload( jnk ) - ave_workload( jnk );
671 // compute absolute diff (rather than squares to
672 // avoid potential overflow)
673 // TODO: update when Kokkos::abs() available
674 wl = wl > 0 ? wl : -wl;
675 current_workload( jnk ) = wl;
676 } );
677 Kokkos::fence();
678
679 // compute the sum of the difference in all rank_j*rank_k
680 // regions
681 int diff;
682 Kokkos::parallel_reduce(
683 "diff_reduce",
684 Kokkos::RangePolicy<execution_space>(
685 0, _ranks_per_dim[dj] * _ranks_per_dim[dk] ),
686 KOKKOS_LAMBDA( const int idx, int& update ) {
687 update += current_workload( idx );
688 },
689 diff );
690 Kokkos::fence();
691
692 // record the new optimal position
693 if ( diff <= last_diff )
694 {
695 // register starting points of potentially equivalent
696 // partitions
697 if ( diff != last_diff )
698 equal_start_point = point_i;
699
700 // check if point_i reach the total_tile_num
701 if ( point_i == rec_mirror( rank, di ) )
702 {
703 rec_mirror( current_rank, di ) = point_i;
704 break;
705 }
706
707 last_diff = diff;
708 point_i++;
709 }
710 else
711 {
712 // final optimal position - middle position of all
713 // potentially equivalent partitions
714 if ( rec_mirror( current_rank, di ) !=
715 ( point_i - 1 + equal_start_point ) / 2 )
716 {
717 rec_mirror( current_rank, di ) =
718 ( point_i - 1 + equal_start_point ) / 2;
719 is_changed = true;
720 }
721 last_point = point_i - 1;
722 break;
723 }
724 } // end while (optimization for the current rank)
725 } // end for (all partition/rank in the optimized dimension)
726 Kokkos::deep_copy( _rectangle_partition_dev, rec_mirror );
727 } // end for (3 dimensions)
728 }
729
735 int currentRankWorkload( MPI_Comm cart_comm )
736 {
737 auto rec_mirror = Kokkos::create_mirror_view_and_copy(
738 Kokkos::HostSpace(), _rectangle_partition_dev );
739 auto prefix_sum_mirror = Kokkos::create_mirror_view_and_copy(
740 Kokkos::HostSpace(), _workload_prefix_sum );
741
742 return currentRankWorkload( cart_comm, rec_mirror, prefix_sum_mirror );
743 }
744
752 template <typename PartitionViewHost, typename WorkloadViewHost>
753 int currentRankWorkload( MPI_Comm cart_comm, PartitionViewHost& rec_view,
754 WorkloadViewHost& prefix_sum_view )
755 {
757 compute_sub_workload_host( rec_view, prefix_sum_view );
758
759 // Get the Cartesian topology index of this rank.
760 Kokkos::Array<int, num_space_dim> cart_rank;
761 int linear_rank;
762 MPI_Comm_rank( cart_comm, &linear_rank );
763 MPI_Cart_coords( cart_comm, linear_rank, num_space_dim,
764 cart_rank.data() );
765
766 // compute total workload of the current rank
767 int workload_current_rank = compute_sub_workload_host(
768 0, rec_view( cart_rank[0], 0 ), rec_view( cart_rank[0] + 1, 0 ), 1,
769 cart_rank[1], 2, cart_rank[2] );
770
771 return workload_current_rank;
772 }
773
779 {
780 auto prefix_sum_view = Kokkos::create_mirror_view_and_copy(
781 Kokkos::HostSpace(), _workload_prefix_sum );
782 // compute total workload of the current rank
783 return averageRankWorkload( prefix_sum_view );
784 }
785
791 template <typename WorkloadViewHost>
792 int averageRankWorkload( WorkloadViewHost& prefix_sum_view )
793 {
794 // compute total workload of the current rank
795 return prefix_sum_view( prefix_sum_view.extent( 0 ) - 1,
796 prefix_sum_view.extent( 1 ) - 1,
797 prefix_sum_view.extent( 2 ) - 1 ) /
798 ( _ranks_per_dim[0] * _ranks_per_dim[1] * _ranks_per_dim[2] );
799 }
800
806 float computeImbalanceFactor( MPI_Comm cart_comm )
807 {
808 auto rec_mirror = Kokkos::create_mirror_view_and_copy(
809 Kokkos::HostSpace(), _rectangle_partition_dev );
810 auto prefix_sum_mirror = Kokkos::create_mirror_view_and_copy(
811 Kokkos::HostSpace(), _workload_prefix_sum );
812
813 int workload_current_rank =
814 currentRankWorkload( cart_comm, rec_mirror, prefix_sum_mirror );
815 int workload_ave_rank = averageRankWorkload( prefix_sum_mirror );
816
817 return static_cast<float>( workload_current_rank ) /
818 static_cast<float>( workload_ave_rank );
819 }
820
825 template <typename PartitionView, typename WorkloadView>
827 {
829 PartitionView rec_partition;
832
834 SubWorkloadFunctor( PartitionView rec_par, WorkloadView pre_sum )
835 : rec_partition( rec_par )
836 , workload_prefix_sum( pre_sum )
837 {
838 }
839
844 KOKKOS_INLINE_FUNCTION int operator()( int dim_i, int i_start,
845 int i_end, int dim_j, int j,
846 int dim_k, int k ) const
847 {
848 int end[num_space_dim], start[num_space_dim];
849 end[dim_i] = i_end;
850 end[dim_j] = rec_partition( j + 1, dim_j );
851 end[dim_k] = rec_partition( k + 1, dim_k );
852
853 start[dim_i] = i_start;
854 start[dim_j] = rec_partition( j, dim_j );
855 start[dim_k] = rec_partition( k, dim_k );
856
857 // S[i][j][k] = S[i-1][j][k] + S[i][j-1][k] + S[i][j][k-1] -
858 // S[i-1][j-1][k]
859 // - S[i][j-1][k-1] - S[i-1][j][k-1] + S[i-1][j-1][k-1] + a[i][j][k]
860 return workload_prefix_sum( end[0], end[1], end[2] ) // S[i][j][k]
861 - workload_prefix_sum( start[0], end[1],
862 end[2] ) // S[i-1][j][k]
863 - workload_prefix_sum( end[0], start[1],
864 end[2] ) // S[i][j-1][k]
865 - workload_prefix_sum( end[0], end[1],
866 start[2] ) // S[i][j][k-1]
867 + workload_prefix_sum( start[0], start[1],
868 end[2] ) // S[i-1][j-1][k]
869 + workload_prefix_sum( end[0], start[1],
870 start[2] ) // S[i][j-1][k-1]
871 + workload_prefix_sum( start[0], end[1],
872 start[2] ) // S[i-1][j][k-1]
873 - workload_prefix_sum( start[0], start[1],
874 start[2] ); // S[i-1][j-1][k-1]
875 }
876 };
877
878 private:
879 // workload_threshold
880 int _workload_threshold;
881 // default check point for re-balance
882 int _num_step_rebalance;
883 // max_optimize iterations
884 int _max_optimize_iteration;
885
886 // represent the rectangle partition in each dimension
887 // with form [0, p_1, ..., p_n, cell_num], n = rank num in current
888 // dimension, partition in this dimension would be [0, p_1), [p_1, p_2) ...
889 // [p_n, total_tile_num] (unit: tile)
890 partition_view _rectangle_partition_dev;
891 // the workload of each tile on current
892 workload_view _workload_per_tile;
893 // 3d prefix sum of the workload of each tile on current
894 workload_view _workload_prefix_sum;
895 // ranks per dimension
896 Kokkos::Array<int, num_space_dim> _ranks_per_dim;
897
898 void allocate( const std::array<int, num_space_dim>& global_cells_per_dim )
899 {
900 _workload_per_tile = workload_view(
901 Kokkos::view_alloc( Kokkos::WithoutInitializing,
902 "workload_per_tile" ),
903 ( global_cells_per_dim[0] >> cell_bits_per_tile_dim ) + 1,
904 ( global_cells_per_dim[1] >> cell_bits_per_tile_dim ) + 1,
905 ( global_cells_per_dim[2] >> cell_bits_per_tile_dim ) + 1 );
906
907 _workload_prefix_sum = workload_view(
908 Kokkos::view_alloc( Kokkos::WithoutInitializing,
909 "workload_prefix_sum" ),
910 ( global_cells_per_dim[0] >> cell_bits_per_tile_dim ) + 1,
911 ( global_cells_per_dim[1] >> cell_bits_per_tile_dim ) + 1,
912 ( global_cells_per_dim[2] >> cell_bits_per_tile_dim ) + 1 );
913 }
914};
915} // namespace Grid
916} // namespace Cabana
917
918#endif // end CABANA_GRID_SPARSEDIMPARTITIONER_HPP
Multi-node grid partitioner.
KOKKOS_INLINE_FUNCTION constexpr Integer bitCount(Integer input_int) noexcept
(Host/Device) Compute the lease bit number needed to index input integer
Definition Cabana_Grid_SparseIndexSpace.hpp:80
Cabana utilities.
Block partitioner base class.
Definition Cabana_Grid_Partitioner.hpp:37
void resetWorkload()
set all elements in _workload_per_tile and _workload_prefix_sum matrix to 0
Definition Cabana_Grid_SparseDimPartitioner.hpp:342
int currentRankWorkload(MPI_Comm cart_comm, PartitionViewHost &rec_view, WorkloadViewHost &prefix_sum_view)
compute the total workload on the current MPI rank
Definition Cabana_Grid_SparseDimPartitioner.hpp:753
int optimizePartition(const SparseMapType &sparseMap, MPI_Comm comm)
iteratively optimize the partition
Definition Cabana_Grid_SparseDimPartitioner.hpp:553
SparseDimPartitioner(MPI_Comm comm, float max_workload_coeff, int workload_num, int num_step_rebalance, const std::array< int, num_space_dim > &global_cells_per_dim, int max_optimize_iteration=10)
Constructor - automatically compute ranks_per_dim from MPI communicator.
Definition Cabana_Grid_SparseDimPartitioner.hpp:95
std::array< std::vector< int >, num_space_dim > getCurrentPartition()
Get the current partition. Copy partition from the device view to host std::array<vector>
Definition Cabana_Grid_SparseDimPartitioner.hpp:322
Kokkos::View< int ***, memory_space > workload_view
Workload device view.
Definition Cabana_Grid_SparseDimPartitioner.hpp:63
void computeFullPrefixSum(MPI_Comm comm)
reduce the total workload in all MPI ranks; 2. compute the workload prefix sum matrix for all MPI ran...
Definition Cabana_Grid_SparseDimPartitioner.hpp:421
void initializeRecPartition(std::vector< int > &rec_partition_i, std::vector< int > &rec_partition_j, std::vector< int > &rec_partition_k)
Initialize the tile partition; partition in each dimension has the form [0, p_1, ....
Definition Cabana_Grid_SparseDimPartitioner.hpp:292
void ownedTileInfo(MPI_Comm cart_comm, std::array< int, num_space_dim > &owned_num_tile, std::array< int, num_space_dim > &global_tile_offset) const
Get the owned number of tiles and the global tile offset of the current MPI rank.
Definition Cabana_Grid_SparseDimPartitioner.hpp:239
static constexpr std::size_t num_space_dim
dimension
Definition Cabana_Grid_SparseDimPartitioner.hpp:47
int averageRankWorkload()
compute the average workload on each MPI rank
Definition Cabana_Grid_SparseDimPartitioner.hpp:778
Kokkos::View< int *[num_space_dim], typename execution_space::array_layout, Kokkos::HostSpace > partition_view_host
Partition host view.
Definition Cabana_Grid_SparseDimPartitioner.hpp:71
float computeImbalanceFactor(MPI_Comm cart_comm)
compute the imbalance factor for the current partition
Definition Cabana_Grid_SparseDimPartitioner.hpp:806
static constexpr unsigned long long cell_num_per_tile_dim
Definition Cabana_Grid_SparseDimPartitioner.hpp:80
typename memory_space::execution_space execution_space
Default execution space.
Definition Cabana_Grid_SparseDimPartitioner.hpp:60
int averageRankWorkload(WorkloadViewHost &prefix_sum_view)
compute the average workload on each MPI rank
Definition Cabana_Grid_SparseDimPartitioner.hpp:792
void computeLocalWorkLoad(const SparseMapType &sparseMap)
compute the workload in the current MPI rank from sparseMap (the workload of a tile is 1 if the tile ...
Definition Cabana_Grid_SparseDimPartitioner.hpp:396
int currentRankWorkload(MPI_Comm cart_comm)
compute the total workload on the current MPI rank
Definition Cabana_Grid_SparseDimPartitioner.hpp:735
void ownedCellInfo(MPI_Comm cart_comm, const std::array< int, num_space_dim > &, std::array< int, num_space_dim > &owned_num_cell, std::array< int, num_space_dim > &global_cell_offset) const override
Get the owned number of cells and the global cell offset of the current MPI rank.
Definition Cabana_Grid_SparseDimPartitioner.hpp:270
void optimizePartition(bool &is_changed, int iter_seed)
optimize the partition in three dimensions separately
Definition Cabana_Grid_SparseDimPartitioner.hpp:591
typename MemorySpace::memory_space memory_space
Memory space.
Definition Cabana_Grid_SparseDimPartitioner.hpp:52
void computeLocalWorkLoad(const ParticlePosViewType &view, int particle_num, const ArrayType &global_lower_corner, const CellUnit dx)
compute the workload in the current MPI rank from particle positions (each particle count for 1 workl...
Definition Cabana_Grid_SparseDimPartitioner.hpp:358
std::array< int, num_space_dim > ranksPerDimension(MPI_Comm comm, const std::array< int, num_space_dim > &) const override
Get the number of MPI ranks in each dimension of the grid from the given MPI communicator.
Definition Cabana_Grid_SparseDimPartitioner.hpp:172
std::array< int, num_space_dim > ownedTilesPerDimension(MPI_Comm cart_comm) const
Get the tile number in each dimension owned by the current MPI rank.
Definition Cabana_Grid_SparseDimPartitioner.hpp:194
Kokkos::View< int ***, typename execution_space::array_layout, Kokkos::HostSpace > workload_view_host
Workload host view.
Definition Cabana_Grid_SparseDimPartitioner.hpp:67
std::array< int, num_space_dim > ownedCellsPerDimension(MPI_Comm cart_comm) const
Get the cell number in each dimension owned by the current MPI rank.
Definition Cabana_Grid_SparseDimPartitioner.hpp:218
SparseDimPartitioner(MPI_Comm comm, float max_workload_coeff, int workload_num, int num_step_rebalance, const std::array< int, num_space_dim > &ranks_per_dim, const std::array< int, num_space_dim > &global_cells_per_dim, int max_optimize_iteration=10)
Constructor - user-defined ranks_per_dim communicator.
Definition Cabana_Grid_SparseDimPartitioner.hpp:124
Kokkos::View< int *[num_space_dim], memory_space > partition_view
Partition device view.
Definition Cabana_Grid_SparseDimPartitioner.hpp:65
static constexpr unsigned long long cell_bits_per_tile_dim
Number of bits (per dimension) needed to index the cells inside a tile.
Definition Cabana_Grid_SparseDimPartitioner.hpp:76
std::array< int, num_space_dim > ranksPerDimension(MPI_Comm comm)
Compute the number of MPI ranks in each dimension of the grid from the given MPI communicator.
Definition Cabana_Grid_SparseDimPartitioner.hpp:150
int optimizePartition(const ParticlePosViewType &view, int particle_num, const ArrayType &global_lower_corner, const CellUnit dx, MPI_Comm comm)
iteratively optimize the partition
Definition Cabana_Grid_SparseDimPartitioner.hpp:510
Core: particle data structures and algorithms.
Definition Cabana_AoSoA.hpp:36
functor to compute the sub workload in a given region (from the prefix sum)
Definition Cabana_Grid_SparseDimPartitioner.hpp:827
PartitionView rec_partition
Rectilinear partition.
Definition Cabana_Grid_SparseDimPartitioner.hpp:829
KOKKOS_INLINE_FUNCTION int operator()(int dim_i, int i_start, int i_end, int dim_j, int j, int dim_k, int k) const
Definition Cabana_Grid_SparseDimPartitioner.hpp:844
SubWorkloadFunctor(PartitionView rec_par, WorkloadView pre_sum)
Constructor.
Definition Cabana_Grid_SparseDimPartitioner.hpp:834
WorkloadView workload_prefix_sum
Workload prefix sum matrix.
Definition Cabana_Grid_SparseDimPartitioner.hpp:831