16#ifndef CABANA_PARALLEL_HPP
17#define CABANA_PARALLEL_HPP
23#include <impl/Cabana_CartesianGrid.hpp>
25#include <Kokkos_Core.hpp>
26#include <Kokkos_Profiling_ScopedRegion.hpp>
39template <
class WorkTag,
class FunctorType,
class... IndexTypes>
40KOKKOS_FORCEINLINE_FUNCTION
41 typename std::enable_if<std::is_same<WorkTag, void>::value>::type
42 functorTagDispatch(
const FunctorType& functor, IndexTypes&&... indices )
44 functor( std::forward<IndexTypes>( indices )... );
48template <
class WorkTag,
class FunctorType,
class... IndexTypes>
49KOKKOS_FORCEINLINE_FUNCTION
50 typename std::enable_if<!std::is_same<WorkTag, void>::value>::type
51 functorTagDispatch(
const FunctorType& functor, IndexTypes&&... indices )
54 functor( t, std::forward<IndexTypes>( indices )... );
58template <
class WorkTag,
class FunctorType,
class... IndexTypes,
60KOKKOS_FORCEINLINE_FUNCTION
61 typename std::enable_if<std::is_same<WorkTag, void>::value>::type
62 functorTagDispatch(
const FunctorType& functor, IndexTypes&&... indices,
63 ReduceType& reduce_val )
65 functor( std::forward<IndexTypes>( indices )..., reduce_val );
69template <
class WorkTag,
class FunctorType,
class... IndexTypes,
71KOKKOS_FORCEINLINE_FUNCTION
72 typename std::enable_if<!std::is_same<WorkTag, void>::value>::type
73 functorTagDispatch(
const FunctorType& functor, IndexTypes&&... indices,
74 ReduceType& reduce_val )
77 functor( t, std::forward<IndexTypes>( indices )..., reduce_val );
80template <
class ExecutionPolicy,
class Functor>
83template <
class Functor,
int VectorLength,
class... Properties>
84struct ParallelFor<SimdPolicy<VectorLength, Properties...>, Functor>
86 using simd_policy = SimdPolicy<VectorLength, Properties...>;
87 using team_policy =
typename simd_policy::base_type;
88 using work_tag =
typename team_policy::work_tag;
89 using index_type =
typename team_policy::index_type;
90 using member_type =
typename team_policy::member_type;
92 simd_policy exec_policy_;
95 ParallelFor( std::string label, simd_policy exec_policy, Functor functor )
96 : exec_policy_( std::move( exec_policy ) )
97 , functor_( std::move( functor ) )
100 Kokkos::parallel_for(
101 dynamic_cast<const team_policy&
>( exec_policy_ ), *
this );
103 Kokkos::parallel_for(
104 label,
dynamic_cast<const team_policy&
>( exec_policy_ ),
108 template <
class WorkTag>
109 KOKKOS_FUNCTION std::enable_if_t<!std::is_void<WorkTag>::value &&
110 std::is_same<WorkTag, work_tag>::value>
111 operator()( WorkTag, member_type
const& team )
const
113 this->operator()( team );
116 KOKKOS_FUNCTION
void operator()( member_type
const& team )
const
118 index_type s = team.league_rank() + exec_policy_.structBegin();
119 Kokkos::parallel_for(
120 Kokkos::ThreadVectorRange( team, exec_policy_.arrayBegin( s ),
121 exec_policy_.arrayEnd( s ) ),
123 { Impl::functorTagDispatch<work_tag>( functor_, s, a ); } );
173template <
class FunctorType,
int VectorLength,
class... ExecParameters>
176 const FunctorType& functor,
const std::string& str =
"" )
178 Kokkos::Profiling::ScopedRegion region(
"Cabana::simd_parallel_for" );
180 Impl::ParallelFor<
SimdPolicy<VectorLength, ExecParameters...>, FunctorType>(
181 str, exec_policy, functor );
251template <
class FunctorType,
class NeighborListType,
class... ExecParameters>
253 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
254 const FunctorType& functor,
const NeighborListType& list,
259 Kokkos::Profiling::ScopedRegion region(
"Cabana::neighbor_parallel_for" );
261 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
263 using execution_space =
264 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
267 typename Kokkos::RangePolicy<ExecParameters...>::index_type;
271 using memory_space =
typename neighbor_list_traits::memory_space;
273 auto begin = exec_policy.begin();
274 auto end = exec_policy.end();
275 using linear_policy_type = Kokkos::RangePolicy<execution_space, void, void>;
276 linear_policy_type linear_exec_policy( begin, end );
280 auto neigh_func = KOKKOS_LAMBDA(
const index_type i )
282 for ( index_type n = 0;
283 n < neighbor_list_traits::numNeighbor( list, i ); ++n )
284 Impl::functorTagDispatch<work_tag>(
286 static_cast<index_type
>(
287 neighbor_list_traits::getNeighbor( list, i, n ) ) );
290 Kokkos::parallel_for( linear_exec_policy, neigh_func );
292 Kokkos::parallel_for( str, linear_exec_policy, neigh_func );
315template <
class FunctorType,
class NeighborListType,
class... ExecParameters>
317 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
318 const FunctorType& functor,
const NeighborListType& list,
323 Kokkos::Profiling::ScopedRegion region(
"Cabana::neighbor_parallel_for" );
325 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
327 using execution_space =
328 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
331 typename Kokkos::RangePolicy<ExecParameters...>::index_type;
335 using memory_space =
typename neighbor_list_traits::memory_space;
337 auto begin = exec_policy.begin();
338 auto end = exec_policy.end();
339 using linear_policy_type = Kokkos::RangePolicy<execution_space, void, void>;
340 linear_policy_type linear_exec_policy( begin, end );
344 auto neigh_func = KOKKOS_LAMBDA(
const index_type i )
346 const index_type nn = neighbor_list_traits::numNeighbor( list, i );
348 for ( index_type n = 0; n < nn; ++n )
351 neighbor_list_traits::getNeighbor( list, i, n );
353 for ( index_type a = n + 1; a < nn; ++a )
356 neighbor_list_traits::getNeighbor( list, i, a );
357 Impl::functorTagDispatch<work_tag>( functor, i, j, k );
362 Kokkos::parallel_for( linear_exec_policy, neigh_func );
364 Kokkos::parallel_for( str, linear_exec_policy, neigh_func );
386template <
class FunctorType,
class NeighborListType,
class... ExecParameters>
388 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
389 const FunctorType& functor,
const NeighborListType& list,
394 Kokkos::Profiling::ScopedRegion region(
"Cabana::neighbor_parallel_for" );
396 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
398 using execution_space =
399 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
401 using kokkos_policy =
402 Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
403 kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(),
406 using index_type =
typename kokkos_policy::index_type;
410 using memory_space =
typename neighbor_list_traits::memory_space;
414 const auto range_begin = exec_policy.begin();
417 KOKKOS_LAMBDA(
const typename kokkos_policy::member_type& team )
419 index_type i = team.league_rank() + range_begin;
420 Kokkos::parallel_for(
421 Kokkos::TeamThreadRange(
422 team, neighbor_list_traits::numNeighbor( list, i ) ),
423 [&](
const index_type n )
425 Impl::functorTagDispatch<work_tag>(
427 static_cast<index_type
>(
428 neighbor_list_traits::getNeighbor( list, i, n ) ) );
432 Kokkos::parallel_for( team_policy, neigh_func );
434 Kokkos::parallel_for( str, team_policy, neigh_func );
458template <
class FunctorType,
class NeighborListType,
class... ExecParameters>
460 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
461 const FunctorType& functor,
const NeighborListType& list,
466 Kokkos::Profiling::ScopedRegion region(
"Cabana::neighbor_parallel_for" );
468 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
470 using execution_space =
471 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
473 using kokkos_policy =
474 Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
475 kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(),
478 using index_type =
typename kokkos_policy::index_type;
482 using memory_space =
typename neighbor_list_traits::memory_space;
486 const auto range_begin = exec_policy.begin();
489 KOKKOS_LAMBDA(
const typename kokkos_policy::member_type& team )
491 index_type i = team.league_rank() + range_begin;
493 const index_type nn = neighbor_list_traits::numNeighbor( list, i );
494 Kokkos::parallel_for(
495 Kokkos::TeamThreadRange( team, nn ),
496 [&](
const index_type n )
499 neighbor_list_traits::getNeighbor( list, i, n );
501 for ( index_type a = n + 1; a < nn; ++a )
504 neighbor_list_traits::getNeighbor( list, i, a );
505 Impl::functorTagDispatch<work_tag>( functor, i, j, k );
510 Kokkos::parallel_for( team_policy, neigh_func );
512 Kokkos::parallel_for( str, team_policy, neigh_func );
536template <
class FunctorType,
class NeighborListType,
class... ExecParameters>
538 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
539 const FunctorType& functor,
const NeighborListType& list,
541 const std::string& str =
"",
545 Kokkos::Profiling::ScopedRegion region(
"Cabana::neighbor_parallel_for" );
547 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
549 using execution_space =
550 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
552 using kokkos_policy =
553 Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
554 kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(),
557 using index_type =
typename kokkos_policy::index_type;
561 using memory_space =
typename neighbor_list_traits::memory_space;
565 const auto range_begin = exec_policy.begin();
568 KOKKOS_LAMBDA(
const typename kokkos_policy::member_type& team )
570 index_type i = team.league_rank() + range_begin;
572 const index_type nn = neighbor_list_traits::numNeighbor( list, i );
573 Kokkos::parallel_for(
574 Kokkos::TeamThreadRange( team, nn ),
575 [&](
const index_type n )
578 neighbor_list_traits::getNeighbor( list, i, n );
580 Kokkos::parallel_for(
581 Kokkos::ThreadVectorRange( team, n + 1, nn ),
582 [&](
const index_type a )
585 neighbor_list_traits::getNeighbor( list, i, a );
586 Impl::functorTagDispatch<work_tag>( functor, i, j, k );
591 Kokkos::parallel_for( team_policy, neigh_func );
593 Kokkos::parallel_for( str, team_policy, neigh_func );
638template <
class FunctorType,
class NeighborListType,
class ReduceType,
639 class... ExecParameters>
641 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
642 const FunctorType& functor,
const NeighborListType& list,
644 const std::string& str =
"",
648 Kokkos::Profiling::ScopedRegion region(
649 "Cabana::neighbor_parallel_reduce" );
651 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
653 using execution_space =
654 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
657 typename Kokkos::RangePolicy<ExecParameters...>::index_type;
661 using memory_space =
typename neighbor_list_traits::memory_space;
663 auto begin = exec_policy.begin();
664 auto end = exec_policy.end();
665 using linear_policy_type = Kokkos::RangePolicy<execution_space, void, void>;
666 linear_policy_type linear_exec_policy( begin, end );
670 auto neigh_reduce = KOKKOS_LAMBDA(
const index_type i, ReduceType& ival )
672 for ( index_type n = 0;
673 n < neighbor_list_traits::numNeighbor( list, i ); ++n )
674 Impl::functorTagDispatch<work_tag>(
676 static_cast<index_type
>(
677 neighbor_list_traits::getNeighbor( list, i, n ) ),
681 Kokkos::parallel_reduce( linear_exec_policy, neigh_reduce, reduce_val );
683 Kokkos::parallel_reduce( str, linear_exec_policy, neigh_reduce,
709template <
class FunctorType,
class NeighborListType,
class ReduceType,
710 class... ExecParameters>
712 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
713 const FunctorType& functor,
const NeighborListType& list,
715 const std::string& str =
"",
719 Kokkos::Profiling::ScopedRegion region(
720 "Cabana::neighbor_parallel_reduce" );
722 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
724 using execution_space =
725 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
728 typename Kokkos::RangePolicy<ExecParameters...>::index_type;
732 using memory_space =
typename neighbor_list_traits::memory_space;
734 auto begin = exec_policy.begin();
735 auto end = exec_policy.end();
736 using linear_policy_type = Kokkos::RangePolicy<execution_space, void, void>;
737 linear_policy_type linear_exec_policy( begin, end );
741 auto neigh_reduce = KOKKOS_LAMBDA(
const index_type i, ReduceType& ival )
743 const index_type nn = neighbor_list_traits::numNeighbor( list, i );
745 for ( index_type n = 0; n < nn; ++n )
748 neighbor_list_traits::getNeighbor( list, i, n );
750 for ( index_type a = n + 1; a < nn; ++a )
753 neighbor_list_traits::getNeighbor( list, i, a );
754 Impl::functorTagDispatch<work_tag>( functor, i, j, k, ival );
759 Kokkos::parallel_reduce( linear_exec_policy, neigh_reduce, reduce_val );
761 Kokkos::parallel_reduce( str, linear_exec_policy, neigh_reduce,
787template <
class FunctorType,
class NeighborListType,
class ReduceType,
788 class... ExecParameters>
790 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
791 const FunctorType& functor,
const NeighborListType& list,
793 const std::string& str =
"",
797 Kokkos::Profiling::ScopedRegion region(
798 "Cabana::neighbor_parallel_reduce" );
800 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
802 using execution_space =
803 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
805 using kokkos_policy =
806 Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
807 kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(),
810 using index_type =
typename kokkos_policy::index_type;
814 using memory_space =
typename neighbor_list_traits::memory_space;
818 const auto range_begin = exec_policy.begin();
820 auto neigh_reduce = KOKKOS_LAMBDA(
821 const typename kokkos_policy::member_type& team, ReduceType& ival )
823 index_type i = team.league_rank() + range_begin;
824 ReduceType reduce_n = 0;
826 Kokkos::parallel_reduce(
827 Kokkos::TeamThreadRange(
828 team, neighbor_list_traits::numNeighbor( list, i ) ),
829 [&](
const index_type n, ReduceType& nval )
831 Impl::functorTagDispatch<work_tag>(
833 static_cast<index_type
>(
834 neighbor_list_traits::getNeighbor( list, i, n ) ),
838 Kokkos::single( Kokkos::PerTeam( team ), [&]() { ival += reduce_n; } );
841 Kokkos::parallel_reduce( team_policy, neigh_reduce, reduce_val );
843 Kokkos::parallel_reduce( str, team_policy, neigh_reduce, reduce_val );
869template <
class FunctorType,
class NeighborListType,
class ReduceType,
870 class... ExecParameters>
872 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
873 const FunctorType& functor,
const NeighborListType& list,
875 const std::string& str =
"",
879 Kokkos::Profiling::ScopedRegion region(
880 "Cabana::neighbor_parallel_reduce" );
882 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
884 using execution_space =
885 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
887 using kokkos_policy =
888 Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
889 kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(),
892 using index_type =
typename kokkos_policy::index_type;
896 using memory_space =
typename neighbor_list_traits::memory_space;
900 const auto range_begin = exec_policy.begin();
902 auto neigh_reduce = KOKKOS_LAMBDA(
903 const typename kokkos_policy::member_type& team, ReduceType& ival )
905 index_type i = team.league_rank() + range_begin;
906 ReduceType reduce_n = 0;
908 const index_type nn = neighbor_list_traits::numNeighbor( list, i );
909 Kokkos::parallel_reduce(
910 Kokkos::TeamThreadRange( team, nn ),
911 [&](
const index_type n, ReduceType& nval )
914 neighbor_list_traits::getNeighbor( list, i, n );
916 for ( index_type a = n + 1; a < nn; ++a )
919 neighbor_list_traits::getNeighbor( list, i, a );
920 Impl::functorTagDispatch<work_tag>( functor, i, j, k,
925 Kokkos::single( Kokkos::PerTeam( team ), [&]() { ival += reduce_n; } );
928 Kokkos::parallel_reduce( team_policy, neigh_reduce, reduce_val );
930 Kokkos::parallel_reduce( str, team_policy, neigh_reduce, reduce_val );
956template <
class FunctorType,
class NeighborListType,
class ReduceType,
957 class... ExecParameters>
959 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
960 const FunctorType& functor,
const NeighborListType& list,
962 const std::string& str =
"",
966 Kokkos::Profiling::ScopedRegion region(
967 "Cabana::neighbor_parallel_reduce" );
969 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
971 using execution_space =
972 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
974 using kokkos_policy =
975 Kokkos::TeamPolicy<execution_space, Kokkos::Schedule<Kokkos::Dynamic>>;
976 kokkos_policy team_policy( exec_policy.end() - exec_policy.begin(),
979 using index_type =
typename kokkos_policy::index_type;
983 using memory_space =
typename neighbor_list_traits::memory_space;
987 const auto range_begin = exec_policy.begin();
989 auto neigh_reduce = KOKKOS_LAMBDA(
990 const typename kokkos_policy::member_type& team, ReduceType& ival )
992 index_type i = team.league_rank() + range_begin;
993 ReduceType reduce_n = 0;
995 const index_type nn = neighbor_list_traits::numNeighbor( list, i );
996 Kokkos::parallel_reduce(
997 Kokkos::TeamThreadRange( team, nn ),
998 [&](
const index_type n, ReduceType& nval )
1000 const index_type j =
1001 neighbor_list_traits::getNeighbor( list, i, n );
1002 ReduceType reduce_a = 0;
1004 Kokkos::parallel_reduce(
1005 Kokkos::ThreadVectorRange( team, n + 1, nn ),
1006 [&](
const index_type a, ReduceType& aval )
1008 const index_type k =
1009 neighbor_list_traits::getNeighbor( list, i, a );
1010 Impl::functorTagDispatch<work_tag>( functor, i, j, k,
1017 Kokkos::single( Kokkos::PerTeam( team ), [&]() { ival += reduce_n; } );
1020 Kokkos::parallel_reduce( team_policy, neigh_reduce, reduce_val );
1022 Kokkos::parallel_reduce( str, team_policy, neigh_reduce, reduce_val );
1058template <
class IndexType,
class FunctorType,
class NeighborListType>
1059KOKKOS_INLINE_FUNCTION
void
1065 for ( IndexType n = 0;
1066 n < static_cast<IndexType>(
1067 neighbor_list_traits::numNeighbor( list, i ) );
1070 i,
static_cast<IndexType
>(
1071 neighbor_list_traits::getNeighbor( list, i, n ) ) );
1091template <
class IndexType,
class FunctorType,
class NeighborListType,
1092 class TeamMemberType>
1093KOKKOS_INLINE_FUNCTION
void
1095 const FunctorType& neighbor_functor,
1100 Kokkos::parallel_for(
1101 Kokkos::TeamThreadRange( team,
1102 neighbor_list_traits::numNeighbor( list, i ) ),
1103 [&](
const IndexType n )
1105 Impl::functorTagDispatch<void>(
1106 neighbor_functor, i,
1107 static_cast<IndexType
>(
1108 neighbor_list_traits::getNeighbor( list, i, n ) ) );
1122template <
class WorkTag,
class Functor,
class Policy,
class LinkedCellType,
1124struct LinkedCellParallelFor
1127 using index_type =
typename Policy::index_type;
1130 Policy _exec_policy;
1134 LinkedCellType _list;
1136 static constexpr std::size_t num_space_dim = LinkedCellType::num_space_dim;
1142 NeighborDiscriminator<SelfNeighborTag> _discriminator;
1145 LinkedCellParallelFor( std::string label, Policy exec_policy,
1146 Functor functor,
const LinkedCellType& list,
1147 const index_type begin = 0 )
1148 : _exec_policy( exec_policy )
1149 , _functor( functor )
1153 if ( label.empty() )
1154 Kokkos::parallel_for(
dynamic_cast<const Policy&
>( exec_policy ),
1157 Kokkos::parallel_for(
1158 label,
dynamic_cast<const Policy&
>( exec_policy ), *
this );
1162 KOKKOS_FUNCTION
void operator()( SerialOpTag,
const index_type i )
const
1164 Kokkos::Array<int, num_space_dim> min;
1165 Kokkos::Array<int, num_space_dim> max;
1166 _list.getStencilCells( _list.getParticleBin( i ), min, max );
1168 iterate_serial_bins( min, max, i );
1171 template <std::
size_t NSD = num_space_dim>
1172 KOKKOS_INLINE_FUNCTION std::enable_if_t<3 == NSD, void>
1173 iterate_serial_bins(
const Kokkos::Array<int, 3> min,
1174 const Kokkos::Array<int, 3> max,
1175 const std::size_t i )
const
1177 Kokkos::Array<int, 3> ijk;
1178 for (
int gi = min[0]; gi < max[0]; ++gi )
1179 for (
int gj = min[1]; gj < max[1]; ++gj )
1180 for (
int gk = min[2]; gk < max[2]; ++gk )
1182 ijk = { gi, gj, gk };
1183 iterate_serial_particles( ijk, i );
1187 template <std::
size_t NSD = num_space_dim>
1188 KOKKOS_INLINE_FUNCTION std::enable_if_t<2 == NSD, void>
1189 iterate_serial_bins(
const Kokkos::Array<int, 2> min,
1190 const Kokkos::Array<int, 2> max,
1191 const std::size_t i )
const
1193 Kokkos::Array<int, 2> ij;
1194 for (
int gi = min[0]; gi < max[0]; ++gi )
1195 for (
int gj = min[1]; gj < max[1]; ++gj )
1198 iterate_serial_particles( ij, i );
1202 KOKKOS_INLINE_FUNCTION
void
1203 iterate_serial_particles(
const Kokkos::Array<int, num_space_dim> ijk,
1204 const std::size_t i )
const
1208 auto offset = _list.binOffset( ijk );
1209 auto size = _list.binSize( ijk );
1210 for ( std::size_t n = offset; n < offset +
size; ++n )
1213 auto j = _list.getParticle( n );
1216 if ( _discriminator.isValid( i, j ) )
1218 Impl::functorTagDispatch<WorkTag>( _functor, i, j );
1224 KOKKOS_FUNCTION
void
1225 operator()( TeamOpTag,
const typename Policy::member_type& team )
const
1227 index_type i = team.league_rank() + _begin;
1228 Kokkos::Array<int, num_space_dim> min;
1229 Kokkos::Array<int, num_space_dim> max;
1230 _list.getStencilCells( _list.getParticleBin( i ), min, max );
1232 iterate_team_bins( team, min, max, i );
1235 template <
class TeamType, std::
size_t NSD = num_space_dim>
1236 KOKKOS_INLINE_FUNCTION std::enable_if_t<3 == NSD, void>
1237 iterate_team_bins( TeamType team,
const Kokkos::Array<int, 3> min,
1238 const Kokkos::Array<int, 3> max,
1239 const std::size_t i )
const
1241 Kokkos::Array<int, 3> ijk;
1242 for (
int gi = min[0]; gi < max[0]; ++gi )
1243 for (
int gj = min[1]; gj < max[1]; ++gj )
1244 for (
int gk = min[2]; gk < max[2]; ++gk )
1246 ijk = { gi, gj, gk };
1247 iterate_team_particles( team, ijk, i );
1251 template <
class TeamType, std::
size_t NSD = num_space_dim>
1252 KOKKOS_INLINE_FUNCTION std::enable_if_t<2 == NSD, void>
1253 iterate_team_bins( TeamType team,
const Kokkos::Array<int, 2> min,
1254 const Kokkos::Array<int, 2> max,
1255 const std::size_t i )
const
1257 Kokkos::Array<int, 2> ij;
1258 for (
int gi = min[0]; gi < max[0]; ++gi )
1259 for (
int gj = min[1]; gj < max[1]; ++gj )
1262 iterate_team_particles( team, ij, i );
1266 template <
class TeamType>
1267 KOKKOS_INLINE_FUNCTION
void
1268 iterate_team_particles( TeamType team,
1269 const Kokkos::Array<int, num_space_dim> ijk,
1270 const std::size_t i )
const
1274 auto offset = _list.binOffset( ijk );
1275 auto size = _list.binSize( ijk );
1276 Kokkos::parallel_for(
1277 Kokkos::TeamThreadRange( team, offset, offset + size ),
1278 [&](
const index_type n )
1281 auto j = _list.getParticle( n );
1284 if ( _discriminator.isValid( i, j ) )
1286 Impl::functorTagDispatch<WorkTag>( _functor, i, j );
1296template <
class WorkTag,
class Functor,
class Policy,
class LinkedCellType,
1297 class ViewType,
class ReduceType>
1298struct LinkedCellParallelReduce
1301 using index_type =
typename Policy::index_type;
1304 Policy _exec_policy;
1308 LinkedCellType _list;
1310 static constexpr std::size_t num_space_dim = LinkedCellType::num_space_dim;
1316 NeighborDiscriminator<SelfNeighborTag> _discriminator;
1319 LinkedCellParallelReduce( std::string label, Policy exec_policy,
1320 Functor functor,
const LinkedCellType& list,
1321 ReduceType& reduce_val,
1322 const index_type begin = 0 )
1323 : _exec_policy( exec_policy )
1324 , _functor( functor )
1328 if ( label.empty() )
1329 Kokkos::parallel_reduce(
dynamic_cast<const Policy&
>( exec_policy ),
1330 *
this, reduce_val );
1332 Kokkos::parallel_reduce( label,
1333 dynamic_cast<const Policy&
>( exec_policy ),
1334 *
this, reduce_val );
1338 KOKKOS_FUNCTION
void operator()( SerialOpTag,
const index_type i,
1339 ReduceType& ival )
const
1341 Kokkos::Array<int, num_space_dim> min;
1342 Kokkos::Array<int, num_space_dim> max;
1343 _list.getStencilCells( _list.getParticleBin( i ), min, max );
1345 iterate_serial_bins( min, max, i, ival );
1348 template <std::
size_t NSD = num_space_dim>
1349 KOKKOS_INLINE_FUNCTION std::enable_if_t<3 == NSD, void>
1350 iterate_serial_bins(
const Kokkos::Array<int, 3> min,
1351 const Kokkos::Array<int, 3> max,
const std::size_t i,
1352 ReduceType& ival )
const
1354 Kokkos::Array<int, 3> ijk;
1355 for (
int gi = min[0]; gi < max[0]; ++gi )
1356 for (
int gj = min[1]; gj < max[1]; ++gj )
1357 for (
int gk = min[2]; gk < max[2]; ++gk )
1359 ijk = { gi, gj, gk };
1360 iterate_serial_particles( ijk, i, ival );
1364 template <std::
size_t NSD = num_space_dim>
1365 KOKKOS_INLINE_FUNCTION std::enable_if_t<2 == NSD, void>
1366 iterate_serial_bins(
const Kokkos::Array<int, 2> min,
1367 const Kokkos::Array<int, 2> max,
const std::size_t i,
1368 ReduceType& ival )
const
1370 Kokkos::Array<int, 2> ij;
1371 for (
int gi = min[0]; gi < max[0]; ++gi )
1372 for (
int gj = min[1]; gj < max[1]; ++gj )
1375 iterate_serial_particles( ij, i, ival );
1379 KOKKOS_INLINE_FUNCTION
void
1380 iterate_serial_particles(
const Kokkos::Array<int, num_space_dim> ijk,
1381 const std::size_t i, ReduceType& ival )
const
1385 auto offset = _list.binOffset( ijk );
1386 auto size = _list.binSize( ijk );
1387 for ( std::size_t n = offset; n < offset +
size; ++n )
1390 auto j = _list.getParticle( n );
1393 if ( _discriminator.isValid( i, j ) )
1395 Impl::functorTagDispatch<WorkTag>( _functor, i, j, ival );
1401 KOKKOS_FUNCTION
void operator()( TeamOpTag,
1402 const typename Policy::member_type& team,
1403 ReduceType& ival )
const
1405 index_type i = team.league_rank() + _begin;
1406 Kokkos::Array<int, num_space_dim> min;
1407 Kokkos::Array<int, num_space_dim> max;
1408 _list.getStencilCells( _list.getParticleBin( i ), min, max );
1410 iterate_team_bins( team, min, max, i, ival );
1413 template <
class TeamType, std::
size_t NSD = num_space_dim>
1414 KOKKOS_INLINE_FUNCTION std::enable_if_t<3 == NSD, void>
1415 iterate_team_bins( TeamType team,
const Kokkos::Array<int, 3> min,
1416 const Kokkos::Array<int, 3> max,
const std::size_t i,
1417 ReduceType& ival )
const
1419 Kokkos::Array<int, 3> ijk;
1420 for (
int gi = min[0]; gi < max[0]; ++gi )
1421 for (
int gj = min[1]; gj < max[1]; ++gj )
1422 for (
int gk = min[2]; gk < max[2]; ++gk )
1424 ijk = { gi, gj, gk };
1425 iterate_team_particles( team, ijk, i, ival );
1429 template <
class TeamType, std::
size_t NSD = num_space_dim>
1430 KOKKOS_INLINE_FUNCTION std::enable_if_t<2 == NSD, void>
1431 iterate_team_bins( TeamType team,
const Kokkos::Array<int, 2> min,
1432 const Kokkos::Array<int, 2> max,
const std::size_t i,
1433 ReduceType& ival )
const
1435 Kokkos::Array<int, 2> ij;
1436 for (
int gi = min[0]; gi < max[0]; ++gi )
1437 for (
int gj = min[1]; gj < max[1]; ++gj )
1440 iterate_team_particles( team, ij, i, ival );
1444 template <
class TeamType>
1445 KOKKOS_INLINE_FUNCTION
void
1446 iterate_team_particles( TeamType team,
1447 const Kokkos::Array<int, num_space_dim> ijk,
1448 const std::size_t i, ReduceType& ival )
const
1452 auto offset = _list.binOffset( ijk );
1453 auto size = _list.binSize( ijk );
1454 Kokkos::parallel_for(
1455 Kokkos::TeamThreadRange( team, offset, offset + size ),
1456 [&](
const index_type n )
1459 auto j = _list.getParticle( n );
1462 if ( _discriminator.isValid( i, j ) )
1464 Impl::functorTagDispatch<WorkTag>( _functor, i, j, ival );
1511template <
class FunctorType,
class LinkedCellType,
class... ExecParameters>
1513 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
1514 const FunctorType& functor,
const LinkedCellType& list,
1519 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
1520 using execution_space =
1521 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
1523 using memory_space =
typename LinkedCellType::memory_space;
1525 auto begin = exec_policy.begin();
1526 auto end = exec_policy.end();
1528 assert( begin == list.getParticleBegin() );
1529 assert( end == list.getParticleEnd() );
1531 using linear_policy_type =
1532 Kokkos::RangePolicy<SerialOpTag, execution_space>;
1533 linear_policy_type linear_exec_policy( begin, end );
1537 Impl::LinkedCellParallelFor<work_tag, FunctorType, linear_policy_type,
1539 typename LinkedCellType::CountView>
1540 lcl_par( str, linear_exec_policy, functor, list, exec_policy.begin() );
1565template <
class FunctorType,
class LinkedCellType,
class... ExecParameters>
1567 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
1568 const FunctorType& functor,
const LinkedCellType& list,
1573 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
1574 using execution_space =
1575 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
1577 using team_policy_type =
1578 Kokkos::TeamPolicy<
TeamOpTag, execution_space,
1579 Kokkos::Schedule<Kokkos::Dynamic>>;
1580 team_policy_type team_policy( exec_policy.end() - exec_policy.begin(),
1583 using memory_space =
typename LinkedCellType::memory_space;
1588 assert( exec_policy.begin() == list.getParticleBegin() );
1589 assert( exec_policy.end() == list.getParticleEnd() );
1591 Impl::LinkedCellParallelFor<work_tag, FunctorType, team_policy_type,
1593 typename LinkedCellType::CountView>
1594 lcl_par( str, team_policy, functor, list, exec_policy.begin() );
1636template <
class FunctorType,
class LinkedCellType,
class ReduceType,
1637 class... ExecParameters>
1639 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
1640 const FunctorType& functor,
const LinkedCellType& list,
1642 const std::string& str =
"",
1646 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
1647 using execution_space =
1648 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
1650 using memory_space =
typename LinkedCellType::memory_space;
1652 auto begin = exec_policy.begin();
1653 auto end = exec_policy.end();
1655 assert( begin == list.getParticleBegin() );
1656 assert( end == list.getParticleEnd() );
1658 using linear_policy_type =
1659 Kokkos::RangePolicy<SerialOpTag, execution_space>;
1660 linear_policy_type linear_exec_policy( begin, end );
1664 Impl::LinkedCellParallelReduce<
1665 work_tag, FunctorType, linear_policy_type, LinkedCellType,
1666 typename LinkedCellType::CountView, ReduceType>
1667 lcl_par( str, linear_exec_policy, functor, list, reduce_val,
1668 exec_policy.begin() );
1693template <
class FunctorType,
class LinkedCellType,
class ReduceType,
1694 class... ExecParameters>
1696 const Kokkos::RangePolicy<ExecParameters...>& exec_policy,
1697 const FunctorType& functor,
const LinkedCellType& list,
1699 const std::string& str =
"",
1703 using work_tag =
typename Kokkos::RangePolicy<ExecParameters...>::work_tag;
1704 using execution_space =
1705 typename Kokkos::RangePolicy<ExecParameters...>::execution_space;
1707 using team_policy_type =
1708 Kokkos::TeamPolicy<
TeamOpTag, execution_space,
1709 Kokkos::Schedule<Kokkos::Dynamic>>;
1710 team_policy_type team_policy( exec_policy.end() - exec_policy.begin(),
1713 using memory_space =
typename LinkedCellType::memory_space;
1718 assert( exec_policy.begin() == list.getParticleBegin() );
1719 assert( exec_policy.end() == list.getParticleEnd() );
1721 Impl::LinkedCellParallelReduce<
1722 work_tag, FunctorType, team_policy_type, LinkedCellType,
1723 typename LinkedCellType::CountView, ReduceType>
1724 lcl_par( str, team_policy, functor, list, reduce_val,
1725 exec_policy.begin() );
Linked cell list binning (spatial sorting) and neighbor iteration.
Memory access type checking.
Loop over particle neighbors.
Definition Cabana_Parallel.hpp:191
Neighbor list interface. Provides an interface callable at the functor level that gives access to nei...
Definition Cabana_NeighborList.hpp:223
Loop over particle neighbors (first) and neighbor's neighbors (second)
Definition Cabana_Parallel.hpp:196
Neighbor operations are executed in serial on each particle thread.
Definition Cabana_Parallel.hpp:201
Execution policy over a range of 2d indices.
Definition Cabana_ExecutionPolicy.hpp:82
Neighbor operations are executed with team parallelism.
Definition Cabana_Parallel.hpp:206
Neighbor operations are executed with team vector parallelism.
Definition Cabana_Parallel.hpp:211
Core: particle data structures and algorithms.
Definition Cabana_AoSoA.hpp:36
KOKKOS_INLINE_FUNCTION void for_each_neighbor(const IndexType i, const FunctorType &neighbor_functor, const NeighborListType &list, const FirstNeighborsTag)
Execute functor in serial within existing parallel kernel over particle first neighbors.
Definition Cabana_Parallel.hpp:1060
void neighbor_parallel_for(const Kokkos::RangePolicy< ExecParameters... > &exec_policy, const FunctorType &functor, const NeighborListType &list, const FirstNeighborsTag, const SerialOpTag, const std::string &str="", typename std::enable_if<(!is_linked_cell_list< NeighborListType >::value), int >::type *=0)
Execute functor in parallel according to the execution policy over particles with a thread-local seri...
Definition Cabana_Parallel.hpp:252
auto size(SliceType slice, typename std::enable_if< is_slice< SliceType >::value, int >::type *=0)
Check slice size (differs from Kokkos View).
Definition Cabana_Slice.hpp:1019
void neighbor_parallel_reduce(const Kokkos::RangePolicy< ExecParameters... > &exec_policy, const FunctorType &functor, const NeighborListType &list, const FirstNeighborsTag, const SerialOpTag, ReduceType &reduce_val, const std::string &str="", typename std::enable_if<(!is_linked_cell_list< NeighborListType >::value), int >::type *=0)
Execute functor reduction in parallel according to the execution policy over particles with a thread-...
Definition Cabana_Parallel.hpp:640
void simd_parallel_for(const SimdPolicy< VectorLength, ExecParameters... > &exec_policy, const FunctorType &functor, const std::string &str="")
Execute a vectorized functor in parallel with a 2d execution policy.
Definition Cabana_Parallel.hpp:174
Definition Cabana_Types.hpp:88
LinkedCellList static type checker.
Definition Cabana_LinkedCellList.hpp:1115