160 using device_type [[deprecated]] =
typename memory_space::device_type;
176 template <
class ScalarT,
class MemorySpaceT,
class ArrayLayoutT>
198 , _residual_norm( 0.0 )
218 const std::vector<std::array<int, num_space_dim>>& stencil,
219 const bool is_symmetric =
false )
override
221 setStencil( stencil, is_symmetric, _A_stencil, _A_halo, _A,
244 const std::vector<std::array<int, num_space_dim>>& stencil,
245 const bool is_symmetric =
false )
override
247 setStencil( stencil, is_symmetric, _M_stencil, _M_halo, _M,
265 void setMaxIter(
const int max_iter )
override { _max_iter = max_iter; }
270 _print_level = print_level;
283 Kokkos::Profiling::ScopedRegion region(
284 "Cabana::Grid::ReferenceStructuredSolver::solve" );
287 auto local_grid = _vectors->layout()->localGrid();
290 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
291 std::cout << std::endl
292 <<
"Preconditioned conjugate gradient" << std::endl;
296 local_grid->indexSpace(
Own(), EntityType(),
Local() );
307 auto x_view = x.view();
308 auto b_view = b.view();
309 auto M_view = _M->view();
310 auto A_view = _A->view();
311 auto p_old_view = p_old->view();
312 auto z_view = z->view();
313 auto r_old_view = r_old->view();
314 auto q_view = q->view();
315 auto p_new_view = p_new->view();
316 auto r_new_view = r_new->view();
322 std::vector<Scalar> b_norm( 1 );
326 Kokkos::deep_copy( p_old_view, x_view );
332 _residual_norm = 0.0;
333 auto compute_r0 = createComputeR0( _A_stencil, A_view, p_old_view,
334 b_view, r_old_view );
337 std::integral_constant<std::size_t, num_space_dim>{}, compute_r0,
341 MPI_Allreduce( MPI_IN_PLACE, &_residual_norm, 1,
343 local_grid->globalGrid().comm() );
346 _residual_norm = std::sqrt( _residual_norm ) / b_norm[0];
347 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
348 std::cout <<
"Iteration " << _num_iter
349 <<
": |r|_2 / |b|_2 = " << _residual_norm << std::endl;
350 if ( _residual_norm <= _tol )
357 Scalar zTr_old = 0.0;
358 auto compute_z0 = createComputeZ0( _M_stencil, M_view, p_old_view,
359 p_new_view, r_old_view, z_view );
362 std::integral_constant<std::size_t, num_space_dim>{}, compute_z0,
367 MPI_SUM, local_grid->globalGrid().comm() );
375 createComputeQ0( _A_stencil, A_view, p_old_view, q_view );
378 std::integral_constant<std::size_t, num_space_dim>{}, compute_q0,
383 MPI_SUM, local_grid->globalGrid().comm() );
386 bool converged =
false;
387 Scalar zTr_new = 0.0;
390 while ( _residual_norm > _tol && _num_iter < _max_iter )
396 alpha = zTr_old / pTAp;
398 auto cg_kernel_1 = createKernel1(
399 _M_stencil, M_view, x_view, r_new_view, r_old_view, p_new_view,
400 p_old_view, z_view, q_view, alpha );
403 std::integral_constant<std::size_t, num_space_dim>{},
404 cg_kernel_1, zTr_new );
408 MPI_SUM, local_grid->globalGrid().comm() );
411 _residual_norm = std::sqrt( fabs( zTr_new ) ) / b_norm[0];
417 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
418 std::cout <<
"Iteration " << _num_iter
419 <<
": |r|_2 / |b|_2 = " << _residual_norm
423 if ( _residual_norm <= _tol )
433 beta = zTr_new / zTr_old;
435 auto cg_kernel_2 = createKernel2(
436 _A_stencil, A_view, x_view, r_new_view, r_old_view, p_new_view,
437 p_old_view, z_view, q_view, beta );
440 std::integral_constant<std::size_t, num_space_dim>{},
445 MPI_SUM, local_grid->globalGrid().comm() );
452 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
453 std::cout <<
"Finished in " << _num_iter
454 <<
" iterations, converged to " << _residual_norm
460 throw std::runtime_error(
"CG solver did not converge" );
471 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewB,
481 using value_type =
typename ViewB::value_type;
483 KOKKOS_INLINE_FUNCTION
void
484 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
485 const int j,
const int k, value_type& result )
const
492 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
493 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
494 Ax += A_view( i, j, k, c ) *
495 p_old_view( i + A_stencil( c, Dim::I ),
496 j + A_stencil( c, Dim::J ),
497 k + A_stencil( c, Dim::K ), 0 );
500 auto r_new = b_view( i, j, k, 0 ) - Ax;
503 r_old_view( i, j, k, 0 ) = r_new;
506 result += r_new * r_new;
509 KOKKOS_INLINE_FUNCTION
void
510 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
511 const int j, value_type& result )
const
518 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
519 if ( fabs( A_view( i, j, c ) ) > 0.0 )
520 Ax += A_view( i, j, c ) *
521 p_old_view( i + A_stencil( c, Dim::I ),
522 j + A_stencil( c, Dim::J ), 0 );
525 auto r_new = b_view( i, j, 0 ) - Ax;
528 r_old_view( i, j, 0 ) = r_new;
531 result += r_new * r_new;
535 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewB,
537 auto createComputeR0(
const StencilA& A_stencil,
const ViewA& A_view,
538 const ViewOldP& p_old_view,
const ViewB& b_view,
539 const ViewOldR& r_old_view )
541 return ComputeR0<StencilA, ViewA, ViewOldP, ViewB, ViewOldR>{
542 A_stencil, A_view, p_old_view, b_view, r_old_view };
545 template <
class StencilM,
class ViewM,
class ViewOldP,
class ViewNewP,
546 class ViewOldR,
class ViewZ>
556 using value_type =
typename ViewZ::value_type;
558 KOKKOS_INLINE_FUNCTION
void
559 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
560 const int j,
const int k, value_type& result )
const
566 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
567 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
568 Mr += M_view( i, j, k, c ) *
569 r_old_view( i + M_stencil( c, Dim::I ),
570 j + M_stencil( c, Dim::J ),
571 k + M_stencil( c, Dim::K ), 0 );
573 z_view( i, j, k, 0 ) = Mr;
574 p_old_view( i, j, k, 0 ) = Mr;
575 p_new_view( i, j, k, 0 ) = Mr;
578 result += Mr * r_old_view( i, j, k, 0 );
581 KOKKOS_INLINE_FUNCTION
void
582 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
583 const int j, value_type& result )
const
589 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
590 if ( fabs( M_view( i, j, c ) ) > 0.0 )
591 Mr += M_view( i, j, c ) *
592 r_old_view( i + M_stencil( c, Dim::I ),
593 j + M_stencil( c, Dim::J ), 0 );
595 z_view( i, j, 0 ) = Mr;
596 p_old_view( i, j, 0 ) = Mr;
597 p_new_view( i, j, 0 ) = Mr;
600 result += Mr * r_old_view( i, j, 0 );
604 template <
class StencilM,
class ViewM,
class ViewOldP,
class ViewNewP,
605 class ViewOldR,
class ViewZ>
606 auto createComputeZ0(
const StencilM& M_stencil,
const ViewM& M_view,
607 const ViewOldP& p_old_view,
608 const ViewNewP& p_new_view,
609 const ViewOldR& r_old_view,
const ViewZ& z_view )
611 return ComputeZ0<StencilM, ViewM, ViewOldP, ViewNewP, ViewOldR, ViewZ>{
612 M_stencil, M_view, p_old_view, p_new_view, r_old_view, z_view };
615 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewQ>
623 using value_type =
typename ViewQ::value_type;
625 KOKKOS_INLINE_FUNCTION
void
626 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
627 const int j,
const int k, value_type& result )
const
634 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
635 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
636 Ap += A_view( i, j, k, c ) *
637 ( p_old_view( i + A_stencil( c, Dim::I ),
638 j + A_stencil( c, Dim::J ),
639 k + A_stencil( c, Dim::K ), 0 ) );
642 q_view( i, j, k, 0 ) = Ap;
645 result += p_old_view( i, j, k, 0 ) * Ap;
648 KOKKOS_INLINE_FUNCTION
void
649 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
650 const int j, value_type& result )
const
657 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
658 if ( fabs( A_view( i, j, c ) ) > 0.0 )
659 Ap += A_view( i, j, c ) *
660 ( p_old_view( i + A_stencil( c, Dim::I ),
661 j + A_stencil( c, Dim::J ), 0 ) );
664 q_view( i, j, 0 ) = Ap;
667 result += p_old_view( i, j, 0 ) * Ap;
671 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewQ>
672 auto createComputeQ0(
const StencilA& A_stencil,
const ViewA& A_view,
673 const ViewOldP& p_old_view,
const ViewQ& q_view )
675 return ComputeQ0<StencilA, ViewA, ViewOldP, ViewQ>{
676 A_stencil, A_view, p_old_view, q_view };
679 template <
class StencilM,
class ViewM,
class ViewX,
class ViewNewR,
680 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
681 class ViewQ,
class ValueType>
695 KOKKOS_INLINE_FUNCTION
void
696 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
697 const int j,
const int k, ValueType& result )
const
704 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
705 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
706 Mr += M_view( i, j, k, c ) *
707 ( r_old_view( i + M_stencil( c, Dim::I ),
708 j + M_stencil( c, Dim::J ),
709 k + M_stencil( c, Dim::K ), 0 ) -
710 alpha * q_view( i + M_stencil( c, Dim::I ),
711 j + M_stencil( c, Dim::J ),
712 k + M_stencil( c, Dim::K ), 0 ) );
716 x_view( i, j, k, 0 ) + alpha * p_new_view( i, j, k, 0 );
720 r_old_view( i, j, k, 0 ) - alpha * q_view( i, j, k, 0 );
723 p_old_view( i, j, k, 0 ) = p_new_view( i, j, k, 0 );
726 x_view( i, j, k, 0 ) = x_new;
727 r_new_view( i, j, k, 0 ) = r_new;
728 z_view( i, j, k, 0 ) = Mr;
731 result += Mr * r_new;
734 KOKKOS_INLINE_FUNCTION
void
735 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
736 const int j, ValueType& result )
const
743 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
744 if ( fabs( M_view( i, j, c ) ) > 0.0 )
745 Mr += M_view( i, j, c ) *
746 ( r_old_view( i + M_stencil( c, Dim::I ),
747 j + M_stencil( c, Dim::J ), 0 ) -
748 alpha * q_view( i + M_stencil( c, Dim::I ),
749 j + M_stencil( c, Dim::J ), 0 ) );
752 ValueType x_new = x_view( i, j, 0 ) + alpha * p_new_view( i, j, 0 );
755 ValueType r_new = r_old_view( i, j, 0 ) - alpha * q_view( i, j, 0 );
758 p_old_view( i, j, 0 ) = p_new_view( i, j, 0 );
761 x_view( i, j, 0 ) = x_new;
762 r_new_view( i, j, 0 ) = r_new;
763 z_view( i, j, 0 ) = Mr;
766 result += Mr * r_new;
770 template <
class StencilM,
class ViewM,
class ViewX,
class ViewNewR,
771 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
772 class ViewQ,
class ValueType>
773 auto createKernel1(
const StencilM& M_stencil,
const ViewM& M_view,
774 const ViewX& x_view,
const ViewNewR& r_new_view,
775 const ViewOldR& r_old_view,
const ViewNewP& p_new_view,
776 const ViewOldP& p_old_view,
const ViewZ& z_view,
777 const ViewQ& q_view,
const ValueType& alpha )
779 return Kernel1<StencilM, ViewM, ViewX, ViewNewR, ViewOldR, ViewOldP,
780 ViewNewP, ViewZ, ViewQ, ValueType>{
781 M_stencil, M_view, x_view, r_new_view, r_old_view,
782 p_new_view, p_old_view, z_view, q_view, alpha };
785 template <
class StencilA,
class ViewA,
class ViewX,
class ViewNewR,
786 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
787 class ViewQ,
class ValueType>
801 KOKKOS_INLINE_FUNCTION
void
802 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
803 const int j,
const int k, ValueType& result )
const
810 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
811 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
813 A_view( i, j, k, c ) *
814 ( z_view( i + A_stencil( c, Dim::I ),
815 j + A_stencil( c, Dim::J ),
816 k + A_stencil( c, Dim::K ), 0 ) +
817 beta * p_old_view( i + A_stencil( c, Dim::I ),
818 j + A_stencil( c, Dim::J ),
819 k + A_stencil( c, Dim::K ), 0 ) );
823 z_view( i, j, k, 0 ) + beta * p_old_view( i, j, k, 0 );
826 r_old_view( i, j, k, 0 ) = r_new_view( i, j, k, 0 );
829 q_view( i, j, k, 0 ) = Ap;
830 p_new_view( i, j, k, 0 ) = p_new;
833 result += p_new * Ap;
836 KOKKOS_INLINE_FUNCTION
void
837 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
838 const int j, ValueType& result )
const
845 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
846 if ( fabs( A_view( i, j, c ) ) > 0.0 )
849 ( z_view( i + A_stencil( c, Dim::I ),
850 j + A_stencil( c, Dim::J ), 0 ) +
851 beta * p_old_view( i + A_stencil( c, Dim::I ),
852 j + A_stencil( c, Dim::J ), 0 ) );
855 ValueType p_new = z_view( i, j, 0 ) + beta * p_old_view( i, j, 0 );
858 r_old_view( i, j, 0 ) = r_new_view( i, j, 0 );
861 q_view( i, j, 0 ) = Ap;
862 p_new_view( i, j, 0 ) = p_new;
865 result += p_new * Ap;
869 template <
class StencilA,
class ViewA,
class ViewX,
class ViewNewR,
870 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
871 class ViewQ,
class ValueType>
872 auto createKernel2(
const StencilA& A_stencil,
const ViewA& A_view,
873 const ViewX& x_view,
const ViewNewR& r_new_view,
874 const ViewOldR& r_old_view,
const ViewNewP& p_new_view,
875 const ViewOldP& p_old_view,
const ViewZ& z_view,
876 const ViewQ& q_view,
const ValueType& beta )
878 return Kernel2<StencilA, ViewA, ViewX, ViewNewR, ViewOldR, ViewOldP,
879 ViewNewP, ViewZ, ViewQ, ValueType>{
880 A_stencil, A_view, x_view, r_new_view, r_old_view,
881 p_new_view, p_old_view, z_view, q_view, beta };
888 setStencil(
const std::vector<std::array<int, num_space_dim>>& stencil,
889 const bool is_symmetric,
890 Kokkos::View<
int* [
num_space_dim], MemorySpace>& device_stencil,
891 std::shared_ptr<Halo<memory_space>>& halo,
892 std::shared_ptr<Array_t>& matrix,
893 std::shared_ptr<subarray_type>& halo_matrix )
897 throw std::logic_error(
898 "Reference CG currently does not support symmetry" );
901 auto local_grid = _vectors->layout()->localGrid();
904 device_stencil = Kokkos::View<int* [num_space_dim], MemorySpace>(
905 Kokkos::ViewAllocateWithoutInitializing(
"stencil" ),
907 auto stencil_mirror =
908 Kokkos::create_mirror_view( Kokkos::HostSpace(), device_stencil );
909 for (
unsigned s = 0; s < stencil.size(); ++s )
911 stencil_mirror( s, d ) = stencil[s][d];
912 Kokkos::deep_copy( device_stencil, stencil_mirror );
916 std::set<std::array<int, num_space_dim>> neighbor_set;
917 std::array<int, num_space_dim> neighbor;
919 for (
auto s : stencil )
923 neighbor[d] = ( s[d] == 0 ) ? 0 : s[d] / std::abs( s[d] );
924 neighbor_set.emplace( neighbor );
928 width = std::max( width, std::abs( s[d] ) );
930 std::vector<std::array<int, num_space_dim>> halo_neighbors(
931 neighbor_set.size() );
932 std::copy( neighbor_set.begin(), neighbor_set.end(),
933 halo_neighbors.begin() );
940 matrix = createArray<Scalar, MemorySpace>(
"matrix", matrix_layout );
943 HaloPattern<num_space_dim> pattern;
944 pattern.setNeighbors( halo_neighbors );
945 halo =
createHalo( pattern, width, *halo_matrix );
953 Scalar _residual_norm;
955 Kokkos::View<int* [num_space_dim], MemorySpace> _A_stencil;
956 Kokkos::View<int* [num_space_dim], MemorySpace> _M_stencil;
957 std::shared_ptr<Halo<memory_space>> _A_halo;
958 std::shared_ptr<Halo<memory_space>> _M_halo;
959 std::shared_ptr<Array_t> _A;
960 std::shared_ptr<Array_t> _M;
961 std::shared_ptr<Array_t> _vectors;
962 std::shared_ptr<subarray_type> _A_halo_vectors;
963 std::shared_ptr<subarray_type> _M_halo_vectors;