171 template <
class ScalarT,
class MemorySpaceT,
class ArrayLayoutT>
193 , _residual_norm( 0.0 )
213 const std::vector<std::array<int, num_space_dim>>& stencil,
214 const bool is_symmetric =
false )
override
216 setStencil( stencil, is_symmetric, _A_stencil, _A_halo, _A,
239 const std::vector<std::array<int, num_space_dim>>& stencil,
240 const bool is_symmetric =
false )
override
242 setStencil( stencil, is_symmetric, _M_stencil, _M_halo, _M,
260 void setMaxIter(
const int max_iter )
override { _max_iter = max_iter; }
265 _print_level = print_level;
278 Kokkos::Profiling::ScopedRegion region(
279 "Cabana::Grid::ReferenceStructuredSolver::solve" );
282 auto local_grid = _vectors->layout()->localGrid();
285 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
286 std::cout << std::endl
287 <<
"Preconditioned conjugate gradient" << std::endl;
291 local_grid->indexSpace(
Own(), EntityType(),
Local() );
302 auto x_view = x.view();
303 auto b_view = b.view();
304 auto M_view = _M->view();
305 auto A_view = _A->view();
306 auto p_old_view = p_old->view();
307 auto z_view = z->view();
308 auto r_old_view = r_old->view();
309 auto q_view = q->view();
310 auto p_new_view = p_new->view();
311 auto r_new_view = r_new->view();
317 std::vector<Scalar> b_norm( 1 );
321 Kokkos::deep_copy( p_old_view, x_view );
327 _residual_norm = 0.0;
328 auto compute_r0 = createComputeR0( _A_stencil, A_view, p_old_view,
329 b_view, r_old_view );
332 std::integral_constant<std::size_t, num_space_dim>{}, compute_r0,
336 MPI_Allreduce( MPI_IN_PLACE, &_residual_norm, 1,
338 local_grid->globalGrid().comm() );
341 _residual_norm = std::sqrt( _residual_norm ) / b_norm[0];
342 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
343 std::cout <<
"Iteration " << _num_iter
344 <<
": |r|_2 / |b|_2 = " << _residual_norm << std::endl;
345 if ( _residual_norm <= _tol )
352 Scalar zTr_old = 0.0;
353 auto compute_z0 = createComputeZ0( _M_stencil, M_view, p_old_view,
354 p_new_view, r_old_view, z_view );
357 std::integral_constant<std::size_t, num_space_dim>{}, compute_z0,
362 MPI_SUM, local_grid->globalGrid().comm() );
370 createComputeQ0( _A_stencil, A_view, p_old_view, q_view );
373 std::integral_constant<std::size_t, num_space_dim>{}, compute_q0,
378 MPI_SUM, local_grid->globalGrid().comm() );
381 bool converged =
false;
382 Scalar zTr_new = 0.0;
385 while ( _residual_norm > _tol && _num_iter < _max_iter )
391 alpha = zTr_old / pTAp;
393 auto cg_kernel_1 = createKernel1(
394 _M_stencil, M_view, x_view, r_new_view, r_old_view, p_new_view,
395 p_old_view, z_view, q_view, alpha );
398 std::integral_constant<std::size_t, num_space_dim>{},
399 cg_kernel_1, zTr_new );
403 MPI_SUM, local_grid->globalGrid().comm() );
406 _residual_norm = std::sqrt( fabs( zTr_new ) ) / b_norm[0];
412 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
413 std::cout <<
"Iteration " << _num_iter
414 <<
": |r|_2 / |b|_2 = " << _residual_norm
418 if ( _residual_norm <= _tol )
428 beta = zTr_new / zTr_old;
430 auto cg_kernel_2 = createKernel2(
431 _A_stencil, A_view, x_view, r_new_view, r_old_view, p_new_view,
432 p_old_view, z_view, q_view, beta );
435 std::integral_constant<std::size_t, num_space_dim>{},
440 MPI_SUM, local_grid->globalGrid().comm() );
447 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
448 std::cout <<
"Finished in " << _num_iter
449 <<
" iterations, converged to " << _residual_norm
455 throw std::runtime_error(
456 "Cabana::Grid::ReferenceConjugateGradient::solve: CG solver "
457 "did not converge" );
468 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewB,
478 using value_type =
typename ViewB::value_type;
480 KOKKOS_INLINE_FUNCTION
void
481 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
482 const int j,
const int k, value_type& result )
const
489 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
490 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
491 Ax += A_view( i, j, k, c ) *
492 p_old_view( i + A_stencil( c, Dim::I ),
493 j + A_stencil( c, Dim::J ),
494 k + A_stencil( c, Dim::K ), 0 );
497 auto r_new = b_view( i, j, k, 0 ) - Ax;
500 r_old_view( i, j, k, 0 ) = r_new;
503 result += r_new * r_new;
506 KOKKOS_INLINE_FUNCTION
void
507 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
508 const int j, value_type& result )
const
515 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
516 if ( fabs( A_view( i, j, c ) ) > 0.0 )
517 Ax += A_view( i, j, c ) *
518 p_old_view( i + A_stencil( c, Dim::I ),
519 j + A_stencil( c, Dim::J ), 0 );
522 auto r_new = b_view( i, j, 0 ) - Ax;
525 r_old_view( i, j, 0 ) = r_new;
528 result += r_new * r_new;
532 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewB,
534 auto createComputeR0(
const StencilA& A_stencil,
const ViewA& A_view,
535 const ViewOldP& p_old_view,
const ViewB& b_view,
536 const ViewOldR& r_old_view )
538 return ComputeR0<StencilA, ViewA, ViewOldP, ViewB, ViewOldR>{
539 A_stencil, A_view, p_old_view, b_view, r_old_view };
542 template <
class StencilM,
class ViewM,
class ViewOldP,
class ViewNewP,
543 class ViewOldR,
class ViewZ>
553 using value_type =
typename ViewZ::value_type;
555 KOKKOS_INLINE_FUNCTION
void
556 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
557 const int j,
const int k, value_type& result )
const
563 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
564 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
565 Mr += M_view( i, j, k, c ) *
566 r_old_view( i + M_stencil( c, Dim::I ),
567 j + M_stencil( c, Dim::J ),
568 k + M_stencil( c, Dim::K ), 0 );
570 z_view( i, j, k, 0 ) = Mr;
571 p_old_view( i, j, k, 0 ) = Mr;
572 p_new_view( i, j, k, 0 ) = Mr;
575 result += Mr * r_old_view( i, j, k, 0 );
578 KOKKOS_INLINE_FUNCTION
void
579 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
580 const int j, value_type& result )
const
586 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
587 if ( fabs( M_view( i, j, c ) ) > 0.0 )
588 Mr += M_view( i, j, c ) *
589 r_old_view( i + M_stencil( c, Dim::I ),
590 j + M_stencil( c, Dim::J ), 0 );
592 z_view( i, j, 0 ) = Mr;
593 p_old_view( i, j, 0 ) = Mr;
594 p_new_view( i, j, 0 ) = Mr;
597 result += Mr * r_old_view( i, j, 0 );
601 template <
class StencilM,
class ViewM,
class ViewOldP,
class ViewNewP,
602 class ViewOldR,
class ViewZ>
603 auto createComputeZ0(
const StencilM& M_stencil,
const ViewM& M_view,
604 const ViewOldP& p_old_view,
605 const ViewNewP& p_new_view,
606 const ViewOldR& r_old_view,
const ViewZ& z_view )
608 return ComputeZ0<StencilM, ViewM, ViewOldP, ViewNewP, ViewOldR, ViewZ>{
609 M_stencil, M_view, p_old_view, p_new_view, r_old_view, z_view };
612 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewQ>
620 using value_type =
typename ViewQ::value_type;
622 KOKKOS_INLINE_FUNCTION
void
623 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
624 const int j,
const int k, value_type& result )
const
631 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
632 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
633 Ap += A_view( i, j, k, c ) *
634 ( p_old_view( i + A_stencil( c, Dim::I ),
635 j + A_stencil( c, Dim::J ),
636 k + A_stencil( c, Dim::K ), 0 ) );
639 q_view( i, j, k, 0 ) = Ap;
642 result += p_old_view( i, j, k, 0 ) * Ap;
645 KOKKOS_INLINE_FUNCTION
void
646 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
647 const int j, value_type& result )
const
654 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
655 if ( fabs( A_view( i, j, c ) ) > 0.0 )
656 Ap += A_view( i, j, c ) *
657 ( p_old_view( i + A_stencil( c, Dim::I ),
658 j + A_stencil( c, Dim::J ), 0 ) );
661 q_view( i, j, 0 ) = Ap;
664 result += p_old_view( i, j, 0 ) * Ap;
668 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewQ>
669 auto createComputeQ0(
const StencilA& A_stencil,
const ViewA& A_view,
670 const ViewOldP& p_old_view,
const ViewQ& q_view )
672 return ComputeQ0<StencilA, ViewA, ViewOldP, ViewQ>{
673 A_stencil, A_view, p_old_view, q_view };
676 template <
class StencilM,
class ViewM,
class ViewX,
class ViewNewR,
677 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
678 class ViewQ,
class ValueType>
692 KOKKOS_INLINE_FUNCTION
void
693 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
694 const int j,
const int k, ValueType& result )
const
701 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
702 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
703 Mr += M_view( i, j, k, c ) *
704 ( r_old_view( i + M_stencil( c, Dim::I ),
705 j + M_stencil( c, Dim::J ),
706 k + M_stencil( c, Dim::K ), 0 ) -
707 alpha * q_view( i + M_stencil( c, Dim::I ),
708 j + M_stencil( c, Dim::J ),
709 k + M_stencil( c, Dim::K ), 0 ) );
713 x_view( i, j, k, 0 ) + alpha * p_new_view( i, j, k, 0 );
717 r_old_view( i, j, k, 0 ) - alpha * q_view( i, j, k, 0 );
720 p_old_view( i, j, k, 0 ) = p_new_view( i, j, k, 0 );
723 x_view( i, j, k, 0 ) = x_new;
724 r_new_view( i, j, k, 0 ) = r_new;
725 z_view( i, j, k, 0 ) = Mr;
728 result += Mr * r_new;
731 KOKKOS_INLINE_FUNCTION
void
732 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
733 const int j, ValueType& result )
const
740 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
741 if ( fabs( M_view( i, j, c ) ) > 0.0 )
742 Mr += M_view( i, j, c ) *
743 ( r_old_view( i + M_stencil( c, Dim::I ),
744 j + M_stencil( c, Dim::J ), 0 ) -
745 alpha * q_view( i + M_stencil( c, Dim::I ),
746 j + M_stencil( c, Dim::J ), 0 ) );
749 ValueType x_new = x_view( i, j, 0 ) + alpha * p_new_view( i, j, 0 );
752 ValueType r_new = r_old_view( i, j, 0 ) - alpha * q_view( i, j, 0 );
755 p_old_view( i, j, 0 ) = p_new_view( i, j, 0 );
758 x_view( i, j, 0 ) = x_new;
759 r_new_view( i, j, 0 ) = r_new;
760 z_view( i, j, 0 ) = Mr;
763 result += Mr * r_new;
767 template <
class StencilM,
class ViewM,
class ViewX,
class ViewNewR,
768 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
769 class ViewQ,
class ValueType>
770 auto createKernel1(
const StencilM& M_stencil,
const ViewM& M_view,
771 const ViewX& x_view,
const ViewNewR& r_new_view,
772 const ViewOldR& r_old_view,
const ViewNewP& p_new_view,
773 const ViewOldP& p_old_view,
const ViewZ& z_view,
774 const ViewQ& q_view,
const ValueType& alpha )
776 return Kernel1<StencilM, ViewM, ViewX, ViewNewR, ViewOldR, ViewOldP,
777 ViewNewP, ViewZ, ViewQ, ValueType>{
778 M_stencil, M_view, x_view, r_new_view, r_old_view,
779 p_new_view, p_old_view, z_view, q_view, alpha };
782 template <
class StencilA,
class ViewA,
class ViewX,
class ViewNewR,
783 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
784 class ViewQ,
class ValueType>
798 KOKKOS_INLINE_FUNCTION
void
799 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
800 const int j,
const int k, ValueType& result )
const
807 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
808 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
810 A_view( i, j, k, c ) *
811 ( z_view( i + A_stencil( c, Dim::I ),
812 j + A_stencil( c, Dim::J ),
813 k + A_stencil( c, Dim::K ), 0 ) +
814 beta * p_old_view( i + A_stencil( c, Dim::I ),
815 j + A_stencil( c, Dim::J ),
816 k + A_stencil( c, Dim::K ), 0 ) );
820 z_view( i, j, k, 0 ) + beta * p_old_view( i, j, k, 0 );
823 r_old_view( i, j, k, 0 ) = r_new_view( i, j, k, 0 );
826 q_view( i, j, k, 0 ) = Ap;
827 p_new_view( i, j, k, 0 ) = p_new;
830 result += p_new * Ap;
833 KOKKOS_INLINE_FUNCTION
void
834 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
835 const int j, ValueType& result )
const
842 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
843 if ( fabs( A_view( i, j, c ) ) > 0.0 )
846 ( z_view( i + A_stencil( c, Dim::I ),
847 j + A_stencil( c, Dim::J ), 0 ) +
848 beta * p_old_view( i + A_stencil( c, Dim::I ),
849 j + A_stencil( c, Dim::J ), 0 ) );
852 ValueType p_new = z_view( i, j, 0 ) + beta * p_old_view( i, j, 0 );
855 r_old_view( i, j, 0 ) = r_new_view( i, j, 0 );
858 q_view( i, j, 0 ) = Ap;
859 p_new_view( i, j, 0 ) = p_new;
862 result += p_new * Ap;
866 template <
class StencilA,
class ViewA,
class ViewX,
class ViewNewR,
867 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
868 class ViewQ,
class ValueType>
869 auto createKernel2(
const StencilA& A_stencil,
const ViewA& A_view,
870 const ViewX& x_view,
const ViewNewR& r_new_view,
871 const ViewOldR& r_old_view,
const ViewNewP& p_new_view,
872 const ViewOldP& p_old_view,
const ViewZ& z_view,
873 const ViewQ& q_view,
const ValueType& beta )
875 return Kernel2<StencilA, ViewA, ViewX, ViewNewR, ViewOldR, ViewOldP,
876 ViewNewP, ViewZ, ViewQ, ValueType>{
877 A_stencil, A_view, x_view, r_new_view, r_old_view,
878 p_new_view, p_old_view, z_view, q_view, beta };
885 setStencil(
const std::vector<std::array<int, num_space_dim>>& stencil,
886 const bool is_symmetric,
887 Kokkos::View<
int* [
num_space_dim], MemorySpace>& device_stencil,
888 std::shared_ptr<Halo<memory_space>>& halo,
889 std::shared_ptr<Array_t>& matrix,
890 std::shared_ptr<subarray_type>& halo_matrix )
894 throw std::logic_error(
895 "Reference CG currently does not support symmetry" );
898 auto local_grid = _vectors->layout()->localGrid();
901 device_stencil = Kokkos::View<int* [num_space_dim], MemorySpace>(
902 Kokkos::ViewAllocateWithoutInitializing(
"stencil" ),
904 auto stencil_mirror =
905 Kokkos::create_mirror_view( Kokkos::HostSpace(), device_stencil );
906 for (
unsigned s = 0; s < stencil.size(); ++s )
908 stencil_mirror( s, d ) = stencil[s][d];
909 Kokkos::deep_copy( device_stencil, stencil_mirror );
913 std::set<std::array<int, num_space_dim>> neighbor_set;
914 std::array<int, num_space_dim> neighbor;
916 for (
auto s : stencil )
920 neighbor[d] = ( s[d] == 0 ) ? 0 : s[d] / std::abs( s[d] );
921 neighbor_set.emplace( neighbor );
925 width = std::max( width, std::abs( s[d] ) );
927 std::vector<std::array<int, num_space_dim>> halo_neighbors(
928 neighbor_set.size() );
929 std::copy( neighbor_set.begin(), neighbor_set.end(),
930 halo_neighbors.begin() );
937 matrix = createArray<Scalar, MemorySpace>(
"matrix", matrix_layout );
940 HaloPattern<num_space_dim> pattern;
941 pattern.setNeighbors( halo_neighbors );
942 halo =
createHalo( pattern, width, *halo_matrix );
950 Scalar _residual_norm;
952 Kokkos::View<int* [num_space_dim], MemorySpace> _A_stencil;
953 Kokkos::View<int* [num_space_dim], MemorySpace> _M_stencil;
954 std::shared_ptr<Halo<memory_space>> _A_halo;
955 std::shared_ptr<Halo<memory_space>> _M_halo;
956 std::shared_ptr<Array_t> _A;
957 std::shared_ptr<Array_t> _M;
958 std::shared_ptr<Array_t> _vectors;
959 std::shared_ptr<subarray_type> _A_halo_vectors;
960 std::shared_ptr<subarray_type> _M_halo_vectors;