160 using device_type [[deprecated]] =
typename memory_space::device_type;
176 template <
class ScalarT,
class MemorySpaceT,
class ArrayLayoutT>
198 , _residual_norm( 0.0 )
218 const std::vector<std::array<int, num_space_dim>>& stencil,
219 const bool is_symmetric =
false )
override
221 setStencil( stencil, is_symmetric, _A_stencil, _A_halo, _A,
244 const std::vector<std::array<int, num_space_dim>>& stencil,
245 const bool is_symmetric =
false )
override
247 setStencil( stencil, is_symmetric, _M_stencil, _M_halo, _M,
265 void setMaxIter(
const int max_iter )
override { _max_iter = max_iter; }
270 _print_level = print_level;
283 Kokkos::Profiling::ScopedRegion region(
284 "Cabana::Grid::ReferenceStructuredSolver::solve" );
287 auto local_grid = _vectors->layout()->localGrid();
290 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
291 std::cout << std::endl
292 <<
"Preconditioned conjugate gradient" << std::endl;
296 local_grid->indexSpace(
Own(), EntityType(),
Local() );
307 auto x_view = x.view();
308 auto b_view = b.view();
309 auto M_view = _M->view();
310 auto A_view = _A->view();
311 auto p_old_view = p_old->view();
312 auto z_view = z->view();
313 auto r_old_view = r_old->view();
314 auto q_view = q->view();
315 auto p_new_view = p_new->view();
316 auto r_new_view = r_new->view();
322 std::vector<Scalar> b_norm( 1 );
326 Kokkos::deep_copy( p_old_view, x_view );
332 _residual_norm = 0.0;
333 auto compute_r0 = createComputeR0( _A_stencil, A_view, p_old_view,
334 b_view, r_old_view );
337 std::integral_constant<std::size_t, num_space_dim>{}, compute_r0,
341 MPI_Allreduce( MPI_IN_PLACE, &_residual_norm, 1,
343 local_grid->globalGrid().comm() );
346 _residual_norm = std::sqrt( _residual_norm ) / b_norm[0];
347 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
348 std::cout <<
"Iteration " << _num_iter
349 <<
": |r|_2 / |b|_2 = " << _residual_norm << std::endl;
350 if ( _residual_norm <= _tol )
357 Scalar zTr_old = 0.0;
358 auto compute_z0 = createComputeZ0( _M_stencil, M_view, p_old_view,
359 p_new_view, r_old_view, z_view );
362 std::integral_constant<std::size_t, num_space_dim>{}, compute_z0,
367 MPI_SUM, local_grid->globalGrid().comm() );
375 createComputeQ0( _A_stencil, A_view, p_old_view, q_view );
378 std::integral_constant<std::size_t, num_space_dim>{}, compute_q0,
383 MPI_SUM, local_grid->globalGrid().comm() );
386 bool converged =
false;
387 Scalar zTr_new = 0.0;
390 while ( _residual_norm > _tol && _num_iter < _max_iter )
396 alpha = zTr_old / pTAp;
398 auto cg_kernel_1 = createKernel1(
399 _M_stencil, M_view, x_view, r_new_view, r_old_view, p_new_view,
400 p_old_view, z_view, q_view, alpha );
403 std::integral_constant<std::size_t, num_space_dim>{},
404 cg_kernel_1, zTr_new );
408 MPI_SUM, local_grid->globalGrid().comm() );
411 _residual_norm = std::sqrt( fabs( zTr_new ) ) / b_norm[0];
417 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
418 std::cout <<
"Iteration " << _num_iter
419 <<
": |r|_2 / |b|_2 = " << _residual_norm
423 if ( _residual_norm <= _tol )
433 beta = zTr_new / zTr_old;
435 auto cg_kernel_2 = createKernel2(
436 _A_stencil, A_view, x_view, r_new_view, r_old_view, p_new_view,
437 p_old_view, z_view, q_view, beta );
440 std::integral_constant<std::size_t, num_space_dim>{},
445 MPI_SUM, local_grid->globalGrid().comm() );
452 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
453 std::cout <<
"Finished in " << _num_iter
454 <<
" iterations, converged to " << _residual_norm
460 throw std::runtime_error(
461 "Cabana::Grid::ReferenceConjugateGradient::solve: CG solver "
462 "did not converge" );
473 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewB,
483 using value_type =
typename ViewB::value_type;
485 KOKKOS_INLINE_FUNCTION
void
486 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
487 const int j,
const int k, value_type& result )
const
494 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
495 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
496 Ax += A_view( i, j, k, c ) *
497 p_old_view( i + A_stencil( c, Dim::I ),
498 j + A_stencil( c, Dim::J ),
499 k + A_stencil( c, Dim::K ), 0 );
502 auto r_new = b_view( i, j, k, 0 ) - Ax;
505 r_old_view( i, j, k, 0 ) = r_new;
508 result += r_new * r_new;
511 KOKKOS_INLINE_FUNCTION
void
512 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
513 const int j, value_type& result )
const
520 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
521 if ( fabs( A_view( i, j, c ) ) > 0.0 )
522 Ax += A_view( i, j, c ) *
523 p_old_view( i + A_stencil( c, Dim::I ),
524 j + A_stencil( c, Dim::J ), 0 );
527 auto r_new = b_view( i, j, 0 ) - Ax;
530 r_old_view( i, j, 0 ) = r_new;
533 result += r_new * r_new;
537 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewB,
539 auto createComputeR0(
const StencilA& A_stencil,
const ViewA& A_view,
540 const ViewOldP& p_old_view,
const ViewB& b_view,
541 const ViewOldR& r_old_view )
543 return ComputeR0<StencilA, ViewA, ViewOldP, ViewB, ViewOldR>{
544 A_stencil, A_view, p_old_view, b_view, r_old_view };
547 template <
class StencilM,
class ViewM,
class ViewOldP,
class ViewNewP,
548 class ViewOldR,
class ViewZ>
558 using value_type =
typename ViewZ::value_type;
560 KOKKOS_INLINE_FUNCTION
void
561 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
562 const int j,
const int k, value_type& result )
const
568 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
569 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
570 Mr += M_view( i, j, k, c ) *
571 r_old_view( i + M_stencil( c, Dim::I ),
572 j + M_stencil( c, Dim::J ),
573 k + M_stencil( c, Dim::K ), 0 );
575 z_view( i, j, k, 0 ) = Mr;
576 p_old_view( i, j, k, 0 ) = Mr;
577 p_new_view( i, j, k, 0 ) = Mr;
580 result += Mr * r_old_view( i, j, k, 0 );
583 KOKKOS_INLINE_FUNCTION
void
584 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
585 const int j, value_type& result )
const
591 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
592 if ( fabs( M_view( i, j, c ) ) > 0.0 )
593 Mr += M_view( i, j, c ) *
594 r_old_view( i + M_stencil( c, Dim::I ),
595 j + M_stencil( c, Dim::J ), 0 );
597 z_view( i, j, 0 ) = Mr;
598 p_old_view( i, j, 0 ) = Mr;
599 p_new_view( i, j, 0 ) = Mr;
602 result += Mr * r_old_view( i, j, 0 );
606 template <
class StencilM,
class ViewM,
class ViewOldP,
class ViewNewP,
607 class ViewOldR,
class ViewZ>
608 auto createComputeZ0(
const StencilM& M_stencil,
const ViewM& M_view,
609 const ViewOldP& p_old_view,
610 const ViewNewP& p_new_view,
611 const ViewOldR& r_old_view,
const ViewZ& z_view )
613 return ComputeZ0<StencilM, ViewM, ViewOldP, ViewNewP, ViewOldR, ViewZ>{
614 M_stencil, M_view, p_old_view, p_new_view, r_old_view, z_view };
617 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewQ>
625 using value_type =
typename ViewQ::value_type;
627 KOKKOS_INLINE_FUNCTION
void
628 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
629 const int j,
const int k, value_type& result )
const
636 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
637 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
638 Ap += A_view( i, j, k, c ) *
639 ( p_old_view( i + A_stencil( c, Dim::I ),
640 j + A_stencil( c, Dim::J ),
641 k + A_stencil( c, Dim::K ), 0 ) );
644 q_view( i, j, k, 0 ) = Ap;
647 result += p_old_view( i, j, k, 0 ) * Ap;
650 KOKKOS_INLINE_FUNCTION
void
651 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
652 const int j, value_type& result )
const
659 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
660 if ( fabs( A_view( i, j, c ) ) > 0.0 )
661 Ap += A_view( i, j, c ) *
662 ( p_old_view( i + A_stencil( c, Dim::I ),
663 j + A_stencil( c, Dim::J ), 0 ) );
666 q_view( i, j, 0 ) = Ap;
669 result += p_old_view( i, j, 0 ) * Ap;
673 template <
class StencilA,
class ViewA,
class ViewOldP,
class ViewQ>
674 auto createComputeQ0(
const StencilA& A_stencil,
const ViewA& A_view,
675 const ViewOldP& p_old_view,
const ViewQ& q_view )
677 return ComputeQ0<StencilA, ViewA, ViewOldP, ViewQ>{
678 A_stencil, A_view, p_old_view, q_view };
681 template <
class StencilM,
class ViewM,
class ViewX,
class ViewNewR,
682 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
683 class ViewQ,
class ValueType>
697 KOKKOS_INLINE_FUNCTION
void
698 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
699 const int j,
const int k, ValueType& result )
const
706 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
707 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
708 Mr += M_view( i, j, k, c ) *
709 ( r_old_view( i + M_stencil( c, Dim::I ),
710 j + M_stencil( c, Dim::J ),
711 k + M_stencil( c, Dim::K ), 0 ) -
712 alpha * q_view( i + M_stencil( c, Dim::I ),
713 j + M_stencil( c, Dim::J ),
714 k + M_stencil( c, Dim::K ), 0 ) );
718 x_view( i, j, k, 0 ) + alpha * p_new_view( i, j, k, 0 );
722 r_old_view( i, j, k, 0 ) - alpha * q_view( i, j, k, 0 );
725 p_old_view( i, j, k, 0 ) = p_new_view( i, j, k, 0 );
728 x_view( i, j, k, 0 ) = x_new;
729 r_new_view( i, j, k, 0 ) = r_new;
730 z_view( i, j, k, 0 ) = Mr;
733 result += Mr * r_new;
736 KOKKOS_INLINE_FUNCTION
void
737 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
738 const int j, ValueType& result )
const
745 for (
unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
746 if ( fabs( M_view( i, j, c ) ) > 0.0 )
747 Mr += M_view( i, j, c ) *
748 ( r_old_view( i + M_stencil( c, Dim::I ),
749 j + M_stencil( c, Dim::J ), 0 ) -
750 alpha * q_view( i + M_stencil( c, Dim::I ),
751 j + M_stencil( c, Dim::J ), 0 ) );
754 ValueType x_new = x_view( i, j, 0 ) + alpha * p_new_view( i, j, 0 );
757 ValueType r_new = r_old_view( i, j, 0 ) - alpha * q_view( i, j, 0 );
760 p_old_view( i, j, 0 ) = p_new_view( i, j, 0 );
763 x_view( i, j, 0 ) = x_new;
764 r_new_view( i, j, 0 ) = r_new;
765 z_view( i, j, 0 ) = Mr;
768 result += Mr * r_new;
772 template <
class StencilM,
class ViewM,
class ViewX,
class ViewNewR,
773 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
774 class ViewQ,
class ValueType>
775 auto createKernel1(
const StencilM& M_stencil,
const ViewM& M_view,
776 const ViewX& x_view,
const ViewNewR& r_new_view,
777 const ViewOldR& r_old_view,
const ViewNewP& p_new_view,
778 const ViewOldP& p_old_view,
const ViewZ& z_view,
779 const ViewQ& q_view,
const ValueType& alpha )
781 return Kernel1<StencilM, ViewM, ViewX, ViewNewR, ViewOldR, ViewOldP,
782 ViewNewP, ViewZ, ViewQ, ValueType>{
783 M_stencil, M_view, x_view, r_new_view, r_old_view,
784 p_new_view, p_old_view, z_view, q_view, alpha };
787 template <
class StencilA,
class ViewA,
class ViewX,
class ViewNewR,
788 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
789 class ViewQ,
class ValueType>
803 KOKKOS_INLINE_FUNCTION
void
804 operator()(
const std::integral_constant<std::size_t, 3>&,
const int i,
805 const int j,
const int k, ValueType& result )
const
812 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
813 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
815 A_view( i, j, k, c ) *
816 ( z_view( i + A_stencil( c, Dim::I ),
817 j + A_stencil( c, Dim::J ),
818 k + A_stencil( c, Dim::K ), 0 ) +
819 beta * p_old_view( i + A_stencil( c, Dim::I ),
820 j + A_stencil( c, Dim::J ),
821 k + A_stencil( c, Dim::K ), 0 ) );
825 z_view( i, j, k, 0 ) + beta * p_old_view( i, j, k, 0 );
828 r_old_view( i, j, k, 0 ) = r_new_view( i, j, k, 0 );
831 q_view( i, j, k, 0 ) = Ap;
832 p_new_view( i, j, k, 0 ) = p_new;
835 result += p_new * Ap;
838 KOKKOS_INLINE_FUNCTION
void
839 operator()(
const std::integral_constant<std::size_t, 2>&,
const int i,
840 const int j, ValueType& result )
const
847 for (
unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
848 if ( fabs( A_view( i, j, c ) ) > 0.0 )
851 ( z_view( i + A_stencil( c, Dim::I ),
852 j + A_stencil( c, Dim::J ), 0 ) +
853 beta * p_old_view( i + A_stencil( c, Dim::I ),
854 j + A_stencil( c, Dim::J ), 0 ) );
857 ValueType p_new = z_view( i, j, 0 ) + beta * p_old_view( i, j, 0 );
860 r_old_view( i, j, 0 ) = r_new_view( i, j, 0 );
863 q_view( i, j, 0 ) = Ap;
864 p_new_view( i, j, 0 ) = p_new;
867 result += p_new * Ap;
871 template <
class StencilA,
class ViewA,
class ViewX,
class ViewNewR,
872 class ViewOldR,
class ViewOldP,
class ViewNewP,
class ViewZ,
873 class ViewQ,
class ValueType>
874 auto createKernel2(
const StencilA& A_stencil,
const ViewA& A_view,
875 const ViewX& x_view,
const ViewNewR& r_new_view,
876 const ViewOldR& r_old_view,
const ViewNewP& p_new_view,
877 const ViewOldP& p_old_view,
const ViewZ& z_view,
878 const ViewQ& q_view,
const ValueType& beta )
880 return Kernel2<StencilA, ViewA, ViewX, ViewNewR, ViewOldR, ViewOldP,
881 ViewNewP, ViewZ, ViewQ, ValueType>{
882 A_stencil, A_view, x_view, r_new_view, r_old_view,
883 p_new_view, p_old_view, z_view, q_view, beta };
890 setStencil(
const std::vector<std::array<int, num_space_dim>>& stencil,
891 const bool is_symmetric,
892 Kokkos::View<
int* [
num_space_dim], MemorySpace>& device_stencil,
893 std::shared_ptr<Halo<memory_space>>& halo,
894 std::shared_ptr<Array_t>& matrix,
895 std::shared_ptr<subarray_type>& halo_matrix )
899 throw std::logic_error(
900 "Reference CG currently does not support symmetry" );
903 auto local_grid = _vectors->layout()->localGrid();
906 device_stencil = Kokkos::View<int* [num_space_dim], MemorySpace>(
907 Kokkos::ViewAllocateWithoutInitializing(
"stencil" ),
909 auto stencil_mirror =
910 Kokkos::create_mirror_view( Kokkos::HostSpace(), device_stencil );
911 for (
unsigned s = 0; s < stencil.size(); ++s )
913 stencil_mirror( s, d ) = stencil[s][d];
914 Kokkos::deep_copy( device_stencil, stencil_mirror );
918 std::set<std::array<int, num_space_dim>> neighbor_set;
919 std::array<int, num_space_dim> neighbor;
921 for (
auto s : stencil )
925 neighbor[d] = ( s[d] == 0 ) ? 0 : s[d] / std::abs( s[d] );
926 neighbor_set.emplace( neighbor );
930 width = std::max( width, std::abs( s[d] ) );
932 std::vector<std::array<int, num_space_dim>> halo_neighbors(
933 neighbor_set.size() );
934 std::copy( neighbor_set.begin(), neighbor_set.end(),
935 halo_neighbors.begin() );
942 matrix = createArray<Scalar, MemorySpace>(
"matrix", matrix_layout );
945 HaloPattern<num_space_dim> pattern;
946 pattern.setNeighbors( halo_neighbors );
947 halo =
createHalo( pattern, width, *halo_matrix );
955 Scalar _residual_norm;
957 Kokkos::View<int* [num_space_dim], MemorySpace> _A_stencil;
958 Kokkos::View<int* [num_space_dim], MemorySpace> _M_stencil;
959 std::shared_ptr<Halo<memory_space>> _A_halo;
960 std::shared_ptr<Halo<memory_space>> _M_halo;
961 std::shared_ptr<Array_t> _A;
962 std::shared_ptr<Array_t> _M;
963 std::shared_ptr<Array_t> _vectors;
964 std::shared_ptr<subarray_type> _A_halo_vectors;
965 std::shared_ptr<subarray_type> _M_halo_vectors;