Cabana 0.8.0-dev
 
Loading...
Searching...
No Matches
Cabana_Grid_ReferenceStructuredSolver.hpp
Go to the documentation of this file.
1/****************************************************************************
2 * Copyright (c) 2018-2023 by the Cabana authors *
3 * All rights reserved. *
4 * *
5 * This file is part of the Cabana library. Cabana is distributed under a *
6 * BSD 3-clause license. For the licensing terms see the LICENSE file in *
7 * the top-level directory. *
8 * *
9 * SPDX-License-Identifier: BSD-3-Clause *
10 ****************************************************************************/
11
16#ifndef CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
17#define CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
18
19#include <Cabana_Grid_Array.hpp>
21#include <Cabana_Grid_Halo.hpp>
26#include <Cabana_Grid_Types.hpp>
27#include <Cabana_Utils.hpp> // FIXME: remove after next release.
28
29#include <Kokkos_Core.hpp>
30#include <Kokkos_Profiling_ScopedRegion.hpp>
31
32#include <array>
33#include <iostream>
34#include <memory>
35#include <numeric>
36#include <set>
37#include <sstream>
38#include <string>
39#include <type_traits>
40#include <vector>
41
42namespace Cabana
43{
44namespace Grid
45{
46//---------------------------------------------------------------------------//
48template <class Scalar, class EntityType, class MeshType, class MemorySpace>
50{
51 public:
53 using entity_type = EntityType;
55 using value_type = Scalar;
56 // FIXME: extracting the self type for backwards compatibility with previous
57 // template on DeviceType. Should simply be MemorySpace after next release.
59 using memory_space = typename MemorySpace::memory_space;
61 using device_type [[deprecated]] = typename memory_space::device_type;
63 using execution_space = typename memory_space::execution_space;
69 static constexpr std::size_t num_space_dim = MeshType::num_space_dim;
70
71 // Destructor.
73
82 virtual void setMatrixStencil(
83 const std::vector<std::array<int, num_space_dim>>& stencil,
84 const bool is_symmetric ) = 0;
85
95 virtual const Array_t& getMatrixValues() = 0;
96
106 const std::vector<std::array<int, num_space_dim>>& stencil,
107 const bool is_symmetric ) = 0;
108
118 virtual const Array_t& getPreconditionerValues() = 0;
119
121 virtual void setTolerance( const double tol ) = 0;
122
124 virtual void setMaxIter( const int max_iter ) = 0;
125
127 virtual void setPrintLevel( const int print_level ) = 0;
128
130 virtual void setup() = 0;
131
137 virtual void solve( const Array_t& b, Array_t& x ) = 0;
138
140 virtual int getNumIter() = 0;
141
143 virtual double getFinalRelativeResidualNorm() = 0;
144};
145
146//---------------------------------------------------------------------------//
148template <class Scalar, class EntityType, class MeshType, class MemorySpace>
150 : public ReferenceStructuredSolver<Scalar, EntityType, MeshType,
151 MemorySpace>
152{
153 public:
155 using base_type =
160 using device_type [[deprecated]] = typename memory_space::device_type;
163
169 using Array_t = typename base_type::Array_t;
173 static constexpr std::size_t num_space_dim = MeshType::num_space_dim;
174
176 template <class ScalarT, class MemorySpaceT, class ArrayLayoutT>
178 {
180 using value_type = ScalarT;
182 using memory_space = MemorySpaceT;
184 const ArrayLayoutT& array_layout;
186 const ArrayLayoutT* layout() const { return &array_layout; }
187 };
188
194 : _tol( 1.0e-6 )
195 , _max_iter( 1000 )
196 , _print_level( 0 )
197 , _num_iter( 0 )
198 , _residual_norm( 0.0 )
199 {
200 // Array layout for vectors (p_old,z,r_old,q,p_new,r_new).
201 auto vector_layout =
202 createArrayLayout( layout.localGrid(), 6, EntityType() );
203 _vectors =
204 createArray<Scalar, MemorySpace>( "cg_vectors", vector_layout );
205 _A_halo_vectors = createSubarray( *_vectors, 0, 2 );
206 _M_halo_vectors = createSubarray( *_vectors, 2, 4 );
207 }
208
218 const std::vector<std::array<int, num_space_dim>>& stencil,
219 const bool is_symmetric = false ) override
220 {
221 setStencil( stencil, is_symmetric, _A_stencil, _A_halo, _A,
222 _A_halo_vectors );
223 }
224
233 const Array_t& getMatrixValues() override { return *_A; }
234
244 const std::vector<std::array<int, num_space_dim>>& stencil,
245 const bool is_symmetric = false ) override
246 {
247 setStencil( stencil, is_symmetric, _M_stencil, _M_halo, _M,
248 _M_halo_vectors );
249 }
250
259 const Array_t& getPreconditionerValues() override { return *_M; }
260
262 void setTolerance( const double tol ) override { _tol = tol; }
263
265 void setMaxIter( const int max_iter ) override { _max_iter = max_iter; }
266
268 void setPrintLevel( const int print_level ) override
269 {
270 _print_level = print_level;
271 }
272
274 void setup() override {}
275
281 void solve( const Array_t& b, Array_t& x ) override
282 {
283 Kokkos::Profiling::ScopedRegion region(
284 "Cabana::Grid::ReferenceStructuredSolver::solve" );
285
286 // Get the local grid.
287 auto local_grid = _vectors->layout()->localGrid();
288
289 // Print banner
290 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
291 std::cout << std::endl
292 << "Preconditioned conjugate gradient" << std::endl;
293
294 // Index space.
295 auto entity_space =
296 local_grid->indexSpace( Own(), EntityType(), Local() );
297
298 // Subarrays.
299 auto p_old = createSubarray( *_vectors, 0, 1 );
300 auto z = createSubarray( *_vectors, 1, 2 );
301 auto r_old = createSubarray( *_vectors, 2, 3 );
302 auto q = createSubarray( *_vectors, 3, 4 );
303 auto p_new = createSubarray( *_vectors, 4, 5 );
304 auto r_new = createSubarray( *_vectors, 5, 6 );
305
306 // Views.
307 auto x_view = x.view();
308 auto b_view = b.view();
309 auto M_view = _M->view();
310 auto A_view = _A->view();
311 auto p_old_view = p_old->view();
312 auto z_view = z->view();
313 auto r_old_view = r_old->view();
314 auto q_view = q->view();
315 auto p_new_view = p_new->view();
316 auto r_new_view = r_new->view();
317
318 // Reset iteration count.
319 _num_iter = 0;
320
321 // Compute the norm of the RHS.
322 std::vector<Scalar> b_norm( 1 );
323 ArrayOp::norm2( b, b_norm );
324
325 // Copy the LHS into p so we can gather it.
326 Kokkos::deep_copy( p_old_view, x_view );
327
328 // Gather the LHS through gatheing p and z.
329 _A_halo->gather( execution_space(), *_A_halo_vectors );
330
331 // Compute the initial residual and norm.
332 _residual_norm = 0.0;
333 auto compute_r0 = createComputeR0( _A_stencil, A_view, p_old_view,
334 b_view, r_old_view );
336 "compute_r0", execution_space(), entity_space,
337 std::integral_constant<std::size_t, num_space_dim>{}, compute_r0,
338 _residual_norm );
339
340 // Finish the global norm reduction.
341 MPI_Allreduce( MPI_IN_PLACE, &_residual_norm, 1,
342 MpiTraits<Scalar>::type(), MPI_SUM,
343 local_grid->globalGrid().comm() );
344
345 // If we already have met our criteria then return.
346 _residual_norm = std::sqrt( _residual_norm ) / b_norm[0];
347 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
348 std::cout << "Iteration " << _num_iter
349 << ": |r|_2 / |b|_2 = " << _residual_norm << std::endl;
350 if ( _residual_norm <= _tol )
351 return;
352
353 // r and q.
354 _M_halo->gather( execution_space(), *_M_halo_vectors );
355
356 // Compute the initial preconditioned residual.
357 Scalar zTr_old = 0.0;
358 auto compute_z0 = createComputeZ0( _M_stencil, M_view, p_old_view,
359 p_new_view, r_old_view, z_view );
361 "compute_z0", execution_space(), entity_space,
362 std::integral_constant<std::size_t, num_space_dim>{}, compute_z0,
363 zTr_old );
364
365 // Finish computation of zTr
366 MPI_Allreduce( MPI_IN_PLACE, &zTr_old, 1, MpiTraits<Scalar>::type(),
367 MPI_SUM, local_grid->globalGrid().comm() );
368
369 // Gather the LHS through gatheing p and z.
370 _A_halo->gather( execution_space(), *_A_halo_vectors );
371
372 // Compute A*p and pT*A*p.
373 Scalar pTAp = 0.0;
374 auto compute_q0 =
375 createComputeQ0( _A_stencil, A_view, p_old_view, q_view );
377 "compute_q0", execution_space(), entity_space,
378 std::integral_constant<std::size_t, num_space_dim>{}, compute_q0,
379 pTAp );
380
381 // Finish the global reduction on pTAp.
382 MPI_Allreduce( MPI_IN_PLACE, &pTAp, 1, MpiTraits<Scalar>::type(),
383 MPI_SUM, local_grid->globalGrid().comm() );
384
385 // Iterate.
386 bool converged = false;
387 Scalar zTr_new = 0.0;
388 Scalar alpha;
389 Scalar beta;
390 while ( _residual_norm > _tol && _num_iter < _max_iter )
391 {
392 // Gather r and q.
393 _M_halo->gather( execution_space(), *_M_halo_vectors );
394
395 // Kernel 1: Compute x, r, residual norm, and zTr
396 alpha = zTr_old / pTAp;
397 zTr_new = 0.0;
398 auto cg_kernel_1 = createKernel1(
399 _M_stencil, M_view, x_view, r_new_view, r_old_view, p_new_view,
400 p_old_view, z_view, q_view, alpha );
402 "cg_kernel_1", execution_space(), entity_space,
403 std::integral_constant<std::size_t, num_space_dim>{},
404 cg_kernel_1, zTr_new );
405
406 // Finish the global reduction on zTr and r_norm.
407 MPI_Allreduce( MPI_IN_PLACE, &zTr_new, 1, MpiTraits<Scalar>::type(),
408 MPI_SUM, local_grid->globalGrid().comm() );
409
410 // Update residual norm
411 _residual_norm = std::sqrt( fabs( zTr_new ) ) / b_norm[0];
412
413 // Increment iteration count.
414 _num_iter++;
415
416 // Output result
417 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
418 std::cout << "Iteration " << _num_iter
419 << ": |r|_2 / |b|_2 = " << _residual_norm
420 << std::endl;
421
422 // Check for convergence.
423 if ( _residual_norm <= _tol )
424 {
425 converged = true;
426 break;
427 }
428
429 // Gather p and z.
430 _A_halo->gather( execution_space(), *_A_halo_vectors );
431
432 // Kernel 2: Compute p, A*p, and p^T*A*p
433 beta = zTr_new / zTr_old;
434 pTAp = 0.0;
435 auto cg_kernel_2 = createKernel2(
436 _A_stencil, A_view, x_view, r_new_view, r_old_view, p_new_view,
437 p_old_view, z_view, q_view, beta );
439 "cg_kernel_2", execution_space(), entity_space,
440 std::integral_constant<std::size_t, num_space_dim>{},
441 cg_kernel_2, pTAp );
442
443 // Finish the global reduction on pTAp.
444 MPI_Allreduce( MPI_IN_PLACE, &pTAp, 1, MpiTraits<Scalar>::type(),
445 MPI_SUM, local_grid->globalGrid().comm() );
446
447 // Update zTr
448 zTr_old = zTr_new;
449 }
450
451 // Output end state.
452 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
453 std::cout << "Finished in " << _num_iter
454 << " iterations, converged to " << _residual_norm
455 << std::endl
456 << std::endl;
457
458 // If we didn't converge throw.
459 if ( !converged )
460 throw std::runtime_error(
461 "Cabana::Grid::ReferenceConjugateGradient::solve: CG solver "
462 "did not converge" );
463 }
464
466 int getNumIter() override { return _num_iter; }
467
469 double getFinalRelativeResidualNorm() override { return _residual_norm; }
470
471 public:
473 template <class StencilA, class ViewA, class ViewOldP, class ViewB,
474 class ViewOldR>
475 struct ComputeR0
476 {
477 StencilA A_stencil;
478 ViewA A_view;
479 ViewOldP p_old_view;
480 ViewB b_view;
481 ViewOldR r_old_view;
482
483 using value_type = typename ViewB::value_type;
484
485 KOKKOS_INLINE_FUNCTION void
486 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
487 const int j, const int k, value_type& result ) const
488 {
489 // Compute the local contribution from matrix-vector
490 // multiplication. Note that we copied x into p for this
491 // operation to easily perform the gather. Only apply the
492 // stencil entry if it is greater than 0.
493 value_type Ax = 0.0;
494 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
495 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
496 Ax += A_view( i, j, k, c ) *
497 p_old_view( i + A_stencil( c, Dim::I ),
498 j + A_stencil( c, Dim::J ),
499 k + A_stencil( c, Dim::K ), 0 );
500
501 // Compute the residual.
502 auto r_new = b_view( i, j, k, 0 ) - Ax;
503
504 // Assign the residual.
505 r_old_view( i, j, k, 0 ) = r_new;
506
507 // Contribute to the reduction.
508 result += r_new * r_new;
509 }
510
511 KOKKOS_INLINE_FUNCTION void
512 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
513 const int j, value_type& result ) const
514 {
515 // Compute the local contribution from matrix-vector
516 // multiplication. Note that we copied x into p for this
517 // operation to easily perform the gather. Only apply the
518 // stencil entry if it is greater than 0.
519 value_type Ax = 0.0;
520 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
521 if ( fabs( A_view( i, j, c ) ) > 0.0 )
522 Ax += A_view( i, j, c ) *
523 p_old_view( i + A_stencil( c, Dim::I ),
524 j + A_stencil( c, Dim::J ), 0 );
525
526 // Compute the residual.
527 auto r_new = b_view( i, j, 0 ) - Ax;
528
529 // Assign the residual.
530 r_old_view( i, j, 0 ) = r_new;
531
532 // Contribute to the reduction.
533 result += r_new * r_new;
534 }
535 };
536
537 template <class StencilA, class ViewA, class ViewOldP, class ViewB,
538 class ViewOldR>
539 auto createComputeR0( const StencilA& A_stencil, const ViewA& A_view,
540 const ViewOldP& p_old_view, const ViewB& b_view,
541 const ViewOldR& r_old_view )
542 {
543 return ComputeR0<StencilA, ViewA, ViewOldP, ViewB, ViewOldR>{
544 A_stencil, A_view, p_old_view, b_view, r_old_view };
545 }
546
547 template <class StencilM, class ViewM, class ViewOldP, class ViewNewP,
548 class ViewOldR, class ViewZ>
549 struct ComputeZ0
550 {
551 StencilM M_stencil;
552 ViewM M_view;
553 ViewOldP p_old_view;
554 ViewNewP p_new_view;
555 ViewOldR r_old_view;
556 ViewZ z_view;
557
558 using value_type = typename ViewZ::value_type;
559
560 KOKKOS_INLINE_FUNCTION void
561 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
562 const int j, const int k, value_type& result ) const
563 {
564 // Compute the local contribution from matrix-vector
565 // multiplication. Only apply the stencil entry if it is
566 // greater than 0.
567 value_type Mr = 0.0;
568 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
569 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
570 Mr += M_view( i, j, k, c ) *
571 r_old_view( i + M_stencil( c, Dim::I ),
572 j + M_stencil( c, Dim::J ),
573 k + M_stencil( c, Dim::K ), 0 );
574 // Write values.
575 z_view( i, j, k, 0 ) = Mr;
576 p_old_view( i, j, k, 0 ) = Mr;
577 p_new_view( i, j, k, 0 ) = Mr;
578
579 // Compute zTr
580 result += Mr * r_old_view( i, j, k, 0 );
581 }
582
583 KOKKOS_INLINE_FUNCTION void
584 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
585 const int j, value_type& result ) const
586 {
587 // Compute the local contribution from matrix-vector
588 // multiplication. Only apply the stencil entry if it is
589 // greater than 0.
590 value_type Mr = 0.0;
591 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
592 if ( fabs( M_view( i, j, c ) ) > 0.0 )
593 Mr += M_view( i, j, c ) *
594 r_old_view( i + M_stencil( c, Dim::I ),
595 j + M_stencil( c, Dim::J ), 0 );
596 // Write values.
597 z_view( i, j, 0 ) = Mr;
598 p_old_view( i, j, 0 ) = Mr;
599 p_new_view( i, j, 0 ) = Mr;
600
601 // Compute zTr
602 result += Mr * r_old_view( i, j, 0 );
603 }
604 };
605
606 template <class StencilM, class ViewM, class ViewOldP, class ViewNewP,
607 class ViewOldR, class ViewZ>
608 auto createComputeZ0( const StencilM& M_stencil, const ViewM& M_view,
609 const ViewOldP& p_old_view,
610 const ViewNewP& p_new_view,
611 const ViewOldR& r_old_view, const ViewZ& z_view )
612 {
613 return ComputeZ0<StencilM, ViewM, ViewOldP, ViewNewP, ViewOldR, ViewZ>{
614 M_stencil, M_view, p_old_view, p_new_view, r_old_view, z_view };
615 }
616
617 template <class StencilA, class ViewA, class ViewOldP, class ViewQ>
618 struct ComputeQ0
619 {
620 StencilA A_stencil;
621 ViewA A_view;
622 ViewOldP p_old_view;
623 ViewQ q_view;
624
625 using value_type = typename ViewQ::value_type;
626
627 KOKKOS_INLINE_FUNCTION void
628 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
629 const int j, const int k, value_type& result ) const
630 {
631 // Compute the local contribution from matrix-vector
632 // multiplication. This computes the updated p vector
633 // in-line to avoid another kernel launch. Only apply the
634 // stencil entry if it is greater than 0.
635 value_type Ap = 0.0;
636 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
637 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
638 Ap += A_view( i, j, k, c ) *
639 ( p_old_view( i + A_stencil( c, Dim::I ),
640 j + A_stencil( c, Dim::J ),
641 k + A_stencil( c, Dim::K ), 0 ) );
642
643 // Write values.
644 q_view( i, j, k, 0 ) = Ap;
645
646 // Compute contribution to the dot product.
647 result += p_old_view( i, j, k, 0 ) * Ap;
648 }
649
650 KOKKOS_INLINE_FUNCTION void
651 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
652 const int j, value_type& result ) const
653 {
654 // Compute the local contribution from matrix-vector
655 // multiplication. This computes the updated p vector
656 // in-line to avoid another kernel launch. Only apply the
657 // stencil entry if it is greater than 0.
658 value_type Ap = 0.0;
659 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
660 if ( fabs( A_view( i, j, c ) ) > 0.0 )
661 Ap += A_view( i, j, c ) *
662 ( p_old_view( i + A_stencil( c, Dim::I ),
663 j + A_stencil( c, Dim::J ), 0 ) );
664
665 // Write values.
666 q_view( i, j, 0 ) = Ap;
667
668 // Compute contribution to the dot product.
669 result += p_old_view( i, j, 0 ) * Ap;
670 }
671 };
672
673 template <class StencilA, class ViewA, class ViewOldP, class ViewQ>
674 auto createComputeQ0( const StencilA& A_stencil, const ViewA& A_view,
675 const ViewOldP& p_old_view, const ViewQ& q_view )
676 {
677 return ComputeQ0<StencilA, ViewA, ViewOldP, ViewQ>{
678 A_stencil, A_view, p_old_view, q_view };
679 }
680
681 template <class StencilM, class ViewM, class ViewX, class ViewNewR,
682 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
683 class ViewQ, class ValueType>
684 struct Kernel1
685 {
686 StencilM M_stencil;
687 ViewM M_view;
688 ViewX x_view;
689 ViewNewR r_new_view;
690 ViewOldR r_old_view;
691 ViewNewP p_new_view;
692 ViewOldP p_old_view;
693 ViewZ z_view;
694 ViewQ q_view;
695 ValueType alpha;
696
697 KOKKOS_INLINE_FUNCTION void
698 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
699 const int j, const int k, ValueType& result ) const
700 {
701 // Compute the local contribution from matrix-vector
702 // multiplication. This computes the updated q vector
703 // in-line to avoid another kernel launch. Only apply the
704 // stencil entry if it is greater than 0.
705 ValueType Mr = 0.0;
706 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
707 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
708 Mr += M_view( i, j, k, c ) *
709 ( r_old_view( i + M_stencil( c, Dim::I ),
710 j + M_stencil( c, Dim::J ),
711 k + M_stencil( c, Dim::K ), 0 ) -
712 alpha * q_view( i + M_stencil( c, Dim::I ),
713 j + M_stencil( c, Dim::J ),
714 k + M_stencil( c, Dim::K ), 0 ) );
715
716 // Compute the updated x.
717 ValueType x_new =
718 x_view( i, j, k, 0 ) + alpha * p_new_view( i, j, k, 0 );
719
720 // Compute the updated residual.
721 ValueType r_new =
722 r_old_view( i, j, k, 0 ) - alpha * q_view( i, j, k, 0 );
723
724 // Write to old p vector.
725 p_old_view( i, j, k, 0 ) = p_new_view( i, j, k, 0 );
726
727 // Write values.
728 x_view( i, j, k, 0 ) = x_new;
729 r_new_view( i, j, k, 0 ) = r_new;
730 z_view( i, j, k, 0 ) = Mr;
731
732 // Compute contribution to the zTr.
733 result += Mr * r_new;
734 }
735
736 KOKKOS_INLINE_FUNCTION void
737 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
738 const int j, ValueType& result ) const
739 {
740 // Compute the local contribution from matrix-vector
741 // multiplication. This computes the updated q vector
742 // in-line to avoid another kernel launch. Only apply the
743 // stencil entry if it is greater than 0.
744 ValueType Mr = 0.0;
745 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
746 if ( fabs( M_view( i, j, c ) ) > 0.0 )
747 Mr += M_view( i, j, c ) *
748 ( r_old_view( i + M_stencil( c, Dim::I ),
749 j + M_stencil( c, Dim::J ), 0 ) -
750 alpha * q_view( i + M_stencil( c, Dim::I ),
751 j + M_stencil( c, Dim::J ), 0 ) );
752
753 // Compute the updated x.
754 ValueType x_new = x_view( i, j, 0 ) + alpha * p_new_view( i, j, 0 );
755
756 // Compute the updated residual.
757 ValueType r_new = r_old_view( i, j, 0 ) - alpha * q_view( i, j, 0 );
758
759 // Write to old p vector.
760 p_old_view( i, j, 0 ) = p_new_view( i, j, 0 );
761
762 // Write values.
763 x_view( i, j, 0 ) = x_new;
764 r_new_view( i, j, 0 ) = r_new;
765 z_view( i, j, 0 ) = Mr;
766
767 // Compute contribution to the zTr.
768 result += Mr * r_new;
769 }
770 };
771
772 template <class StencilM, class ViewM, class ViewX, class ViewNewR,
773 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
774 class ViewQ, class ValueType>
775 auto createKernel1( const StencilM& M_stencil, const ViewM& M_view,
776 const ViewX& x_view, const ViewNewR& r_new_view,
777 const ViewOldR& r_old_view, const ViewNewP& p_new_view,
778 const ViewOldP& p_old_view, const ViewZ& z_view,
779 const ViewQ& q_view, const ValueType& alpha )
780 {
781 return Kernel1<StencilM, ViewM, ViewX, ViewNewR, ViewOldR, ViewOldP,
782 ViewNewP, ViewZ, ViewQ, ValueType>{
783 M_stencil, M_view, x_view, r_new_view, r_old_view,
784 p_new_view, p_old_view, z_view, q_view, alpha };
785 }
786
787 template <class StencilA, class ViewA, class ViewX, class ViewNewR,
788 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
789 class ViewQ, class ValueType>
790 struct Kernel2
791 {
792 StencilA A_stencil;
793 ViewA A_view;
794 ViewX x_view;
795 ViewNewR r_new_view;
796 ViewOldR r_old_view;
797 ViewNewP p_new_view;
798 ViewOldP p_old_view;
799 ViewZ z_view;
800 ViewQ q_view;
801 ValueType beta;
802
803 KOKKOS_INLINE_FUNCTION void
804 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
805 const int j, const int k, ValueType& result ) const
806 {
807 // Compute the local contribution from matrix-vector
808 // multiplication. This computes the updated p vector
809 // in-line to avoid another kernel launch. Only apply the
810 // stencil entry if it is greater than 0.
811 ValueType Ap = 0.0;
812 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
813 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
814 Ap +=
815 A_view( i, j, k, c ) *
816 ( z_view( i + A_stencil( c, Dim::I ),
817 j + A_stencil( c, Dim::J ),
818 k + A_stencil( c, Dim::K ), 0 ) +
819 beta * p_old_view( i + A_stencil( c, Dim::I ),
820 j + A_stencil( c, Dim::J ),
821 k + A_stencil( c, Dim::K ), 0 ) );
822
823 // Compute the updated p.
824 ValueType p_new =
825 z_view( i, j, k, 0 ) + beta * p_old_view( i, j, k, 0 );
826
827 // Write to old residual.
828 r_old_view( i, j, k, 0 ) = r_new_view( i, j, k, 0 );
829
830 // Write values.
831 q_view( i, j, k, 0 ) = Ap;
832 p_new_view( i, j, k, 0 ) = p_new;
833
834 // Compute contribution to the dot product.
835 result += p_new * Ap;
836 }
837
838 KOKKOS_INLINE_FUNCTION void
839 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
840 const int j, ValueType& result ) const
841 {
842 // Compute the local contribution from matrix-vector
843 // multiplication. This computes the updated p vector
844 // in-line to avoid another kernel launch. Only apply the
845 // stencil entry if it is greater than 0.
846 ValueType Ap = 0.0;
847 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
848 if ( fabs( A_view( i, j, c ) ) > 0.0 )
849 Ap +=
850 A_view( i, j, c ) *
851 ( z_view( i + A_stencil( c, Dim::I ),
852 j + A_stencil( c, Dim::J ), 0 ) +
853 beta * p_old_view( i + A_stencil( c, Dim::I ),
854 j + A_stencil( c, Dim::J ), 0 ) );
855
856 // Compute the updated p.
857 ValueType p_new = z_view( i, j, 0 ) + beta * p_old_view( i, j, 0 );
858
859 // Write to old residual.
860 r_old_view( i, j, 0 ) = r_new_view( i, j, 0 );
861
862 // Write values.
863 q_view( i, j, 0 ) = Ap;
864 p_new_view( i, j, 0 ) = p_new;
865
866 // Compute contribution to the dot product.
867 result += p_new * Ap;
868 }
869 };
870
871 template <class StencilA, class ViewA, class ViewX, class ViewNewR,
872 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
873 class ViewQ, class ValueType>
874 auto createKernel2( const StencilA& A_stencil, const ViewA& A_view,
875 const ViewX& x_view, const ViewNewR& r_new_view,
876 const ViewOldR& r_old_view, const ViewNewP& p_new_view,
877 const ViewOldP& p_old_view, const ViewZ& z_view,
878 const ViewQ& q_view, const ValueType& beta )
879 {
880 return Kernel2<StencilA, ViewA, ViewX, ViewNewR, ViewOldR, ViewOldP,
881 ViewNewP, ViewZ, ViewQ, ValueType>{
882 A_stencil, A_view, x_view, r_new_view, r_old_view,
883 p_new_view, p_old_view, z_view, q_view, beta };
884 }
886
887 private:
888 // Set the stencil of a matrix.
889 void
890 setStencil( const std::vector<std::array<int, num_space_dim>>& stencil,
891 const bool is_symmetric,
892 Kokkos::View<int* [num_space_dim], MemorySpace>& device_stencil,
893 std::shared_ptr<Halo<memory_space>>& halo,
894 std::shared_ptr<Array_t>& matrix,
895 std::shared_ptr<subarray_type>& halo_matrix )
896 {
897 // For now we don't support symmetry.
898 if ( is_symmetric )
899 throw std::logic_error(
900 "Reference CG currently does not support symmetry" );
901
902 // Get the local grid.
903 auto local_grid = _vectors->layout()->localGrid();
904
905 // Copy stencil to the device.
906 device_stencil = Kokkos::View<int* [num_space_dim], MemorySpace>(
907 Kokkos::ViewAllocateWithoutInitializing( "stencil" ),
908 stencil.size() );
909 auto stencil_mirror =
910 Kokkos::create_mirror_view( Kokkos::HostSpace(), device_stencil );
911 for ( unsigned s = 0; s < stencil.size(); ++s )
912 for ( std::size_t d = 0; d < num_space_dim; ++d )
913 stencil_mirror( s, d ) = stencil[s][d];
914 Kokkos::deep_copy( device_stencil, stencil_mirror );
915
916 // Compose the halo pattern and compute how wide the halo needs to be
917 // to gather all elements accessed by the stencil.
918 std::set<std::array<int, num_space_dim>> neighbor_set;
919 std::array<int, num_space_dim> neighbor;
920 int width = 0;
921 for ( auto s : stencil )
922 {
923 // Compse a set of the neighbor ranks based on the stencil.
924 for ( std::size_t d = 0; d < num_space_dim; ++d )
925 neighbor[d] = ( s[d] == 0 ) ? 0 : s[d] / std::abs( s[d] );
926 neighbor_set.emplace( neighbor );
927
928 // Compute the width of the halo needed to apply the stencil.
929 for ( std::size_t d = 0; d < num_space_dim; ++d )
930 width = std::max( width, std::abs( s[d] ) );
931 }
932 std::vector<std::array<int, num_space_dim>> halo_neighbors(
933 neighbor_set.size() );
934 std::copy( neighbor_set.begin(), neighbor_set.end(),
935 halo_neighbors.begin() );
936
937 // Create a new layout.
938 auto matrix_layout =
939 createArrayLayout( local_grid, stencil.size(), EntityType() );
940
941 // Allocate the matrix.
942 matrix = createArray<Scalar, MemorySpace>( "matrix", matrix_layout );
943
944 // Build the halo.
945 HaloPattern<num_space_dim> pattern;
946 pattern.setNeighbors( halo_neighbors );
947 halo = createHalo( pattern, width, *halo_matrix );
948 }
949
950 private:
951 Scalar _tol;
952 int _max_iter;
953 int _print_level;
954 int _num_iter;
955 Scalar _residual_norm;
956 int _diag_entry;
957 Kokkos::View<int* [num_space_dim], MemorySpace> _A_stencil;
958 Kokkos::View<int* [num_space_dim], MemorySpace> _M_stencil;
959 std::shared_ptr<Halo<memory_space>> _A_halo;
960 std::shared_ptr<Halo<memory_space>> _M_halo;
961 std::shared_ptr<Array_t> _A;
962 std::shared_ptr<Array_t> _M;
963 std::shared_ptr<Array_t> _vectors;
964 std::shared_ptr<subarray_type> _A_halo_vectors;
965 std::shared_ptr<subarray_type> _M_halo_vectors;
966};
967
968//---------------------------------------------------------------------------//
969// Builders.
970//---------------------------------------------------------------------------//
976template <class Scalar, class MemorySpace, class EntityType, class MeshType>
977std::shared_ptr<
986
987//---------------------------------------------------------------------------//
988
989} // namespace Grid
990} // namespace Cabana
991
992#endif // end CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
Grid field arrays.
std::shared_ptr< Array< Scalar, EntityType, MeshType, typename Array< Scalar, EntityType, MeshType, Params... >::subview_layout, typename Array< Scalar, EntityType, MeshType, Params... >::memory_space, typename Array< Scalar, EntityType, MeshType, Params... >::subview_memory_traits > > createSubarray(const Array< Scalar, EntityType, MeshType, Params... > &array, const int dof_min, const int dof_max)
Create a subarray of the given array over the given range of degrees of freedom.
Definition Cabana_Grid_Array.hpp:376
void norm2(const Array_t &array, std::vector< typename Array_t::value_type > &norms)
Calculate the two-norm of the owned elements of the array.
Definition Cabana_Grid_Array.hpp:1111
std::shared_ptr< Array< Scalar, EntityType, MeshType, Params... > > createArray(const std::string &label, const std::shared_ptr< ArrayLayout< EntityType, MeshType > > &layout)
Create an array with the given array layout. Views are constructed over the ghosted index space of th...
Definition Cabana_Grid_Array.hpp:351
std::shared_ptr< ArrayLayout< EntityType, MeshType > > createArrayLayout(const std::shared_ptr< LocalGrid< MeshType > > &local_grid, const int dofs_per_entity, EntityType)
Create an array layout over the entities of a local grid.
Definition Cabana_Grid_Array.hpp:189
Multi-node grid scatter/gather.
auto createHalo(const Pattern &pattern, const int width, const ArrayTypes &... arrays)
Halo creation function.
Definition Cabana_Grid_Halo.hpp:955
Logical grid indexing.
Logical grid extension of Kokkos parallel iteration.
void grid_parallel_reduce(const std::string &label, const ExecutionSpace &exec_space, const IndexSpace< N > &index_space, const FunctorType &functor, ReduceType &reducer)
Execute a reduction functor in parallel with a multidimensional execution policy specified by the giv...
Definition Cabana_Grid_Parallel.hpp:436
std::shared_ptr< ReferenceConjugateGradient< Scalar, EntityType, MeshType, MemorySpace > > createReferenceConjugateGradient(const ArrayLayout< EntityType, MeshType > &layout)
Creation function for reference structured preconditioned block conjugate gradient.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:979
Grid type tags.
Cabana utilities.
Entity layout for array data on the local mesh.
Definition Cabana_Grid_Array.hpp:46
const std::shared_ptr< LocalGrid< MeshType > > localGrid() const
Get the local grid over which this layout is defined.
Definition Cabana_Grid_Array.hpp:71
Array of field data on the local mesh.
Definition Cabana_Grid_Array.hpp:229
Array< Scalar, EntityType, MeshType, subview_layout, memory_space, subview_memory_traits > subarray_type
Definition Cabana_Grid_Array.hpp:314
Reference structured preconditioned block conjugate gradient implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:152
void setup() override
Setup the problem.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:274
static constexpr std::size_t num_space_dim
Spatial dimension.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:173
void solve(const Array_t &b, Array_t &x) override
Solve the problem Ax = b for x.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:281
void setMatrixStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric=false) override
Set the matrix stencil.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:217
ReferenceConjugateGradient(const ArrayLayout< EntityType, MeshType > &layout)
Constructor.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:192
typename base_type::execution_space execution_space
Default execution space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:162
typename base_type::Array_t Array_t
Array type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:169
const Array_t & getPreconditionerValues() override
Get the preconditioner values.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:259
void setMaxIter(const int max_iter) override
Set maximum iteration implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:265
void setPreconditionerStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric=false) override
Set the preconditioner stencil.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:243
void setTolerance(const double tol) override
Set convergence tolerance implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:262
typename base_type::subarray_type subarray_type
SubArray type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:171
typename base_type::entity_type entity_type
Entity type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:165
int getNumIter() override
Get the number of iterations taken on the last solve.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:466
const Array_t & getMatrixValues() override
Get the matrix values.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:233
double getFinalRelativeResidualNorm() override
Get the relative residual norm achieved on the last solve.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:469
void setPrintLevel(const int print_level) override
Set the output level.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:268
ReferenceStructuredSolver< Scalar, EntityType, MeshType, MemorySpace > base_type
Base type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:155
typename base_type::value_type value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:167
typename base_type::memory_space memory_space
Memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:158
Reference preconditioned structured solver interface.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:50
virtual void setMaxIter(const int max_iter)=0
Set maximum iteration implementation.
virtual double getFinalRelativeResidualNorm()=0
Get the relative residual norm achieved on the last solve.
typename Array_t::subarray_type subarray_type
SubArray type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:67
typename memory_space::execution_space execution_space
Default execution space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:63
typename MemorySpace::memory_space memory_space
Memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:59
virtual void setPreconditionerStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric)=0
Set the preconditioner stencil.
virtual const Array_t & getMatrixValues()=0
Get the matrix values.
static constexpr std::size_t num_space_dim
Spatial dimension.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:69
virtual void solve(const Array_t &b, Array_t &x)=0
Solve the problem Ax = b for x.
virtual void setMatrixStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric)=0
Set the matrix stencil.
EntityType entity_type
Entity type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:53
virtual void setTolerance(const double tol)=0
Set convergence tolerance implementation.
virtual void setup()=0
Setup the problem.
Scalar value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:55
virtual const Array_t & getPreconditionerValues()=0
Get the preconditioner values.
Array< Scalar, EntityType, MeshType, MemorySpace > Array_t
Array type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:65
virtual void setPrintLevel(const int print_level)=0
Set the output level.
virtual int getNumIter()=0
Get the number of iterations taken on the last solve.
Core: particle data structures and algorithms.
Definition Cabana_AoSoA.hpp:36
Local index tag.
Definition Cabana_Grid_Types.hpp:208
Definition Cabana_Grid_MpiTraits.hpp:32
Owned decomposition tag.
Definition Cabana_Grid_Types.hpp:190
Array-like container to hold layout and data information.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:178
ScalarT value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:180
MemorySpaceT memory_space
Kokkos memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:182
const ArrayLayoutT * layout() const
Get the array layout.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:186
const ArrayLayoutT & array_layout
Array layout.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:184