Cabana 0.8.0-dev
 
Loading...
Searching...
No Matches
Cabana_Grid_ReferenceStructuredSolver.hpp
Go to the documentation of this file.
1/****************************************************************************
2 * Copyright (c) 2018-2023 by the Cabana authors *
3 * All rights reserved. *
4 * *
5 * This file is part of the Cabana library. Cabana is distributed under a *
6 * BSD 3-clause license. For the licensing terms see the LICENSE file in *
7 * the top-level directory. *
8 * *
9 * SPDX-License-Identifier: BSD-3-Clause *
10 ****************************************************************************/
11
16#ifndef CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
17#define CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
18
19#include <Cabana_Grid_Array.hpp>
21#include <Cabana_Grid_Halo.hpp>
26#include <Cabana_Grid_Types.hpp>
27#include <Cabana_Utils.hpp> // FIXME: remove after next release.
28
29#include <Kokkos_Core.hpp>
30#include <Kokkos_Profiling_ScopedRegion.hpp>
31
32#include <array>
33#include <iostream>
34#include <memory>
35#include <numeric>
36#include <set>
37#include <sstream>
38#include <string>
39#include <type_traits>
40#include <vector>
41
42namespace Cabana
43{
44namespace Grid
45{
46//---------------------------------------------------------------------------//
48template <class Scalar, class EntityType, class MeshType, class MemorySpace>
50{
51 public:
53 using entity_type = EntityType;
55 using value_type = Scalar;
57 using memory_space = MemorySpace;
58 static_assert( Kokkos::is_memory_space<MemorySpace>() );
60 using execution_space = typename memory_space::execution_space;
66 static constexpr std::size_t num_space_dim = MeshType::num_space_dim;
67
68 // Destructor.
70
79 virtual void setMatrixStencil(
80 const std::vector<std::array<int, num_space_dim>>& stencil,
81 const bool is_symmetric ) = 0;
82
92 virtual const Array_t& getMatrixValues() = 0;
93
103 const std::vector<std::array<int, num_space_dim>>& stencil,
104 const bool is_symmetric ) = 0;
105
115 virtual const Array_t& getPreconditionerValues() = 0;
116
118 virtual void setTolerance( const double tol ) = 0;
119
121 virtual void setMaxIter( const int max_iter ) = 0;
122
124 virtual void setPrintLevel( const int print_level ) = 0;
125
127 virtual void setup() = 0;
128
134 virtual void solve( const Array_t& b, Array_t& x ) = 0;
135
137 virtual int getNumIter() = 0;
138
140 virtual double getFinalRelativeResidualNorm() = 0;
141};
142
143//---------------------------------------------------------------------------//
145template <class Scalar, class EntityType, class MeshType, class MemorySpace>
147 : public ReferenceStructuredSolver<Scalar, EntityType, MeshType,
148 MemorySpace>
149{
150 public:
152 using base_type =
158
164 using Array_t = typename base_type::Array_t;
168 static constexpr std::size_t num_space_dim = MeshType::num_space_dim;
169
171 template <class ScalarT, class MemorySpaceT, class ArrayLayoutT>
173 {
175 using value_type = ScalarT;
177 using memory_space = MemorySpaceT;
179 const ArrayLayoutT& array_layout;
181 const ArrayLayoutT* layout() const { return &array_layout; }
182 };
183
189 : _tol( 1.0e-6 )
190 , _max_iter( 1000 )
191 , _print_level( 0 )
192 , _num_iter( 0 )
193 , _residual_norm( 0.0 )
194 {
195 // Array layout for vectors (p_old,z,r_old,q,p_new,r_new).
196 auto vector_layout =
197 createArrayLayout( layout.localGrid(), 6, EntityType() );
198 _vectors =
199 createArray<Scalar, MemorySpace>( "cg_vectors", vector_layout );
200 _A_halo_vectors = createSubarray( *_vectors, 0, 2 );
201 _M_halo_vectors = createSubarray( *_vectors, 2, 4 );
202 }
203
213 const std::vector<std::array<int, num_space_dim>>& stencil,
214 const bool is_symmetric = false ) override
215 {
216 setStencil( stencil, is_symmetric, _A_stencil, _A_halo, _A,
217 _A_halo_vectors );
218 }
219
228 const Array_t& getMatrixValues() override { return *_A; }
229
239 const std::vector<std::array<int, num_space_dim>>& stencil,
240 const bool is_symmetric = false ) override
241 {
242 setStencil( stencil, is_symmetric, _M_stencil, _M_halo, _M,
243 _M_halo_vectors );
244 }
245
254 const Array_t& getPreconditionerValues() override { return *_M; }
255
257 void setTolerance( const double tol ) override { _tol = tol; }
258
260 void setMaxIter( const int max_iter ) override { _max_iter = max_iter; }
261
263 void setPrintLevel( const int print_level ) override
264 {
265 _print_level = print_level;
266 }
267
269 void setup() override {}
270
276 void solve( const Array_t& b, Array_t& x ) override
277 {
278 Kokkos::Profiling::ScopedRegion region(
279 "Cabana::Grid::ReferenceStructuredSolver::solve" );
280
281 // Get the local grid.
282 auto local_grid = _vectors->layout()->localGrid();
283
284 // Print banner
285 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
286 std::cout << std::endl
287 << "Preconditioned conjugate gradient" << std::endl;
288
289 // Index space.
290 auto entity_space =
291 local_grid->indexSpace( Own(), EntityType(), Local() );
292
293 // Subarrays.
294 auto p_old = createSubarray( *_vectors, 0, 1 );
295 auto z = createSubarray( *_vectors, 1, 2 );
296 auto r_old = createSubarray( *_vectors, 2, 3 );
297 auto q = createSubarray( *_vectors, 3, 4 );
298 auto p_new = createSubarray( *_vectors, 4, 5 );
299 auto r_new = createSubarray( *_vectors, 5, 6 );
300
301 // Views.
302 auto x_view = x.view();
303 auto b_view = b.view();
304 auto M_view = _M->view();
305 auto A_view = _A->view();
306 auto p_old_view = p_old->view();
307 auto z_view = z->view();
308 auto r_old_view = r_old->view();
309 auto q_view = q->view();
310 auto p_new_view = p_new->view();
311 auto r_new_view = r_new->view();
312
313 // Reset iteration count.
314 _num_iter = 0;
315
316 // Compute the norm of the RHS.
317 std::vector<Scalar> b_norm( 1 );
318 ArrayOp::norm2( b, b_norm );
319
320 // Copy the LHS into p so we can gather it.
321 Kokkos::deep_copy( p_old_view, x_view );
322
323 // Gather the LHS through gatheing p and z.
324 _A_halo->gather( execution_space(), *_A_halo_vectors );
325
326 // Compute the initial residual and norm.
327 _residual_norm = 0.0;
328 auto compute_r0 = createComputeR0( _A_stencil, A_view, p_old_view,
329 b_view, r_old_view );
331 "compute_r0", execution_space(), entity_space,
332 std::integral_constant<std::size_t, num_space_dim>{}, compute_r0,
333 _residual_norm );
334
335 // Finish the global norm reduction.
336 MPI_Allreduce( MPI_IN_PLACE, &_residual_norm, 1,
337 MpiTraits<Scalar>::type(), MPI_SUM,
338 local_grid->globalGrid().comm() );
339
340 // If we already have met our criteria then return.
341 _residual_norm = std::sqrt( _residual_norm ) / b_norm[0];
342 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
343 std::cout << "Iteration " << _num_iter
344 << ": |r|_2 / |b|_2 = " << _residual_norm << std::endl;
345 if ( _residual_norm <= _tol )
346 return;
347
348 // r and q.
349 _M_halo->gather( execution_space(), *_M_halo_vectors );
350
351 // Compute the initial preconditioned residual.
352 Scalar zTr_old = 0.0;
353 auto compute_z0 = createComputeZ0( _M_stencil, M_view, p_old_view,
354 p_new_view, r_old_view, z_view );
356 "compute_z0", execution_space(), entity_space,
357 std::integral_constant<std::size_t, num_space_dim>{}, compute_z0,
358 zTr_old );
359
360 // Finish computation of zTr
361 MPI_Allreduce( MPI_IN_PLACE, &zTr_old, 1, MpiTraits<Scalar>::type(),
362 MPI_SUM, local_grid->globalGrid().comm() );
363
364 // Gather the LHS through gatheing p and z.
365 _A_halo->gather( execution_space(), *_A_halo_vectors );
366
367 // Compute A*p and pT*A*p.
368 Scalar pTAp = 0.0;
369 auto compute_q0 =
370 createComputeQ0( _A_stencil, A_view, p_old_view, q_view );
372 "compute_q0", execution_space(), entity_space,
373 std::integral_constant<std::size_t, num_space_dim>{}, compute_q0,
374 pTAp );
375
376 // Finish the global reduction on pTAp.
377 MPI_Allreduce( MPI_IN_PLACE, &pTAp, 1, MpiTraits<Scalar>::type(),
378 MPI_SUM, local_grid->globalGrid().comm() );
379
380 // Iterate.
381 bool converged = false;
382 Scalar zTr_new = 0.0;
383 Scalar alpha;
384 Scalar beta;
385 while ( _residual_norm > _tol && _num_iter < _max_iter )
386 {
387 // Gather r and q.
388 _M_halo->gather( execution_space(), *_M_halo_vectors );
389
390 // Kernel 1: Compute x, r, residual norm, and zTr
391 alpha = zTr_old / pTAp;
392 zTr_new = 0.0;
393 auto cg_kernel_1 = createKernel1(
394 _M_stencil, M_view, x_view, r_new_view, r_old_view, p_new_view,
395 p_old_view, z_view, q_view, alpha );
397 "cg_kernel_1", execution_space(), entity_space,
398 std::integral_constant<std::size_t, num_space_dim>{},
399 cg_kernel_1, zTr_new );
400
401 // Finish the global reduction on zTr and r_norm.
402 MPI_Allreduce( MPI_IN_PLACE, &zTr_new, 1, MpiTraits<Scalar>::type(),
403 MPI_SUM, local_grid->globalGrid().comm() );
404
405 // Update residual norm
406 _residual_norm = std::sqrt( fabs( zTr_new ) ) / b_norm[0];
407
408 // Increment iteration count.
409 _num_iter++;
410
411 // Output result
412 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
413 std::cout << "Iteration " << _num_iter
414 << ": |r|_2 / |b|_2 = " << _residual_norm
415 << std::endl;
416
417 // Check for convergence.
418 if ( _residual_norm <= _tol )
419 {
420 converged = true;
421 break;
422 }
423
424 // Gather p and z.
425 _A_halo->gather( execution_space(), *_A_halo_vectors );
426
427 // Kernel 2: Compute p, A*p, and p^T*A*p
428 beta = zTr_new / zTr_old;
429 pTAp = 0.0;
430 auto cg_kernel_2 = createKernel2(
431 _A_stencil, A_view, x_view, r_new_view, r_old_view, p_new_view,
432 p_old_view, z_view, q_view, beta );
434 "cg_kernel_2", execution_space(), entity_space,
435 std::integral_constant<std::size_t, num_space_dim>{},
436 cg_kernel_2, pTAp );
437
438 // Finish the global reduction on pTAp.
439 MPI_Allreduce( MPI_IN_PLACE, &pTAp, 1, MpiTraits<Scalar>::type(),
440 MPI_SUM, local_grid->globalGrid().comm() );
441
442 // Update zTr
443 zTr_old = zTr_new;
444 }
445
446 // Output end state.
447 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
448 std::cout << "Finished in " << _num_iter
449 << " iterations, converged to " << _residual_norm
450 << std::endl
451 << std::endl;
452
453 // If we didn't converge throw.
454 if ( !converged )
455 throw std::runtime_error(
456 "Cabana::Grid::ReferenceConjugateGradient::solve: CG solver "
457 "did not converge" );
458 }
459
461 int getNumIter() override { return _num_iter; }
462
464 double getFinalRelativeResidualNorm() override { return _residual_norm; }
465
466 public:
468 template <class StencilA, class ViewA, class ViewOldP, class ViewB,
469 class ViewOldR>
470 struct ComputeR0
471 {
472 StencilA A_stencil;
473 ViewA A_view;
474 ViewOldP p_old_view;
475 ViewB b_view;
476 ViewOldR r_old_view;
477
478 using value_type = typename ViewB::value_type;
479
480 KOKKOS_INLINE_FUNCTION void
481 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
482 const int j, const int k, value_type& result ) const
483 {
484 // Compute the local contribution from matrix-vector
485 // multiplication. Note that we copied x into p for this
486 // operation to easily perform the gather. Only apply the
487 // stencil entry if it is greater than 0.
488 value_type Ax = 0.0;
489 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
490 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
491 Ax += A_view( i, j, k, c ) *
492 p_old_view( i + A_stencil( c, Dim::I ),
493 j + A_stencil( c, Dim::J ),
494 k + A_stencil( c, Dim::K ), 0 );
495
496 // Compute the residual.
497 auto r_new = b_view( i, j, k, 0 ) - Ax;
498
499 // Assign the residual.
500 r_old_view( i, j, k, 0 ) = r_new;
501
502 // Contribute to the reduction.
503 result += r_new * r_new;
504 }
505
506 KOKKOS_INLINE_FUNCTION void
507 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
508 const int j, value_type& result ) const
509 {
510 // Compute the local contribution from matrix-vector
511 // multiplication. Note that we copied x into p for this
512 // operation to easily perform the gather. Only apply the
513 // stencil entry if it is greater than 0.
514 value_type Ax = 0.0;
515 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
516 if ( fabs( A_view( i, j, c ) ) > 0.0 )
517 Ax += A_view( i, j, c ) *
518 p_old_view( i + A_stencil( c, Dim::I ),
519 j + A_stencil( c, Dim::J ), 0 );
520
521 // Compute the residual.
522 auto r_new = b_view( i, j, 0 ) - Ax;
523
524 // Assign the residual.
525 r_old_view( i, j, 0 ) = r_new;
526
527 // Contribute to the reduction.
528 result += r_new * r_new;
529 }
530 };
531
532 template <class StencilA, class ViewA, class ViewOldP, class ViewB,
533 class ViewOldR>
534 auto createComputeR0( const StencilA& A_stencil, const ViewA& A_view,
535 const ViewOldP& p_old_view, const ViewB& b_view,
536 const ViewOldR& r_old_view )
537 {
538 return ComputeR0<StencilA, ViewA, ViewOldP, ViewB, ViewOldR>{
539 A_stencil, A_view, p_old_view, b_view, r_old_view };
540 }
541
542 template <class StencilM, class ViewM, class ViewOldP, class ViewNewP,
543 class ViewOldR, class ViewZ>
544 struct ComputeZ0
545 {
546 StencilM M_stencil;
547 ViewM M_view;
548 ViewOldP p_old_view;
549 ViewNewP p_new_view;
550 ViewOldR r_old_view;
551 ViewZ z_view;
552
553 using value_type = typename ViewZ::value_type;
554
555 KOKKOS_INLINE_FUNCTION void
556 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
557 const int j, const int k, value_type& result ) const
558 {
559 // Compute the local contribution from matrix-vector
560 // multiplication. Only apply the stencil entry if it is
561 // greater than 0.
562 value_type Mr = 0.0;
563 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
564 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
565 Mr += M_view( i, j, k, c ) *
566 r_old_view( i + M_stencil( c, Dim::I ),
567 j + M_stencil( c, Dim::J ),
568 k + M_stencil( c, Dim::K ), 0 );
569 // Write values.
570 z_view( i, j, k, 0 ) = Mr;
571 p_old_view( i, j, k, 0 ) = Mr;
572 p_new_view( i, j, k, 0 ) = Mr;
573
574 // Compute zTr
575 result += Mr * r_old_view( i, j, k, 0 );
576 }
577
578 KOKKOS_INLINE_FUNCTION void
579 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
580 const int j, value_type& result ) const
581 {
582 // Compute the local contribution from matrix-vector
583 // multiplication. Only apply the stencil entry if it is
584 // greater than 0.
585 value_type Mr = 0.0;
586 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
587 if ( fabs( M_view( i, j, c ) ) > 0.0 )
588 Mr += M_view( i, j, c ) *
589 r_old_view( i + M_stencil( c, Dim::I ),
590 j + M_stencil( c, Dim::J ), 0 );
591 // Write values.
592 z_view( i, j, 0 ) = Mr;
593 p_old_view( i, j, 0 ) = Mr;
594 p_new_view( i, j, 0 ) = Mr;
595
596 // Compute zTr
597 result += Mr * r_old_view( i, j, 0 );
598 }
599 };
600
601 template <class StencilM, class ViewM, class ViewOldP, class ViewNewP,
602 class ViewOldR, class ViewZ>
603 auto createComputeZ0( const StencilM& M_stencil, const ViewM& M_view,
604 const ViewOldP& p_old_view,
605 const ViewNewP& p_new_view,
606 const ViewOldR& r_old_view, const ViewZ& z_view )
607 {
608 return ComputeZ0<StencilM, ViewM, ViewOldP, ViewNewP, ViewOldR, ViewZ>{
609 M_stencil, M_view, p_old_view, p_new_view, r_old_view, z_view };
610 }
611
612 template <class StencilA, class ViewA, class ViewOldP, class ViewQ>
613 struct ComputeQ0
614 {
615 StencilA A_stencil;
616 ViewA A_view;
617 ViewOldP p_old_view;
618 ViewQ q_view;
619
620 using value_type = typename ViewQ::value_type;
621
622 KOKKOS_INLINE_FUNCTION void
623 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
624 const int j, const int k, value_type& result ) const
625 {
626 // Compute the local contribution from matrix-vector
627 // multiplication. This computes the updated p vector
628 // in-line to avoid another kernel launch. Only apply the
629 // stencil entry if it is greater than 0.
630 value_type Ap = 0.0;
631 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
632 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
633 Ap += A_view( i, j, k, c ) *
634 ( p_old_view( i + A_stencil( c, Dim::I ),
635 j + A_stencil( c, Dim::J ),
636 k + A_stencil( c, Dim::K ), 0 ) );
637
638 // Write values.
639 q_view( i, j, k, 0 ) = Ap;
640
641 // Compute contribution to the dot product.
642 result += p_old_view( i, j, k, 0 ) * Ap;
643 }
644
645 KOKKOS_INLINE_FUNCTION void
646 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
647 const int j, value_type& result ) const
648 {
649 // Compute the local contribution from matrix-vector
650 // multiplication. This computes the updated p vector
651 // in-line to avoid another kernel launch. Only apply the
652 // stencil entry if it is greater than 0.
653 value_type Ap = 0.0;
654 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
655 if ( fabs( A_view( i, j, c ) ) > 0.0 )
656 Ap += A_view( i, j, c ) *
657 ( p_old_view( i + A_stencil( c, Dim::I ),
658 j + A_stencil( c, Dim::J ), 0 ) );
659
660 // Write values.
661 q_view( i, j, 0 ) = Ap;
662
663 // Compute contribution to the dot product.
664 result += p_old_view( i, j, 0 ) * Ap;
665 }
666 };
667
668 template <class StencilA, class ViewA, class ViewOldP, class ViewQ>
669 auto createComputeQ0( const StencilA& A_stencil, const ViewA& A_view,
670 const ViewOldP& p_old_view, const ViewQ& q_view )
671 {
672 return ComputeQ0<StencilA, ViewA, ViewOldP, ViewQ>{
673 A_stencil, A_view, p_old_view, q_view };
674 }
675
676 template <class StencilM, class ViewM, class ViewX, class ViewNewR,
677 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
678 class ViewQ, class ValueType>
679 struct Kernel1
680 {
681 StencilM M_stencil;
682 ViewM M_view;
683 ViewX x_view;
684 ViewNewR r_new_view;
685 ViewOldR r_old_view;
686 ViewNewP p_new_view;
687 ViewOldP p_old_view;
688 ViewZ z_view;
689 ViewQ q_view;
690 ValueType alpha;
691
692 KOKKOS_INLINE_FUNCTION void
693 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
694 const int j, const int k, ValueType& result ) const
695 {
696 // Compute the local contribution from matrix-vector
697 // multiplication. This computes the updated q vector
698 // in-line to avoid another kernel launch. Only apply the
699 // stencil entry if it is greater than 0.
700 ValueType Mr = 0.0;
701 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
702 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
703 Mr += M_view( i, j, k, c ) *
704 ( r_old_view( i + M_stencil( c, Dim::I ),
705 j + M_stencil( c, Dim::J ),
706 k + M_stencil( c, Dim::K ), 0 ) -
707 alpha * q_view( i + M_stencil( c, Dim::I ),
708 j + M_stencil( c, Dim::J ),
709 k + M_stencil( c, Dim::K ), 0 ) );
710
711 // Compute the updated x.
712 ValueType x_new =
713 x_view( i, j, k, 0 ) + alpha * p_new_view( i, j, k, 0 );
714
715 // Compute the updated residual.
716 ValueType r_new =
717 r_old_view( i, j, k, 0 ) - alpha * q_view( i, j, k, 0 );
718
719 // Write to old p vector.
720 p_old_view( i, j, k, 0 ) = p_new_view( i, j, k, 0 );
721
722 // Write values.
723 x_view( i, j, k, 0 ) = x_new;
724 r_new_view( i, j, k, 0 ) = r_new;
725 z_view( i, j, k, 0 ) = Mr;
726
727 // Compute contribution to the zTr.
728 result += Mr * r_new;
729 }
730
731 KOKKOS_INLINE_FUNCTION void
732 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
733 const int j, ValueType& result ) const
734 {
735 // Compute the local contribution from matrix-vector
736 // multiplication. This computes the updated q vector
737 // in-line to avoid another kernel launch. Only apply the
738 // stencil entry if it is greater than 0.
739 ValueType Mr = 0.0;
740 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
741 if ( fabs( M_view( i, j, c ) ) > 0.0 )
742 Mr += M_view( i, j, c ) *
743 ( r_old_view( i + M_stencil( c, Dim::I ),
744 j + M_stencil( c, Dim::J ), 0 ) -
745 alpha * q_view( i + M_stencil( c, Dim::I ),
746 j + M_stencil( c, Dim::J ), 0 ) );
747
748 // Compute the updated x.
749 ValueType x_new = x_view( i, j, 0 ) + alpha * p_new_view( i, j, 0 );
750
751 // Compute the updated residual.
752 ValueType r_new = r_old_view( i, j, 0 ) - alpha * q_view( i, j, 0 );
753
754 // Write to old p vector.
755 p_old_view( i, j, 0 ) = p_new_view( i, j, 0 );
756
757 // Write values.
758 x_view( i, j, 0 ) = x_new;
759 r_new_view( i, j, 0 ) = r_new;
760 z_view( i, j, 0 ) = Mr;
761
762 // Compute contribution to the zTr.
763 result += Mr * r_new;
764 }
765 };
766
767 template <class StencilM, class ViewM, class ViewX, class ViewNewR,
768 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
769 class ViewQ, class ValueType>
770 auto createKernel1( const StencilM& M_stencil, const ViewM& M_view,
771 const ViewX& x_view, const ViewNewR& r_new_view,
772 const ViewOldR& r_old_view, const ViewNewP& p_new_view,
773 const ViewOldP& p_old_view, const ViewZ& z_view,
774 const ViewQ& q_view, const ValueType& alpha )
775 {
776 return Kernel1<StencilM, ViewM, ViewX, ViewNewR, ViewOldR, ViewOldP,
777 ViewNewP, ViewZ, ViewQ, ValueType>{
778 M_stencil, M_view, x_view, r_new_view, r_old_view,
779 p_new_view, p_old_view, z_view, q_view, alpha };
780 }
781
782 template <class StencilA, class ViewA, class ViewX, class ViewNewR,
783 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
784 class ViewQ, class ValueType>
785 struct Kernel2
786 {
787 StencilA A_stencil;
788 ViewA A_view;
789 ViewX x_view;
790 ViewNewR r_new_view;
791 ViewOldR r_old_view;
792 ViewNewP p_new_view;
793 ViewOldP p_old_view;
794 ViewZ z_view;
795 ViewQ q_view;
796 ValueType beta;
797
798 KOKKOS_INLINE_FUNCTION void
799 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
800 const int j, const int k, ValueType& result ) const
801 {
802 // Compute the local contribution from matrix-vector
803 // multiplication. This computes the updated p vector
804 // in-line to avoid another kernel launch. Only apply the
805 // stencil entry if it is greater than 0.
806 ValueType Ap = 0.0;
807 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
808 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
809 Ap +=
810 A_view( i, j, k, c ) *
811 ( z_view( i + A_stencil( c, Dim::I ),
812 j + A_stencil( c, Dim::J ),
813 k + A_stencil( c, Dim::K ), 0 ) +
814 beta * p_old_view( i + A_stencil( c, Dim::I ),
815 j + A_stencil( c, Dim::J ),
816 k + A_stencil( c, Dim::K ), 0 ) );
817
818 // Compute the updated p.
819 ValueType p_new =
820 z_view( i, j, k, 0 ) + beta * p_old_view( i, j, k, 0 );
821
822 // Write to old residual.
823 r_old_view( i, j, k, 0 ) = r_new_view( i, j, k, 0 );
824
825 // Write values.
826 q_view( i, j, k, 0 ) = Ap;
827 p_new_view( i, j, k, 0 ) = p_new;
828
829 // Compute contribution to the dot product.
830 result += p_new * Ap;
831 }
832
833 KOKKOS_INLINE_FUNCTION void
834 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
835 const int j, ValueType& result ) const
836 {
837 // Compute the local contribution from matrix-vector
838 // multiplication. This computes the updated p vector
839 // in-line to avoid another kernel launch. Only apply the
840 // stencil entry if it is greater than 0.
841 ValueType Ap = 0.0;
842 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
843 if ( fabs( A_view( i, j, c ) ) > 0.0 )
844 Ap +=
845 A_view( i, j, c ) *
846 ( z_view( i + A_stencil( c, Dim::I ),
847 j + A_stencil( c, Dim::J ), 0 ) +
848 beta * p_old_view( i + A_stencil( c, Dim::I ),
849 j + A_stencil( c, Dim::J ), 0 ) );
850
851 // Compute the updated p.
852 ValueType p_new = z_view( i, j, 0 ) + beta * p_old_view( i, j, 0 );
853
854 // Write to old residual.
855 r_old_view( i, j, 0 ) = r_new_view( i, j, 0 );
856
857 // Write values.
858 q_view( i, j, 0 ) = Ap;
859 p_new_view( i, j, 0 ) = p_new;
860
861 // Compute contribution to the dot product.
862 result += p_new * Ap;
863 }
864 };
865
866 template <class StencilA, class ViewA, class ViewX, class ViewNewR,
867 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
868 class ViewQ, class ValueType>
869 auto createKernel2( const StencilA& A_stencil, const ViewA& A_view,
870 const ViewX& x_view, const ViewNewR& r_new_view,
871 const ViewOldR& r_old_view, const ViewNewP& p_new_view,
872 const ViewOldP& p_old_view, const ViewZ& z_view,
873 const ViewQ& q_view, const ValueType& beta )
874 {
875 return Kernel2<StencilA, ViewA, ViewX, ViewNewR, ViewOldR, ViewOldP,
876 ViewNewP, ViewZ, ViewQ, ValueType>{
877 A_stencil, A_view, x_view, r_new_view, r_old_view,
878 p_new_view, p_old_view, z_view, q_view, beta };
879 }
881
882 private:
883 // Set the stencil of a matrix.
884 void
885 setStencil( const std::vector<std::array<int, num_space_dim>>& stencil,
886 const bool is_symmetric,
887 Kokkos::View<int* [num_space_dim], MemorySpace>& device_stencil,
888 std::shared_ptr<Halo<memory_space>>& halo,
889 std::shared_ptr<Array_t>& matrix,
890 std::shared_ptr<subarray_type>& halo_matrix )
891 {
892 // For now we don't support symmetry.
893 if ( is_symmetric )
894 throw std::logic_error(
895 "Reference CG currently does not support symmetry" );
896
897 // Get the local grid.
898 auto local_grid = _vectors->layout()->localGrid();
899
900 // Copy stencil to the device.
901 device_stencil = Kokkos::View<int* [num_space_dim], MemorySpace>(
902 Kokkos::ViewAllocateWithoutInitializing( "stencil" ),
903 stencil.size() );
904 auto stencil_mirror =
905 Kokkos::create_mirror_view( Kokkos::HostSpace(), device_stencil );
906 for ( unsigned s = 0; s < stencil.size(); ++s )
907 for ( std::size_t d = 0; d < num_space_dim; ++d )
908 stencil_mirror( s, d ) = stencil[s][d];
909 Kokkos::deep_copy( device_stencil, stencil_mirror );
910
911 // Compose the halo pattern and compute how wide the halo needs to be
912 // to gather all elements accessed by the stencil.
913 std::set<std::array<int, num_space_dim>> neighbor_set;
914 std::array<int, num_space_dim> neighbor;
915 int width = 0;
916 for ( auto s : stencil )
917 {
918 // Compse a set of the neighbor ranks based on the stencil.
919 for ( std::size_t d = 0; d < num_space_dim; ++d )
920 neighbor[d] = ( s[d] == 0 ) ? 0 : s[d] / std::abs( s[d] );
921 neighbor_set.emplace( neighbor );
922
923 // Compute the width of the halo needed to apply the stencil.
924 for ( std::size_t d = 0; d < num_space_dim; ++d )
925 width = std::max( width, std::abs( s[d] ) );
926 }
927 std::vector<std::array<int, num_space_dim>> halo_neighbors(
928 neighbor_set.size() );
929 std::copy( neighbor_set.begin(), neighbor_set.end(),
930 halo_neighbors.begin() );
931
932 // Create a new layout.
933 auto matrix_layout =
934 createArrayLayout( local_grid, stencil.size(), EntityType() );
935
936 // Allocate the matrix.
937 matrix = createArray<Scalar, MemorySpace>( "matrix", matrix_layout );
938
939 // Build the halo.
940 HaloPattern<num_space_dim> pattern;
941 pattern.setNeighbors( halo_neighbors );
942 halo = createHalo( pattern, width, *halo_matrix );
943 }
944
945 private:
946 Scalar _tol;
947 int _max_iter;
948 int _print_level;
949 int _num_iter;
950 Scalar _residual_norm;
951 int _diag_entry;
952 Kokkos::View<int* [num_space_dim], MemorySpace> _A_stencil;
953 Kokkos::View<int* [num_space_dim], MemorySpace> _M_stencil;
954 std::shared_ptr<Halo<memory_space>> _A_halo;
955 std::shared_ptr<Halo<memory_space>> _M_halo;
956 std::shared_ptr<Array_t> _A;
957 std::shared_ptr<Array_t> _M;
958 std::shared_ptr<Array_t> _vectors;
959 std::shared_ptr<subarray_type> _A_halo_vectors;
960 std::shared_ptr<subarray_type> _M_halo_vectors;
961};
962
963//---------------------------------------------------------------------------//
964// Builders.
965//---------------------------------------------------------------------------//
971template <class Scalar, class MemorySpace, class EntityType, class MeshType>
972std::shared_ptr<
981
982//---------------------------------------------------------------------------//
983
984} // namespace Grid
985} // namespace Cabana
986
987#endif // end CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
Grid field arrays.
std::shared_ptr< Array< Scalar, EntityType, MeshType, typename Array< Scalar, EntityType, MeshType, Params... >::subview_layout, typename Array< Scalar, EntityType, MeshType, Params... >::memory_space, typename Array< Scalar, EntityType, MeshType, Params... >::subview_memory_traits > > createSubarray(const Array< Scalar, EntityType, MeshType, Params... > &array, const int dof_min, const int dof_max)
Create a subarray of the given array over the given range of degrees of freedom.
Definition Cabana_Grid_Array.hpp:376
void norm2(const Array_t &array, std::vector< typename Array_t::value_type > &norms)
Calculate the two-norm of the owned elements of the array.
Definition Cabana_Grid_Array.hpp:1078
std::shared_ptr< Array< Scalar, EntityType, MeshType, Params... > > createArray(const std::string &label, const std::shared_ptr< ArrayLayout< EntityType, MeshType > > &layout)
Create an array with the given array layout. Views are constructed over the ghosted index space of th...
Definition Cabana_Grid_Array.hpp:351
std::shared_ptr< ArrayLayout< EntityType, MeshType > > createArrayLayout(const std::shared_ptr< LocalGrid< MeshType > > &local_grid, const int dofs_per_entity, EntityType)
Create an array layout over the entities of a local grid.
Definition Cabana_Grid_Array.hpp:189
Multi-node grid scatter/gather.
auto createHalo(const Pattern &pattern, const int width, const ArrayTypes &... arrays)
Halo creation function.
Definition Cabana_Grid_Halo.hpp:955
Logical grid indexing.
Logical grid extension of Kokkos parallel iteration.
void grid_parallel_reduce(const std::string &label, const ExecutionSpace &exec_space, const IndexSpace< N > &index_space, const FunctorType &functor, ReduceType &reducer)
Execute a reduction functor in parallel with a multidimensional execution policy specified by the giv...
Definition Cabana_Grid_Parallel.hpp:436
std::shared_ptr< ReferenceConjugateGradient< Scalar, EntityType, MeshType, MemorySpace > > createReferenceConjugateGradient(const ArrayLayout< EntityType, MeshType > &layout)
Creation function for reference structured preconditioned block conjugate gradient.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:974
Grid type tags.
Cabana utilities.
Entity layout for array data on the local mesh.
Definition Cabana_Grid_Array.hpp:46
const std::shared_ptr< LocalGrid< MeshType > > localGrid() const
Get the local grid over which this layout is defined.
Definition Cabana_Grid_Array.hpp:71
Array of field data on the local mesh.
Definition Cabana_Grid_Array.hpp:229
Array< Scalar, EntityType, MeshType, subview_layout, memory_space, subview_memory_traits > subarray_type
Definition Cabana_Grid_Array.hpp:314
Reference structured preconditioned block conjugate gradient implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:149
void setup() override
Setup the problem.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:269
static constexpr std::size_t num_space_dim
Spatial dimension.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:168
void solve(const Array_t &b, Array_t &x) override
Solve the problem Ax = b for x.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:276
void setMatrixStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric=false) override
Set the matrix stencil.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:212
ReferenceConjugateGradient(const ArrayLayout< EntityType, MeshType > &layout)
Constructor.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:187
typename base_type::execution_space execution_space
Default execution space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:157
typename base_type::Array_t Array_t
Array type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:164
const Array_t & getPreconditionerValues() override
Get the preconditioner values.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:254
void setMaxIter(const int max_iter) override
Set maximum iteration implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:260
void setPreconditionerStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric=false) override
Set the preconditioner stencil.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:238
void setTolerance(const double tol) override
Set convergence tolerance implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:257
typename base_type::subarray_type subarray_type
SubArray type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:166
typename base_type::entity_type entity_type
Entity type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:160
int getNumIter() override
Get the number of iterations taken on the last solve.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:461
const Array_t & getMatrixValues() override
Get the matrix values.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:228
double getFinalRelativeResidualNorm() override
Get the relative residual norm achieved on the last solve.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:464
void setPrintLevel(const int print_level) override
Set the output level.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:263
ReferenceStructuredSolver< Scalar, EntityType, MeshType, MemorySpace > base_type
Base type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:152
typename base_type::value_type value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:162
typename base_type::memory_space memory_space
Memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:155
Reference preconditioned structured solver interface.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:50
virtual void setMaxIter(const int max_iter)=0
Set maximum iteration implementation.
virtual double getFinalRelativeResidualNorm()=0
Get the relative residual norm achieved on the last solve.
MemorySpace memory_space
Kokkos memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:57
typename Array_t::subarray_type subarray_type
SubArray type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:64
typename memory_space::execution_space execution_space
Default execution space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:60
virtual void setPreconditionerStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric)=0
Set the preconditioner stencil.
virtual const Array_t & getMatrixValues()=0
Get the matrix values.
static constexpr std::size_t num_space_dim
Spatial dimension.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:66
virtual void solve(const Array_t &b, Array_t &x)=0
Solve the problem Ax = b for x.
virtual void setMatrixStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric)=0
Set the matrix stencil.
EntityType entity_type
Entity type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:53
virtual void setTolerance(const double tol)=0
Set convergence tolerance implementation.
virtual void setup()=0
Setup the problem.
Scalar value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:55
virtual const Array_t & getPreconditionerValues()=0
Get the preconditioner values.
Array< Scalar, EntityType, MeshType, MemorySpace > Array_t
Array type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:62
virtual void setPrintLevel(const int print_level)=0
Set the output level.
virtual int getNumIter()=0
Get the number of iterations taken on the last solve.
Core: particle data structures and algorithms.
Definition Cabana_AoSoA.hpp:36
Local index tag.
Definition Cabana_Grid_Types.hpp:208
Definition Cabana_Grid_MpiTraits.hpp:32
Owned decomposition tag.
Definition Cabana_Grid_Types.hpp:190
Array-like container to hold layout and data information.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:173
ScalarT value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:175
MemorySpaceT memory_space
Kokkos memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:177
const ArrayLayoutT * layout() const
Get the array layout.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:181
const ArrayLayoutT & array_layout
Array layout.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:179