Cabana 0.8.0-dev
 
Loading...
Searching...
No Matches
Cabana_Grid_ReferenceStructuredSolver.hpp
Go to the documentation of this file.
1/****************************************************************************
2 * Copyright (c) 2018-2023 by the Cabana authors *
3 * All rights reserved. *
4 * *
5 * This file is part of the Cabana library. Cabana is distributed under a *
6 * BSD 3-clause license. For the licensing terms see the LICENSE file in *
7 * the top-level directory. *
8 * *
9 * SPDX-License-Identifier: BSD-3-Clause *
10 ****************************************************************************/
11
16#ifndef CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
17#define CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
18
19#include <Cabana_Grid_Array.hpp>
21#include <Cabana_Grid_Halo.hpp>
26#include <Cabana_Grid_Types.hpp>
27#include <Cabana_Utils.hpp> // FIXME: remove after next release.
28
29#include <Kokkos_Core.hpp>
30#include <Kokkos_Profiling_ScopedRegion.hpp>
31
32#include <array>
33#include <iostream>
34#include <memory>
35#include <numeric>
36#include <set>
37#include <sstream>
38#include <string>
39#include <type_traits>
40#include <vector>
41
42namespace Cabana
43{
44namespace Grid
45{
46//---------------------------------------------------------------------------//
48template <class Scalar, class EntityType, class MeshType, class MemorySpace>
50{
51 public:
53 using entity_type = EntityType;
55 using value_type = Scalar;
56 // FIXME: extracting the self type for backwards compatibility with previous
57 // template on DeviceType. Should simply be MemorySpace after next release.
59 using memory_space = typename MemorySpace::memory_space;
61 using device_type [[deprecated]] = typename memory_space::device_type;
63 using execution_space = typename memory_space::execution_space;
69 static constexpr std::size_t num_space_dim = MeshType::num_space_dim;
70
71 // Destructor.
73
82 virtual void setMatrixStencil(
83 const std::vector<std::array<int, num_space_dim>>& stencil,
84 const bool is_symmetric ) = 0;
85
95 virtual const Array_t& getMatrixValues() = 0;
96
106 const std::vector<std::array<int, num_space_dim>>& stencil,
107 const bool is_symmetric ) = 0;
108
118 virtual const Array_t& getPreconditionerValues() = 0;
119
121 virtual void setTolerance( const double tol ) = 0;
122
124 virtual void setMaxIter( const int max_iter ) = 0;
125
127 virtual void setPrintLevel( const int print_level ) = 0;
128
130 virtual void setup() = 0;
131
137 virtual void solve( const Array_t& b, Array_t& x ) = 0;
138
140 virtual int getNumIter() = 0;
141
143 virtual double getFinalRelativeResidualNorm() = 0;
144};
145
146//---------------------------------------------------------------------------//
148template <class Scalar, class EntityType, class MeshType, class MemorySpace>
150 : public ReferenceStructuredSolver<Scalar, EntityType, MeshType,
151 MemorySpace>
152{
153 public:
155 using base_type =
160 using device_type [[deprecated]] = typename memory_space::device_type;
163
169 using Array_t = typename base_type::Array_t;
173 static constexpr std::size_t num_space_dim = MeshType::num_space_dim;
174
176 template <class ScalarT, class MemorySpaceT, class ArrayLayoutT>
178 {
180 using value_type = ScalarT;
182 using memory_space = MemorySpaceT;
184 const ArrayLayoutT& array_layout;
186 const ArrayLayoutT* layout() const { return &array_layout; }
187 };
188
194 : _tol( 1.0e-6 )
195 , _max_iter( 1000 )
196 , _print_level( 0 )
197 , _num_iter( 0 )
198 , _residual_norm( 0.0 )
199 {
200 // Array layout for vectors (p_old,z,r_old,q,p_new,r_new).
201 auto vector_layout =
202 createArrayLayout( layout.localGrid(), 6, EntityType() );
203 _vectors =
204 createArray<Scalar, MemorySpace>( "cg_vectors", vector_layout );
205 _A_halo_vectors = createSubarray( *_vectors, 0, 2 );
206 _M_halo_vectors = createSubarray( *_vectors, 2, 4 );
207 }
208
218 const std::vector<std::array<int, num_space_dim>>& stencil,
219 const bool is_symmetric = false ) override
220 {
221 setStencil( stencil, is_symmetric, _A_stencil, _A_halo, _A,
222 _A_halo_vectors );
223 }
224
233 const Array_t& getMatrixValues() override { return *_A; }
234
244 const std::vector<std::array<int, num_space_dim>>& stencil,
245 const bool is_symmetric = false ) override
246 {
247 setStencil( stencil, is_symmetric, _M_stencil, _M_halo, _M,
248 _M_halo_vectors );
249 }
250
259 const Array_t& getPreconditionerValues() override { return *_M; }
260
262 void setTolerance( const double tol ) override { _tol = tol; }
263
265 void setMaxIter( const int max_iter ) override { _max_iter = max_iter; }
266
268 void setPrintLevel( const int print_level ) override
269 {
270 _print_level = print_level;
271 }
272
274 void setup() override {}
275
281 void solve( const Array_t& b, Array_t& x ) override
282 {
283 Kokkos::Profiling::ScopedRegion region(
284 "Cabana::Grid::ReferenceStructuredSolver::solve" );
285
286 // Get the local grid.
287 auto local_grid = _vectors->layout()->localGrid();
288
289 // Print banner
290 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
291 std::cout << std::endl
292 << "Preconditioned conjugate gradient" << std::endl;
293
294 // Index space.
295 auto entity_space =
296 local_grid->indexSpace( Own(), EntityType(), Local() );
297
298 // Subarrays.
299 auto p_old = createSubarray( *_vectors, 0, 1 );
300 auto z = createSubarray( *_vectors, 1, 2 );
301 auto r_old = createSubarray( *_vectors, 2, 3 );
302 auto q = createSubarray( *_vectors, 3, 4 );
303 auto p_new = createSubarray( *_vectors, 4, 5 );
304 auto r_new = createSubarray( *_vectors, 5, 6 );
305
306 // Views.
307 auto x_view = x.view();
308 auto b_view = b.view();
309 auto M_view = _M->view();
310 auto A_view = _A->view();
311 auto p_old_view = p_old->view();
312 auto z_view = z->view();
313 auto r_old_view = r_old->view();
314 auto q_view = q->view();
315 auto p_new_view = p_new->view();
316 auto r_new_view = r_new->view();
317
318 // Reset iteration count.
319 _num_iter = 0;
320
321 // Compute the norm of the RHS.
322 std::vector<Scalar> b_norm( 1 );
323 ArrayOp::norm2( b, b_norm );
324
325 // Copy the LHS into p so we can gather it.
326 Kokkos::deep_copy( p_old_view, x_view );
327
328 // Gather the LHS through gatheing p and z.
329 _A_halo->gather( execution_space(), *_A_halo_vectors );
330
331 // Compute the initial residual and norm.
332 _residual_norm = 0.0;
333 auto compute_r0 = createComputeR0( _A_stencil, A_view, p_old_view,
334 b_view, r_old_view );
336 "compute_r0", execution_space(), entity_space,
337 std::integral_constant<std::size_t, num_space_dim>{}, compute_r0,
338 _residual_norm );
339
340 // Finish the global norm reduction.
341 MPI_Allreduce( MPI_IN_PLACE, &_residual_norm, 1,
342 MpiTraits<Scalar>::type(), MPI_SUM,
343 local_grid->globalGrid().comm() );
344
345 // If we already have met our criteria then return.
346 _residual_norm = std::sqrt( _residual_norm ) / b_norm[0];
347 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
348 std::cout << "Iteration " << _num_iter
349 << ": |r|_2 / |b|_2 = " << _residual_norm << std::endl;
350 if ( _residual_norm <= _tol )
351 return;
352
353 // r and q.
354 _M_halo->gather( execution_space(), *_M_halo_vectors );
355
356 // Compute the initial preconditioned residual.
357 Scalar zTr_old = 0.0;
358 auto compute_z0 = createComputeZ0( _M_stencil, M_view, p_old_view,
359 p_new_view, r_old_view, z_view );
361 "compute_z0", execution_space(), entity_space,
362 std::integral_constant<std::size_t, num_space_dim>{}, compute_z0,
363 zTr_old );
364
365 // Finish computation of zTr
366 MPI_Allreduce( MPI_IN_PLACE, &zTr_old, 1, MpiTraits<Scalar>::type(),
367 MPI_SUM, local_grid->globalGrid().comm() );
368
369 // Gather the LHS through gatheing p and z.
370 _A_halo->gather( execution_space(), *_A_halo_vectors );
371
372 // Compute A*p and pT*A*p.
373 Scalar pTAp = 0.0;
374 auto compute_q0 =
375 createComputeQ0( _A_stencil, A_view, p_old_view, q_view );
377 "compute_q0", execution_space(), entity_space,
378 std::integral_constant<std::size_t, num_space_dim>{}, compute_q0,
379 pTAp );
380
381 // Finish the global reduction on pTAp.
382 MPI_Allreduce( MPI_IN_PLACE, &pTAp, 1, MpiTraits<Scalar>::type(),
383 MPI_SUM, local_grid->globalGrid().comm() );
384
385 // Iterate.
386 bool converged = false;
387 Scalar zTr_new = 0.0;
388 Scalar alpha;
389 Scalar beta;
390 while ( _residual_norm > _tol && _num_iter < _max_iter )
391 {
392 // Gather r and q.
393 _M_halo->gather( execution_space(), *_M_halo_vectors );
394
395 // Kernel 1: Compute x, r, residual norm, and zTr
396 alpha = zTr_old / pTAp;
397 zTr_new = 0.0;
398 auto cg_kernel_1 = createKernel1(
399 _M_stencil, M_view, x_view, r_new_view, r_old_view, p_new_view,
400 p_old_view, z_view, q_view, alpha );
402 "cg_kernel_1", execution_space(), entity_space,
403 std::integral_constant<std::size_t, num_space_dim>{},
404 cg_kernel_1, zTr_new );
405
406 // Finish the global reduction on zTr and r_norm.
407 MPI_Allreduce( MPI_IN_PLACE, &zTr_new, 1, MpiTraits<Scalar>::type(),
408 MPI_SUM, local_grid->globalGrid().comm() );
409
410 // Update residual norm
411 _residual_norm = std::sqrt( fabs( zTr_new ) ) / b_norm[0];
412
413 // Increment iteration count.
414 _num_iter++;
415
416 // Output result
417 if ( 2 == _print_level && 0 == local_grid->globalGrid().blockId() )
418 std::cout << "Iteration " << _num_iter
419 << ": |r|_2 / |b|_2 = " << _residual_norm
420 << std::endl;
421
422 // Check for convergence.
423 if ( _residual_norm <= _tol )
424 {
425 converged = true;
426 break;
427 }
428
429 // Gather p and z.
430 _A_halo->gather( execution_space(), *_A_halo_vectors );
431
432 // Kernel 2: Compute p, A*p, and p^T*A*p
433 beta = zTr_new / zTr_old;
434 pTAp = 0.0;
435 auto cg_kernel_2 = createKernel2(
436 _A_stencil, A_view, x_view, r_new_view, r_old_view, p_new_view,
437 p_old_view, z_view, q_view, beta );
439 "cg_kernel_2", execution_space(), entity_space,
440 std::integral_constant<std::size_t, num_space_dim>{},
441 cg_kernel_2, pTAp );
442
443 // Finish the global reduction on pTAp.
444 MPI_Allreduce( MPI_IN_PLACE, &pTAp, 1, MpiTraits<Scalar>::type(),
445 MPI_SUM, local_grid->globalGrid().comm() );
446
447 // Update zTr
448 zTr_old = zTr_new;
449 }
450
451 // Output end state.
452 if ( 1 <= _print_level && 0 == local_grid->globalGrid().blockId() )
453 std::cout << "Finished in " << _num_iter
454 << " iterations, converged to " << _residual_norm
455 << std::endl
456 << std::endl;
457
458 // If we didn't converge throw.
459 if ( !converged )
460 throw std::runtime_error( "CG solver did not converge" );
461 }
462
464 int getNumIter() override { return _num_iter; }
465
467 double getFinalRelativeResidualNorm() override { return _residual_norm; }
468
469 public:
471 template <class StencilA, class ViewA, class ViewOldP, class ViewB,
472 class ViewOldR>
473 struct ComputeR0
474 {
475 StencilA A_stencil;
476 ViewA A_view;
477 ViewOldP p_old_view;
478 ViewB b_view;
479 ViewOldR r_old_view;
480
481 using value_type = typename ViewB::value_type;
482
483 KOKKOS_INLINE_FUNCTION void
484 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
485 const int j, const int k, value_type& result ) const
486 {
487 // Compute the local contribution from matrix-vector
488 // multiplication. Note that we copied x into p for this
489 // operation to easily perform the gather. Only apply the
490 // stencil entry if it is greater than 0.
491 value_type Ax = 0.0;
492 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
493 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
494 Ax += A_view( i, j, k, c ) *
495 p_old_view( i + A_stencil( c, Dim::I ),
496 j + A_stencil( c, Dim::J ),
497 k + A_stencil( c, Dim::K ), 0 );
498
499 // Compute the residual.
500 auto r_new = b_view( i, j, k, 0 ) - Ax;
501
502 // Assign the residual.
503 r_old_view( i, j, k, 0 ) = r_new;
504
505 // Contribute to the reduction.
506 result += r_new * r_new;
507 }
508
509 KOKKOS_INLINE_FUNCTION void
510 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
511 const int j, value_type& result ) const
512 {
513 // Compute the local contribution from matrix-vector
514 // multiplication. Note that we copied x into p for this
515 // operation to easily perform the gather. Only apply the
516 // stencil entry if it is greater than 0.
517 value_type Ax = 0.0;
518 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
519 if ( fabs( A_view( i, j, c ) ) > 0.0 )
520 Ax += A_view( i, j, c ) *
521 p_old_view( i + A_stencil( c, Dim::I ),
522 j + A_stencil( c, Dim::J ), 0 );
523
524 // Compute the residual.
525 auto r_new = b_view( i, j, 0 ) - Ax;
526
527 // Assign the residual.
528 r_old_view( i, j, 0 ) = r_new;
529
530 // Contribute to the reduction.
531 result += r_new * r_new;
532 }
533 };
534
535 template <class StencilA, class ViewA, class ViewOldP, class ViewB,
536 class ViewOldR>
537 auto createComputeR0( const StencilA& A_stencil, const ViewA& A_view,
538 const ViewOldP& p_old_view, const ViewB& b_view,
539 const ViewOldR& r_old_view )
540 {
541 return ComputeR0<StencilA, ViewA, ViewOldP, ViewB, ViewOldR>{
542 A_stencil, A_view, p_old_view, b_view, r_old_view };
543 }
544
545 template <class StencilM, class ViewM, class ViewOldP, class ViewNewP,
546 class ViewOldR, class ViewZ>
547 struct ComputeZ0
548 {
549 StencilM M_stencil;
550 ViewM M_view;
551 ViewOldP p_old_view;
552 ViewNewP p_new_view;
553 ViewOldR r_old_view;
554 ViewZ z_view;
555
556 using value_type = typename ViewZ::value_type;
557
558 KOKKOS_INLINE_FUNCTION void
559 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
560 const int j, const int k, value_type& result ) const
561 {
562 // Compute the local contribution from matrix-vector
563 // multiplication. Only apply the stencil entry if it is
564 // greater than 0.
565 value_type Mr = 0.0;
566 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
567 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
568 Mr += M_view( i, j, k, c ) *
569 r_old_view( i + M_stencil( c, Dim::I ),
570 j + M_stencil( c, Dim::J ),
571 k + M_stencil( c, Dim::K ), 0 );
572 // Write values.
573 z_view( i, j, k, 0 ) = Mr;
574 p_old_view( i, j, k, 0 ) = Mr;
575 p_new_view( i, j, k, 0 ) = Mr;
576
577 // Compute zTr
578 result += Mr * r_old_view( i, j, k, 0 );
579 }
580
581 KOKKOS_INLINE_FUNCTION void
582 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
583 const int j, value_type& result ) const
584 {
585 // Compute the local contribution from matrix-vector
586 // multiplication. Only apply the stencil entry if it is
587 // greater than 0.
588 value_type Mr = 0.0;
589 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
590 if ( fabs( M_view( i, j, c ) ) > 0.0 )
591 Mr += M_view( i, j, c ) *
592 r_old_view( i + M_stencil( c, Dim::I ),
593 j + M_stencil( c, Dim::J ), 0 );
594 // Write values.
595 z_view( i, j, 0 ) = Mr;
596 p_old_view( i, j, 0 ) = Mr;
597 p_new_view( i, j, 0 ) = Mr;
598
599 // Compute zTr
600 result += Mr * r_old_view( i, j, 0 );
601 }
602 };
603
604 template <class StencilM, class ViewM, class ViewOldP, class ViewNewP,
605 class ViewOldR, class ViewZ>
606 auto createComputeZ0( const StencilM& M_stencil, const ViewM& M_view,
607 const ViewOldP& p_old_view,
608 const ViewNewP& p_new_view,
609 const ViewOldR& r_old_view, const ViewZ& z_view )
610 {
611 return ComputeZ0<StencilM, ViewM, ViewOldP, ViewNewP, ViewOldR, ViewZ>{
612 M_stencil, M_view, p_old_view, p_new_view, r_old_view, z_view };
613 }
614
615 template <class StencilA, class ViewA, class ViewOldP, class ViewQ>
616 struct ComputeQ0
617 {
618 StencilA A_stencil;
619 ViewA A_view;
620 ViewOldP p_old_view;
621 ViewQ q_view;
622
623 using value_type = typename ViewQ::value_type;
624
625 KOKKOS_INLINE_FUNCTION void
626 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
627 const int j, const int k, value_type& result ) const
628 {
629 // Compute the local contribution from matrix-vector
630 // multiplication. This computes the updated p vector
631 // in-line to avoid another kernel launch. Only apply the
632 // stencil entry if it is greater than 0.
633 value_type Ap = 0.0;
634 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
635 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
636 Ap += A_view( i, j, k, c ) *
637 ( p_old_view( i + A_stencil( c, Dim::I ),
638 j + A_stencil( c, Dim::J ),
639 k + A_stencil( c, Dim::K ), 0 ) );
640
641 // Write values.
642 q_view( i, j, k, 0 ) = Ap;
643
644 // Compute contribution to the dot product.
645 result += p_old_view( i, j, k, 0 ) * Ap;
646 }
647
648 KOKKOS_INLINE_FUNCTION void
649 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
650 const int j, value_type& result ) const
651 {
652 // Compute the local contribution from matrix-vector
653 // multiplication. This computes the updated p vector
654 // in-line to avoid another kernel launch. Only apply the
655 // stencil entry if it is greater than 0.
656 value_type Ap = 0.0;
657 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
658 if ( fabs( A_view( i, j, c ) ) > 0.0 )
659 Ap += A_view( i, j, c ) *
660 ( p_old_view( i + A_stencil( c, Dim::I ),
661 j + A_stencil( c, Dim::J ), 0 ) );
662
663 // Write values.
664 q_view( i, j, 0 ) = Ap;
665
666 // Compute contribution to the dot product.
667 result += p_old_view( i, j, 0 ) * Ap;
668 }
669 };
670
671 template <class StencilA, class ViewA, class ViewOldP, class ViewQ>
672 auto createComputeQ0( const StencilA& A_stencil, const ViewA& A_view,
673 const ViewOldP& p_old_view, const ViewQ& q_view )
674 {
675 return ComputeQ0<StencilA, ViewA, ViewOldP, ViewQ>{
676 A_stencil, A_view, p_old_view, q_view };
677 }
678
679 template <class StencilM, class ViewM, class ViewX, class ViewNewR,
680 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
681 class ViewQ, class ValueType>
682 struct Kernel1
683 {
684 StencilM M_stencil;
685 ViewM M_view;
686 ViewX x_view;
687 ViewNewR r_new_view;
688 ViewOldR r_old_view;
689 ViewNewP p_new_view;
690 ViewOldP p_old_view;
691 ViewZ z_view;
692 ViewQ q_view;
693 ValueType alpha;
694
695 KOKKOS_INLINE_FUNCTION void
696 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
697 const int j, const int k, ValueType& result ) const
698 {
699 // Compute the local contribution from matrix-vector
700 // multiplication. This computes the updated q vector
701 // in-line to avoid another kernel launch. Only apply the
702 // stencil entry if it is greater than 0.
703 ValueType Mr = 0.0;
704 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
705 if ( fabs( M_view( i, j, k, c ) ) > 0.0 )
706 Mr += M_view( i, j, k, c ) *
707 ( r_old_view( i + M_stencil( c, Dim::I ),
708 j + M_stencil( c, Dim::J ),
709 k + M_stencil( c, Dim::K ), 0 ) -
710 alpha * q_view( i + M_stencil( c, Dim::I ),
711 j + M_stencil( c, Dim::J ),
712 k + M_stencil( c, Dim::K ), 0 ) );
713
714 // Compute the updated x.
715 ValueType x_new =
716 x_view( i, j, k, 0 ) + alpha * p_new_view( i, j, k, 0 );
717
718 // Compute the updated residual.
719 ValueType r_new =
720 r_old_view( i, j, k, 0 ) - alpha * q_view( i, j, k, 0 );
721
722 // Write to old p vector.
723 p_old_view( i, j, k, 0 ) = p_new_view( i, j, k, 0 );
724
725 // Write values.
726 x_view( i, j, k, 0 ) = x_new;
727 r_new_view( i, j, k, 0 ) = r_new;
728 z_view( i, j, k, 0 ) = Mr;
729
730 // Compute contribution to the zTr.
731 result += Mr * r_new;
732 }
733
734 KOKKOS_INLINE_FUNCTION void
735 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
736 const int j, ValueType& result ) const
737 {
738 // Compute the local contribution from matrix-vector
739 // multiplication. This computes the updated q vector
740 // in-line to avoid another kernel launch. Only apply the
741 // stencil entry if it is greater than 0.
742 ValueType Mr = 0.0;
743 for ( unsigned c = 0; c < M_stencil.extent( 0 ); ++c )
744 if ( fabs( M_view( i, j, c ) ) > 0.0 )
745 Mr += M_view( i, j, c ) *
746 ( r_old_view( i + M_stencil( c, Dim::I ),
747 j + M_stencil( c, Dim::J ), 0 ) -
748 alpha * q_view( i + M_stencil( c, Dim::I ),
749 j + M_stencil( c, Dim::J ), 0 ) );
750
751 // Compute the updated x.
752 ValueType x_new = x_view( i, j, 0 ) + alpha * p_new_view( i, j, 0 );
753
754 // Compute the updated residual.
755 ValueType r_new = r_old_view( i, j, 0 ) - alpha * q_view( i, j, 0 );
756
757 // Write to old p vector.
758 p_old_view( i, j, 0 ) = p_new_view( i, j, 0 );
759
760 // Write values.
761 x_view( i, j, 0 ) = x_new;
762 r_new_view( i, j, 0 ) = r_new;
763 z_view( i, j, 0 ) = Mr;
764
765 // Compute contribution to the zTr.
766 result += Mr * r_new;
767 }
768 };
769
770 template <class StencilM, class ViewM, class ViewX, class ViewNewR,
771 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
772 class ViewQ, class ValueType>
773 auto createKernel1( const StencilM& M_stencil, const ViewM& M_view,
774 const ViewX& x_view, const ViewNewR& r_new_view,
775 const ViewOldR& r_old_view, const ViewNewP& p_new_view,
776 const ViewOldP& p_old_view, const ViewZ& z_view,
777 const ViewQ& q_view, const ValueType& alpha )
778 {
779 return Kernel1<StencilM, ViewM, ViewX, ViewNewR, ViewOldR, ViewOldP,
780 ViewNewP, ViewZ, ViewQ, ValueType>{
781 M_stencil, M_view, x_view, r_new_view, r_old_view,
782 p_new_view, p_old_view, z_view, q_view, alpha };
783 }
784
785 template <class StencilA, class ViewA, class ViewX, class ViewNewR,
786 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
787 class ViewQ, class ValueType>
788 struct Kernel2
789 {
790 StencilA A_stencil;
791 ViewA A_view;
792 ViewX x_view;
793 ViewNewR r_new_view;
794 ViewOldR r_old_view;
795 ViewNewP p_new_view;
796 ViewOldP p_old_view;
797 ViewZ z_view;
798 ViewQ q_view;
799 ValueType beta;
800
801 KOKKOS_INLINE_FUNCTION void
802 operator()( const std::integral_constant<std::size_t, 3>&, const int i,
803 const int j, const int k, ValueType& result ) const
804 {
805 // Compute the local contribution from matrix-vector
806 // multiplication. This computes the updated p vector
807 // in-line to avoid another kernel launch. Only apply the
808 // stencil entry if it is greater than 0.
809 ValueType Ap = 0.0;
810 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
811 if ( fabs( A_view( i, j, k, c ) ) > 0.0 )
812 Ap +=
813 A_view( i, j, k, c ) *
814 ( z_view( i + A_stencil( c, Dim::I ),
815 j + A_stencil( c, Dim::J ),
816 k + A_stencil( c, Dim::K ), 0 ) +
817 beta * p_old_view( i + A_stencil( c, Dim::I ),
818 j + A_stencil( c, Dim::J ),
819 k + A_stencil( c, Dim::K ), 0 ) );
820
821 // Compute the updated p.
822 ValueType p_new =
823 z_view( i, j, k, 0 ) + beta * p_old_view( i, j, k, 0 );
824
825 // Write to old residual.
826 r_old_view( i, j, k, 0 ) = r_new_view( i, j, k, 0 );
827
828 // Write values.
829 q_view( i, j, k, 0 ) = Ap;
830 p_new_view( i, j, k, 0 ) = p_new;
831
832 // Compute contribution to the dot product.
833 result += p_new * Ap;
834 }
835
836 KOKKOS_INLINE_FUNCTION void
837 operator()( const std::integral_constant<std::size_t, 2>&, const int i,
838 const int j, ValueType& result ) const
839 {
840 // Compute the local contribution from matrix-vector
841 // multiplication. This computes the updated p vector
842 // in-line to avoid another kernel launch. Only apply the
843 // stencil entry if it is greater than 0.
844 ValueType Ap = 0.0;
845 for ( unsigned c = 0; c < A_stencil.extent( 0 ); ++c )
846 if ( fabs( A_view( i, j, c ) ) > 0.0 )
847 Ap +=
848 A_view( i, j, c ) *
849 ( z_view( i + A_stencil( c, Dim::I ),
850 j + A_stencil( c, Dim::J ), 0 ) +
851 beta * p_old_view( i + A_stencil( c, Dim::I ),
852 j + A_stencil( c, Dim::J ), 0 ) );
853
854 // Compute the updated p.
855 ValueType p_new = z_view( i, j, 0 ) + beta * p_old_view( i, j, 0 );
856
857 // Write to old residual.
858 r_old_view( i, j, 0 ) = r_new_view( i, j, 0 );
859
860 // Write values.
861 q_view( i, j, 0 ) = Ap;
862 p_new_view( i, j, 0 ) = p_new;
863
864 // Compute contribution to the dot product.
865 result += p_new * Ap;
866 }
867 };
868
869 template <class StencilA, class ViewA, class ViewX, class ViewNewR,
870 class ViewOldR, class ViewOldP, class ViewNewP, class ViewZ,
871 class ViewQ, class ValueType>
872 auto createKernel2( const StencilA& A_stencil, const ViewA& A_view,
873 const ViewX& x_view, const ViewNewR& r_new_view,
874 const ViewOldR& r_old_view, const ViewNewP& p_new_view,
875 const ViewOldP& p_old_view, const ViewZ& z_view,
876 const ViewQ& q_view, const ValueType& beta )
877 {
878 return Kernel2<StencilA, ViewA, ViewX, ViewNewR, ViewOldR, ViewOldP,
879 ViewNewP, ViewZ, ViewQ, ValueType>{
880 A_stencil, A_view, x_view, r_new_view, r_old_view,
881 p_new_view, p_old_view, z_view, q_view, beta };
882 }
884
885 private:
886 // Set the stencil of a matrix.
887 void
888 setStencil( const std::vector<std::array<int, num_space_dim>>& stencil,
889 const bool is_symmetric,
890 Kokkos::View<int* [num_space_dim], MemorySpace>& device_stencil,
891 std::shared_ptr<Halo<memory_space>>& halo,
892 std::shared_ptr<Array_t>& matrix,
893 std::shared_ptr<subarray_type>& halo_matrix )
894 {
895 // For now we don't support symmetry.
896 if ( is_symmetric )
897 throw std::logic_error(
898 "Reference CG currently does not support symmetry" );
899
900 // Get the local grid.
901 auto local_grid = _vectors->layout()->localGrid();
902
903 // Copy stencil to the device.
904 device_stencil = Kokkos::View<int* [num_space_dim], MemorySpace>(
905 Kokkos::ViewAllocateWithoutInitializing( "stencil" ),
906 stencil.size() );
907 auto stencil_mirror =
908 Kokkos::create_mirror_view( Kokkos::HostSpace(), device_stencil );
909 for ( unsigned s = 0; s < stencil.size(); ++s )
910 for ( std::size_t d = 0; d < num_space_dim; ++d )
911 stencil_mirror( s, d ) = stencil[s][d];
912 Kokkos::deep_copy( device_stencil, stencil_mirror );
913
914 // Compose the halo pattern and compute how wide the halo needs to be
915 // to gather all elements accessed by the stencil.
916 std::set<std::array<int, num_space_dim>> neighbor_set;
917 std::array<int, num_space_dim> neighbor;
918 int width = 0;
919 for ( auto s : stencil )
920 {
921 // Compse a set of the neighbor ranks based on the stencil.
922 for ( std::size_t d = 0; d < num_space_dim; ++d )
923 neighbor[d] = ( s[d] == 0 ) ? 0 : s[d] / std::abs( s[d] );
924 neighbor_set.emplace( neighbor );
925
926 // Compute the width of the halo needed to apply the stencil.
927 for ( std::size_t d = 0; d < num_space_dim; ++d )
928 width = std::max( width, std::abs( s[d] ) );
929 }
930 std::vector<std::array<int, num_space_dim>> halo_neighbors(
931 neighbor_set.size() );
932 std::copy( neighbor_set.begin(), neighbor_set.end(),
933 halo_neighbors.begin() );
934
935 // Create a new layout.
936 auto matrix_layout =
937 createArrayLayout( local_grid, stencil.size(), EntityType() );
938
939 // Allocate the matrix.
940 matrix = createArray<Scalar, MemorySpace>( "matrix", matrix_layout );
941
942 // Build the halo.
943 HaloPattern<num_space_dim> pattern;
944 pattern.setNeighbors( halo_neighbors );
945 halo = createHalo( pattern, width, *halo_matrix );
946 }
947
948 private:
949 Scalar _tol;
950 int _max_iter;
951 int _print_level;
952 int _num_iter;
953 Scalar _residual_norm;
954 int _diag_entry;
955 Kokkos::View<int* [num_space_dim], MemorySpace> _A_stencil;
956 Kokkos::View<int* [num_space_dim], MemorySpace> _M_stencil;
957 std::shared_ptr<Halo<memory_space>> _A_halo;
958 std::shared_ptr<Halo<memory_space>> _M_halo;
959 std::shared_ptr<Array_t> _A;
960 std::shared_ptr<Array_t> _M;
961 std::shared_ptr<Array_t> _vectors;
962 std::shared_ptr<subarray_type> _A_halo_vectors;
963 std::shared_ptr<subarray_type> _M_halo_vectors;
964};
965
966//---------------------------------------------------------------------------//
967// Builders.
968//---------------------------------------------------------------------------//
974template <class Scalar, class MemorySpace, class EntityType, class MeshType>
975std::shared_ptr<
984
985//---------------------------------------------------------------------------//
986
987} // namespace Grid
988} // namespace Cabana
989
990#endif // end CABANA_GRID_REFERENCESTRUCTUREDSOLVER_HPP
Grid field arrays.
std::shared_ptr< Array< Scalar, EntityType, MeshType, typename Array< Scalar, EntityType, MeshType, Params... >::subview_layout, typename Array< Scalar, EntityType, MeshType, Params... >::memory_space, typename Array< Scalar, EntityType, MeshType, Params... >::subview_memory_traits > > createSubarray(const Array< Scalar, EntityType, MeshType, Params... > &array, const int dof_min, const int dof_max)
Create a subarray of the given array over the given range of degrees of freedom.
Definition Cabana_Grid_Array.hpp:374
void norm2(const Array_t &array, std::vector< typename Array_t::value_type > &norms)
Calculate the two-norm of the owned elements of the array.
Definition Cabana_Grid_Array.hpp:1086
std::shared_ptr< Array< Scalar, EntityType, MeshType, Params... > > createArray(const std::string &label, const std::shared_ptr< ArrayLayout< EntityType, MeshType > > &layout)
Create an array with the given array layout. Views are constructed over the ghosted index space of th...
Definition Cabana_Grid_Array.hpp:349
std::shared_ptr< ArrayLayout< EntityType, MeshType > > createArrayLayout(const std::shared_ptr< LocalGrid< MeshType > > &local_grid, const int dofs_per_entity, EntityType)
Create an array layout over the entities of a local grid.
Definition Cabana_Grid_Array.hpp:189
Multi-node grid scatter/gather.
auto createHalo(const Pattern &pattern, const int width, const ArrayTypes &... arrays)
Halo creation function.
Definition Cabana_Grid_Halo.hpp:949
Logical grid indexing.
Logical grid extension of Kokkos parallel iteration.
void grid_parallel_reduce(const std::string &label, const ExecutionSpace &exec_space, const IndexSpace< N > &index_space, const FunctorType &functor, ReduceType &reducer)
Execute a reduction functor in parallel with a multidimensional execution policy specified by the giv...
Definition Cabana_Grid_Parallel.hpp:436
std::shared_ptr< ReferenceConjugateGradient< Scalar, EntityType, MeshType, MemorySpace > > createReferenceConjugateGradient(const ArrayLayout< EntityType, MeshType > &layout)
Creation function for reference structured preconditioned block conjugate gradient.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:977
Grid type tags.
Cabana utilities.
Entity layout for array data on the local mesh.
Definition Cabana_Grid_Array.hpp:46
const std::shared_ptr< LocalGrid< MeshType > > localGrid() const
Get the local grid over which this layout is defined.
Definition Cabana_Grid_Array.hpp:71
Array of field data on the local mesh.
Definition Cabana_Grid_Array.hpp:229
Array< Scalar, EntityType, MeshType, subview_layout, memory_space, subview_memory_traits > subarray_type
Definition Cabana_Grid_Array.hpp:312
Reference structured preconditioned block conjugate gradient implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:152
void setup() override
Setup the problem.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:274
static constexpr std::size_t num_space_dim
Spatial dimension.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:173
void solve(const Array_t &b, Array_t &x) override
Solve the problem Ax = b for x.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:281
void setMatrixStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric=false) override
Set the matrix stencil.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:217
ReferenceConjugateGradient(const ArrayLayout< EntityType, MeshType > &layout)
Constructor.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:192
typename base_type::execution_space execution_space
Default execution space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:162
typename base_type::Array_t Array_t
Array type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:169
const Array_t & getPreconditionerValues() override
Get the preconditioner values.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:259
void setMaxIter(const int max_iter) override
Set maximum iteration implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:265
void setPreconditionerStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric=false) override
Set the preconditioner stencil.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:243
void setTolerance(const double tol) override
Set convergence tolerance implementation.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:262
typename base_type::subarray_type subarray_type
SubArray type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:171
typename base_type::entity_type entity_type
Entity type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:165
int getNumIter() override
Get the number of iterations taken on the last solve.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:464
const Array_t & getMatrixValues() override
Get the matrix values.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:233
double getFinalRelativeResidualNorm() override
Get the relative residual norm achieved on the last solve.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:467
void setPrintLevel(const int print_level) override
Set the output level.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:268
ReferenceStructuredSolver< Scalar, EntityType, MeshType, MemorySpace > base_type
Base type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:155
typename base_type::value_type value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:167
typename base_type::memory_space memory_space
Memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:158
Reference preconditioned structured solver interface.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:50
virtual void setMaxIter(const int max_iter)=0
Set maximum iteration implementation.
virtual double getFinalRelativeResidualNorm()=0
Get the relative residual norm achieved on the last solve.
typename Array_t::subarray_type subarray_type
SubArray type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:67
typename memory_space::execution_space execution_space
Default execution space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:63
typename MemorySpace::memory_space memory_space
Memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:59
virtual void setPreconditionerStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric)=0
Set the preconditioner stencil.
virtual const Array_t & getMatrixValues()=0
Get the matrix values.
static constexpr std::size_t num_space_dim
Spatial dimension.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:69
virtual void solve(const Array_t &b, Array_t &x)=0
Solve the problem Ax = b for x.
virtual void setMatrixStencil(const std::vector< std::array< int, num_space_dim > > &stencil, const bool is_symmetric)=0
Set the matrix stencil.
EntityType entity_type
Entity type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:53
virtual void setTolerance(const double tol)=0
Set convergence tolerance implementation.
virtual void setup()=0
Setup the problem.
Scalar value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:55
virtual const Array_t & getPreconditionerValues()=0
Get the preconditioner values.
Array< Scalar, EntityType, MeshType, MemorySpace > Array_t
Array type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:65
virtual void setPrintLevel(const int print_level)=0
Set the output level.
virtual int getNumIter()=0
Get the number of iterations taken on the last solve.
Core: particle data structures and algorithms.
Definition Cabana_AoSoA.hpp:36
Local index tag.
Definition Cabana_Grid_Types.hpp:208
Definition Cabana_Grid_MpiTraits.hpp:32
Owned decomposition tag.
Definition Cabana_Grid_Types.hpp:190
Array-like container to hold layout and data information.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:178
ScalarT value_type
Scalar value type.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:180
MemorySpaceT memory_space
Kokkos memory space.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:182
const ArrayLayoutT * layout() const
Get the array layout.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:186
const ArrayLayoutT & array_layout
Array layout.
Definition Cabana_Grid_ReferenceStructuredSolver.hpp:184