From 19b1a67ea2632a03da9c82b2ab2bd48a6afba163 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 6 May 2026 16:17:12 -0600 Subject: [PATCH 01/48] Add Kokkos numerics core support --- include/base/libmesh_common.h | 70 ++++++++++---------- include/base/libmesh_device.h | 74 +++++++++++++++++++++ include/include_HEADERS | 2 +- include/libmesh/Makefile.am | 9 ++- include/libmesh_config.h.in | 6 +- include/numerics/tensor_tools.h | 36 +++++------ include/numerics/tensor_value.h | 24 ++++--- include/numerics/type_tensor.h | 110 +++++++++++++++++++------------- include/numerics/type_vector.h | 89 ++++++++++++++++---------- include/numerics/vector_value.h | 16 +++-- 10 files changed, 276 insertions(+), 160 deletions(-) create mode 100644 include/base/libmesh_device.h diff --git a/include/base/libmesh_common.h b/include/base/libmesh_common.h index d907f4a5fe3..77dfbb69f08 100644 --- a/include/base/libmesh_common.h +++ b/include/base/libmesh_common.h @@ -30,6 +30,10 @@ // The library configuration options #include "libmesh/libmesh_config.h" +// Device compilation support — must be included before assert macros +// so that LIBMESH_DEVICE_ASSERT is available for the Kokkos path. 
+#include "libmesh/libmesh_device.h" + // Use actual timestamps or constant dummies (to aid ccache) #ifdef LIBMESH_ENABLE_TIMESTAMPS # define LIBMESH_TIME __TIME__ @@ -183,33 +187,33 @@ typedef std::complex COMPLEX; // Helper functions for complex/real numbers // to clean up #ifdef LIBMESH_USE_COMPLEX_NUMBERS elsewhere -template inline T libmesh_real(T a) { return a; } -template inline T libmesh_imag(T /*a*/) { return 0; } -template inline T libmesh_conj(T a) { return a; } +template LIBMESH_DEVICE_INLINE T libmesh_real(T a) { return a; } +template LIBMESH_DEVICE_INLINE T libmesh_imag(T /*a*/) { return 0; } +template LIBMESH_DEVICE_INLINE T libmesh_conj(T a) { return a; } template -inline T libmesh_real(std::complex a) { return std::real(a); } +LIBMESH_DEVICE_INLINE T libmesh_real(std::complex a) { return std::real(a); } template -inline T libmesh_imag(std::complex a) { return std::imag(a); } +LIBMESH_DEVICE_INLINE T libmesh_imag(std::complex a) { return std::imag(a); } template -inline std::complex libmesh_conj(std::complex a) { return std::conj(a); } +LIBMESH_DEVICE_INLINE std::complex libmesh_conj(std::complex a) { return std::conj(a); } // std::isnan() is in as of C++11. template -inline bool libmesh_isnan(T x) { return std::isnan(x); } +LIBMESH_DEVICE_INLINE bool libmesh_isnan(T x) { return std::isnan(x); } template -inline bool libmesh_isnan(std::complex a) +LIBMESH_DEVICE_INLINE bool libmesh_isnan(std::complex a) { return (std::isnan(std::real(a)) || std::isnan(std::imag(a))); } // std::isinf() is in as of C++11. template -inline bool libmesh_isinf(T x) { return std::isinf(x); } +LIBMESH_DEVICE_INLINE bool libmesh_isinf(T x) { return std::isinf(x); } template -inline bool libmesh_isinf(std::complex a) +LIBMESH_DEVICE_INLINE bool libmesh_isinf(std::complex a) { return (std::isinf(std::real(a)) || std::isinf(std::imag(a))); } // Define the value type for unknowns in simulations. 
@@ -287,7 +291,13 @@ extern bool warned_about_auto_ptr; #endif // The libmesh_assert() macro acts like C's assert(), but throws a -// libmesh_error() (including stack trace, etc) instead of just exiting +// libmesh_error() (including stack trace, etc) instead of just exiting. +// +// In .K translation units (LIBMESH_KOKKOS_COMPILATION defined), +// LIBMESH_DEVICE_ASSERT is provided by libmesh_device.h using +// printf + Kokkos::abort() — device-safe across CUDA/HIP/SYCL. +// The assert macros delegate to it so that both host and device +// code in the same file get assertion checking. #ifdef NDEBUG #define libmesh_assert_msg(asserted, msg) ((void) 0) @@ -299,6 +309,18 @@ extern bool warned_about_auto_ptr; #define libmesh_assert_less_equal_msg(expr1,expr2, msg) ((void) 0) #define libmesh_assert_greater_equal_msg(expr1,expr2, msg) ((void) 0) +#elif defined(LIBMESH_DEVICE_ASSERT) + +// Kokkos compilation: use the device-safe assert from libmesh_device.h. +#define libmesh_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted) +#define libmesh_exceptionless_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted) +#define libmesh_assert_equal_to_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) == (expr2)) +#define libmesh_assert_not_equal_to_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) != (expr2)) +#define libmesh_assert_less_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) < (expr2)) +#define libmesh_assert_greater_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) > (expr2)) +#define libmesh_assert_less_equal_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) <= (expr2)) +#define libmesh_assert_greater_equal_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) >= (expr2)) + #else #define libmesh_assertion_types(expr1,expr2) \ @@ -649,32 +671,6 @@ inline Tnew libmesh_cast_int (Told oldvar) return cast_int(oldvar); } - -/** - * restrict_int checks that the value of the castee is within the - * bounds which are exactly representable by the output type, 
even in - * optimized modes. - * - * Use this cast when you suspect that the input may not succeed in - * correct code (e.g. when an input file is being read from a format - * that may allow wider integer types than the current libMesh - * configuration). - */ -template -inline Tnew restrict_int (Told oldvar) -{ - if constexpr (!std::is_same_v) - { - const Tnew returnval = static_cast(oldvar); - - libmesh_error_msg_if (oldvar != static_cast(returnval), - "restrict_int failed: " << oldvar << " does not fit in type " << typeid(returnval).name()); - } - - return oldvar; -} - - /** * This is a helper variable template for cases when we want to use a default compile-time * error with constexpr-based if conditions. The templating delays the triggering diff --git a/include/base/libmesh_device.h b/include/base/libmesh_device.h new file mode 100644 index 00000000000..f41d4c70b01 --- /dev/null +++ b/include/base/libmesh_device.h @@ -0,0 +1,74 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_LIBMESH_DEVICE_H +#define LIBMESH_LIBMESH_DEVICE_H + +// Defines LIBMESH_DEVICE_INLINE, mirroring MetaPhysicL's METAPHYSICL_INLINE +// pattern (metaphysicl_device.h / METAPHYSICL_KOKKOS_COMPILATION). +// +// When compiling a .K translation unit (LIBMESH_KOKKOS_COMPILATION is defined +// by kokkos.mk), this expands to KOKKOS_INLINE_FUNCTION so that annotated +// methods are callable from both host and device code. In all other +// translation units it expands to plain `inline`. +#ifdef LIBMESH_KOKKOS_COMPILATION +# include +# include +# define LIBMESH_DEVICE_INLINE KOKKOS_INLINE_FUNCTION + +// Backend-neutral device-code detection for Kokkos .K translation units. +// This lets error/exception plumbing share a single predicate instead of +// hardcoding per-backend checks in multiple headers. +# if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || defined(__SYCL_DEVICE_ONLY__) +# define LIBMESH_IN_DEVICE_CODE 1 +# else +# define LIBMESH_IN_DEVICE_CODE 0 +# endif + +// Device-safe assert: uses printf (supported on CUDA/HIP) and +// Kokkos::abort() for backend-portable device termination. +// Defined here (not in libmesh_common.h) because Kokkos headers +// are only available in .K translation units. 
+# ifndef NDEBUG +# define LIBMESH_DEVICE_ASSERT(asserted) \ + do { if (!(asserted)) { \ + printf("libMesh assert failed: %s, file %s, line %d\n", \ + #asserted, __FILE__, __LINE__); \ + ::Kokkos::abort("libmesh_assert failed"); \ + } } while (0) +# else +# define LIBMESH_DEVICE_ASSERT(asserted) ((void) 0) +# endif + +# define LIBMESH_DEVICE_ERROR_MSG(msg) \ + do { \ + printf("libMesh error: %s, file %s, line %d\n", \ + msg, __FILE__, __LINE__); \ + ::Kokkos::abort(msg); \ + } while (0) + +# define LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg) \ + do { if (cond) { LIBMESH_DEVICE_ERROR_MSG(msg); } } while (0) + +#else +# define LIBMESH_DEVICE_INLINE inline +# define LIBMESH_IN_DEVICE_CODE 0 +# define LIBMESH_DEVICE_ERROR_MSG(msg) libmesh_error_msg(msg) +# define LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg) libmesh_error_msg_if(cond, msg) +#endif + +#endif // LIBMESH_LIBMESH_DEVICE_H diff --git a/include/include_HEADERS b/include/include_HEADERS index 115b473ba2e..8d980280d31 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -28,6 +28,7 @@ include_HEADERS = \ base/libmesh_abort.h \ base/libmesh_base.h \ base/libmesh_common.h \ + base/libmesh_device.h \ base/libmesh_documentation.h \ base/libmesh_exceptions.h \ base/libmesh_logging.h \ @@ -322,7 +323,6 @@ include_HEADERS = \ parallel/threads_allocators.h \ parallel/threads_none.h \ parallel/threads_pthread.h \ - parallel/threads_spin_mutex_forward.h \ parallel/threads_tbb.h \ partitioning/centroid_partitioner.h \ partitioning/hilbert_sfc_partitioner.h \ diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index 7b8880c3a42..f6010488ff0 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -19,6 +19,7 @@ BUILT_SOURCES = \ libmesh_augment_std_namespace.h \ libmesh_base.h \ libmesh_common.h \ + libmesh_device.h \ libmesh_documentation.h \ libmesh_exceptions.h \ libmesh_logging.h \ @@ -317,7 +318,6 @@ BUILT_SOURCES = \ threads_allocators.h \ threads_none.h \ 
threads_pthread.h \ - threads_spin_mutex_forward.h \ threads_tbb.h \ centroid_partitioner.h \ hilbert_sfc_partitioner.h \ @@ -657,6 +657,9 @@ libmesh_base.h: $(top_srcdir)/include/base/libmesh_base.h libmesh_common.h: $(top_srcdir)/include/base/libmesh_common.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +libmesh_device.h: $(top_srcdir)/include/base/libmesh_device.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + libmesh_documentation.h: $(top_srcdir)/include/base/libmesh_documentation.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1551,9 +1554,6 @@ threads_none.h: $(top_srcdir)/include/parallel/threads_none.h threads_pthread.h: $(top_srcdir)/include/parallel/threads_pthread.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ -threads_spin_mutex_forward.h: $(top_srcdir)/include/parallel/threads_spin_mutex_forward.h - $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - threads_tbb.h: $(top_srcdir)/include/parallel/threads_tbb.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -2138,4 +2138,3 @@ xdr_cxx.h: $(top_srcdir)/include/utils/xdr_cxx.h parallel_communicator_specializations: $(top_srcdir)/include/timpi_shims/parallel_communicator_specializations $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - diff --git a/include/libmesh_config.h.in b/include/libmesh_config.h.in index 9adaa9efe05..62256949fed 100644 --- a/include/libmesh_config.h.in +++ b/include/libmesh_config.h.in @@ -434,6 +434,9 @@ /* Define to 1 if you have the header file. 
*/ #undef HAVE_INTTYPES_H +/* Define if Kokkos support is enabled in libMesh */ +#undef HAVE_KOKKOS + /* Flag indicating whether the library will be compiled with LASPACK support */ #undef HAVE_LASPACK @@ -485,9 +488,6 @@ annotations */ #undef HAVE_NVTX_API -/* Defined if the installed TBB is oneTBB (>= 2021) */ -#undef HAVE_ONETBB - /* Define if OpenMP is enabled */ #undef HAVE_OPENMP diff --git a/include/numerics/tensor_tools.h b/include/numerics/tensor_tools.h index 7617116f10d..f183380a84d 100644 --- a/include/numerics/tensor_tools.h +++ b/include/numerics/tensor_tools.h @@ -45,92 +45,92 @@ namespace TensorTools // Vector specializations will follow. template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if::value && ScalarTraits::value, typename CompareTypes::supertype>::type inner_product(const T & a, const T2& b) { return a * b; } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeVector & a, const TypeVector & b) { return a * b; } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeTensor & a, const TypeTensor & b) { return a.contract(b); } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeNTensor & a, const TypeNTensor & b) { return a.contract(b); } template -inline +LIBMESH_DEVICE_INLINE auto norm(const T & a) { using std::abs; return abs(a); } template -inline +LIBMESH_DEVICE_INLINE T norm(std::complex a) { using std::abs; return abs(a); } template -inline +LIBMESH_DEVICE_INLINE auto norm(const TypeVector & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const VectorValue & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const TypeTensor & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline 
+LIBMESH_DEVICE_INLINE auto norm(const TensorValue & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const T & a) -{ using std::norm; return norm(a); } +{ return a * libmesh_conj(a); } template -inline +LIBMESH_DEVICE_INLINE T norm_sq(std::complex a) { using std::norm; return norm(a); } template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TypeVector & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const VectorValue & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TypeTensor & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TensorValue & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE bool is_zero(const T & a){ return a.is_zero();} // Any tensor-rank-independent code will need to include diff --git a/include/numerics/tensor_value.h b/include/numerics/tensor_value.h index 3a0d680476d..c99e0cac003 100644 --- a/include/numerics/tensor_value.h +++ b/include/numerics/tensor_value.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/type_tensor.h" +#include "libmesh/libmesh_device.h" #include "libmesh/libmesh.h" // for pi #ifdef LIBMESH_HAVE_METAPHYSICL @@ -93,12 +94,14 @@ class TensorValue : public TypeTensor * Constructor. Takes 1 row vector for LIBMESH_DIM=1 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx); /** * Constructor. Takes 2 row vectors for LIBMESH_DIM=2 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx, const TypeVector & vy); @@ -106,6 +109,7 @@ class TensorValue : public TypeTensor * Constructor. Takes 3 row vectors for LIBMESH_DIM=3 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx, const TypeVector & vy, const TypeVector & vz); @@ -134,11 +138,11 @@ class TensorValue : public TypeTensor const TypeTensor & p_im); #endif - /** * Assignment-from-scalar operator. 
Used only to zero out tensors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TensorValue &>::type @@ -211,7 +215,7 @@ typedef NumberTensorValue Tensor; //------------------------------------------------------ // Inline functions template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue () : TypeTensor () { @@ -220,7 +224,7 @@ TensorValue::TensorValue () : template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const T & xx, const T & xy, const T & xz, @@ -237,7 +241,7 @@ TensorValue::TensorValue (const T & xx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const Scalar & xx, const Scalar & xy, const Scalar & xz, @@ -257,7 +261,7 @@ TensorValue::TensorValue (const Scalar & xx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TensorValue & p) : TypeTensor (p) { @@ -267,7 +271,7 @@ TensorValue::TensorValue (const TensorValue & p) : template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx) : TypeTensor (vx) { @@ -277,7 +281,7 @@ TensorValue::TensorValue (const TypeVector & vx) : template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx, const TypeVector & vy) : TypeTensor (vx, vy) @@ -288,7 +292,7 @@ TensorValue::TensorValue (const TypeVector & vx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx, const TypeVector & vy, const TypeVector & vz) : @@ -300,7 +304,7 @@ TensorValue::TensorValue (const TypeVector & vx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeTensor & p) : TypeTensor (p) { @@ -309,7 +313,7 @@ TensorValue::TensorValue (const TypeTensor & p) : #ifdef LIBMESH_USE_COMPLEX_NUMBERS template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeTensor & p_re, const TypeTensor & p_im) : TypeTensor (Complex (p_re(0,0), p_im(0,0)), diff --git a/include/numerics/type_tensor.h 
b/include/numerics/type_tensor.h index 470b745f120..ac6dc145428 100644 --- a/include/numerics/type_tensor.h +++ b/include/numerics/type_tensor.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" // C++ includes @@ -101,13 +102,16 @@ class TypeTensor * many vectors are needed. */ template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx); template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx, const TypeVector & vy); template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx, const TypeVector & vy, const TypeVector & vz); @@ -133,12 +137,14 @@ class TypeTensor /** * Destructor. */ + LIBMESH_DEVICE_INLINE ~TypeTensor(); /** * Assign to this tensor without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void assign (const TypeTensor &); /** @@ -147,6 +153,7 @@ class TypeTensor * \returns A reference to *this. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor &>::type @@ -166,11 +173,13 @@ class TypeTensor /** * \returns A proxy for the \f$ i^{th} \f$ column of the tensor. */ + LIBMESH_DEVICE_INLINE ConstTypeTensorColumn slice (const unsigned int i) const; /** * \returns A writable proxy for the \f$ i^{th} \f$ column of the tensor. */ + LIBMESH_DEVICE_INLINE TypeTensorColumn slice (const unsigned int i); /** @@ -181,6 +190,7 @@ class TypeTensor /** * \returns A copy of one column of the tensor as a TypeVector. */ + LIBMESH_DEVICE_INLINE TypeVector column(const unsigned int r) const; /** @@ -210,6 +220,7 @@ class TypeTensor * Add a scaled tensor to this tensor without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void add_scaled (const TypeTensor &, const T &); /** @@ -240,6 +251,7 @@ class TypeTensor * temporary. 
*/ template + LIBMESH_DEVICE_INLINE void subtract_scaled (const TypeTensor &, const T &); /** @@ -265,6 +277,7 @@ class TypeTensor */ template ::value, int>::type = 0> + LIBMESH_DEVICE_INLINE const TypeTensor & operator *= (const Scalar & factor) { for (unsigned int i=0; i + LIBMESH_DEVICE_INLINE typename CompareTypes::supertype contract (const TypeTensor &) const; @@ -339,6 +353,7 @@ class TypeTensor * \returns A copy of the result vector, this tensor is unchanged. */ template + LIBMESH_DEVICE_INLINE TypeVector::supertype> left_multiply (const TypeVector & p) const; @@ -358,6 +373,7 @@ class TypeTensor * * \returns The solution in the \p x vector. */ + LIBMESH_DEVICE_INLINE void solve(const TypeVector & b, TypeVector & x) const; /** @@ -375,6 +391,7 @@ class TypeTensor /** * \returns True if all values in the tensor are zero */ + LIBMESH_DEVICE_INLINE bool is_zero() const; /** @@ -393,11 +410,13 @@ class TypeTensor /** * Set all entries of the tensor to 0. */ + LIBMESH_DEVICE_INLINE void zero(); /** * \returns \p true if two tensors are equal, \p false otherwise. 
*/ + LIBMESH_DEVICE_INLINE bool operator == (const TypeTensor & rhs) const; /** @@ -513,7 +532,7 @@ class ConstTypeTensorColumn //------------------------------------------------------ // Inline functions template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor () { _coords[0] = {}; @@ -536,7 +555,7 @@ TypeTensor::TypeTensor () template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const T & xx, const T & xy, const T & xz, @@ -582,7 +601,7 @@ TypeTensor::TypeTensor (const T & xx, template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const Scalar & xx, const Scalar & xy, const Scalar & xz, @@ -631,7 +650,7 @@ TypeTensor::TypeTensor (const Scalar & xx, template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const TypeTensor & p) { // copy the nodes from vector p to me @@ -642,6 +661,7 @@ TypeTensor::TypeTensor (const TypeTensor & p) template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx) { libmesh_assert_equal_to (LIBMESH_DIM, 1); @@ -650,6 +670,7 @@ TypeTensor::TypeTensor(const TypeVector & vx) template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx, const TypeVector & vy) { @@ -666,6 +687,7 @@ TypeTensor::TypeTensor(const TypeVector & vx, template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx, const TypeVector & vy, const TypeVector & vz) @@ -690,7 +712,7 @@ TypeTensor::TypeTensor(const TypeVector & vx, template -inline +LIBMESH_DEVICE_INLINE TypeTensor::~TypeTensor () { } @@ -699,7 +721,7 @@ TypeTensor::~TypeTensor () template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::assign (const TypeTensor & p) { for (unsigned int i=0; i::assign (const TypeTensor & p) template -inline +LIBMESH_DEVICE_INLINE const T & TypeTensor::operator () (const unsigned int i, const unsigned int j) const { @@ -728,14 +750,14 @@ const T & TypeTensor::operator () (const unsigned int i, template -inline +LIBMESH_DEVICE_INLINE T & 
TypeTensor::operator () (const unsigned int i, const unsigned int j) { #if LIBMESH_DIM < 3 - libmesh_error_msg_if(i >= LIBMESH_DIM || j >= LIBMESH_DIM, - "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); + LIBMESH_DEVICE_ERROR_MSG_IF(i >= LIBMESH_DIM || j >= LIBMESH_DIM, + "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); #endif @@ -747,7 +769,7 @@ T & TypeTensor::operator () (const unsigned int i, template -inline +LIBMESH_DEVICE_INLINE ConstTypeTensorColumn TypeTensor::slice (const unsigned int i) const { @@ -757,7 +779,7 @@ TypeTensor::slice (const unsigned int i) const template -inline +LIBMESH_DEVICE_INLINE TypeTensorColumn TypeTensor::slice (const unsigned int i) { @@ -767,7 +789,7 @@ TypeTensor::slice (const unsigned int i) template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeTensor::row(const unsigned int r) const { @@ -781,7 +803,7 @@ TypeTensor::row(const unsigned int r) const template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeTensor::column(const unsigned int r) const { @@ -796,7 +818,7 @@ TypeTensor::column(const unsigned int r) const template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> TypeTensor::operator + (const TypeTensor & p) const { @@ -831,7 +853,7 @@ TypeTensor::operator + (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator += (const TypeTensor & p) { this->add (p); @@ -843,7 +865,7 @@ const TypeTensor & TypeTensor::operator += (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::add (const TypeTensor & p) { for (unsigned int i=0; i::add (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::add_scaled (const TypeTensor & p, const T & factor) { for (unsigned int i=0; i::add_scaled (const TypeTensor & p, const T & factor) template template -inline +LIBMESH_DEVICE_INLINE 
TypeTensor::supertype> TypeTensor::operator - (const TypeTensor & p) const { @@ -901,7 +923,7 @@ TypeTensor::operator - (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator -= (const TypeTensor & p) { this->subtract (p); @@ -913,7 +935,7 @@ const TypeTensor & TypeTensor::operator -= (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::subtract (const TypeTensor & p) { for (unsigned int i=0; i::subtract (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::subtract_scaled (const TypeTensor & p, const T & factor) { for (unsigned int i=0; i::subtract_scaled (const TypeTensor & p, const T & factor) template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::operator - () const { @@ -967,7 +989,7 @@ TypeTensor TypeTensor::operator - () const template template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::operator * (const Scalar & factor) const -> typename std::enable_if< ScalarTraits::value, @@ -1003,7 +1025,7 @@ TypeTensor::operator * (const Scalar & factor) const -> typename std::enable_ template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor::supertype>>::type @@ -1015,7 +1037,7 @@ operator * (const Scalar & factor, template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor::supertype>>::type @@ -1053,7 +1075,7 @@ TypeTensor::operator / (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::transpose() const { #if LIBMESH_DIM == 1 @@ -1083,7 +1105,7 @@ TypeTensor TypeTensor::transpose() const template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::inverse() const { #if LIBMESH_DIM == 1 @@ -1132,7 +1154,7 @@ TypeTensor TypeTensor::inverse() const template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::solve(const TypeVector & b, TypeVector & x) const { #if LIBMESH_DIM == 1 @@ -1183,7 +1205,7 @@ 
void TypeTensor::solve(const TypeVector & b, TypeVector & x) const template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator /= (const T & factor) { libmesh_assert_not_equal_to (factor, static_cast(0.)); @@ -1199,7 +1221,7 @@ const TypeTensor & TypeTensor::operator /= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeTensor::operator * (const TypeVector & p) const { @@ -1213,7 +1235,7 @@ TypeTensor::operator * (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeTensor::left_multiply (const TypeVector & p) const { @@ -1226,7 +1248,7 @@ TypeTensor::left_multiply (const TypeVector & p) const } template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> operator * (const TypeVector & a, const TypeTensor & b) { @@ -1235,7 +1257,7 @@ operator * (const TypeVector & a, const TypeTensor & b) template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> TypeTensor::operator * (const TypeTensor & p) const { @@ -1250,7 +1272,7 @@ TypeTensor::operator * (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator *= (const TypeTensor & p) { TypeTensor temp; @@ -1270,7 +1292,7 @@ const TypeTensor & TypeTensor::operator *= (const TypeTensor & p) */ template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeTensor::contract (const TypeTensor & t) const { @@ -1283,7 +1305,7 @@ TypeTensor::contract (const TypeTensor & t) const template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::norm() const { using std::sqrt; @@ -1292,7 +1314,7 @@ auto TypeTensor::norm() const template -inline +LIBMESH_DEVICE_INLINE bool TypeTensor::is_zero() const { for (const auto & val : _coords) @@ -1302,7 +1324,7 @@ bool TypeTensor::is_zero() const } template -inline +LIBMESH_DEVICE_INLINE T TypeTensor::det() const { #if LIBMESH_DIM == 1 @@ -1325,7 +1347,7 @@ T TypeTensor::det() const } 
template -inline +LIBMESH_DEVICE_INLINE T TypeTensor::tr() const { #if LIBMESH_DIM == 1 @@ -1342,7 +1364,7 @@ T TypeTensor::tr() const } template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::zero() { for (unsigned int i=0; i::zero() template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::norm_sq () const { Real sum = 0.; @@ -1364,7 +1386,7 @@ auto TypeTensor::norm_sq () const template -inline +LIBMESH_DEVICE_INLINE bool TypeTensor::operator == (const TypeTensor & rhs) const { #if LIBMESH_DIM == 1 @@ -1436,7 +1458,7 @@ void TypeTensor::print(std::ostream & os) const } template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> outer_product(const TypeVector & a, const TypeVector & b) { diff --git a/include/numerics/type_vector.h b/include/numerics/type_vector.h index aaf79a9fd22..a9ae1bb2518 100644 --- a/include/numerics/type_vector.h +++ b/include/numerics/type_vector.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" #include "libmesh/compare_types.h" #include "libmesh/tensor_tools.h" #include "libmesh/int_range.h" @@ -141,12 +142,14 @@ class TypeVector * Assign to this vector without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void assign (const TypeVector &); /** * Assignment-from-scalar operator. Used only to zero out vectors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector &>::type @@ -157,12 +160,14 @@ class TypeVector * \returns A const reference to the \f$ i^{th} \f$ entry of the vector. */ const T & operator () (const unsigned int i) const; + LIBMESH_DEVICE_INLINE const T & slice (const unsigned int i) const { return (*this)(i); } /** * \returns A writable reference to the \f$ i^{th} \f$ entry of the vector. 
*/ T & operator () (const unsigned int i); + LIBMESH_DEVICE_INLINE T & slice (const unsigned int i) { return (*this)(i); } /** @@ -192,6 +197,7 @@ class TypeVector * Add a scaled value to this vector without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void add_scaled (const TypeVector &, const T &); /** @@ -222,6 +228,7 @@ class TypeVector * temporary. */ template + LIBMESH_DEVICE_INLINE void subtract_scaled (const TypeVector &, const T &); /** @@ -279,6 +286,7 @@ class TypeVector * \returns The result of TypeVector::operator*(). */ template + LIBMESH_DEVICE_INLINE typename CompareTypes::supertype contract (const TypeVector &) const; @@ -292,6 +300,7 @@ class TypeVector /** * \returns A unit vector in the direction of *this. */ + LIBMESH_DEVICE_INLINE TypeVector unit() const; /** @@ -309,16 +318,19 @@ class TypeVector /** * \returns The L1 norm of the vector */ + LIBMESH_DEVICE_INLINE auto l1_norm() const; /** * \returns True if all values in the vector are zero */ + LIBMESH_DEVICE_INLINE bool is_zero() const; /** * Set all entries of the vector to 0. */ + LIBMESH_DEVICE_INLINE void zero(); /** @@ -342,11 +354,13 @@ class TypeVector * \note For floating point types T, the function \p absolute_fuzzy_equals() * may be a more appropriate choice. 
*/ + LIBMESH_DEVICE_INLINE bool operator == (const TypeVector & rhs) const; /** * \returns !(*this == rhs) */ + LIBMESH_DEVICE_INLINE bool operator != (const TypeVector & rhs) const; /** @@ -425,7 +439,7 @@ class TypeVector // Inline functions template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector () { _coords[0] = {}; @@ -442,7 +456,7 @@ TypeVector::TypeVector () template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const T & x, const T & y, const T & z) @@ -467,7 +481,7 @@ TypeVector::TypeVector (const T & x, template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (typename std::enable_if::value, const Scalar1>::type & x, @@ -497,7 +511,7 @@ TypeVector::TypeVector (typename template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const Scalar & x, typename std::enable_if::value, @@ -518,7 +532,7 @@ TypeVector::TypeVector (const Scalar & x, template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const TypeVector & p) { // copy the nodes from vector p to me @@ -530,7 +544,7 @@ TypeVector::TypeVector (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::assign (const TypeVector & p) { for (unsigned int i=0; i::assign (const TypeVector & p) template -inline +LIBMESH_DEVICE_INLINE const T & TypeVector::operator () (const unsigned int i) const { libmesh_assert_less (i, LIBMESH_DIM); @@ -551,7 +565,7 @@ const T & TypeVector::operator () (const unsigned int i) const template -inline +LIBMESH_DEVICE_INLINE T & TypeVector::operator () (const unsigned int i) { libmesh_assert_less (i, LIBMESH_DIM); @@ -563,7 +577,7 @@ T & TypeVector::operator () (const unsigned int i) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::operator + (const TypeVector & p) const { @@ -589,7 +603,7 @@ TypeVector::operator + (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator += (const TypeVector & p) 
{ this->add (p); @@ -601,7 +615,7 @@ const TypeVector & TypeVector::operator += (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::add (const TypeVector & p) { #if LIBMESH_DIM == 1 @@ -625,7 +639,7 @@ void TypeVector::add (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::add_scaled (const TypeVector & p, const T & factor) { #if LIBMESH_DIM == 1 @@ -649,7 +663,7 @@ void TypeVector::add_scaled (const TypeVector & p, const T & factor) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::operator - (const TypeVector & p) const { @@ -676,7 +690,7 @@ TypeVector::operator - (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator -= (const TypeVector & p) { this->subtract (p); @@ -688,7 +702,7 @@ const TypeVector & TypeVector::operator -= (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::subtract (const TypeVector & p) { for (unsigned int i=0; i::subtract (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::subtract_scaled (const TypeVector & p, const T & factor) { for (unsigned int i=0; i::subtract_scaled (const TypeVector & p, const T & factor) template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeVector::operator - () const { @@ -734,7 +748,7 @@ TypeVector TypeVector::operator - () const template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -761,7 +775,7 @@ TypeVector::operator * (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -774,7 +788,7 @@ operator * (const Scalar & factor, template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator *= (const T & factor) { #if LIBMESH_DIM == 1 @@ -799,7 +813,7 @@ const TypeVector & 
TypeVector::operator *= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -830,7 +844,7 @@ TypeVector::operator / (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator /= (const T & factor) { @@ -847,7 +861,7 @@ TypeVector::operator /= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeVector::operator * (const TypeVector & p) const { @@ -869,7 +883,7 @@ TypeVector::operator * (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeVector::contract(const TypeVector & p) const { @@ -880,6 +894,7 @@ TypeVector::contract(const TypeVector & p) const template template +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::cross(const TypeVector & p) const { @@ -903,7 +918,7 @@ TypeVector::cross(const TypeVector & p) const template -inline +LIBMESH_DEVICE_INLINE auto TypeVector::norm() const { using std::sqrt; @@ -913,7 +928,7 @@ auto TypeVector::norm() const template -inline +LIBMESH_DEVICE_INLINE void TypeVector::zero() { for (unsigned int i=0; i::zero() template -inline +LIBMESH_DEVICE_INLINE auto TypeVector::norm_sq() const { #if LIBMESH_DIM == 1 @@ -944,7 +959,7 @@ auto TypeVector::norm_sq() const template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::is_zero() const { for (const auto & val : _coords) @@ -958,6 +973,7 @@ auto TypeVector::l1_norm() const; template +LIBMESH_DEVICE_INLINE auto TypeVector::l1_norm() const { @@ -988,7 +1004,7 @@ bool TypeVector::relative_fuzzy_equals(const TypeVector & rhs, Real tol) c template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::operator == (const TypeVector & rhs) const { #if LIBMESH_DIM == 1 @@ -1010,7 +1026,7 @@ bool TypeVector::operator == (const TypeVector & rhs) const template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::operator != (const TypeVector 
& rhs) const { return (!(*this == rhs)); @@ -1027,7 +1043,7 @@ bool TypeVector::operator != (const TypeVector & rhs) const // [b0, b1, b2] // [c0, c1, c2] template -inline +LIBMESH_DEVICE_INLINE T triple_product(const TypeVector & a, const TypeVector & b, const TypeVector & c) @@ -1049,7 +1065,7 @@ T triple_product(const TypeVector & a, // to be positive if the vectors are obey the right-hand rule, or // negative for a left-hand orientation. template -inline +LIBMESH_DEVICE_INLINE T solid_angle(const TypeVector & v01, const TypeVector & v02, const TypeVector & v03) @@ -1075,7 +1091,7 @@ T solid_angle(const TypeVector & v01, * calling b.cross(c).norm_sq(). */ template -inline +LIBMESH_DEVICE_INLINE T cross_norm_sq(const TypeVector & b, const TypeVector & c) { @@ -1096,7 +1112,7 @@ T cross_norm_sq(const TypeVector & b, * Calls cross_norm_sq() and takes the square root of the result. */ template -inline +LIBMESH_DEVICE_INLINE T cross_norm(const TypeVector & b, const TypeVector & c) { @@ -1105,7 +1121,7 @@ T cross_norm(const TypeVector & b, } template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeVector::unit() const { @@ -1167,6 +1183,7 @@ struct CompareTypes, TypeVector> }; template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE TypeVector::supertype> outer_product(const T & a, const TypeVector & b) { @@ -1178,6 +1195,7 @@ outer_product(const T & a, const TypeVector & b) } template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE TypeVector::supertype> outer_product(const TypeVector & a, const T2 & b) { @@ -1208,6 +1226,7 @@ l1_norm_diff(const TypeVector & vec1, const TypeVector & vec2) namespace std { template +LIBMESH_DEVICE_INLINE auto norm(const libMesh::TypeVector & vector) -> decltype(std::norm(T())) { // Yea I agree it's dumb that the standard returns the square of the Euclidean norm diff --git a/include/numerics/vector_value.h b/include/numerics/vector_value.h index c93f17313dc..45116e1a737 100644 --- a/include/numerics/vector_value.h +++ 
b/include/numerics/vector_value.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/type_vector.h" +#include "libmesh/libmesh_device.h" #include "libmesh/compare_types.h" #ifdef LIBMESH_HAVE_METAPHYSICL @@ -124,6 +125,7 @@ class VectorValue : public TypeVector * Assignment-from-scalar operator. Used only to zero out vectors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, VectorValue &>::type @@ -146,7 +148,7 @@ typedef NumberVectorValue Gradient; // Inline functions template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue () : TypeVector () { @@ -154,7 +156,7 @@ VectorValue::VectorValue () : template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const T & x, const T & y, const T & z) : @@ -166,7 +168,7 @@ VectorValue::VectorValue (const T & x, template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (typename std::enable_if::value, const Scalar1>::type & x, @@ -183,7 +185,7 @@ VectorValue::VectorValue (typename template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const Scalar & x, typename std::enable_if::value, @@ -194,7 +196,7 @@ VectorValue::VectorValue (const Scalar & x, template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const VectorValue & p) : TypeVector (p) { @@ -204,7 +206,7 @@ VectorValue::VectorValue (const VectorValue & p) : template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const TypeVector & p) : TypeVector (p) { @@ -212,7 +214,7 @@ VectorValue::VectorValue (const TypeVector & p) : #ifdef LIBMESH_USE_COMPLEX_NUMBERS template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const TypeVector & p_re, const TypeVector & p_im) : TypeVector (Complex (p_re(0), p_im(0)), From 37c9464ff1d6e7f6886cbf236d0bd236e127832f Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 6 May 2026 16:17:12 -0600 Subject: [PATCH 02/48] Fix Kokkos backend device and link support --- include/base/libmesh_exceptions.h | 7 ++ 
m4/libmesh_optional_packages.m4   | 133 ++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)

diff --git a/include/base/libmesh_exceptions.h b/include/base/libmesh_exceptions.h
index 6ca79b7b269..65237e1478f 100644
--- a/include/base/libmesh_exceptions.h
+++ b/include/base/libmesh_exceptions.h
@@ -23,6 +23,7 @@
 
 #include "libmesh/libmesh_config.h"
 #include "libmesh/libmesh_abort.h"
+#include "libmesh/libmesh_device.h"
 
 #include
 #include
@@ -212,7 +213,13 @@ class TerminationException
 #ifdef LIBMESH_ENABLE_EXCEPTIONS
 #define libmesh_noexcept noexcept
 
+#if LIBMESH_IN_DEVICE_CODE
+// Kokkos device code does not support C++ exceptions.
+#define LIBMESH_THROW(e) do { LIBMESH_DEVICE_ERROR_MSG((e).what()); } while (0)
+#else
 #define LIBMESH_THROW(e) do { throw e; } while (0)
+#endif
+
 #define libmesh_rethrow throw
 #define libmesh_try try
 #define libmesh_catch(e) catch(e)
diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4
index 2c569d088c0..9fb641d1945 100644
--- a/m4/libmesh_optional_packages.m4
+++ b/m4/libmesh_optional_packages.m4
@@ -861,6 +861,139 @@ AM_CONDITIONAL(LIBMESH_ENABLE_METAPHYSICL, test x$enablemetaphysicl = xyes)
 
 
 
+# -------------------------------------------------------------
+# Kokkos -- optional, enables the native Kokkos FE math path
+# -------------------------------------------------------------
+AC_ARG_WITH([kokkos],
+            AS_HELP_STRING([--with-kokkos=DIR],
+                           [Enable Kokkos support using the installation at DIR]),
+            [KOKKOS_DIR="$withval"],
+            [KOKKOS_DIR="no"])
+
+AC_ARG_WITH([kokkos-backend],
+            AS_HELP_STRING([--with-kokkos-backend=BACKEND],
+                           [cuda|hip|sycl|openmp|serial (default: auto-detect from KokkosCore_config.h)]),
+            [KOKKOS_BACKEND="$withval"], [KOKKOS_BACKEND="auto"])
+
+dnl Allow the caller (e.g. MOOSE's configure_libmesh.sh) to pre-set the
+dnl Kokkos compiler and flags via environment variables. If KOKKOS_CXX is
+dnl already set, we skip auto-detection entirely — the caller knows best.
+dnl We use AC_SUBST (not AC_ARG_VAR) so these flags stay scoped to .K
+dnl compilation rules and don't leak into the main CPPFLAGS/CXXFLAGS.
+
+AS_IF([test "x$KOKKOS_DIR" != "xno"],
+      [
+        AC_CHECK_FILE([$KOKKOS_DIR/include/Kokkos_Core.hpp],
+          [
+            enablekokkos=yes
+            libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES -I$KOKKOS_DIR/include"
+            libmesh_optional_LIBS="$libmesh_optional_LIBS -L$KOKKOS_DIR/lib -lkokkoscore"
+
+            dnl Only auto-detect if KOKKOS_CXX was not pre-set by the caller
+            AS_IF([test "x$KOKKOS_CXX" = "x"],
+              [
+                KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h"
+
+                dnl Auto-detect backend
+                dnl CMake writes disabled options as "/* #undef KOKKOS_ENABLE_X */", so
+                dnl match only active "#define KOKKOS_ENABLE_X" lines, not the bare name.
+                AS_IF([test "x$KOKKOS_BACKEND" = "xauto"],
+                  [
+                    AS_IF([test -r "$KOKKOS_CFG"],
+                      [
+                        AS_IF([grep -Eq '^ *# *define +KOKKOS_ENABLE_CUDA( |$)' "$KOKKOS_CFG"],
+                              [KOKKOS_BACKEND=cuda],
+                              [AS_IF([grep -Eq '^ *# *define +KOKKOS_ENABLE_HIP( |$)' "$KOKKOS_CFG"],
+                                     [KOKKOS_BACKEND=hip],
+                                     [AS_IF([grep -Eq '^ *# *define +KOKKOS_ENABLE_SYCL( |$)' "$KOKKOS_CFG"],
+                                            [KOKKOS_BACKEND=sycl],
+                                            [AS_IF([grep -Eq '^ *# *define +KOKKOS_ENABLE_OPENMP( |$)' "$KOKKOS_CFG"],
+                                                   [KOKKOS_BACKEND=openmp],
+                                                   [KOKKOS_BACKEND=serial])])])])
+                      ],
+                      [KOKKOS_BACKEND=serial])
+                  ])
+
+                AC_MSG_RESULT([Kokkos backend: $KOKKOS_BACKEND])
+
+                dnl Check if Kokkos was built with OpenMP
+                have_kokkos_openmp=no
+                AS_IF([test -r "$KOKKOS_CFG"],
+                      [AS_IF([grep -Eq '^ *# *define +KOKKOS_ENABLE_OPENMP( |$)' "$KOKKOS_CFG"],
+                             [have_kokkos_openmp=yes])])
+
+                dnl No compiler driver recognizes the .K extension, so every backend
+                dnl must name the source language explicitly with -x in KOKKOS_CXXFLAGS.
+                case "$KOKKOS_BACKEND" in
+                  cuda)
+                    AC_PATH_PROG([NVCC],[nvcc],[no],[$PATH])
+                    AS_IF([test "x$NVCC" = "xno"],
+                          [AC_MSG_ERROR([nvcc not found but Kokkos CUDA backend requested])])
+                    KOKKOS_CXX="$NVCC"
+                    KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --disable-warnings -x cu -ccbin $CXX"
+                    KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler -L$KOKKOS_DIR/lib"
+                    AS_IF([test "x$have_kokkos_openmp" = "xyes"],
+                          [
+                            KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp"
+                            KOKKOS_LDFLAGS="$KOKKOS_LDFLAGS -fopenmp"
+                          ])
+                    ;;
+                  hip)
+                    AC_PATH_PROG([HIPCC],[hipcc],[no],[$PATH])
+                    AS_IF([test "x$HIPCC" = "xno"],
+                          [AC_MSG_ERROR([hipcc not found but Kokkos HIP backend requested])])
+                    KOKKOS_CXX="$HIPCC"
+                    KOKKOS_CXXFLAGS="-x hip"
+                    KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib"
+                    ;;
+                  sycl)
+                    AC_PATH_PROG([ICPX],[icpx],[no],[$PATH])
+                    AS_IF([test "x$ICPX" = "xno"],
+                          [AC_MSG_ERROR([icpx not found but Kokkos SYCL backend requested])])
+                    KOKKOS_CXX="$ICPX"
+                    KOKKOS_CXXFLAGS="-fsycl -x c++"
+                    KOKKOS_LDFLAGS="-fsycl -L$KOKKOS_DIR/lib"
+                    ;;
+                  openmp)
+                    KOKKOS_CXX="${CXX}"
+                    KOKKOS_CXXFLAGS="-fopenmp -x c++"
+                    KOKKOS_LDFLAGS="-fopenmp -L$KOKKOS_DIR/lib"
+                    ;;
+                  serial|*)
+                    KOKKOS_CXX="${CXX}"
+                    KOKKOS_CXXFLAGS="-x c++"
+                    KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib"
+                    ;;
+                esac
+              ],
+              [AC_MSG_RESULT([Using caller-provided KOKKOS_CXX=$KOKKOS_CXX])])
+
+            dnl Set defaults for any variables not provided by caller or auto-detect
+            KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION -I$KOKKOS_DIR/include}"
+            KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}"
+            KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}"
+
+            AC_DEFINE([HAVE_KOKKOS], [1],
+                      [Define if Kokkos support is enabled in libMesh])
+            AC_MSG_RESULT(<<< Configuring library with Kokkos support >>>)
+          ],
+          [
+            AC_MSG_WARN([Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support])
+            enablekokkos=no
+          ])
+      ],
+      [enablekokkos=no])
+
+AC_SUBST([KOKKOS_CXX])
+AC_SUBST([KOKKOS_CPPFLAGS])
+AC_SUBST([KOKKOS_CXXFLAGS])
+AC_SUBST([KOKKOS_LDFLAGS])
+AC_SUBST([KOKKOS_LIBS])
+AM_CONDITIONAL(LIBMESH_ENABLE_KOKKOS, test x$enablekokkos = xyes)
+# -------------------------------------------------------------
+
+
+
 AS_IF([test "$enableoptional" != no],
       [
         AC_MSG_RESULT(----------------------------------------------)

From 1d82eb8a60e43ca78bd878a17da4d6299347b663 Mon Sep 17 00:00:00 2001
From: rochi00
Date: Wed, 6 May 2026 16:17:19 -0600
Subject: [PATCH 03/48] Add Kokkos numerics oracle tests

---
 include/gpu/kokkos_storage.h                  |  60 ++
 tests/Makefile.am                             |  41 +-
 .../numerics/kokkos_tensor_ops_oracle_test.K  | 588 ++++++++++++++++++
 .../numerics/kokkos_vector_ops_oracle_test.K  | 442 +++++++++++++
 4 files changed, 1130
insertions(+), 1 deletion(-) create mode 100644 include/gpu/kokkos_storage.h create mode 100644 tests/numerics/kokkos_tensor_ops_oracle_test.K create mode 100644 tests/numerics/kokkos_vector_ops_oracle_test.K diff --git a/include/gpu/kokkos_storage.h b/include/gpu/kokkos_storage.h new file mode 100644 index 00000000000..819aa1fdaa4 --- /dev/null +++ b/include/gpu/kokkos_storage.h @@ -0,0 +1,60 @@ +// libMesh Kokkos storage helpers for dimension-aware vector/tensor views. + +#ifndef LIBMESH_KOKKOS_STORAGE_H +#define LIBMESH_KOKKOS_STORAGE_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" + +namespace libMesh::Kokkos +{ + +template +LIBMESH_DEVICE_INLINE +VectorType load_vector(const ViewType & view, const unsigned int i) +{ + VectorType v; + v.zero(); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + v(d) = view(i, d); + + return v; +} + +template +LIBMESH_DEVICE_INLINE +void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) +{ + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + view(i, d) = v(d); +} + +template +LIBMESH_DEVICE_INLINE +TensorType load_tensor(const ViewType & view, const unsigned int i) +{ + TensorType T; + T.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + T(row, col) = view(i, row, col); + + return T; +} + +template +LIBMESH_DEVICE_INLINE +void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + view(i, row, col) = T(row, col); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_STORAGE_H diff --git a/tests/Makefile.am b/tests/Makefile.am index bb12f424833..6efb6d23e88 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -7,6 +7,7 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) 
-I$(top_builddir)/include \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) LIBS = $(libmesh_optional_LIBS) $(CPPUNIT_LIBS) +KOKKOS_TEST_CPPFLAGS = # We might have turned on -Werror and/or paranoid warnings CXXFLAGS_DBG += $(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) @@ -248,6 +249,26 @@ if LIBMESH_ENABLE_FPARSER endif check_PROGRAMS = # empty, append below +TESTS = + +if LIBMESH_ENABLE_KOKKOS + KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) + + check_PROGRAMS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit + TESTS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit + + kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K + kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K + kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +endif # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. Some cppunit versions work fine for us, @@ -358,9 +379,27 @@ $(top_builddir)/libmesh_oprof.la: FORCE if LIBMESH_ENABLE_CPPUNIT -TESTS = run_unit_tests.sh +TESTS += run_unit_tests.sh endif +# Compile .K translation units with the Kokkos device compiler. 
+# $(MPI_INCLUDES) is needed because KOKKOS_CXX may be nvcc/hipcc +# instead of the MPI compiler wrapper, so mpi.h won't be found implicitly. +.K.o: + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ + +# Custom link rules so the Kokkos compiler drives the final link step. +kokkos_vector_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ + +kokkos_tensor_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_tensor_ops_oracle_unit_LDFLAGS) -o $@ + CLEANFILES = cube_mesh.xda \ slit_mesh.xda \ slit_solution.xda \ diff --git a/tests/numerics/kokkos_tensor_ops_oracle_test.K b/tests/numerics/kokkos_tensor_ops_oracle_test.K new file mode 100644 index 00000000000..917a99aba23 --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_test.K @@ -0,0 +1,588 @@ +// Kokkos kernel oracle tests for libMesh tensor algebra used by Kokkos kernels. +// +// Compares device-side tensor/vector +// operations against host libMesh TypeTensor / TypeVector oracles built from +// direct hand-written fixtures. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "libmesh/libmesh_config.h" + +#include "libmesh/libmesh.h" +#include "libmesh/point.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_n_tensor.h" +#include "libmesh/vector_value.h" +#include "gpu/kokkos_storage.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include +#include +#include + +using libMesh::Real; + +static constexpr double tol = 2.0e-13; + +using kernel_vector = libMesh::TypeVector; +using kernel_tensor = libMesh::TypeTensor; + +namespace +{ + +using vector_storage_view = Kokkos::View; +using tensor_storage_view = Kokkos::View; + +inline int +compare_device_scalars(const Kokkos::View & d_values, + const std::vector & ref_values, + const double local_tol = tol) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (std::fabs(h_values(i) - ref_values[i]) > local_tol) + ++fail; + + return fail; +} + +template +inline Kokkos::View +upload_objects(const std::vector & values, const char * label) +{ + Kokkos::View d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + Kokkos::deep_copy(d, h); + return d; +} + +inline int +compare_device_vectors(const vector_storage_view & d_values, + const std::vector & ref_values, + const double local_tol = tol) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + if (std::fabs(h_values(i, d) - ref_values[i](d)) > local_tol) + ++fail; + + return fail; +} + +inline int +compare_device_tensors(const tensor_storage_view & d_values, + const std::vector & ref_values, + const double local_tol = tol) +{ + auto 
h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (std::fabs(h_values(i, row, col) - ref_values[i](row, col)) > local_tol) + ++fail; + + return fail; +} + +inline vector_storage_view +upload_vectors(const std::vector & values, const char * label) +{ + vector_storage_view d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + h(i, component) = values[i](component); + + Kokkos::deep_copy(d, h); + return d; +} + +inline tensor_storage_view +upload_tensors(const std::vector & values, const char * label) +{ + tensor_storage_view d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + h(i, row, col) = values[i](row, col); + + Kokkos::deep_copy(d, h); + return d; +} + +inline libMesh::TypeVector +make_host_vector(const Real x, const Real y = 0, const Real z = 0) +{ + libMesh::TypeVector v; + v.zero(); + v(0) = x; +#if LIBMESH_DIM > 1 + v(1) = y; +#endif +#if LIBMESH_DIM > 2 + v(2) = z; +#endif + return v; +} + +inline libMesh::TypeTensor +make_host_tensor(const Real xx, + const Real xy = 0, + const Real xz = 0, + const Real yx = 0, + const Real yy = 0, + const Real yz = 0, + const Real zx = 0, + const Real zy = 0, + const Real zz = 0) +{ + libMesh::TypeTensor T; + T.zero(); + T(0, 0) = xx; +#if LIBMESH_DIM > 1 + T(0, 1) = xy; + T(1, 0) = yx; + T(1, 1) = yy; +#endif +#if LIBMESH_DIM > 2 + T(0, 2) = xz; + T(1, 2) = yz; + T(2, 0) = zx; + T(2, 1) = zy; + T(2, 2) = zz; +#endif + return T; +} + +struct tensor_dim_case +{ 
+ libMesh::TypeTensor J; + unsigned int dim; + const char * name; +}; + +static const tensor_dim_case dim_cases[] = { + { make_host_tensor(1.7, -0.2, 0.5, + 0.3, 1.1, -0.4, + -0.6, 0.8, 0.9), + 1, + "leading_1d" }, +#if LIBMESH_DIM > 1 + { make_host_tensor(2.5, -0.75, 0.4, + 1.2, 1.8, -0.6, + -0.3, 0.9, 1.4), + 2, + "leading_2d" }, +#endif +#if LIBMESH_DIM > 2 + { make_host_tensor(9.08973348886179e-01, 3.36455579239923e-01, 5.16389236893863e-01, + 9.44156071777472e-01, 1.35610910092516e-01, 1.49881119060538e-02, + 1.15988384086146e-01, 6.79845197685518e-03, 3.77028969454745e-01), + 3, + "leading_3d" } +#endif +}; + +inline libMesh::TypeTensor +host_identity(const unsigned int dim) +{ + libMesh::TypeTensor I; + I.zero(); + for (unsigned int i = 0; i < dim; ++i) + I(i, i) = Real(1); + return I; +} + +inline Real +host_leading_determinant(const libMesh::TypeTensor & J, const unsigned int dim) +{ + if (dim == 0) + return Real(1); + if (dim == 1) + return J(0, 0); + if (dim == 2) + return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); +#if LIBMESH_DIM > 2 + return J.det(); +#else + return Real(0); +#endif +} + +inline libMesh::TypeTensor +host_leading_inverse(const libMesh::TypeTensor & J, const unsigned int dim) +{ + libMesh::TypeTensor inv; + inv.zero(); + + if (dim == 1) + { + inv(0, 0) = Real(1) / J(0, 0); + return inv; + } + + if (dim == 2) + { + const Real det = host_leading_determinant(J, dim); + inv(0, 0) = J(1, 1) / det; + inv(0, 1) = -J(0, 1) / det; + inv(1, 0) = -J(1, 0) / det; + inv(1, 1) = J(0, 0) / det; + return inv; + } + +#if LIBMESH_DIM > 2 + return libMesh::TypeTensor(J.inverse()); +#else + return inv; +#endif +} + +LIBMESH_DEVICE_INLINE +kernel_vector +zero_kernel_vector() +{ + kernel_vector v; + v.zero(); + return v; +} + +LIBMESH_DEVICE_INLINE +kernel_tensor +zero_kernel_tensor() +{ + kernel_tensor T; + T.zero(); + return T; +} + +LIBMESH_DEVICE_INLINE +kernel_tensor +kernel_leading_identity(const unsigned int dim) +{ + kernel_tensor I = 
zero_kernel_tensor(); + for (unsigned int i = 0; i < dim; ++i) + I(i, i) = Real(1); + return I; +} + +LIBMESH_DEVICE_INLINE +Real +kernel_leading_determinant(const kernel_tensor & J, const unsigned int dim) +{ + if (dim == 0) + return Real(1); + if (dim == 1) + return J(0, 0); + if (dim == 2) + return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); +#if LIBMESH_DIM > 2 + return J.det(); +#else + return Real(0); +#endif +} + +LIBMESH_DEVICE_INLINE +kernel_tensor +kernel_leading_inverse(const kernel_tensor & J, const unsigned int dim) +{ + kernel_tensor inv = zero_kernel_tensor(); + + if (dim == 1) + { + inv(0, 0) = Real(1) / J(0, 0); + return inv; + } + + if (dim == 2) + { + const Real det = kernel_leading_determinant(J, dim); + inv(0, 0) = J(1, 1) / det; + inv(0, 1) = -J(0, 1) / det; + inv(1, 0) = -J(1, 0) / det; + inv(1, 1) = J(0, 0) / det; + return inv; + } + +#if LIBMESH_DIM > 2 + return J.inverse(); +#else + return inv; +#endif +} + +static int +test_dim_ops() +{ + const unsigned int ncases = sizeof(dim_cases) / sizeof(dim_cases[0]); + + std::vector J_values(ncases); + std::vector dims(ncases); + std::vector ref_det(ncases); + std::vector ref_inv(ncases); + std::vector ref_I(ncases); + std::vector ref_prod_left(ncases); + std::vector ref_prod_right(ncases); + + for (unsigned int c = 0; c < ncases; ++c) + { + const auto & info = dim_cases[c]; + J_values[c] = info.J; + dims[c] = info.dim; + + ref_det[c] = host_leading_determinant(info.J, info.dim); + ref_inv[c] = host_leading_inverse(info.J, info.dim); + ref_I[c] = host_identity(info.dim); + ref_prod_left[c] = info.J * ref_inv[c]; + ref_prod_right[c] = ref_inv[c] * info.J; + } + + auto d_J = upload_tensors(J_values, "tensor_dim_ops_J"); + auto d_dims = upload_objects(dims, "tensor_dim_ops_dim"); + Kokkos::View d_det("tensor_dim_ops_det", ncases); + tensor_storage_view d_inv("tensor_dim_ops_inv", ncases); + tensor_storage_view d_I("tensor_dim_ops_I", ncases); + tensor_storage_view d_prod_left("tensor_dim_ops_prod_left", 
ncases); + tensor_storage_view d_prod_right("tensor_dim_ops_prod_right", ncases); + + Kokkos::parallel_for( + static_cast(ncases), + KOKKOS_LAMBDA(int c) { + const auto J = libMesh::Kokkos::load_tensor(d_J, c); + const unsigned int dim = d_dims(c); + const Real det = kernel_leading_determinant(J, dim); + const auto inv = kernel_leading_inverse(J, dim); + const auto I = kernel_leading_identity(dim); + const auto prod_left = J * inv; + const auto prod_right = inv * J; + + d_det(c) = det; + libMesh::Kokkos::store_tensor(d_inv, c, inv); + libMesh::Kokkos::store_tensor(d_I, c, I); + libMesh::Kokkos::store_tensor(d_prod_left, c, prod_left); + libMesh::Kokkos::store_tensor(d_prod_right, c, prod_right); + }); + Kokkos::fence(); + + return compare_device_scalars(d_det, ref_det) + + compare_device_tensors(d_inv, ref_inv) + + compare_device_tensors(d_I, ref_I) + + compare_device_tensors(d_prod_left, ref_prod_left) + + compare_device_tensors(d_prod_right, ref_prod_right); +} + +static int +test_tensor_ops() +{ + const auto A = make_host_tensor(1.1, -0.4, 0.7, + 0.3, 1.9, -1.2, + -0.8, 0.5, 2.2); + const auto a = make_host_vector(2.0, 3.0, 4.0); + const auto b = make_host_vector(5.0, -6.0, 7.0); + const auto c = make_host_vector(1.25, -0.5, 2.0); + + const auto outer = libMesh::outer_product(a, b); + const auto transpose = A.transpose(); + const auto mix = 1.5 * A - 0.25 * outer; + const auto right = A * c; + const auto left = c * A; + const Real contract = A.contract(outer); + const Real norm = A.norm(); + const auto zero = zero_kernel_tensor(); + + std::vector ref_outer(1, outer); + std::vector ref_transpose(1, transpose); + std::vector ref_mix(1, mix); + std::vector ref_rows(LIBMESH_DIM); + std::vector ref_columns(LIBMESH_DIM); + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + { + ref_rows[i] = A.row(i); + ref_columns[i] = A.column(i); + } + std::vector ref_right(1, right); + std::vector ref_left(1, left); + std::vector ref_scalars = {contract, norm, zero.is_zero() ? 
1.0 : 0.0, A.is_zero() ? 1.0 : 0.0}; + + auto d_A = upload_tensors(std::vector{A}, "tensor_ops_A"); + auto d_a = upload_vectors(std::vector{a}, "tensor_ops_a"); + auto d_b = upload_vectors(std::vector{b}, "tensor_ops_b"); + auto d_c = upload_vectors(std::vector{c}, "tensor_ops_c"); + tensor_storage_view d_outer("tensor_ops_outer", 1); + tensor_storage_view d_transpose("tensor_ops_transpose", 1); + tensor_storage_view d_mix("tensor_ops_mix", 1); + vector_storage_view d_rows("tensor_ops_rows", LIBMESH_DIM); + vector_storage_view d_columns("tensor_ops_columns", LIBMESH_DIM); + vector_storage_view d_right("tensor_ops_right", 1); + vector_storage_view d_left("tensor_ops_left", 1); + Kokkos::View d_scalars("tensor_ops_scalars", 4); + + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto A_d = libMesh::Kokkos::load_tensor(d_A, 0); + const auto a_d = libMesh::Kokkos::load_vector(d_a, 0); + const auto b_d = libMesh::Kokkos::load_vector(d_b, 0); + const auto c_d = libMesh::Kokkos::load_vector(d_c, 0); + const auto outer_d = libMesh::outer_product(a_d, b_d); + const auto transpose_d = A_d.transpose(); + const auto mix_d = 1.5 * A_d - 0.25 * outer_d; + const auto right_d = A_d * c_d; + const auto left_d = c_d * A_d; + const Real contract_d = A_d.contract(outer_d); + const Real norm_d = A_d.norm(); + const bool zero_is_zero_d = zero_kernel_tensor().is_zero(); + const bool A_is_zero_d = A_d.is_zero(); + + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + { + libMesh::Kokkos::store_vector(d_rows, i, A_d.row(i)); + libMesh::Kokkos::store_vector(d_columns, i, A_d.column(i)); + } + + libMesh::Kokkos::store_tensor(d_outer, 0, outer_d); + libMesh::Kokkos::store_tensor(d_transpose, 0, transpose_d); + libMesh::Kokkos::store_tensor(d_mix, 0, mix_d); + libMesh::Kokkos::store_vector(d_right, 0, right_d); + libMesh::Kokkos::store_vector(d_left, 0, left_d); + d_scalars(0) = contract_d; + d_scalars(1) = norm_d; + d_scalars(2) = zero_is_zero_d ? 
1.0 : 0.0; + d_scalars(3) = A_is_zero_d ? 1.0 : 0.0; + }); + Kokkos::fence(); + + return compare_device_tensors(d_outer, ref_outer) + + compare_device_tensors(d_transpose, ref_transpose) + + compare_device_tensors(d_mix, ref_mix) + + compare_device_vectors(d_rows, ref_rows) + + compare_device_vectors(d_columns, ref_columns) + + compare_device_vectors(d_right, ref_right) + + compare_device_vectors(d_left, ref_left) + + compare_device_scalars(d_scalars, ref_scalars); +} + +static int +test_tensor_host_only_ops() +{ + int fail = 0; + +#if LIBMESH_DIM > 2 + { + libMesh::TensorValue tensor(2., 1., 0., + 1., 2., 1., + 0., 1., 2.); + fail += tensor.is_hpd(/*rel_tol=*/0.) ? 0 : 1; + } + + { + libMesh::TensorValue tensor(1., 0., 0., + 0., 0., 1., + 0., 1., 0.); + fail += tensor.is_hpd() ? 1 : 0; + } + + { + const libMesh::Point x(1., 0., 0.); + const auto R = libMesh::RealTensorValue::extrinsic_rotation_matrix(90., 0., 0.); + const auto rotated = R * x; + fail += (std::fabs(rotated(0)) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(1) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(2)) <= tol) ? 0 : 1; + + const auto invR = libMesh::RealTensorValue::inverse_extrinsic_rotation_matrix(90., 0., 0.); + const auto unrotated = invR * rotated; + fail += (std::fabs(unrotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(1)) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(2)) <= tol) ? 0 : 1; + } + + { + const libMesh::Point x(1., 1., 1.); + const auto R = libMesh::RealTensorValue::extrinsic_rotation_matrix(90., 90., 90.); + const auto rotated = R * x; + fail += (std::fabs(rotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(1) + 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(2) - 1.) <= tol) ? 0 : 1; + + const auto invR = libMesh::RealTensorValue::inverse_extrinsic_rotation_matrix(90., 90., 90.); + const auto unrotated = invR * rotated; + fail += (std::fabs(unrotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(1) - 1.) <= tol) ? 
0 : 1; + fail += (std::fabs(unrotated(2) - 1.) <= tol) ? 0 : 1; + } +#endif + +#ifdef LIBMESH_HAVE_METAPHYSICL + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedType; + constexpr bool assertion = + std::is_same>>::value; + fail += assertion ? 0 : 1; +#endif + + return fail; +} + +} // anonymous namespace + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + const int dim_fail = test_dim_ops(); + std::printf("[tensor_dim_kernel_oracle] %s (%d failures)\n", + dim_fail ? "FAIL" : "PASS", + dim_fail); + total_fail += dim_fail; + + const int tensor_fail = test_tensor_ops(); + std::printf("[tensor_ops_kernel_oracle] %s (%d failures)\n", + tensor_fail ? "FAIL" : "PASS", + tensor_fail); + total_fail += tensor_fail; + + const int host_fail = test_tensor_host_only_ops(); + std::printf("[tensor_host_ops_oracle] %s (%d failures)\n", + host_fail ? "FAIL" : "PASS", + host_fail); + total_fail += host_fail; + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/numerics/kokkos_vector_ops_oracle_test.K b/tests/numerics/kokkos_vector_ops_oracle_test.K new file mode 100644 index 00000000000..ff91e65fe0c --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_test.K @@ -0,0 +1,442 @@ +// Kokkos kernel oracle tests for libMesh::TypeVector / VectorValue numerics. +// +// Compares device-side vector algebra +// against host libMesh TypeVector oracles built from direct hand-written +// fixtures. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "libmesh/libmesh_config.h" + +#include "libmesh/libmesh.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_vector.h" +#include "libmesh/vector_value.h" +#include "gpu/kokkos_storage.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include +#include +#include + +using libMesh::Real; + +static constexpr double tol = 2.0e-13; +static constexpr double unit_tol = 1.0e-14; +static constexpr Real golden_ratio = 1.6180339887498948482; +static constexpr unsigned int solid_angle_results = + 1 + ((LIBMESH_DIM > 1) ? 2u : 0u) + ((LIBMESH_DIM > 2) ? 4u : 0u); +static constexpr unsigned int vector_results = + 11 + ((LIBMESH_DIM > 2) ? 2u : 0u); +static constexpr unsigned int scalar_results = 11 + solid_angle_results; + +namespace +{ + +using vector_storage_view = Kokkos::View; + +inline int +compare_device_scalars(const Kokkos::View & d_values, + const std::vector & ref_values, + const double local_tol = tol) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (std::fabs(h_values(i) - ref_values[i]) > local_tol) + ++fail; + + return fail; +} + +template +inline vector_storage_view +upload_vectors(const std::vector & values, const char * label) +{ + vector_storage_view d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + h(i, component) = values[i](component); + Kokkos::deep_copy(d, h); + return d; +} + +template +inline int +compare_device_vectors(const vector_storage_view & d_values, + const std::vector & ref_values, + const double local_tol = tol) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for 
(std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + if (std::fabs(h_values(i, d) - ref_values[i](d)) > local_tol) + ++fail; + + return fail; +} + +template +LIBMESH_DEVICE_INLINE +Vec +make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + Vec v; + v.zero(); + v(0) = x; +#if LIBMESH_DIM > 1 + v(1) = y; +#endif +#if LIBMESH_DIM > 2 + v(2) = z; +#endif + return v; +} + +template +struct vector_oracle +{ + std::vector vectors; + std::vector scalars; +}; + +struct vector_kernel_case +{ + const char * name; + Real ax, ay, az; + Real bx, by, bz; + Real cx, cy, cz; +}; + +static const vector_kernel_case cases[] = { +#if LIBMESH_DIM >= 1 + { "line_case_a", 2.0, 0.0, 0.0, -3.0, 0.0, 0.0, 0.5, 0.0, 0.0 }, + { "line_case_b", -1.25, 0.0, 0.0, 4.5, 0.0, 0.0, -2.0, 0.0, 0.0 }, +#endif +#if LIBMESH_DIM >= 2 + { "plane_case_a", 2.0, 3.0, 0.0, 5.0, -6.0, 0.0, 1.25, -0.5, 0.0 }, + { "plane_case_b", -1.0, 4.0, 0.0, 0.5, 2.5, 0.0, -3.0, 1.5, 0.0 }, +#endif +#if LIBMESH_DIM >= 3 + { "space_case_a", 2.0, 3.0, 4.0, 5.0, -6.0, 7.0, 1.25, -0.5, 2.0 }, + { "space_case_b", -1.0, 4.0, 0.75, 0.5, 2.5, -3.5, -3.0, 1.5, 2.25 }, +#endif +}; + +template +inline vector_oracle +make_manual_oracle(const Vec & a, const Vec & b, const Vec & c) +{ + vector_oracle oracle; + oracle.vectors.reserve(vector_results); + oracle.scalars.reserve(scalar_results); + + const auto copied = a; + + Vec mix = a + b; + mix -= c; + + Vec scaled = 1.25 * a; + scaled += (-0.5) * b; + scaled += (0.25) * c; + + Vec plus_assign = a; + plus_assign += b; + + Vec minus_assign = a; + minus_assign -= b; + + Vec accum; + accum.zero(); + accum.add_scaled(a, 1.25); + accum.add_scaled(b, -0.5); + accum.subtract_scaled(c, -0.25); + + const auto divided = a / 5.0; + const auto outer_right = libMesh::outer_product(a, 5.0); + const auto outer_left = libMesh::outer_product(5.0, a); + + Vec mult_assign = a; + mult_assign *= 5.0; + + Vec div_assign = a; + div_assign /= 5.0; + + Vec 
assign_zero = a; + assign_zero = 0.0; + + oracle.vectors.push_back(copied); + oracle.vectors.push_back(mix); + oracle.vectors.push_back(scaled); + oracle.vectors.push_back(accum); + oracle.vectors.push_back(plus_assign); + oracle.vectors.push_back(minus_assign); + oracle.vectors.push_back(divided); + oracle.vectors.push_back(outer_right); + oracle.vectors.push_back(outer_left); + oracle.vectors.push_back(mult_assign); + oracle.vectors.push_back(div_assign); + + oracle.scalars.push_back(a * b); + oracle.scalars.push_back(a.contract(b)); + oracle.scalars.push_back(mix.norm()); + oracle.scalars.push_back(mix.norm_sq()); + oracle.scalars.push_back(make_vector(0.0, 0.0, 0.0).is_zero() ? 1.0 : 0.0); + oracle.scalars.push_back(mix.is_zero() ? 1.0 : 0.0); + oracle.scalars.push_back((a == a) ? 1.0 : 0.0); + oracle.scalars.push_back((a == b) ? 1.0 : 0.0); + oracle.scalars.push_back((a != a) ? 1.0 : 0.0); + oracle.scalars.push_back((a != b) ? 1.0 : 0.0); + oracle.scalars.push_back(assign_zero.is_zero() ? 
1.0 : 0.0); + + const auto xvec = make_vector(1.3); + oracle.scalars.push_back(solid_angle(xvec, xvec, xvec)); + +#if LIBMESH_DIM > 1 + const auto yvec = make_vector(0.0, 2.7); + const auto xydiag = make_vector(3.1, 3.1); + oracle.scalars.push_back(solid_angle(xvec, xvec, yvec)); + oracle.scalars.push_back(solid_angle(xvec, yvec, xydiag)); +#endif + +#if LIBMESH_DIM > 2 + const auto xypdiag = make_vector(0.8, -0.8); + const auto zvec = make_vector(0.0, 0.0, 1.1); + const auto xzdiag = make_vector(0.0, 0.7, 0.7); + const auto icosa1 = make_vector(1.0, golden_ratio, 0.0); + const auto icosa2 = make_vector(-1.0, golden_ratio, 0.0); + const auto icosa3 = make_vector(0.0, 1.0, golden_ratio); + oracle.scalars.push_back(solid_angle(xydiag, yvec, zvec)); + oracle.scalars.push_back(solid_angle(xvec, yvec, xzdiag)); + oracle.scalars.push_back(solid_angle(xypdiag, xydiag, zvec)); + oracle.scalars.push_back(solid_angle(icosa1, icosa2, icosa3)); +#endif + +#if LIBMESH_DIM > 2 + const auto cross = a.cross(b); + auto unit_cross = cross; + if (cross.norm() > unit_tol) + unit_cross = cross.unit(); + + oracle.vectors.push_back(cross); + oracle.vectors.push_back(unit_cross); +#endif + + libmesh_assert_equal_to(oracle.vectors.size(), vector_results); + libmesh_assert_equal_to(oracle.scalars.size(), scalar_results); + + return oracle; +} + +template +static int +test_vector_ops_case(const vector_kernel_case & info) +{ + const auto a = make_vector(info.ax, info.ay, info.az); + const auto b = make_vector(info.bx, info.by, info.bz); + const auto c = make_vector(info.cx, info.cy, info.cz); + + const auto oracle = make_manual_oracle(a, b, c); + + auto d_a = upload_vectors(std::vector{a}, "vector_ops_a"); + auto d_b = upload_vectors(std::vector{b}, "vector_ops_b"); + auto d_c = upload_vectors(std::vector{c}, "vector_ops_c"); + vector_storage_view d_vectors("vector_ops_vectors", vector_results); + Kokkos::View d_scalars("vector_ops_scalars", scalar_results); + + Kokkos::parallel_for( + 1, + 
KOKKOS_LAMBDA(int) { + const Vec a_d = libMesh::Kokkos::load_vector(d_a, 0); + const Vec b_d = libMesh::Kokkos::load_vector(d_b, 0); + const Vec c_d = libMesh::Kokkos::load_vector(d_c, 0); + + const Vec copied = a_d; + + Vec mix = a_d + b_d; + mix -= c_d; + + Vec scaled = 1.25 * a_d; + scaled += (-0.5) * b_d; + scaled += (0.25) * c_d; + + Vec plus_assign = a_d; + plus_assign += b_d; + + Vec minus_assign = a_d; + minus_assign -= b_d; + + Vec accum; + accum.zero(); + accum.add_scaled(a_d, 1.25); + accum.add_scaled(b_d, -0.5); + accum.subtract_scaled(c_d, -0.25); + + const Vec divided = a_d / 5.0; + const Vec outer_right = libMesh::outer_product(a_d, 5.0); + const Vec outer_left = libMesh::outer_product(5.0, a_d); + + Vec mult_assign = a_d; + mult_assign *= 5.0; + + Vec div_assign = a_d; + div_assign /= 5.0; + + Vec assign_zero = a_d; + assign_zero = 0.0; + + const Real dot = a_d * b_d; + const Real contract = a_d.contract(b_d); + const Real norm = mix.norm(); + const Real norm_sq = mix.norm_sq(); + const Vec zero = make_vector(0.0, 0.0, 0.0); + + unsigned int vector_offset = 0; + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, copied); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, mix); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, scaled); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, accum); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, plus_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, minus_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, divided); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, outer_right); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, outer_left); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, mult_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, div_assign); + + unsigned int scalar_offset = 0; + d_scalars(scalar_offset++) = dot; + d_scalars(scalar_offset++) = contract; + 
d_scalars(scalar_offset++) = norm; + d_scalars(scalar_offset++) = norm_sq; + d_scalars(scalar_offset++) = zero.is_zero() ? 1.0 : 0.0; + d_scalars(scalar_offset++) = mix.is_zero() ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_d == a_d) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_d == b_d) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_d != a_d) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_d != b_d) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = assign_zero.is_zero() ? 1.0 : 0.0; + + const Vec xvec = make_vector(1.3); + d_scalars(scalar_offset++) = solid_angle(xvec, xvec, xvec); + +#if LIBMESH_DIM > 1 + const Vec yvec = make_vector(0.0, 2.7); + const Vec xydiag = make_vector(3.1, 3.1); + d_scalars(scalar_offset++) = solid_angle(xvec, xvec, yvec); + d_scalars(scalar_offset++) = solid_angle(xvec, yvec, xydiag); +#endif + +#if LIBMESH_DIM > 2 + const Vec xypdiag = make_vector(0.8, -0.8); + const Vec zvec = make_vector(0.0, 0.0, 1.1); + const Vec xzdiag = make_vector(0.0, 0.7, 0.7); + const Vec icosa1 = make_vector(1.0, golden_ratio, 0.0); + const Vec icosa2 = make_vector(-1.0, golden_ratio, 0.0); + const Vec icosa3 = make_vector(0.0, 1.0, golden_ratio); + d_scalars(scalar_offset++) = solid_angle(xydiag, yvec, zvec); + d_scalars(scalar_offset++) = solid_angle(xvec, yvec, xzdiag); + d_scalars(scalar_offset++) = solid_angle(xypdiag, xydiag, zvec); + d_scalars(scalar_offset++) = solid_angle(icosa1, icosa2, icosa3); +#endif + +#if LIBMESH_DIM > 2 + const Vec cross = a_d.cross(b_d); + Vec unit_cross = cross; + if (cross.norm() > unit_tol) + unit_cross = cross.unit(); + + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, cross); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, unit_cross); +#endif + + libmesh_assert_equal_to(vector_offset, vector_results); + libmesh_assert_equal_to(scalar_offset, scalar_results); + }); + Kokkos::fence(); + + return compare_device_vectors(d_vectors, oracle.vectors) + + compare_device_scalars(d_scalars, oracle.scalars); +} 
+ +static int +test_vector_host_only_traits() +{ + int fail = 0; + +#ifdef LIBMESH_HAVE_METAPHYSICL + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedType; + constexpr bool typevector_assertion = + std::is_same>>::value; + fail += typevector_assertion ? 0 : 1; + + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedValueType; + constexpr bool vectorvalue_assertion = + std::is_same>>::value; + fail += vectorvalue_assertion ? 0 : 1; +#endif + + return fail; +} + +} // anonymous namespace + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : cases) + { + const int f = test_vector_ops_case>(info); + std::printf("[typevector_kernel_oracle] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + for (const auto & info : cases) + { + const int f = test_vector_ops_case>(info); + std::printf("[vectorvalue_kernel_oracle] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + const int host_fail = test_vector_host_only_traits(); + std::printf("[vector_host_traits_oracle] %s (%d failures)\n", + host_fail ? "FAIL" : "PASS", + host_fail); + total_fail += host_fail; + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 
1 : 0; +} From 2ddeb118f545f72d65ce4a81f83d84179e24d2e4 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 6 May 2026 16:17:57 -0600 Subject: [PATCH 04/48] Regenerate configure and Makefile.in files --- Makefile.in | 8 + configure | 418 ++++++++++++++++-- contrib/Makefile.in | 8 + contrib/capnproto/Makefile.in | 8 + contrib/eigen/gitshim/Makefile.in | 8 + contrib/exodusii/5.22b/exodus/Makefile.in | 8 + contrib/exodusii/5.22b/nemesis/Makefile.in | 8 + contrib/exodusii/Lib/Makefile.in | 8 + contrib/exodusii/v8.11/exodus/Makefile.in | 8 + contrib/exodusii/v8.11/nemesis/Makefile.in | 8 + contrib/fparser/Makefile.in | 8 + contrib/fparser/extrasrc/Makefile.in | 8 + contrib/gmv/Makefile.in | 8 + contrib/gzstream/Makefile.in | 8 + contrib/laspack/Makefile.in | 8 + contrib/libHilbert/Makefile.in | 8 + contrib/metis/Makefile.in | 8 + contrib/nanoflann/Makefile.in | 8 + contrib/nemesis/Lib/Makefile.in | 8 + contrib/netgen/Makefile.in | 401 +---------------- contrib/parmetis/Makefile.in | 8 + contrib/poly2tri/modified/Makefile.in | 8 + contrib/qhull/2012.1/Makefile.in | 8 + contrib/sfcurves/Makefile.in | 8 + contrib/tecplot/binary/Makefile.in | 8 + contrib/tecplot/tecio/Makefile.in | 8 + contrib/tetgen/Makefile.in | 8 + contrib/triangle/Makefile.in | 8 + doc/Makefile.in | 8 + doc/html/Makefile.in | 8 + examples/Makefile.in | 8 + .../adaptivity/adaptivity_ex1/Makefile.in | 8 + .../adaptivity/adaptivity_ex2/Makefile.in | 8 + .../adaptivity/adaptivity_ex3/Makefile.in | 8 + .../adaptivity/adaptivity_ex4/Makefile.in | 8 + .../adaptivity/adaptivity_ex5/Makefile.in | 8 + examples/adjoints/adjoints_ex1/Makefile.in | 8 + examples/adjoints/adjoints_ex2/Makefile.in | 8 + examples/adjoints/adjoints_ex3/Makefile.in | 8 + examples/adjoints/adjoints_ex4/Makefile.in | 8 + examples/adjoints/adjoints_ex5/Makefile.in | 8 + examples/adjoints/adjoints_ex6/Makefile.in | 8 + examples/adjoints/adjoints_ex7/Makefile.in | 8 + .../eigenproblems_ex1/Makefile.in | 8 + .../eigenproblems_ex2/Makefile.in | 8 
+ .../eigenproblems_ex3/Makefile.in | 8 + .../eigenproblems_ex4/Makefile.in | 8 + .../fem_system/fem_system_ex1/Makefile.in | 8 + .../fem_system/fem_system_ex2/Makefile.in | 8 + .../fem_system/fem_system_ex3/Makefile.in | 8 + .../fem_system/fem_system_ex4/Makefile.in | 8 + .../fem_system/fem_system_ex5/Makefile.in | 8 + .../introduction/introduction_ex1/Makefile.in | 8 + .../introduction/introduction_ex2/Makefile.in | 8 + .../introduction/introduction_ex3/Makefile.in | 8 + .../introduction/introduction_ex4/Makefile.in | 8 + .../introduction/introduction_ex5/Makefile.in | 8 + .../miscellaneous_ex1/Makefile.in | 8 + .../miscellaneous_ex10/Makefile.in | 8 + .../miscellaneous_ex11/Makefile.in | 8 + .../miscellaneous_ex12/Makefile.in | 8 + .../miscellaneous_ex13/Makefile.in | 8 + .../miscellaneous_ex14/Makefile.in | 8 + .../miscellaneous_ex15/Makefile.in | 8 + .../miscellaneous_ex16/Makefile.in | 8 + .../miscellaneous_ex17/Makefile.in | 8 + .../miscellaneous_ex2/Makefile.in | 8 + .../miscellaneous_ex3/Makefile.in | 8 + .../miscellaneous_ex4/Makefile.in | 8 + .../miscellaneous_ex5/Makefile.in | 8 + .../miscellaneous_ex6/Makefile.in | 8 + .../miscellaneous_ex7/Makefile.in | 8 + .../miscellaneous_ex8/Makefile.in | 8 + .../miscellaneous_ex9/Makefile.in | 8 + .../optimization/optimization_ex1/Makefile.in | 8 + .../optimization/optimization_ex2/Makefile.in | 8 + .../reduced_basis_ex1/Makefile.in | 8 + .../reduced_basis_ex2/Makefile.in | 8 + .../reduced_basis_ex3/Makefile.in | 8 + .../reduced_basis_ex4/Makefile.in | 8 + .../reduced_basis_ex5/Makefile.in | 8 + .../reduced_basis_ex6/Makefile.in | 8 + .../reduced_basis_ex7/Makefile.in | 8 + .../solution_transfer_ex1/Makefile.in | 8 + .../subdomains/subdomains_ex1/Makefile.in | 8 + .../subdomains/subdomains_ex2/Makefile.in | 8 + .../subdomains/subdomains_ex3/Makefile.in | 8 + .../systems_of_equations_ex1/Makefile.in | 8 + .../systems_of_equations_ex2/Makefile.in | 8 + .../systems_of_equations_ex3/Makefile.in | 8 + 
.../systems_of_equations_ex4/Makefile.in | 8 + .../systems_of_equations_ex5/Makefile.in | 8 + .../systems_of_equations_ex6/Makefile.in | 8 + .../systems_of_equations_ex7/Makefile.in | 8 + .../systems_of_equations_ex8/Makefile.in | 8 + .../systems_of_equations_ex9/Makefile.in | 8 + examples/transient/transient_ex1/Makefile.in | 8 + examples/transient/transient_ex2/Makefile.in | 8 + examples/transient/transient_ex3/Makefile.in | 8 + examples/vector_fe/vector_fe_ex1/Makefile.in | 8 + examples/vector_fe/vector_fe_ex10/Makefile.in | 8 + examples/vector_fe/vector_fe_ex2/Makefile.in | 8 + examples/vector_fe/vector_fe_ex3/Makefile.in | 8 + examples/vector_fe/vector_fe_ex4/Makefile.in | 8 + examples/vector_fe/vector_fe_ex5/Makefile.in | 8 + examples/vector_fe/vector_fe_ex6/Makefile.in | 8 + examples/vector_fe/vector_fe_ex7/Makefile.in | 8 + examples/vector_fe/vector_fe_ex8/Makefile.in | 8 + examples/vector_fe/vector_fe_ex9/Makefile.in | 8 + include/Makefile.in | 10 +- include/libmesh/Makefile.in | 37 +- tests/Makefile.in | 136 ++++-- 112 files changed, 1387 insertions(+), 471 deletions(-) diff --git a/Makefile.in b/Makefile.in index cec62a434fa..186be47b152 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7517,11 +7517,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -7569,6 +7576,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ 
OBJEXT = @OBJEXT@ diff --git a/configure b/configure index 33ff470d60a..5c6abbddb9c 100755 --- a/configure +++ b/configure @@ -672,6 +672,16 @@ libmesh_contrib_LDFLAGS libmesh_contrib_INCLUDES libmesh_optional_LIBS libmesh_optional_INCLUDES +LIBMESH_ENABLE_KOKKOS_FALSE +LIBMESH_ENABLE_KOKKOS_TRUE +KOKKOS_LIBS +KOKKOS_LDFLAGS +KOKKOS_CXXFLAGS +KOKKOS_CPPFLAGS +KOKKOS_CXX +ICPX +HIPCC +NVCC LIBMESH_ENABLE_METAPHYSICL_FALSE LIBMESH_ENABLE_METAPHYSICL_TRUE METAPHYSICL_INCLUDE @@ -1347,6 +1357,8 @@ enable_metaphysicl with_metaphysicl with_metaphysicl_include enable_metaphysicl_required +with_kokkos +with_kokkos_backend ' ac_precious_vars='build_alias host_alias @@ -2273,6 +2285,10 @@ Optional Packages: internal: build from contrib --with-metaphysicl-include= + --with-kokkos=DIR Enable Kokkos support using the installation at DIR + --with-kokkos-backend=BACKEND + cuda|hip|sycl|openmp|serial (default: auto-detect + from KokkosCore_config.h) Some influential environment variables: PETSC_DIR path to PETSc installation @@ -57353,22 +57369,11 @@ then : then : withtbb=/usr fi - - case $withtbb in - "~/"*) withtbb=$HOME${withtbb#"~"} ;; - esac - - tbb_is_onetbb=no - if test -r $withtbb/include/tbb/version.h -then : - TBB_INCLUDE_PATH=$withtbb/include - tbb_is_onetbb=yes -else case e in #( - e) if test -r $withtbb/include/tbb/tbb_stddef.h + as_ac_Header=`printf "%s\n" "ac_cv_header_$withtbb/include/tbb/task_scheduler_init.h" | sed "$as_sed_sh"` +ac_fn_cxx_check_header_compile "$LINENO" "$withtbb/include/tbb/task_scheduler_init.h" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes" then : TBB_INCLUDE_PATH=$withtbb/include -fi ;; -esac fi if test "x$withtbblib" != "x" @@ -57381,7 +57386,7 @@ fi fi - if test "x$TBB_INCLUDE_PATH" != "x" + if test -r $TBB_INCLUDE_PATH/tbb/task_scheduler_init.h then : TBB_LIBRARY="-L$TBB_LIBS -ltbb -ltbbmalloc" @@ -57392,15 +57397,8 @@ then : TBB_LIBRARY="${RPATHFLAG}${TBB_LIBS} $TBB_LIBRARY" fi - if test 
"x$tbb_is_onetbb" = "xyes" -then : - tbbverfile=$TBB_INCLUDE_PATH/tbb/version.h -else case e in #( - e) tbbverfile=$TBB_INCLUDE_PATH/tbb/tbb_stddef.h ;; -esac -fi - tbbmajor=`grep "define TBB_VERSION_MAJOR" $tbbverfile | sed -e "s/#define TBB_VERSION_MAJOR[ ]*//g"` - tbbminor=`grep "define TBB_VERSION_MINOR" $tbbverfile | sed -e "s/#define TBB_VERSION_MINOR[ ]*//g"` + tbbmajor=`grep "define TBB_VERSION_MAJOR" $TBB_INCLUDE_PATH/tbb/tbb_stddef.h | sed -e "s/#define TBB_VERSION_MAJOR[ ]*//g"` + tbbminor=`grep "define TBB_VERSION_MINOR" $TBB_INCLUDE_PATH/tbb/tbb_stddef.h | sed -e "s/#define TBB_VERSION_MINOR[ ]*//g"` else case e in #( e) enabletbb=no ;; @@ -57410,8 +57408,8 @@ fi if test "x$enabletbb" != "xno" then : - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for TBB support" >&5 -printf %s "checking for TBB support... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tbb::tbb_thread support" >&5 +printf %s "checking for tbb::tbb_thread support... " >&6; } ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -57419,20 +57417,20 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - saveCXXFLAGS="$CXXFLAGS" + saveCXXFLAGS="$CXXFLAGS" CXXFLAGS="$saveCXXFLAGS $TBB_INCLUDE" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ - #include + #include int main (void) { - tbb::blocked_range r(0, 1); - (void)r.size(); + tbb::tbb_thread t; + t.join(); ; return 0; @@ -57455,7 +57453,8 @@ esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CXXFLAGS=$saveCXXFLAGS + CXXFLAGS=$saveCXXFLAGS + ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -57475,13 +57474,6 @@ printf "%s\n" "#define DETECTED_TBB_VERSION_MAJOR $tbbmajor" >>confdefs.h printf "%s\n" "#define DETECTED_TBB_VERSION_MINOR $tbbminor" >>confdefs.h - if test "x$tbb_is_onetbb" = "xyes" -then : - -printf "%s\n" "#define HAVE_ONETBB 1" >>confdefs.h - -fi - @@ -63913,6 +63905,352 @@ fi +# ------------------------------------------------------------- +# Kokkos -- optional, enables the native Kokkos FE math path +# ------------------------------------------------------------- + +# Check whether --with-kokkos was given. +if test ${with_kokkos+y} +then : + withval=$with_kokkos; KOKKOS_DIR="$withval" +else case e in #( + e) KOKKOS_DIR="no" ;; +esac +fi + + + +# Check whether --with-kokkos-backend was given. +if test ${with_kokkos_backend+y} +then : + withval=$with_kokkos_backend; KOKKOS_BACKEND="$withval" +else case e in #( + e) KOKKOS_BACKEND="auto" ;; +esac +fi + + + +if test "x$KOKKOS_DIR" != "xno" +then : + + as_ac_File=`printf "%s\n" "ac_cv_file_$KOKKOS_DIR/include/Kokkos_Core.hpp" | sed "$as_sed_sh"` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $KOKKOS_DIR/include/Kokkos_Core.hpp" >&5 +printf %s "checking for $KOKKOS_DIR/include/Kokkos_Core.hpp... " >&6; } +if eval test \${$as_ac_File+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) test "$cross_compiling" = yes && + as_fn_error $? 
"cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "$KOKKOS_DIR/include/Kokkos_Core.hpp"; then + eval "$as_ac_File=yes" +else + eval "$as_ac_File=no" +fi ;; +esac +fi +eval ac_res=\$$as_ac_File + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_ac_File"\" = x"yes" +then : + + enablekokkos=yes + libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES -I$KOKKOS_DIR/include" + libmesh_optional_LIBS="$libmesh_optional_LIBS -L$KOKKOS_DIR/lib -lkokkoscore" + + if test "x$KOKKOS_CXX" = "x" +then : + + KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h" + + if test "x$KOKKOS_BACKEND" = "xauto" +then : + + if test -r "$KOKKOS_CFG" +then : + + if grep -q 'KOKKOS_ENABLE_CUDA' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=cuda +else case e in #( + e) if grep -q 'KOKKOS_ENABLE_HIP' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=hip +else case e in #( + e) if grep -q 'KOKKOS_ENABLE_SYCL' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=sycl +else case e in #( + e) if grep -q 'KOKKOS_ENABLE_OPENMP' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=openmp +else case e in #( + e) KOKKOS_BACKEND=serial ;; +esac +fi ;; +esac +fi ;; +esac +fi ;; +esac +fi + +else case e in #( + e) KOKKOS_BACKEND=serial ;; +esac +fi + +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Kokkos backend: $KOKKOS_BACKEND" >&5 +printf "%s\n" "Kokkos backend: $KOKKOS_BACKEND" >&6; } + + have_kokkos_openmp=no + if test -r "$KOKKOS_CFG" +then : + if grep -q 'KOKKOS_ENABLE_OPENMP' "$KOKKOS_CFG" +then : + have_kokkos_openmp=yes +fi +fi + + case "$KOKKOS_BACKEND" in + cuda) + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... 
" >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="no" + ;; +esac ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$NVCC" = "xno" +then : + as_fn_error $? "nvcc not found but Kokkos CUDA backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$NVCC" + KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --disable-warnings -x cu -ccbin $CXX" + KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler -L$KOKKOS_DIR/lib" + if test "x$have_kokkos_openmp" = "xyes" +then : + + KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp" + KOKKOS_LDFLAGS="$KOKKOS_LDFLAGS -fopenmp" + +fi + ;; + hip) + # Extract the first word of "hipcc", so it can be a program name with args. +set dummy hipcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_HIPCC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $HIPCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_HIPCC="$HIPCC" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_HIPCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_HIPCC" && ac_cv_path_HIPCC="no" + ;; +esac ;; +esac +fi +HIPCC=$ac_cv_path_HIPCC +if test -n "$HIPCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCC" >&5 +printf "%s\n" "$HIPCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$HIPCC" = "xno" +then : + as_fn_error $? "hipcc not found but Kokkos HIP backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$HIPCC" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + sycl) + # Extract the first word of "icpx", so it can be a program name with args. +set dummy icpx; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ICPX+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $ICPX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ICPX="$ICPX" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ICPX="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_ICPX" && ac_cv_path_ICPX="no" + ;; +esac ;; +esac +fi +ICPX=$ac_cv_path_ICPX +if test -n "$ICPX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ICPX" >&5 +printf "%s\n" "$ICPX" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$ICPX" = "xno" +then : + as_fn_error $? "icpx not found but Kokkos SYCL backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$ICPX" + KOKKOS_CXXFLAGS="-fsycl" + KOKKOS_LDFLAGS="-fsycl -L$KOKKOS_DIR/lib" + ;; + openmp) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-fopenmp -x c++" + KOKKOS_LDFLAGS="-fopenmp -L$KOKKOS_DIR/lib" + ;; + serial|*) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-x c++" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + esac + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Using caller-provided KOKKOS_CXX=$KOKKOS_CXX" >&5 +printf "%s\n" "Using caller-provided KOKKOS_CXX=$KOKKOS_CXX" >&6; } ;; +esac +fi + + KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION -I$KOKKOS_DIR/include}" + KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" + KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" + + +printf "%s\n" "#define HAVE_KOKKOS 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: <<< Configuring library with Kokkos support >>>" >&5 +printf "%s\n" "<<< Configuring library with Kokkos support >>>" >&6; } + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Kokkos not found at 
$KOKKOS_DIR -- disabling Kokkos FE support" >&5 +printf "%s\n" "$as_me: WARNING: Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support" >&2;} + enablekokkos=no + ;; +esac +fi + + +else case e in #( + e) enablekokkos=no ;; +esac +fi + + + + + + + if test x$enablekokkos = xyes; then + LIBMESH_ENABLE_KOKKOS_TRUE= + LIBMESH_ENABLE_KOKKOS_FALSE='#' +else + LIBMESH_ENABLE_KOKKOS_TRUE='#' + LIBMESH_ENABLE_KOKKOS_FALSE= +fi + +# ------------------------------------------------------------- + + + if test "$enableoptional" != no then : @@ -65182,6 +65520,10 @@ if test -z "${LIBMESH_ENABLE_METAPHYSICL_TRUE}" && test -z "${LIBMESH_ENABLE_MET as_fn_error $? "conditional \"LIBMESH_ENABLE_METAPHYSICL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${LIBMESH_ENABLE_KOKKOS_TRUE}" && test -z "${LIBMESH_ENABLE_KOKKOS_FALSE}"; then + as_fn_error $? "conditional \"LIBMESH_ENABLE_KOKKOS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${GIT_CHECKOUT_TRUE}" && test -z "${GIT_CHECKOUT_FALSE}"; then as_fn_error $? "conditional \"GIT_CHECKOUT\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 diff --git a/contrib/Makefile.in b/contrib/Makefile.in index 6c8d8649cdc..668594d8c04 100644 --- a/contrib/Makefile.in +++ b/contrib/Makefile.in @@ -604,11 +604,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -656,6 +663,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/capnproto/Makefile.in b/contrib/capnproto/Makefile.in index 43bfdcb44d5..56c7844f40d 100644 --- a/contrib/capnproto/Makefile.in +++ b/contrib/capnproto/Makefile.in @@ -453,11 +453,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -505,6 +512,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/eigen/gitshim/Makefile.in 
b/contrib/eigen/gitshim/Makefile.in index cd83617aabb..bab4d953ac5 100644 --- a/contrib/eigen/gitshim/Makefile.in +++ b/contrib/eigen/gitshim/Makefile.in @@ -337,11 +337,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -389,6 +396,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/5.22b/exodus/Makefile.in b/contrib/exodusii/5.22b/exodus/Makefile.in index d6f576d1a90..66a5e6a0357 100644 --- a/contrib/exodusii/5.22b/exodus/Makefile.in +++ b/contrib/exodusii/5.22b/exodus/Makefile.in @@ -3320,11 +3320,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -3372,6 +3379,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff 
--git a/contrib/exodusii/5.22b/nemesis/Makefile.in b/contrib/exodusii/5.22b/nemesis/Makefile.in index 8538cdc184f..17636fa0144 100644 --- a/contrib/exodusii/5.22b/nemesis/Makefile.in +++ b/contrib/exodusii/5.22b/nemesis/Makefile.in @@ -399,11 +399,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -451,6 +458,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/Lib/Makefile.in b/contrib/exodusii/Lib/Makefile.in index 4c6ef829ed5..59c360d8928 100644 --- a/contrib/exodusii/Lib/Makefile.in +++ b/contrib/exodusii/Lib/Makefile.in @@ -1955,11 +1955,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2007,6 +2014,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ 
OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/v8.11/exodus/Makefile.in b/contrib/exodusii/v8.11/exodus/Makefile.in index 826c62c23e0..6c21419de79 100644 --- a/contrib/exodusii/v8.11/exodus/Makefile.in +++ b/contrib/exodusii/v8.11/exodus/Makefile.in @@ -4248,11 +4248,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -4300,6 +4307,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/v8.11/nemesis/Makefile.in b/contrib/exodusii/v8.11/nemesis/Makefile.in index d1909c9f1d5..715ed90b4fe 100644 --- a/contrib/exodusii/v8.11/nemesis/Makefile.in +++ b/contrib/exodusii/v8.11/nemesis/Makefile.in @@ -409,11 +409,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -461,6 +468,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ 
NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/fparser/Makefile.in b/contrib/fparser/Makefile.in index a9a20542d31..dd6e31b0e76 100644 --- a/contrib/fparser/Makefile.in +++ b/contrib/fparser/Makefile.in @@ -867,11 +867,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -919,6 +926,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/fparser/extrasrc/Makefile.in b/contrib/fparser/extrasrc/Makefile.in index f257d59051c..3eafacaf7a0 100644 --- a/contrib/fparser/extrasrc/Makefile.in +++ b/contrib/fparser/extrasrc/Makefile.in @@ -339,11 +339,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -391,6 +398,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ 
NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/gmv/Makefile.in b/contrib/gmv/Makefile.in index 1043f694cf0..249f658088a 100644 --- a/contrib/gmv/Makefile.in +++ b/contrib/gmv/Makefile.in @@ -394,11 +394,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -446,6 +453,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/gzstream/Makefile.in b/contrib/gzstream/Makefile.in index 26d9c6a99cd..d7694ab76c8 100644 --- a/contrib/gzstream/Makefile.in +++ b/contrib/gzstream/Makefile.in @@ -446,11 +446,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -498,6 +505,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = 
@NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/laspack/Makefile.in b/contrib/laspack/Makefile.in index a921519ee01..125376d4347 100644 --- a/contrib/laspack/Makefile.in +++ b/contrib/laspack/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/libHilbert/Makefile.in b/contrib/libHilbert/Makefile.in index 92b7a8a5c6e..35d049071ec 100644 --- a/contrib/libHilbert/Makefile.in +++ b/contrib/libHilbert/Makefile.in @@ -477,11 +477,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -529,6 +536,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = 
@NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/metis/Makefile.in b/contrib/metis/Makefile.in index 2167e22fd72..5e8047805b9 100644 --- a/contrib/metis/Makefile.in +++ b/contrib/metis/Makefile.in @@ -1021,11 +1021,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -1073,6 +1080,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/nanoflann/Makefile.in b/contrib/nanoflann/Makefile.in index cb6fb5b1e25..94694c9b03c 100644 --- a/contrib/nanoflann/Makefile.in +++ b/contrib/nanoflann/Makefile.in @@ -443,11 +443,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -495,6 +502,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ 
OBJEXT = @OBJEXT@ diff --git a/contrib/nemesis/Lib/Makefile.in b/contrib/nemesis/Lib/Makefile.in index 30e196af739..722bf4b86f0 100644 --- a/contrib/nemesis/Lib/Makefile.in +++ b/contrib/nemesis/Lib/Makefile.in @@ -789,11 +789,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -841,6 +848,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/netgen/Makefile.in b/contrib/netgen/Makefile.in index f7db0d91967..21c0f0247b3 100644 --- a/contrib/netgen/Makefile.in +++ b/contrib/netgen/Makefile.in @@ -231,7 +231,7 @@ am__define_uniq_tagged_files = \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` -am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/netgen_SOURCES +am__DIST_COMMON = $(srcdir)/Makefile.in DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ACSM_ANY_PARANOID_FLAGS = @ACSM_ANY_PARANOID_FLAGS@ @@ -341,11 +341,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = 
@KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -393,6 +400,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -555,387 +563,6 @@ top_srcdir = @top_srcdir@ vtkbuild = @vtkbuild@ vtkmajor = @vtkmajor@ vtkversion = @vtkversion@ - -# Do not edit - automatically generated from ./rebuild_netgen_SOURCES.sh -@LIBMESH_ENABLE_NETGEN_TRUE@netgen_SOURCE_FILES = \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/_get_glibcxx_use_cxx11_abi.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/_get_gxx_abi.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/archive.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/archive.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/array.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/autodiff.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/autodiffdiff.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/bitarray.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/bitarray.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/concurrentqueue.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/exception.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/exception.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/flags.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/flags.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/hashtable.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/localheap.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/localheap.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/logging.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/logging.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/memtracer.hpp \ 
-@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/mpi4py_pycapi.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/mpi_wrapper.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi_generated_declarations.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi_generated_dummy_init.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi_generated_init.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi_native.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ng_mpi_wrapper.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ngcore.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ngcore_api.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ngstream.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/paje_trace.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/paje_trace.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/profiler.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/profiler.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/python_ngcore.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/python_ngcore.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/python_ngcore_export.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/ranges.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/register_archive.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/signal.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/simd.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/simd_arm64.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/simd_avx.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/simd_avx512.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/simd_generic.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/simd_sse.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/symboltable.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/core/table.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/table.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/taskmanager.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/taskmanager.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/type_traits.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/utils.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/utils.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/version.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/version.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/core/xbool.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/algprim.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/algprim.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/brick.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/brick.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/bspline2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/csg.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/csgeom.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/csgeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/csgparser.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/csgparser.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/csgpkg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/curve2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/curve2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/edgeflw.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/edgeflw.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/explicitcurve2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/explicitcurve2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/extrusion.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/extrusion.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/gencyl.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/gencyl.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/genmesh.cpp \ 
-@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/geoml.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/identify.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/identify.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/manifold.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/manifold.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/meshsurf.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/meshsurf.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/polyhedra.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/polyhedra.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/python_csg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/revolution.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/revolution.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/singularref.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/singularref.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/solid.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/solid.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/specpoin.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/specpoin.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/spline3d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/spline3d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/splinesurface.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/splinesurface.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/surface.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/surface.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/triapprox.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/triapprox.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/vscsg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/vscsg.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/csg/zrefine.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/autodiff.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/autoptr.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/general/dynamicmem.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/dynamicmem.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/gzstream.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/gzstream.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/hashtabl.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/hashtabl.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/myadt.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/mystring.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/mystring.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/netgenout.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/ngarray.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/ngarray.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/ngbitarray.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/ngbitarray.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/ngpython.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/optmem.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/optmem.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/parthreads.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/parthreads.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/seti.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/seti.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/sort.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/sort.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/spbita2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/spbita2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/stack.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/table.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/table.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/general/template.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/csg2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/geom2d/csg2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/genmesh2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/geom2dpkg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/geometry2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/geometry2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/python_geom2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/spline2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/vsgeom2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/geom2d/vsgeom2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/adtree.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/adtree.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geom2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geom2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geom3d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geom3d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geomfuncs.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geomfuncs.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geomobjects.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geomops.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geomtest3d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/geomtest3d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/gprim.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/spline.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/spline.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/splinegeometry.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/splinegeometry.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/transform3d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/gprim/transform3d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/acisgeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/csg.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/include/geometry2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/gprim.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/incopengl.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/inctcl.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/incvis.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/linalg.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/meshing.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/myadt.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/mydefs.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/mystdlib.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/nginterface.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/nginterface_v2.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/nginterface_v2_impl.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/ngsimd.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/occgeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/opti.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/parallel.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/stlgeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/include/visual.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/nginterface.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/nginterface_v2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/read_fnf_mesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/readtetmesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/readuser.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/rw_cgns.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/rw_medit.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/rw_medit.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writeOpenFOAM15x.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writeabaqus.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/interface/writediffpack.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writedolfin.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writeelmer.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writefeap.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writefluent.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writegmsh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writegmsh2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writejcm.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writepermas.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writetecplot.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writetet.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writetochnog.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writeuser.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/writeuser.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/interface/wuchemnitz.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/bfgs.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/densemat.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/densemat.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/linalg.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/linopt.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/linsearch.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/opti.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/polynomial.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/polynomial.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/linalg/vector.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/adfront2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/adfront2.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/adfront3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/adfront3.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/meshing/basegeom.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/basegeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/bcfunctions.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/bcfunctions.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/bisect.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/bisect.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/boundarylayer.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/boundarylayer.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/boundarylayer2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/classifyhpel.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/clusters.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/clusters.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/curvedelems.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/curvedelems.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/debugging.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/debugging.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/delaunay.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/delaunay2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/delaunay2d.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/fieldlines.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/fieldlines.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/findip.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/findip2.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/geomsearch.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/geomsearch.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/global.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/global.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hpref_hex.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hpref_prism.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/meshing/hpref_pyramid.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hpref_quad.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hpref_segm.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hpref_tet.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hpref_trig.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hprefinement.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/hprefinement.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/improve2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/improve2.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/improve2gen.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/improve3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/improve3.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/localh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/localh.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshclass.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshclass.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshfunc.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshfunc.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshfunc2d.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshing.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshing2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshing2.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshing3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshing3.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshtool.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshtool.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshtype.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/meshtype.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/msghandler.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/meshing/msghandler.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/netrule2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/netrule3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/parallelmesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/paralleltop.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/paralleltop.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/parser2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/parser3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/python_mesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/python_mesh.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/refine.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/ruler2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/ruler2.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/ruler3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/ruler3.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/secondorder.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/smoothing2.5.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/smoothing2.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/smoothing3.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/soldata.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/specials.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/specials.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/surfacegeom.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/surfacegeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/topology.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/topology.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/validate.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/validate.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/meshing/visual_interface.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/meshing/visual_interface.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_edge.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_edge.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_face.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_face.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_solid.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_utils.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_utils.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_vertex.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occ_vertex.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occconstruction.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occgenmesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occgeom.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occgeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occmeshsurf.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occmeshsurf.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/occpkg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/python_occ.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/python_occ_basic.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/python_occ_shapes.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/utilities.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/vsocc.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/occ/vsocc.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/meshstlsurface.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/meshstlsurface.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/python_stl.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stlgeom.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stlgeom.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stlgeomchart.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stlgeommesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ 
netgen/libsrc/stlgeom/stlline.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stlline.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stlpkg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stltool.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stltool.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stltopology.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/stltopology.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/vsstl.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/stlgeom/vsstl.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/importsolution.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/meshdoc.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/meshdoc.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/mvdraw.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/mvdraw.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/vispar.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/visual.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/visual_api.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/visualpkg.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/vsfieldlines.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/vsmesh.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/vssolution.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/libsrc/visualization/vssolution.hpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/ng_occ.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/ng_stl.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/ng_vol.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib_occ.cpp \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib_occ.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/parallelfunc.cpp - 
@LIBMESH_ENABLE_NETGEN_TRUE@netgenincludedir = $(includedir)/netgen @LIBMESH_ENABLE_NETGEN_TRUE@nglibincludedir = $(includedir)/netgen/nglib @LIBMESH_ENABLE_NETGEN_TRUE@netgenlibdir = $(libdir) @@ -954,7 +581,7 @@ all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/netgen_SOURCES $(am__configure_deps) +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ @@ -974,7 +601,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ esac; -$(srcdir)/netgen_SOURCES $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh @@ -1297,14 +923,9 @@ uninstall-am: uninstall-netgenincludeHEADERS uninstall-netgenlibDATA \ .PRECIOUS: Makefile -# Try to include Netgen files as depndencies; otherwise we can get -# errors if we recompile from an existing libMesh build dir after a -# Netgen header has changed or if we link from one after a Netgen ABI -# has changed. - # Use a stamp file to make sure we don't have multiple sub-$(MAKE) # going at once; cmake gets horribly confused if that happens. 
-@LIBMESH_ENABLE_NETGEN_TRUE@.buildstamp: $(netgen_SOURCE_FILES) +@LIBMESH_ENABLE_NETGEN_TRUE@.buildstamp: @LIBMESH_ENABLE_NETGEN_TRUE@ $(MAKE) -C build $(AM_MAKEFLAGS) @LIBMESH_ENABLE_NETGEN_TRUE@ touch .buildstamp diff --git a/contrib/parmetis/Makefile.in b/contrib/parmetis/Makefile.in index 70875e6ff39..cde4b12aab9 100644 --- a/contrib/parmetis/Makefile.in +++ b/contrib/parmetis/Makefile.in @@ -855,11 +855,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -907,6 +914,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/poly2tri/modified/Makefile.in b/contrib/poly2tri/modified/Makefile.in index abb0c8cffa6..514fd6a38a6 100644 --- a/contrib/poly2tri/modified/Makefile.in +++ b/contrib/poly2tri/modified/Makefile.in @@ -541,11 +541,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -593,6 +600,7 
@@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/qhull/2012.1/Makefile.in b/contrib/qhull/2012.1/Makefile.in index d9910cea212..03f2cbcb819 100644 --- a/contrib/qhull/2012.1/Makefile.in +++ b/contrib/qhull/2012.1/Makefile.in @@ -1164,11 +1164,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -1216,6 +1223,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/sfcurves/Makefile.in b/contrib/sfcurves/Makefile.in index 453e333f449..18be38e4660 100644 --- a/contrib/sfcurves/Makefile.in +++ b/contrib/sfcurves/Makefile.in @@ -414,11 +414,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -466,6 +473,7 @@ 
NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tecplot/binary/Makefile.in b/contrib/tecplot/binary/Makefile.in index 9c918125c59..9a13af38818 100644 --- a/contrib/tecplot/binary/Makefile.in +++ b/contrib/tecplot/binary/Makefile.in @@ -384,11 +384,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -436,6 +443,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tecplot/tecio/Makefile.in b/contrib/tecplot/tecio/Makefile.in index 0ca8d9faaf2..f937b27b6db 100644 --- a/contrib/tecplot/tecio/Makefile.in +++ b/contrib/tecplot/tecio/Makefile.in @@ -623,11 +623,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -675,6 
+682,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tetgen/Makefile.in b/contrib/tetgen/Makefile.in index b3035e72b2b..725d7a30d36 100644 --- a/contrib/tetgen/Makefile.in +++ b/contrib/tetgen/Makefile.in @@ -429,11 +429,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -481,6 +488,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/triangle/Makefile.in b/contrib/triangle/Makefile.in index a63cb31454c..f87cb42296f 100644 --- a/contrib/triangle/Makefile.in +++ b/contrib/triangle/Makefile.in @@ -424,11 +424,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -476,6 +483,7 @@ NODEPRECATEDFLAG = 
@NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/doc/Makefile.in b/doc/Makefile.in index 42ba76ef15b..812994568fe 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -347,11 +347,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -399,6 +406,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index ea0e18aacb4..d3e9103668c 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -307,11 +307,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -359,6 +366,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ 
+NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/Makefile.in b/examples/Makefile.in index d16d580d3a2..27a8db4d760 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -375,11 +375,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -427,6 +434,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex1/Makefile.in b/examples/adaptivity/adaptivity_ex1/Makefile.in index 156523e9faa..3dc4c77a9dd 100644 --- a/examples/adaptivity/adaptivity_ex1/Makefile.in +++ b/examples/adaptivity/adaptivity_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = 
@NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex2/Makefile.in b/examples/adaptivity/adaptivity_ex2/Makefile.in index be5308ddd74..04c82fb9137 100644 --- a/examples/adaptivity/adaptivity_ex2/Makefile.in +++ b/examples/adaptivity/adaptivity_ex2/Makefile.in @@ -484,11 +484,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -536,6 +543,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex3/Makefile.in b/examples/adaptivity/adaptivity_ex3/Makefile.in index 0a64440aa41..7e31802fa14 100644 --- a/examples/adaptivity/adaptivity_ex3/Makefile.in +++ b/examples/adaptivity/adaptivity_ex3/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ 
NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex4/Makefile.in b/examples/adaptivity/adaptivity_ex4/Makefile.in index b25aca31264..e155bc31268 100644 --- a/examples/adaptivity/adaptivity_ex4/Makefile.in +++ b/examples/adaptivity/adaptivity_ex4/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex5/Makefile.in b/examples/adaptivity/adaptivity_ex5/Makefile.in index b419dc98312..50cfa24eefd 100644 --- a/examples/adaptivity/adaptivity_ex5/Makefile.in +++ b/examples/adaptivity/adaptivity_ex5/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS 
= @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex1/Makefile.in b/examples/adjoints/adjoints_ex1/Makefile.in index a174eb3b2fe..f658f98c231 100644 --- a/examples/adjoints/adjoints_ex1/Makefile.in +++ b/examples/adjoints/adjoints_ex1/Makefile.in @@ -559,11 +559,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -611,6 +618,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex2/Makefile.in b/examples/adjoints/adjoints_ex2/Makefile.in index d0ac7f06aeb..1583e06ba69 100644 --- a/examples/adjoints/adjoints_ex2/Makefile.in +++ b/examples/adjoints/adjoints_ex2/Makefile.in @@ -527,11 +527,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ 
+KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -579,6 +586,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex3/Makefile.in b/examples/adjoints/adjoints_ex3/Makefile.in index af52a508f53..8e744ca96ee 100644 --- a/examples/adjoints/adjoints_ex3/Makefile.in +++ b/examples/adjoints/adjoints_ex3/Makefile.in @@ -562,11 +562,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +621,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex4/Makefile.in b/examples/adjoints/adjoints_ex4/Makefile.in index 6721b984eb7..8ff3dceb0f3 100644 --- a/examples/adjoints/adjoints_ex4/Makefile.in +++ b/examples/adjoints/adjoints_ex4/Makefile.in @@ -562,11 +562,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +621,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex5/Makefile.in b/examples/adjoints/adjoints_ex5/Makefile.in index 5dcc545ab49..f4bdc7d11ae 100644 --- a/examples/adjoints/adjoints_ex5/Makefile.in +++ b/examples/adjoints/adjoints_ex5/Makefile.in @@ -562,11 +562,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +621,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex6/Makefile.in b/examples/adjoints/adjoints_ex6/Makefile.in index f8568140952..43ada1d6034 100644 --- a/examples/adjoints/adjoints_ex6/Makefile.in +++ b/examples/adjoints/adjoints_ex6/Makefile.in @@ -527,11 +527,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM 
= @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -579,6 +586,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex7/Makefile.in b/examples/adjoints/adjoints_ex7/Makefile.in index 793d69f1149..c807bd13682 100644 --- a/examples/adjoints/adjoints_ex7/Makefile.in +++ b/examples/adjoints/adjoints_ex7/Makefile.in @@ -577,11 +577,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -629,6 +636,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex1/Makefile.in b/examples/eigenproblems/eigenproblems_ex1/Makefile.in index a23b7ec4684..b2aeeba0c70 100644 --- a/examples/eigenproblems/eigenproblems_ex1/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ 
HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex2/Makefile.in b/examples/eigenproblems/eigenproblems_ex2/Makefile.in index 5ef2c3bc3ec..e4584dfbd8f 100644 --- a/examples/eigenproblems/eigenproblems_ex2/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex2/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex3/Makefile.in b/examples/eigenproblems/eigenproblems_ex3/Makefile.in index 4a060dc7358..1bb7ca59f97 100644 --- 
a/examples/eigenproblems/eigenproblems_ex3/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex3/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex4/Makefile.in b/examples/eigenproblems/eigenproblems_ex4/Makefile.in index eb92f3d91f8..becef7ee553 100644 --- a/examples/eigenproblems/eigenproblems_ex4/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex4/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff 
--git a/examples/fem_system/fem_system_ex1/Makefile.in b/examples/fem_system/fem_system_ex1/Makefile.in index 47f9d7e206f..ab49a448096 100644 --- a/examples/fem_system/fem_system_ex1/Makefile.in +++ b/examples/fem_system/fem_system_ex1/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex2/Makefile.in b/examples/fem_system/fem_system_ex2/Makefile.in index d1357da5ca8..5990a9ddcc7 100644 --- a/examples/fem_system/fem_system_ex2/Makefile.in +++ b/examples/fem_system/fem_system_ex2/Makefile.in @@ -514,11 +514,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +573,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = 
@NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex3/Makefile.in b/examples/fem_system/fem_system_ex3/Makefile.in index 6783edf17bd..614048a0e48 100644 --- a/examples/fem_system/fem_system_ex3/Makefile.in +++ b/examples/fem_system/fem_system_ex3/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex4/Makefile.in b/examples/fem_system/fem_system_ex4/Makefile.in index 8e6296abb4e..1a31f0ddab4 100644 --- a/examples/fem_system/fem_system_ex4/Makefile.in +++ b/examples/fem_system/fem_system_ex4/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ 
NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex5/Makefile.in b/examples/fem_system/fem_system_ex5/Makefile.in index 00224c8d1c2..2526a04fe16 100644 --- a/examples/fem_system/fem_system_ex5/Makefile.in +++ b/examples/fem_system/fem_system_ex5/Makefile.in @@ -514,11 +514,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +573,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex1/Makefile.in b/examples/introduction/introduction_ex1/Makefile.in index 59920135dfb..13087163eff 100644 --- a/examples/introduction/introduction_ex1/Makefile.in +++ b/examples/introduction/introduction_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = 
@KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex2/Makefile.in b/examples/introduction/introduction_ex2/Makefile.in index 4168b619aa4..5e11b336ef9 100644 --- a/examples/introduction/introduction_ex2/Makefile.in +++ b/examples/introduction/introduction_ex2/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex3/Makefile.in b/examples/introduction/introduction_ex3/Makefile.in index 859921ed0fa..bb24117c806 100644 --- a/examples/introduction/introduction_ex3/Makefile.in +++ b/examples/introduction/introduction_ex3/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex4/Makefile.in b/examples/introduction/introduction_ex4/Makefile.in index 3572fea8ef5..efdd5b9b127 100644 --- a/examples/introduction/introduction_ex4/Makefile.in +++ b/examples/introduction/introduction_ex4/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex5/Makefile.in b/examples/introduction/introduction_ex5/Makefile.in index 3ae3f4020fe..b08a51822e9 100644 --- a/examples/introduction/introduction_ex5/Makefile.in +++ b/examples/introduction/introduction_ex5/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ 
INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex1/Makefile.in b/examples/miscellaneous/miscellaneous_ex1/Makefile.in index d0de4bad619..fa1d6604c10 100644 --- a/examples/miscellaneous/miscellaneous_ex1/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex10/Makefile.in b/examples/miscellaneous/miscellaneous_ex10/Makefile.in index 1afb2f88961..4ceaf8ccf94 100644 --- a/examples/miscellaneous/miscellaneous_ex10/Makefile.in +++ 
b/examples/miscellaneous/miscellaneous_ex10/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex11/Makefile.in b/examples/miscellaneous/miscellaneous_ex11/Makefile.in index de80da2aab8..e36b21636b2 100644 --- a/examples/miscellaneous/miscellaneous_ex11/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex11/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/examples/miscellaneous/miscellaneous_ex12/Makefile.in b/examples/miscellaneous/miscellaneous_ex12/Makefile.in index 99775b9b0b9..2c4c4fc193d 100644 --- a/examples/miscellaneous/miscellaneous_ex12/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex12/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex13/Makefile.in b/examples/miscellaneous/miscellaneous_ex13/Makefile.in index d6f6a103170..4f0080367ed 100644 --- a/examples/miscellaneous/miscellaneous_ex13/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex13/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = 
@NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex14/Makefile.in b/examples/miscellaneous/miscellaneous_ex14/Makefile.in index feb36eea1f3..768948775af 100644 --- a/examples/miscellaneous/miscellaneous_ex14/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex14/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex15/Makefile.in b/examples/miscellaneous/miscellaneous_ex15/Makefile.in index 504236ef024..438762128b7 100644 --- a/examples/miscellaneous/miscellaneous_ex15/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex15/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ 
+KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex16/Makefile.in b/examples/miscellaneous/miscellaneous_ex16/Makefile.in index 816dc5777b4..acaecda9d82 100644 --- a/examples/miscellaneous/miscellaneous_ex16/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex16/Makefile.in @@ -481,11 +481,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -533,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex17/Makefile.in b/examples/miscellaneous/miscellaneous_ex17/Makefile.in index d7264bce057..8ab2dc797f8 100644 --- a/examples/miscellaneous/miscellaneous_ex17/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex17/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ 
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex2/Makefile.in b/examples/miscellaneous/miscellaneous_ex2/Makefile.in index bc5b0d64089..5e2d6279049 100644 --- a/examples/miscellaneous/miscellaneous_ex2/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex2/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex3/Makefile.in b/examples/miscellaneous/miscellaneous_ex3/Makefile.in index 2f5e956a6ae..d58bfb75a85 100644 --- a/examples/miscellaneous/miscellaneous_ex3/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex3/Makefile.in @@ -470,11 +470,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = 
@HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -522,6 +529,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex4/Makefile.in b/examples/miscellaneous/miscellaneous_ex4/Makefile.in index d9d6f954cbc..f35a439ba5f 100644 --- a/examples/miscellaneous/miscellaneous_ex4/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex4/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex5/Makefile.in b/examples/miscellaneous/miscellaneous_ex5/Makefile.in index 719d522a31a..dc7dbeeef7b 100644 --- a/examples/miscellaneous/miscellaneous_ex5/Makefile.in +++ 
b/examples/miscellaneous/miscellaneous_ex5/Makefile.in @@ -479,11 +479,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -531,6 +538,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex6/Makefile.in b/examples/miscellaneous/miscellaneous_ex6/Makefile.in index a5a756000c3..a985950b56d 100644 --- a/examples/miscellaneous/miscellaneous_ex6/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex6/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/examples/miscellaneous/miscellaneous_ex7/Makefile.in b/examples/miscellaneous/miscellaneous_ex7/Makefile.in index d0115c3cac9..e3515b9f785 100644 --- a/examples/miscellaneous/miscellaneous_ex7/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex7/Makefile.in @@ -508,11 +508,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -560,6 +567,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex8/Makefile.in b/examples/miscellaneous/miscellaneous_ex8/Makefile.in index 270aa93b899..d990641f148 100644 --- a/examples/miscellaneous/miscellaneous_ex8/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex8/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ 
NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex9/Makefile.in b/examples/miscellaneous/miscellaneous_ex9/Makefile.in index 46f5b5339d5..3860b7d8388 100644 --- a/examples/miscellaneous/miscellaneous_ex9/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex9/Makefile.in @@ -505,11 +505,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -557,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/optimization/optimization_ex1/Makefile.in b/examples/optimization/optimization_ex1/Makefile.in index cf5012ab431..fb3baf00e2d 100644 --- a/examples/optimization/optimization_ex1/Makefile.in +++ b/examples/optimization/optimization_ex1/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ 
LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/optimization/optimization_ex2/Makefile.in b/examples/optimization/optimization_ex2/Makefile.in index f3065b739c7..c7816e72654 100644 --- a/examples/optimization/optimization_ex2/Makefile.in +++ b/examples/optimization/optimization_ex2/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex1/Makefile.in b/examples/reduced_basis/reduced_basis_ex1/Makefile.in index 0879171980e..fa1d195a31a 100644 --- a/examples/reduced_basis/reduced_basis_ex1/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex1/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex2/Makefile.in b/examples/reduced_basis/reduced_basis_ex2/Makefile.in index 986069c3c25..2c34d349aeb 100644 --- a/examples/reduced_basis/reduced_basis_ex2/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex2/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex3/Makefile.in b/examples/reduced_basis/reduced_basis_ex3/Makefile.in index f68264db04e..41d6871eef3 100644 --- a/examples/reduced_basis/reduced_basis_ex3/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex3/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = 
@INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex4/Makefile.in b/examples/reduced_basis/reduced_basis_ex4/Makefile.in index 2c3343c83e8..a7bd31cb843 100644 --- a/examples/reduced_basis/reduced_basis_ex4/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex4/Makefile.in @@ -494,11 +494,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -546,6 +553,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex5/Makefile.in b/examples/reduced_basis/reduced_basis_ex5/Makefile.in index 238b469fe39..82b5b2fbc05 100644 --- a/examples/reduced_basis/reduced_basis_ex5/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex5/Makefile.in @@ 
-504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex6/Makefile.in b/examples/reduced_basis/reduced_basis_ex6/Makefile.in index 50076f35515..dbc24072e30 100644 --- a/examples/reduced_basis/reduced_basis_ex6/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex6/Makefile.in @@ -494,11 +494,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -546,6 +553,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex7/Makefile.in b/examples/reduced_basis/reduced_basis_ex7/Makefile.in 
index 3817bee46df..a8446610838 100644 --- a/examples/reduced_basis/reduced_basis_ex7/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex7/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/solution_transfer/solution_transfer_ex1/Makefile.in b/examples/solution_transfer/solution_transfer_ex1/Makefile.in index cd7613f44f7..5a0d14c6136 100644 --- a/examples/solution_transfer/solution_transfer_ex1/Makefile.in +++ b/examples/solution_transfer/solution_transfer_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = 
@NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex1/Makefile.in b/examples/subdomains/subdomains_ex1/Makefile.in index f6238ba3205..52deb3d0ade 100644 --- a/examples/subdomains/subdomains_ex1/Makefile.in +++ b/examples/subdomains/subdomains_ex1/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex2/Makefile.in b/examples/subdomains/subdomains_ex2/Makefile.in index 5c20167218c..b32dcd0e3c9 100644 --- a/examples/subdomains/subdomains_ex2/Makefile.in +++ b/examples/subdomains/subdomains_ex2/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = 
@NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex3/Makefile.in b/examples/subdomains/subdomains_ex3/Makefile.in index 3a83f502b61..a3d5c4a101d 100644 --- a/examples/subdomains/subdomains_ex3/Makefile.in +++ b/examples/subdomains/subdomains_ex3/Makefile.in @@ -474,11 +474,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -526,6 +533,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in index 3add79bf28a..c0fc74e97bf 100644 --- a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = 
@KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in index cd44f4a5603..a89c3ec3026 100644 --- a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in index f5c4ff69de1..f0afcad3be3 100644 --- a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = 
@HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in index 0fbdb133bf1..dcdbbe495d2 100644 --- a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in index d1f25e7dc8b..ff6e7676c11 100644 --- 
a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in index 5712834b887..da837f22e98 100644 --- a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ 
+NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in index 15445020704..4ef11376ea4 100644 --- a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in @@ -472,11 +472,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -524,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in index abcd909b40d..e5c7279beb3 100644 --- a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in @@ -510,11 +510,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = 
@KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -562,6 +569,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in index a497c06f581..495f1b401f7 100644 --- a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in @@ -472,11 +472,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -524,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex1/Makefile.in b/examples/transient/transient_ex1/Makefile.in index 288208b4c6c..cb1f7123cd7 100644 --- a/examples/transient/transient_ex1/Makefile.in +++ b/examples/transient/transient_ex1/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = 
@INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex2/Makefile.in b/examples/transient/transient_ex2/Makefile.in index bae2b2fcc21..3a5648a9715 100644 --- a/examples/transient/transient_ex2/Makefile.in +++ b/examples/transient/transient_ex2/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex3/Makefile.in b/examples/transient/transient_ex3/Makefile.in index 031ab6ae7a6..418ce7dfed8 100644 --- a/examples/transient/transient_ex3/Makefile.in +++ b/examples/transient/transient_ex3/Makefile.in @@ -514,11 +514,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL 
= @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +573,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex1/Makefile.in b/examples/vector_fe/vector_fe_ex1/Makefile.in index c474401c7ad..66462dd6f04 100644 --- a/examples/vector_fe/vector_fe_ex1/Makefile.in +++ b/examples/vector_fe/vector_fe_ex1/Makefile.in @@ -478,11 +478,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -530,6 +537,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex10/Makefile.in b/examples/vector_fe/vector_fe_ex10/Makefile.in index c2a3aa0dd15..97e297f7568 100644 --- a/examples/vector_fe/vector_fe_ex10/Makefile.in +++ b/examples/vector_fe/vector_fe_ex10/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = 
@HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex2/Makefile.in b/examples/vector_fe/vector_fe_ex2/Makefile.in index 1aca800f63c..ad09c3743fa 100644 --- a/examples/vector_fe/vector_fe_ex2/Makefile.in +++ b/examples/vector_fe/vector_fe_ex2/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex3/Makefile.in b/examples/vector_fe/vector_fe_ex3/Makefile.in index b033d436f7d..5994d90a2bf 100644 --- a/examples/vector_fe/vector_fe_ex3/Makefile.in +++ 
b/examples/vector_fe/vector_fe_ex3/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex4/Makefile.in b/examples/vector_fe/vector_fe_ex4/Makefile.in index b2320fc3c58..91be49794d4 100644 --- a/examples/vector_fe/vector_fe_ex4/Makefile.in +++ b/examples/vector_fe/vector_fe_ex4/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex5/Makefile.in 
b/examples/vector_fe/vector_fe_ex5/Makefile.in index 21a71638e6e..62b45033645 100644 --- a/examples/vector_fe/vector_fe_ex5/Makefile.in +++ b/examples/vector_fe/vector_fe_ex5/Makefile.in @@ -491,11 +491,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -543,6 +550,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex6/Makefile.in b/examples/vector_fe/vector_fe_ex6/Makefile.in index cb550f280e4..d092712abe2 100644 --- a/examples/vector_fe/vector_fe_ex6/Makefile.in +++ b/examples/vector_fe/vector_fe_ex6/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ 
OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex7/Makefile.in b/examples/vector_fe/vector_fe_ex7/Makefile.in index 6ae32888b36..65dce847a17 100644 --- a/examples/vector_fe/vector_fe_ex7/Makefile.in +++ b/examples/vector_fe/vector_fe_ex7/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex8/Makefile.in b/examples/vector_fe/vector_fe_ex8/Makefile.in index fe881610825..d5c542a648e 100644 --- a/examples/vector_fe/vector_fe_ex8/Makefile.in +++ b/examples/vector_fe/vector_fe_ex8/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = 
@NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex9/Makefile.in b/examples/vector_fe/vector_fe_ex9/Makefile.in index 34e5092c976..0e362c7a3f8 100644 --- a/examples/vector_fe/vector_fe_ex9/Makefile.in +++ b/examples/vector_fe/vector_fe_ex9/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/include/Makefile.in b/include/Makefile.in index 15e2ded0d9e..e55732f1235 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -380,11 +380,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -432,6 +439,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = 
@NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -635,6 +643,7 @@ include_HEADERS = \ base/libmesh_abort.h \ base/libmesh_base.h \ base/libmesh_common.h \ + base/libmesh_device.h \ base/libmesh_documentation.h \ base/libmesh_exceptions.h \ base/libmesh_logging.h \ @@ -929,7 +938,6 @@ include_HEADERS = \ parallel/threads_allocators.h \ parallel/threads_none.h \ parallel/threads_pthread.h \ - parallel/threads_spin_mutex_forward.h \ parallel/threads_tbb.h \ partitioning/centroid_partitioner.h \ partitioning/hilbert_sfc_partitioner.h \ diff --git a/include/libmesh/Makefile.in b/include/libmesh/Makefile.in index 0e95a2a8ef6..5c5b77c25db 100644 --- a/include/libmesh/Makefile.in +++ b/include/libmesh/Makefile.in @@ -309,11 +309,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -361,6 +368,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -530,10 +538,11 @@ EXTRA_DIST = rebuild_makefile.sh BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ dof_object.h factory.h float128_shims.h getpot.h id_types.h \ libmesh.h libmesh_abort.h libmesh_augment_std_namespace.h \ - libmesh_base.h libmesh_common.h libmesh_documentation.h \ - libmesh_exceptions.h libmesh_logging.h libmesh_singleton.h \ - libmesh_version.h 
multi_predicates.h periodic_boundaries.h \ - periodic_boundary.h periodic_boundary_base.h print_trace.h \ + libmesh_base.h libmesh_common.h libmesh_device.h \ + libmesh_documentation.h libmesh_exceptions.h libmesh_logging.h \ + libmesh_singleton.h libmesh_version.h multi_predicates.h \ + periodic_boundaries.h periodic_boundary.h \ + periodic_boundary_base.h print_trace.h \ reference_counted_object.h reference_counter.h \ single_predicates.h sparsity_pattern.h variable.h \ variant_filter_iterator.h enum_convergence_flags.h \ @@ -629,13 +638,13 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ parallel_ghost_sync.h parallel_hilbert.h parallel_histogram.h \ parallel_node.h parallel_object.h parallel_only.h \ parallel_sort.h threads.h threads_allocators.h threads_none.h \ - threads_pthread.h threads_spin_mutex_forward.h threads_tbb.h \ - centroid_partitioner.h hilbert_sfc_partitioner.h \ - linear_partitioner.h mapped_subdomain_partitioner.h \ - metis_csr_graph.h metis_partitioner.h morton_sfc_partitioner.h \ - parmetis_helper.h parmetis_partitioner.h partitioner.h \ - sfc_partitioner.h subdomain_partitioner.h diff_physics.h \ - diff_qoi.h fem_physics.h quadrature.h quadrature_clough.h \ + threads_pthread.h threads_tbb.h centroid_partitioner.h \ + hilbert_sfc_partitioner.h linear_partitioner.h \ + mapped_subdomain_partitioner.h metis_csr_graph.h \ + metis_partitioner.h morton_sfc_partitioner.h parmetis_helper.h \ + parmetis_partitioner.h partitioner.h sfc_partitioner.h \ + subdomain_partitioner.h diff_physics.h diff_qoi.h \ + fem_physics.h quadrature.h quadrature_clough.h \ quadrature_composite.h quadrature_conical.h quadrature_gauss.h \ quadrature_gauss_lobatto.h quadrature_gm.h quadrature_grid.h \ quadrature_jacobi.h quadrature_monomial.h quadrature_nodal.h \ @@ -992,6 +1001,9 @@ libmesh_base.h: $(top_srcdir)/include/base/libmesh_base.h libmesh_common.h: $(top_srcdir)/include/base/libmesh_common.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ 
+libmesh_device.h: $(top_srcdir)/include/base/libmesh_device.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + libmesh_documentation.h: $(top_srcdir)/include/base/libmesh_documentation.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1886,9 +1898,6 @@ threads_none.h: $(top_srcdir)/include/parallel/threads_none.h threads_pthread.h: $(top_srcdir)/include/parallel/threads_pthread.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ -threads_spin_mutex_forward.h: $(top_srcdir)/include/parallel/threads_spin_mutex_forward.h - $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - threads_tbb.h: $(top_srcdir)/include/parallel/threads_tbb.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/tests/Makefile.in b/tests/Makefile.in index 59ed2e7641e..d94f4b9384d 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -95,25 +95,31 @@ target_triplet = @target@ @LIBMESH_ENABLE_FPARSER_TRUE@ fparser/autodiff.C check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ - $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) + $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) \ + $(am__EXEEXT_7) +TESTS = $(am__EXEEXT_1) $(am__append_11) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_2 = -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_3 = kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_4 = kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. Some cppunit versions work fine for us, # others segfault and/or hang. By default we will not run # GLIBCXX-debugging builds with cppunit unless specifically # configured to. 
-@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_2 = unit_tests-dbg -@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_3 = unit_tests-dbg -@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_4 = unit_tests-devel -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__append_5 = unit_tests-prof -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__append_6 = unit_tests-oprof -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__append_7 = unit_tests-opt -@LIBMESH_VPATH_BUILD_TRUE@am__append_8 = .linkstamp +@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_5 = unit_tests-dbg +@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_6 = unit_tests-dbg +@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_7 = unit_tests-devel +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__append_8 = unit_tests-prof +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__append_9 = unit_tests-oprof +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__append_10 = unit_tests-opt +@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_11 = run_unit_tests.sh +@LIBMESH_VPATH_BUILD_TRUE@am__append_12 = .linkstamp ###################################################################### # # Don't leave code coverage outputs lying around -@CODE_COVERAGE_ENABLED_TRUE@am__append_9 = */*.gcda */*.gcno +@CODE_COVERAGE_ENABLED_TRUE@am__append_13 = */*.gcda */*.gcno subdir = tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = \ @@ -182,12 +188,34 @@ mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/include/libmesh_config.h.tmp CONFIG_CLEAN_FILES = run_unit_tests.sh CONFIG_CLEAN_VPATH_FILES = 
-@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_1 = unit_tests-dbg$(EXEEXT) -@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) -@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-devel$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_4 = unit_tests-prof$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-oprof$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_6 = unit_tests-opt$(EXEEXT) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__EXEEXT_1 = kokkos_vector_ops_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit$(EXEEXT) +@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) +@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-dbg$(EXEEXT) +@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_4 = unit_tests-devel$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-prof$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_6 = unit_tests-oprof$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_7 = unit_tests-opt$(EXEEXT) +am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST = \ + numerics/kokkos_tensor_ops_oracle_test.K +am__dirstamp = $(am__leading_dot)dirstamp +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_tensor_ops_oracle_unit_OBJECTS = numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT) +kokkos_tensor_ops_oracle_unit_OBJECTS = \ + $(am_kokkos_tensor_ops_oracle_unit_OBJECTS) +am__DEPENDENCIES_1 = +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ 
$(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_vector_ops_oracle_unit_SOURCES_DIST = \ + numerics/kokkos_vector_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_vector_ops_oracle_unit_OBJECTS = numerics/kokkos_vector_ops_oracle_test.$(OBJEXT) +kokkos_vector_ops_oracle_unit_OBJECTS = \ + $(am_kokkos_vector_ops_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ stream_redirector.h test_comm.h base/dof_object_test.h \ base/dof_map_test.C base/default_coupling_test.C \ @@ -262,7 +290,6 @@ am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ utils/parameters_test.C utils/point_locator_test.C \ utils/rb_parameters_test.C utils/transparent_comparator.C \ utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C -am__dirstamp = $(am__leading_dot)dirstamp @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_1 = fparser/unit_tests_dbg-autodiff.$(OBJEXT) am__objects_2 = unit_tests_dbg-driver.$(OBJEXT) \ base/unit_tests_dbg-dof_map_test.$(OBJEXT) \ @@ -1860,10 +1887,14 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ +SOURCES = $(kokkos_tensor_ops_oracle_unit_SOURCES) \ + $(kokkos_vector_ops_oracle_unit_SOURCES) \ + $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ $(unit_tests_oprof_SOURCES) $(unit_tests_opt_SOURCES) \ $(unit_tests_prof_SOURCES) -DIST_SOURCES = $(am__unit_tests_dbg_SOURCES_DIST) \ +DIST_SOURCES = $(am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_vector_ops_oracle_unit_SOURCES_DIST) \ + 
$(am__unit_tests_dbg_SOURCES_DIST) \ $(am__unit_tests_devel_SOURCES_DIST) \ $(am__unit_tests_oprof_SOURCES_DIST) \ $(am__unit_tests_opt_SOURCES_DIST) \ @@ -2064,11 +2095,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2116,6 +2154,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -2288,6 +2327,7 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) -I$(top_builddir)/include \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) +KOKKOS_TEST_CPPFLAGS = $(am__append_2) unit_tests_sources = driver.C libmesh_cppunit.h stream_redirector.h \ test_comm.h base/dof_object_test.h base/dof_map_test.C \ base/default_coupling_test.C base/getpot_test.C \ @@ -2450,6 +2490,16 @@ unit_tests_data = $(data) # Why isn't this working automatically? 
EXTRA_DIST = $(data) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_SOURCES = $(unit_tests_sources) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_CXXFLAGS = $(CXXFLAGS_DBG) @@ -2480,7 +2530,16 @@ EXTRA_DIST = $(data) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_LDADD = $(top_builddir)/libmesh_opt.la @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_optdir = $(datadir) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_DATA = $(data) -@LIBMESH_ENABLE_CPPUNIT_TRUE@TESTS = run_unit_tests.sh + +# Custom link rules so the Kokkos compiler drives the final link step. 
+kokkos_vector_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ + +kokkos_tensor_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_tensor_ops_oracle_unit_LDFLAGS) -o $@ + CLEANFILES = cube_mesh.xda slit_mesh.xda slit_solution.xda out.e \ mesh_with_soln.e elemental_from_nodal.e write_elemset_data.e \ write_sideset_data.e write_nodeset_data.e write_edgeset_data.e \ @@ -2514,8 +2573,8 @@ CLEANFILES = cube_mesh.xda slit_mesh.xda slit_solution.xda out.e \ write_exodus_QUADSHELL9.e write_exodus_TET10.e \ write_exodus_TET14.e write_exodus_TET4.e write_exodus_TRI3.e \ write_exodus_TRI6.e write_exodus_TRI7.e \ - write_exodus_TRISHELL3.e smoother.out $(am__append_8) \ - $(am__append_9) + write_exodus_TRISHELL3.e smoother.out $(am__append_12) \ + $(am__append_13) # need to link any data files for VPATH builds @LIBMESH_VPATH_BUILD_TRUE@BUILT_SOURCES = .linkstamp @@ -2523,7 +2582,7 @@ all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: -.SUFFIXES: .C .lo .o .obj +.SUFFIXES: .C .K .lo .o .obj $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ @@ -2559,6 +2618,24 @@ run_unit_tests.sh: $(top_builddir)/config.status $(srcdir)/run_unit_tests.sh.in clean-checkPROGRAMS: $(am__rm_f) $(check_PROGRAMS) test -z "$(EXEEXT)" || $(am__rm_f) $(check_PROGRAMS:$(EXEEXT)=) +numerics/$(am__dirstamp): + @$(MKDIR_P) numerics + @: >>numerics/$(am__dirstamp) +numerics/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) numerics/$(DEPDIR) + @: >>numerics/$(DEPDIR)/$(am__dirstamp) +numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT): \ + numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) + +kokkos_tensor_ops_oracle_unit$(EXEEXT): $(kokkos_tensor_ops_oracle_unit_OBJECTS) $(kokkos_tensor_ops_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_tensor_ops_oracle_unit_DEPENDENCIES) + 
@rm -f kokkos_tensor_ops_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_tensor_ops_oracle_unit_LINK) $(kokkos_tensor_ops_oracle_unit_OBJECTS) $(kokkos_tensor_ops_oracle_unit_LDADD) $(LIBS) +numerics/kokkos_vector_ops_oracle_test.$(OBJEXT): \ + numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) + +kokkos_vector_ops_oracle_unit$(EXEEXT): $(kokkos_vector_ops_oracle_unit_OBJECTS) $(kokkos_vector_ops_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_vector_ops_oracle_unit_DEPENDENCIES) + @rm -f kokkos_vector_ops_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_vector_ops_oracle_unit_LINK) $(kokkos_vector_ops_oracle_unit_OBJECTS) $(kokkos_vector_ops_oracle_unit_LDADD) $(LIBS) base/$(am__dirstamp): @$(MKDIR_P) base @: >>base/$(am__dirstamp) @@ -2727,12 +2804,6 @@ mesh/unit_tests_dbg-project_solution_test.$(OBJEXT): \ mesh/$(am__dirstamp) mesh/$(DEPDIR)/$(am__dirstamp) mesh/unit_tests_dbg-xdrio_test.$(OBJEXT): mesh/$(am__dirstamp) \ mesh/$(DEPDIR)/$(am__dirstamp) -numerics/$(am__dirstamp): - @$(MKDIR_P) numerics - @: >>numerics/$(am__dirstamp) -numerics/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) numerics/$(DEPDIR) - @: >>numerics/$(DEPDIR)/$(am__dirstamp) numerics/unit_tests_dbg-composite_function_test.$(OBJEXT): \ numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) numerics/unit_tests_dbg-coupling_matrix_test.$(OBJEXT): \ @@ -14717,6 +14788,15 @@ $(top_builddir)/libmesh_prof.la: FORCE $(top_builddir)/libmesh_oprof.la: FORCE (cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) libmesh_oprof.la) +# Compile .K translation units with the Kokkos device compiler. +# $(MPI_INCLUDES) is needed because KOKKOS_CXX may be nvcc/hipcc +# instead of the MPI compiler wrapper, so mpi.h won't be found implicitly. 
+.K.o: + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ + @LIBMESH_VPATH_BUILD_TRUE@.linkstamp: @LIBMESH_VPATH_BUILD_TRUE@ -rm -f solutions && $(LN_S) -f $(srcdir)/solutions . @LIBMESH_VPATH_BUILD_TRUE@ -rm -f meshes && $(LN_S) -f $(srcdir)/meshes . From a326237d8faf97f364ab447e9d13883fd74f63cc Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 5 May 2026 10:28:23 -0600 Subject: [PATCH 05/48] Add Kokkos FE implementation headers --- include/Makefile.am | 18 + include/enums/enum_fe_elem_class.h | 50 ++ include/gpu/kokkos_fe_base.h | 43 ++ include/gpu/kokkos_fe_evaluator.h | 368 +++++++++++ include/gpu/kokkos_fe_face_map.h | 160 +++++ include/gpu/kokkos_fe_lagrange_1d.h | 92 +++ include/gpu/kokkos_fe_lagrange_2d.h | 253 +++++++ include/gpu/kokkos_fe_lagrange_3d.h | 367 +++++++++++ include/gpu/kokkos_fe_map.h | 235 +++++++ include/gpu/kokkos_fe_monomial.h | 941 +++++++++++++++++++++++++++ include/gpu/kokkos_fe_types.h | 681 +++++++++++++++++++ include/gpu/kokkos_quadrature.h | 652 +++++++++++++++++++ include/gpu/kokkos_scalar_types.h | 118 ++++ src/quadrature/quadrature_gauss_3D.C | 2 - 14 files changed, 3978 insertions(+), 2 deletions(-) create mode 100644 include/enums/enum_fe_elem_class.h create mode 100644 include/gpu/kokkos_fe_base.h create mode 100644 include/gpu/kokkos_fe_evaluator.h create mode 100644 include/gpu/kokkos_fe_face_map.h create mode 100644 include/gpu/kokkos_fe_lagrange_1d.h create mode 100644 include/gpu/kokkos_fe_lagrange_2d.h create mode 100644 include/gpu/kokkos_fe_lagrange_3d.h create mode 100644 include/gpu/kokkos_fe_map.h create mode 100644 include/gpu/kokkos_fe_monomial.h create mode 100644 include/gpu/kokkos_fe_types.h create mode 100644 include/gpu/kokkos_quadrature.h create mode 100644 include/gpu/kokkos_scalar_types.h diff --git a/include/Makefile.am b/include/Makefile.am index 
a8ace90467c..1eb5f275748 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,5 +1,23 @@ SUBDIRS = libmesh +# GPU (Kokkos) FE math headers — installed preserving the gpu/ subdirectory so +# downstream code can use #include "libmesh/gpu/kokkos_fe_types.h" etc. +# nobase_ is used instead of the standard flat install to keep the namespace. +if LIBMESH_ENABLE_KOKKOS +nobase_include_HEADERS = \ + gpu/kokkos_scalar_types.h \ + gpu/kokkos_fe_types.h \ + gpu/kokkos_fe_base.h \ + gpu/kokkos_fe_evaluator.h \ + gpu/kokkos_fe_lagrange_1d.h \ + gpu/kokkos_fe_lagrange_2d.h \ + gpu/kokkos_fe_lagrange_3d.h \ + gpu/kokkos_fe_monomial.h \ + gpu/kokkos_fe_face_map.h \ + gpu/kokkos_fe_map.h \ + gpu/kokkos_quadrature.h +endif + # special handholding for prefix_config.m4 generated files # so that 'make clean ; make' works as does 'make distcheck' # libmesh_config.h is made by ./configure, so it should get diff --git a/include/enums/enum_fe_elem_class.h b/include/enums/enum_fe_elem_class.h new file mode 100644 index 00000000000..2b1b2e96d12 --- /dev/null +++ b/include/enums/enum_fe_elem_class.h @@ -0,0 +1,50 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + +#ifndef LIBMESH_ENUM_FE_ELEM_CLASS_H +#define LIBMESH_ENUM_FE_ELEM_CLASS_H + +namespace libMesh { + +/** + * \enum libMesh::FEElemClass groups element types by topological class, + * independent of polynomial order. + * + * e.g. QUAD4, QUAD8, QUAD9 all map to QUAD; TRI3, TRI6, TRI7 all map to TRI. + * Used together with FEFamily and polynomial order to uniquely identify a + * physics finite element space. + * + * The fixed type allows forward declaration as: + * enum class FEElemClass : unsigned int; + */ +enum class FEElemClass : unsigned int +{ + EDGE = 0, + TRI = 1, + QUAD = 2, + TET = 3, + HEX = 4, + PRISM = 5, + PYRAMID = 6, + N_CLASSES +}; + +} // namespace libMesh + +#endif // LIBMESH_ENUM_FE_ELEM_CLASS_H diff --git a/include/gpu/kokkos_fe_base.h b/include/gpu/kokkos_fe_base.h new file mode 100644 index 00000000000..07664e627eb --- /dev/null +++ b/include/gpu/kokkos_fe_base.h @@ -0,0 +1,43 @@ +// Primary FEEvaluator template for Kokkos device-compatible shape functions. +// +// Uses libMesh's own ElemType and FEFamily enums as non-type template +// parameters — no separate tag structs are needed. +// +// All uses must be explicit specializations defined in the kokkos_fe_lagrange_*.h +// and kokkos_fe_monomial.h headers. 
Every specialization must provide: +// +// static constexpr unsigned int n_dofs() +// +// LIBMESH_DEVICE_INLINE +// static Real shape(unsigned int i, Real xi, Real eta, Real zeta) +// +// LIBMESH_DEVICE_INLINE +// static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) +// +// Reference-element coordinate conventions (matching libMesh): +// Edge: xi in [-1, 1] +// Quad: (xi, eta) in [-1,1]^2 +// Hex: (xi, eta, zeta) in [-1,1]^3 +// Tri: (xi, eta) in unit triangle, xi >= 0, eta >= 0, xi+eta <= 1 +// Tet: (xi, eta, zeta) in unit tetrahedron +// +// Unused coordinate arguments (e.g. zeta on a 2D element) are accepted but +// ignored, so call sites can always pass all three without special-casing. +// +#ifndef LIBMESH_KOKKOS_FE_BASE_H +#define LIBMESH_KOKKOS_FE_BASE_H + +#include "gpu/kokkos_scalar_types.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" + +namespace libMesh::Kokkos +{ + +template +struct FEEvaluator; // forward declaration only; instantiation requires a specialization + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_BASE_H diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h new file mode 100644 index 00000000000..118880c614d --- /dev/null +++ b/include/gpu/kokkos_fe_evaluator.h @@ -0,0 +1,368 @@ +// Kokkos on-device FE shape function dispatch (kokkos_fe_evaluator.h). +// +// Provides: +// map_shape — isoparametric Lagrange shape (topology-based) +// grad_map_shape — isoparametric Lagrange gradient (topology-based) +// shape — physics FE shape (FEShapeKey-based) +// grad_shape — physics FE gradient (FEShapeKey-based) +// +// All functions are LIBMESH_DEVICE_INLINE and dispatch via switch statements +// that compile to fast GPU branch logic. +// +// These helpers are intended for Kokkos-enabled code paths. Device execution +// happens from .K translation units, but the header is also parsed by host code. 
+ +#ifndef LIBMESH_KOKKOS_FE_EVALUATOR_H +#define LIBMESH_KOKKOS_FE_EVALUATOR_H + +#include "gpu/kokkos_fe_base.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_fe_lagrange_1d.h" +#include "gpu/kokkos_fe_lagrange_2d.h" +#include "gpu/kokkos_fe_lagrange_3d.h" +#include "gpu/kokkos_fe_monomial.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" + +namespace libMesh::Kokkos +{ + +// ── On-device helpers: element class -> spatial dimension ───────────────────── + +LIBMESH_DEVICE_INLINE unsigned int +dim_from_class(FEElemClass cls) +{ + switch (cls) + { + case FEElemClass::EDGE: + return 1; + case FEElemClass::TRI: + case FEElemClass::QUAD: + return 2; + case FEElemClass::TET: + case FEElemClass::HEX: + case FEElemClass::PRISM: + case FEElemClass::PYRAMID: + return 3; + default: + detail::abort_unsupported("dim_from_class(): unsupported element class"); + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +dim_from_topology(libMesh::ElemType topo) +{ + return dim_from_class(class_from_topology(topo)); +} + +// ── On-device helper: exact libMesh Lagrange key -> evaluator topology ───────── + +LIBMESH_DEVICE_INLINE libMesh::ElemType +lagrange_shape_topology_for_key(FEShapeKey key) +{ + const libMesh::ElemType topo = lagrange_shape_topology_or_invalid(key); + + if (topo == libMesh::INVALID_ELEM) + { + detail::abort_unsupported("lagrange_shape_topology_for_key(): unsupported LAGRANGE key for current Kokkos evaluator support boundary"); + return libMesh::INVALID_ELEM; + } + + return topo; +} + +LIBMESH_DEVICE_INLINE Real +eval_lagrange_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (topo) + { + case libMesh::EDGE2: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::EDGE3: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TRI3: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TRI6: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::QUAD4: 
+ return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::QUAD8: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::QUAD9: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TET4: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TET10: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::HEX8: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::HEX20: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::HEX27: + return FEEvaluator::shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("eval_lagrange_shape(): unsupported evaluator topology"); + return Real(0); + } +} + +LIBMESH_DEVICE_INLINE RealVector +eval_lagrange_grad_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (topo) + { + case libMesh::EDGE2: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::EDGE3: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TRI3: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TRI6: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::QUAD4: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::QUAD8: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::QUAD9: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TET4: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TET10: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::HEX8: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::HEX20: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::HEX27: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("eval_lagrange_grad_shape(): unsupported evaluator topology"); + return zero_vector(); + } +} + +// ── Geometry-only shape dispatch (mapping-type + topology) ──────────────────── +// +// Used by map_face_qp_to_parent() for the isoparametric 
mapping from face reference +// coordinates to parent reference coordinates. +// +// The mapping_type parameter selects the geometric map family. Currently only +// LAGRANGE_MAP is supported; RATIONAL_BERNSTEIN_MAP requires additional +// rational-weight data that is not yet threaded through the device path. + +// ── Compile-time topology versions (preferred for GPU) ─────────────────── +// Templated on FEFamily and ElemType so the GPU compiler instantiates only the specific +// FEEvaluator specialization. No topology switch means no stack pressure. + +/// Compile-time map shape evaluation. +template +LIBMESH_DEVICE_INLINE Real +map_shape(unsigned int i, Real xi, Real eta, Real zeta) +{ + return FEEvaluator::shape(i, xi, eta, zeta); +} + +/// Compile-time map gradient evaluation. +template +LIBMESH_DEVICE_INLINE RealVector +grad_map_shape(unsigned int i, Real xi, Real eta, Real zeta) +{ + return FEEvaluator::grad_shape(i, xi, eta, zeta); +} + +// ── Runtime topology versions (larger GPU stack usage) ─────────────────── + +/// Evaluate the i-th geometric map shape function at (xi, eta, zeta). +LIBMESH_DEVICE_INLINE Real +map_shape(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (mapping_type) + { + case libMesh::LAGRANGE_MAP: + return eval_lagrange_shape(topo, i, xi, eta, zeta); + default: + detail::abort_unsupported("map_shape(): only LAGRANGE_MAP is implemented"); + return Real(0); + } +} + +/// Evaluate the reference-space gradient of the i-th geometric map shape function. 
+LIBMESH_DEVICE_INLINE RealVector +grad_map_shape(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (mapping_type) + { + case libMesh::LAGRANGE_MAP: + return eval_lagrange_grad_shape(topo, i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_map_shape(): only LAGRANGE_MAP is implemented"); + return zero_vector(); + } +} + +// ── Physics shape dispatch (FEShapeKey-based) ───────────────────────────────── + +/// Evaluate the i-th physics shape function at (xi, eta, zeta). +LIBMESH_DEVICE_INLINE Real +shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) +{ + if (!supports_shape(key)) + { + detail::abort_unsupported("shape(): unsupported FE key for current Kokkos evaluator support boundary"); + return Real(0); + } + + switch (key.family) + { + case libMesh::LAGRANGE: + return eval_lagrange_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + + case libMesh::MONOMIAL: + { + switch (monomial_evaluator_dim_or_zero(key.elem_type)) + { + case 1: + switch (key.order) + { + case 0: return MonomialImpl1D<0>::shape(i, xi, eta, zeta); + case 1: return MonomialImpl1D<1>::shape(i, xi, eta, zeta); + case 2: return MonomialImpl1D<2>::shape(i, xi, eta, zeta); + case 3: return MonomialImpl1D<3>::shape(i, xi, eta, zeta); + case 4: return MonomialImpl1D<4>::shape(i, xi, eta, zeta); + case 5: return MonomialImpl1D<5>::shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("shape(): unsupported 1D MONOMIAL order"); + return Real(0); + } + case 2: + switch (key.order) + { + case 0: return MonomialImpl2D<0>::shape(i, xi, eta, zeta); + case 1: return MonomialImpl2D<1>::shape(i, xi, eta, zeta); + case 2: return MonomialImpl2D<2>::shape(i, xi, eta, zeta); + case 3: return MonomialImpl2D<3>::shape(i, xi, eta, zeta); + case 4: return MonomialImpl2D<4>::shape(i, xi, eta, zeta); + case 5: return MonomialImpl2D<5>::shape(i, xi, eta, zeta); + default: + 
detail::abort_unsupported("shape(): unsupported 2D MONOMIAL order"); + return Real(0); + } + case 3: + switch (key.order) + { + case 0: return MonomialImpl3D<0>::shape(i, xi, eta, zeta); + case 1: return MonomialImpl3D<1>::shape(i, xi, eta, zeta); + case 2: return MonomialImpl3D<2>::shape(i, xi, eta, zeta); + case 3: return MonomialImpl3D<3>::shape(i, xi, eta, zeta); + case 4: return MonomialImpl3D<4>::shape(i, xi, eta, zeta); + case 5: return MonomialImpl3D<5>::shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("shape(): unsupported 3D MONOMIAL order"); + return Real(0); + } + default: + detail::abort_unsupported("shape(): unsupported MONOMIAL element topology"); + return Real(0); + } + } + + default: + detail::abort_unsupported("shape(): unsupported FE family"); + return Real(0); + } +} + +/// Evaluate the reference-space gradient of the i-th physics shape function. +/// With J from jacobian(), rows are reference derivatives, so +/// grad_ref = J * grad_phys and grad_phys = J.inverse(dim) * grad_ref. 
+LIBMESH_DEVICE_INLINE RealVector +grad_shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) +{ + if (!supports_grad_shape(key)) + { + detail::abort_unsupported("grad_shape(): unsupported FE key for current Kokkos evaluator support boundary"); + return zero_vector(); + } + + switch (key.family) + { + case libMesh::LAGRANGE: + return eval_lagrange_grad_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + + case libMesh::MONOMIAL: + { + switch (monomial_evaluator_dim_or_zero(key.elem_type)) + { + case 1: + switch (key.order) + { + case 0: return MonomialImpl1D<0>::grad_shape(i, xi, eta, zeta); + case 1: return MonomialImpl1D<1>::grad_shape(i, xi, eta, zeta); + case 2: return MonomialImpl1D<2>::grad_shape(i, xi, eta, zeta); + case 3: return MonomialImpl1D<3>::grad_shape(i, xi, eta, zeta); + case 4: return MonomialImpl1D<4>::grad_shape(i, xi, eta, zeta); + case 5: return MonomialImpl1D<5>::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_shape(): unsupported 1D MONOMIAL order"); + return zero_vector(); + } + case 2: + switch (key.order) + { + case 0: return MonomialImpl2D<0>::grad_shape(i, xi, eta, zeta); + case 1: return MonomialImpl2D<1>::grad_shape(i, xi, eta, zeta); + case 2: return MonomialImpl2D<2>::grad_shape(i, xi, eta, zeta); + case 3: return MonomialImpl2D<3>::grad_shape(i, xi, eta, zeta); + case 4: return MonomialImpl2D<4>::grad_shape(i, xi, eta, zeta); + case 5: return MonomialImpl2D<5>::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_shape(): unsupported 2D MONOMIAL order"); + return zero_vector(); + } + case 3: + switch (key.order) + { + case 0: return MonomialImpl3D<0>::grad_shape(i, xi, eta, zeta); + case 1: return MonomialImpl3D<1>::grad_shape(i, xi, eta, zeta); + case 2: return MonomialImpl3D<2>::grad_shape(i, xi, eta, zeta); + case 3: return MonomialImpl3D<3>::grad_shape(i, xi, eta, zeta); + case 4: return MonomialImpl3D<4>::grad_shape(i, xi, eta, zeta); + case 5: return 
MonomialImpl3D<5>::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_shape(): unsupported 3D MONOMIAL order"); + return zero_vector(); + } + default: + detail::abort_unsupported("grad_shape(): unsupported MONOMIAL element topology"); + return zero_vector(); + } + } + + default: + detail::abort_unsupported("grad_shape(): unsupported FE family"); + return zero_vector(); + } +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_EVALUATOR_H diff --git a/include/gpu/kokkos_fe_face_map.h b/include/gpu/kokkos_fe_face_map.h new file mode 100644 index 00000000000..822ce4da406 --- /dev/null +++ b/include/gpu/kokkos_fe_face_map.h @@ -0,0 +1,160 @@ +#ifndef LIBMESH_KOKKOS_FE_FACE_MAP_H +#define LIBMESH_KOKKOS_FE_FACE_MAP_H + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "gpu/kokkos_fe_evaluator.h" +#include "libmesh/elem.h" + +namespace libMesh::Kokkos +{ + +LIBMESH_DEVICE_INLINE +RealVector point_to_real_vector(const libMesh::Point & pt) +{ +#if LIBMESH_DIM == 1 + return make_vector(pt(0)); +#elif LIBMESH_DIM == 2 + return make_vector(pt(0), pt(1)); +#else + return make_vector(pt(0), pt(1), pt(2)); +#endif +} + +inline unsigned int +recover_parent_side(const libMesh::Elem & parent, + const libMesh::Elem & side_in_parent) +{ + for (unsigned int side = 0; side < parent.n_sides(); ++side) + { + auto candidate = parent.build_side_ptr(side); + + if (candidate->type() != side_in_parent.type() || + candidate->n_nodes() != side_in_parent.n_nodes()) + continue; + + bool same_side = true; + for (unsigned int k = 0; k < candidate->n_nodes(); ++k) + if (candidate->node_ptr(k) != side_in_parent.node_ptr(k)) + { + same_side = false; + break; + } + + if (same_side) + return side; + } + + return libMesh::invalid_uint; +} + +inline libMesh::Point +parent_refspace_node(const libMesh::Elem & parent, unsigned int node) +{ + switch (parent.type()) + { + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + switch (node) + { + case 9: + return libMesh::Point(-0.5, 
-0.5, 0.5); + case 10: + return libMesh::Point(0.5, -0.5, 0.5); + case 11: + return libMesh::Point(0.5, 0.5, 0.5); + case 12: + return libMesh::Point(-0.5, 0.5, 0.5); + default: + return parent.master_point(node); + } + + case libMesh::PYRAMID18: + switch (node) + { + case 9: + return libMesh::Point(-0.5, -0.5, 0.5); + case 10: + return libMesh::Point(0.5, -0.5, 0.5); + case 11: + return libMesh::Point(0.5, 0.5, 0.5); + case 12: + return libMesh::Point(-0.5, 0.5, 0.5); + case 14: + return libMesh::Point(-2. / 3., 0.0, 1. / 3.); + case 15: + return libMesh::Point(0.0, 2. / 3., 1. / 3.); + case 16: + return libMesh::Point(2. / 3., 0.0, 1. / 3.); + case 17: + return libMesh::Point(0.0, -2. / 3., 1. / 3.); + default: + return parent.master_point(node); + } + + default: + return parent.master_point(node); + } +} + +/** + * Map a face quadrature point from the side element's reference coordinate system + * to the parent element's reference coordinate system. + * + * side_in_parent must be obtained via build_side_ptr() (not side_ptr()), so that + * second-order sides carry their midpoint nodes. Parent reference coordinates + * are reconstructed from the FE reference-space node convention used by + * FE::side_map(), not from side_in_parent.point(k), which lives in physical + * space, and not from Elem::master_point() on pyramids, where those node + * coordinates differ. 
+ * + * @param side_in_parent The side element as embedded in the parent (from build_side_ptr()) + * @param mapping_type Geometric mapping type; only LAGRANGE_MAP is implemented, map_shape() aborts on any other value + * @param side_topo Topology of the side element (libMesh::ElemType) + * @param face_qpt Quadrature point in the side element's reference coordinates + * @returns Corresponding point in the parent element's reference coordinates + */ +inline RealVector +map_face_qp_to_parent(const libMesh::Elem & side_in_parent, + libMesh::ElemMappingType mapping_type, + libMesh::ElemType side_topo, + RealVector face_qpt) +{ + const libMesh::Elem * parent = side_in_parent.interior_parent(); + libmesh_error_msg_if(!parent, + "map_face_qp_to_parent(): side element must carry an interior_parent() from build_side_ptr()"); + + const unsigned int side = recover_parent_side(*parent, side_in_parent); + libmesh_error_msg_if(side == libMesh::invalid_uint, + "map_face_qp_to_parent(): could not recover parent side for the provided side element"); + + const unsigned int n = side_in_parent.n_nodes(); + RealVector parent_pt = zero_vector(); + + // 1-D elements: the "side" is a single vertex node. There is only one + // point-side reference coordinate, (0,0,0), so we map directly to the + // corresponding parent vertex in the parent reference element. 
+ if (n == 1) + { + const libMesh::Point pt = parent_refspace_node(*parent, parent->local_side_node(side, 0)); + return point_to_real_vector(pt); + } + + for (unsigned int k = 0; k < n; ++k) + { + const Real s = face_qpt(0); + const Real t = face_qpt(1); + const Real psi = map_shape(mapping_type, side_topo, k, s, t, 0.0); + + const libMesh::Point pt = parent_refspace_node(*parent, parent->local_side_node(side, k)); + parent_pt.add_scaled(point_to_real_vector(pt), psi); + } + + return parent_pt; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_FE_FACE_MAP_H diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h new file mode 100644 index 00000000000..72e78692882 --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -0,0 +1,92 @@ +// Kokkos FEEvaluator specializations for 1-D Lagrange elements. +// +// Covers EDGE2 (linear) and EDGE3 (quadratic). +// Reference-element coordinate convention (libMesh-compatible): +// EDGE2/EDGE3: xi in [-1, 1] +// +// EDGE3 node ordering (libMesh non-sequential): +// index 0 -> xi = -1 (left node) +// index 1 -> xi = +1 (right node) +// index 2 -> xi = 0 (midpoint) + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_1D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_1D_H + +#include "gpu/kokkos_fe_base.h" + +namespace libMesh::Kokkos +{ + +// ── EDGE2 (linear edge, 2 nodes) ───────────────────────────────────────────── + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 2; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.5 * (1.0 - xi); + case 1: return 0.5 * (1.0 + xi); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-0.5, 0.0, 0.0); + case 1: return make_vector( 0.5, 0.0, 
0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── EDGE3 (quadratic edge, 3 nodes) ────────────────────────────────────────── +// Node ordering matches libMesh: 0->left(-1), 1->right(+1), 2->mid(0) +// L_0(xi) = 0.5*xi*(xi-1) dL_0/dxi = xi - 0.5 +// L_1(xi) = 0.5*xi*(xi+1) dL_1/dxi = xi + 0.5 +// L_2(xi) = 1 - xi² dL_2/dxi = -2*xi + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 3; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.5 * xi * (xi - 1.0); + case 1: return 0.5 * xi * (xi + 1.0); + case 2: return 1.0 - xi * xi; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(xi - 0.5, 0.0, 0.0); + case 1: return make_vector(xi + 0.5, 0.0, 0.0); + case 2: return make_vector(-2.0 * xi, 0.0, 0.0); + default: return zero_vector(); + } + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_1D_H diff --git a/include/gpu/kokkos_fe_lagrange_2d.h b/include/gpu/kokkos_fe_lagrange_2d.h new file mode 100644 index 00000000000..08d1e2f5ba6 --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_2d.h @@ -0,0 +1,253 @@ +// Kokkos FEEvaluator specializations for 2-D Lagrange elements. +// +// Covers TRI3, TRI6, QUAD4, QUAD8, QUAD9. 
+// Reference-element coordinate conventions (libMesh-compatible): +// Tri: xi >= 0, eta >= 0, xi+eta <= 1 (unit triangle) +// Quad: (xi, eta) in [-1,1]² + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_2D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_2D_H + +#include "gpu/kokkos_fe_base.h" + +namespace libMesh::Kokkos +{ + +// ── TRI3 (linear triangle, 3 nodes) ────────────────────────────────────────── +// Barycentric: zeta0 = 1-xi-eta, zeta1 = xi, zeta2 = eta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 3; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0 - xi - eta; + case 1: return xi; + case 2: return eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-1.0, -1.0, 0.0); + case 1: return make_vector( 1.0, 0.0, 0.0); + case 2: return make_vector( 0.0, 1.0, 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── TRI6 (quadratic triangle, 6 nodes) ─────────────────────────────────────── +// Barycentric: z0=1-xi-eta, z1=xi, z2=eta +// phi_0 = z0*(2*z0-1) = (1-xi-eta)*(1-2*xi-2*eta) +// phi_1 = z1*(2*z1-1) = xi*(2*xi-1) +// phi_2 = z2*(2*z2-1) = eta*(2*eta-1) +// phi_3 = 4*z0*z1 = 4*(1-xi-eta)*xi +// phi_4 = 4*z1*z2 = 4*xi*eta +// phi_5 = 4*z2*z0 = 4*eta*(1-xi-eta) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 6; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + const Real z0 = 1.0 - xi - eta; + switch (i) + { + case 0: return z0 * (2.0 * z0 - 1.0); + case 1: return xi * (2.0 * xi - 1.0); + case 2: return eta * (2.0 * eta - 1.0); + case 3: return 4.0 * z0 * xi; + case 4: return 4.0 * xi * eta; + case 5: return 4.0 * eta * z0; + default: return 
0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(4.0*xi + 4.0*eta - 3.0, 4.0*xi + 4.0*eta - 3.0, 0.0); + case 1: return make_vector(4.0*xi - 1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 4.0*eta - 1.0, 0.0); + case 3: return make_vector(4.0*(1.0 - 2.0*xi - eta), -4.0*xi, 0.0); + case 4: return make_vector(4.0*eta, 4.0*xi, 0.0); + case 5: return make_vector(-4.0*eta, 4.0*(1.0 - xi - 2.0*eta), 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── QUAD4 (bilinear quadrilateral, 4 nodes) ─────────────────────────────────── +// Tensor product of two EDGE2 bases. libMesh node ordering: +// node 0: (-1,-1) node 1: (+1,-1) +// node 2: (+1,+1) node 3: (-1,+1) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.25 * (1.0 - xi) * (1.0 - eta); + case 1: return 0.25 * (1.0 + xi) * (1.0 - eta); + case 2: return 0.25 * (1.0 + xi) * (1.0 + eta); + case 3: return 0.25 * (1.0 - xi) * (1.0 + eta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-0.25*(1.0-eta), -0.25*(1.0-xi), 0.0); + case 1: return make_vector( 0.25*(1.0-eta), -0.25*(1.0+xi), 0.0); + case 2: return make_vector( 0.25*(1.0+eta), 0.25*(1.0+xi), 0.0); + case 3: return make_vector(-0.25*(1.0+eta), 0.25*(1.0-xi), 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── QUAD8 (serendipity quadrilateral, 8 nodes) ──────────────────────────────── +// Node ordering: +// 0: (-1,-1) 1: (+1,-1) 2: (+1,+1) 3: (-1,+1) +// 4: ( 0,-1) 5: (+1, 0) 6: ( 0,+1) 7: (-1, 0) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { 
return 8; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.25 * (1.0-xi) * (1.0-eta) * (-1.0-xi-eta); + case 1: return 0.25 * (1.0+xi) * (1.0-eta) * (-1.0+xi-eta); + case 2: return 0.25 * (1.0+xi) * (1.0+eta) * (-1.0+xi+eta); + case 3: return 0.25 * (1.0-xi) * (1.0+eta) * (-1.0-xi+eta); + case 4: return 0.5 * (1.0-xi*xi) * (1.0-eta); + case 5: return 0.5 * (1.0+xi) * (1.0-eta*eta); + case 6: return 0.5 * (1.0-xi*xi) * (1.0+eta); + case 7: return 0.5 * (1.0-xi) * (1.0-eta*eta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(0.25*(1.0-eta)*(2.0*xi+eta), + 0.25*(1.0-xi)*(xi+2.0*eta), + 0.0); + case 1: return make_vector(0.25*(1.0-eta)*(2.0*xi-eta), + 0.25*(1.0+xi)*(2.0*eta-xi), + 0.0); + case 2: return make_vector(0.25*(1.0+eta)*(2.0*xi+eta), + 0.25*(1.0+xi)*(xi+2.0*eta), + 0.0); + case 3: return make_vector(0.25*(1.0+eta)*(2.0*xi-eta), + 0.25*(1.0-xi)*(2.0*eta-xi), + 0.0); + case 4: return make_vector(-xi*(1.0-eta), -0.5*(1.0-xi*xi), 0.0); + case 5: return make_vector(0.5*(1.0-eta*eta), -eta*(1.0+xi), 0.0); + case 6: return make_vector(-xi*(1.0+eta), 0.5*(1.0-xi*xi), 0.0); + case 7: return make_vector(-0.5*(1.0-eta*eta), -eta*(1.0-xi), 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── QUAD9 (biquadratic quadrilateral, 9 nodes) ──────────────────────────────── +// Tensor product of two EDGE3 bases. 
libMesh node ordering: +// i0[] = {0,1,1,0, 2,1,2,0, 2} +// i1[] = {0,0,1,1, 0,2,1,2, 2} +// +// 1D basis (libMesh non-sequential ordering): +// L_0(t) = 0.5*t*(t-1) dL_0/dt = t - 0.5 +// L_1(t) = 0.5*t*(t+1) dL_1/dt = t + 0.5 +// L_2(t) = 1 - t² dL_2/dt = -2*t + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 9; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real L(unsigned int k, Real t) + { + switch (k) + { + case 0: return 0.5 * t * (t - 1.0); + case 1: return 0.5 * t * (t + 1.0); + case 2: return 1.0 - t * t; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static Real dL(unsigned int k, Real t) + { + switch (k) + { + case 0: return t - 0.5; + case 1: return t + 0.5; + case 2: return -2.0 * t; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; + static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; + return L(i0[i], xi) * L(i1[i], eta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; + static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; + const Real dxi = dL(i0[i], xi) * L(i1[i], eta); + const Real deta = L(i0[i], xi) * dL(i1[i], eta); + return make_vector(dxi, deta, 0.0); + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_2D_H diff --git a/include/gpu/kokkos_fe_lagrange_3d.h b/include/gpu/kokkos_fe_lagrange_3d.h new file mode 100644 index 00000000000..5f2fbb203c3 --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_3d.h @@ -0,0 +1,367 @@ +// Kokkos FEEvaluator specializations for 3-D Lagrange elements. +// +// Covers TET4, TET10, HEX8, HEX20, HEX27. 
+// Reference-element coordinate conventions (libMesh-compatible): +// Tet: xi >= 0, eta >= 0, zeta >= 0, xi+eta+zeta <= 1 (unit tetrahedron) +// Hex: (xi, eta, zeta) in [-1,1]³ + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_3D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_3D_H + +#include "gpu/kokkos_fe_base.h" + +namespace libMesh::Kokkos +{ + +// ── TET4 (linear tetrahedron, 4 nodes) ─────────────────────────────────────── +// Barycentric: z0=1-xi-eta-zeta, z1=xi, z2=eta, z3=zeta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0 - xi - eta - zeta; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-1.0, -1.0, -1.0); + case 1: return make_vector( 1.0, 0.0, 0.0); + case 2: return make_vector( 0.0, 1.0, 0.0); + case 3: return make_vector( 0.0, 0.0, 1.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── TET10 (quadratic tetrahedron, 10 nodes) ─────────────────────────────────── +// Barycentric: z0=1-xi-eta-zeta, z1=xi, z2=eta, z3=zeta +// phi_0 = z0*(2*z0-1) +// phi_1 = z1*(2*z1-1) = xi*(2*xi-1) +// phi_2 = z2*(2*z2-1) = eta*(2*eta-1) +// phi_3 = z3*(2*z3-1) = zeta*(2*zeta-1) +// phi_4 = 4*z0*z1 = 4*(1-xi-eta-zeta)*xi +// phi_5 = 4*z1*z2 = 4*xi*eta +// phi_6 = 4*z2*z0 = 4*eta*(1-xi-eta-zeta) +// phi_7 = 4*z0*z3 = 4*(1-xi-eta-zeta)*zeta +// phi_8 = 4*z1*z3 = 4*xi*zeta +// phi_9 = 4*z2*z3 = 4*eta*zeta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 10; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + const Real z0 = 1.0 - xi - eta - zeta; + switch 
(i) + { + case 0: return z0 * (2.0*z0 - 1.0); + case 1: return xi * (2.0*xi - 1.0); + case 2: return eta * (2.0*eta - 1.0); + case 3: return zeta* (2.0*zeta - 1.0); + case 4: return 4.0 * z0 * xi; + case 5: return 4.0 * xi * eta; + case 6: return 4.0 * eta * z0; + case 7: return 4.0 * z0 * zeta; + case 8: return 4.0 * xi * zeta; + case 9: return 4.0 * eta * zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: + { + const Real v = 4.0*(xi + eta + zeta) - 3.0; + return make_vector(v, v, v); + } + case 1: return make_vector(4.0*xi - 1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 4.0*eta - 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 4.0*zeta - 1.0); + case 4: return make_vector( 4.0*(1.0-2.0*xi-eta-zeta), -4.0*xi, -4.0*xi); + case 5: return make_vector( 4.0*eta, 4.0*xi, 0.0); + case 6: return make_vector(-4.0*eta, 4.0*(1.0-xi-2.0*eta-zeta), -4.0*eta); + case 7: return make_vector(-4.0*zeta, -4.0*zeta, 4.0*(1.0-xi-eta-2.0*zeta)); + case 8: return make_vector(4.0*zeta, 0.0, 4.0*xi); + case 9: return make_vector(0.0, 4.0*zeta, 4.0*eta); + default: return zero_vector(); + } + } +#endif +}; + +// ── HEX8 (trilinear hexahedron, 8 nodes) ───────────────────────────────────── +// Tensor product of three EDGE2 bases. 
+// Node ordering (same as libMesh): +// 0:(-1,-1,-1) 1:(+1,-1,-1) 2:(+1,+1,-1) 3:(-1,+1,-1) +// 4:(-1,-1,+1) 5:(+1,-1,+1) 6:(+1,+1,+1) 7:(-1,+1,+1) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 8; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 0.125*(1.0-xi)*(1.0-eta)*(1.0-zeta); + case 1: return 0.125*(1.0+xi)*(1.0-eta)*(1.0-zeta); + case 2: return 0.125*(1.0+xi)*(1.0+eta)*(1.0-zeta); + case 3: return 0.125*(1.0-xi)*(1.0+eta)*(1.0-zeta); + case 4: return 0.125*(1.0-xi)*(1.0-eta)*(1.0+zeta); + case 5: return 0.125*(1.0+xi)*(1.0-eta)*(1.0+zeta); + case 6: return 0.125*(1.0+xi)*(1.0+eta)*(1.0+zeta); + case 7: return 0.125*(1.0-xi)*(1.0+eta)*(1.0+zeta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return make_vector(-0.125*(1.0-eta)*(1.0-zeta), + -0.125*(1.0-xi) *(1.0-zeta), + -0.125*(1.0-xi) *(1.0-eta)); + case 1: return make_vector( 0.125*(1.0-eta)*(1.0-zeta), + -0.125*(1.0+xi) *(1.0-zeta), + -0.125*(1.0+xi) *(1.0-eta)); + case 2: return make_vector( 0.125*(1.0+eta)*(1.0-zeta), + 0.125*(1.0+xi) *(1.0-zeta), + -0.125*(1.0+xi) *(1.0+eta)); + case 3: return make_vector(-0.125*(1.0+eta)*(1.0-zeta), + 0.125*(1.0-xi) *(1.0-zeta), + -0.125*(1.0-xi) *(1.0+eta)); + case 4: return make_vector(-0.125*(1.0-eta)*(1.0+zeta), + -0.125*(1.0-xi) *(1.0+zeta), + 0.125*(1.0-xi) *(1.0-eta)); + case 5: return make_vector( 0.125*(1.0-eta)*(1.0+zeta), + -0.125*(1.0+xi) *(1.0+zeta), + 0.125*(1.0+xi) *(1.0-eta)); + case 6: return make_vector( 0.125*(1.0+eta)*(1.0+zeta), + 0.125*(1.0+xi) *(1.0+zeta), + 0.125*(1.0+xi) *(1.0+eta)); + case 7: return make_vector(-0.125*(1.0+eta)*(1.0+zeta), + 0.125*(1.0-xi) *(1.0+zeta), + 0.125*(1.0-xi) *(1.0+eta)); + default: return zero_vector(); + } + } +#endif +}; + +// ── HEX20 
(serendipity hexahedron, 20 nodes) ───────────────────────────────── +// Corner nodes: phi = 0.125*(1+sx*xi)*(1+sy*eta)*(1+sz*zeta)*(sx*xi+sy*eta+sz*zeta-2) +// Node ordering follows libMesh (nodes 0-7 corners, 8-19 midside). + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 20; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 0.125*(1.0-xi)*(1.0-eta)*(1.0-zeta)*(-xi-eta-zeta-2.0); + case 1: return 0.125*(1.0+xi)*(1.0-eta)*(1.0-zeta)*( xi-eta-zeta-2.0); + case 2: return 0.125*(1.0+xi)*(1.0+eta)*(1.0-zeta)*( xi+eta-zeta-2.0); + case 3: return 0.125*(1.0-xi)*(1.0+eta)*(1.0-zeta)*(-xi+eta-zeta-2.0); + case 4: return 0.125*(1.0-xi)*(1.0-eta)*(1.0+zeta)*(-xi-eta+zeta-2.0); + case 5: return 0.125*(1.0+xi)*(1.0-eta)*(1.0+zeta)*( xi-eta+zeta-2.0); + case 6: return 0.125*(1.0+xi)*(1.0+eta)*(1.0+zeta)*( xi+eta+zeta-2.0); + case 7: return 0.125*(1.0-xi)*(1.0+eta)*(1.0+zeta)*(-xi+eta+zeta-2.0); + case 8: return 0.25*(1.0-xi*xi)*(1.0-eta)*(1.0-zeta); + case 10: return 0.25*(1.0-xi*xi)*(1.0+eta)*(1.0-zeta); + case 16: return 0.25*(1.0-xi*xi)*(1.0-eta)*(1.0+zeta); + case 18: return 0.25*(1.0-xi*xi)*(1.0+eta)*(1.0+zeta); + case 9: return 0.25*(1.0+xi)*(1.0-eta*eta)*(1.0-zeta); + case 11: return 0.25*(1.0-xi)*(1.0-eta*eta)*(1.0-zeta); + case 17: return 0.25*(1.0+xi)*(1.0-eta*eta)*(1.0+zeta); + case 19: return 0.25*(1.0-xi)*(1.0-eta*eta)*(1.0+zeta); + case 12: return 0.25*(1.0-xi)*(1.0-eta)*(1.0-zeta*zeta); + case 13: return 0.25*(1.0+xi)*(1.0-eta)*(1.0-zeta*zeta); + case 14: return 0.25*(1.0+xi)*(1.0+eta)*(1.0-zeta*zeta); + case 15: return 0.25*(1.0-xi)*(1.0+eta)*(1.0-zeta*zeta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return make_vector( + -0.125*(1.0-eta)*(1.0-zeta)*(-2.0*xi-eta-zeta-1.0), + -0.125*(1.0-xi) 
*(1.0-zeta)*(-xi-2.0*eta-zeta-1.0), + -0.125*(1.0-xi) *(1.0-eta) *(-xi-eta-2.0*zeta-1.0)); + case 1: return make_vector( + 0.125*(1.0-eta)*(1.0-zeta)*(2.0*xi-eta-zeta-1.0), + -0.125*(1.0+xi) *(1.0-zeta)*(xi-2.0*eta-zeta-1.0), + -0.125*(1.0+xi) *(1.0-eta) *(xi-eta-2.0*zeta-1.0)); + case 2: return make_vector( + 0.125*(1.0+eta)*(1.0-zeta)*(2.0*xi+eta-zeta-1.0), + 0.125*(1.0+xi) *(1.0-zeta)*(xi+2.0*eta-zeta-1.0), + -0.125*(1.0+xi) *(1.0+eta) *(xi+eta-2.0*zeta-1.0)); + case 3: return make_vector( + -0.125*(1.0+eta)*(1.0-zeta)*(-2.0*xi+eta-zeta-1.0), + 0.125*(1.0-xi) *(1.0-zeta)*(-xi+2.0*eta-zeta-1.0), + -0.125*(1.0-xi) *(1.0+eta) *(-xi+eta-2.0*zeta-1.0)); + case 4: return make_vector( + -0.125*(1.0-eta)*(1.0+zeta)*(-2.0*xi-eta+zeta-1.0), + -0.125*(1.0-xi) *(1.0+zeta)*(-xi-2.0*eta+zeta-1.0), + 0.125*(1.0-xi) *(1.0-eta) *(-xi-eta+2.0*zeta-1.0)); + case 5: return make_vector( + 0.125*(1.0-eta)*(1.0+zeta)*(2.0*xi-eta+zeta-1.0), + -0.125*(1.0+xi) *(1.0+zeta)*(xi-2.0*eta+zeta-1.0), + 0.125*(1.0+xi) *(1.0-eta) *(xi-eta+2.0*zeta-1.0)); + case 6: return make_vector( + 0.125*(1.0+eta)*(1.0+zeta)*(2.0*xi+eta+zeta-1.0), + 0.125*(1.0+xi) *(1.0+zeta)*(xi+2.0*eta+zeta-1.0), + 0.125*(1.0+xi) *(1.0+eta) *(xi+eta+2.0*zeta-1.0)); + case 7: return make_vector( + -0.125*(1.0+eta)*(1.0+zeta)*(-2.0*xi+eta+zeta-1.0), + 0.125*(1.0-xi) *(1.0+zeta)*(-xi+2.0*eta+zeta-1.0), + 0.125*(1.0-xi) *(1.0+eta) *(-xi+eta+2.0*zeta-1.0)); + case 8: return make_vector(-0.5*xi*(1.0-eta)*(1.0-zeta), + -0.25*(1.0-xi*xi)*(1.0-zeta), + -0.25*(1.0-xi*xi)*(1.0-eta)); + case 10: return make_vector(-0.5*xi*(1.0+eta)*(1.0-zeta), + 0.25*(1.0-xi*xi)*(1.0-zeta), + -0.25*(1.0-xi*xi)*(1.0+eta)); + case 16: return make_vector(-0.5*xi*(1.0-eta)*(1.0+zeta), + -0.25*(1.0-xi*xi)*(1.0+zeta), + 0.25*(1.0-xi*xi)*(1.0-eta)); + case 18: return make_vector(-0.5*xi*(1.0+eta)*(1.0+zeta), + 0.25*(1.0-xi*xi)*(1.0+zeta), + 0.25*(1.0-xi*xi)*(1.0+eta)); + case 9: return make_vector( 0.25*(1.0-eta*eta)*(1.0-zeta), + 
-0.5*eta*(1.0+xi)*(1.0-zeta), + -0.25*(1.0+xi)*(1.0-eta*eta)); + case 11: return make_vector(-0.25*(1.0-eta*eta)*(1.0-zeta), + -0.5*eta*(1.0-xi)*(1.0-zeta), + -0.25*(1.0-xi)*(1.0-eta*eta)); + case 17: return make_vector( 0.25*(1.0-eta*eta)*(1.0+zeta), + -0.5*eta*(1.0+xi)*(1.0+zeta), + 0.25*(1.0+xi)*(1.0-eta*eta)); + case 19: return make_vector(-0.25*(1.0-eta*eta)*(1.0+zeta), + -0.5*eta*(1.0-xi)*(1.0+zeta), + 0.25*(1.0-xi)*(1.0-eta*eta)); + case 12: return make_vector(-0.25*(1.0-eta)*(1.0-zeta*zeta), + -0.25*(1.0-xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0-xi)*(1.0-eta)); + case 13: return make_vector( 0.25*(1.0-eta)*(1.0-zeta*zeta), + -0.25*(1.0+xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0+xi)*(1.0-eta)); + case 14: return make_vector( 0.25*(1.0+eta)*(1.0-zeta*zeta), + 0.25*(1.0+xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0+xi)*(1.0+eta)); + case 15: return make_vector(-0.25*(1.0+eta)*(1.0-zeta*zeta), + 0.25*(1.0-xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0-xi)*(1.0+eta)); + default: return zero_vector(); + } + } +#endif +}; + +// ── HEX27 (triquadratic hexahedron, 27 nodes) ───────────────────────────────── +// Tensor product of three EDGE3 bases. 
+// Index tables (libMesh fe_lagrange_shape_3D.C): +// i0[] = {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2} +// i1[] = {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2} +// i2[] = {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2} + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 27; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real L(unsigned int k, Real t) + { + switch (k) + { + case 0: return 0.5 * t * (t - 1.0); + case 1: return 0.5 * t * (t + 1.0); + case 2: return 1.0 - t * t; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static Real dL(unsigned int k, Real t) + { + switch (k) + { + case 0: return t - 0.5; + case 1: return t + 0.5; + case 2: return -2.0 * t; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + static const unsigned int i0[] = + {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2}; + static const unsigned int i1[] = + {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2}; + static const unsigned int i2[] = + {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2}; + return L(i0[i], xi) * L(i1[i], eta) * L(i2[i], zeta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + static const unsigned int i0[] = + {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2}; + static const unsigned int i1[] = + {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2}; + static const unsigned int i2[] = + {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2}; + const Real lxi = L(i0[i], xi); + const Real leta = L(i1[i], eta); + const Real lzeta = L(i2[i], zeta); + return make_vector(dL(i0[i], xi) * leta * lzeta, + lxi * dL(i1[i], eta) * lzeta, + lxi * leta * dL(i2[i], zeta)); + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_3D_H diff --git 
a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h new file mode 100644 index 00000000000..d71f81c931a --- /dev/null +++ b/include/gpu/kokkos_fe_map.h @@ -0,0 +1,235 @@ +// Kokkos device-compatible physical map evaluation. +// +// All functions are LIBMESH_DEVICE_INLINE — callable from both host and GPU. +// +// Two API levels: +// 1. Template on ElemType (preferred): eliminates the topology switch at +// compile time, producing small inlined functions with no stack pressure. +// 2. Runtime ElemType dispatch: convenient but requires increased CUDA +// stack size due to the large switch in map_shape. +// +// Given node coordinates and a reference-space point, these functions compute: +// - Physical coordinates (xyz) +// - Jacobian matrix (reference -> physical) +// - Jacobian measures and JxW +// - Outward normal helpers for face/edge integrals + +#ifndef LIBMESH_KOKKOS_FE_MAP_H +#define LIBMESH_KOKKOS_FE_MAP_H + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_scalar_types.h" + +namespace libMesh::Kokkos +{ + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(i, xi, eta, zeta) * nodes[i]; + return xyz; +} + +// ========================================================================= +// Compile-time dispatch (preferred for GPU — no switch overhead) +// +// Template on FEFamily and ElemType so nvcc only instantiates the specific +// FEEvaluator specialization. No topology switch means no stack pressure. 
+// ========================================================================= + +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), nodes[k]); + return J; +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(k, xi, eta, zeta); + const RealVector grad = grad_map_shape(k, xi, eta, zeta); + xyz += phi * nodes[k]; + J += libMesh::outer_product(grad, nodes[k]); + } +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(const RealVector * face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), + face_nodes[k]); + return J; +} + +// ========================================================================= +// Runtime topology dispatch (convenient, but larger GPU stack usage) +// ========================================================================= + +/// Compute physical coordinate (runtime topology). +LIBMESH_DEVICE_INLINE RealVector +physical_point(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(mapping_type, topo, i, xi, eta, zeta) * nodes[i]; + return xyz; +} + +/// Compute Jacobian matrix (runtime topology), with rows d(x)/d(xi_r). 
+LIBMESH_DEVICE_INLINE RealTensor +jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, topo, k, xi, eta, zeta), + nodes[k]); + return J; +} + +/// Compute physical point and Jacobian together (runtime topology). +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(mapping_type, topo, k, xi, eta, zeta); + const RealVector grad = grad_map_shape(mapping_type, topo, k, xi, eta, zeta); + xyz += phi * nodes[k]; + J += libMesh::outer_product(grad, nodes[k]); + } +} + +/// Face Jacobian (runtime topology). +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const RealVector * face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, face_topo, k, xi, eta, zeta), + face_nodes[k]); + return J; +} + +// ========================================================================= +// Geometry helpers (topology-independent) +// ========================================================================= + +/// libMesh FEMap-compatible volume measure * quadrature_weight. 
+/// 3D: det(J) * weight +/// 2D: ||J_row0 x J_row1|| * weight +/// 1D: ||J_row0|| * weight +/// 0D: weight +LIBMESH_DEVICE_INLINE Real +volume_jxw(const RealTensor & J, unsigned int dim, Real quad_weight) +{ + if (dim == 3) + return leading_determinant(J, 3) * quad_weight; + else if (dim == 2) + return J.row(0).cross(J.row(1)).norm() * quad_weight; + else if (dim == 1) + return J.row(0).norm() * quad_weight; + else + return quad_weight; +} + +/// Face JxW: surface measure * quadrature_weight +/// 3D: ||J_row0 x J_row1|| * weight +/// 2D: ||J_row0|| * weight +/// 1D: weight (face is a point) +LIBMESH_DEVICE_INLINE Real +face_jxw(const RealTensor & J, unsigned int parent_dim, Real quad_weight) +{ + if (parent_dim == 3) + return J.row(0).cross(J.row(1)).norm() * quad_weight; + else if (parent_dim == 2) + return J.row(0).norm() * quad_weight; + else + return quad_weight; +} + +/// Outward unit normal for a 3D face from the face Jacobian. +LIBMESH_DEVICE_INLINE RealVector +face_normal(const RealTensor & J, unsigned int parent_dim) +{ + if (parent_dim != 3) + { + detail::abort_unsupported("face_normal(): only 3D face normals are defined from face Jacobians alone; use edge_normal_on_parent_surface() for 2D parent elements"); + return zero_vector(); + } + + RealVector n = J.row(0).cross(J.row(1)); + + const Real len = n.norm(); + if (len > 0.0) + n *= 1.0 / len; + return n; +} + +/// Outward edge normal for a 2D parent element embedded in 3D. +/// Requires the edge Jacobian and the parent surface Jacobian at the mapped +/// parent-reference point. 
+LIBMESH_DEVICE_INLINE RealVector +edge_normal_on_parent_surface(const RealTensor & edge_J, + const RealTensor & parent_J) +{ + RealVector surface_normal = parent_J.row(0).cross(parent_J.row(1)); + const Real surface_len = surface_normal.norm(); + if (surface_len > 0.0) + surface_normal *= 1.0 / surface_len; + + RealVector n = edge_J.row(0).cross(surface_normal); + + const Real len = n.norm(); + if (len > 0.0) + n *= 1.0 / len; + return n; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_MAP_H diff --git a/include/gpu/kokkos_fe_monomial.h b/include/gpu/kokkos_fe_monomial.h new file mode 100644 index 00000000000..b68289c72eb --- /dev/null +++ b/include/gpu/kokkos_fe_monomial.h @@ -0,0 +1,941 @@ +// Kokkos FEEvaluator specializations for MONOMIAL elements. +// +// MONOMIAL uses the complete total-degree polynomial space P_p. Following +// libMesh's FE, the basis is parameterised by spatial dimension, +// not element class — TRI and QUAD share MonomialImpl2D; TET/HEX/PRISM/PYRAMID +// share MonomialImpl3D. This gives 3 x 6 = 18 impl specializations (dims 1/2/3, +// orders 0-5), then per-topology FEEvaluator delegating specializations wire each +// libMesh::ElemType to the matching impl. +// +// Basis ordering: graded-lex (total degree first, then lexicographic by +// decreasing xi exponent). Matches libMesh::FE::shape ordering. 
+ +#ifndef LIBMESH_KOKKOS_FE_MONOMIAL_H +#define LIBMESH_KOKKOS_FE_MONOMIAL_H + +#include "gpu/kokkos_fe_base.h" +#include "libmesh/enum_elem_type.h" + +namespace libMesh::Kokkos +{ + +// ═══════════════════════════════════════════════════════════════════════════ +// MonomialImpl1D — 1-D MONOMIAL basis, order N +// n_dofs = N + 1 +// Basis: {1, xi, xi², xi³, ...} +// ═══════════════════════════════════════════════════════════════════════════ + +template +struct MonomialImpl1D; + +template <> +struct MonomialImpl1D<0> +{ + static constexpr unsigned int n_dofs() { return 1; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return 1.0; + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return zero_vector(); + } +}; + +template <> +struct MonomialImpl1D<1> +{ + static constexpr unsigned int n_dofs() { return 2; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl1D<2> +{ + static constexpr unsigned int n_dofs() { return 3; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return xi * xi; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(2.0 * xi, 0.0, 0.0); + 
default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl1D<3> +{ + static constexpr unsigned int n_dofs() { return 4; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return xi * xi; + case 3: return xi * xi * xi; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(2.0 * xi, 0.0, 0.0); + case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl1D<4> +{ + static constexpr unsigned int n_dofs() { return 5; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return xi * xi; + case 3: return xi * xi * xi; + case 4: return xi * xi * xi * xi; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(2.0 * xi, 0.0, 0.0); + case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 4: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl1D<5> +{ + static constexpr unsigned int n_dofs() { return 6; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return xi * xi; + case 3: return xi * xi * xi; + case 4: return xi * xi * xi * xi; + case 5: return xi * xi * xi * xi * xi; + default: return 0.0; + } + } + + 
LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(2.0 * xi, 0.0, 0.0); + case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 4: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); + case 5: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0); + default: return zero_vector(); + } + } +}; + +// ═══════════════════════════════════════════════════════════════════════════ +// MonomialImpl2D — 2-D MONOMIAL basis, order N +// n_dofs = (N+1)(N+2)/2 +// Graded-lex basis: {1, xi, eta, xi², xi·eta, eta², ...} +// Shared by TRI and QUAD element classes. +// ═══════════════════════════════════════════════════════════════════════════ + +template +struct MonomialImpl2D; + +template <> +struct MonomialImpl2D<0> +{ + static constexpr unsigned int n_dofs() { return 1; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return 1.0; + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return zero_vector(); + } +}; + +template <> +struct MonomialImpl2D<1> +{ + static constexpr unsigned int n_dofs() { return 3; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl2D<2> +{ + static constexpr unsigned int n_dofs() { return 6; } + + LIBMESH_DEVICE_INLINE static Real + 
shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return xi * xi; + case 4: return xi * eta; + case 5: return eta * eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(2.0 * xi, 0.0, 0.0); + case 4: return make_vector(eta, xi, 0.0); + case 5: return make_vector(0.0, 2.0 * eta, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl2D<3> +{ + static constexpr unsigned int n_dofs() { return 10; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return xi * xi; + case 4: return xi * eta; + case 5: return eta * eta; + case 6: return xi * xi * xi; + case 7: return xi * xi * eta; + case 8: return xi * eta * eta; + case 9: return eta * eta * eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(2.0 * xi, 0.0, 0.0); + case 4: return make_vector(eta, xi, 0.0); + case 5: return make_vector(0.0, 2.0 * eta, 0.0); + case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0); + case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); + case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl2D<4> +{ + static constexpr unsigned int n_dofs() { return 15; } + + 
LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return xi * xi; + case 4: return xi * eta; + case 5: return eta * eta; + case 6: return xi * xi * xi; + case 7: return xi * xi * eta; + case 8: return xi * eta * eta; + case 9: return eta * eta * eta; + case 10: return xi * xi * xi * xi; + case 11: return xi * xi * xi * eta; + case 12: return xi * xi * eta * eta; + case 13: return xi * eta * eta * eta; + case 14: return eta * eta * eta * eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(2.0 * xi, 0.0, 0.0); + case 4: return make_vector(eta, xi, 0.0); + case 5: return make_vector(0.0, 2.0 * eta, 0.0); + case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0); + case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); + case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0); + case 10: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); + case 11: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); + case 12: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); + case 13: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); + case 14: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl2D<5> +{ + static constexpr unsigned int n_dofs() { return 21; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return xi * xi; + case 4: return xi * eta; + case 5: return eta * 
eta; + case 6: return xi * xi * xi; + case 7: return xi * xi * eta; + case 8: return xi * eta * eta; + case 9: return eta * eta * eta; + case 10: return xi * xi * xi * xi; + case 11: return xi * xi * xi * eta; + case 12: return xi * xi * eta * eta; + case 13: return xi * eta * eta * eta; + case 14: return eta * eta * eta * eta; + case 15: return xi * xi * xi * xi * xi; + case 16: return xi * xi * xi * xi * eta; + case 17: return xi * xi * xi * eta * eta; + case 18: return xi * xi * eta * eta * eta; + case 19: return xi * eta * eta * eta * eta; + case 20: return eta * eta * eta * eta * eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(2.0 * xi, 0.0, 0.0); + case 4: return make_vector(eta, xi, 0.0); + case 5: return make_vector(0.0, 2.0 * eta, 0.0); + case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0); + case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); + case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0); + case 10: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); + case 11: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); + case 12: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); + case 13: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); + case 14: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); + case 15: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0); + case 16: return make_vector(4.0 * xi * xi * xi * eta, xi * xi * xi * xi, 0.0); + case 17: return make_vector(3.0 * xi * xi * eta * eta, 2.0 * xi * xi * xi * eta, 0.0); + case 18: return make_vector(2.0 * xi * eta * eta * eta, 3.0 * xi * xi * eta * eta, 0.0); + case 19: return make_vector(eta * eta * eta * eta, 4.0 * xi * 
eta * eta * eta, 0.0); + case 20: return make_vector(0.0, 5.0 * eta * eta * eta * eta, 0.0); + default: return zero_vector(); + } + } +}; + +// ═══════════════════════════════════════════════════════════════════════════ +// MonomialImpl3D — 3-D MONOMIAL basis, order N +// n_dofs = (N+1)(N+2)(N+3)/6 +// Basis ordering: graded-lex; for each total degree d, iterate c (zeta +// exponent) from 0 to d, then a (xi exponent) from d-c down to 0 (b=d-c-a). +// Shared by TET, HEX, PRISM, and PYRAMID element classes. +// ═══════════════════════════════════════════════════════════════════════════ + +template +struct MonomialImpl3D; + +template <> +struct MonomialImpl3D<0> +{ + static constexpr unsigned int n_dofs() { return 1; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return 1.0; + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return zero_vector(); + } +}; + +template <> +struct MonomialImpl3D<1> +{ + static constexpr unsigned int n_dofs() { return 4; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 1.0); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl3D<2> +{ + static constexpr unsigned int n_dofs() { return 10; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + case 
4: return xi * xi; + case 5: return xi * eta; + case 6: return eta * eta; + case 7: return xi * zeta; + case 8: return eta * zeta; + case 9: return zeta * zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 1.0); + case 4: return make_vector(2.0 * xi, 0.0, 0.0); + case 5: return make_vector(eta, xi, 0.0); + case 6: return make_vector(0.0, 2.0 * eta, 0.0); + case 7: return make_vector(zeta, 0.0, xi); + case 8: return make_vector(0.0, zeta, eta); + case 9: return make_vector(0.0, 0.0, 2.0 * zeta); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl3D<3> +{ + static constexpr unsigned int n_dofs() { return 20; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + case 4: return xi * xi; + case 5: return xi * eta; + case 6: return eta * eta; + case 7: return xi * zeta; + case 8: return eta * zeta; + case 9: return zeta * zeta; + case 10: return xi * xi * xi; + case 11: return xi * xi * eta; + case 12: return xi * eta * eta; + case 13: return eta * eta * eta; + case 14: return xi * xi * zeta; + case 15: return xi * eta * zeta; + case 16: return eta * eta * zeta; + case 17: return xi * zeta * zeta; + case 18: return eta * zeta * zeta; + case 19: return zeta * zeta * zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 1.0); + case 4: return make_vector(2.0 * xi, 
0.0, 0.0); + case 5: return make_vector(eta, xi, 0.0); + case 6: return make_vector(0.0, 2.0 * eta, 0.0); + case 7: return make_vector(zeta, 0.0, xi); + case 8: return make_vector(0.0, zeta, eta); + case 9: return make_vector(0.0, 0.0, 2.0 * zeta); + case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0); + case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); + case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0); + case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi); + case 15: return make_vector(eta * zeta, xi * zeta, xi * eta); + case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta); + case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta); + case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta); + case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl3D<4> +{ + static constexpr unsigned int n_dofs() { return 35; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + case 4: return xi * xi; + case 5: return xi * eta; + case 6: return eta * eta; + case 7: return xi * zeta; + case 8: return eta * zeta; + case 9: return zeta * zeta; + case 10: return xi * xi * xi; + case 11: return xi * xi * eta; + case 12: return xi * eta * eta; + case 13: return eta * eta * eta; + case 14: return xi * xi * zeta; + case 15: return xi * eta * zeta; + case 16: return eta * eta * zeta; + case 17: return xi * zeta * zeta; + case 18: return eta * zeta * zeta; + case 19: return zeta * zeta * zeta; + case 20: return xi * xi * xi * xi; + case 21: return xi * xi * xi * eta; + case 22: return xi * xi * eta * eta; + case 23: return xi * eta * eta * eta; + case 24: return eta * eta * eta * eta; + case 25: return xi * xi * xi * zeta; + case 26: return xi * xi * 
eta * zeta; + case 27: return xi * eta * eta * zeta; + case 28: return eta * eta * eta * zeta; + case 29: return xi * xi * zeta * zeta; + case 30: return xi * eta * zeta * zeta; + case 31: return eta * eta * zeta * zeta; + case 32: return xi * zeta * zeta * zeta; + case 33: return eta * zeta * zeta * zeta; + case 34: return zeta * zeta * zeta * zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 1.0); + case 4: return make_vector(2.0 * xi, 0.0, 0.0); + case 5: return make_vector(eta, xi, 0.0); + case 6: return make_vector(0.0, 2.0 * eta, 0.0); + case 7: return make_vector(zeta, 0.0, xi); + case 8: return make_vector(0.0, zeta, eta); + case 9: return make_vector(0.0, 0.0, 2.0 * zeta); + case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0); + case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); + case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0); + case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi); + case 15: return make_vector(eta * zeta, xi * zeta, xi * eta); + case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta); + case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta); + case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta); + case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta); + case 20: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); + case 21: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); + case 22: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); + case 23: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); + case 24: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); + case 25: return make_vector(3.0 * xi * xi * zeta, 0.0, xi * xi 
* xi); + case 26: return make_vector(2.0 * xi * eta * zeta, xi * xi * zeta, xi * xi * eta); + case 27: return make_vector(eta * eta * zeta, 2.0 * xi * eta * zeta, xi * eta * eta); + case 28: return make_vector(0.0, 3.0 * eta * eta * zeta, eta * eta * eta); + case 29: return make_vector(2.0 * xi * zeta * zeta, 0.0, 2.0 * xi * xi * zeta); + case 30: return make_vector(eta * zeta * zeta, xi * zeta * zeta, 2.0 * xi * eta * zeta); + case 31: return make_vector(0.0, 2.0 * eta * zeta * zeta, 2.0 * eta * eta * zeta); + case 32: return make_vector(zeta * zeta * zeta, 0.0, 3.0 * xi * zeta * zeta); + case 33: return make_vector(0.0, zeta * zeta * zeta, 3.0 * eta * zeta * zeta); + case 34: return make_vector(0.0, 0.0, 4.0 * zeta * zeta * zeta); + default: return zero_vector(); + } + } +}; + +template <> +struct MonomialImpl3D<5> +{ + static constexpr unsigned int n_dofs() { return 56; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + case 4: return xi*xi; + case 5: return xi*eta; + case 6: return eta*eta; + case 7: return xi*zeta; + case 8: return eta*zeta; + case 9: return zeta*zeta; + case 10: return xi*xi*xi; + case 11: return xi*xi*eta; + case 12: return xi*eta*eta; + case 13: return eta*eta*eta; + case 14: return xi*xi*zeta; + case 15: return xi*eta*zeta; + case 16: return eta*eta*zeta; + case 17: return xi*zeta*zeta; + case 18: return eta*zeta*zeta; + case 19: return zeta*zeta*zeta; + case 20: return xi*xi*xi*xi; + case 21: return xi*xi*xi*eta; + case 22: return xi*xi*eta*eta; + case 23: return xi*eta*eta*eta; + case 24: return eta*eta*eta*eta; + case 25: return xi*xi*xi*zeta; + case 26: return xi*xi*eta*zeta; + case 27: return xi*eta*eta*zeta; + case 28: return eta*eta*eta*zeta; + case 29: return xi*xi*zeta*zeta; + case 30: return xi*eta*zeta*zeta; + case 31: return eta*eta*zeta*zeta; + case 32: return 
xi*zeta*zeta*zeta; + case 33: return eta*zeta*zeta*zeta; + case 34: return zeta*zeta*zeta*zeta; + case 35: return xi*xi*xi*xi*xi; + case 36: return xi*xi*xi*xi*eta; + case 37: return xi*xi*xi*eta*eta; + case 38: return xi*xi*eta*eta*eta; + case 39: return xi*eta*eta*eta*eta; + case 40: return eta*eta*eta*eta*eta; + case 41: return xi*xi*xi*xi*zeta; + case 42: return xi*xi*xi*eta*zeta; + case 43: return xi*xi*eta*eta*zeta; + case 44: return xi*eta*eta*eta*zeta; + case 45: return eta*eta*eta*eta*zeta; + case 46: return xi*xi*xi*zeta*zeta; + case 47: return xi*xi*eta*zeta*zeta; + case 48: return xi*eta*eta*zeta*zeta; + case 49: return eta*eta*eta*zeta*zeta; + case 50: return xi*xi*zeta*zeta*zeta; + case 51: return xi*eta*zeta*zeta*zeta; + case 52: return eta*eta*zeta*zeta*zeta; + case 53: return xi*zeta*zeta*zeta*zeta; + case 54: return eta*zeta*zeta*zeta*zeta; + case 55: return zeta*zeta*zeta*zeta*zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return zero_vector(); + case 1: return make_vector(1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 1.0); + case 4: return make_vector(2.0 * xi, 0.0, 0.0); + case 5: return make_vector(eta, xi, 0.0); + case 6: return make_vector(0.0, 2.0 * eta, 0.0); + case 7: return make_vector(zeta, 0.0, xi); + case 8: return make_vector(0.0, zeta, eta); + case 9: return make_vector(0.0, 0.0, 2.0 * zeta); + case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0); + case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0); + case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); + case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0); + case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi); + case 15: return make_vector(eta * zeta, xi * zeta, xi * eta); + case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta); + case 17: return make_vector(zeta * zeta, 
0.0, 2.0 * xi * zeta); + case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta); + case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta); + case 20: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); + case 21: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); + case 22: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); + case 23: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); + case 24: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); + case 25: return make_vector(3.0 * xi * xi * zeta, 0.0, xi * xi * xi); + case 26: return make_vector(2.0 * xi * eta * zeta, xi * xi * zeta, xi * xi * eta); + case 27: return make_vector(eta * eta * zeta, 2.0 * xi * eta * zeta, xi * eta * eta); + case 28: return make_vector(0.0, 3.0 * eta * eta * zeta, eta * eta * eta); + case 29: return make_vector(2.0 * xi * zeta * zeta, 0.0, 2.0 * xi * xi * zeta); + case 30: return make_vector(eta * zeta * zeta, xi * zeta * zeta, 2.0 * xi * eta * zeta); + case 31: return make_vector(0.0, 2.0 * eta * zeta * zeta, 2.0 * eta * eta * zeta); + case 32: return make_vector(zeta * zeta * zeta, 0.0, 3.0 * xi * zeta * zeta); + case 33: return make_vector(0.0, zeta * zeta * zeta, 3.0 * eta * zeta * zeta); + case 34: return make_vector(0.0, 0.0, 4.0 * zeta * zeta * zeta); + case 35: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0); + case 36: return make_vector(4.0 * xi * xi * xi * eta, xi * xi * xi * xi, 0.0); + case 37: return make_vector(3.0 * xi * xi * eta * eta, 2.0 * xi * xi * xi * eta, 0.0); + case 38: return make_vector(2.0 * xi * eta * eta * eta, 3.0 * xi * xi * eta * eta, 0.0); + case 39: return make_vector(eta * eta * eta * eta, 4.0 * xi * eta * eta * eta, 0.0); + case 40: return make_vector(0.0, 5.0 * eta * eta * eta * eta, 0.0); + case 41: return make_vector(4.0 * xi * xi * xi * zeta, 0.0, xi * xi * xi * xi); + case 42: return make_vector(3.0 * xi * xi * eta * zeta, xi * xi * xi * zeta, xi * xi * xi * eta); + case 43: return 
make_vector(2.0 * xi * eta * eta * zeta, 2.0 * xi * xi * eta * zeta, xi * xi * eta * eta); + case 44: return make_vector(eta * eta * eta * zeta, 3.0 * xi * eta * eta * zeta, xi * eta * eta * eta); + case 45: return make_vector(0.0, 4.0 * eta * eta * eta * zeta, eta * eta * eta * eta); + case 46: return make_vector(3.0 * xi * xi * zeta * zeta, 0.0, 2.0 * xi * xi * xi * zeta); + case 47: return make_vector(2.0 * xi * eta * zeta * zeta, xi * xi * zeta * zeta, 2.0 * xi * xi * eta * zeta); + case 48: return make_vector(eta * eta * zeta * zeta, 2.0 * xi * eta * zeta * zeta, 2.0 * xi * eta * eta * zeta); + case 49: return make_vector(0.0, 3.0 * eta * eta * zeta * zeta, 2.0 * eta * eta * eta * zeta); + case 50: return make_vector(2.0 * xi * zeta * zeta * zeta, 0.0, 3.0 * xi * xi * zeta * zeta); + case 51: return make_vector(eta * zeta * zeta * zeta, xi * zeta * zeta * zeta, 3.0 * xi * eta * zeta * zeta); + case 52: return make_vector(0.0, 2.0 * eta * zeta * zeta * zeta, 3.0 * eta * eta * zeta * zeta); + case 53: return make_vector(zeta * zeta * zeta * zeta, 0.0, 4.0 * xi * zeta * zeta * zeta); + case 54: return make_vector(0.0, zeta * zeta * zeta * zeta, 4.0 * eta * zeta * zeta * zeta); + case 55: return make_vector(0.0, 0.0, 5.0 * zeta * zeta * zeta * zeta); + default: return zero_vector(); + } + } +}; + +// ═══════════════════════════════════════════════════════════════════════════ +// Per-topology FEEvaluator delegating specializations +// +// Each partial specialization fixes family=MONOMIAL and elem_type, leaving the +// polynomial Order as a template parameter, then inherits the matching impl. 
+// ═══════════════════════════════════════════════════════════════════════════
+
+// NOTE(review): as the declarations read here, every specialization below is
+// spelled identically within its dimension group: there is no parameter list
+// after `template` and no argument list after `FEEvaluator` or the
+// `MonomialImpl*D` base. Presumably the `<...>` parts were lost in this copy
+// of the patch; confirm against the original before applying.
+
+// ── 1-D ──────────────────────────────────────────────────────────────────────
+
+// Two specializations, both deriving from MonomialImpl1D.
+template
+struct FEEvaluator : MonomialImpl1D {};
+
+template
+struct FEEvaluator : MonomialImpl1D {};
+
+// ── 2-D ──────────────────────────────────────────────────────────────────────
+
+// Six specializations, all deriving from MonomialImpl2D.
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+// ── 3-D ──────────────────────────────────────────────────────────────────────
+
+// Twelve specializations, all deriving from MonomialImpl3D.
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_FE_MONOMIAL_H
diff --git a/include/gpu/kokkos_fe_types.h b/include/gpu/kokkos_fe_types.h
new file mode 100644
index 00000000000..c266e6dc15a
--- /dev/null
+++ b/include/gpu/kokkos_fe_types.h
@@ -0,0 +1,681 @@
+// Kokkos FE type helpers.
+//
+// Defines the FEShapeKey aggregate and device-callable dispatch functions used
+// by both host-side assembly setup and device-side evaluation.
+//
+// Uses libMesh's own ElemType, FEFamily, and FEElemClass enums directly —
+// no wrapper enums are needed.
+
+#ifndef LIBMESH_KOKKOS_FE_TYPES_H
+#define LIBMESH_KOKKOS_FE_TYPES_H
+
+#include "libmesh/enum_elem_type.h"
+#include "libmesh/enum_fe_family.h"
+#include "libmesh/enum_fe_elem_class.h"
+#include "libmesh/enum_order.h"
+// ElemMappingType (LAGRANGE_MAP, RATIONAL_BERNSTEIN_MAP) is defined in enum_elem_type.h
+#include "libmesh/libmesh_device.h"
+#ifndef LIBMESH_KOKKOS_COMPILATION
+# include "libmesh/libmesh_common.h"
+#endif
+
+namespace libMesh::Kokkos
+{
+
+// Make FEElemClass nameable without the libMesh:: qualifier inside this
+// namespace.
+using libMesh::FEElemClass;
+
+namespace detail
+{
+
+/// Report an unsupported request with message \p msg.
+/// Outside a Kokkos compilation this goes through libmesh_error_msg();
+/// inside one it goes through ::Kokkos::abort().
+LIBMESH_DEVICE_INLINE void
+abort_unsupported(const char * msg)
+{
+#ifndef LIBMESH_KOKKOS_COMPILATION
+  libmesh_error_msg(msg);
+#else
+  ::Kokkos::abort(msg);
+#endif
+}
+
+} // namespace detail
+
+/// Return true iff \p elem_type is one of the polygon, triangle, or
+/// quadrilateral types (shell variants included) listed below.
+LIBMESH_DEVICE_INLINE bool
+is_monomial_2d_elem_type(libMesh::ElemType elem_type)
+{
+  return elem_type == libMesh::C0POLYGON ||
+         elem_type == libMesh::TRI3 ||
+         elem_type == libMesh::TRISHELL3 ||
+         elem_type == libMesh::TRI6 ||
+         elem_type == libMesh::TRI7 ||
+         elem_type == libMesh::QUAD4 ||
+         elem_type == libMesh::QUADSHELL4 ||
+         elem_type == libMesh::QUAD8 ||
+         elem_type == libMesh::QUADSHELL8 ||
+         elem_type == libMesh::QUAD9 ||
+         elem_type == libMesh::QUADSHELL9;
+}
+
+/// Return true iff \p elem_type is one of the tetrahedron, hexahedron, prism,
+/// pyramid, or polyhedron types listed below.
+/// PYRAMID18 is reported as \p include_pyramid18, so callers can exclude it.
+LIBMESH_DEVICE_INLINE bool
+is_monomial_3d_elem_type(libMesh::ElemType elem_type,
+                         bool include_pyramid18 = true)
+{
+  // The only type whose answer depends on the caller's flag.
+  if (elem_type == libMesh::PYRAMID18)
+    return include_pyramid18;
+
+  switch (elem_type)
+  {
+    case libMesh::TET4:
+    case libMesh::TET10:
+    case libMesh::TET14:
+    case libMesh::HEX8:
+    case libMesh::HEX20:
+    case libMesh::HEX27:
+    case libMesh::PRISM6:
+    case libMesh::PRISM15:
+    case libMesh::PRISM18:
+    case libMesh::PRISM20:
+    case libMesh::PRISM21:
+    case libMesh::PYRAMID5:
+    case libMesh::PYRAMID13:
+    case libMesh::PYRAMID14:
+    case libMesh::C0POLYHEDRON:
+      return true;
+
+    default:
+      return false;
+  }
+}
+
+// ── Shape function space key ──────────────────────────────────────────────────
+// Uniquely identifies a libMesh FE space, including the exact
element topology.
+// Exactness matters for LAGRANGE spaces: libMesh treats e.g. QUAD8 and QUAD9,
+// or HEX20 and HEX27, as different spaces at the same polynomial order.
+//
+// Plain aggregate of three enum values; it has no dynamically allocated members.
+
+struct FEShapeKey
+{
+  libMesh::FEFamily family;
+  libMesh::ElemType elem_type;
+  libMesh::Order order;
+};
+
+// ── Device-callable conversion helpers ───────────────────────────────────────
+
+/// Return the side topology that the Kokkos dispatch uses for every side of
+/// the parent element type \p parent.
+/// Only element types whose sides all share one topology are answered.
+/// Prisms and pyramids have mixed faces, so they are rejected through
+/// detail::abort_unsupported(), as is any type not listed.
+/// For 1D parents the result is EDGE2, which stands in for libMesh's NODEELEM
+/// sides on the Kokkos map/shape path.
+LIBMESH_DEVICE_INLINE libMesh::ElemType
+get_side_topology(libMesh::ElemType parent)
+{
+  // Message for the rejection path; replaced below for mixed-face elements.
+  const char * rejection = "get_side_topology(): unsupported element type";
+
+  switch (parent)
+  {
+    // EDGE2 sides: the 1D surrogate case and the first-order 2D elements.
+    case libMesh::EDGE2:
+    case libMesh::EDGE3:
+    case libMesh::EDGE4:
+    case libMesh::TRI3:
+    case libMesh::QUAD4:
+      return libMesh::EDGE2;
+
+    // EDGE3 sides: the second-order 2D elements.
+    case libMesh::TRI6:
+    case libMesh::TRI7:
+    case libMesh::QUAD8:
+    case libMesh::QUAD9:
+      return libMesh::EDGE3;
+
+    // Tetrahedra: triangular faces matching the parent's node count.
+    case libMesh::TET4:
+      return libMesh::TRI3;
+    case libMesh::TET10:
+      return libMesh::TRI6;
+    case libMesh::TET14:
+      return libMesh::TRI7;
+
+    // Hexahedra: quadrilateral faces matching the parent's node count.
+    case libMesh::HEX8:
+      return libMesh::QUAD4;
+    case libMesh::HEX20:
+      return libMesh::QUAD8;
+    case libMesh::HEX27:
+      return libMesh::QUAD9;
+
+    // Mixed-face elements: no single side topology exists.
+    case libMesh::PRISM6:
+    case libMesh::PRISM15:
+    case libMesh::PRISM18:
+    case libMesh::PRISM20:
+    case libMesh::PRISM21:
+    case libMesh::PYRAMID5:
+    case libMesh::PYRAMID13:
+    case libMesh::PYRAMID14:
+    case libMesh::PYRAMID18:
+      rejection = "get_side_topology(): mixed-face elements require side-specific topology";
+      break;
+
+    default:
+      break;
+  }
+
+  detail::abort_unsupported(rejection);
+  return libMesh::INVALID_ELEM; // reached only if abort_unsupported() returns
+}
+
+/// Map an ElemType to its base geometric class (order-independent).
+/// e.g. QUAD4 / QUAD8 / QUAD9 all return FEElemClass::QUAD.
+LIBMESH_DEVICE_INLINE libMesh::FEElemClass
+class_from_topology(libMesh::ElemType topo)
+{
+  switch (topo)
+  {
+    case libMesh::EDGE2:
+    case libMesh::EDGE3:
+    case libMesh::EDGE4:
+      return libMesh::FEElemClass::EDGE;
+
+    case libMesh::TRI3:
+    case libMesh::TRI6:
+    case libMesh::TRI7:
+      return libMesh::FEElemClass::TRI;
+
+    case libMesh::QUAD4:
+    case libMesh::QUAD8:
+    case libMesh::QUAD9:
+      return libMesh::FEElemClass::QUAD;
+
+    case libMesh::TET4:
+    case libMesh::TET10:
+    case libMesh::TET14:
+      return libMesh::FEElemClass::TET;
+
+    case libMesh::HEX8:
+    case libMesh::HEX20:
+    case libMesh::HEX27:
+      return libMesh::FEElemClass::HEX;
+
+    case libMesh::PRISM6:
+    case libMesh::PRISM15:
+    case libMesh::PRISM18:
+    case libMesh::PRISM20:
+    case libMesh::PRISM21:
+      return libMesh::FEElemClass::PRISM;
+
+    case libMesh::PYRAMID5:
+    case libMesh::PYRAMID13:
+    case libMesh::PYRAMID14:
+    case libMesh::PYRAMID18:
+      return libMesh::FEElemClass::PYRAMID;
+
+    // Anything else (shell, polygon, node, ... types) is rejected.
+    default:
+      detail::abort_unsupported("class_from_topology(): unsupported element type");
+      return libMesh::FEElemClass::N_CLASSES; // unreachable after abort
+  }
+}
+
+/// Return the element topology whose LAGRANGE shape functions are selected for
+/// \p key, or INVALID_ELEM if this table has no entry for the key's
+/// (elem_type, order) pair.
+/// Only FIRST and SECOND order are tabulated, and only for edge, triangle,
+/// quadrilateral, tetrahedron, and hexahedron types. \p key.family is not
+/// inspected here.
+LIBMESH_DEVICE_INLINE libMesh::ElemType
+lagrange_shape_topology_or_invalid(FEShapeKey key)
+{
+  switch (key.order)
+  {
+    // FIRST order: every type in a class maps to that class's linear topology.
+    case libMesh::FIRST:
+      switch (key.elem_type)
+      {
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+        case libMesh::EDGE4:
+          return libMesh::EDGE2;
+
+        case libMesh::TRI3:
+        case libMesh::TRI6:
+        case libMesh::TRI7:
+          return libMesh::TRI3;
+
+        case libMesh::QUAD4:
+        case libMesh::QUAD8:
+        case libMesh::QUAD9:
+          return libMesh::QUAD4;
+
+        case libMesh::TET4:
+        case libMesh::TET10:
+        case libMesh::TET14:
+          return libMesh::TET4;
+
+        case libMesh::HEX8:
+        case libMesh::HEX20:
+        case libMesh::HEX27:
+          return libMesh::HEX8;
+
+        default:
+          return libMesh::INVALID_ELEM;
+      }
+
+    // SECOND order: QUAD8/QUAD9 and HEX20/HEX27 stay distinct.
+    case libMesh::SECOND:
+      switch (key.elem_type)
+      {
+        case libMesh::EDGE3:
+          return libMesh::EDGE3;
+
+        case libMesh::TRI6:
+        case libMesh::TRI7:
+          return libMesh::TRI6;
+
+        case libMesh::QUAD8:
+          return libMesh::QUAD8;
+
+        case libMesh::QUAD9:
+          return libMesh::QUAD9;
+
+        case libMesh::TET10:
+        case libMesh::TET14:
+          return libMesh::TET10;
+
+        case libMesh::HEX20:
+          return libMesh::HEX20;
+
+        case libMesh::HEX27:
+          return libMesh::HEX27;
+
+        default:
+          return libMesh::INVALID_ELEM;
+      }
+
+    default:
+      return libMesh::INVALID_ELEM;
+  }
+}
+
+/// Return the tabulated LAGRANGE DOF count for (\p elem_type, \p order), or 0
+/// if the pair is not in the table.
+/// Orders CONSTANT through THIRD are tabulated; CONSTANT is non-zero only for
+/// NODEELEM.
+LIBMESH_DEVICE_INLINE unsigned int
+lagrange_exact_n_dofs_or_zero(libMesh::ElemType elem_type,
+                              libMesh::Order order)
+{
+  switch (order)
+  {
+    case libMesh::CONSTANT:
+      return (elem_type == libMesh::NODEELEM) ? 1u : 0u;
+
+    case libMesh::FIRST:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+        case libMesh::EDGE4:
+          return 2;
+
+        case libMesh::TRI3:
+        case libMesh::TRI6:
+        case libMesh::TRI7:
+          return 3;
+
+        case libMesh::QUAD4:
+        case libMesh::QUAD8:
+        case libMesh::QUAD9:
+          return 4;
+
+        case libMesh::TET4:
+        case libMesh::TET10:
+        case libMesh::TET14:
+          return 4;
+
+        case libMesh::HEX8:
+        case libMesh::HEX20:
+        case libMesh::HEX27:
+          return 8;
+
+        case libMesh::PRISM6:
+        case libMesh::PRISM15:
+        case libMesh::PRISM18:
+        case libMesh::PRISM20:
+        case libMesh::PRISM21:
+          return 6;
+
+        case libMesh::PYRAMID5:
+        case libMesh::PYRAMID13:
+        case libMesh::PYRAMID14:
+        case libMesh::PYRAMID18:
+          return 5;
+
+        default:
+          return 0;
+      }
+
+    case libMesh::SECOND:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE3:
+          return 3;
+
+        case libMesh::TRI6:
+        case libMesh::TRI7:
+          return 6;
+
+        case libMesh::QUAD8:
+          return 8;
+
+        case libMesh::QUAD9:
+          return 9;
+
+        case libMesh::TET10:
+        case libMesh::TET14:
+          return 10;
+
+        case libMesh::HEX20:
+          return 20;
+
+        case libMesh::HEX27:
+          return 27;
+
+        case libMesh::PRISM15:
+          return 15;
+
+        case libMesh::PRISM18:
+        case libMesh::PRISM20:
+        case libMesh::PRISM21:
+          return 18;
+
+        case libMesh::PYRAMID13:
+          return 13;
+
+        case libMesh::PYRAMID14:
+        case libMesh::PYRAMID18:
+          return 14;
+
+        default:
+          return 0;
+      }
+
+    case libMesh::THIRD:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE4:
+          return 4;
+
+        case libMesh::TRI7:
+          return 7;
+
+        case libMesh::TET14:
+          return 14;
+
+        case libMesh::PRISM20:
+          return 20;
+
+        case libMesh::PRISM21:
+          return 21;
+
+        case libMesh::PYRAMID18:
+          return 18;
+
+        default:
+          return 0;
+      }
+
+    default:
+      return 0;
+  }
+}
+
+/// Return the MONOMIAL DOF count for (\p elem_type, \p order), or 0 if the
+/// pair is not accepted.
+/// For order p the counts are p+1 on edges, (p+1)(p+2)/2 on the 2-D types and
+/// (p+1)(p+2)(p+3)/6 on the 3-D types; NODEELEM always yields 1.
+/// EDGE4 is listed only through THIRD, and from FOURTH on the 3-D test is made
+/// with include_pyramid18 = false, so those combinations yield 0.
+/// NOTE(review): presumably these gaps mirror the host-side libMesh table
+/// rather than being oversights; confirm before widening them.
+LIBMESH_DEVICE_INLINE unsigned int
+monomial_exact_n_dofs_or_zero(libMesh::ElemType elem_type,
+                              libMesh::Order order)
+{
+  if (elem_type == libMesh::INVALID_ELEM)
+    return 0;
+  if (order < libMesh::CONSTANT)
+    return 0;
+
+  switch (order)
+  {
+    case libMesh::CONSTANT:
+      return 1;
+
+    case libMesh::FIRST:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+        case libMesh::EDGE4:
+          return 2;
+
+        default:
+          break;
+      }
+
+      if (is_monomial_2d_elem_type(elem_type))
+        return 3;
+      if (is_monomial_3d_elem_type(elem_type))
+        return 4;
+      return 0;
+
+    case libMesh::SECOND:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+        case libMesh::EDGE4:
+          return 3;
+
+        default:
+          break;
+      }
+
+      if (is_monomial_2d_elem_type(elem_type))
+        return 6;
+      if (is_monomial_3d_elem_type(elem_type))
+        return 10;
+      return 0;
+
+    case libMesh::THIRD:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+        case libMesh::EDGE4:
+          return 4;
+
+        default:
+          break;
+      }
+
+      if (is_monomial_2d_elem_type(elem_type))
+        return 10;
+      if (is_monomial_3d_elem_type(elem_type))
+        return 20;
+      return 0;
+
+    case libMesh::FOURTH:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+          return 5;
+
+        default:
+          break;
+      }
+
+      if (is_monomial_2d_elem_type(elem_type))
+        return 15;
+      if (is_monomial_3d_elem_type(elem_type, false))
+        return 35;
+      return 0;
+
+    case libMesh::FIFTH:
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+          return 6;
+
+        default:
+          break;
+      }
+
+      if (is_monomial_2d_elem_type(elem_type))
+        return 21;
+      if (is_monomial_3d_elem_type(elem_type, false))
+        return 56;
+      return 0;
+
+    // Orders above FIFTH: use the closed-form counts instead of a table.
+    default:
+    {
+      // The cast needs an explicit target type; p is the polynomial order.
+      const unsigned int p = static_cast<unsigned int>(order);
+
+      switch (elem_type)
+      {
+        case libMesh::NODEELEM:
+          return 1;
+
+        case libMesh::EDGE2:
+        case libMesh::EDGE3:
+          return p + 1;
+
+        default:
+          break;
+      }
+
+      if (is_monomial_2d_elem_type(elem_type))
+        return (p + 1) * (p + 2) / 2;
+      if (is_monomial_3d_elem_type(elem_type, false))
+        return (p + 1) * (p + 2) * (p + 3) / 6;
+      return 0;
+    }
+  }
+}
+
+/// Return the spatial dimension (1, 2 or 3) of the monomial evaluator family
+/// associated with \p elem_type, or 0 if the type is not listed.
+/// Shell, polygon, polyhedron, and node types are not listed and yield 0.
+LIBMESH_DEVICE_INLINE unsigned int
+monomial_evaluator_dim_or_zero(libMesh::ElemType elem_type)
+{
+  switch (elem_type)
+  {
+    case libMesh::EDGE2:
+    case libMesh::EDGE3:
+    case libMesh::EDGE4:
+      return 1;
+
+    case libMesh::TRI3:
+    case libMesh::TRI6:
+    case libMesh::TRI7:
+    case libMesh::QUAD4:
+    case libMesh::QUAD8:
+    case libMesh::QUAD9:
+      return 2;
+
+    case libMesh::TET4:
+    case libMesh::TET10:
+    case libMesh::TET14:
+    case libMesh::HEX8:
+    case libMesh::HEX20:
+    case libMesh::HEX27:
+    case libMesh::PRISM6:
+    case libMesh::PRISM15:
+    case libMesh::PRISM18:
+    case libMesh::PRISM20:
+    case libMesh::PRISM21:
+    case libMesh::PYRAMID5:
+    case libMesh::PYRAMID13:
+    case libMesh::PYRAMID14:
+    case libMesh::PYRAMID18:
+      return 3;
+
+    default:
+      return 0;
+  }
+}
+
+/// Return true iff the current Kokkos physics evaluators can evaluate \p key.
+/// This boundary is the intersection of:
+/// 1. exact libMesh-valid (family, elem_type, order) keys, and
+/// 2. currently implemented Kokkos evaluator topologies/orders.
+LIBMESH_DEVICE_INLINE bool
+supports_shape(FEShapeKey key)
+{
+  if (key.family == libMesh::LAGRANGE)
+  {
+    // Needs a tabulated DOF count and a shape topology to dispatch to.
+    if (lagrange_exact_n_dofs_or_zero(key.elem_type, key.order) == 0)
+      return false;
+    return lagrange_shape_topology_or_invalid(key) != libMesh::INVALID_ELEM;
+  }
+
+  if (key.family == libMesh::MONOMIAL)
+  {
+    // Needs a DOF count, an evaluator dimension, and an order in
+    // [CONSTANT, FIFTH].
+    if (monomial_exact_n_dofs_or_zero(key.elem_type, key.order) == 0)
+      return false;
+    if (monomial_evaluator_dim_or_zero(key.elem_type) == 0)
+      return false;
+    return key.order >= libMesh::CONSTANT && key.order <= libMesh::FIFTH;
+  }
+
+  // Every other family is unsupported.
+  return false;
+}
+
+/// Gradient evaluation is supported for exactly the keys supports_shape()
+/// accepts.
+LIBMESH_DEVICE_INLINE bool
+supports_grad_shape(FEShapeKey key)
+{
+  const bool shape_ok = supports_shape(key);
+  return shape_ok;
+}
+
+/// DOF counting is supported for exactly the keys supports_shape() accepts.
+LIBMESH_DEVICE_INLINE bool
+supports_n_dofs(FEShapeKey key)
+{
+  const bool shape_ok = supports_shape(key);
+  return shape_ok;
+}
+
+/// Return the DOF count of the physics FE space identified by \p key.
+/// Keys rejected by supports_n_dofs() are reported through
+/// detail::abort_unsupported(); 0 is returned if that call returns.
+LIBMESH_DEVICE_INLINE unsigned int
+n_dofs(FEShapeKey key)
+{
+  if (supports_n_dofs(key))
+  {
+    if (key.family == libMesh::LAGRANGE)
+      return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order);
+
+    if (key.family == libMesh::MONOMIAL)
+      return monomial_exact_n_dofs_or_zero(key.elem_type, key.order);
+
+    // A supported key of any other family is still rejected here.
+    detail::abort_unsupported("n_dofs(FEShapeKey): unsupported FE family");
+    return 0;
+  }
+
+  detail::abort_unsupported("n_dofs(FEShapeKey): unsupported FE key for current Kokkos evaluator support boundary");
+  return 0;
+}
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_FE_TYPES_H
diff --git a/include/gpu/kokkos_quadrature.h b/include/gpu/kokkos_quadrature.h
new file mode 100644
index 00000000000..4c2e8750bab
--- /dev/null
+++ b/include/gpu/kokkos_quadrature.h
@@ -0,0 +1,652 @@
+// Kokkos device-compatible Gauss quadrature rules.
+//
+// All evaluation functions are LIBMESH_DEVICE_INLINE — callable from both
+// host and GPU device code.
+//
+// GaussLegendre1D: 1-D Gauss-Legendre on [-1,1], 1-7 point rules.
+// GaussQuadrature: Full quadrature dispatcher for all supported topologies.
+//   - n_points(topo, order): number of quadrature points
+//   - point(topo, order, qp): reference coordinate of qp-th point
+//   - weight(topo, order, qp): weight of qp-th point
+//
+// Values match the libMesh QGauss implementation.
+
+#ifndef LIBMESH_KOKKOS_QUADRATURE_H
+#define LIBMESH_KOKKOS_QUADRATURE_H
+
+#include "gpu/kokkos_scalar_types.h"
+#include "libmesh/enum_elem_type.h"
+#include // NOTE(review): header name missing in this copy of the patch (fill_quadrature needs std::vector, so one is presumably <vector>) -- confirm upstream
+#include // NOTE(review): header name missing in this copy of the patch -- confirm upstream
+
+namespace libMesh::Kokkos
+{
+
+// ---------------------------------------------------------------------------
+// 1-D Gauss-Legendre quadrature on [-1, 1]; rules with 1 to 7 points are tabulated, points are indexed from 0 in ascending order
+// ---------------------------------------------------------------------------
+
+struct GaussLegendre1D
+{
+  LIBMESH_DEVICE_INLINE static unsigned int n_points(unsigned int alg_order) // number of points used for polynomial degree alg_order
+  {
+    const unsigned int n = (alg_order + 2u) / 2u; // integer form of ceil((alg_order + 1) / 2): smallest n with 2n - 1 >= alg_order
+    return (n < 1u) ? 1u : (n > 7u ? 7u : n); // clamp to the tabulated 1..7 point rules; NOTE(review): alg_order > 13 silently gets the 7-point rule
+  }
+
+  LIBMESH_DEVICE_INLINE static Real point(unsigned int n, unsigned int i) // abscissa i of the n-point rule; 0.0 for untabulated n or out-of-range i
+  {
+    switch (n)
+    {
+      case 1: return 0.0; // single midpoint, returned for any i
+      case 2:
+        switch (i)
+        {
+          case 0: return -5.7735026918962576450914878050196e-01;
+          case 1: return 5.7735026918962576450914878050196e-01;
+          default: return 0.0;
+        }
+      case 3:
+        switch (i)
+        {
+          case 0: return -7.7459666924148337703585307995648e-01;
+          case 1: return 0.0;
+          case 2: return 7.7459666924148337703585307995648e-01;
+          default: return 0.0;
+        }
+      case 4:
+        switch (i)
+        {
+          case 0: return -8.6113631159405257522394648889281e-01;
+          case 1: return -3.3998104358485626480266575910324e-01;
+          case 2: return 3.3998104358485626480266575910324e-01;
+          case 3: return 8.6113631159405257522394648889281e-01;
+          default: return 0.0;
+        }
+      case 5:
+        switch (i)
+        {
+          case 0: return -9.0617984593866399279762687829939e-01;
+          case 1: return -5.3846931010568309103631442070021e-01;
+          case 2: return 0.0;
+          case 3: return 5.3846931010568309103631442070021e-01;
+          case 4: return 9.0617984593866399279762687829939e-01;
+          default: return 0.0;
+        }
+      case 6:
+        switch (i)
+        {
+          case 0: return -9.3246951420315202781230155449399e-01;
+          case 1: return -6.6120938646626451366139959501991e-01;
+          case 2: return -2.3861918608319690863050172168071e-01;
+          case 3: return 2.3861918608319690863050172168071e-01;
+          case 4: return 6.6120938646626451366139959501991e-01;
+          case 5: return 9.3246951420315202781230155449399e-01;
+          default: return 0.0;
+        }
+      case 7:
+        switch (i)
+        {
+          case 0: return -9.4910791234275852452618968404785e-01;
+          case 1: return -7.4153118559939443986386477328079e-01;
+          case 2: return -4.0584515137739716690660641207696e-01;
+          case 3: return 0.0;
+          case 4: return 4.0584515137739716690660641207696e-01;
+          case 5: return 7.4153118559939443986386477328079e-01;
+          case 6: return 9.4910791234275852452618968404785e-01;
+          default: return 0.0;
+        }
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static Real weight(unsigned int n, unsigned int i) // weight i of the n-point rule; the weights of each rule sum to 2, the length of [-1, 1]
+  {
+    switch (n)
+    {
+      case 1: return 2.0; // returned for any i, i is not range-checked here
+      case 2: return 1.0; // both weights are equal; returned for any i, i is not range-checked here
+      case 3:
+        switch (i)
+        {
+          case 0: case 2: return 5.5555555555555555555555555555556e-01;
+          case 1: return 8.8888888888888888888888888888889e-01;
+          default: return 0.0;
+        }
+      case 4:
+        switch (i)
+        {
+          case 0: case 3: return 3.4785484513745385737306394922200e-01;
+          case 1: case 2: return 6.5214515486254614262693605077800e-01;
+          default: return 0.0;
+        }
+      case 5:
+        switch (i)
+        {
+          case 0: case 4: return 2.3692688505618908751426404071992e-01;
+          case 1: case 3: return 4.7862867049936646804129151483564e-01;
+          case 2: return 5.6888888888888888888888888888889e-01;
+          default: return 0.0;
+        }
+      case 6:
+        switch (i)
+        {
+          case 0: case 5: return 1.7132449237917034504029614217273e-01;
+          case 1: case 4: return 3.6076157304813860756983351383772e-01;
+          case 2: case 3: return 4.6791393457269104738987034398955e-01;
+          default: return 0.0;
+        }
+      case 7:
+        switch (i)
+        {
+          case 0: case 6: return 1.2948496616886969327061143267908e-01;
+          case 1: case 5: return 2.7970539148927666790146777142378e-01;
+          case 2: case 4: return 3.8183005050511894495036977548898e-01;
+          case 3: return 4.1795918367346938775510204081633e-01;
+          default: return 0.0;
+        }
+      default: return 0.0;
+    }
+  }
+};
+
+// ---------------------------------------------------------------------------
+// GaussQuadrature — device-callable quadrature for EDGE2/3, QUAD4/8/9, HEX8/20/27, TRI3/6 and TET4/10 (any other type yields 0 points)
+//
+// Coordinate conventions (same as libMesh):
+//   EDGE: xi in [-1,1]
+//   QUAD: (xi,eta) in [-1,1]^2, tensor product
+//   HEX:  (xi,eta,zeta) in [-1,1]^3, tensor product
+//   TRI:  (x,y) on unit triangle {(0,0),(1,0),(0,1)}
+//   TET:  (x,y,z) on unit tet {(0,0,0),(1,0,0),(0,1,0),(0,0,1)}
+// ---------------------------------------------------------------------------
+
+struct GaussQuadrature
+{
+  /// Number of quadrature points for a given topology and polynomial order; 0 when \p topo is not handled.
+  LIBMESH_DEVICE_INLINE static unsigned int
+  n_points(libMesh::ElemType topo, unsigned int order)
+  {
+    switch (topo)
+    {
+      case libMesh::EDGE2: case libMesh::EDGE3:
+        return GaussLegendre1D::n_points(order);
+
+      case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9:
+      {
+        const unsigned int n = GaussLegendre1D::n_points(order);
+        return n * n; // tensor product of the 1-D rule with itself
+      }
+
+      case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27:
+      {
+        const unsigned int n = GaussLegendre1D::n_points(order);
+        return n * n * n; // three-fold tensor product of the 1-D rule
+      }
+
+      case libMesh::TRI3: case libMesh::TRI6: // counts must stay in step with tri_point()/tri_weight()
+        switch (order)
+        {
+          case 0: case 1: return 1;
+          case 2: return 3;
+          case 3: return 4;
+          case 4: return 6;
+          case 5: return 7;
+          default: return 12; // NOTE(review): every order >= 6 gets a 12-point rule; orders above 7 silently reuse the order-7 table -- confirm callers never ask for more
+        }
+
+      case libMesh::TET4: case libMesh::TET10: // counts must stay in step with tet_point()/tet_weight()
+        switch (order)
+        {
+          case 0: case 1: return 1;
+          case 2: return 4;
+          case 3: return 5;
+          case 4: return 11;
+          case 5: return 14;
+          default: return 24; // NOTE(review): every order >= 6 silently reuses the 24-point rule -- confirm callers never ask for more
+        }
+
+      default: return 0; // unhandled topology: an integration loop over it runs zero times
+    }
+  }
+
+  /// Reference coordinate of the qp-th quadrature point.
+  LIBMESH_DEVICE_INLINE static RealVector
+  point(libMesh::ElemType topo, unsigned int order, unsigned int qp) // qp is a zero-based index; unhandled topologies return the zero vector
+  {
+    switch (topo)
+    {
+      case libMesh::EDGE2: case libMesh::EDGE3:
+        return make_vector(GaussLegendre1D::point(GaussLegendre1D::n_points(order), qp), 0.0, 0.0);
+
+      case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9:
+      {
+        const unsigned int n = GaussLegendre1D::n_points(order);
+        const unsigned int i = qp % n; // xi index, varies fastest
+        const unsigned int j = qp / n; // eta index; not range-checked, an out-of-range j yields 0.0 from the 1-D table
+        return make_vector(GaussLegendre1D::point(n, i),
+                           GaussLegendre1D::point(n, j), 0.0);
+      }
+
+      case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27:
+      {
+        const unsigned int n = GaussLegendre1D::n_points(order);
+        const unsigned int i = qp % n; // xi index, varies fastest
+        const unsigned int j = (qp / n) % n; // eta index
+        const unsigned int k = qp / (n * n); // zeta index, varies slowest
+        return make_vector(GaussLegendre1D::point(n, i),
+                           GaussLegendre1D::point(n, j),
+                           GaussLegendre1D::point(n, k));
+      }
+
+      case libMesh::TRI3: case libMesh::TRI6:
+        return tri_point(order, qp);
+
+      case libMesh::TET4: case libMesh::TET10:
+        return tet_point(order, qp);
+
+      default: return zero_vector();
+    }
+  }
+
+  /// Weight of the qp-th quadrature point.
+ LIBMESH_DEVICE_INLINE static Real + weight(libMesh::ElemType topo, unsigned int order, unsigned int qp) + { + switch (topo) + { + case libMesh::EDGE2: case libMesh::EDGE3: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return GaussLegendre1D::weight(n, qp); + } + + case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return GaussLegendre1D::weight(n, qp % n) * + GaussLegendre1D::weight(n, qp / n); + } + + case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return GaussLegendre1D::weight(n, qp % n) * + GaussLegendre1D::weight(n, (qp / n) % n) * + GaussLegendre1D::weight(n, qp / (n * n)); + } + + case libMesh::TRI3: case libMesh::TRI6: + return tri_weight(order, qp); + + case libMesh::TET4: case libMesh::TET10: + return tet_weight(order, qp); + + default: return 0.0; + } + } + +private: + // ── Triangle rules ──────────────────────────────────────────────────────── + + LIBMESH_DEVICE_INLINE static RealVector + tri_point(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: + return make_vector(1.0 / 3.0, 1.0 / 3.0, 0.0); + + case 2: + switch (qp) + { + case 0: return make_vector(2.0 / 3.0, 1.0 / 6.0, 0.0); + case 1: return make_vector(1.0 / 6.0, 2.0 / 3.0, 0.0); + case 2: return make_vector(1.0 / 6.0, 1.0 / 6.0, 0.0); + default: return zero_vector(); + } + + case 3: + switch (qp) + { + case 0: return make_vector(1.5505102572168219018e-01, 1.7855872826361642312e-01, 0.0); + case 1: return make_vector(6.4494897427831780982e-01, 7.5031110222608118177e-02, 0.0); + case 2: return make_vector(1.5505102572168219018e-01, 6.6639024601470138670e-01, 0.0); + case 3: return make_vector(6.4494897427831780982e-01, 2.8001991549907407200e-01, 0.0); + default: return zero_vector(); + } + + case 4: + { + constexpr Real a1 = 4.4594849091596488632e-01, b1 = 1.0 - 2.0 * a1; + constexpr Real a2 = 
9.1576213509770743460e-02, b2 = 1.0 - 2.0 * a2; + switch (qp) + { + case 0: return make_vector(a1, a1, 0.0); + case 1: return make_vector(a1, b1, 0.0); + case 2: return make_vector(b1, a1, 0.0); + case 3: return make_vector(a2, a2, 0.0); + case 4: return make_vector(a2, b2, 0.0); + case 5: return make_vector(b2, a2, 0.0); + default: return zero_vector(); + } + } + + case 5: + { + const Real sq15 = 3.872983346207417; // sqrt(15) + const Real a1 = 2.0 / 7.0 + sq15 / 21.0; + const Real a2 = 2.0 / 7.0 - sq15 / 21.0; + const Real b1 = 1.0 - 2.0 * a1, b2 = 1.0 - 2.0 * a2; + switch (qp) + { + case 0: return make_vector(1.0 / 3.0, 1.0 / 3.0, 0.0); + case 1: return make_vector(a1, a1, 0.0); + case 2: return make_vector(a1, b1, 0.0); + case 3: return make_vector(b1, a1, 0.0); + case 4: return make_vector(a2, a2, 0.0); + case 5: return make_vector(a2, b2, 0.0); + case 6: return make_vector(b2, a2, 0.0); + default: return zero_vector(); + } + } + + case 6: + { + constexpr Real a1 = 2.4928674517091042129163855310701908e-01; + constexpr Real a2 = 6.3089014491502228340331602870819157e-02; + constexpr Real a3 = 3.1035245103378440541660773395655215e-01; + constexpr Real b1 = 1.0 - 2.0 * a1; + constexpr Real b2 = 1.0 - 2.0 * a2; + constexpr Real b3 = 6.3650249912139864723014259441204970e-01; + constexpr Real c3 = 1.0 - a3 - b3; + switch (qp) + { + case 0: return make_vector(a1, a1, 0.0); + case 1: return make_vector(a1, b1, 0.0); + case 2: return make_vector(b1, a1, 0.0); + case 3: return make_vector(a2, a2, 0.0); + case 4: return make_vector(a2, b2, 0.0); + case 5: return make_vector(b2, a2, 0.0); + case 6: return make_vector(a3, b3, 0.0); + case 7: return make_vector(b3, a3, 0.0); + case 8: return make_vector(a3, c3, 0.0); + case 9: return make_vector(c3, a3, 0.0); + case 10: return make_vector(b3, c3, 0.0); + case 11: return make_vector(c3, b3, 0.0); + default: return zero_vector(); + } + } + + default: // order >= 7: 12-point Ro3-invariant rule + { + constexpr Real rd[4][2] = { 
+ {6.2382265094402118174e-02, 6.7517867073916085443e-02}, + {5.5225456656926611737e-02, 3.2150249385198182267e-01}, + {3.4324302945097146470e-02, 6.6094919618673565761e-01}, + {5.1584233435359177926e-01, 2.7771616697639178257e-01} + }; + const unsigned int row = qp / 3; + const unsigned int sub = qp % 3; + if (row >= 4) + return zero_vector(); + const Real z1 = rd[row][0], z2 = rd[row][1], z3 = 1.0 - z1 - z2; + switch (sub) + { + case 0: return make_vector(z1, z2, 0.0); + case 1: return make_vector(z3, z1, 0.0); + case 2: return make_vector(z2, z3, 0.0); + default: return zero_vector(); + } + } + } + } + + LIBMESH_DEVICE_INLINE static Real + tri_weight(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: return 0.5; + case 2: return 1.0 / 6.0; + case 3: return (qp % 2 == 0) ? 1.5902069087198858470e-01 : 9.0979309128011415303e-02; + case 4: return (qp < 3) ? 1.1169079483900573285e-01 : 5.4975871827660933819e-02; + case 5: + { + if (qp == 0) + return 9.0 / 80.0; + const Real sq15 = 3.872983346207417; + return (qp <= 3) ? (31.0 / 480.0 + sq15 / 2400.0) : (31.0 / 480.0 - sq15 / 2400.0); + } + case 6: + { + if (qp <= 2) + return 5.8393137863189683012644805692789721e-02; + if (qp <= 5) + return 2.5422453185103408460468404553434492e-02; + return 4.1425537809186787596776728210221227e-02; + } + default: + { + constexpr Real wts[4] = { + 2.6517028157436251429e-02, 4.3881408714446055037e-02, + 2.8775042784981585738e-02, 6.7493187009802774463e-02 + }; + return (qp / 3 < 4) ? 
wts[qp / 3] : 0.0; + } + } + } + + // ── Tetrahedral rules ───────────────────────────────────────────────────── + + LIBMESH_DEVICE_INLINE static RealVector + tet_point(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: + return make_vector(0.25, 0.25, 0.25); + + case 2: + { + const Real b = 0.25 * (1.0 - 1.0 / 2.2360679774997896964); // 1/sqrt(5) + const Real a = 1.0 - 3.0 * b; + switch (qp) + { + case 0: return make_vector(a, b, b); + case 1: return make_vector(b, a, b); + case 2: return make_vector(b, b, a); + case 3: return make_vector(b, b, b); + default: return zero_vector(); + } + } + + case 3: + switch (qp) + { + case 0: return make_vector(0.25, 0.25, 0.25); + case 1: return make_vector(0.5, 1.0 / 6.0, 1.0 / 6.0); + case 2: return make_vector(1.0 / 6.0, 0.5, 1.0 / 6.0); + case 3: return make_vector(1.0 / 6.0, 1.0 / 6.0, 0.5); + case 4: return make_vector(1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0); + default: return zero_vector(); + } + + case 4: + { + constexpr Real a1 = 2.5e-01; + constexpr Real a2 = 7.85714285714285714e-01, b2 = 7.14285714285714285e-02; + constexpr Real a3 = 3.99403576166799219e-01, b3 = 1.00596423833200785e-01; + switch (qp) + { + case 0: return make_vector(a1, a1, a1); + case 1: return make_vector(a2, b2, b2); + case 2: return make_vector(b2, a2, b2); + case 3: return make_vector(b2, b2, a2); + case 4: return make_vector(b2, b2, b2); + case 5: return make_vector(a3, a3, b3); + case 6: return make_vector(a3, b3, b3); + case 7: return make_vector(b3, b3, a3); + case 8: return make_vector(b3, a3, b3); + case 9: return make_vector(b3, a3, a3); + case 10: return make_vector(a3, b3, a3); + default: return zero_vector(); + } + } + + case 5: + { + constexpr Real af[3] = {3.1088591926330060980e-01, + 9.2735250310891226402e-02, + 4.5503704125649649492e-02}; + if (qp < 8) + { + const unsigned int g = qp / 4; + const unsigned int sub = qp % 4; + const Real ag = af[g], bg = 1.0 - 3.0 * ag; + switch (sub) + { + case 0: return 
make_vector(ag, ag, ag); + case 1: return make_vector(ag, bg, ag); + case 2: return make_vector(bg, ag, ag); + case 3: return make_vector(ag, ag, bg); + default: return zero_vector(); + } + } + else + { + const Real a2 = af[2], b2 = 0.5 * (1.0 - 2.0 * a2); + switch (qp - 8) + { + case 0: return make_vector(b2, b2, a2); + case 1: return make_vector(b2, a2, a2); + case 2: return make_vector(a2, a2, b2); + case 3: return make_vector(a2, b2, a2); + case 4: return make_vector(b2, a2, b2); + case 5: return make_vector(a2, b2, b2); + default: return zero_vector(); + } + } + } + + default: // order >= 6: 24-point Keast rule + { + constexpr Real data[4][3] = { + {3.56191386222544953e-01, 2.14602871259151684e-01, 0.0}, + {8.77978124396165982e-01, 4.06739585346113397e-02, 0.0}, + {3.29863295731730594e-02, 3.22337890142275646e-01, 0.0}, + {0.0, 0.0, 0.0} // 12-perm group handled separately + }; + + if (qp < 12) + { + // Three 4-permutation groups + const unsigned int grp = qp / 4; + const unsigned int sub = qp % 4; + const Real a = data[grp][0], b = data[grp][1]; + switch (sub) + { + case 0: return make_vector(a, b, b); + case 1: return make_vector(b, a, b); + case 2: return make_vector(b, b, a); + case 3: return make_vector(b, b, b); + default: return zero_vector(); + } + } + else + { + // 12-permutation group + constexpr Real a4 = 6.36610018750175299e-02; + constexpr Real b4 = 2.69672331458315867e-01; + constexpr Real c4 = 6.03005664791649076e-01; + switch (qp - 12) + { + case 0: return make_vector(a4, a4, b4); + case 1: return make_vector(a4, a4, c4); + case 2: return make_vector(b4, a4, a4); + case 3: return make_vector(c4, a4, a4); + case 4: return make_vector(a4, b4, a4); + case 5: return make_vector(a4, c4, a4); + case 6: return make_vector(a4, b4, c4); + case 7: return make_vector(a4, c4, b4); + case 8: return make_vector(b4, a4, c4); + case 9: return make_vector(b4, c4, a4); + case 10: return make_vector(c4, a4, b4); + case 11: return make_vector(c4, b4, a4); + 
default: return zero_vector(); + } + } + } + } + } + + LIBMESH_DEVICE_INLINE static Real + tet_weight(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: return 1.0 / 6.0; + case 2: return 1.0 / 24.0; + case 3: return (qp == 0) ? -2.0 / 15.0 : 0.075; + case 4: + { + if (qp == 0) + return -1.31555555555555556e-02; + if (qp <= 4) + return 7.62222222222222222e-03; + return 2.48888888888888889e-02; + } + case 5: + { + constexpr Real wf[3] = {1.8781320953002641800e-02, + 1.2248840519393658257e-02, + 7.0910034628469110730e-03}; + if (qp < 4) + return wf[0]; + if (qp < 8) + return wf[1]; + return wf[2]; + } + default: + { + constexpr Real wts[4] = {6.65379170969464506e-03, + 1.67953517588677620e-03, + 9.22619692394239843e-03, + 8.03571428571428248e-03}; + if (qp < 4) + return wts[0]; + if (qp < 8) + return wts[1]; + if (qp < 12) + return wts[2]; + return wts[3]; + } + } + } +}; + +// --------------------------------------------------------------------------- +// fill_quadrature — host-side convenience wrapper +// +// Fills std::vectors using the device-callable GaussQuadrature functions. +// --------------------------------------------------------------------------- + +inline void +fill_quadrature(libMesh::ElemType topo, + unsigned int order, + std::vector & qpts, + std::vector & weights) +{ + const unsigned int nqp = GaussQuadrature::n_points(topo, order); + qpts.resize(nqp); + weights.resize(nqp); + for (unsigned int q = 0; q < nqp; ++q) + { + qpts[q] = GaussQuadrature::point(topo, order, q); + weights[q] = GaussQuadrature::weight(topo, order, q); + } +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_QUADRATURE_H diff --git a/include/gpu/kokkos_scalar_types.h b/include/gpu/kokkos_scalar_types.h new file mode 100644 index 00000000000..b7386cf900f --- /dev/null +++ b/include/gpu/kokkos_scalar_types.h @@ -0,0 +1,118 @@ +// libMesh Kokkos device-compatible scalar types. 
+// +// This header provides dimension-aware Kokkos aliases/helpers that mirror +// libMesh host numerics at LIBMESH_DIM=1/2/3. + +#ifndef LIBMESH_KOKKOS_SCALAR_TYPES_H +#define LIBMESH_KOKKOS_SCALAR_TYPES_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/type_vector.h" +#include "libmesh/type_tensor.h" +namespace libMesh::Kokkos +{ + +using Real = libMesh::Real; +using RealVector = libMesh::TypeVector; +using RealTensor = libMesh::TypeTensor; + +LIBMESH_DEVICE_INLINE +RealVector zero_vector() +{ + RealVector v; + v.zero(); + return v; +} + +LIBMESH_DEVICE_INLINE +RealVector make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + RealVector v = zero_vector(); + + v(0) = x; + +#if LIBMESH_DIM > 1 + v(1) = y; +#else + libmesh_assert_equal_to(y, Real(0)); +#endif + +#if LIBMESH_DIM > 2 + v(2) = z; +#else + libmesh_assert_equal_to(z, Real(0)); +#endif + + return v; +} + +LIBMESH_DEVICE_INLINE +RealTensor zero_tensor() +{ + RealTensor J; + J.zero(); + return J; +} + +LIBMESH_DEVICE_INLINE +RealTensor leading_identity(const unsigned int dim = LIBMESH_DIM) +{ + libmesh_assert_less_equal(dim, LIBMESH_DIM); + + RealTensor I = zero_tensor(); + for (unsigned int i = 0; i < dim; ++i) + I(i, i) = Real(1); + + return I; +} + +LIBMESH_DEVICE_INLINE +Real leading_determinant(const RealTensor & J, const unsigned int dim = LIBMESH_DIM) +{ + libmesh_assert_less_equal(dim, LIBMESH_DIM); + + if (dim == 0) + return Real(1); + + if (dim == 1) + return J(0, 0); + + if (dim == 2) + return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); + + return J.det(); +} + +LIBMESH_DEVICE_INLINE +RealTensor leading_inverse(const RealTensor & J, const unsigned int dim = LIBMESH_DIM) +{ + libmesh_assert_less_equal(dim, LIBMESH_DIM); + + if (dim == 0) + return leading_identity(0); + + if (dim == 1) + { + RealTensor inv = zero_tensor(); + inv(0, 0) = Real(1) / J(0, 0); + return inv; + } + + if (dim == 2) + { + const Real inv_det = Real(1) / 
leading_determinant(J, dim); + RealTensor inv = zero_tensor(); + inv(0, 0) = J(1, 1) * inv_det; + inv(0, 1) = -J(0, 1) * inv_det; + inv(1, 0) = -J(1, 0) * inv_det; + inv(1, 1) = J(0, 0) * inv_det; + return inv; + } + + return J.inverse(); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_SCALAR_TYPES_H diff --git a/src/quadrature/quadrature_gauss_3D.C b/src/quadrature/quadrature_gauss_3D.C index e9986c216cb..8e2f96ed5cc 100644 --- a/src/quadrature/quadrature_gauss_3D.C +++ b/src/quadrature/quadrature_gauss_3D.C @@ -181,8 +181,6 @@ void QGauss::init_3D() // Note: if !allow_rules_with_negative_weights, fall through to next case. } - - // Originally a Keast rule, // Patrick Keast, // Moderate Degree Tetrahedral Quadrature Formulas, From bebe8b7b6bc2827cc880edaa189873c707757e11 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 5 May 2026 10:28:33 -0600 Subject: [PATCH 06/48] Add Kokkos FE oracle test suite --- tests/Makefile.am | 110 +- tests/fe/kokkos_fe_contract_test.K | 333 ++++ tests/fe/kokkos_fe_invariant_test.K | 418 +++++ tests/fe/kokkos_fe_map_oracle_test.K | 529 +++++++ tests/fe/kokkos_fe_oracle_test_utils.h | 1386 +++++++++++++++++ tests/fe/kokkos_fe_permuted_map_oracle_test.K | 512 ++++++ .../fe/kokkos_fe_reconstruction_oracle_test.K | 320 ++++ tests/fe/kokkos_fe_shape_oracle_test.K | 629 ++++++++ tests/fe/kokkos_fe_side_trace_oracle_test.K | 342 ++++ tests/fe/kokkos_fe_types_oracle_test.K | 509 ++++++ tests/fe/kokkos_quadrature_oracle_test.K | 747 +++++++++ tests/quadrature/quadrature_exactness.h | 176 +++ tests/quadrature/quadrature_test.C | 108 +- 13 files changed, 6028 insertions(+), 91 deletions(-) create mode 100644 tests/fe/kokkos_fe_contract_test.K create mode 100644 tests/fe/kokkos_fe_invariant_test.K create mode 100644 tests/fe/kokkos_fe_map_oracle_test.K create mode 100644 tests/fe/kokkos_fe_oracle_test_utils.h create mode 100644 tests/fe/kokkos_fe_permuted_map_oracle_test.K create mode 100644 
tests/fe/kokkos_fe_reconstruction_oracle_test.K create mode 100644 tests/fe/kokkos_fe_shape_oracle_test.K create mode 100644 tests/fe/kokkos_fe_side_trace_oracle_test.K create mode 100644 tests/fe/kokkos_fe_types_oracle_test.K create mode 100644 tests/fe/kokkos_quadrature_oracle_test.K create mode 100644 tests/quadrature/quadrature_exactness.h diff --git a/tests/Makefile.am b/tests/Makefile.am index 6efb6d23e88..a721cba160d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -135,6 +135,7 @@ unit_tests_sources = \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ + quadrature/quadrature_exactness.h \ quadrature/quadrature_test.C \ solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ @@ -241,7 +242,7 @@ data = matrices/geom_1_extraction_op.h5 \ unit_tests_data = $(data) # Why isn't this working automatically? -EXTRA_DIST = $(data) +EXTRA_DIST = $(data) fe/kokkos_fe_oracle_test_utils.h if LIBMESH_ENABLE_FPARSER unit_tests_sources += \ @@ -254,8 +255,75 @@ TESTS = if LIBMESH_ENABLE_KOKKOS KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) - check_PROGRAMS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit - TESTS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit + check_PROGRAMS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ + kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ + kokkos_fe_contract_unit kokkos_fe_permuted_map_oracle_unit \ + kokkos_fe_reconstruction_oracle_unit \ + kokkos_fe_side_trace_oracle_unit + TESTS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ + kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ + kokkos_fe_contract_unit kokkos_fe_permuted_map_oracle_unit \ + kokkos_fe_reconstruction_oracle_unit \ + kokkos_fe_side_trace_oracle_unit + + kokkos_fe_types_oracle_unit_SOURCES = fe/kokkos_fe_types_oracle_test.K + kokkos_fe_types_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) 
$(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_shape_oracle_unit_SOURCES = fe/kokkos_fe_shape_oracle_test.K + kokkos_fe_shape_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_map_oracle_unit_SOURCES = fe/kokkos_fe_map_oracle_test.K + kokkos_fe_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_invariant_unit_SOURCES = fe/kokkos_fe_invariant_test.K + kokkos_fe_invariant_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_contract_unit_SOURCES = fe/kokkos_fe_contract_test.K + kokkos_fe_contract_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_permuted_map_oracle_unit_SOURCES = fe/kokkos_fe_permuted_map_oracle_test.K + 
kokkos_fe_permuted_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_reconstruction_oracle_unit_SOURCES = fe/kokkos_fe_reconstruction_oracle_test.K + kokkos_fe_reconstruction_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_side_trace_oracle_unit_SOURCES = fe/kokkos_fe_side_trace_oracle_test.K + kokkos_fe_side_trace_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + check_PROGRAMS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ + kokkos_tensor_ops_oracle_unit + TESTS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ + kokkos_tensor_ops_oracle_unit + + kokkos_quadrature_oracle_unit_SOURCES = fe/kokkos_quadrature_oracle_test.K + kokkos_quadrature_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_quadrature_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K 
kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) @@ -392,6 +460,42 @@ endif -c $< -o $@ # Custom link rules so the Kokkos compiler drives the final link step. +kokkos_fe_types_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_types_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_shape_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_shape_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_invariant_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_invariant_unit_LDFLAGS) -o $@ + +kokkos_fe_contract_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_contract_unit_LDFLAGS) -o $@ + +kokkos_fe_permuted_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_permuted_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_reconstruction_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_reconstruction_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_side_trace_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_side_trace_oracle_unit_LDFLAGS) -o $@ + +kokkos_quadrature_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_quadrature_oracle_unit_LDFLAGS) -o $@ + kokkos_vector_ops_oracle_unit_LINK = \ $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ diff --git a/tests/fe/kokkos_fe_contract_test.K b/tests/fe/kokkos_fe_contract_test.K new file mode 100644 index 00000000000..826693a2e93 --- /dev/null +++ b/tests/fe/kokkos_fe_contract_test.K @@ -0,0 +1,333 @@ +// Host-side contract tests for libMesh::Kokkos hard-fail paths. 
+// +// This executable self-spawns child processes that intentionally invoke +// unsupported Kokkos FE entry points. A child succeeds only if the call +// returns normally; the parent test expects those calls to terminate with a +// non-zero exit status instead. +// +// Returns 0 on success, non-zero on failure. + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/node.h" + +#include +#include +#include +#include +#include +#include + +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; + +namespace +{ + +struct contract_case +{ + const char * name; +}; + +struct element_fixture +{ + std::unique_ptr elem; + std::vector> nodes; +}; + +} // anonymous namespace + +static element_fixture +build_master_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(elem_type); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(fixture.elem->n_nodes()); + + std::vector refspace_nodes; + libMesh::FEBase::get_refspace_nodes(elem_type, refspace_nodes); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point refspace = refspace_nodes[i]; + fixture.nodes.push_back(libMesh::Node::build(refspace(0), refspace(1), refspace(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static std::unique_ptr +find_side_of_type(const libMesh::Elem & parent, + libMesh::ElemType desired_type, + unsigned int & side_id) +{ + for (unsigned int s = 0; s < parent.n_sides(); ++s) + { + auto side = parent.build_side_ptr(s); + if (side->type() == desired_type) + { + side_id = s; + return side; + } + } + + side_id = libMesh::invalid_uint; + return nullptr; +} + +static void +invoke_face_jacobian_on_side(const libMesh::Elem & side) +{ + constexpr unsigned int max_face_nodes 
= 9; + + RealVector face_nodes[max_face_nodes]; + for (unsigned int i = 0; i < side.n_nodes(); ++i) + face_nodes[i] = libMesh::Kokkos::point_to_real_vector(side.point(i)); + + (void)libMesh::Kokkos::face_jacobian( + libMesh::LAGRANGE_MAP, side.type(), face_nodes, side.n_nodes(), 0.0, 0.0, 0.0); +} + +static int +run_child_case(const std::string & case_name) +{ + using libMesh::Kokkos::FEShapeKey; + + if (case_name == "noop") + return 0; + + if (case_name == "get_side_topology_prism6") + { + (void)libMesh::Kokkos::get_side_topology(libMesh::PRISM6); + return 0; + } + + if (case_name == "get_side_topology_pyramid5") + { + (void)libMesh::Kokkos::get_side_topology(libMesh::PYRAMID5); + return 0; + } + + if (case_name == "shape_lagrange_edge4_third") + { + (void)libMesh::Kokkos::shape( + FEShapeKey{libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "grad_shape_lagrange_prism6_first") + { + (void)libMesh::Kokkos::grad_shape( + FEShapeKey{libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "shape_monomial_hex27_sixth") + { + (void)libMesh::Kokkos::shape( + FEShapeKey{libMesh::MONOMIAL, libMesh::HEX27, libMesh::SIXTH}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "grad_shape_monomial_tri7_sixth") + { + (void)libMesh::Kokkos::grad_shape( + FEShapeKey{libMesh::MONOMIAL, libMesh::TRI7, libMesh::SIXTH}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "ndofs_lagrange_prism6_first") + { + (void)libMesh::Kokkos::n_dofs( + FEShapeKey{libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST}); + return 0; + } + + if (case_name == "map_shape_rational") + { + (void)libMesh::Kokkos::map_shape( + libMesh::RATIONAL_BERNSTEIN_MAP, libMesh::QUAD4, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "grad_map_shape_rational") + { + (void)libMesh::Kokkos::grad_map_shape( + libMesh::RATIONAL_BERNSTEIN_MAP, libMesh::QUAD4, 0, 0.0, 0.0, 0.0); + return 0; + } + + if 
(case_name == "face_normal_parent_dim2") + { + RealTensor J = libMesh::Kokkos::zero_tensor(); + J(0, 0) = 1.0; + J(0, 1) = 2.0; + (void)libMesh::Kokkos::face_normal(J, 2); + return 0; + } + + if (case_name == "map_face_qp_to_parent_prism20_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM20); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + (void)libMesh::Kokkos::map_face_qp_to_parent( + *side, libMesh::LAGRANGE_MAP, side->type(), libMesh::Kokkos::zero_vector()); + return 0; + } + + if (case_name == "map_face_qp_to_parent_prism21_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM21); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + (void)libMesh::Kokkos::map_face_qp_to_parent( + *side, libMesh::LAGRANGE_MAP, side->type(), libMesh::Kokkos::zero_vector()); + return 0; + } + + if (case_name == "map_face_qp_to_parent_pyramid18_tri7") + { + auto fixture = build_master_fixture(libMesh::PYRAMID18); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + (void)libMesh::Kokkos::map_face_qp_to_parent( + *side, libMesh::LAGRANGE_MAP, side->type(), libMesh::Kokkos::zero_vector()); + return 0; + } + + if (case_name == "face_jacobian_prism20_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM20); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + invoke_face_jacobian_on_side(*side); + return 0; + } + + if (case_name == "face_jacobian_prism21_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM21); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + 
invoke_face_jacobian_on_side(*side); + return 0; + } + + if (case_name == "face_jacobian_pyramid18_tri7") + { + auto fixture = build_master_fixture(libMesh::PYRAMID18); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + invoke_face_jacobian_on_side(*side); + return 0; + } + + std::fprintf(stderr, "Unknown child case: %s\n", case_name.c_str()); + return 3; +} + +static int +run_command(const std::string & command) +{ + std::fflush(nullptr); + return std::system(command.c_str()); +} + +static bool +expect_child_success(const char * argv0, const char * case_name) +{ + const std::string command = + std::string(argv0) + " --child " + case_name + " >/dev/null 2>&1"; + const int status = run_command(command); + return status == 0; +} + +static bool +expect_child_abort(const char * argv0, const char * case_name) +{ + const std::string command = + std::string(argv0) + " --child " + case_name + " >/dev/null 2>&1"; + const int status = run_command(command); + if (status == -1) + return false; + + if (WIFSIGNALED(status)) + return true; + + if (!WIFEXITED(status)) + return false; + + const int exit_code = WEXITSTATUS(status); + return exit_code != 0 && exit_code != 2 && exit_code != 3; +} + +int +main(int argc, char ** argv) +{ + if (argc == 3 && std::string(argv[1]) == "--child") + return run_child_case(argv[2]); + + if (!expect_child_success(argv[0], "noop")) + { + std::printf("[contract_spawn] FAIL (could not successfully respawn test executable)\n"); + return 1; + } + + const contract_case cases[] = { + { "get_side_topology_prism6" }, + { "get_side_topology_pyramid5" }, + { "shape_lagrange_edge4_third" }, + { "grad_shape_lagrange_prism6_first" }, + { "shape_monomial_hex27_sixth" }, + { "grad_shape_monomial_tri7_sixth" }, + { "ndofs_lagrange_prism6_first" }, + { "map_shape_rational" }, + { "grad_map_shape_rational" }, + { "face_normal_parent_dim2" }, + { 
"face_jacobian_prism20_tri7" }, + { "face_jacobian_prism21_tri7" }, + { "face_jacobian_pyramid18_tri7" }, + { "map_face_qp_to_parent_prism20_tri7" }, + { "map_face_qp_to_parent_prism21_tri7" }, + { "map_face_qp_to_parent_pyramid18_tri7" } + }; + + int total_fail = 0; + for (const auto & info : cases) + { + const bool passed = expect_child_abort(argv[0], info.name); + const int fail = passed ? 0 : 1; + std::printf("[contract_abort] [%s] %s (%d failures)\n", + info.name, + passed ? "PASS" : "FAIL", + fail); + total_fail += fail; + } + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_invariant_test.K b/tests/fe/kokkos_fe_invariant_test.K new file mode 100644 index 00000000000..da089f74737 --- /dev/null +++ b/tests/fe/kokkos_fe_invariant_test.K @@ -0,0 +1,418 @@ +// Kokkos kernel regression tests for libMesh::Kokkos FE invariants and quadrature exactness. +// The test suite covers: +// A. Partition of unity for all implemented LAGRANGE map topologies. +// B. Zero-sum gradients for the same map topologies. +// C. Nodal Kronecker-delta behavior at master nodes for the same map topologies. +// D. Quadrature exactness sweeps for the canonical tensor-product and simplex +// reference topologies using analytic monomial integrals on the reference +// element. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_quadrature.h" +#include "../quadrature/quadrature_exactness.h" + +#include "libmesh/elem.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::GaussQuadrature; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_qps; +using kokkos_test_utils::build_reference_elem; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::upload_real; + +static constexpr double invariant_tol = 1.0e-13; +static constexpr double exactness_tol = 2.0e-12; +static constexpr unsigned int quadrature_order = 4; + +namespace +{ + +struct map_elem_info +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +struct quadrature_exactness_case +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int max_order; + const char * name; +}; + +static const map_elem_info map_elems[] = { + { libMesh::EDGE2, 1, 2, "EDGE2" }, + { libMesh::EDGE3, 1, 3, "EDGE3" }, + { libMesh::TRI3, 2, 3, "TRI3" }, + { libMesh::TRI6, 2, 6, "TRI6" }, + { libMesh::QUAD4, 2, 4, "QUAD4" }, + { libMesh::QUAD8, 2, 8, "QUAD8" }, + { libMesh::QUAD9, 2, 9, "QUAD9" }, + { libMesh::TET4, 3, 4, "TET4" }, + { libMesh::TET10, 3, 10, "TET10" }, + { libMesh::HEX8, 3, 8, "HEX8" }, + { libMesh::HEX20, 3, 20, "HEX20" }, + { libMesh::HEX27, 3, 27, "HEX27" } +}; + +// These cases sweep the full exactness range provided by the current Kokkos +// QGauss helper: +// - tensor-product rules: through order 13 (7-point 1D Gauss-Legendre) +// - simplex rules: through order 6 (highest explicit triangle/tet tables) +static const 
quadrature_exactness_case quadrature_cases[] = { + { libMesh::EDGE2, 1, 13, "EDGE2" }, + { libMesh::TRI3, 2, 6, "TRI3" }, + { libMesh::QUAD4, 2, 13, "QUAD4" }, + { libMesh::TET4, 3, 6, "TET4" }, + { libMesh::HEX8, 3, 13, "HEX8" } +}; + +} // anonymous namespace + +LIBMESH_DEVICE_INLINE Real +int_pow(Real x, unsigned int p) +{ + Real result = 1.0; + for (unsigned int i = 0; i < p; ++i) + result *= x; + return result; +} + +template +static int +test_partition_of_unity_impl(const map_elem_info & info) +{ + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.topo, info.dim, quadrature_order, xi_h, eta_h, zeta_h); + + auto d_xi = upload_real(xi_h, "unity_xi"); + auto d_eta = upload_real(eta_h, "unity_eta"); + auto d_zeta = upload_real(zeta_h, "unity_zeta"); + + Kokkos::View d_sum(std::string("unity_sum"), nqp); + + const unsigned int n_dofs = info.n_dofs; + Kokkos::parallel_for( + nqp, + KOKKOS_LAMBDA(int q) { + Real sum = 0.0; + for (unsigned int i = 0; i < n_dofs; ++i) + sum += libMesh::Kokkos::map_shape(i, d_xi(q), d_eta(q), d_zeta(q)); + d_sum(q) = sum; + }); + Kokkos::fence(); + + std::vector ref_values(nqp, 1.0); + return compare_device_values(d_sum, ref_values, invariant_tol); +} + +struct partition_of_unity_dispatch +{ + explicit partition_of_unity_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_partition_of_unity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported partition topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_partition_of_unity(const map_elem_info & info) +{ + const partition_of_unity_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_zero_sum_gradients_impl(const map_elem_info & info) +{ + std::vector xi_h, eta_h, zeta_h; + const 
unsigned int nqp = build_qps(info.topo, info.dim, quadrature_order, xi_h, eta_h, zeta_h); + + auto d_xi = upload_real(xi_h, "gradsum_xi"); + auto d_eta = upload_real(eta_h, "gradsum_eta"); + auto d_zeta = upload_real(zeta_h, "gradsum_zeta"); + + Kokkos::View d_sum(std::string("gradsum"), info.dim * nqp); + + const unsigned int n_dofs = info.n_dofs; + const unsigned int dim = info.dim; + Kokkos::parallel_for( + nqp, + KOKKOS_LAMBDA(int q) { + RealVector sum = libMesh::Kokkos::zero_vector(); + for (unsigned int i = 0; i < n_dofs; ++i) + sum += libMesh::Kokkos::grad_map_shape( + i, d_xi(q), d_eta(q), d_zeta(q)); + + for (unsigned int d = 0; d < dim; ++d) + d_sum(dim * q + d) = sum(d); + }); + Kokkos::fence(); + + std::vector ref_values(info.dim * nqp, 0.0); + return compare_device_values(d_sum, ref_values, invariant_tol); +} + +struct zero_sum_gradients_dispatch +{ + explicit zero_sum_gradients_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_zero_sum_gradients_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported zero-sum gradient topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_zero_sum_gradients(const map_elem_info & info) +{ + const zero_sum_gradients_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_kronecker_delta_impl(const map_elem_info & info) +{ + auto elem = build_reference_elem(Topo); + + std::vector xi_h(info.n_dofs), eta_h(info.n_dofs), zeta_h(info.n_dofs); + for (unsigned int j = 0; j < info.n_dofs; ++j) + { + const libMesh::Point p = elem->master_point(j); + xi_h[j] = p(0); + eta_h[j] = p(1); + zeta_h[j] = p(2); + } + + auto d_xi = upload_real(xi_h, "delta_xi"); + auto d_eta = upload_real(eta_h, "delta_eta"); + auto d_zeta = upload_real(zeta_h, "delta_zeta"); + + 
Kokkos::View d_values(std::string("delta_values"), info.n_dofs * info.n_dofs); + + const unsigned int n_dofs = info.n_dofs; + Kokkos::parallel_for( + n_dofs * n_dofs, + KOKKOS_LAMBDA(int idx) { + const unsigned int i = static_cast(idx) / n_dofs; + const unsigned int j = static_cast(idx) % n_dofs; + d_values(idx) = libMesh::Kokkos::map_shape( + i, d_xi(j), d_eta(j), d_zeta(j)); + }); + Kokkos::fence(); + + std::vector ref_values(info.n_dofs * info.n_dofs, 0.0); + for (unsigned int i = 0; i < info.n_dofs; ++i) + ref_values[i * info.n_dofs + i] = 1.0; + + return compare_device_values(d_values, ref_values, invariant_tol); +} + +struct kronecker_delta_dispatch +{ + explicit kronecker_delta_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_kronecker_delta_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported kronecker topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_kronecker_delta(const map_elem_info & info) +{ + const kronecker_delta_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +static double +integrate_monomial_on_device(const quadrature_exactness_case & info, + unsigned int order, + unsigned int a, + unsigned int b, + unsigned int c) +{ + const unsigned int nqp = GaussQuadrature::n_points(info.topo, order); + const libMesh::ElemType topo = info.topo; + + double integral = 0.0; + Kokkos::parallel_reduce( + nqp, + KOKKOS_LAMBDA(int qp, double & local_sum) { + const RealVector pt = GaussQuadrature::point(topo, order, qp); + const Real weight = GaussQuadrature::weight(topo, order, qp); + Real monomial = int_pow(pt(0), a); + +#if LIBMESH_DIM > 1 + monomial *= int_pow(pt(1), b); +#else + libmesh_assert_equal_to(b, 0); +#endif + +#if LIBMESH_DIM > 2 + monomial *= int_pow(pt(2), c); +#else + 
libmesh_assert_equal_to(c, 0); +#endif + + local_sum += static_cast(weight) * static_cast(monomial); + }, + integral); + + return integral; +} + +static int +test_quadrature_exactness(const quadrature_exactness_case & info) +{ + int fail = 0; + + for (unsigned int order = 0; order <= info.max_order; ++order) + switch (info.dim) + { + case 1: + for (unsigned int a = 0; a <= order; ++a) + { + const double actual = integrate_monomial_on_device(info, order, a, 0, 0); + const double expected = quadrature_exactness::monomial_integral(info.topo, a, 0, 0); + if (std::fabs(actual - expected) > exactness_tol) + ++fail; + } + break; + + case 2: + for (unsigned int a = 0; a <= order; ++a) + for (unsigned int b = 0; a + b <= order; ++b) + { + const double actual = integrate_monomial_on_device(info, order, a, b, 0); + const double expected = quadrature_exactness::monomial_integral(info.topo, a, b, 0); + if (std::fabs(actual - expected) > exactness_tol) + ++fail; + } + break; + + case 3: + for (unsigned int a = 0; a <= order; ++a) + for (unsigned int b = 0; a + b <= order; ++b) + for (unsigned int c = 0; a + b + c <= order; ++c) + { + const double actual = integrate_monomial_on_device(info, order, a, b, c); + const double expected = quadrature_exactness::monomial_integral(info.topo, a, b, c); + if (std::fabs(actual - expected) > exactness_tol) + ++fail; + } + break; + + default: + ++fail; + break; + } + + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : map_elems) + { + { + const int f = test_partition_of_unity(info); + std::printf("[partition_of_unity] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_zero_sum_gradients(info); + std::printf("[zero_sum_gradients] [%s] %s (%d failures)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_kronecker_delta(info); + std::printf("[kronecker_delta] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (const auto & info : quadrature_cases) + { + const int f = test_quadrature_exactness(info); + std::printf("[quadrature_exactness] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_map_oracle_test.K b/tests/fe/kokkos_fe_map_oracle_test.K new file mode 100644 index 00000000000..afde3b315ff --- /dev/null +++ b/tests/fe/kokkos_fe_map_oracle_test.K @@ -0,0 +1,529 @@ +// GPU kernel tests for libMesh::Kokkos map helpers across broad topology coverage. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEMap, FEBase, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase +// for all implemented LAGRANGE map topologies. +// B. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles for all +// sides of the implemented 2D and 3D parent topologies. +// C. map_face_qp_to_parent() against libMesh FEBase::side_map() for all +// sides and multiple side quadrature points, including supported +// mixed-face prism and pyramid element types. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_reference_fixture; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::is_supported_lagrange_face_map_topology; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; + +namespace +{ + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_qp_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +} // anonymous namespace + +template +static int +test_map_helpers_case_impl(const map_helper_case & info) +{ + auto fixture = build_reference_fixture(Topo); + const auto context = build_map_helper_context(fixture, info.topo, "map_oracle"); + const int fail = evaluate_map_helper_context(context, "map_oracle_results", tol); + if (fail) + std::printf(" map helper mismatch: topo=%s (%d failures)\n", info.name, fail); + + return 
fail; +} + +struct map_helper_dispatch +{ + explicit map_helper_dispatch(const map_helper_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_helpers_case_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-helper topology: topo=%s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; +}; + +static int +test_map_helpers_case(const map_helper_case & info) +{ + const map_helper_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_face_helper_side_case_3d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, "face_oracle_results", tol); + if (fail) + std::printf(" face helper mismatch: parent=%s side_id=%u side_type=%d (%d failures)\n", + parent_name, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_face_helper_side_case_2d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + libMesh::ElemType side_topo) +{ + const int fail = + evaluate_face_helper_context_2d(context, "face_oracle_results", tol); + if (fail) + std::printf(" face helper mismatch: parent=%s side_id=%u side_type=%d (%d failures)\n", + parent_name, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct face_helper_side_dispatch_3d +{ + face_helper_side_dispatch_3d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_helper_side_case_3d_impl(context, side_id, parent_name, side_topo); + } + 
+ int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper side: parent=%s side_id=%u side_type=%d\n", + parent_name, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + libMesh::ElemType side_topo; +}; + +template +struct face_helper_side_dispatch_2d +{ + face_helper_side_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_helper_side_case_2d_impl( + context, side_id, parent_name, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper side: parent=%s side_id=%u side_type=%d\n", + parent_name, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + libMesh::ElemType side_topo; +}; + +struct face_helper_parent_dispatch_2d +{ + face_helper_parent_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const face_helper_side_dispatch_2d dispatch( + context, side_id, parent_name, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper parent: parent=%s parent_type=%d side_id=%u\n", + parent_name, + static_cast(topo), + side_id); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const 
char * parent_name; + libMesh::ElemType side_topo; +}; + +static int +test_face_helper_side_case(const element_fixture & fixture, + unsigned int side_id, + const char * parent_name) +{ + auto side = fixture.elem->build_side_ptr(side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, side_id, "face_oracle"); + + if (context.parent_dim == 3) + { + const face_helper_side_dispatch_3d dispatch(context, side_id, parent_name, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const face_helper_parent_dispatch_2d dispatch(context, side_id, parent_name, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported face-helper parent dimension: parent=%s dim=%u side_id=%u\n", + parent_name, + context.parent_dim, + side_id); + return 1; +} + +static int +test_face_helpers_for_parent(const face_parent_case & info) +{ + auto fixture = build_reference_fixture(info.topo); + + int fail = 0; + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + fail += test_face_helper_side_case(fixture, side_id, info.name); + + return fail; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + + // FE::side_map() relies on FEMap::psi_map, which is only populated after + // some mapping quantity (e.g. xyz) has been requested on the FE object. 
+ fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_face_qp_to_parent_case(const char * parent_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + { + std::vector refspace_nodes; + libMesh::FEBase::get_refspace_nodes(parent.type(), refspace_nodes); + + std::printf(" face_qp mismatch: parent=%s side_id=%u parent_type=%d side_type=%d\n", + parent_name, + side_id, + static_cast(parent.type()), + static_cast(side.type())); + std::printf(" face_qpt=(%.17g, %.17g, %.17g)\n", + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + std::printf(" host =(%.17g, %.17g, %.17g)\n", + vector_component(host, 0), vector_component(host, 1), vector_component(host, 2)); + std::printf(" kokkos =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0), vector_component(kokkos, 1), vector_component(kokkos, 2)); + std::printf(" side nodes / parent refspace nodes:\n"); + + for (unsigned int k = 0; k < side.n_nodes(); ++k) + { + const unsigned int parent_node = parent.local_side_node(side_id, k); + const libMesh::Point parent_refspace = refspace_nodes[parent_node]; + std::printf(" k=%u side_node_id=%llu parent_node=%u 
parent_refspace=(%.17g, %.17g, %.17g)\n", + k, + libMesh::cast_int(side.node_id(k)), + parent_node, + parent_refspace(0), + parent_refspace(1), + parent_refspace(2)); + } + } + + return fail; +} + +static int +test_face_qp_to_parent_for_parent(const face_qp_parent_case & info) +{ + auto fixture = build_reference_fixture(info.topo); + + int fail = 0; + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + + if (side->n_nodes() == 1) + { + fail += + check_face_qp_to_parent_case(info.name, *fixture.elem, *side, side_id, libMesh::Kokkos::zero_vector()); + continue; + } + + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected unsupported face_qp side: parent=%s side_id=%u side_type=%d\n", + info.name, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + libMesh::QGauss qr(side->dim(), libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + for (unsigned int q = 0; q < qr.n_points(); ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? 
qr.qp(q)(2) : Real(0)); + fail += check_face_qp_to_parent_case(info.name, *fixture.elem, *side, side_id, face_qpt); + } + } + + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const map_helper_case cases[] = { + { libMesh::EDGE2, "EDGE2" }, + { libMesh::EDGE3, "EDGE3" }, + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" } + }; + + for (const auto & info : cases) + { + const int f = test_map_helpers_case(info); + std::printf("[map_helper_breadth] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + { + const face_parent_case cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } + }; + + for (const auto & info : cases) + { + const int f = test_face_helpers_for_parent(info); + std::printf("[face_helper_breadth] [%s] %s (%d failures)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + } + + { + const face_qp_parent_case cases[] = { + { libMesh::EDGE2, "EDGE2" }, + { libMesh::EDGE3, "EDGE3" }, + { libMesh::EDGE4, "EDGE4" }, + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } + }; + + for (const auto & info : cases) + { + const int f = test_face_qp_to_parent_for_parent(info); + std::printf("[face_qp_parent_breadth] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_oracle_test_utils.h b/tests/fe/kokkos_fe_oracle_test_utils.h new file mode 100644 index 00000000000..5d6f53e0920 --- /dev/null +++ b/tests/fe/kokkos_fe_oracle_test_utils.h @@ -0,0 +1,1386 @@ +#ifndef LIBMESH_TESTS_FE_KOKKOS_FE_ORACLE_TEST_UTILS_H +#define LIBMESH_TESTS_FE_KOKKOS_FE_ORACLE_TEST_UTILS_H + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +#include +#include +#include +#include + +// This header is intended for the standalone Kokkos test executables, which +// include Kokkos before pulling in these helpers. 
+ +namespace kokkos_test_utils +{ + +LIBMESH_DEVICE_INLINE libMesh::Real +vector_component(const libMesh::Kokkos::RealVector & v, unsigned int component) +{ + switch (component) + { + case 0: + return v(0); + case 1: +#if LIBMESH_DIM > 1 + return v(1); +#else + return 0.0; +#endif + case 2: +#if LIBMESH_DIM > 2 + return v(2); +#else + return 0.0; +#endif + default: + return 0.0; + } +} + +LIBMESH_DEVICE_INLINE libMesh::Real +tensor_component(const libMesh::Kokkos::RealTensor & T, unsigned int i, unsigned int j) +{ +#if LIBMESH_DIM > 2 + return T(i, j); +#elif LIBMESH_DIM > 1 + if (i < 2 && j < 2) + return T(i, j); + return 0.0; +#else + if (i == 0 && j == 0) + return T(0, 0); + return 0.0; +#endif +} + +struct element_fixture +{ + std::unique_ptr elem; + std::vector> nodes; +}; + +struct map_helper_context +{ + std::vector ref_values; + Kokkos::View d_coords; + Kokkos::View d_xi; + Kokkos::View d_eta; + Kokkos::View d_zeta; + Kokkos::View d_w; + unsigned int nqp; + unsigned int dim; + unsigned int n_nodes; +}; + +struct face_helper_context +{ + std::vector ref_values; + Kokkos::View d_face_coords; + Kokkos::View d_parent_coords; + Kokkos::View d_xi; + Kokkos::View d_eta; + Kokkos::View d_zeta; + Kokkos::View d_w; + Kokkos::View d_parent_xi; + Kokkos::View d_parent_eta; + Kokkos::View d_parent_zeta; + unsigned int nqp; + unsigned int parent_dim; + unsigned int n_parent_nodes; + unsigned int n_face_nodes; +}; + +template +struct lagrange_evaluator_topology +{ + static const libMesh::ElemType value = libMesh::INVALID_ELEM; +}; + +#define KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(exact_topo, exact_order, evaluator_topo) \ + template <> \ + struct lagrange_evaluator_topology \ + { \ + static const libMesh::ElemType value = libMesh::evaluator_topo; \ + } + +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE2, FIRST, EDGE2); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE3, FIRST, EDGE2); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE3, SECOND, EDGE3); 
+KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE4, FIRST, EDGE2); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI3, FIRST, TRI3); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI6, FIRST, TRI3); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI6, SECOND, TRI6); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI7, FIRST, TRI3); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI7, SECOND, TRI6); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD4, FIRST, QUAD4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD8, FIRST, QUAD4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD8, SECOND, QUAD8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD9, FIRST, QUAD4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD9, SECOND, QUAD9); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET4, FIRST, TET4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET10, FIRST, TET4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET10, SECOND, TET10); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET14, FIRST, TET4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET14, SECOND, TET10); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX8, FIRST, HEX8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX20, FIRST, HEX8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX20, SECOND, HEX20); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX27, FIRST, HEX8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX27, SECOND, HEX27); + +#undef KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE + +template +struct monomial_evaluator_dim +{ + static const unsigned int value = 0; +}; + +#define KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(exact_topo, dim_value) \ + template <> \ + struct monomial_evaluator_dim \ + { \ + static const unsigned int value = dim_value; \ + } + +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE2, 1); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE3, 1); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE4, 1); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI3, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI6, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI7, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD4, 2); 
+KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD8, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD9, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET4, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET10, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET14, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX8, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX20, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX27, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM6, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM15, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM18, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM20, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM21, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID5, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID13, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID14, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID18, 3); + +#undef KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE + +template +struct monomial_order_evaluator; + +#define KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(dim_value, exact_order, impl_suffix, impl_order) \ + template <> \ + struct monomial_order_evaluator \ + { \ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, \ + libMesh::Real xi, \ + libMesh::Real eta, \ + libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::shape(i, xi, eta, zeta); \ + } \ + \ + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, \ + libMesh::Real xi, \ + libMesh::Real eta, \ + libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::grad_shape(i, xi, eta, zeta); \ + } \ + } + +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, CONSTANT, MonomialImpl1D, 0); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FIRST, MonomialImpl1D, 1); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, SECOND, MonomialImpl1D, 2); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, THIRD, MonomialImpl1D, 3); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FOURTH, MonomialImpl1D, 4); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FIFTH, MonomialImpl1D, 5); 
+KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, CONSTANT, MonomialImpl2D, 0); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FIRST, MonomialImpl2D, 1); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, SECOND, MonomialImpl2D, 2); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, THIRD, MonomialImpl2D, 3); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FOURTH, MonomialImpl2D, 4); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FIFTH, MonomialImpl2D, 5); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, CONSTANT, MonomialImpl3D, 0); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FIRST, MonomialImpl3D, 1); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, SECOND, MonomialImpl3D, 2); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, THIRD, MonomialImpl3D, 3); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FOURTH, MonomialImpl3D, 4); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FIFTH, MonomialImpl3D, 5); + +#undef KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE + +template +struct exact_shape_evaluator; + +template +struct exact_shape_evaluator +{ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return libMesh::Kokkos::map_shape::value>( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return libMesh::Kokkos::grad_map_shape::value>( + i, xi, eta, zeta); + } +}; + +template +struct exact_shape_evaluator +{ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::value, ExactOrder>::shape( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::value, ExactOrder>::grad_shape( + i, xi, eta, zeta); + } +}; + +template +LIBMESH_DEVICE_INLINE libMesh::Real 
+shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::shape(i, xi, eta, zeta); +} + +template +LIBMESH_DEVICE_INLINE libMesh::Kokkos::RealVector +grad_shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::grad_shape(i, xi, eta, zeta); +} + +template +inline int +dispatch_supported_monomial_order(libMesh::Order order, const Dispatcher & dispatcher) +{ + switch (order) + { + case libMesh::CONSTANT: + return dispatcher.template operator()(); + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + case libMesh::FOURTH: + return dispatcher.template operator()(); + case libMesh::FIFTH: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(libMesh::Kokkos::FEShapeKey{ libMesh::MONOMIAL, ExactTopo, order }); + } +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo); + +template +inline int +dispatch_supported_lagrange_shape_key(libMesh::Kokkos::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (key.family != libMesh::LAGRANGE || !libMesh::Kokkos::supports_shape(key)) + return dispatcher.unsupported_key(key); + + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + 
switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI7: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET14: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return 
dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_supported_lagrange_shape_key_with_map(libMesh::Kokkos::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (key.family != libMesh::LAGRANGE || + !libMesh::Kokkos::supports_shape(key) || + !is_supported_lagrange_map_topology(key.elem_type)) + return dispatcher.unsupported_key(key); + + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch 
(key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_supported_shape_key(libMesh::Kokkos::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (!libMesh::Kokkos::supports_shape(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_supported_lagrange_shape_key(key, dispatcher); + + case libMesh::MONOMIAL: + switch (key.elem_type) + { + case libMesh::EDGE2: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI7: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM15: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM18: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM21: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID5: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID13: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID14: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID18: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + case libMesh::TET4: + case libMesh::TET10: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return true; + + default: + return false; + } +} + +inline bool +supports_shape_key_with_lagrange_map(libMesh::Kokkos::FEShapeKey key) +{ + return libMesh::Kokkos::supports_shape(key) && + is_supported_lagrange_map_topology(key.elem_type); +} + +template +inline int +dispatch_supported_shape_key_with_lagrange_map(libMesh::Kokkos::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (!supports_shape_key_with_lagrange_map(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_supported_lagrange_shape_key_with_map(key, dispatcher); + + case libMesh::MONOMIAL: + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_face_map_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return true; + + default: + return false; + } +} + +template +inline int +dispatch_supported_lagrange_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + switch (topo) + { + case libMesh::EDGE2: + return dispatcher.template operator()(); + case libMesh::EDGE3: + return dispatcher.template operator()(); + case libMesh::TRI3: + return dispatcher.template operator()(); + case libMesh::TRI6: + return dispatcher.template operator()(); + case libMesh::QUAD4: + return dispatcher.template operator()(); + case libMesh::QUAD8: + return dispatcher.template operator()(); + case libMesh::QUAD9: + return dispatcher.template operator()(); + case libMesh::TET4: + return dispatcher.template operator()(); + case libMesh::TET10: + return dispatcher.template operator()(); + case libMesh::HEX8: + return dispatcher.template operator()(); + case libMesh::HEX20: + return dispatcher.template operator()(); + case libMesh::HEX27: + return dispatcher.template operator()(); + + default: + return dispatcher.unsupported_topology(topo); + } +} + +template +inline int +dispatch_supported_lagrange_face_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + if 
(!is_supported_lagrange_face_map_topology(topo)) + return dispatcher.unsupported_topology(topo); + + return dispatch_supported_lagrange_map_topology(topo, dispatcher); +} + +inline int +compare_device_values(const Kokkos::View & d_values, + const std::vector & ref_values, + double tol = 1.0e-13) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (std::fabs(h_values(i) - ref_values[i]) > tol) + ++fail; + + return fail; +} + +inline std::unique_ptr +build_reference_elem(libMesh::ElemType elem_type) +{ + auto elem = libMesh::Elem::build(elem_type); + elem->set_mapping_type(libMesh::LAGRANGE_MAP); + return elem; +} + +inline unsigned int +build_qps(libMesh::ElemType elem_type, + unsigned int dim, + unsigned int quadrature_order, + std::vector & xi_h, + std::vector & eta_h, + std::vector & zeta_h) +{ + libMesh::QGauss qr(dim, static_cast(quadrature_order)); + qr.allow_rules_with_negative_weights = true; + qr.init(elem_type); + + const unsigned int nqp = qr.n_points(); + xi_h.resize(nqp); + eta_h.resize(nqp); + zeta_h.resize(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = (dim >= 3) ? 
qr.qp(q)(2) : libMesh::Real(0); + } + + return nqp; +} + +inline unsigned int +build_qps(libMesh::ElemType elem_type, + unsigned int dim, + std::vector & xi_h, + std::vector & eta_h, + std::vector & zeta_h) +{ + return build_qps(elem_type, dim, /*quadrature_order=*/4, xi_h, eta_h, zeta_h); +} + +inline unsigned int +build_host_qgauss(libMesh::ElemType topo, + unsigned int dim, + unsigned int order, + std::vector & x_ref, + std::vector & y_ref, + std::vector & z_ref, + std::vector & w_ref) +{ + libMesh::QGauss qr(dim, static_cast(order)); + qr.allow_rules_with_negative_weights = true; + qr.init(topo); + + const unsigned int nqp = qr.n_points(); + x_ref.resize(nqp); + y_ref.resize(nqp); + z_ref.resize(nqp); + w_ref.resize(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + x_ref[q] = qr.qp(q)(0); + y_ref[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + z_ref[q] = (dim >= 3) ? qr.qp(q)(2) : libMesh::Real(0); + w_ref[q] = qr.w(q); + } + + return nqp; +} + +inline Kokkos::View +upload_real(const std::vector & values, const char * label) +{ + Kokkos::View d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + Kokkos::deep_copy(d, h); + return d; +} + +inline Kokkos::View +upload_point_coordinates(const libMesh::Elem & elem, const char * label) +{ + Kokkos::View d(std::string(label), 3 * elem.n_nodes()); + auto h = Kokkos::create_mirror_view(d); + for (unsigned int i = 0; i < elem.n_nodes(); ++i) + { + h(3 * i + 0) = elem.point(i)(0); + h(3 * i + 1) = elem.point(i)(1); + h(3 * i + 2) = elem.point(i)(2); + } + Kokkos::deep_copy(d, h); + return d; +} + +inline std::string +make_label(const char * prefix, const char * suffix) +{ + return std::string(prefix) + suffix; +} + +inline element_fixture +build_reference_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = build_reference_elem(elem_type); + 
fixture.nodes.reserve(fixture.elem->n_nodes()); + + const unsigned int dim = fixture.elem->dim(); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point master = fixture.elem->master_point(i); + const libMesh::Real xi = master(0); + const libMesh::Real eta = master(1); + const libMesh::Real zeta = master(2); + + libMesh::Point xyz; + switch (dim) + { + case 1: + xyz = libMesh::Point( + 0.7 + 0.8 * xi + 0.06 * xi * xi, + -0.3 + 0.25 * xi + 0.04 * xi * xi, + 0.2 + 0.1 * xi - 0.03 * xi * xi); + break; + + case 2: + xyz = libMesh::Point( + 0.4 + 0.9 * xi + 0.15 * eta + 0.04 * xi * eta + 0.03 * eta * eta, + -0.2 + 0.2 * xi + 0.85 * eta + 0.05 * xi * xi + 0.03 * xi * eta, + 0.1 + 0.12 * xi - 0.08 * eta + 0.02 * xi * eta); + break; + + case 3: + xyz = libMesh::Point( + 0.3 + 0.9 * xi + 0.12 * eta + 0.08 * zeta + 0.03 * xi * eta + 0.02 * zeta * zeta, + -0.1 + 0.18 * xi + 0.8 * eta + 0.11 * zeta + 0.02 * eta * zeta, + 0.2 + 0.10 * xi + 0.14 * eta + 0.85 * zeta + 0.02 * xi * zeta + 0.01 * xi * eta); + break; + + default: + xyz = libMesh::Point(); + break; + } + + fixture.nodes.push_back(libMesh::Node::build(xyz(0), xyz(1), xyz(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +inline element_fixture +build_flat_reference_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = build_reference_elem(elem_type); + fixture.nodes.reserve(fixture.elem->n_nodes()); + + const unsigned int dim = fixture.elem->dim(); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point master = fixture.elem->master_point(i); + const libMesh::Real xi = master(0); + const libMesh::Real eta = master(1); + const libMesh::Real zeta = master(2); + + libMesh::Point xyz; + switch (dim) + { + case 1: + xyz = libMesh::Point(0.7 + 0.8 * xi + 0.06 * xi * xi, + 0.0, + 0.0); + break; + + case 2: + xyz = libMesh::Point(0.4 + 0.9 * xi + 0.15 * eta + 0.04 * xi * eta + 0.03 * 
eta * eta, + -0.2 + 0.2 * xi + 0.85 * eta + 0.05 * xi * xi + 0.03 * xi * eta, + 0.0); + break; + + case 3: + xyz = libMesh::Point( + 0.3 + 0.9 * xi + 0.12 * eta + 0.08 * zeta + 0.03 * xi * eta + 0.02 * zeta * zeta, + -0.1 + 0.18 * xi + 0.8 * eta + 0.11 * zeta + 0.02 * eta * zeta, + 0.2 + 0.10 * xi + 0.14 * eta + 0.85 * zeta + 0.02 * xi * zeta + 0.01 * xi * eta); + break; + + default: + xyz = libMesh::Point(); + break; + } + + fixture.nodes.push_back(libMesh::Node::build(xyz(0), xyz(1), xyz(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +inline element_fixture +build_permuted_reference_fixture(libMesh::ElemType elem_type, + unsigned int perm_num) +{ + element_fixture fixture = build_reference_fixture(elem_type); + fixture.elem->permute(perm_num); + return fixture; +} + +inline map_helper_context +build_map_helper_context(const element_fixture & fixture, + libMesh::ElemType topo, + const char * label_prefix) +{ + map_helper_context context; + + const unsigned int dim = fixture.elem->dim(); + const unsigned int n_nodes = fixture.elem->n_nodes(); + const libMesh::FEType fe_type(fixture.elem->default_order(), + libMesh::FEMap::map_fe_type(*fixture.elem)); + auto fe = libMesh::FEBase::build(dim, fe_type); + + libMesh::QGauss qr(dim, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(topo); + + fe->attach_quadrature_rule(&qr); + fe->get_xyz(); + fe->get_dxyzdxi(); + if (dim >= 2) + fe->get_dxyzdeta(); + if (dim >= 3) + fe->get_dxyzdzeta(); + fe->get_JxW(); + fe->reinit(fixture.elem.get()); + + const unsigned int nqp = qr.n_points(); + const auto & xyz = fe->get_xyz(); + const auto & dxyzdxi = fe->get_dxyzdxi(); + const auto & jxw = fe->get_JxW(); + + context.ref_values.resize(13 * nqp); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp), w_h(nqp); + for (unsigned int q = 0; q < nqp; ++q) + { + libMesh::RealGradient dxyzdeta(0.0); + libMesh::RealGradient dxyzdzeta(0.0); + if (dim >= 2) + dxyzdeta = 
fe->get_dxyzdeta()[q]; + if (dim >= 3) + dxyzdzeta = fe->get_dxyzdzeta()[q]; + + const unsigned int base = 13 * q; + context.ref_values[base + 0] = xyz[q](0); + context.ref_values[base + 1] = xyz[q](1); + context.ref_values[base + 2] = xyz[q](2); + context.ref_values[base + 3] = dxyzdxi[q](0); + context.ref_values[base + 4] = dxyzdxi[q](1); + context.ref_values[base + 5] = dxyzdxi[q](2); + context.ref_values[base + 6] = dxyzdeta(0); + context.ref_values[base + 7] = dxyzdeta(1); + context.ref_values[base + 8] = dxyzdeta(2); + context.ref_values[base + 9] = dxyzdzeta(0); + context.ref_values[base + 10] = dxyzdzeta(1); + context.ref_values[base + 11] = dxyzdzeta(2); + context.ref_values[base + 12] = jxw[q]; + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = (dim >= 3) ? qr.qp(q)(2) : libMesh::Real(0); + w_h[q] = qr.w(q); + } + + context.d_coords = upload_point_coordinates(*fixture.elem, make_label(label_prefix, "_coords").c_str()); + context.d_xi = upload_real(xi_h, make_label(label_prefix, "_xi").c_str()); + context.d_eta = upload_real(eta_h, make_label(label_prefix, "_eta").c_str()); + context.d_zeta = upload_real(zeta_h, make_label(label_prefix, "_zeta").c_str()); + context.d_w = upload_real(w_h, make_label(label_prefix, "_w").c_str()); + context.nqp = nqp; + context.dim = dim; + context.n_nodes = n_nodes; + + return context; +} + +template +inline int +evaluate_map_helper_context(const map_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + constexpr unsigned int max_nodes = 27; + + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_coords = context.d_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const unsigned int dim_ = context.dim; + const unsigned int n_nodes_ = context.n_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int 
q) { + libMesh::Kokkos::RealVector nodes[max_nodes]; + for (unsigned int i = 0; i < n_nodes_; ++i) + nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + libMesh::Kokkos::RealVector xyz; + libMesh::Kokkos::RealTensor J; + libMesh::Kokkos::physical_point_and_jacobian( + nodes, n_nodes_, d_xi(q), d_eta(q), d_zeta(q), xyz, J); + + const libMesh::Real jxw_q = libMesh::Kokkos::volume_jxw(J, dim_, d_w(q)); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = vector_component(xyz, 0); + d_results(base + 1) = vector_component(xyz, 1); + d_results(base + 2) = vector_component(xyz, 2); + d_results(base + 3) = tensor_component(J, 0, 0); + d_results(base + 4) = tensor_component(J, 0, 1); + d_results(base + 5) = tensor_component(J, 0, 2); + d_results(base + 6) = tensor_component(J, 1, 0); + d_results(base + 7) = tensor_component(J, 1, 1); + d_results(base + 8) = tensor_component(J, 1, 2); + d_results(base + 9) = tensor_component(J, 2, 0); + d_results(base + 10) = tensor_component(J, 2, 1); + d_results(base + 11) = tensor_component(J, 2, 2); + d_results(base + 12) = jxw_q; + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +inline face_helper_context +build_face_helper_context(const element_fixture & fixture, + const libMesh::Elem & side, + unsigned int side_id, + const char * label_prefix) +{ + face_helper_context context; + const unsigned int parent_dim = fixture.elem->dim(); + const libMesh::FEType fe_type(fixture.elem->default_order(), + libMesh::FEMap::map_fe_type(*fixture.elem)); + const unsigned int side_dim = side.dim(); + auto side_fe = libMesh::FEBase::build(parent_dim, fe_type); + + libMesh::QGauss qr(parent_dim - 1, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side.type()); + + side_fe->attach_quadrature_rule(&qr); + side_fe->get_JxW(); + side_fe->get_normals(); + side_fe->get_dxyzdxi(); + if (parent_dim >= 3) 
+ side_fe->get_dxyzdeta(); + side_fe->reinit(fixture.elem.get(), side_id); + + const unsigned int nqp = qr.n_points(); + const unsigned int n_parent_nodes = fixture.elem->n_nodes(); + const unsigned int n_face_nodes = side.n_nodes(); + + std::vector side_ref_points(nqp); + for (unsigned int q = 0; q < nqp; ++q) + side_ref_points[q] = qr.qp(q); + + std::vector parent_ref_points; + if (parent_dim == 2) + { + auto side_map_fe = libMesh::FEBase::build(parent_dim, fe_type); + side_map_fe->get_xyz(); + side_map_fe->side_map(fixture.elem.get(), &side, side_id, side_ref_points, parent_ref_points); + } + + context.ref_values.resize(13 * nqp); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp), w_h(nqp); + std::vector parent_xi_h(nqp, 0.0), parent_eta_h(nqp, 0.0), parent_zeta_h(nqp, 0.0); + for (unsigned int q = 0; q < nqp; ++q) + { + const libMesh::Point row0 = libMesh::FEMap::map_deriv(side_dim, &side, 0, side_ref_points[q]); + libMesh::Point row1(0.0); + if (side_dim >= 2) + row1 = libMesh::FEMap::map_deriv(side_dim, &side, 1, side_ref_points[q]); + const auto & normal = side_fe->get_normals()[q]; + const unsigned int base = 13 * q; + + context.ref_values[base + 0] = row0(0); + context.ref_values[base + 1] = row0(1); + context.ref_values[base + 2] = row0(2); + context.ref_values[base + 3] = row1(0); + context.ref_values[base + 4] = row1(1); + context.ref_values[base + 5] = row1(2); + context.ref_values[base + 6] = 0.0; + context.ref_values[base + 7] = 0.0; + context.ref_values[base + 8] = 0.0; + context.ref_values[base + 9] = side_fe->get_JxW()[q]; + context.ref_values[base + 10] = normal(0); + context.ref_values[base + 11] = normal(1); + context.ref_values[base + 12] = normal(2); + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (parent_dim >= 3) ? 
qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = 0.0; + w_h[q] = qr.w(q); + + if (parent_dim == 2) + { + parent_xi_h[q] = parent_ref_points[q](0); + parent_eta_h[q] = parent_ref_points[q](1); + parent_zeta_h[q] = parent_ref_points[q](2); + } + } + + context.d_face_coords = upload_point_coordinates(side, make_label(label_prefix, "_coords").c_str()); + context.d_parent_coords = upload_point_coordinates(*fixture.elem, make_label(label_prefix, "_parent_coords").c_str()); + context.d_xi = upload_real(xi_h, make_label(label_prefix, "_xi").c_str()); + context.d_eta = upload_real(eta_h, make_label(label_prefix, "_eta").c_str()); + context.d_zeta = upload_real(zeta_h, make_label(label_prefix, "_zeta").c_str()); + context.d_w = upload_real(w_h, make_label(label_prefix, "_w").c_str()); + context.d_parent_xi = upload_real(parent_xi_h, make_label(label_prefix, "_parent_xi").c_str()); + context.d_parent_eta = upload_real(parent_eta_h, make_label(label_prefix, "_parent_eta").c_str()); + context.d_parent_zeta = upload_real(parent_zeta_h, make_label(label_prefix, "_parent_zeta").c_str()); + context.nqp = nqp; + context.parent_dim = parent_dim; + context.n_parent_nodes = n_parent_nodes; + context.n_face_nodes = n_face_nodes; + + return context; +} + +template +inline int +evaluate_face_helper_context_2d(const face_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + constexpr unsigned int max_face_nodes = 9; + constexpr unsigned int max_parent_nodes = 27; + + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_face_coords = context.d_face_coords; + const auto d_parent_coords = context.d_parent_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const auto d_parent_xi = context.d_parent_xi; + const auto d_parent_eta = context.d_parent_eta; + const auto d_parent_zeta = context.d_parent_zeta; + const unsigned int 
n_parent_nodes_ = context.n_parent_nodes; + const unsigned int n_face_nodes_ = context.n_face_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + libMesh::Kokkos::RealVector face_nodes[max_face_nodes]; + libMesh::Kokkos::RealVector parent_nodes[max_parent_nodes]; + for (unsigned int i = 0; i < n_face_nodes_; ++i) + face_nodes[i] = libMesh::Kokkos::make_vector( + d_face_coords(3 * i + 0), d_face_coords(3 * i + 1), d_face_coords(3 * i + 2)); + for (unsigned int i = 0; i < n_parent_nodes_; ++i) + parent_nodes[i] = libMesh::Kokkos::make_vector( + d_parent_coords(3 * i + 0), d_parent_coords(3 * i + 1), d_parent_coords(3 * i + 2)); + + const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( + face_nodes, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + const libMesh::Kokkos::RealTensor parent_J = libMesh::Kokkos::jacobian( + parent_nodes, n_parent_nodes_, d_parent_xi(q), d_parent_eta(q), d_parent_zeta(q)); + const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/2u, d_w(q)); + const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::edge_normal_on_parent_surface(J, parent_J); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = tensor_component(J, 0, 0); + d_results(base + 1) = tensor_component(J, 0, 1); + d_results(base + 2) = tensor_component(J, 0, 2); + d_results(base + 3) = tensor_component(J, 1, 0); + d_results(base + 4) = tensor_component(J, 1, 1); + d_results(base + 5) = tensor_component(J, 1, 2); + d_results(base + 6) = tensor_component(J, 2, 0); + d_results(base + 7) = tensor_component(J, 2, 1); + d_results(base + 8) = tensor_component(J, 2, 2); + d_results(base + 9) = jxw_q; + d_results(base + 10) = vector_component(normal_q, 0); + d_results(base + 11) = vector_component(normal_q, 1); + d_results(base + 12) = vector_component(normal_q, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +template +inline int 
+evaluate_face_helper_context_3d(const face_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + constexpr unsigned int max_face_nodes = 9; + + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_face_coords = context.d_face_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const unsigned int n_face_nodes_ = context.n_face_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + libMesh::Kokkos::RealVector face_nodes[max_face_nodes]; + for (unsigned int i = 0; i < n_face_nodes_; ++i) + face_nodes[i] = libMesh::Kokkos::make_vector( + d_face_coords(3 * i + 0), d_face_coords(3 * i + 1), d_face_coords(3 * i + 2)); + + const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( + face_nodes, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/3u, d_w(q)); + const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::face_normal(J, /*parent_dim=*/3u); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = tensor_component(J, 0, 0); + d_results(base + 1) = tensor_component(J, 0, 1); + d_results(base + 2) = tensor_component(J, 0, 2); + d_results(base + 3) = tensor_component(J, 1, 0); + d_results(base + 4) = tensor_component(J, 1, 1); + d_results(base + 5) = tensor_component(J, 1, 2); + d_results(base + 6) = tensor_component(J, 2, 0); + d_results(base + 7) = tensor_component(J, 2, 1); + d_results(base + 8) = tensor_component(J, 2, 2); + d_results(base + 9) = jxw_q; + d_results(base + 10) = vector_component(normal_q, 0); + d_results(base + 11) = vector_component(normal_q, 1); + d_results(base + 12) = vector_component(normal_q, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +} // namespace kokkos_test_utils + +#endif diff --git 
a/tests/fe/kokkos_fe_permuted_map_oracle_test.K b/tests/fe/kokkos_fe_permuted_map_oracle_test.K new file mode 100644 index 00000000000..d988bc9543e --- /dev/null +++ b/tests/fe/kokkos_fe_permuted_map_oracle_test.K @@ -0,0 +1,512 @@ +// GPU kernel tests for permuted libMesh::Kokkos map helpers. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEMap, FEBase, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase +// for every non-identity element permutation of the implemented +// LAGRANGE map topologies. +// B. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles for every +// non-identity parent permutation of the supported 2D and 3D parent +// topologies, including mixed-face prism and pyramid parents. +// C. map_face_qp_to_parent() against libMesh FEBase::side_map() for every +// non-identity permutation of those same parent topologies. +// +// Returns 0 on success, non-zero on failure. 
+
+#include "libmesh/libmesh_config.h"
+
+#include "gpu/kokkos_fe_face_map.h"
+#include "gpu/kokkos_fe_map.h"
+
+#include "libmesh/elem.h"
+#include "libmesh/fe_base.h"
+#include "libmesh/fe_map.h"
+#include "libmesh/libmesh.h"
+#include "libmesh/quadrature_gauss.h"
+
+// Avoid conflicting complex operators between CUDA and PETSc
+#define PETSC_SKIP_CXX_COMPLEX_FIX 1
+#include // NOTE(review): no header name is visible on this and the bare #include lines below; presumably lost in extraction, confirm against the original patch
+#undef __CUDACC_VER__
+
+#include "kokkos_fe_oracle_test_utils.h"
+
+#include
+#include
+#include
+#include
+
+using libMesh::Kokkos::Real;
+using libMesh::Kokkos::RealVector;
+using kokkos_test_utils::build_face_helper_context;
+using kokkos_test_utils::build_map_helper_context;
+using kokkos_test_utils::build_permuted_reference_fixture;
+using kokkos_test_utils::build_reference_elem;
+using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology;
+using kokkos_test_utils::dispatch_supported_lagrange_map_topology;
+using kokkos_test_utils::element_fixture;
+using kokkos_test_utils::evaluate_face_helper_context_2d;
+using kokkos_test_utils::evaluate_face_helper_context_3d;
+using kokkos_test_utils::evaluate_map_helper_context;
+using kokkos_test_utils::face_helper_context;
+using kokkos_test_utils::is_supported_lagrange_face_map_topology;
+using kokkos_test_utils::vector_component;
+
+static constexpr double tol = 1.0e-13; // tolerance handed to the evaluate_* helpers and used directly in check_permuted_face_qp_case()
+
+namespace
+{
+
+struct map_helper_case // one volume-map test entry: element topology plus the name printed in diagnostics
+{
+  libMesh::ElemType topo;
+  const char * name;
+};
+
+struct face_parent_case // one face-helper test entry: parent element topology plus the name printed in diagnostics
+{
+  libMesh::ElemType topo;
+  const char * name;
+};
+
+static const map_helper_case map_cases[] = { // iterated by test_map_helpers_for_all_permutations()
+  { libMesh::TRI3, "TRI3" },
+  { libMesh::TRI6, "TRI6" },
+  { libMesh::QUAD4, "QUAD4" },
+  { libMesh::QUAD8, "QUAD8" },
+  { libMesh::QUAD9, "QUAD9" },
+  { libMesh::TET4, "TET4" },
+  { libMesh::TET10, "TET10" },
+  { libMesh::HEX8, "HEX8" },
+  { libMesh::HEX20, "HEX20" },
+  { libMesh::HEX27, "HEX27" }
+};
+
+static const face_parent_case face_cases[] = { // iterated by the face-helper and face_qp drivers; adds prism and pyramid parents to the list above
+  { libMesh::TRI3, "TRI3" },
+  { libMesh::TRI6, "TRI6" },
+  { libMesh::QUAD4, "QUAD4" },
+  { libMesh::QUAD8, "QUAD8" },
+  { libMesh::QUAD9, "QUAD9" },
+  { libMesh::TET4, "TET4" },
+  { libMesh::TET10, "TET10" },
+  { libMesh::HEX8, "HEX8" },
+  { libMesh::HEX20, "HEX20" },
+  { libMesh::HEX27, "HEX27" },
+  { libMesh::PRISM6, "PRISM6" },
+  { libMesh::PRISM15, "PRISM15" },
+  { libMesh::PRISM18, "PRISM18" },
+  { libMesh::PYRAMID5, "PYRAMID5" },
+  { libMesh::PYRAMID13, "PYRAMID13" },
+  { libMesh::PYRAMID14, "PYRAMID14" }
+};
+
+} // anonymous namespace
+
+template // NOTE(review): no template parameter list is visible here although Topo is used below; presumably lost in extraction, confirm
+static int
+test_permuted_map_case_impl(const map_helper_case & info, unsigned int perm_num) // builds the permuted fixture and map context, evaluates it, and returns the failure count
+{
+  auto fixture = build_permuted_reference_fixture(Topo, perm_num);
+  const auto context = build_map_helper_context(fixture, info.topo, "perm_map");
+  const int fail = evaluate_map_helper_context(context, "perm_map_results", tol);
+  if (fail)
+    std::printf(" permuted map mismatch: topo=%s perm=%u (%d failures)\n",
+                info.name, perm_num, fail);
+
+  return fail;
+}
+
+struct permuted_map_dispatch // functor passed to dispatch_supported_lagrange_map_topology(); carries the case and permutation number
+{
+  permuted_map_dispatch(const map_helper_case & in_info, unsigned int in_perm_num)
+    : info(in_info), perm_num(in_perm_num)
+  {
+  }
+
+  template
+  int operator()() const // forwards to test_permuted_map_case_impl() with the stored case and permutation
+  {
+    return test_permuted_map_case_impl(info, perm_num);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const // prints a diagnostic and returns 1; presumably invoked by the dispatcher for unhandled topologies, confirm in the test utils
+  {
+    std::printf(" unexpected unsupported permuted map topology: topo=%s perm=%u type=%d\n",
+                info.name,
+                perm_num,
+                static_cast(topo));
+    return 1;
+  }
+
+  const map_helper_case & info; // held by reference: the functor must not outlive the case it was built from
+  unsigned int perm_num;
+};
+
+static int
+test_permuted_map_case(const map_helper_case & info, unsigned int perm_num) // runs one volume-map case through the topology dispatcher
+{
+  const permuted_map_dispatch dispatch(info, perm_num);
+  return dispatch_supported_lagrange_map_topology(info.topo, dispatch);
+}
+
+template
+static int
+test_permuted_face_helper_side_case_3d_impl(const face_helper_context & context, // evaluates the 3D-parent face helpers for one side and reports any mismatch
+                                            unsigned int side_id,
+                                            const char * parent_name,
+                                            unsigned int perm_num,
+                                            libMesh::ElemType side_topo)
+{
+  const int fail = evaluate_face_helper_context_3d(context, "perm_face_results", tol);
+  if (fail)
+    std::printf(" permuted face mismatch: parent=%s perm=%u side_id=%u side_type=%d (%d failures)\n",
+                parent_name,
+                perm_num,
+                side_id,
+                static_cast(side_topo),
+                fail);
+
+  return fail;
+}
+
+template
+static int
+test_permuted_face_helper_side_case_2d_impl(const face_helper_context & context, // 2D-parent counterpart of the 3D helper above
+                                            unsigned int side_id,
+                                            const char * parent_name,
+                                            unsigned int perm_num,
+                                            libMesh::ElemType side_topo)
+{
+  const int fail =
+    evaluate_face_helper_context_2d(context, "perm_face_results", tol);
+  if (fail)
+    std::printf(" permuted face mismatch: parent=%s perm=%u side_id=%u side_type=%d (%d failures)\n",
+                parent_name,
+                perm_num,
+                side_id,
+                static_cast(side_topo),
+                fail);
+
+  return fail;
+}
+
+struct permuted_face_side_dispatch_3d // functor used with dispatch_supported_lagrange_face_map_topology() when the parent is 3D
+{
+  permuted_face_side_dispatch_3d(const face_helper_context & in_context,
+                                 unsigned int in_side_id,
+                                 const char * in_parent_name,
+                                 unsigned int in_perm_num,
+                                 libMesh::ElemType in_side_topo)
+    : context(in_context),
+      side_id(in_side_id),
+      parent_name(in_parent_name),
+      perm_num(in_perm_num),
+      side_topo(in_side_topo)
+  {
+  }
+
+  template
+  int operator()() const // forwards the stored arguments to the 3D implementation
+  {
+    return test_permuted_face_helper_side_case_3d_impl(
+      context, side_id, parent_name, perm_num, side_topo);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const // prints a diagnostic and returns 1
+  {
+    std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n",
+                parent_name,
+                perm_num,
+                side_id,
+                static_cast(topo));
+    return 1;
+  }
+
+  const face_helper_context & context; // held by reference: the context must outlive this functor
+  unsigned int side_id;
+  const char * parent_name;
+  unsigned int perm_num;
+  libMesh::ElemType side_topo;
+};
+
+template
+struct permuted_face_side_dispatch_2d // side-topology functor for 2D parents; built inside permuted_face_parent_dispatch_2d::operator()
+{
+  permuted_face_side_dispatch_2d(const face_helper_context & in_context,
+                                 unsigned int in_side_id,
+                                 const char * in_parent_name,
+                                 unsigned int in_perm_num,
+                                 libMesh::ElemType in_side_topo)
+    : context(in_context),
+      side_id(in_side_id),
+      parent_name(in_parent_name),
+      perm_num(in_perm_num),
+      side_topo(in_side_topo)
+  {
+  }
+
+  template
+  int operator()() const // forwards the stored arguments to the 2D implementation
+  {
+    return test_permuted_face_helper_side_case_2d_impl(
+      context, side_id, parent_name, perm_num, side_topo);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const // prints a diagnostic and returns 1
+  {
+    std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n",
+                parent_name,
+                perm_num,
+                side_id,
+                static_cast(topo));
+    return 1;
+  }
+
+  const face_helper_context & context;
+  unsigned int side_id;
+  const char * parent_name;
+  unsigned int perm_num;
+  libMesh::ElemType side_topo;
+};
+
+struct permuted_face_parent_dispatch_2d // first-level functor for 2D parents: dispatched on the parent topology, then re-dispatches on the side topology
+{
+  permuted_face_parent_dispatch_2d(const face_helper_context & in_context,
+                                   unsigned int in_side_id,
+                                   const char * in_parent_name,
+                                   unsigned int in_perm_num,
+                                   libMesh::ElemType in_side_topo)
+    : context(in_context),
+      side_id(in_side_id),
+      parent_name(in_parent_name),
+      perm_num(in_perm_num),
+      side_topo(in_side_topo)
+  {
+  }
+
+  template
+  int operator()() const // second-level dispatch on side_topo via the face-map topology dispatcher
+  {
+    const permuted_face_side_dispatch_2d dispatch(
+      context, side_id, parent_name, perm_num, side_topo);
+    return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const // prints a diagnostic naming the parent type and returns 1
+  {
+    std::printf(" unexpected unsupported permuted face-helper parent: parent=%s perm=%u side_id=%u parent_type=%d\n",
+                parent_name,
+                perm_num,
+                side_id,
+                static_cast(topo));
+    return 1;
+  }
+
+  const face_helper_context & context;
+  unsigned int side_id;
+  const char * parent_name;
+  unsigned int perm_num;
+  libMesh::ElemType side_topo;
+};
+
+static int
+test_permuted_face_helper_side_case(const element_fixture & fixture, // builds the side and its face context, then dispatches by parent dimension; returns the failure count
+                                    unsigned int side_id,
+                                    const char * parent_name,
+                                    unsigned int perm_num)
+{
+  auto side = fixture.elem->build_side_ptr(side_id);
+  const face_helper_context context =
+    build_face_helper_context(fixture, *side, side_id, "perm_face");
+
+  if (context.parent_dim == 3) // 3D parent: dispatch directly on the side topology
+  {
+    const permuted_face_side_dispatch_3d dispatch(
+      context, side_id, parent_name, perm_num, side->type());
+    return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch);
+  }
+
+  if (context.parent_dim == 2) // 2D parent: dispatch on the parent topology first, the functor then dispatches on the side
+  {
+    const permuted_face_parent_dispatch_2d dispatch(
+      context, side_id, parent_name, perm_num, side->type());
+    return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch);
+  }
+
+  std::printf(" unexpected unsupported permuted face-helper parent dimension: parent=%s perm=%u side_id=%u dim=%u\n",
+              parent_name,
+              perm_num,
+              side_id,
+              context.parent_dim);
+  return 1; // any other parent dimension is counted as one failure
+}
+
+static RealVector
+host_face_qp_to_parent_oracle(const libMesh::Elem & parent, // host oracle: maps a single side reference point through FEBase::side_map() on the parent
+                              const libMesh::Elem & side,
+                              unsigned int side_id,
+                              RealVector face_qpt)
+{
+  const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent));
+  auto fe = libMesh::FEBase::build(parent.dim(), fe_type);
+  fe->get_xyz();
+
+  std::vector ref_side_points(1); // one-element input list holding face_qpt as a libMesh::Point
+  ref_side_points[0] = libMesh::Point(
+    vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2));
+
+  std::vector ref_points; // filled by side_map()
+  fe->side_map(&parent, &side, side_id, ref_side_points, ref_points);
+
+  return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2));
+}
+
+static int
+check_permuted_face_qp_case(const char * parent_name, // compares map_face_qp_to_parent() with the host oracle; returns how many of the 3 components differ by more than tol
+                            const libMesh::Elem & parent,
+                            const libMesh::Elem & side,
+                            unsigned int side_id,
+                            unsigned int perm_num,
+                            RealVector face_qpt)
+{
+  using libMesh::Kokkos::map_face_qp_to_parent;
+
+  const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt);
+  const RealVector kokkos =
+    map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); // NOTE(review): called with the side only; parent and side_id are used just for the oracle and the message, confirm this matches the helper's signature
+
+  int fail = 0;
+  for (unsigned int d = 0; d < 3; ++d)
+    if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol)
+      ++fail;
+
+  if (fail)
+    std::printf(" permuted face_qp mismatch: parent=%s perm=%u side_id=%u side_type=%d\n",
+                parent_name,
+                perm_num,
+                side_id,
+                static_cast(side.type()));
+
+  return fail;
+}
+
+static int
+test_map_helpers_for_all_permutations() // suite A: every map_cases entry for perm = 1 .. n_permutations()-1 (perm 0 is skipped)
+{
+  int fail = 0;
+  for (const auto & info : map_cases)
+  {
+    const auto elem = build_reference_elem(info.topo); // built only to query n_permutations()
+    for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm)
+      fail += test_permuted_map_case(info, perm);
+  }
+  return fail;
+}
+
+static int
+test_face_helpers_for_all_permutations() // suite B: every side of every permuted face_cases parent (perm 0 is skipped)
+{
+  int fail = 0;
+  for (const auto & info : face_cases)
+  {
+    const auto elem = build_reference_elem(info.topo); // built only to query n_permutations()
+    for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm)
+    {
+      auto fixture = build_permuted_reference_fixture(info.topo, perm);
+      for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id)
+      {
+        auto side = fixture.elem->build_side_ptr(side_id); // used here only for the support check; the callee builds the side again
+        if (!is_supported_lagrange_face_map_topology(side->type()))
+        {
+          std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n",
+                      info.name,
+                      perm,
+                      side_id,
+                      static_cast(side->type()));
+          ++fail; // an unsupported side counts as a failure rather than being silently skipped
+          continue;
+        }
+
+        fail += test_permuted_face_helper_side_case(fixture, side_id, info.name, perm);
+      }
+    }
+  }
+  return fail;
+}
+
+static int
+test_face_qp_to_parent_for_all_permutations() // suite C: map_face_qp_to_parent() at every side quadrature point of every permuted parent
+{
+  int fail = 0;
+  for (const auto & info : face_cases)
+  {
+    const auto elem = build_reference_elem(info.topo);
+    for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm)
+    {
+      auto fixture = build_permuted_reference_fixture(info.topo, perm);
+      for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id)
+      {
+        auto side = fixture.elem->build_side_ptr(side_id);
+        if (side->n_nodes() == 1) // single-node side: checked once with the zero vector, no quadrature rule is built
+        {
+          fail += check_permuted_face_qp_case(
+            info.name, *fixture.elem, *side, side_id, perm, libMesh::Kokkos::zero_vector());
+          continue;
+        }
+
+        if (!is_supported_lagrange_face_map_topology(side->type()))
+        {
+          std::printf(" unexpected unsupported permuted face_qp side: parent=%s perm=%u side_id=%u side_type=%d\n",
+                      info.name,
+                      perm,
+                      side_id,
+                      static_cast(side->type()));
+          ++fail;
+          continue;
+        }
+
+        libMesh::QGauss qr(side->dim(), libMesh::FOURTH);
+        qr.allow_rules_with_negative_weights = true;
+        qr.init(side->type());
+
+        for (unsigned int q = 0; q < qr.n_points(); ++q)
+        {
+          const RealVector face_qpt = libMesh::Kokkos::make_vector( // components at or above the side dimension are set to zero
+            qr.qp(q)(0),
+            (side->dim() >= 2) ? qr.qp(q)(1) : Real(0),
+            (side->dim() >= 3) ? qr.qp(q)(2) : Real(0));
+          fail += check_permuted_face_qp_case(info.name, *fixture.elem, *side, side_id, perm, face_qpt);
+        }
+      }
+    }
+  }
+  return fail;
+}
+
+int
+main(int argc, char ** argv) // runs the three suites, prints one summary line each, exits 0 only if all failure counts are zero
+{
+  Kokkos::initialize(argc, argv); // NOTE(review): Kokkos is initialized before LibMeshInit is constructed; confirm this ordering is intended
+  libMesh::LibMeshInit init(argc, argv);
+
+  int fail = 0;
+
+  const int map_fail = test_map_helpers_for_all_permutations();
+  fail += map_fail;
+  std::printf("[permuted_map_helpers] %s (%d failures)\n", map_fail ? "FAIL" : "OK", map_fail);
+
+  const int face_fail = test_face_helpers_for_all_permutations();
+  fail += face_fail;
+  std::printf("[permuted_face_helpers] %s (%d failures)\n", face_fail ? "FAIL" : "OK", face_fail);
+
+  const int face_qp_fail = test_face_qp_to_parent_for_all_permutations();
+  fail += face_qp_fail;
+  std::printf("[permuted_face_qp] %s (%d failures)\n", face_qp_fail ? "FAIL" : "OK", face_qp_fail);
+
+  Kokkos::finalize(); // runs before init is destroyed at the end of main
+  return fail ? 1 : 0;
+}
diff --git a/tests/fe/kokkos_fe_reconstruction_oracle_test.K b/tests/fe/kokkos_fe_reconstruction_oracle_test.K
new file mode 100644
index 00000000000..a00947f0ebf
--- /dev/null
+++ b/tests/fe/kokkos_fe_reconstruction_oracle_test.K
@@ -0,0 +1,320 @@
+// GPU kernel tests for libMesh::Kokkos FE reconstruction on physical elements.
+//
+// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that
+// FEBase provides the host physical-space oracle values and gradients.
+//
+// The test suite covers:
+// A. 
Reconstruction of solution values on distorted physical elements for
+// every exact LAGRANGE key currently supported by the Kokkos evaluator.
+// B. Reconstruction of physical-space gradients on the same elements by
+// pulling reference-space gradients through the element Jacobian.
+// C. The same value/gradient reconstruction parity for representative exact
+// MONOMIAL keys across all supported dimensions and orders.
+//
+// Returns 0 on success, non-zero on failure.
+
+#include "libmesh/libmesh_config.h"
+
+#include "gpu/kokkos_fe_evaluator.h"
+#include "gpu/kokkos_fe_map.h"
+#include "gpu/kokkos_fe_types.h"
+#include "gpu/kokkos_scalar_types.h"
+
+#include "libmesh/elem.h"
+#include "libmesh/fe_base.h"
+#include "libmesh/libmesh.h"
+#include "libmesh/quadrature_gauss.h"
+
+// Avoid conflicting complex operators between CUDA and PETSc
+#define PETSC_SKIP_CXX_COMPLEX_FIX 1
+#include // NOTE(review): no header name is visible on this and the bare #include lines below; presumably lost in extraction, confirm against the original patch
+#undef __CUDACC_VER__
+
+#include "kokkos_fe_oracle_test_utils.h"
+
+#include
+#include
+#include
+
+using libMesh::Kokkos::FEShapeKey;
+using libMesh::Kokkos::Real;
+using libMesh::Kokkos::RealTensor;
+using libMesh::Kokkos::RealVector;
+using kokkos_test_utils::build_flat_reference_fixture;
+using kokkos_test_utils::compare_device_values;
+using kokkos_test_utils::dispatch_supported_shape_key;
+using kokkos_test_utils::dispatch_supported_shape_key_with_lagrange_map;
+using kokkos_test_utils::element_fixture;
+using kokkos_test_utils::grad_shape_for_key;
+using kokkos_test_utils::shape_for_key;
+using kokkos_test_utils::upload_point_coordinates;
+using kokkos_test_utils::upload_real;
+using kokkos_test_utils::vector_component;
+
+static constexpr double value_tol = 5.0e-13; // tolerance for the reconstructed values (d_u vs ref_u)
+static constexpr double grad_tol = 5.0e-12; // tolerance for each reconstructed gradient component
+static constexpr unsigned int quad_order = 4; // cast to the QGauss order argument in test_reconstruction_case_impl()
+
+namespace
+{
+
+struct reconstruction_case // one test entry: shape key, spatial dimension, number of coefficients, printable name
+{
+  FEShapeKey key;
+  unsigned int dim;
+  unsigned int n_dofs;
+  const char * name;
+};
+
+constexpr unsigned int
+monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) // n_dofs for the MONOMIAL table: C(p+dim, dim) for dim 1..3, 0 for any other dim
+{
+  const unsigned int p = static_cast(order);
+
+  switch (dim)
+  {
+    case 1:
+      return p + 1;
+    case 2:
+      return (p + 1) * (p + 2) / 2;
+    case 3:
+      return (p + 1) * (p + 2) * (p + 3) / 6;
+    default:
+      return 0;
+  }
+}
+
+static const reconstruction_case lagrange_cases[] = { // columns: { family, elem_type, order }, dim, n_dofs, name
+  { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, 1, 2, "LAGRANGE/EDGE2/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "LAGRANGE/EDGE3/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "LAGRANGE/EDGE3/SECOND" },
+
+  { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI3/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI6/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "LAGRANGE/TRI6/SECOND" },
+
+  { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD4/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD8/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "LAGRANGE/QUAD8/SECOND" },
+  { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD9/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "LAGRANGE/QUAD9/SECOND" },
+
+  { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "LAGRANGE/TET4/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "LAGRANGE/TET10/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "LAGRANGE/TET10/SECOND" },
+
+  { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX8/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX20/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "LAGRANGE/HEX20/SECOND" },
+  { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX27/FIRST" },
+  { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, "LAGRANGE/HEX27/SECOND" }
+};
+
+static const reconstruction_case monomial_cases[] = { // same columns; n_dofs comes from monomial_n_dofs_for_dim()
+  { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::CONSTANT }, 1, monomial_n_dofs_for_dim(1, libMesh::CONSTANT), "MONOMIAL/EDGE2/CONSTANT" },
+  { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::FIRST }, 1, monomial_n_dofs_for_dim(1, libMesh::FIRST), "MONOMIAL/EDGE2/FIRST" },
+  { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::SECOND }, 1, monomial_n_dofs_for_dim(1, libMesh::SECOND), "MONOMIAL/EDGE3/SECOND" },
+  { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FOURTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FOURTH), "MONOMIAL/EDGE3/FOURTH" },
+  { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FIFTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FIFTH), "MONOMIAL/EDGE3/FIFTH" },
+
+  { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" },
+  { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" },
+  { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" },
+  { { libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" },
+  { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" },
+  { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" },
+
+  { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" },
+  { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" },
+  { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" },
+  { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" },
+  { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" },
+  { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" }
+};
+
+} // anonymous namespace
+
+static std::vector
+build_coefficients(const reconstruction_case & info) // deterministic coefficients: alternating sign, magnitude growing with index, offset by family and order
+{
+  std::vector coeffs(info.n_dofs);
+  const Real family_bias = (info.key.family == libMesh::MONOMIAL) ? Real(0.19) : Real(0.07);
+  const Real order_bias = Real(static_cast(info.key.order) + 1u) * Real(0.013);
+
+  for (unsigned int i = 0; i < info.n_dofs; ++i)
+  {
+    const Real sign = (i % 2) ? Real(-1.0) : Real(1.0); // even index positive, odd index negative
+    coeffs[i] = sign * (Real(0.17) + Real(0.041) * Real(i + 1u) + family_bias + order_bias);
+  }
+
+  return coeffs;
+}
+
+template // NOTE(review): no template parameter list is visible here although ExactTopo is used below; presumably lost in extraction, confirm
+static int
+test_reconstruction_case_impl(const reconstruction_case & info) // builds the host FEBase oracle, reconstructs value and gradient in a Kokkos kernel, returns the number of mismatches
+{
+  constexpr unsigned int max_geom_nodes = 27; // size of the per-point node array in the kernel; n_geom_nodes is not checked against it
+
+  element_fixture fixture = build_flat_reference_fixture(ExactTopo); // NOTE(review): the file header says "distorted physical elements" while this helper is named "flat reference"; confirm the intended geometry
+  const unsigned int n_geom_nodes = fixture.elem->n_nodes();
+
+  const libMesh::FEType fe_type(info.key.order, info.key.family);
+  auto fe = libMesh::FEBase::build(info.dim, fe_type);
+
+  libMesh::QGauss qr(info.dim, static_cast(quad_order));
+  qr.allow_rules_with_negative_weights = true;
+  qr.init(info.key.elem_type);
+
+  fe->attach_quadrature_rule(&qr);
+  fe->get_phi(); // requested before reinit(); the references are fetched again below
+  fe->get_dphi();
+  fe->reinit(fixture.elem.get());
+
+  const auto & phi = fe->get_phi();
+  const auto & dphi = fe->get_dphi();
+  const unsigned int nqp = qr.n_points();
+
+  const std::vector coeffs = build_coefficients(info);
+  std::vector ref_u(nqp, 0.0);
+  std::vector ref_gx(nqp, 0.0);
+  std::vector ref_gy(nqp, 0.0);
+  std::vector ref_gz(nqp, 0.0);
+  std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp);
+
+  for (unsigned int q = 0; q < nqp; ++q) // host oracle: sum coeff * phi and coeff * dphi at each quadrature point
+  {
+    for (unsigned int i = 0; i < info.n_dofs; ++i)
+    {
+      ref_u[q] += phi[i][q] * coeffs[i];
+      ref_gx[q] += dphi[i][q](0) * coeffs[i];
+      ref_gy[q] += dphi[i][q](1) * coeffs[i];
+      ref_gz[q] += dphi[i][q](2) * coeffs[i];
+    }
+
+    xi_h[q] = qr.qp(q)(0);
+    eta_h[q] = (info.dim >= 2) ? qr.qp(q)(1) : Real(0); // coordinates at or above the element dimension are set to zero
+    zeta_h[q] = (info.dim >= 3) ? qr.qp(q)(2) : Real(0);
+  }
+
+  auto d_coords = upload_point_coordinates(*fixture.elem, "recon_coords");
+  auto d_coeffs = upload_real(coeffs, "recon_coeffs");
+  auto d_xi = upload_real(xi_h, "recon_xi");
+  auto d_eta = upload_real(eta_h, "recon_eta");
+  auto d_zeta = upload_real(zeta_h, "recon_zeta");
+
+  Kokkos::View d_u(std::string("recon_u"), nqp);
+  Kokkos::View d_gx(std::string("recon_gx"), nqp);
+  Kokkos::View d_gy(std::string("recon_gy"), nqp);
+  Kokkos::View d_gz(std::string("recon_gz"), nqp);
+
+  const unsigned int dim = info.dim; // plain local copies for use inside the lambda instead of going through info
+  const unsigned int n_dofs = info.n_dofs;
+  const unsigned int n_geom_nodes_ = n_geom_nodes;
+
+  Kokkos::parallel_for(
+    nqp,
+    KOKKOS_LAMBDA(int q) {
+      RealVector geom_nodes[max_geom_nodes];
+      for (unsigned int i = 0; i < n_geom_nodes_; ++i) // d_coords is read as 3 consecutive coordinates per node
+        geom_nodes[i] = libMesh::Kokkos::make_vector(
+          d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2));
+
+      const RealTensor J = libMesh::Kokkos::jacobian(
+        geom_nodes, n_geom_nodes_, d_xi(q), d_eta(q), d_zeta(q));
+
+      Real u = 0.0;
+      RealVector grad_ref_sum = libMesh::Kokkos::zero_vector();
+      for (unsigned int i = 0; i < n_dofs; ++i)
+      {
+        const Real coeff = d_coeffs(i);
+        u += coeff * shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q));
+        grad_ref_sum +=
+          coeff * grad_shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q));
+      }
+
+      const RealTensor invJ = libMesh::Kokkos::leading_inverse(J, dim);
+      const RealVector grad_phys = invJ * grad_ref_sum; // reference-space gradient sum mapped with the inverse from leading_inverse()
+
+      d_u(q) = u;
+      d_gx(q) = vector_component(grad_phys, 0);
+      d_gy(q) = vector_component(grad_phys, 1);
+      d_gz(q) = vector_component(grad_phys, 2);
+    });
+  Kokkos::fence(); // wait for the kernel before comparing results
+
+  int fail = 0;
+  fail += compare_device_values(d_u, ref_u, value_tol);
+  fail += compare_device_values(d_gx, ref_gx, grad_tol);
+  fail += compare_device_values(d_gy, ref_gy, grad_tol);
+  fail += compare_device_values(d_gz, ref_gz, grad_tol);
+
+  if (fail)
+    std::printf(" reconstruction mismatch: %s (%d failures)\n", info.name, fail);
+
+  return fail;
+}
+
+struct reconstruction_dispatch // functor passed to dispatch_supported_shape_key_with_lagrange_map(); carries the case by reference
+{
+  explicit reconstruction_dispatch(const reconstruction_case & in_info) : info(in_info) {}
+
+  template
+  int operator()() const // forwards to test_reconstruction_case_impl() with the stored case
+  {
+    return test_reconstruction_case_impl(info);
+  }
+
+  int unsupported_key(FEShapeKey key) const // prints the key's family, element type and order as integers and returns 1
+  {
+    std::printf(" unexpected unsupported reconstruction key: %s family=%d elem_type=%d order=%d\n",
+                info.name,
+                static_cast(key.family),
+                static_cast(key.elem_type),
+                static_cast(key.order));
+    return 1;
+  }
+
+  const reconstruction_case & info;
+};
+
+static int
+test_reconstruction_case(const reconstruction_case & info) // runs one case through the shape-key dispatcher
+{
+  const reconstruction_dispatch dispatch(info);
+  return dispatch_supported_shape_key_with_lagrange_map(info.key, dispatch);
+}
+
+int
+main(int argc, char ** argv) // runs every LAGRANGE and MONOMIAL case, prints one line per case, exits 0 only if nothing failed
+{
+  Kokkos::initialize(argc, argv); // NOTE(review): Kokkos is initialized before LibMeshInit is constructed; confirm this ordering is intended
+  libMesh::LibMeshInit init(argc, argv);
+
+  int total_fail = 0;
+
+  for (const auto & info : lagrange_cases)
+  {
+    const int f = test_reconstruction_case(info);
+    std::printf("[reconstruction_lagrange] [%s] %s (%d failures)\n",
+                info.name, f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+
+  for (const auto & info : monomial_cases)
+  {
+    const int f = test_reconstruction_case(info);
+    std::printf("[reconstruction_monomial] [%s] %s (%d failures)\n",
+                info.name, f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+
+  Kokkos::finalize(); // runs before init is destroyed at the end of main
+
+  if (total_fail == 0)
+    std::printf("ALL TESTS PASSED\n");
+  else
+    std::printf("%d TEST(S) FAILED\n", total_fail); // NOTE(review): total_fail sums the per-case failure counts, so this is not the number of failing cases
+
+  return total_fail ? 1 : 0;
+}
diff --git a/tests/fe/kokkos_fe_shape_oracle_test.K b/tests/fe/kokkos_fe_shape_oracle_test.K
new file mode 100644
index 00000000000..d98de26dce8
--- /dev/null
+++ b/tests/fe/kokkos_fe_shape_oracle_test.K
@@ -0,0 +1,629 @@
+// GPU kernel oracle tests for libMesh::Kokkos FE shape functions.
+// The test suite covers:
+// A. Geometry-map shape parity against the libMesh FE map oracle for the
+// 12 implemented LAGRANGE map topologies.
+// B. Geometry-map gradient parity against the libMesh FE map oracle for the
+// same topologies.
+// C. Physics FE parity for exact libMesh LAGRANGE keys that the Kokkos
+// evaluator currently supports.
+// D. Physics FE gradient parity for the same exact LAGRANGE keys.
+// E. Physics FE parity for Kokkos-supported exact MONOMIAL keys.
+// F. Physics FE gradient parity for the same exact MONOMIAL keys.
+//
+// Unsupported exact keys are expected to hard-abort in the Kokkos path and
+// are therefore intentionally not invoked here.
+
+#include "libmesh/elem.h"
+#include "gpu/kokkos_fe_evaluator.h"
+#include "gpu/kokkos_fe_types.h"
+#include "gpu/kokkos_scalar_types.h"
+
+#include "libmesh/fe.h"
+#include "libmesh/fe_interface.h"
+#include "libmesh/fe_map.h"
+#include "libmesh/libmesh.h"
+#include "libmesh/quadrature_gauss.h"
+#include "libmesh/enum_elem_type.h"
+#include "libmesh/enum_order.h"
+
+// Avoid conflicting complex operators between CUDA and PETSc
+#define PETSC_SKIP_CXX_COMPLEX_FIX 1
+#include
+#undef __CUDACC_VER__
+
+#include "kokkos_fe_oracle_test_utils.h"
+#include
+#include
+#include
+#include
+
+using libMesh::Kokkos::Real;
+using libMesh::Kokkos::RealVector;
+using kokkos_test_utils::build_qps;
+using kokkos_test_utils::build_reference_elem;
+using kokkos_test_utils::compare_device_values;
+using kokkos_test_utils::dispatch_supported_lagrange_map_topology;
+using kokkos_test_utils::dispatch_supported_shape_key;
+using kokkos_test_utils::grad_shape_for_key;
+using kokkos_test_utils::shape_for_key;
+using 
kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; +static constexpr unsigned int quad_order = 4; + +namespace +{ + +struct map_elem_info +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +struct physics_shape_info +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +static const map_elem_info map_elems[] = { + { libMesh::EDGE2, 1, 2, "EDGE2" }, + { libMesh::EDGE3, 1, 3, "EDGE3" }, + { libMesh::TRI3, 2, 3, "TRI3" }, + { libMesh::TRI6, 2, 6, "TRI6" }, + { libMesh::QUAD4, 2, 4, "QUAD4" }, + { libMesh::QUAD8, 2, 8, "QUAD8" }, + { libMesh::QUAD9, 2, 9, "QUAD9" }, + { libMesh::TET4, 3, 4, "TET4" }, + { libMesh::TET10, 3, 10, "TET10" }, + { libMesh::HEX8, 3, 8, "HEX8" }, + { libMesh::HEX20, 3, 20, "HEX20" }, + { libMesh::HEX27, 3, 27, "HEX27" }, +}; +static constexpr unsigned int n_map_elems = sizeof(map_elems) / sizeof(map_elems[0]); + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +// Only exact libMesh LAGRANGE keys whose evaluator topology is implemented in +// the current Kokkos path are included here. 
+static const physics_shape_info lagrange_physics_cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, 1, 2, "EDGE2/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "EDGE3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "EDGE3/SECOND" }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST }, 1, 2, "EDGE4/FIRST" }, + + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "TRI6/SECOND" }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::FIRST }, 2, 3, "TRI7/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, 2, 6, "TRI7/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "TET10/SECOND" }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::FIRST }, 3, 4, "TET14/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, 3, 10, "TET14/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "HEX27/FIRST" }, + { { libMesh::LAGRANGE, 
libMesh::HEX27, libMesh::SECOND }, 3, 27, "HEX27/SECOND" }, +}; +static constexpr unsigned int n_lagrange_physics_cases = + sizeof(lagrange_physics_cases) / sizeof(lagrange_physics_cases[0]); + +// These MONOMIAL cases cover all implemented MonomialImpl paths +// for orders 0..5, plus representative non-simplex exact keys. +static const physics_shape_info monomial_physics_cases[] = { + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::CONSTANT }, 1, monomial_n_dofs_for_dim(1, libMesh::CONSTANT), "MONOMIAL/EDGE2/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::FIRST }, 1, monomial_n_dofs_for_dim(1, libMesh::FIRST), "MONOMIAL/EDGE2/FIRST" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::SECOND }, 1, monomial_n_dofs_for_dim(1, libMesh::SECOND), "MONOMIAL/EDGE3/SECOND" }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::THIRD }, 1, monomial_n_dofs_for_dim(1, libMesh::THIRD), "MONOMIAL/EDGE4/THIRD" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FOURTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FOURTH), "MONOMIAL/EDGE3/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FIFTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FIFTH), "MONOMIAL/EDGE3/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::THIRD }, 2, monomial_n_dofs_for_dim(2, libMesh::THIRD), "MONOMIAL/TRI7/THIRD" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FOURTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FOURTH), "MONOMIAL/TRI7/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/TRI7/FIFTH" }, + { { 
libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::THIRD }, 3, monomial_n_dofs_for_dim(3, libMesh::THIRD), "MONOMIAL/TET14/THIRD" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FOURTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FOURTH), "MONOMIAL/TET14/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/TET14/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::PRISM6, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/PRISM6/FIRST" }, + { { libMesh::MONOMIAL, libMesh::PRISM21, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/PRISM21/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::PYRAMID5, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/PYRAMID5/FIRST" 
}, + { { libMesh::MONOMIAL, libMesh::PYRAMID14, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/PYRAMID14/FIFTH" }, +}; +static constexpr unsigned int n_monomial_physics_cases = + sizeof(monomial_physics_cases) / sizeof(monomial_physics_cases[0]); + +} // anonymous namespace + +static Real +host_map_shape(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return libMesh::FEInterface::shape(fe_type, 0, &elem, i, libMesh::Point(xi, eta, zeta)); +} + +static RealVector +host_map_grad(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int dim, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + const libMesh::Point p(xi, eta, zeta); + const Real gx = libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 0, p); + const Real gy = (dim >= 2) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 1, p) + : Real(0); + const Real gz = (dim >= 3) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 2, p) + : Real(0); + return libMesh::Kokkos::make_vector(gx, gy, gz); +} + +static Real +host_physics_shape(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return libMesh::FEInterface::shape(fe_type, 0, &elem, i, libMesh::Point(xi, eta, zeta)); +} + +static RealVector +host_physics_grad(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int dim, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + const libMesh::Point p(xi, eta, zeta); + const Real gx = libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 0, p); + const Real gy = (dim >= 2) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 1, p) + : Real(0); + const Real gz = (dim >= 3) ? 
libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 2, p) + : Real(0); + return libMesh::Kokkos::make_vector(gx, gy, gz); +} + +// --------------------------------------------------------------------------- +// Test A: Geometry-map shape parity against libMesh FE map dispatch. +// --------------------------------------------------------------------------- +template +static int +test_map_shape_parity_impl(const map_elem_info & e) +{ + auto elem = build_reference_elem(Topo); + const libMesh::FEType fe_type(elem->default_order(), libMesh::FEMap::map_fe_type(*elem)); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(e.topo, e.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = e.n_dofs; + + std::vector ref_phi(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + ref_phi[i * nqp + q] = + host_map_shape(*elem, fe_type, i, xi_h[q], eta_h[q], zeta_h[q]); + + auto d_xi = upload_real(xi_h, "xi"); + auto d_eta = upload_real(eta_h, "eta"); + auto d_zeta = upload_real(zeta_h, "zeta"); + + Kokkos::View d_phi(std::string("dev_phi"), nd * nqp); + + const unsigned int nd_ = nd; + const unsigned int nqp_ = nqp; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + d_phi(idx) = + libMesh::Kokkos::map_shape(static_cast(i), + d_xi(q), + d_eta(q), + d_zeta(q)); + }); + Kokkos::fence(); + + return compare_device_values(d_phi, ref_phi, tol); +} + +struct map_shape_dispatch +{ + explicit map_shape_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_shape_parity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-shape topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_map_shape_parity(const 
map_elem_info & e) +{ + const map_shape_dispatch dispatch(e); + return dispatch_supported_lagrange_map_topology(e.topo, dispatch); +} + +// --------------------------------------------------------------------------- +// Test B: Geometry-map gradient parity against libMesh FE map dispatch. +// --------------------------------------------------------------------------- +template +static int +test_map_grad_parity_impl(const map_elem_info & e) +{ + auto elem = build_reference_elem(Topo); + const libMesh::FEType fe_type(elem->default_order(), libMesh::FEMap::map_fe_type(*elem)); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(e.topo, e.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = e.n_dofs; + const unsigned int dim = e.dim; + + std::vector ref_gx(nd * nqp); + std::vector ref_gy(nd * nqp); + std::vector ref_gz(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector g = host_map_grad(*elem, fe_type, dim, i, xi_h[q], eta_h[q], zeta_h[q]); + ref_gx[i * nqp + q] = vector_component(g, 0); + ref_gy[i * nqp + q] = vector_component(g, 1); + ref_gz[i * nqp + q] = vector_component(g, 2); + } + + auto d_xi = upload_real(xi_h, "xi"); + auto d_eta = upload_real(eta_h, "eta"); + auto d_zeta = upload_real(zeta_h, "zeta"); + + Kokkos::View d_gx(std::string("map_gx"), nd * nqp); + Kokkos::View d_gy(std::string("map_gy"), nd * nqp); + Kokkos::View d_gz(std::string("map_gz"), nd * nqp); + + const unsigned int nd_ = nd; + const unsigned int nqp_ = nqp; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + const RealVector g = + libMesh::Kokkos::grad_map_shape(static_cast(i), + d_xi(q), + d_eta(q), + d_zeta(q)); + d_gx(idx) = vector_component(g, 0); + d_gy(idx) = vector_component(g, 1); + d_gz(idx) = vector_component(g, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_gx, ref_gx, 
tol) + + compare_device_values(d_gy, ref_gy, tol) + + compare_device_values(d_gz, ref_gz, tol); +} + +struct map_grad_dispatch +{ + explicit map_grad_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_grad_parity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-grad topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_map_grad_parity(const map_elem_info & e) +{ + const map_grad_dispatch dispatch(e); + return dispatch_supported_lagrange_map_topology(e.topo, dispatch); +} + +// --------------------------------------------------------------------------- +// Test C: Exact-key physics shape parity against libMesh CPU FE dispatch. +// --------------------------------------------------------------------------- +template +static int +test_shape_parity_impl(const physics_shape_info & info) +{ + auto elem = build_reference_elem(info.key.elem_type); + const libMesh::FEType fe_type(info.key.order, info.key.family); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.key.elem_type, info.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = info.n_dofs; + + std::vector ref_phi(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + ref_phi[i * nqp + q] = + host_physics_shape(*elem, fe_type, i, xi_h[q], eta_h[q], zeta_h[q]); + + auto d_xi = upload_real(xi_h, "phys_xi"); + auto d_eta = upload_real(eta_h, "phys_eta"); + auto d_zeta = upload_real(zeta_h, "phys_zeta"); + + Kokkos::View d_phi(std::string("phys_phi"), nd * nqp); + + const unsigned int nqp_ = nqp; + const unsigned int nd_ = nd; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + d_phi(idx) = shape_for_key( + static_cast(i), d_xi(q), 
d_eta(q), d_zeta(q)); + }); + Kokkos::fence(); + + return compare_device_values(d_phi, ref_phi, tol); +} + +// --------------------------------------------------------------------------- +// Test D: Exact-key physics gradient parity against libMesh CPU FE dispatch. +// --------------------------------------------------------------------------- +template +static int +test_grad_shape_parity_impl(const physics_shape_info & info) +{ + auto elem = build_reference_elem(info.key.elem_type); + const libMesh::FEType fe_type(info.key.order, info.key.family); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.key.elem_type, info.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = info.n_dofs; + + std::vector ref_gx(nd * nqp); + std::vector ref_gy(nd * nqp); + std::vector ref_gz(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector g = + host_physics_grad(*elem, fe_type, info.dim, i, xi_h[q], eta_h[q], zeta_h[q]); + ref_gx[i * nqp + q] = vector_component(g, 0); + ref_gy[i * nqp + q] = vector_component(g, 1); + ref_gz[i * nqp + q] = vector_component(g, 2); + } + + auto d_xi = upload_real(xi_h, "grad_xi"); + auto d_eta = upload_real(eta_h, "grad_eta"); + auto d_zeta = upload_real(zeta_h, "grad_zeta"); + + Kokkos::View d_gx(std::string("phys_gx"), nd * nqp); + Kokkos::View d_gy(std::string("phys_gy"), nd * nqp); + Kokkos::View d_gz(std::string("phys_gz"), nd * nqp); + + const unsigned int nqp_ = nqp; + const unsigned int nd_ = nd; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + const RealVector g = grad_shape_for_key( + static_cast(i), d_xi(q), d_eta(q), d_zeta(q)); + d_gx(idx) = vector_component(g, 0); + d_gy(idx) = vector_component(g, 1); + d_gz(idx) = vector_component(g, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_gx, ref_gx, tol) + + 
compare_device_values(d_gy, ref_gy, tol) + + compare_device_values(d_gz, ref_gz, tol); +} + +struct shape_dispatch +{ + explicit shape_dispatch(const physics_shape_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_shape_parity_impl(info); + } + + int unsupported_key(libMesh::Kokkos::FEShapeKey key) const + { + std::printf(" unexpected unsupported shape key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const physics_shape_info & info; +}; + +struct grad_shape_dispatch +{ + explicit grad_shape_dispatch(const physics_shape_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_grad_shape_parity_impl(info); + } + + int unsupported_key(libMesh::Kokkos::FEShapeKey key) const + { + std::printf(" unexpected unsupported grad-shape key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const physics_shape_info & info; +}; + +static int +test_shape_parity(const physics_shape_info & info) +{ + const shape_dispatch dispatch(info); + return dispatch_supported_shape_key(info.key, dispatch); +} + +static int +test_grad_shape_parity(const physics_shape_info & info) +{ + const grad_shape_dispatch dispatch(info); + return dispatch_supported_shape_key(info.key, dispatch); +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (unsigned int e = 0; e < n_map_elems; ++e) + { + const map_elem_info & info = map_elems[e]; + + { + const int f = test_map_shape_parity(info); + std::printf("[map_shape_parity ][%s] %s (%d)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_map_grad_parity(info); + std::printf("[map_grad_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (unsigned int c = 0; c < n_lagrange_physics_cases; ++c) + { + const physics_shape_info & info = lagrange_physics_cases[c]; + + { + const int f = test_shape_parity(info); + std::printf("[shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_grad_shape_parity(info); + std::printf("[grad_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (unsigned int c = 0; c < n_monomial_physics_cases; ++c) + { + const physics_shape_info & info = monomial_physics_cases[c]; + + { + const int f = test_shape_parity(info); + std::printf("[shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_grad_shape_parity(info); + std::printf("[grad_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + Kokkos::finalize(); + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_side_trace_oracle_test.K b/tests/fe/kokkos_fe_side_trace_oracle_test.K new file mode 100644 index 00000000000..802545ccf31 --- /dev/null +++ b/tests/fe/kokkos_fe_side_trace_oracle_test.K @@ -0,0 +1,342 @@ +// GPU kernel tests for libMesh::Kokkos side-trace FE parity. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEBase side reinit provides host side-trace oracle values. +// +// The test suite covers: +// A. Side-restricted shape values for supported exact LAGRANGE keys on +// distorted physical elements. +// B. Tangential physical gradients on those same sides, using host FEBase +// side traces as the oracle. +// C. 
The same side value and tangential-gradient parity for representative +// exact MONOMIAL keys whose parent geometry topology is supported by the +// current Kokkos map implementation. +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_scalar_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using libMesh::Kokkos::FEShapeKey; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_flat_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::dispatch_supported_shape_key_with_lagrange_map; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double value_tol = 5.0e-13; +static constexpr double grad_tol = 5.0e-12; + +namespace +{ + +struct side_trace_case +{ + FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +// Restrict to parent geometries currently handled by the Kokkos map 
path. +static const side_trace_case lagrange_cases[] = { + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "LAGRANGE/TRI6/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "LAGRANGE/QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "LAGRANGE/QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "LAGRANGE/TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "LAGRANGE/TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "LAGRANGE/TET10/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "LAGRANGE/HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX27/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, "LAGRANGE/HEX27/SECOND" } +}; + +static const side_trace_case monomial_cases[] = { + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { 
libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" } +}; + +} // anonymous namespace + +LIBMESH_DEVICE_INLINE RealVector +tangential_component(const RealVector & v, const RealVector & normal) +{ + return v - ((v * normal) * normal); +} + +template +static int +test_side_trace_case_impl(const side_trace_case & info) +{ + constexpr unsigned int max_geom_nodes = 27; + + auto fixture = build_flat_reference_fixture(ExactTopo); + const unsigned int n_geom_nodes = fixture.elem->n_nodes(); + const unsigned int parent_dim = fixture.elem->dim(); + + const libMesh::FEType fe_type(info.key.order, info.key.family); + auto side_fe = libMesh::FEBase::build(parent_dim, fe_type); + + int fail = 0; + + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + + libMesh::QGauss 
qr(parent_dim - 1, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + side_fe->attach_quadrature_rule(&qr); + side_fe->get_phi(); + side_fe->get_dphi(); + side_fe->get_normals(); + side_fe->reinit(fixture.elem.get(), side_id); + + const auto & phi = side_fe->get_phi(); + const auto & dphi = side_fe->get_dphi(); + const auto & normals = side_fe->get_normals(); + + if (phi.size() != info.n_dofs || dphi.size() != info.n_dofs) + { + std::printf(" unexpected host side FE size: %s side_id=%u phi=%llu dphi=%llu expected=%u\n", + info.name, + side_id, + libMesh::cast_int(phi.size()), + libMesh::cast_int(dphi.size()), + info.n_dofs); + fail += 1; + continue; + } + + const unsigned int nqp = qr.n_points(); + std::vector ref_phi(info.n_dofs * nqp); + std::vector ref_tgx(info.n_dofs * nqp); + std::vector ref_tgy(info.n_dofs * nqp); + std::vector ref_tgz(info.n_dofs * nqp); + std::vector parent_xi_h(nqp), parent_eta_h(nqp), parent_zeta_h(nqp); + std::vector normal_x_h(nqp), normal_y_h(nqp), normal_z_h(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? 
qr.qp(q)(2) : Real(0)); + const RealVector parent_qpt = + libMesh::Kokkos::map_face_qp_to_parent(*side, libMesh::LAGRANGE_MAP, side->type(), face_qpt); + + parent_xi_h[q] = vector_component(parent_qpt, 0); + parent_eta_h[q] = vector_component(parent_qpt, 1); + parent_zeta_h[q] = vector_component(parent_qpt, 2); + + const auto & n = normals[q]; + normal_x_h[q] = n(0); + normal_y_h[q] = n(1); + normal_z_h[q] = n(2); + + for (unsigned int i = 0; i < info.n_dofs; ++i) + { + const unsigned int idx = q * info.n_dofs + i; + const RealVector host_tg = tangential_component( + libMesh::Kokkos::make_vector(dphi[i][q](0), dphi[i][q](1), dphi[i][q](2)), + libMesh::Kokkos::make_vector(n(0), n(1), n(2))); + + ref_phi[idx] = phi[i][q]; + ref_tgx[idx] = vector_component(host_tg, 0); + ref_tgy[idx] = vector_component(host_tg, 1); + ref_tgz[idx] = vector_component(host_tg, 2); + } + } + + auto d_coords = upload_point_coordinates(*fixture.elem, "side_trace_coords"); + auto d_parent_xi = upload_real(parent_xi_h, "side_trace_parent_xi"); + auto d_parent_eta = upload_real(parent_eta_h, "side_trace_parent_eta"); + auto d_parent_zeta = upload_real(parent_zeta_h, "side_trace_parent_zeta"); + auto d_normal_x = upload_real(normal_x_h, "side_trace_normal_x"); + auto d_normal_y = upload_real(normal_y_h, "side_trace_normal_y"); + auto d_normal_z = upload_real(normal_z_h, "side_trace_normal_z"); + + Kokkos::View d_phi(std::string("side_trace_phi"), ref_phi.size()); + Kokkos::View d_tgx(std::string("side_trace_tgx"), ref_tgx.size()); + Kokkos::View d_tgy(std::string("side_trace_tgy"), ref_tgy.size()); + Kokkos::View d_tgz(std::string("side_trace_tgz"), ref_tgz.size()); + + const unsigned int n_dofs = info.n_dofs; + const unsigned int n_geom_nodes_ = n_geom_nodes; + + Kokkos::parallel_for( + static_cast(ref_phi.size()), + KOKKOS_LAMBDA(int idx) { + const unsigned int q = static_cast(idx) / n_dofs; + const unsigned int i = static_cast(idx) % n_dofs; + + RealVector geom_nodes[max_geom_nodes]; + for 
(unsigned int k = 0; k < n_geom_nodes_; ++k) + geom_nodes[k] = libMesh::Kokkos::make_vector( + d_coords(3 * k + 0), d_coords(3 * k + 1), d_coords(3 * k + 2)); + + const Real xi = d_parent_xi(q); + const Real eta = d_parent_eta(q); + const Real zeta = d_parent_zeta(q); + const RealTensor J = + libMesh::Kokkos::jacobian(geom_nodes, n_geom_nodes_, xi, eta, zeta); + const RealVector grad_ref = grad_shape_for_key(i, xi, eta, zeta); + const RealVector grad_phys = libMesh::Kokkos::leading_inverse(J, parent_dim) * grad_ref; + const RealVector normal = libMesh::Kokkos::make_vector(d_normal_x(q), d_normal_y(q), d_normal_z(q)); + const RealVector tangential_grad = tangential_component(grad_phys, normal); + + d_phi(idx) = shape_for_key(i, xi, eta, zeta); + d_tgx(idx) = vector_component(tangential_grad, 0); + d_tgy(idx) = vector_component(tangential_grad, 1); + d_tgz(idx) = vector_component(tangential_grad, 2); + }); + Kokkos::fence(); + + const int side_fail = + compare_device_values(d_phi, ref_phi, value_tol) + + compare_device_values(d_tgx, ref_tgx, grad_tol) + + compare_device_values(d_tgy, ref_tgy, grad_tol) + + compare_device_values(d_tgz, ref_tgz, grad_tol); + + if (side_fail) + std::printf(" side-trace mismatch: %s side_id=%u side_type=%d (%d failures)\n", + info.name, + side_id, + static_cast(side->type()), + side_fail); + + fail += side_fail; + } + + return fail; +} + +struct side_trace_dispatch +{ + explicit side_trace_dispatch(const side_trace_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_side_trace_case_impl(info); + } + + int unsupported_key(FEShapeKey key) const + { + std::printf(" unexpected unsupported side-trace key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const side_trace_case & info; +}; + +static int +test_side_trace_case(const side_trace_case & info) +{ + const side_trace_dispatch dispatch(info); + 
return dispatch_supported_shape_key_with_lagrange_map(info.key, dispatch); +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : lagrange_cases) + { + const int f = test_side_trace_case(info); + std::printf("[side_trace_lagrange] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + for (const auto & info : monomial_cases) + { + const int f = test_side_trace_case(info); + std::printf("[side_trace_monomial] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_types_oracle_test.K b/tests/fe/kokkos_fe_types_oracle_test.K new file mode 100644 index 00000000000..cc6b3cfd640 --- /dev/null +++ b/tests/fe/kokkos_fe_types_oracle_test.K @@ -0,0 +1,509 @@ +// GPU kernel oracle tests for libMesh::Kokkos FE type helpers. +// The test suite covers: +// A. get_side_topology() against libMesh side topology, with the 1D +// NODEELEM -> EDGE2 surrogate used by the Kokkos path. +// B. class_from_topology() against a class inferred from libMesh +// first-order LAGRANGE FE spaces. +// C. n_dofs(FEShapeKey) for Kokkos-supported exact LAGRANGE keys against +// libMesh::FEInterface::n_dofs(). +// D. n_dofs(FEShapeKey) for Kokkos-supported MONOMIAL keys against +// libMesh::FEInterface::n_dofs(). +// E. supports_shape()/supports_grad_shape()/supports_n_dofs() agree on the +// current Kokkos evaluator support boundary. +// +// Returns 0 on success, non-zero on failure. 
+
+#include "gpu/kokkos_fe_types.h"
+
+#include "libmesh/elem.h"
+#include "libmesh/fe_interface.h"
+#include "libmesh/fe_type.h"
+#include "libmesh/libmesh.h"
+
+#include "libmesh/enum_elem_type.h"
+#include "libmesh/enum_fe_family.h"
+#include "libmesh/enum_order.h"
+
+// Avoid conflicting complex operators between CUDA and PETSc
+// NOTE(review): the header name after the bare #include below appears to be
+// missing in this copy (presumably the Kokkos core header) -- confirm against
+// the upstream file.
+#define PETSC_SKIP_CXX_COMPLEX_FIX 1
+#include
+#undef __CUDACC_VER__
+
+#include "kokkos_fe_oracle_test_utils.h"
+
+// NOTE(review): the three header names below also appear to be missing in
+// this copy -- confirm against the upstream file.
+#include
+#include
+#include
+
+// Element factory shared by the host oracles below.
+using kokkos_test_utils::build_reference_elem;
+
+namespace
+{
+
+// One get_side_topology() case: a parent element type and the side type the
+// host oracle reported for it.
+struct side_topology_case
+{
+  libMesh::ElemType parent;
+  libMesh::ElemType expected;
+};
+
+// One class_from_topology() case: an element type and the FE element class
+// the host oracle reported for it.
+struct class_from_topology_case
+{
+  libMesh::ElemType topo;
+  libMesh::FEElemClass expected;
+};
+
+// One n_dofs() case: a shape key and the dof count the host oracle reported.
+struct n_dof_case
+{
+  libMesh::Kokkos::FEShapeKey key;
+  unsigned int expected;
+};
+
+// One support-predicate case: a shape key and whether the supports_*()
+// predicates are expected to accept it.
+struct support_case
+{
+  libMesh::Kokkos::FEShapeKey key;
+  bool expected;
+};
+
+// Host reference for get_side_topology().
+//
+// Builds an element of `parent_type` and returns the type shared by all of its
+// sides.  Elements with dim() == 1 always yield EDGE2.  If any side has a
+// different type than side 0, INVALID_ELEM is returned.
+static libMesh::ElemType
+host_side_topology_oracle(libMesh::ElemType parent_type)
+{
+  auto elem = build_reference_elem(parent_type);
+
+  // 1D parents are mapped to the EDGE2 surrogate instead of their real side.
+  if (elem->dim() == 1)
+    return libMesh::EDGE2;
+
+  auto first_side = elem->build_side_ptr(0);
+  const libMesh::ElemType side_topo = first_side->type();
+
+  // Every remaining side must match side 0; mixed-side parents are rejected.
+  for (unsigned int s = 1; s < elem->n_sides(); ++s)
+  {
+    auto side = elem->build_side_ptr(s);
+    if (side->type() != side_topo)
+      return libMesh::INVALID_ELEM;
+  }
+
+  return side_topo;
+}
+
+// Host reference for class_from_topology().
+//
+// Classifies `topo` from the element dimension and the number of dofs that
+// FEInterface::n_dofs() reports for a FIRST-order LAGRANGE FEType on it.
+// Elements with dim() == 1 are EDGE.  Any dimension / dof-count combination
+// that is not listed below yields N_CLASSES.
+static libMesh::FEElemClass
+host_class_from_topology_oracle(libMesh::ElemType topo)
+{
+  auto elem = build_reference_elem(topo);
+
+  if (elem->dim() == 1)
+    return libMesh::FEElemClass::EDGE;
+
+  const libMesh::FEType fe_type(libMesh::FIRST, libMesh::LAGRANGE);
+  const unsigned int ndofs = libMesh::FEInterface::n_dofs(fe_type, 0, elem.get());
+
+  // Every inner case returns, so there is no fall-through between dimensions.
+  switch (elem->dim())
+  {
+    case 2:
+      switch (ndofs)
+      {
+        case 3: return libMesh::FEElemClass::TRI;
+        case 4: return libMesh::FEElemClass::QUAD;
+        default: return libMesh::FEElemClass::N_CLASSES;
+      }
+
+    case 3:
+      switch (ndofs)
+      {
+        case 4: return libMesh::FEElemClass::TET;
+        case 8: return libMesh::FEElemClass::HEX;
+        case 6: return libMesh::FEElemClass::PRISM;
+        case 5: return libMesh::FEElemClass::PYRAMID;
+        default: return libMesh::FEElemClass::N_CLASSES;
+      }
+
+    default:
+      return libMesh::FEElemClass::N_CLASSES;
+  }
+}
+
+// Host reference for n_dofs(FEShapeKey): the value FEInterface::n_dofs()
+// returns for the key's order/family on an element of key.elem_type.
+static unsigned int
+host_n_dofs_oracle(libMesh::Kokkos::FEShapeKey key)
+{
+  auto elem = build_reference_elem(key.elem_type);
+  return libMesh::FEInterface::n_dofs(libMesh::FEType(key.order, key.family), 0, elem.get());
+}
+
+} // anonymous namespace
+
+// ---------------------------------------------------------------------------
+// Test 1: get_side_topology() against libMesh side topology.
+//
+// Fills one side_topology_case per entry of `parents` with the host oracle
+// value, evaluates get_side_topology() for each case inside a Kokkos
+// parallel_for, and returns the number of mismatching cases.
+// ---------------------------------------------------------------------------
+static int
+test_side_topology()
+{
+  static const libMesh::ElemType parents[] = {
+    libMesh::EDGE2,
+    libMesh::EDGE3,
+    libMesh::EDGE4,
+    libMesh::TRI3,
+    libMesh::TRI7,
+    libMesh::QUAD4,
+    libMesh::TRI6,
+    libMesh::QUAD8,
+    libMesh::QUAD9,
+    libMesh::TET4,
+    libMesh::HEX8,
+    libMesh::TET10,
+    libMesh::TET14,
+    libMesh::HEX20,
+    libMesh::HEX27
+  };
+  constexpr int n_cases = sizeof(parents) / sizeof(parents[0]);
+
+  // NOTE(review): the View declarations in this function carry no template
+  // argument list in this copy; the element types look like they were lost
+  // together with the #include targets above -- confirm against upstream.
+  Kokkos::View d_cases(std::string("side_cases"), n_cases);
+  {
+    // Fill a host mirror first, then copy the whole table into d_cases.
+    auto h = Kokkos::create_mirror_view(d_cases);
+    for (int i = 0; i < n_cases; ++i)
+    {
+      h(i).parent = parents[i];
+      h(i).expected = host_side_topology_oracle(parents[i]);
+    }
+    Kokkos::deep_copy(d_cases, h);
+  }
+
+  // Failure counter, zeroed before the kernel runs.
+  Kokkos::View d_fail(std::string("side_fail"));
+  Kokkos::deep_copy(d_fail, 0);
+
+  Kokkos::parallel_for(
+    n_cases,
+    KOKKOS_LAMBDA(int i) {
+      using namespace libMesh::Kokkos;
+      // Cases run concurrently, so the shared counter is bumped atomically.
+      if (get_side_topology(d_cases(i).parent) != d_cases(i).expected)
+        Kokkos::atomic_add(&d_fail(), 1);
+    });
+  Kokkos::fence();
+
+  // Read the counter back into a plain int for the caller.
+  int fail = 0;
+  Kokkos::deep_copy(fail, d_fail);
+  return fail;
+}
+
+// ---------------------------------------------------------------------------
+// Test 2: class_from_topology() against libMesh FE oracle.
+// ---------------------------------------------------------------------------
+// Evaluates class_from_topology() inside a Kokkos parallel_for for every
+// element type listed in `topos` and compares each result with the value
+// host_class_from_topology_oracle() computed on the host.
+//
+// Returns the number of mismatching cases (0 means the test passed).
+static int
+test_class_from_topology()
+{
+  static const libMesh::ElemType topos[] = {
+    libMesh::EDGE2,
+    libMesh::EDGE3,
+    libMesh::EDGE4,
+    libMesh::TRI3,
+    libMesh::TRI6,
+    libMesh::TRI7,
+    libMesh::QUAD4,
+    libMesh::QUAD8,
+    libMesh::QUAD9,
+    libMesh::TET4,
+    libMesh::TET10,
+    libMesh::TET14,
+    libMesh::HEX8,
+    libMesh::HEX20,
+    libMesh::HEX27,
+    libMesh::PRISM6,
+    libMesh::PRISM15,
+    libMesh::PRISM18,
+    libMesh::PRISM20,
+    libMesh::PRISM21,
+    libMesh::PYRAMID5,
+    libMesh::PYRAMID13,
+    libMesh::PYRAMID14,
+    libMesh::PYRAMID18
+  };
+  constexpr int n_cases = sizeof(topos) / sizeof(topos[0]);
+
+  // One class_from_topology_case per entry of `topos`.  The element type has
+  // to be spelled out: it cannot be deduced from the (label, extent)
+  // constructor arguments.
+  Kokkos::View<class_from_topology_case *> d_cases(std::string("class_cases"), n_cases);
+  {
+    // Fill a host mirror first, then copy the whole table into d_cases.
+    auto h = Kokkos::create_mirror_view(d_cases);
+    for (int i = 0; i < n_cases; ++i)
+    {
+      h(i).topo = topos[i];
+      h(i).expected = host_class_from_topology_oracle(topos[i]);
+    }
+    Kokkos::deep_copy(d_cases, h);
+  }
+
+  // Rank-0 failure counter, zeroed before the kernel runs.
+  Kokkos::View<int> d_fail(std::string("class_fail"));
+  Kokkos::deep_copy(d_fail, 0);
+
+  Kokkos::parallel_for(
+    n_cases,
+    KOKKOS_LAMBDA(int i) {
+      using namespace libMesh::Kokkos;
+      // Cases run concurrently, so the shared counter is bumped atomically.
+      if (class_from_topology(d_cases(i).topo) != d_cases(i).expected)
+        Kokkos::atomic_add(&d_fail(), 1);
+    });
+  Kokkos::fence();
+
+  // Read the counter back into a plain int for the caller.
+  int fail = 0;
+  Kokkos::deep_copy(fail, d_fail);
+  return fail;
+}
+
+// ---------------------------------------------------------------------------
+// Test 3: n_dofs() for Kokkos-supported exact LAGRANGE keys against
+// libMesh FEInterface.
+// ---------------------------------------------------------------------------
+// Evaluates n_dofs(FEShapeKey) inside a Kokkos parallel_for for every LAGRANGE
+// key in `keys` and compares each result with host_n_dofs_oracle().
+//
+// Returns the number of mismatching cases (0 means the test passed).
+static int
+test_n_dofs_lagrange()
+{
+  using libMesh::Kokkos::FEShapeKey;
+
+  static const FEShapeKey keys[] = {
+    { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST },
+
+    { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::TRI7, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND },
+
+    { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::TET14, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND },
+    { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST },
+    { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }
+  };
+  constexpr int n_cases = sizeof(keys) / sizeof(keys[0]);
+
+  // One n_dof_case per key.  The element type has to be spelled out: it
+  // cannot be deduced from the (label, extent) constructor arguments.
+  Kokkos::View<n_dof_case *> d_cases(std::string("lagrange_cases"), n_cases);
+  {
+    // Fill a host mirror first, then copy the whole table into d_cases.
+    auto h = Kokkos::create_mirror_view(d_cases);
+    for (int i = 0; i < n_cases; ++i)
+    {
+      h(i).key = keys[i];
+      h(i).expected = host_n_dofs_oracle(keys[i]);
+    }
+    Kokkos::deep_copy(d_cases, h);
+  }
+
+  // Rank-0 failure counter, zeroed before the kernel runs.
+  Kokkos::View<int> d_fail(std::string("lagrange_fail"));
+  Kokkos::deep_copy(d_fail, 0);
+
+  Kokkos::parallel_for(
+    n_cases,
+    KOKKOS_LAMBDA(int i) {
+      using namespace libMesh::Kokkos;
+      // Cases run concurrently, so the shared counter is bumped atomically.
+      if (n_dofs(d_cases(i).key) != d_cases(i).expected)
+        Kokkos::atomic_add(&d_fail(), 1);
+    });
+  Kokkos::fence();
+
+  // Read the counter back into a plain int for the caller.
+  int fail = 0;
+  Kokkos::deep_copy(fail, d_fail);
+  return fail;
+}
+
+// ---------------------------------------------------------------------------
+// Test 4: n_dofs() for Kokkos-supported MONOMIAL keys against libMesh
+// FEInterface.
+//
+// For every order in `orders` this checks one edge key, one pyramid key and
+// one key per entry of `higher_dim_topos`.  Returns the number of mismatching
+// cases (0 means the test passed).
+// ---------------------------------------------------------------------------
+static int
+test_n_dofs_monomial()
+{
+  using libMesh::Kokkos::FEShapeKey;
+
+  static const libMesh::Order orders[] = {
+    libMesh::CONSTANT,
+    libMesh::FIRST,
+    libMesh::SECOND,
+    libMesh::THIRD,
+    libMesh::FOURTH,
+    libMesh::FIFTH
+  };
+  static const libMesh::ElemType higher_dim_topos[] = {
+    libMesh::TRI7,
+    libMesh::QUAD9,
+    libMesh::TET14,
+    libMesh::HEX27,
+    libMesh::PRISM21
+  };
+
+  // Per order: 2 cases (edge + pyramid) plus one per higher_dim_topos entry.
+  // This must stay in step with the fill loop below.
+  constexpr int n_cases = sizeof(orders) / sizeof(orders[0]) *
+                          (2 + sizeof(higher_dim_topos) / sizeof(higher_dim_topos[0]));
+
+  // The element type has to be spelled out: it cannot be deduced from the
+  // (label, extent) constructor arguments.
+  Kokkos::View<n_dof_case *> d_cases(std::string("monomial_cases"), n_cases);
+  {
+    auto h = Kokkos::create_mirror_view(d_cases);
+    int i = 0;
+    for (const auto order : orders)
+    {
+      // EDGE4 and PYRAMID18 are only paired with orders up to THIRD; higher
+      // orders use EDGE3 and PYRAMID14 instead.  Presumably this follows the
+      // support boundary listed in test_support_contract(), where MONOMIAL on
+      // EDGE4/PYRAMID18 at FOURTH is expected to be unsupported -- confirm.
+      const libMesh::ElemType edge_topo =
+        (order <= libMesh::THIRD) ? libMesh::EDGE4 : libMesh::EDGE3;
+      const libMesh::ElemType pyramid_topo =
+        (order <= libMesh::THIRD) ? libMesh::PYRAMID18 : libMesh::PYRAMID14;
+
+      h(i).key = { libMesh::MONOMIAL, edge_topo, order };
+      h(i).expected = host_n_dofs_oracle(h(i).key);
+      ++i;
+
+      h(i).key = { libMesh::MONOMIAL, pyramid_topo, order };
+      h(i).expected = host_n_dofs_oracle(h(i).key);
+      ++i;
+
+      for (const auto topo : higher_dim_topos)
+      {
+        h(i).key = { libMesh::MONOMIAL, topo, order };
+        h(i).expected = host_n_dofs_oracle(h(i).key);
+        ++i;
+      }
+    }
+    Kokkos::deep_copy(d_cases, h);
+  }
+
+  // Rank-0 failure counter, zeroed before the kernel runs.
+  Kokkos::View<int> d_fail(std::string("monomial_fail"));
+  Kokkos::deep_copy(d_fail, 0);
+
+  Kokkos::parallel_for(
+    n_cases,
+    KOKKOS_LAMBDA(int i) {
+      using namespace libMesh::Kokkos;
+      // Cases run concurrently, so the shared counter is bumped atomically.
+      if (n_dofs(d_cases(i).key) != d_cases(i).expected)
+        Kokkos::atomic_add(&d_fail(), 1);
+    });
+  Kokkos::fence();
+
+  // Read the counter back into a plain int for the caller.
+  int fail = 0;
+  Kokkos::deep_copy(fail, d_fail);
+  return fail;
+}
+
+// ---------------------------------------------------------------------------
+// Test 5: support predicates agree on the Kokkos evaluator boundary.
+//
+// For every key in `cases`, supports_shape(), supports_grad_shape() and
+// supports_n_dofs() must all return the hard-coded expected value, and must
+// agree with each other.  Returns the number of failing cases.
+// ---------------------------------------------------------------------------
+static int
+test_support_contract()
+{
+  using libMesh::Kokkos::FEShapeKey;
+
+  static const support_case cases[] = {
+    { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, true },
+    { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, true },
+    { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, true },
+    { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, true },
+    { { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, true },
+    { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, true },
+    { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::CONSTANT }, false },
+    { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::FIRST }, false },
+    { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, false },
+    { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::THIRD }, false },
+    { { libMesh::LAGRANGE, libMesh::TET14, libMesh::THIRD }, false },
+    { { libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST }, false },
+    { { libMesh::LAGRANGE, libMesh::PRISM15, libMesh::SECOND }, false },
+    { { libMesh::LAGRANGE, libMesh::PYRAMID5, libMesh::FIRST }, false },
+    { { libMesh::LAGRANGE, libMesh::PYRAMID14, libMesh::SECOND }, false },
+    { { libMesh::LAGRANGE, libMesh::PYRAMID18, libMesh::THIRD }, false },
+    { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::INVALID_ORDER }, false },
+    { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::THIRD }, true },
+    { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FIFTH }, true },
+    { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, true },
+    { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FIFTH }, true },
+    { { libMesh::MONOMIAL, libMesh::PRISM21, libMesh::FIFTH }, true },
+    { { libMesh::MONOMIAL, libMesh::PYRAMID14, libMesh::FIFTH }, true },
+    { { libMesh::MONOMIAL, libMesh::NODEELEM, libMesh::CONSTANT }, false },
+    { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::FOURTH }, false },
+    { { libMesh::MONOMIAL, libMesh::PYRAMID18, libMesh::FOURTH }, false },
+    { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::SIXTH }, false },
+    { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SIXTH }, false },
+    { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::INVALID_ORDER }, false }
+  };
+  constexpr int n_cases = sizeof(cases) / sizeof(cases[0]);
+
+  // The element type has to be spelled out: it cannot be deduced from the
+  // (label, extent) constructor arguments.
+  Kokkos::View<support_case *> d_cases(std::string("support_cases"), n_cases);
+  {
+    // Fill a host mirror first, then copy the whole table into d_cases.
+    auto h = Kokkos::create_mirror_view(d_cases);
+    for (int i = 0; i < n_cases; ++i)
+      h(i) = cases[i];
+    Kokkos::deep_copy(d_cases, h);
+  }
+
+  // Rank-0 failure counter, zeroed before the kernel runs.
+  Kokkos::View<int> d_fail(std::string("support_fail"));
+  Kokkos::deep_copy(d_fail, 0);
+
+  Kokkos::parallel_for(
+    n_cases,
+    KOKKOS_LAMBDA(int i) {
+      using namespace libMesh::Kokkos;
+
+      const bool shape_supported = supports_shape(d_cases(i).key);
+      const bool grad_supported = supports_grad_shape(d_cases(i).key);
+      const bool ndofs_supported = supports_n_dofs(d_cases(i).key);
+
+      // A case fails once, no matter how many of the predicates disagree.
+      if (shape_supported != d_cases(i).expected ||
+          grad_supported != d_cases(i).expected ||
+          ndofs_supported != d_cases(i).expected ||
+          shape_supported != grad_supported ||
+          shape_supported != ndofs_supported)
+        Kokkos::atomic_add(&d_fail(), 1);
+    });
+  Kokkos::fence();
+
+  // Read the counter back into a plain int for the caller.
+  int fail = 0;
+  Kokkos::deep_copy(fail, d_fail);
+  return fail;
+}
+
+// ---------------------------------------------------------------------------
+// main
+//
+// Initializes Kokkos and libMesh, runs the five tests above in order, prints
+// a PASS/FAIL line with the failure count for each, and finalizes Kokkos.
+// Returns 0 when no test reported a failure and 1 otherwise.
+// ---------------------------------------------------------------------------
+int
+main(int argc, char ** argv)
+{
+  Kokkos::initialize(argc, argv);
+  libMesh::LibMeshInit init(argc, argv);
+
+  int total_fail = 0;
+
+  {
+    const int f = test_side_topology();
+    std::printf("[side_topology_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+  {
+    const int f = test_class_from_topology();
+    std::printf("[class_topology_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+  {
+    const int f = test_n_dofs_lagrange();
+    std::printf("[lagrange_ndofs_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+  {
+    const int f = test_n_dofs_monomial();
+    std::printf("[monomial_ndofs_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+  {
+    const int f = test_support_contract();
+    std::printf("[support_contract_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+
+  // All Views created by the tests were function-local, so they are already
+  // gone by the time Kokkos is finalized here.
+  Kokkos::finalize();
+
+  if (total_fail == 0)
+    std::printf("ALL TESTS PASSED\n");
+  else
+    std::printf("%d TEST(S) FAILED\n", total_fail);
+
+  return total_fail ? 1 : 0;
+}
diff --git a/tests/fe/kokkos_quadrature_oracle_test.K b/tests/fe/kokkos_quadrature_oracle_test.K
new file mode 100644
index 00000000000..96ebfe71640
--- /dev/null
+++ b/tests/fe/kokkos_quadrature_oracle_test.K
@@ -0,0 +1,747 @@
+// GPU kernel oracle tests for libMesh::Kokkos quadrature and map helpers.
+//
+// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that
+// QGauss, FEMap, and FEBase::side_map are available for oracle values.
+//
+// The test suite covers:
+// A.
GaussQuadrature point and weight tables against libMesh QGauss. +// B. physical_point() and jacobian() against libMesh FEMap::map() and +// FEMap::map_deriv(). +// C. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase. +// D. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles. +// E. map_face_qp_to_parent() against libMesh FEBase::side_map(). +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_quadrature.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::GaussQuadrature; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_host_qgauss; +using kokkos_test_utils::build_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::tensor_component; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::vector_component; + +static constexpr 
double tol = 1.0e-13; + +namespace +{ + +struct quadrature_case +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int order; +}; + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_helper_case +{ + libMesh::ElemType parent_topo; + unsigned int side_id; + const char * name; +}; + +} // anonymous namespace + +static int +test_quadrature_case(const quadrature_case & info) +{ + std::vector x_ref, y_ref, z_ref, w_ref; + const unsigned int host_nqp = + build_host_qgauss(info.topo, info.dim, info.order, x_ref, y_ref, z_ref, w_ref); + + Kokkos::View d_nqp(std::string("nqp")); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + d_nqp() = GaussQuadrature::n_points(info.topo, info.order); + }); + Kokkos::fence(); + + unsigned int device_nqp = 0; + Kokkos::deep_copy(device_nqp, d_nqp); + + int fail = 0; + if (device_nqp != host_nqp) + ++fail; + + Kokkos::View d_x(std::string("qx"), host_nqp); + Kokkos::View d_y(std::string("qy"), host_nqp); + Kokkos::View d_z(std::string("qz"), host_nqp); + Kokkos::View d_w(std::string("qw"), host_nqp); + + Kokkos::parallel_for( + host_nqp, + KOKKOS_LAMBDA(int qp) { + const RealVector pt = GaussQuadrature::point(info.topo, info.order, qp); + d_x(qp) = vector_component(pt, 0); + d_y(qp) = vector_component(pt, 1); + d_z(qp) = vector_component(pt, 2); + d_w(qp) = GaussQuadrature::weight(info.topo, info.order, qp); + }); + Kokkos::fence(); + + fail += compare_device_values(d_x, x_ref); + fail += compare_device_values(d_y, y_ref); + fail += compare_device_values(d_z, z_ref); + fail += compare_device_values(d_w, w_ref); + + if (fail) + std::printf(" quadrature mismatch: topo=%d dim=%u order=%u (%d failures)\n", + static_cast(info.topo), info.dim, info.order, fail); + + return fail; +} + +static int +test_quadrature_against_qgauss() +{ + int fail = 0; + + const libMesh::ElemType edge_topos[] = { libMesh::EDGE2, libMesh::EDGE3 }; + const libMesh::ElemType quad_topos[] = { libMesh::QUAD4, 
libMesh::QUAD8, libMesh::QUAD9 }; + const libMesh::ElemType hex_topos[] = { libMesh::HEX8, libMesh::HEX20, libMesh::HEX27 }; + const libMesh::ElemType tri_topos[] = { libMesh::TRI3, libMesh::TRI6 }; + const libMesh::ElemType tet_topos[] = { libMesh::TET4, libMesh::TET10 }; + + for (const auto topo : edge_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 1, order }); + + for (const auto topo : quad_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 2, order }); + + for (const auto topo : hex_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 3, order }); + + for (const auto topo : tri_topos) + for (unsigned int order = 0; order <= 6; ++order) + fail += test_quadrature_case({ topo, 2, order }); + + for (const auto topo : tet_topos) + for (unsigned int order = 0; order <= 6; ++order) + fail += test_quadrature_case({ topo, 3, order }); + + return fail; +} + + +static element_fixture +build_hex8_fixture() +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(libMesh::HEX8); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(8); + + static const double coords[8][3] = { + {0.0, 0.0, 0.0}, + {1.0, 0.0, 0.0}, + {1.0, 1.0, 0.0}, + {0.0, 1.0, 0.0}, + {0.0, 0.0, 1.0}, + {1.0, 0.0, 1.0}, + {1.0, 1.0, 1.0}, + {0.0, 1.0, 1.0} + }; + + for (unsigned int i = 0; i < 8; ++i) + { + fixture.nodes.push_back(libMesh::Node::build(coords[i][0], coords[i][1], coords[i][2], i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static element_fixture +build_tri3_fixture() +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(libMesh::TRI3); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(3); + + static const double coords[3][3] = { + {0.0, 0.0, 0.0}, + {1.0, 0.0, 0.0}, + {0.0, 1.0, 0.0} + }; + + for (unsigned int i = 0; i < 
3; ++i) + { + fixture.nodes.push_back(libMesh::Node::build(coords[i][0], coords[i][1], coords[i][2], i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static int +test_physical_map_hex8() +{ + auto fixture = build_hex8_fixture(); + + const libMesh::Point ref_center(0.0, 0.0, 0.0); + const libMesh::Point ref_corner(-1.0, -1.0, -1.0); + + const libMesh::Point host_center = libMesh::FEMap::map(3, fixture.elem.get(), ref_center); + const libMesh::Point host_corner = libMesh::FEMap::map(3, fixture.elem.get(), ref_corner); + const libMesh::Point host_dxi = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 0, ref_center); + const libMesh::Point host_deta = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 1, ref_center); + const libMesh::Point host_dzeta = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 2, ref_center); + + std::vector ref_values = { + host_center(0), host_center(1), host_center(2), + host_dxi(0), host_dxi(1), host_dxi(2), + host_deta(0), host_deta(1), host_deta(2), + host_dzeta(0), host_dzeta(1), host_dzeta(2), + host_corner(0), host_corner(1), host_corner(2) + }; + + auto d_coords = upload_point_coordinates(*fixture.elem, "hex_coords"); + + Kokkos::View d_results(std::string("hex_results"), ref_values.size()); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + RealVector nodes[8]; + for (unsigned int i = 0; i < 8; ++i) + nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + const RealVector xyz_center = + libMesh::Kokkos::physical_point(nodes, 8, 0.0, 0.0, 0.0); + const RealTensor J_center = + libMesh::Kokkos::jacobian(nodes, 8, 0.0, 0.0, 0.0); + const RealVector xyz_corner = libMesh::Kokkos::physical_point( + nodes, 8, -1.0, -1.0, -1.0); + + d_results(0) = vector_component(xyz_center, 0); + d_results(1) = vector_component(xyz_center, 1); + d_results(2) = vector_component(xyz_center, 2); + d_results(3) = tensor_component(J_center, 0, 0); + d_results(4) = 
tensor_component(J_center, 0, 1); + d_results(5) = tensor_component(J_center, 0, 2); + d_results(6) = tensor_component(J_center, 1, 0); + d_results(7) = tensor_component(J_center, 1, 1); + d_results(8) = tensor_component(J_center, 1, 2); + d_results(9) = tensor_component(J_center, 2, 0); + d_results(10) = tensor_component(J_center, 2, 1); + d_results(11) = tensor_component(J_center, 2, 2); + d_results(12) = vector_component(xyz_corner, 0); + d_results(13) = vector_component(xyz_corner, 1); + d_results(14) = vector_component(xyz_corner, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, ref_values); +} + +static int +test_physical_map_tri3() +{ + auto fixture = build_tri3_fixture(); + + const libMesh::Point ref_pt(1.0 / 3.0, 1.0 / 3.0, 0.0); + + const libMesh::Point host_xyz = libMesh::FEMap::map(2, fixture.elem.get(), ref_pt); + const libMesh::Point host_dxi = libMesh::FEMap::map_deriv(2, fixture.elem.get(), 0, ref_pt); + const libMesh::Point host_deta = libMesh::FEMap::map_deriv(2, fixture.elem.get(), 1, ref_pt); + + std::vector ref_values = { + host_xyz(0), host_xyz(1), host_xyz(2), + host_dxi(0), host_dxi(1), host_dxi(2), + host_deta(0), host_deta(1), host_deta(2) + }; + + auto d_coords = upload_point_coordinates(*fixture.elem, "tri_coords"); + + Kokkos::View d_results(std::string("tri_results"), ref_values.size()); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + RealVector nodes[3]; + for (unsigned int i = 0; i < 3; ++i) + nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + const RealVector xyz = libMesh::Kokkos::physical_point( + nodes, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + const RealTensor J = libMesh::Kokkos::jacobian( + nodes, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + + d_results(0) = vector_component(xyz, 0); + d_results(1) = vector_component(xyz, 1); + d_results(2) = vector_component(xyz, 2); + d_results(3) = tensor_component(J, 0, 0); + d_results(4) = tensor_component(J, 0, 1); + 
d_results(5) = tensor_component(J, 0, 2); + d_results(6) = tensor_component(J, 1, 0); + d_results(7) = tensor_component(J, 1, 1); + d_results(8) = tensor_component(J, 1, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, ref_values); +} + +template +static int +test_map_helpers_case_impl(const map_helper_case & info) +{ + auto fixture = build_reference_fixture(Topo); + const auto context = build_map_helper_context(fixture, info.topo, "map_helper"); + const int fail = evaluate_map_helper_context(context, "map_helper_results", tol); + if (fail) + std::printf(" quadrature map-helper mismatch: %s (%d failures)\n", + info.name, + fail); + return fail; +} + +struct quadrature_map_helper_dispatch +{ + explicit quadrature_map_helper_dispatch(const map_helper_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_helpers_case_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature map-helper topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; +}; + +static int +test_map_helpers_case(const map_helper_case & info) +{ + const quadrature_map_helper_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_face_map_helpers_case_3d_impl(const face_helper_context & context, + const face_helper_case & info, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, "face_helper_results", tol); + if (fail) + std::printf(" quadrature face-helper mismatch: %s parent_type=%d side_id=%u side_type=%d (%d failures)\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_face_map_helpers_case_2d_impl(const face_helper_context & context, + const face_helper_case & info, + libMesh::ElemType side_topo) +{ 
+ const int fail = + evaluate_face_helper_context_2d(context, "face_helper_results", tol); + if (fail) + std::printf(" quadrature face-helper mismatch: %s parent_type=%d side_id=%u side_type=%d (%d failures)\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct quadrature_face_side_dispatch_3d +{ + quadrature_face_side_dispatch_3d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_map_helpers_case_3d_impl(context, info, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper side: %s parent_type=%d side_id=%u side_type=%d\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +template +struct quadrature_face_side_dispatch_2d +{ + quadrature_face_side_dispatch_2d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_map_helpers_case_2d_impl(context, info, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper side: %s parent_type=%d side_id=%u side_type=%d\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +struct quadrature_face_parent_dispatch_2d +{ + quadrature_face_parent_dispatch_2d(const face_helper_context & in_context, + 
const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const quadrature_face_side_dispatch_2d dispatch(context, info, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper parent: %s parent_type=%d side_id=%u\n", + info.name, + static_cast(topo), + info.side_id); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +static int +test_face_map_helpers_case(const face_helper_case & info) +{ + auto fixture = build_reference_fixture(info.parent_topo); + auto side = fixture.elem->build_side_ptr(info.side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, info.side_id, "face_helper"); + + if (context.parent_dim == 3) + { + const quadrature_face_side_dispatch_3d dispatch(context, info, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const quadrature_face_parent_dispatch_2d dispatch(context, info, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported quadrature face-helper parent dimension: %s parent_type=%d side_id=%u dim=%u\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + context.parent_dim); + return 1; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + + // FE::side_map() relies on FEMap::psi_map, which is only 
populated after + // some mapping quantity (e.g. xyz) has been requested on the FE object. + fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_face_qp_to_parent_case(const char * case_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + { + std::vector refspace_nodes; + libMesh::FEBase::get_refspace_nodes(parent.type(), refspace_nodes); + + std::printf(" face_qp mismatch: case=%s parent_type=%d side_type=%d side_id=%u\n", + case_name, + static_cast(parent.type()), + static_cast(side.type()), + side_id); + std::printf(" face_qpt=(%.17g, %.17g, %.17g)\n", + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + std::printf(" host =(%.17g, %.17g, %.17g)\n", + vector_component(host, 0), vector_component(host, 1), vector_component(host, 2)); + std::printf(" kokkos =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0), vector_component(kokkos, 1), vector_component(kokkos, 2)); + std::printf(" diff =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0) - vector_component(host, 0), + vector_component(kokkos, 1) - vector_component(host, 1), + vector_component(kokkos, 2) - vector_component(host, 2)); + std::printf(" 
side nodes / parent refspace nodes:\n"); + + for (unsigned int k = 0; k < side.n_nodes(); ++k) + { + const unsigned int parent_node = parent.local_side_node(side_id, k); + const libMesh::Point parent_refspace = refspace_nodes[parent_node]; + std::printf(" k=%u side_node_id=%llu parent_node=%u parent_refspace=(%.17g, %.17g, %.17g)\n", + k, + libMesh::cast_int(side.node_id(k)), + parent_node, + parent_refspace(0), + parent_refspace(1), + parent_refspace(2)); + } + } + + return fail; +} + +static int +test_face_qp_to_parent_ref_coords() +{ + using libMesh::Elem; + using libMesh::Node; + + int fail = 0; + + { + auto edge = Elem::build(libMesh::EDGE2); + edge->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(3.25, -2.0, 5.0, 0); + auto n1 = Node::build(9.50, 4.0, -1.0, 1); + edge->set_node(0, n0.get()); + edge->set_node(1, n1.get()); + + auto side0 = edge->build_side_ptr(0); + auto side1 = edge->build_side_ptr(1); + + fail += check_face_qp_to_parent_case("edge2_side0", *edge, *side0, 0, libMesh::Kokkos::zero_vector()); + fail += check_face_qp_to_parent_case("edge2_side1", *edge, *side1, 1, libMesh::Kokkos::zero_vector()); + } + + { + auto tri3 = Elem::build(libMesh::TRI3); + tri3->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(10.0, 20.0, 0.0, 0); + auto n1 = Node::build(14.0, 20.0, 0.0, 1); + auto n2 = Node::build(10.0, 23.0, 0.0, 2); + tri3->set_node(0, n0.get()); + tri3->set_node(1, n1.get()); + tri3->set_node(2, n2.get()); + + auto side0 = tri3->build_side_ptr(0); + + fail += check_face_qp_to_parent_case("tri3_side0", *tri3, *side0, 0, libMesh::Kokkos::zero_vector()); + } + + { + auto tri6 = Elem::build(libMesh::TRI6); + tri6->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(4.0, 1.0, 0.0, 0); + auto n1 = Node::build(9.0, 2.0, 0.0, 1); + auto n2 = Node::build(3.0, 8.0, 0.0, 2); + auto n3 = Node::build(42.0, -17.0, 5.0, 3); + auto n4 = Node::build(11.0, 11.0, 1.0, 4); + auto n5 = Node::build(-7.0, 4.0, 2.0, 5); + 
tri6->set_node(0, n0.get()); + tri6->set_node(1, n1.get()); + tri6->set_node(2, n2.get()); + tri6->set_node(3, n3.get()); + tri6->set_node(4, n4.get()); + tri6->set_node(5, n5.get()); + + auto side0 = tri6->build_side_ptr(0); + + fail += check_face_qp_to_parent_case("tri6_side0", *tri6, *side0, 0, libMesh::Kokkos::zero_vector()); + } + + return fail; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const int f = test_quadrature_against_qgauss(); + std::printf("[quadrature_qgauss_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_physical_map_hex8(); + std::printf("[physical_map_hex8] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_physical_map_tri3(); + std::printf("[physical_map_tri3] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const map_helper_case cases[] = { + { libMesh::EDGE3, "EDGE3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX20, "HEX20" } + }; + + for (const auto & info : cases) + { + const int f = test_map_helpers_case(info); + std::printf("[map_helper_oracle] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + { + const face_helper_case cases[] = { + { libMesh::TRI6, 0, "TRI6/side0" }, + { libMesh::TET10, 0, "TET10/side0" }, + { libMesh::HEX20, 0, "HEX20/side0" } + }; + + for (const auto & info : cases) + { + const int f = test_face_map_helpers_case(info); + std::printf("[face_helper_oracle] [%s] %s (%d failures)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + } + { + const int f = test_face_qp_to_parent_ref_coords(); + std::printf("[face_qp_parent_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/quadrature/quadrature_exactness.h b/tests/quadrature/quadrature_exactness.h new file mode 100644 index 00000000000..5db510f09ef --- /dev/null +++ b/tests/quadrature/quadrature_exactness.h @@ -0,0 +1,176 @@ +#ifndef LIBMESH_QUADRATURE_EXACTNESS_H +#define LIBMESH_QUADRATURE_EXACTNESS_H + +#include +#include +#include + +#include +#include +#include + +namespace quadrature_exactness +{ + +inline libMesh::Real +axis_integral(const unsigned int power) +{ + return (power % 2) ? libMesh::Real(0) : (libMesh::Real(2) / (power + 1)); +} + +inline libMesh::Real +edge_integral(const unsigned int x_power) +{ + return axis_integral(x_power); +} + +inline libMesh::Real +quad_integral(const unsigned int x_power, + const unsigned int y_power) +{ + return axis_integral(x_power) * axis_integral(y_power); +} + +inline libMesh::Real +tri_integral(const unsigned int x_power, + const unsigned int y_power) +{ + libMesh::Real analytical = 1.0; + + const unsigned int larger_power = std::max(x_power, y_power); + const unsigned int smaller_power = std::min(x_power, y_power); + + std::vector numerator(smaller_power > 1 ? 
smaller_power - 1 : 0); + std::vector denominator(2 + smaller_power); + + std::iota(numerator.begin(), numerator.end(), 2); + std::iota(denominator.begin(), denominator.end(), larger_power + 1); + + for (std::size_t i = 0; i < denominator.size(); ++i) + { + if (i < numerator.size()) + analytical *= numerator[i]; + + analytical /= denominator[i]; + } + + return analytical; +} + +inline libMesh::Real +hex_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + return axis_integral(x_power) * axis_integral(y_power) * axis_integral(z_power); +} + +inline libMesh::Real +tet_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + libMesh::Real analytical = 1.0; + + unsigned int sorted_powers[3] = {x_power, y_power, z_power}; + std::sort(sorted_powers, sorted_powers + 3); + + std::vector numerator_1(sorted_powers[0] > 1 ? sorted_powers[0] - 1 : 0); + std::vector numerator_2(sorted_powers[1] > 1 ? sorted_powers[1] - 1 : 0); + std::vector denominator(3 + sorted_powers[0] + sorted_powers[1]); + + std::iota(numerator_1.begin(), numerator_1.end(), 2); + std::iota(numerator_2.begin(), numerator_2.end(), 2); + std::iota(denominator.begin(), denominator.end(), sorted_powers[2] + 1); + + for (std::size_t i = 0; i < denominator.size(); ++i) + { + if (i < numerator_1.size()) + analytical *= numerator_1[i]; + + if (i < numerator_2.size()) + analytical *= numerator_2[i]; + + analytical /= denominator[i]; + } + + return analytical; +} + +inline libMesh::Real +prism_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + return tri_integral(x_power, y_power) * axis_integral(z_power); +} + +inline libMesh::Real +pyramid_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + if (x_power % 2 || y_power % 2) + return libMesh::Real(0); + + const unsigned int binom = + libMesh::Utility::binomial(x_power + 
y_power + z_power + 3, z_power); + + return libMesh::Real(4) / + ((x_power + 1) * (y_power + 1) * binom * (x_power + y_power + z_power + 3)); +} + +inline libMesh::Real +monomial_integral(const libMesh::ElemType elem_type, + const unsigned int x_power, + const unsigned int y_power = 0, + const unsigned int z_power = 0) +{ + switch (elem_type) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return edge_integral(x_power); + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + return tri_integral(x_power, y_power); + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return quad_integral(x_power, y_power); + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + return tet_integral(x_power, y_power, z_power); + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return hex_integral(x_power, y_power, z_power); + + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + return prism_integral(x_power, y_power, z_power); + + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: + return pyramid_integral(x_power, y_power, z_power); + + default: + return libMesh::Real(0); + } +} + +} // namespace quadrature_exactness + +#endif // LIBMESH_QUADRATURE_EXACTNESS_H diff --git a/tests/quadrature/quadrature_test.C b/tests/quadrature/quadrature_test.C index 1dd39a01832..f72440f417e 100644 --- a/tests/quadrature/quadrature_test.C +++ b/tests/quadrature/quadrature_test.C @@ -2,10 +2,10 @@ #include #include #include -#include #include -#include // std::iota + +#include "quadrature_exactness.h" #include "libmesh_cppunit.h" @@ -205,115 +205,47 @@ private: const std::function edge_integrals = [](int mode, int, int) { - return (mode % 2) ? 
0 : (Real(2.0) / (mode+1)); + return quadrature_exactness::edge_integral(static_cast(mode)); }; const std::function quad_integrals = [](int modex, int modey, int) { - const Real exactx = (modex % 2) ? - 0 : (Real(2.0) / (modex+1)); - - const Real exacty = (modey % 2) ? - 0 : (Real(2.0) / (modey+1)); - - return exactx*exacty; + return quadrature_exactness::quad_integral(static_cast(modex), + static_cast(modey)); }; const std::function tri_integrals = [](int x_power, int y_power, int) { - // Compute the true integral, a! b! / (a + b + 2)! - Real analytical = 1.0; - - unsigned - larger_power = std::max(x_power, y_power), - smaller_power = std::min(x_power, y_power); - - // Cancel the larger of the two numerator terms with the - // denominator, and fill in the remaining entries. - std::vector - numerator(smaller_power > 1 ? smaller_power-1 : 0), - denominator(2+smaller_power); - - // Fill up the vectors with sequences starting at the right values. - std::iota(numerator.begin(), numerator.end(), 2); - std::iota(denominator.begin(), denominator.end(), larger_power+1); - - // The denominator is guaranteed to have more terms... - for (std::size_t i=0; i(x_power), + static_cast(y_power)); }; const std::function hex_integrals = [](int modex, int modey, int modez) { - const Real exactx = (modex % 2) ? - 0 : (Real(2.0) / (modex+1)); - - const Real exacty = (modey % 2) ? - 0 : (Real(2.0) / (modey+1)); - - const Real exactz = (modez % 2) ? - 0 : (Real(2.0) / (modez+1)); - - return exactx*exacty*exactz; + return quadrature_exactness::hex_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; const std::function tet_integrals = [](int x_power, int y_power, int z_power) { - // Compute the true integral, a! b! c! / (a + b + c + 3)! 
- Real analytical = 1.0; - - // Sort the a, b, c values - int sorted_powers[3] = {x_power, y_power, z_power}; - std::sort(sorted_powers, sorted_powers+3); - - // Cancel the largest power with the denominator, fill in the - // entries for the remaining numerator terms and the denominator. - std::vector - numerator_1(sorted_powers[0] > 1 ? sorted_powers[0]-1 : 0), - numerator_2(sorted_powers[1] > 1 ? sorted_powers[1]-1 : 0), - denominator(3 + sorted_powers[0] + sorted_powers[1]); - - // Fill up the vectors with sequences starting at the right values. - std::iota(numerator_1.begin(), numerator_1.end(), 2); - std::iota(numerator_2.begin(), numerator_2.end(), 2); - std::iota(denominator.begin(), denominator.end(), sorted_powers[2]+1); - - // The denominator is guaranteed to have the most terms... - for (std::size_t i=0; i(x_power), + static_cast(y_power), + static_cast(z_power)); }; const std::function prism_integrals = - [this](int modex, int modey, int modez) { - const Real exactz = (modez % 2) ? 
- 0 : (Real(2.0) / (modez+1)); - - return exactz * tri_integrals(modex, modey, 0); + [](int modex, int modey, int modez) { + return quadrature_exactness::prism_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; const std::function pyramid_integrals = [](int modex, int modey, int modez) { - - const int binom = Utility::binomial(modex+modey+modez+3, modez); - - if (modex%2 || modey%2) - return Real(0); - - return Real(4)/((modex+1)*(modey+1)*binom*(modex+modey+modez+3)); + return quadrature_exactness::pyramid_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; From 4cc3a11c8b0ce45d098b2d887204570cca9edaa2 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 6 May 2026 13:20:30 -0600 Subject: [PATCH 07/48] Fix installed Kokkos FE header support --- contrib/bin/libmesh-config.in | 2 +- contrib/utils/libmesh-dbg.pc.in | 1 + contrib/utils/libmesh-devel.pc.in | 1 + contrib/utils/libmesh-oprof.pc.in | 1 + contrib/utils/libmesh-opt.pc.in | 1 + contrib/utils/libmesh-prof.pc.in | 1 + include/gpu/kokkos_fe_base.h | 2 +- include/gpu/kokkos_fe_evaluator.h | 12 ++--- include/gpu/kokkos_fe_face_map.h | 2 +- include/gpu/kokkos_fe_lagrange_1d.h | 2 +- include/gpu/kokkos_fe_lagrange_2d.h | 2 +- include/gpu/kokkos_fe_lagrange_3d.h | 2 +- include/gpu/kokkos_fe_map.h | 4 +- include/gpu/kokkos_fe_monomial.h | 2 +- include/gpu/kokkos_quadrature.h | 2 +- include/gpu/kokkos_scalar_types.h | 68 +++++++++++++++++++++++++++++ 16 files changed, 89 insertions(+), 16 deletions(-) diff --git a/contrib/bin/libmesh-config.in b/contrib/bin/libmesh-config.in index b935b5b334b..e75de90ed95 100644 --- a/contrib/bin/libmesh-config.in +++ b/contrib/bin/libmesh-config.in @@ -106,7 +106,7 @@ while [ "x$1" != "x" ]; do ;; "--cppflags") - return_val="${CPPFLAGS} $return_val" + return_val="${CPPFLAGS} @KOKKOS_CPPFLAGS@ $return_val" ;; "--cxxflags") diff --git a/contrib/utils/libmesh-dbg.pc.in b/contrib/utils/libmesh-dbg.pc.in index ab532d664c1..f6671513a62 100644 
--- a/contrib/utils/libmesh-dbg.pc.in +++ b/contrib/utils/libmesh-dbg.pc.in @@ -13,4 +13,5 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_dbg -ltimpi_dbg \ Libs.private: Cflags: @CPPFLAGS_DBG@ \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-devel.pc.in b/contrib/utils/libmesh-devel.pc.in index 63b3d6c8097..75f4983cd62 100644 --- a/contrib/utils/libmesh-devel.pc.in +++ b/contrib/utils/libmesh-devel.pc.in @@ -13,4 +13,5 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_devel -ltimpi_devel \ Libs.private: Cflags: @CPPFLAGS_DEVEL@ \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-oprof.pc.in b/contrib/utils/libmesh-oprof.pc.in index 1184f433eb5..0e955436cdc 100644 --- a/contrib/utils/libmesh-oprof.pc.in +++ b/contrib/utils/libmesh-oprof.pc.in @@ -13,4 +13,5 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_oprof -ltimpi_oprof \ Libs.private: Cflags: @CPPFLAGS_OPROF@ \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-opt.pc.in b/contrib/utils/libmesh-opt.pc.in index 10deb13ec50..616875c1d0b 100644 --- a/contrib/utils/libmesh-opt.pc.in +++ b/contrib/utils/libmesh-opt.pc.in @@ -13,4 +13,5 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_opt -ltimpi_opt \ Libs.private: Cflags: @CPPFLAGS_OPT@ \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-prof.pc.in b/contrib/utils/libmesh-prof.pc.in index 213601d795a..0684e99bf40 100644 --- a/contrib/utils/libmesh-prof.pc.in +++ b/contrib/utils/libmesh-prof.pc.in @@ -13,4 +13,5 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_prof -ltimpi_prof \ Libs.private: Cflags: @CPPFLAGS_PROF@ \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/include/gpu/kokkos_fe_base.h b/include/gpu/kokkos_fe_base.h index 07664e627eb..4526ebdc67a 100644 --- a/include/gpu/kokkos_fe_base.h +++ 
b/include/gpu/kokkos_fe_base.h @@ -27,7 +27,7 @@ #ifndef LIBMESH_KOKKOS_FE_BASE_H #define LIBMESH_KOKKOS_FE_BASE_H -#include "gpu/kokkos_scalar_types.h" +#include "kokkos_scalar_types.h" #include "libmesh/libmesh_device.h" #include "libmesh/enum_elem_type.h" #include "libmesh/enum_fe_family.h" diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h index 118880c614d..5fb7c1d1dc4 100644 --- a/include/gpu/kokkos_fe_evaluator.h +++ b/include/gpu/kokkos_fe_evaluator.h @@ -15,12 +15,12 @@ #ifndef LIBMESH_KOKKOS_FE_EVALUATOR_H #define LIBMESH_KOKKOS_FE_EVALUATOR_H -#include "gpu/kokkos_fe_base.h" -#include "gpu/kokkos_fe_types.h" -#include "gpu/kokkos_fe_lagrange_1d.h" -#include "gpu/kokkos_fe_lagrange_2d.h" -#include "gpu/kokkos_fe_lagrange_3d.h" -#include "gpu/kokkos_fe_monomial.h" +#include "kokkos_fe_base.h" +#include "kokkos_fe_types.h" +#include "kokkos_fe_lagrange_1d.h" +#include "kokkos_fe_lagrange_2d.h" +#include "kokkos_fe_lagrange_3d.h" +#include "kokkos_fe_monomial.h" #include "libmesh/enum_elem_type.h" #include "libmesh/enum_fe_family.h" diff --git a/include/gpu/kokkos_fe_face_map.h b/include/gpu/kokkos_fe_face_map.h index 822ce4da406..5cefdd2402b 100644 --- a/include/gpu/kokkos_fe_face_map.h +++ b/include/gpu/kokkos_fe_face_map.h @@ -3,7 +3,7 @@ #ifdef LIBMESH_HAVE_KOKKOS -#include "gpu/kokkos_fe_evaluator.h" +#include "kokkos_fe_evaluator.h" #include "libmesh/elem.h" namespace libMesh::Kokkos diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h index 72e78692882..3ade1ee709c 100644 --- a/include/gpu/kokkos_fe_lagrange_1d.h +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -12,7 +12,7 @@ #ifndef LIBMESH_KOKKOS_FE_LAGRANGE_1D_H #define LIBMESH_KOKKOS_FE_LAGRANGE_1D_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_lagrange_2d.h b/include/gpu/kokkos_fe_lagrange_2d.h index 08d1e2f5ba6..f58097cbe21 100644 --- 
a/include/gpu/kokkos_fe_lagrange_2d.h +++ b/include/gpu/kokkos_fe_lagrange_2d.h @@ -8,7 +8,7 @@ #ifndef LIBMESH_KOKKOS_FE_LAGRANGE_2D_H #define LIBMESH_KOKKOS_FE_LAGRANGE_2D_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_lagrange_3d.h b/include/gpu/kokkos_fe_lagrange_3d.h index 5f2fbb203c3..48afbccaa28 100644 --- a/include/gpu/kokkos_fe_lagrange_3d.h +++ b/include/gpu/kokkos_fe_lagrange_3d.h @@ -8,7 +8,7 @@ #ifndef LIBMESH_KOKKOS_FE_LAGRANGE_3D_H #define LIBMESH_KOKKOS_FE_LAGRANGE_3D_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h index d71f81c931a..6e237997f81 100644 --- a/include/gpu/kokkos_fe_map.h +++ b/include/gpu/kokkos_fe_map.h @@ -17,8 +17,8 @@ #ifndef LIBMESH_KOKKOS_FE_MAP_H #define LIBMESH_KOKKOS_FE_MAP_H -#include "gpu/kokkos_fe_evaluator.h" -#include "gpu/kokkos_scalar_types.h" +#include "kokkos_fe_evaluator.h" +#include "kokkos_scalar_types.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_monomial.h b/include/gpu/kokkos_fe_monomial.h index b68289c72eb..2dde44785f6 100644 --- a/include/gpu/kokkos_fe_monomial.h +++ b/include/gpu/kokkos_fe_monomial.h @@ -13,7 +13,7 @@ #ifndef LIBMESH_KOKKOS_FE_MONOMIAL_H #define LIBMESH_KOKKOS_FE_MONOMIAL_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" #include "libmesh/enum_elem_type.h" namespace libMesh::Kokkos diff --git a/include/gpu/kokkos_quadrature.h b/include/gpu/kokkos_quadrature.h index 4c2e8750bab..d8b94c56a5c 100644 --- a/include/gpu/kokkos_quadrature.h +++ b/include/gpu/kokkos_quadrature.h @@ -14,7 +14,7 @@ #ifndef LIBMESH_KOKKOS_QUADRATURE_H #define LIBMESH_KOKKOS_QUADRATURE_H -#include "gpu/kokkos_scalar_types.h" +#include "kokkos_scalar_types.h" #include "libmesh/enum_elem_type.h" #include #include diff --git a/include/gpu/kokkos_scalar_types.h b/include/gpu/kokkos_scalar_types.h 
index b7386cf900f..7584819413b 100644 --- a/include/gpu/kokkos_scalar_types.h +++ b/include/gpu/kokkos_scalar_types.h @@ -10,6 +10,7 @@ #include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" #include "libmesh/type_tensor.h" + namespace libMesh::Kokkos { @@ -17,6 +18,73 @@ using Real = libMesh::Real; using RealVector = libMesh::TypeVector; using RealTensor = libMesh::TypeTensor; +template +LIBMESH_DEVICE_INLINE +VectorType load_vector(const ViewType & view, const unsigned int i) +{ + VectorType v; + v.zero(); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + v(d) = view(i, d); + + return v; +} + +template +LIBMESH_DEVICE_INLINE +void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) +{ + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + view(i, d) = v(d); +} + +template +LIBMESH_DEVICE_INLINE +Real vector_component(const ViewType & view, const unsigned int i, const unsigned int component) +{ + if (component < LIBMESH_DIM) + return view(i, component); + + return Real(0); +} + +template +LIBMESH_DEVICE_INLINE +TensorType load_tensor(const ViewType & view, const unsigned int i) +{ + TensorType T; + T.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + T(row, col) = view(i, row, col); + + return T; +} + +template +LIBMESH_DEVICE_INLINE +void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + view(i, row, col) = T(row, col); +} + +template +LIBMESH_DEVICE_INLINE +Real tensor_component(const ViewType & view, + const unsigned int i, + const unsigned int row, + const unsigned int col) +{ + if (row < LIBMESH_DIM && col < LIBMESH_DIM) + return view(i, row, col); + + return Real(0); +} + LIBMESH_DEVICE_INLINE RealVector zero_vector() { From 9b1aa45b77d2bfe7c61fe3229542b0cf61ad74ff Mon Sep 17 00:00:00 2001 From: rochi00 
Date: Wed, 6 May 2026 15:03:32 -0600 Subject: [PATCH 08/48] Export enum_fe_elem_class header --- include/include_HEADERS | 1 + include/libmesh/Makefile.am | 4 ++++ include/libmesh/Makefile.in | 17 ++++++++++------- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/include_HEADERS b/include/include_HEADERS index 8d980280d31..61ce7391717 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -50,6 +50,7 @@ include_HEADERS = \ enums/enum_elem_quality.h \ enums/enum_elem_type.h \ enums/enum_error_estimator_type.h \ + enums/enum_fe_elem_class.h \ enums/enum_fe_family.h \ enums/enum_inf_map_type.h \ enums/enum_io_package.h \ diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index f6010488ff0..22549c7105d 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -41,6 +41,7 @@ BUILT_SOURCES = \ enum_elem_quality.h \ enum_elem_type.h \ enum_error_estimator_type.h \ + enum_fe_elem_class.h \ enum_fe_family.h \ enum_inf_map_type.h \ enum_io_package.h \ @@ -723,6 +724,9 @@ enum_elem_type.h: $(top_srcdir)/include/enums/enum_elem_type.h enum_error_estimator_type.h: $(top_srcdir)/include/enums/enum_error_estimator_type.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +enum_fe_elem_class.h: $(top_srcdir)/include/enums/enum_fe_elem_class.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + enum_fe_family.h: $(top_srcdir)/include/enums/enum_fe_family.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/include/libmesh/Makefile.in b/include/libmesh/Makefile.in index 5c5b77c25db..f65449fe071 100644 --- a/include/libmesh/Makefile.in +++ b/include/libmesh/Makefile.in @@ -547,13 +547,13 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ single_predicates.h sparsity_pattern.h variable.h \ variant_filter_iterator.h enum_convergence_flags.h \ enum_eigen_solver_type.h enum_elem_quality.h enum_elem_type.h \ - enum_error_estimator_type.h enum_fe_family.h \ - enum_inf_map_type.h enum_io_package.h 
enum_matrix_build_type.h \ - enum_norm_type.h enum_order.h enum_parallel_type.h \ - enum_partitioner_type.h enum_point_locator_type.h \ - enum_preconditioner_type.h enum_quadrature_type.h \ - enum_solver_package.h enum_solver_type.h \ - enum_subset_solve_mode.h enum_xdr_mode.h \ + enum_error_estimator_type.h enum_fe_elem_class.h \ + enum_fe_family.h enum_inf_map_type.h enum_io_package.h \ + enum_matrix_build_type.h enum_norm_type.h enum_order.h \ + enum_parallel_type.h enum_partitioner_type.h \ + enum_point_locator_type.h enum_preconditioner_type.h \ + enum_quadrature_type.h enum_solver_package.h \ + enum_solver_type.h enum_subset_solve_mode.h enum_xdr_mode.h \ adjoint_refinement_estimator.h \ adjoint_residual_error_estimator.h discontinuity_measure.h \ error_estimator.h exact_error_estimator.h exact_solution.h \ @@ -1067,6 +1067,9 @@ enum_elem_type.h: $(top_srcdir)/include/enums/enum_elem_type.h enum_error_estimator_type.h: $(top_srcdir)/include/enums/enum_error_estimator_type.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +enum_fe_elem_class.h: $(top_srcdir)/include/enums/enum_fe_elem_class.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + enum_fe_family.h: $(top_srcdir)/include/enums/enum_fe_family.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ From c9dab843cfd923bf88a7fbcadb7c672fecb53966 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Fri, 8 May 2026 15:04:37 -0600 Subject: [PATCH 09/48] Add Kokkos configuration and device plumbing --- include/base/libmesh_common.h | 45 ++++++++--- include/base/libmesh_device.h | 74 +++++++++++++++++ include/base/libmesh_exceptions.h | 7 ++ include/libmesh_config.h.in | 3 + m4/libmesh_optional_packages.m4 | 128 ++++++++++++++++++++++++++++++ 5 files changed, 245 insertions(+), 12 deletions(-) create mode 100644 include/base/libmesh_device.h diff --git a/include/base/libmesh_common.h b/include/base/libmesh_common.h index d907f4a5fe3..32f8820b4d6 100644 --- a/include/base/libmesh_common.h +++ b/include/base/libmesh_common.h @@ -30,6 +30,10 @@ // 
The library configuration options #include "libmesh/libmesh_config.h" +// Device compilation support — must be included before assert macros +// so that LIBMESH_DEVICE_ASSERT is available for the Kokkos path. +#include "libmesh/libmesh_device.h" + // Use actual timestamps or constant dummies (to aid ccache) #ifdef LIBMESH_ENABLE_TIMESTAMPS # define LIBMESH_TIME __TIME__ @@ -183,33 +187,33 @@ typedef std::complex COMPLEX; // Helper functions for complex/real numbers // to clean up #ifdef LIBMESH_USE_COMPLEX_NUMBERS elsewhere -template inline T libmesh_real(T a) { return a; } -template inline T libmesh_imag(T /*a*/) { return 0; } -template inline T libmesh_conj(T a) { return a; } +template LIBMESH_DEVICE_INLINE T libmesh_real(T a) { return a; } +template LIBMESH_DEVICE_INLINE T libmesh_imag(T /*a*/) { return 0; } +template LIBMESH_DEVICE_INLINE T libmesh_conj(T a) { return a; } template -inline T libmesh_real(std::complex a) { return std::real(a); } +LIBMESH_DEVICE_INLINE T libmesh_real(std::complex a) { return std::real(a); } template -inline T libmesh_imag(std::complex a) { return std::imag(a); } +LIBMESH_DEVICE_INLINE T libmesh_imag(std::complex a) { return std::imag(a); } template -inline std::complex libmesh_conj(std::complex a) { return std::conj(a); } +LIBMESH_DEVICE_INLINE std::complex libmesh_conj(std::complex a) { return std::conj(a); } // std::isnan() is in as of C++11. template -inline bool libmesh_isnan(T x) { return std::isnan(x); } +LIBMESH_DEVICE_INLINE bool libmesh_isnan(T x) { return std::isnan(x); } template -inline bool libmesh_isnan(std::complex a) +LIBMESH_DEVICE_INLINE bool libmesh_isnan(std::complex a) { return (std::isnan(std::real(a)) || std::isnan(std::imag(a))); } // std::isinf() is in as of C++11. 
template -inline bool libmesh_isinf(T x) { return std::isinf(x); } +LIBMESH_DEVICE_INLINE bool libmesh_isinf(T x) { return std::isinf(x); } template -inline bool libmesh_isinf(std::complex a) +LIBMESH_DEVICE_INLINE bool libmesh_isinf(std::complex a) { return (std::isinf(std::real(a)) || std::isinf(std::imag(a))); } // Define the value type for unknowns in simulations. @@ -287,7 +291,13 @@ extern bool warned_about_auto_ptr; #endif // The libmesh_assert() macro acts like C's assert(), but throws a -// libmesh_error() (including stack trace, etc) instead of just exiting +// libmesh_error() (including stack trace, etc) instead of just exiting. +// +// In .K translation units (LIBMESH_KOKKOS_COMPILATION defined), +// LIBMESH_DEVICE_ASSERT is provided by libmesh_device.h using +// printf + Kokkos::abort() — device-safe across CUDA/HIP/SYCL. +// The assert macros delegate to it so that both host and device +// code in the same file get assertion checking. #ifdef NDEBUG #define libmesh_assert_msg(asserted, msg) ((void) 0) @@ -299,6 +309,18 @@ extern bool warned_about_auto_ptr; #define libmesh_assert_less_equal_msg(expr1,expr2, msg) ((void) 0) #define libmesh_assert_greater_equal_msg(expr1,expr2, msg) ((void) 0) +#elif defined(LIBMESH_DEVICE_ASSERT) + +// Kokkos compilation: use the device-safe assert from libmesh_device.h. 
+#define libmesh_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted) +#define libmesh_exceptionless_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted) +#define libmesh_assert_equal_to_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) == (expr2)) +#define libmesh_assert_not_equal_to_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) != (expr2)) +#define libmesh_assert_less_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) < (expr2)) +#define libmesh_assert_greater_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) > (expr2)) +#define libmesh_assert_less_equal_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) <= (expr2)) +#define libmesh_assert_greater_equal_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) >= (expr2)) + #else #define libmesh_assertion_types(expr1,expr2) \ @@ -674,7 +696,6 @@ inline Tnew restrict_int (Told oldvar) return oldvar; } - /** * This is a helper variable template for cases when we want to use a default compile-time * error with constexpr-based if conditions. The templating delays the triggering diff --git a/include/base/libmesh_device.h b/include/base/libmesh_device.h new file mode 100644 index 00000000000..f41d4c70b01 --- /dev/null +++ b/include/base/libmesh_device.h @@ -0,0 +1,74 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_LIBMESH_DEVICE_H +#define LIBMESH_LIBMESH_DEVICE_H + +// Defines LIBMESH_DEVICE_INLINE, mirroring MetaPhysicL's METAPHYSICL_INLINE +// pattern (metaphysicl_device.h / METAPHYSICL_KOKKOS_COMPILATION). +// +// When compiling a .K translation unit (LIBMESH_KOKKOS_COMPILATION is defined +// by kokkos.mk), this expands to KOKKOS_INLINE_FUNCTION so that annotated +// methods are callable from both host and device code. In all other +// translation units it expands to plain `inline`. +#ifdef LIBMESH_KOKKOS_COMPILATION +# include +# include +# define LIBMESH_DEVICE_INLINE KOKKOS_INLINE_FUNCTION + +// Backend-neutral device-code detection for Kokkos .K translation units. +// This lets error/exception plumbing share a single predicate instead of +// hardcoding per-backend checks in multiple headers. +# if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || defined(__SYCL_DEVICE_ONLY__) +# define LIBMESH_IN_DEVICE_CODE 1 +# else +# define LIBMESH_IN_DEVICE_CODE 0 +# endif + +// Device-safe assert: uses printf (supported on CUDA/HIP) and +// Kokkos::abort() for backend-portable device termination. +// Defined here (not in libmesh_common.h) because Kokkos headers +// are only available in .K translation units. 
+# ifndef NDEBUG +# define LIBMESH_DEVICE_ASSERT(asserted) \ + do { if (!(asserted)) { \ + printf("libMesh assert failed: %s, file %s, line %d\n", \ + #asserted, __FILE__, __LINE__); \ + ::Kokkos::abort("libmesh_assert failed"); \ + } } while (0) +# else +# define LIBMESH_DEVICE_ASSERT(asserted) ((void) 0) +# endif + +# define LIBMESH_DEVICE_ERROR_MSG(msg) \ + do { \ + printf("libMesh error: %s, file %s, line %d\n", \ + msg, __FILE__, __LINE__); \ + ::Kokkos::abort(msg); \ + } while (0) + +# define LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg) \ + do { if (cond) { LIBMESH_DEVICE_ERROR_MSG(msg); } } while (0) + +#else +# define LIBMESH_DEVICE_INLINE inline +# define LIBMESH_IN_DEVICE_CODE 0 +# define LIBMESH_DEVICE_ERROR_MSG(msg) libmesh_error_msg(msg) +# define LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg) libmesh_error_msg_if(cond, msg) +#endif + +#endif // LIBMESH_LIBMESH_DEVICE_H diff --git a/include/base/libmesh_exceptions.h b/include/base/libmesh_exceptions.h index 6ca79b7b269..65237e1478f 100644 --- a/include/base/libmesh_exceptions.h +++ b/include/base/libmesh_exceptions.h @@ -23,6 +23,7 @@ #include "libmesh/libmesh_config.h" #include "libmesh/libmesh_abort.h" +#include "libmesh/libmesh_device.h" #include #include @@ -212,7 +213,13 @@ class TerminationException #ifdef LIBMESH_ENABLE_EXCEPTIONS #define libmesh_noexcept noexcept +#if LIBMESH_IN_DEVICE_CODE +// Kokkos device code does not support C++ exceptions. +#define LIBMESH_THROW(e) do { LIBMESH_DEVICE_ERROR_MSG((e).what()); } while (0) +#else #define LIBMESH_THROW(e) do { throw e; } while (0) +#endif + #define libmesh_rethrow throw #define libmesh_try try #define libmesh_catch(e) catch(e) diff --git a/include/libmesh_config.h.in b/include/libmesh_config.h.in index 9adaa9efe05..a8e72c8859c 100644 --- a/include/libmesh_config.h.in +++ b/include/libmesh_config.h.in @@ -434,6 +434,9 @@ /* Define to 1 if you have the header file. 
*/ #undef HAVE_INTTYPES_H +/* Define if Kokkos support is enabled in libMesh */ +#undef HAVE_KOKKOS + /* Flag indicating whether the library will be compiled with LASPACK support */ #undef HAVE_LASPACK diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index 2c569d088c0..9fb641d1945 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -861,6 +861,134 @@ AM_CONDITIONAL(LIBMESH_ENABLE_METAPHYSICL, test x$enablemetaphysicl = xyes) +# ------------------------------------------------------------- +# Kokkos -- optional, enables the native Kokkos FE math path +# ------------------------------------------------------------- +AC_ARG_WITH([kokkos], + AS_HELP_STRING([--with-kokkos=DIR], + [Enable Kokkos support using the installation at DIR]), + [KOKKOS_DIR="$withval"], + [KOKKOS_DIR="no"]) + +AC_ARG_WITH([kokkos-backend], + AS_HELP_STRING([--with-kokkos-backend=BACKEND], + [cuda|hip|sycl|openmp|serial (default: auto-detect from KokkosCore_config.h)]), + [KOKKOS_BACKEND="$withval"], [KOKKOS_BACKEND="auto"]) + +dnl Allow the caller (e.g. MOOSE's configure_libmesh.sh) to pre-set the +dnl Kokkos compiler and flags via environment variables. If KOKKOS_CXX is +dnl already set, we skip auto-detection entirely — the caller knows best. +dnl We use AC_SUBST (not AC_ARG_VAR) so these flags stay scoped to .K +dnl compilation rules and don't leak into the main CPPFLAGS/CXXFLAGS. 
+ +AS_IF([test "x$KOKKOS_DIR" != "xno"], + [ + AC_CHECK_FILE([$KOKKOS_DIR/include/Kokkos_Core.hpp], + [ + enablekokkos=yes + libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES -I$KOKKOS_DIR/include" + libmesh_optional_LIBS="$libmesh_optional_LIBS -L$KOKKOS_DIR/lib -lkokkoscore" + + dnl Only auto-detect if KOKKOS_CXX was not pre-set by the caller + AS_IF([test "x$KOKKOS_CXX" = "x"], + [ + KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h" + + dnl Auto-detect backend + AS_IF([test "x$KOKKOS_BACKEND" = "xauto"], + [ + AS_IF([test -r "$KOKKOS_CFG"], + [ + AS_IF([grep -q 'KOKKOS_ENABLE_CUDA' "$KOKKOS_CFG"], + [KOKKOS_BACKEND=cuda], + [AS_IF([grep -q 'KOKKOS_ENABLE_HIP' "$KOKKOS_CFG"], + [KOKKOS_BACKEND=hip], + [AS_IF([grep -q 'KOKKOS_ENABLE_SYCL' "$KOKKOS_CFG"], + [KOKKOS_BACKEND=sycl], + [AS_IF([grep -q 'KOKKOS_ENABLE_OPENMP' "$KOKKOS_CFG"], + [KOKKOS_BACKEND=openmp], + [KOKKOS_BACKEND=serial])])])]) + ], + [KOKKOS_BACKEND=serial]) + ]) + + AC_MSG_RESULT([Kokkos backend: $KOKKOS_BACKEND]) + + dnl Check if Kokkos was built with OpenMP + have_kokkos_openmp=no + AS_IF([test -r "$KOKKOS_CFG"], + [AS_IF([grep -q 'KOKKOS_ENABLE_OPENMP' "$KOKKOS_CFG"], + [have_kokkos_openmp=yes])]) + + case "$KOKKOS_BACKEND" in + cuda) + AC_PATH_PROG([NVCC],[nvcc],[no],[$PATH]) + AS_IF([test "x$NVCC" = "xno"], + [AC_MSG_ERROR([nvcc not found but Kokkos CUDA backend requested])]) + KOKKOS_CXX="$NVCC" + KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --disable-warnings -x cu -ccbin $CXX" + KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler -L$KOKKOS_DIR/lib" + AS_IF([test "x$have_kokkos_openmp" = "xyes"], + [ + KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp" + KOKKOS_LDFLAGS="$KOKKOS_LDFLAGS -fopenmp" + ]) + ;; + hip) + AC_PATH_PROG([HIPCC],[hipcc],[no],[$PATH]) + AS_IF([test "x$HIPCC" = "xno"], + [AC_MSG_ERROR([hipcc not found but Kokkos HIP backend requested])]) + KOKKOS_CXX="$HIPCC" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + sycl) + 
AC_PATH_PROG([ICPX],[icpx],[no],[$PATH]) + AS_IF([test "x$ICPX" = "xno"], + [AC_MSG_ERROR([icpx not found but Kokkos SYCL backend requested])]) + KOKKOS_CXX="$ICPX" + KOKKOS_CXXFLAGS="-fsycl" + KOKKOS_LDFLAGS="-fsycl -L$KOKKOS_DIR/lib" + ;; + openmp) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-fopenmp -x c++" + KOKKOS_LDFLAGS="-fopenmp -L$KOKKOS_DIR/lib" + ;; + serial|*) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-x c++" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + esac + ], + [AC_MSG_RESULT([Using caller-provided KOKKOS_CXX=$KOKKOS_CXX])]) + + dnl Set defaults for any variables not provided by caller or auto-detect + KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION -I$KOKKOS_DIR/include}" + KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" + KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" + + AC_DEFINE([HAVE_KOKKOS], [1], + [Define if Kokkos support is enabled in libMesh]) + AC_MSG_RESULT(<<< Configuring library with Kokkos support >>>) + ], + [ + AC_MSG_WARN([Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support]) + enablekokkos=no + ]) + ], + [enablekokkos=no]) + +AC_SUBST([KOKKOS_CXX]) +AC_SUBST([KOKKOS_CPPFLAGS]) +AC_SUBST([KOKKOS_CXXFLAGS]) +AC_SUBST([KOKKOS_LDFLAGS]) +AC_SUBST([KOKKOS_LIBS]) +AM_CONDITIONAL(LIBMESH_ENABLE_KOKKOS, test x$enablekokkos = xyes) +# ------------------------------------------------------------- + + + AS_IF([test "$enableoptional" != no], [ AC_MSG_RESULT(----------------------------------------------) From 3d8327ba85aac1a6a2bdf0e2ad2e527d93f1e780 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Fri, 8 May 2026 15:04:41 -0600 Subject: [PATCH 10/48] Make vector and tensor value types device-usable --- include/numerics/tensor_tools.h | 36 +++++------ include/numerics/tensor_value.h | 24 ++++--- include/numerics/type_tensor.h | 110 +++++++++++++++++++------------- include/numerics/type_vector.h | 89 ++++++++++++++++---------- include/numerics/vector_value.h | 16 +++-- 5 files changed, 161 insertions(+), 114 deletions(-) 
diff --git a/include/numerics/tensor_tools.h b/include/numerics/tensor_tools.h index 7617116f10d..f183380a84d 100644 --- a/include/numerics/tensor_tools.h +++ b/include/numerics/tensor_tools.h @@ -45,92 +45,92 @@ namespace TensorTools // Vector specializations will follow. template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if::value && ScalarTraits::value, typename CompareTypes::supertype>::type inner_product(const T & a, const T2& b) { return a * b; } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeVector & a, const TypeVector & b) { return a * b; } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeTensor & a, const TypeTensor & b) { return a.contract(b); } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeNTensor & a, const TypeNTensor & b) { return a.contract(b); } template -inline +LIBMESH_DEVICE_INLINE auto norm(const T & a) { using std::abs; return abs(a); } template -inline +LIBMESH_DEVICE_INLINE T norm(std::complex a) { using std::abs; return abs(a); } template -inline +LIBMESH_DEVICE_INLINE auto norm(const TypeVector & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const VectorValue & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const TypeTensor & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const TensorValue & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const T & a) -{ using std::norm; return norm(a); } +{ return a * libmesh_conj(a); } template -inline +LIBMESH_DEVICE_INLINE T norm_sq(std::complex a) { using std::norm; return norm(a); } template -inline 
+LIBMESH_DEVICE_INLINE auto norm_sq(const TypeVector & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const VectorValue & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TypeTensor & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TensorValue & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE bool is_zero(const T & a){ return a.is_zero();} // Any tensor-rank-independent code will need to include diff --git a/include/numerics/tensor_value.h b/include/numerics/tensor_value.h index 3a0d680476d..c99e0cac003 100644 --- a/include/numerics/tensor_value.h +++ b/include/numerics/tensor_value.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/type_tensor.h" +#include "libmesh/libmesh_device.h" #include "libmesh/libmesh.h" // for pi #ifdef LIBMESH_HAVE_METAPHYSICL @@ -93,12 +94,14 @@ class TensorValue : public TypeTensor * Constructor. Takes 1 row vector for LIBMESH_DIM=1 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx); /** * Constructor. Takes 2 row vectors for LIBMESH_DIM=2 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx, const TypeVector & vy); @@ -106,6 +109,7 @@ class TensorValue : public TypeTensor * Constructor. Takes 3 row vectors for LIBMESH_DIM=3 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx, const TypeVector & vy, const TypeVector & vz); @@ -134,11 +138,11 @@ class TensorValue : public TypeTensor const TypeTensor & p_im); #endif - /** * Assignment-from-scalar operator. Used only to zero out tensors. 
*/ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TensorValue &>::type @@ -211,7 +215,7 @@ typedef NumberTensorValue Tensor; //------------------------------------------------------ // Inline functions template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue () : TypeTensor () { @@ -220,7 +224,7 @@ TensorValue::TensorValue () : template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const T & xx, const T & xy, const T & xz, @@ -237,7 +241,7 @@ TensorValue::TensorValue (const T & xx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const Scalar & xx, const Scalar & xy, const Scalar & xz, @@ -257,7 +261,7 @@ TensorValue::TensorValue (const Scalar & xx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TensorValue & p) : TypeTensor (p) { @@ -267,7 +271,7 @@ TensorValue::TensorValue (const TensorValue & p) : template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx) : TypeTensor (vx) { @@ -277,7 +281,7 @@ TensorValue::TensorValue (const TypeVector & vx) : template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx, const TypeVector & vy) : TypeTensor (vx, vy) @@ -288,7 +292,7 @@ TensorValue::TensorValue (const TypeVector & vx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx, const TypeVector & vy, const TypeVector & vz) : @@ -300,7 +304,7 @@ TensorValue::TensorValue (const TypeVector & vx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeTensor & p) : TypeTensor (p) { @@ -309,7 +313,7 @@ TensorValue::TensorValue (const TypeTensor & p) : #ifdef LIBMESH_USE_COMPLEX_NUMBERS template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeTensor & p_re, const TypeTensor & p_im) : TypeTensor (Complex (p_re(0,0), p_im(0,0)), diff --git a/include/numerics/type_tensor.h b/include/numerics/type_tensor.h 
index 470b745f120..ac6dc145428 100644 --- a/include/numerics/type_tensor.h +++ b/include/numerics/type_tensor.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" // C++ includes @@ -101,13 +102,16 @@ class TypeTensor * many vectors are needed. */ template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx); template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx, const TypeVector & vy); template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx, const TypeVector & vy, const TypeVector & vz); @@ -133,12 +137,14 @@ class TypeTensor /** * Destructor. */ + LIBMESH_DEVICE_INLINE ~TypeTensor(); /** * Assign to this tensor without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void assign (const TypeTensor &); /** @@ -147,6 +153,7 @@ class TypeTensor * \returns A reference to *this. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor &>::type @@ -166,11 +173,13 @@ class TypeTensor /** * \returns A proxy for the \f$ i^{th} \f$ column of the tensor. */ + LIBMESH_DEVICE_INLINE ConstTypeTensorColumn slice (const unsigned int i) const; /** * \returns A writable proxy for the \f$ i^{th} \f$ column of the tensor. */ + LIBMESH_DEVICE_INLINE TypeTensorColumn slice (const unsigned int i); /** @@ -181,6 +190,7 @@ class TypeTensor /** * \returns A copy of one column of the tensor as a TypeVector. */ + LIBMESH_DEVICE_INLINE TypeVector column(const unsigned int r) const; /** @@ -210,6 +220,7 @@ class TypeTensor * Add a scaled tensor to this tensor without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void add_scaled (const TypeTensor &, const T &); /** @@ -240,6 +251,7 @@ class TypeTensor * temporary. 
*/ template + LIBMESH_DEVICE_INLINE void subtract_scaled (const TypeTensor &, const T &); /** @@ -265,6 +277,7 @@ class TypeTensor */ template ::value, int>::type = 0> + LIBMESH_DEVICE_INLINE const TypeTensor & operator *= (const Scalar & factor) { for (unsigned int i=0; i + LIBMESH_DEVICE_INLINE typename CompareTypes::supertype contract (const TypeTensor &) const; @@ -339,6 +353,7 @@ class TypeTensor * \returns A copy of the result vector, this tensor is unchanged. */ template + LIBMESH_DEVICE_INLINE TypeVector::supertype> left_multiply (const TypeVector & p) const; @@ -358,6 +373,7 @@ class TypeTensor * * \returns The solution in the \p x vector. */ + LIBMESH_DEVICE_INLINE void solve(const TypeVector & b, TypeVector & x) const; /** @@ -375,6 +391,7 @@ class TypeTensor /** * \returns True if all values in the tensor are zero */ + LIBMESH_DEVICE_INLINE bool is_zero() const; /** @@ -393,11 +410,13 @@ class TypeTensor /** * Set all entries of the tensor to 0. */ + LIBMESH_DEVICE_INLINE void zero(); /** * \returns \p true if two tensors are equal, \p false otherwise. 
*/ + LIBMESH_DEVICE_INLINE bool operator == (const TypeTensor & rhs) const; /** @@ -513,7 +532,7 @@ class ConstTypeTensorColumn //------------------------------------------------------ // Inline functions template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor () { _coords[0] = {}; @@ -536,7 +555,7 @@ TypeTensor::TypeTensor () template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const T & xx, const T & xy, const T & xz, @@ -582,7 +601,7 @@ TypeTensor::TypeTensor (const T & xx, template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const Scalar & xx, const Scalar & xy, const Scalar & xz, @@ -631,7 +650,7 @@ TypeTensor::TypeTensor (const Scalar & xx, template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const TypeTensor & p) { // copy the nodes from vector p to me @@ -642,6 +661,7 @@ TypeTensor::TypeTensor (const TypeTensor & p) template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx) { libmesh_assert_equal_to (LIBMESH_DIM, 1); @@ -650,6 +670,7 @@ TypeTensor::TypeTensor(const TypeVector & vx) template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx, const TypeVector & vy) { @@ -666,6 +687,7 @@ TypeTensor::TypeTensor(const TypeVector & vx, template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx, const TypeVector & vy, const TypeVector & vz) @@ -690,7 +712,7 @@ TypeTensor::TypeTensor(const TypeVector & vx, template -inline +LIBMESH_DEVICE_INLINE TypeTensor::~TypeTensor () { } @@ -699,7 +721,7 @@ TypeTensor::~TypeTensor () template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::assign (const TypeTensor & p) { for (unsigned int i=0; i::assign (const TypeTensor & p) template -inline +LIBMESH_DEVICE_INLINE const T & TypeTensor::operator () (const unsigned int i, const unsigned int j) const { @@ -728,14 +750,14 @@ const T & TypeTensor::operator () (const unsigned int i, template -inline +LIBMESH_DEVICE_INLINE T & 
TypeTensor::operator () (const unsigned int i, const unsigned int j) { #if LIBMESH_DIM < 3 - libmesh_error_msg_if(i >= LIBMESH_DIM || j >= LIBMESH_DIM, - "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); + LIBMESH_DEVICE_ERROR_MSG_IF(i >= LIBMESH_DIM || j >= LIBMESH_DIM, + "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); #endif @@ -747,7 +769,7 @@ T & TypeTensor::operator () (const unsigned int i, template -inline +LIBMESH_DEVICE_INLINE ConstTypeTensorColumn TypeTensor::slice (const unsigned int i) const { @@ -757,7 +779,7 @@ TypeTensor::slice (const unsigned int i) const template -inline +LIBMESH_DEVICE_INLINE TypeTensorColumn TypeTensor::slice (const unsigned int i) { @@ -767,7 +789,7 @@ TypeTensor::slice (const unsigned int i) template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeTensor::row(const unsigned int r) const { @@ -781,7 +803,7 @@ TypeTensor::row(const unsigned int r) const template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeTensor::column(const unsigned int r) const { @@ -796,7 +818,7 @@ TypeTensor::column(const unsigned int r) const template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> TypeTensor::operator + (const TypeTensor & p) const { @@ -831,7 +853,7 @@ TypeTensor::operator + (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator += (const TypeTensor & p) { this->add (p); @@ -843,7 +865,7 @@ const TypeTensor & TypeTensor::operator += (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::add (const TypeTensor & p) { for (unsigned int i=0; i::add (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::add_scaled (const TypeTensor & p, const T & factor) { for (unsigned int i=0; i::add_scaled (const TypeTensor & p, const T & factor) template template -inline +LIBMESH_DEVICE_INLINE 
TypeTensor::supertype> TypeTensor::operator - (const TypeTensor & p) const { @@ -901,7 +923,7 @@ TypeTensor::operator - (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator -= (const TypeTensor & p) { this->subtract (p); @@ -913,7 +935,7 @@ const TypeTensor & TypeTensor::operator -= (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::subtract (const TypeTensor & p) { for (unsigned int i=0; i::subtract (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::subtract_scaled (const TypeTensor & p, const T & factor) { for (unsigned int i=0; i::subtract_scaled (const TypeTensor & p, const T & factor) template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::operator - () const { @@ -967,7 +989,7 @@ TypeTensor TypeTensor::operator - () const template template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::operator * (const Scalar & factor) const -> typename std::enable_if< ScalarTraits::value, @@ -1003,7 +1025,7 @@ TypeTensor::operator * (const Scalar & factor) const -> typename std::enable_ template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor::supertype>>::type @@ -1015,7 +1037,7 @@ operator * (const Scalar & factor, template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor::supertype>>::type @@ -1053,7 +1075,7 @@ TypeTensor::operator / (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::transpose() const { #if LIBMESH_DIM == 1 @@ -1083,7 +1105,7 @@ TypeTensor TypeTensor::transpose() const template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::inverse() const { #if LIBMESH_DIM == 1 @@ -1132,7 +1154,7 @@ TypeTensor TypeTensor::inverse() const template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::solve(const TypeVector & b, TypeVector & x) const { #if LIBMESH_DIM == 1 @@ -1183,7 +1205,7 @@ 
void TypeTensor::solve(const TypeVector & b, TypeVector & x) const template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator /= (const T & factor) { libmesh_assert_not_equal_to (factor, static_cast(0.)); @@ -1199,7 +1221,7 @@ const TypeTensor & TypeTensor::operator /= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeTensor::operator * (const TypeVector & p) const { @@ -1213,7 +1235,7 @@ TypeTensor::operator * (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeTensor::left_multiply (const TypeVector & p) const { @@ -1226,7 +1248,7 @@ TypeTensor::left_multiply (const TypeVector & p) const } template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> operator * (const TypeVector & a, const TypeTensor & b) { @@ -1235,7 +1257,7 @@ operator * (const TypeVector & a, const TypeTensor & b) template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> TypeTensor::operator * (const TypeTensor & p) const { @@ -1250,7 +1272,7 @@ TypeTensor::operator * (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator *= (const TypeTensor & p) { TypeTensor temp; @@ -1270,7 +1292,7 @@ const TypeTensor & TypeTensor::operator *= (const TypeTensor & p) */ template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeTensor::contract (const TypeTensor & t) const { @@ -1283,7 +1305,7 @@ TypeTensor::contract (const TypeTensor & t) const template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::norm() const { using std::sqrt; @@ -1292,7 +1314,7 @@ auto TypeTensor::norm() const template -inline +LIBMESH_DEVICE_INLINE bool TypeTensor::is_zero() const { for (const auto & val : _coords) @@ -1302,7 +1324,7 @@ bool TypeTensor::is_zero() const } template -inline +LIBMESH_DEVICE_INLINE T TypeTensor::det() const { #if LIBMESH_DIM == 1 @@ -1325,7 +1347,7 @@ T TypeTensor::det() const } 
template -inline +LIBMESH_DEVICE_INLINE T TypeTensor::tr() const { #if LIBMESH_DIM == 1 @@ -1342,7 +1364,7 @@ T TypeTensor::tr() const } template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::zero() { for (unsigned int i=0; i::zero() template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::norm_sq () const { Real sum = 0.; @@ -1364,7 +1386,7 @@ auto TypeTensor::norm_sq () const template -inline +LIBMESH_DEVICE_INLINE bool TypeTensor::operator == (const TypeTensor & rhs) const { #if LIBMESH_DIM == 1 @@ -1436,7 +1458,7 @@ void TypeTensor::print(std::ostream & os) const } template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> outer_product(const TypeVector & a, const TypeVector & b) { diff --git a/include/numerics/type_vector.h b/include/numerics/type_vector.h index aaf79a9fd22..a9ae1bb2518 100644 --- a/include/numerics/type_vector.h +++ b/include/numerics/type_vector.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" #include "libmesh/compare_types.h" #include "libmesh/tensor_tools.h" #include "libmesh/int_range.h" @@ -141,12 +142,14 @@ class TypeVector * Assign to this vector without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void assign (const TypeVector &); /** * Assignment-from-scalar operator. Used only to zero out vectors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector &>::type @@ -157,12 +160,14 @@ class TypeVector * \returns A const reference to the \f$ i^{th} \f$ entry of the vector. */ const T & operator () (const unsigned int i) const; + LIBMESH_DEVICE_INLINE const T & slice (const unsigned int i) const { return (*this)(i); } /** * \returns A writable reference to the \f$ i^{th} \f$ entry of the vector. 
*/ T & operator () (const unsigned int i); + LIBMESH_DEVICE_INLINE T & slice (const unsigned int i) { return (*this)(i); } /** @@ -192,6 +197,7 @@ class TypeVector * Add a scaled value to this vector without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void add_scaled (const TypeVector &, const T &); /** @@ -222,6 +228,7 @@ class TypeVector * temporary. */ template + LIBMESH_DEVICE_INLINE void subtract_scaled (const TypeVector &, const T &); /** @@ -279,6 +286,7 @@ class TypeVector * \returns The result of TypeVector::operator*(). */ template + LIBMESH_DEVICE_INLINE typename CompareTypes::supertype contract (const TypeVector &) const; @@ -292,6 +300,7 @@ class TypeVector /** * \returns A unit vector in the direction of *this. */ + LIBMESH_DEVICE_INLINE TypeVector unit() const; /** @@ -309,16 +318,19 @@ class TypeVector /** * \returns The L1 norm of the vector */ + LIBMESH_DEVICE_INLINE auto l1_norm() const; /** * \returns True if all values in the vector are zero */ + LIBMESH_DEVICE_INLINE bool is_zero() const; /** * Set all entries of the vector to 0. */ + LIBMESH_DEVICE_INLINE void zero(); /** @@ -342,11 +354,13 @@ class TypeVector * \note For floating point types T, the function \p absolute_fuzzy_equals() * may be a more appropriate choice. 
*/ + LIBMESH_DEVICE_INLINE bool operator == (const TypeVector & rhs) const; /** * \returns !(*this == rhs) */ + LIBMESH_DEVICE_INLINE bool operator != (const TypeVector & rhs) const; /** @@ -425,7 +439,7 @@ class TypeVector // Inline functions template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector () { _coords[0] = {}; @@ -442,7 +456,7 @@ TypeVector::TypeVector () template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const T & x, const T & y, const T & z) @@ -467,7 +481,7 @@ TypeVector::TypeVector (const T & x, template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (typename std::enable_if::value, const Scalar1>::type & x, @@ -497,7 +511,7 @@ TypeVector::TypeVector (typename template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const Scalar & x, typename std::enable_if::value, @@ -518,7 +532,7 @@ TypeVector::TypeVector (const Scalar & x, template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const TypeVector & p) { // copy the nodes from vector p to me @@ -530,7 +544,7 @@ TypeVector::TypeVector (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::assign (const TypeVector & p) { for (unsigned int i=0; i::assign (const TypeVector & p) template -inline +LIBMESH_DEVICE_INLINE const T & TypeVector::operator () (const unsigned int i) const { libmesh_assert_less (i, LIBMESH_DIM); @@ -551,7 +565,7 @@ const T & TypeVector::operator () (const unsigned int i) const template -inline +LIBMESH_DEVICE_INLINE T & TypeVector::operator () (const unsigned int i) { libmesh_assert_less (i, LIBMESH_DIM); @@ -563,7 +577,7 @@ T & TypeVector::operator () (const unsigned int i) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::operator + (const TypeVector & p) const { @@ -589,7 +603,7 @@ TypeVector::operator + (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator += (const TypeVector & p) 
{ this->add (p); @@ -601,7 +615,7 @@ const TypeVector & TypeVector::operator += (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::add (const TypeVector & p) { #if LIBMESH_DIM == 1 @@ -625,7 +639,7 @@ void TypeVector::add (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::add_scaled (const TypeVector & p, const T & factor) { #if LIBMESH_DIM == 1 @@ -649,7 +663,7 @@ void TypeVector::add_scaled (const TypeVector & p, const T & factor) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::operator - (const TypeVector & p) const { @@ -676,7 +690,7 @@ TypeVector::operator - (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator -= (const TypeVector & p) { this->subtract (p); @@ -688,7 +702,7 @@ const TypeVector & TypeVector::operator -= (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::subtract (const TypeVector & p) { for (unsigned int i=0; i::subtract (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::subtract_scaled (const TypeVector & p, const T & factor) { for (unsigned int i=0; i::subtract_scaled (const TypeVector & p, const T & factor) template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeVector::operator - () const { @@ -734,7 +748,7 @@ TypeVector TypeVector::operator - () const template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -761,7 +775,7 @@ TypeVector::operator * (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -774,7 +788,7 @@ operator * (const Scalar & factor, template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator *= (const T & factor) { #if LIBMESH_DIM == 1 @@ -799,7 +813,7 @@ const TypeVector & 
TypeVector::operator *= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -830,7 +844,7 @@ TypeVector::operator / (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator /= (const T & factor) { @@ -847,7 +861,7 @@ TypeVector::operator /= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeVector::operator * (const TypeVector & p) const { @@ -869,7 +883,7 @@ TypeVector::operator * (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeVector::contract(const TypeVector & p) const { @@ -880,6 +894,7 @@ TypeVector::contract(const TypeVector & p) const template template +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::cross(const TypeVector & p) const { @@ -903,7 +918,7 @@ TypeVector::cross(const TypeVector & p) const template -inline +LIBMESH_DEVICE_INLINE auto TypeVector::norm() const { using std::sqrt; @@ -913,7 +928,7 @@ auto TypeVector::norm() const template -inline +LIBMESH_DEVICE_INLINE void TypeVector::zero() { for (unsigned int i=0; i::zero() template -inline +LIBMESH_DEVICE_INLINE auto TypeVector::norm_sq() const { #if LIBMESH_DIM == 1 @@ -944,7 +959,7 @@ auto TypeVector::norm_sq() const template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::is_zero() const { for (const auto & val : _coords) @@ -958,6 +973,7 @@ auto TypeVector::l1_norm() const; template +LIBMESH_DEVICE_INLINE auto TypeVector::l1_norm() const { @@ -988,7 +1004,7 @@ bool TypeVector::relative_fuzzy_equals(const TypeVector & rhs, Real tol) c template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::operator == (const TypeVector & rhs) const { #if LIBMESH_DIM == 1 @@ -1010,7 +1026,7 @@ bool TypeVector::operator == (const TypeVector & rhs) const template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::operator != (const TypeVector 
& rhs) const { return (!(*this == rhs)); @@ -1027,7 +1043,7 @@ bool TypeVector::operator != (const TypeVector & rhs) const // [b0, b1, b2] // [c0, c1, c2] template -inline +LIBMESH_DEVICE_INLINE T triple_product(const TypeVector & a, const TypeVector & b, const TypeVector & c) @@ -1049,7 +1065,7 @@ T triple_product(const TypeVector & a, // to be positive if the vectors are obey the right-hand rule, or // negative for a left-hand orientation. template -inline +LIBMESH_DEVICE_INLINE T solid_angle(const TypeVector & v01, const TypeVector & v02, const TypeVector & v03) @@ -1075,7 +1091,7 @@ T solid_angle(const TypeVector & v01, * calling b.cross(c).norm_sq(). */ template -inline +LIBMESH_DEVICE_INLINE T cross_norm_sq(const TypeVector & b, const TypeVector & c) { @@ -1096,7 +1112,7 @@ T cross_norm_sq(const TypeVector & b, * Calls cross_norm_sq() and takes the square root of the result. */ template -inline +LIBMESH_DEVICE_INLINE T cross_norm(const TypeVector & b, const TypeVector & c) { @@ -1105,7 +1121,7 @@ T cross_norm(const TypeVector & b, } template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeVector::unit() const { @@ -1167,6 +1183,7 @@ struct CompareTypes, TypeVector> }; template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE TypeVector::supertype> outer_product(const T & a, const TypeVector & b) { @@ -1178,6 +1195,7 @@ outer_product(const T & a, const TypeVector & b) } template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE TypeVector::supertype> outer_product(const TypeVector & a, const T2 & b) { @@ -1208,6 +1226,7 @@ l1_norm_diff(const TypeVector & vec1, const TypeVector & vec2) namespace std { template +LIBMESH_DEVICE_INLINE auto norm(const libMesh::TypeVector & vector) -> decltype(std::norm(T())) { // Yea I agree it's dumb that the standard returns the square of the Euclidean norm diff --git a/include/numerics/vector_value.h b/include/numerics/vector_value.h index c93f17313dc..45116e1a737 100644 --- a/include/numerics/vector_value.h +++ 
b/include/numerics/vector_value.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/type_vector.h" +#include "libmesh/libmesh_device.h" #include "libmesh/compare_types.h" #ifdef LIBMESH_HAVE_METAPHYSICL @@ -124,6 +125,7 @@ class VectorValue : public TypeVector * Assignment-from-scalar operator. Used only to zero out vectors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, VectorValue &>::type @@ -146,7 +148,7 @@ typedef NumberVectorValue Gradient; // Inline functions template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue () : TypeVector () { @@ -154,7 +156,7 @@ VectorValue::VectorValue () : template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const T & x, const T & y, const T & z) : @@ -166,7 +168,7 @@ VectorValue::VectorValue (const T & x, template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (typename std::enable_if::value, const Scalar1>::type & x, @@ -183,7 +185,7 @@ VectorValue::VectorValue (typename template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const Scalar & x, typename std::enable_if::value, @@ -194,7 +196,7 @@ VectorValue::VectorValue (const Scalar & x, template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const VectorValue & p) : TypeVector (p) { @@ -204,7 +206,7 @@ VectorValue::VectorValue (const VectorValue & p) : template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const TypeVector & p) : TypeVector (p) { @@ -212,7 +214,7 @@ VectorValue::VectorValue (const TypeVector & p) : #ifdef LIBMESH_USE_COMPLEX_NUMBERS template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const TypeVector & p_re, const TypeVector & p_im) : TypeVector (Complex (p_re(0), p_im(0)), From ad6101f04bbd94e827794cf2bc292401160a8418 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Fri, 8 May 2026 15:04:46 -0600 Subject: [PATCH 11/48] Add Kokkos numerics storage and operator headers --- include/gpu/kokkos_linalg_base.h | 472 
++++++++++++ include/gpu/kokkos_storage.h | 53 ++ include/gpu/kokkos_storage_policy.h | 124 ++++ include/gpu/kokkos_tensor_ops.h | 1033 +++++++++++++++++++++++++++ include/gpu/kokkos_vector_ops.h | 746 +++++++++++++++++++ include/include_HEADERS | 6 + include/libmesh/Makefile.am | 25 +- 7 files changed, 2458 insertions(+), 1 deletion(-) create mode 100644 include/gpu/kokkos_linalg_base.h create mode 100644 include/gpu/kokkos_storage.h create mode 100644 include/gpu/kokkos_storage_policy.h create mode 100644 include/gpu/kokkos_tensor_ops.h create mode 100644 include/gpu/kokkos_vector_ops.h diff --git a/include/gpu/kokkos_linalg_base.h b/include/gpu/kokkos_linalg_base.h new file mode 100644 index 00000000000..70a634f1f19 --- /dev/null +++ b/include/gpu/kokkos_linalg_base.h @@ -0,0 +1,472 @@ +// libMesh Kokkos compile-time linalg foundation. +// +// This header defines the small access/materialization layer that sits +// underneath richer vector/tensor algebra. It is intentionally limited to +// component access, storage-backed references, and conversion between +// vector-like/tensor-like objects and libMesh semantic types. 
+ +#ifndef LIBMESH_KOKKOS_LINALG_BASE_H +#define LIBMESH_KOKKOS_LINALG_BASE_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/point.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" +#include "libmesh/vector_value.h" + +#include +#include + +namespace libMesh::Kokkos +{ + +namespace detail +{ + +template +using remove_cvref_t = + typename std::remove_cv::type>::type; + +template +using remove_ref_t = typename std::remove_reference::type; + +template +using vector_view_value_t = + remove_cvref_t()(0, 0))>; + +template +using tensor_view_value_t = + remove_cvref_t()(0, 0, 0))>; + +} // namespace detail + +template +struct vector_traits; + +template +struct tensor_traits; + +template +struct is_vector_like : std::false_type +{ +}; + +template +struct is_tensor_like : std::false_type +{ +}; + +template +struct is_vector_ref : std::false_type +{ +}; + +template +struct is_tensor_ref : std::false_type +{ +}; + +template +inline constexpr bool is_vector_like_v = is_vector_like>::value; + +template +inline constexpr bool is_tensor_like_v = is_tensor_like>::value; + +template +inline constexpr bool is_vector_ref_v = is_vector_ref>::value; + +template +inline constexpr bool is_tensor_ref_v = is_tensor_ref>::value; + +template +class vector_ref +{ +public: + using view_type = ViewType; + using value_type = detail::vector_view_value_t; + + LIBMESH_DEVICE_INLINE + vector_ref(ViewType view, const unsigned int index) : _view(view), _index(index) {} + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int component) const + { + return _view(_index, component); + } + + template + LIBMESH_DEVICE_INLINE + void set(const unsigned int component, const Scalar & value) + { + static_assert(std::is_assignable::value, + "Cannot write through a vector_ref built from a read-only view"); + _view(_index, component) = value; + } + + template + LIBMESH_DEVICE_INLINE + void 
assign(const RightVector & right); + + template + LIBMESH_DEVICE_INLINE + void add(const RightVector & right); + + template + LIBMESH_DEVICE_INLINE + void add_scaled(const RightVector & right, const value_type & factor); + + template + LIBMESH_DEVICE_INLINE + void subtract(const RightVector & right); + + template + LIBMESH_DEVICE_INLINE + void subtract_scaled(const RightVector & right, const value_type & factor); + + LIBMESH_DEVICE_INLINE + void zero(); + + template + LIBMESH_DEVICE_INLINE + auto contract(const RightVector & right) const; + + LIBMESH_DEVICE_INLINE + auto norm() const; + + LIBMESH_DEVICE_INLINE + auto norm_sq() const; + + LIBMESH_DEVICE_INLINE + auto l1_norm() const; + + LIBMESH_DEVICE_INLINE + bool is_zero() const; + + LIBMESH_DEVICE_INLINE + auto unit() const; + + template + LIBMESH_DEVICE_INLINE + auto cross(const RightVector & right) const; + + LIBMESH_DEVICE_INLINE + unsigned int index() const + { + return _index; + } + +private: + ViewType _view; + unsigned int _index; +}; + +template +class tensor_ref +{ +public: + using view_type = ViewType; + using value_type = detail::tensor_view_value_t; + + LIBMESH_DEVICE_INLINE + tensor_ref(ViewType view, const unsigned int index) : _view(view), _index(index) {} + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int row, const unsigned int col) const + { + return _view(_index, row, col); + } + + template + LIBMESH_DEVICE_INLINE + void set(const unsigned int row, const unsigned int col, const Scalar & value) + { + static_assert(std::is_assignable::value, + "Cannot write through a tensor_ref built from a read-only view"); + _view(_index, row, col) = value; + } + + template + LIBMESH_DEVICE_INLINE + void assign(const RightTensor & right); + + template + LIBMESH_DEVICE_INLINE + void add(const RightTensor & right); + + template + LIBMESH_DEVICE_INLINE + void add_scaled(const RightTensor & right, const value_type & factor); + + template + LIBMESH_DEVICE_INLINE + void subtract(const 
RightTensor & right); + + template + LIBMESH_DEVICE_INLINE + void subtract_scaled(const RightTensor & right, const value_type & factor); + + LIBMESH_DEVICE_INLINE + void zero(); + + template + LIBMESH_DEVICE_INLINE + auto contract(const RightTensor & right) const; + + LIBMESH_DEVICE_INLINE + auto norm() const; + + LIBMESH_DEVICE_INLINE + auto norm_sq() const; + + LIBMESH_DEVICE_INLINE + bool is_zero() const; + + LIBMESH_DEVICE_INLINE + auto transpose() const; + + LIBMESH_DEVICE_INLINE + auto det(const unsigned int dim = LIBMESH_DIM) const; + + LIBMESH_DEVICE_INLINE + auto tr() const; + + LIBMESH_DEVICE_INLINE + auto inverse(const unsigned int dim = LIBMESH_DIM) const; + + template + LIBMESH_DEVICE_INLINE + void solve(const VectorLike & b, ResultVector & x) const; + + LIBMESH_DEVICE_INLINE + auto row(const unsigned int i) const; + + LIBMESH_DEVICE_INLINE + auto column(const unsigned int i) const; + + template + LIBMESH_DEVICE_INLINE + auto left_multiply(const VectorLike & v) const; + + LIBMESH_DEVICE_INLINE + unsigned int index() const + { + return _index; + } + +private: + ViewType _view; + unsigned int _index; +}; + +template +struct vector_traits> +{ + using value_type = T; + using semantic_type = libMesh::TypeVector; +}; + +template +struct vector_traits> +{ + using value_type = T; + using semantic_type = libMesh::VectorValue; +}; + +template <> +struct vector_traits +{ + using value_type = libMesh::Real; + using semantic_type = libMesh::Point; +}; + +template +struct vector_traits> +{ + using value_type = typename vector_ref::value_type; + using semantic_type = libMesh::TypeVector; +}; + +template +struct is_vector_like> : std::true_type +{ +}; + +template +struct is_vector_like> : std::true_type +{ +}; + +template <> +struct is_vector_like : std::true_type +{ +}; + +template +struct is_vector_like> : std::true_type +{ +}; + +template +struct is_vector_ref> : std::true_type +{ +}; + +template +struct tensor_traits> +{ + using value_type = T; + using 
semantic_type = libMesh::TypeTensor; +}; + +template +struct tensor_traits> +{ + using value_type = T; + using semantic_type = libMesh::TensorValue; +}; + +template +struct tensor_traits> +{ + using value_type = typename tensor_ref::value_type; + using semantic_type = libMesh::TypeTensor; +}; + +template +struct is_tensor_like> : std::true_type +{ +}; + +template +struct is_tensor_like> : std::true_type +{ +}; + +template +struct is_tensor_like> : std::true_type +{ +}; + +template +struct is_tensor_ref> : std::true_type +{ +}; + +template +using vector_value_type_t = typename vector_traits>::value_type; + +template +using tensor_value_type_t = typename tensor_traits>::value_type; + +template +using vector_semantic_type_t = typename vector_traits>::semantic_type; + +template +using tensor_semantic_type_t = typename tensor_traits>::semantic_type; + +template +LIBMESH_DEVICE_INLINE +decltype(auto) +vector_get_component(const T & v, const unsigned int component) +{ + return v(component); +} + +template +LIBMESH_DEVICE_INLINE +void vector_set_component(T & v, const unsigned int component, const Scalar & value) +{ + v(component) = value; +} + +template +LIBMESH_DEVICE_INLINE +void vector_set_component(vector_ref v, + const unsigned int component, + const Scalar & value) +{ + v.set(component, value); +} + +template +LIBMESH_DEVICE_INLINE +decltype(auto) +tensor_get_component(const T & T_in, const unsigned int row, const unsigned int col) +{ + return T_in(row, col); +} + +template +LIBMESH_DEVICE_INLINE +void tensor_set_component(T & T_out, + const unsigned int row, + const unsigned int col, + const Scalar & value) +{ + T_out(row, col) = value; +} + +template +LIBMESH_DEVICE_INLINE +void tensor_set_component(tensor_ref T_out, + const unsigned int row, + const unsigned int col, + const Scalar & value) +{ + T_out.set(row, col, value); +} + +template +LIBMESH_DEVICE_INLINE +vector_ref> +make_vector_ref(ViewType && view, const unsigned int index) +{ + return 
vector_ref>(std::forward(view), index); +} + +template +LIBMESH_DEVICE_INLINE +tensor_ref> +make_tensor_ref(ViewType && view, const unsigned int index) +{ + return tensor_ref>(std::forward(view), index); +} + +template +LIBMESH_DEVICE_INLINE +OutputVector materialize_vector(const VectorLike & v) +{ + static_assert(is_vector_like>::value, + "materialize_vector() requires a vector-like input type"); + + OutputVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, vector_get_component(v, component)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +OutputTensor materialize_tensor(const TensorLike & T_in) +{ + static_assert(is_tensor_like>::value, + "materialize_tensor() requires a tensor-like input type"); + + OutputTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, row, col)); + + return out; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_LINALG_BASE_H diff --git a/include/gpu/kokkos_storage.h b/include/gpu/kokkos_storage.h new file mode 100644 index 00000000000..23e59aabf8c --- /dev/null +++ b/include/gpu/kokkos_storage.h @@ -0,0 +1,53 @@ +// libMesh Kokkos storage helpers for dimension-aware vector/tensor views. 
+ +#ifndef LIBMESH_KOKKOS_STORAGE_H +#define LIBMESH_KOKKOS_STORAGE_H + +#include "libmesh/kokkos_linalg_base.h" + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" + +namespace libMesh::Kokkos +{ + +template +LIBMESH_DEVICE_INLINE +VectorType load_vector(const ViewType & view, const unsigned int i) +{ + return materialize_vector(make_vector_ref(view, i)); +} + +template +LIBMESH_DEVICE_INLINE +void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) +{ + auto out = make_vector_ref(view, i); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + vector_set_component(out, d, vector_get_component(v, d)); +} + +template +LIBMESH_DEVICE_INLINE +TensorType load_tensor(const ViewType & view, const unsigned int i) +{ + return materialize_tensor(make_tensor_ref(view, i)); +} + +template +LIBMESH_DEVICE_INLINE +void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) +{ + auto out = make_tensor_ref(view, i); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T, row, col)); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_STORAGE_H diff --git a/include/gpu/kokkos_storage_policy.h b/include/gpu/kokkos_storage_policy.h new file mode 100644 index 00000000000..6bfec5a6df0 --- /dev/null +++ b/include/gpu/kokkos_storage_policy.h @@ -0,0 +1,124 @@ +// libMesh Kokkos compile-time storage policies for fixed-dimension linalg data. +// +// These policies keep storage selection separate from the linalg algorithms: +// kernels operate on refs/materialized values, while the backend policy chooses +// the underlying Kokkos view layout. 
+ +#ifndef LIBMESH_KOKKOS_STORAGE_POLICY_H +#define LIBMESH_KOKKOS_STORAGE_POLICY_H + +#include "libmesh/libmesh_common.h" + +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include + +namespace libMesh::Kokkos +{ + +template +struct static_dim_storage_policy +{ + using scalar_type = Scalar; + using layout_type = Layout; + using vector_view = ::Kokkos::View; + using tensor_view = ::Kokkos::View; + + static constexpr const char * + name() + { + return std::is_same::value ? "layoutleft" : + std::is_same::value ? "layoutright" : + "layoutcustom"; + } +}; + +using layout_left_storage_policy = static_dim_storage_policy; +using layout_right_storage_policy = static_dim_storage_policy; +using default_storage_policy = layout_right_storage_policy; + +template +constexpr const char * +storage_policy_name() +{ + return StoragePolicy::name(); +} + +template +inline typename StoragePolicy::vector_view +make_vector_storage(const char * label, const std::size_t n) +{ + return typename StoragePolicy::vector_view(std::string(label), n); +} + +inline default_storage_policy::vector_view +make_vector_storage(const char * label, const std::size_t n) +{ + return make_vector_storage(label, n); +} + +template +inline typename StoragePolicy::tensor_view +make_tensor_storage(const char * label, const std::size_t n) +{ + return typename StoragePolicy::tensor_view(std::string(label), n); +} + +inline default_storage_policy::tensor_view +make_tensor_storage(const char * label, const std::size_t n) +{ + return make_tensor_storage(label, n); +} + +template +inline typename StoragePolicy::vector_view +upload_vector_storage(const std::vector & values, const char * label) +{ + auto d = make_vector_storage(label, values.size()); + auto h = ::Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + h(i, component) = values[i](component); + + 
::Kokkos::deep_copy(d, h); + return d; +} + +template +inline default_storage_policy::vector_view +upload_vector_storage(const std::vector & values, const char * label) +{ + return upload_vector_storage(values, label); +} + +template +inline typename StoragePolicy::tensor_view +upload_tensor_storage(const std::vector & values, const char * label) +{ + auto d = make_tensor_storage(label, values.size()); + auto h = ::Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + h(i, row, col) = values[i](row, col); + + ::Kokkos::deep_copy(d, h); + return d; +} + +template +inline default_storage_policy::tensor_view +upload_tensor_storage(const std::vector & values, const char * label) +{ + return upload_tensor_storage(values, label); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_STORAGE_POLICY_H diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h new file mode 100644 index 00000000000..a08080405d6 --- /dev/null +++ b/include/gpu/kokkos_tensor_ops.h @@ -0,0 +1,1033 @@ +// libMesh Kokkos generic tensor operations. +// +// These free functions build tensor algebra on top of the primitive +// access/materialization layer in kokkos_linalg_base.h. They are written +// against tensor-like and vector-like inputs so both libMesh owning types and +// storage-backed refs can participate in the same math. 
+ +#ifndef LIBMESH_KOKKOS_TENSOR_OPS_H +#define LIBMESH_KOKKOS_TENSOR_OPS_H + +#include "libmesh/kokkos_linalg_base.h" +#include "libmesh/kokkos_vector_ops.h" + +#include "libmesh/tensor_tools.h" + +#include + +namespace libMesh::Kokkos +{ + +// Construction and materialization + +template +LIBMESH_DEVICE_INLINE +ResultTensor zero_tensor_value() +{ + ResultTensor out; + out.zero(); + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_identity(const unsigned int dim = LIBMESH_DIM) +{ + ResultTensor out; + out.zero(); + + for (unsigned int i = 0; i < dim; ++i) + tensor_set_component(out, i, i, tensor_value_type_t(1)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor copy_tensor(const TensorLike & T_in) +{ + return materialize_tensor(T_in); +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t copy_tensor(const TensorLike & T_in) +{ + return copy_tensor>(T_in); +} + +// Tensor reductions and predicates + +template +LIBMESH_DEVICE_INLINE +auto tensor_contract(const LeftTensor & left, const RightTensor & right) +{ + static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like left input"); + static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like right input"); + + using sum_type = + detail::remove_cvref_t; + + sum_type sum = sum_type(0); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + sum += tensor_get_component(left, row, col) * tensor_get_component(right, row, col); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_norm_sq(const TensorLike & T_in) +{ + static_assert(is_tensor_like_v, "tensor_norm_sq() requires a tensor-like input"); + + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + sum += libMesh::TensorTools::norm_sq(tensor_get_component(T_in, 
row, col)); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_norm(const TensorLike & T_in) +{ + using std::sqrt; + return sqrt(tensor_norm_sq(T_in)); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_trace(const TensorLike & T_in) +{ + static_assert(is_tensor_like_v, "tensor_trace() requires a tensor-like input"); + + using trace_type = detail::remove_cvref_t; + trace_type sum = trace_type(0); + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + sum += tensor_get_component(T_in, i, i); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +bool tensor_is_zero(const TensorLike & T_in) +{ + static_assert(is_tensor_like_v, "tensor_is_zero() requires a tensor-like input"); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (tensor_get_component(T_in, row, col) != tensor_value_type_t(0)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +bool tensor_equal(const LeftTensor & left, const RightTensor & right) +{ + static_assert(is_tensor_like_v, "tensor_equal() requires a tensor-like left input"); + static_assert(is_tensor_like_v, "tensor_equal() requires a tensor-like right input"); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (tensor_get_component(left, row, col) != tensor_get_component(right, row, col)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +bool tensor_not_equal(const LeftTensor & left, const RightTensor & right) +{ + return !tensor_equal(left, right); +} + +// Tensor arithmetic + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_outer_product(const LeftVector & left, const RightVector & right) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + vector_get_component(left, row) * libmesh_conj(vector_get_component(right, 
col))); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +libMesh::TypeTensor> +tensor_outer_product(const LeftVector & left, const RightVector & right) +{ + return tensor_outer_product>>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_add(const LeftTensor & left, const RightTensor & right) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_add(const LeftTensor & left, const RightTensor & right) +{ + return tensor_add>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_subtract(const LeftTensor & left, const RightTensor & right) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_subtract(const LeftTensor & left, const RightTensor & right) +{ + return tensor_subtract>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_scale(const Scalar & alpha, const TensorLike & T_in) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, alpha * tensor_get_component(T_in, row, col)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_scale(const Scalar & alpha, const TensorLike & T_in) +{ + return tensor_scale>(alpha, T_in); +} + +template 
+LIBMESH_DEVICE_INLINE +ResultTensor tensor_divide(const TensorLike & T_in, const Scalar & alpha) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, row, col) / alpha); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_divide(const TensorLike & T_in, const Scalar & alpha) +{ + return tensor_divide>(T_in, alpha); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_determinant(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +{ + static_assert(is_tensor_like_v, "tensor_determinant() requires a tensor-like input"); + + if (dim == 0) + return tensor_value_type_t(1); + + if (dim == 1) + return tensor_get_component(T_in, 0, 0); + + if (dim == 2) + return tensor_get_component(T_in, 0, 0) * tensor_get_component(T_in, 1, 1) - + tensor_get_component(T_in, 0, 1) * tensor_get_component(T_in, 1, 0); + +#if LIBMESH_DIM > 2 + const auto a00 = tensor_get_component(T_in, 0, 0); + const auto a01 = tensor_get_component(T_in, 0, 1); + const auto a02 = tensor_get_component(T_in, 0, 2); + const auto a10 = tensor_get_component(T_in, 1, 0); + const auto a11 = tensor_get_component(T_in, 1, 1); + const auto a12 = tensor_get_component(T_in, 1, 2); + const auto a20 = tensor_get_component(T_in, 2, 0); + const auto a21 = tensor_get_component(T_in, 2, 1); + const auto a22 = tensor_get_component(T_in, 2, 2); + + return a00 * (a11 * a22 - a12 * a21) - + a01 * (a10 * a22 - a12 * a20) + + a02 * (a10 * a21 - a11 * a20); +#else + libmesh_ignore(T_in); + return tensor_value_type_t(0); +#endif +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +{ + static_assert(is_tensor_like_v, "tensor_inverse() requires a tensor-like input"); + + ResultTensor out; + out.zero(); + + if (dim == 0) + return 
out; + + if (dim == 1) + { + tensor_set_component(out, 0, 0, tensor_value_type_t(1) / tensor_get_component(T_in, 0, 0)); + return out; + } + + const auto det = tensor_determinant(T_in, dim); + + if (dim == 2) + { + tensor_set_component(out, 0, 0, tensor_get_component(T_in, 1, 1) / det); + tensor_set_component(out, 0, 1, -tensor_get_component(T_in, 0, 1) / det); + tensor_set_component(out, 1, 0, -tensor_get_component(T_in, 1, 0) / det); + tensor_set_component(out, 1, 1, tensor_get_component(T_in, 0, 0) / det); + return out; + } + +#if LIBMESH_DIM > 2 + const auto a00 = tensor_get_component(T_in, 0, 0); + const auto a01 = tensor_get_component(T_in, 0, 1); + const auto a02 = tensor_get_component(T_in, 0, 2); + const auto a10 = tensor_get_component(T_in, 1, 0); + const auto a11 = tensor_get_component(T_in, 1, 1); + const auto a12 = tensor_get_component(T_in, 1, 2); + const auto a20 = tensor_get_component(T_in, 2, 0); + const auto a21 = tensor_get_component(T_in, 2, 1); + const auto a22 = tensor_get_component(T_in, 2, 2); + + tensor_set_component(out, 0, 0, (a11 * a22 - a12 * a21) / det); + tensor_set_component(out, 0, 1, (a02 * a21 - a01 * a22) / det); + tensor_set_component(out, 0, 2, (a01 * a12 - a02 * a11) / det); + tensor_set_component(out, 1, 0, (a12 * a20 - a10 * a22) / det); + tensor_set_component(out, 1, 1, (a00 * a22 - a02 * a20) / det); + tensor_set_component(out, 1, 2, (a02 * a10 - a00 * a12) / det); + tensor_set_component(out, 2, 0, (a10 * a21 - a11 * a20) / det); + tensor_set_component(out, 2, 1, (a01 * a20 - a00 * a21) / det); + tensor_set_component(out, 2, 2, (a00 * a11 - a01 * a10) / det); +#else + libmesh_ignore(T_in); +#endif + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +{ + return tensor_inverse>(T_in, dim); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_transpose(const TensorLike & T_in) +{ + 
ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, col, row)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_transpose(const TensorLike & T_in) +{ + return tensor_transpose>(T_in); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + alpha * tensor_get_component(A, row, col) + + beta * tensor_get_component(B, row, col)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) +{ + return tensor_linear_combination>(alpha, A, beta, B); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_multiply(const LeftTensor & left, const RightTensor & right) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + auto value = tensor_get_component(left, row, 0) * tensor_get_component(right, 0, col); + for (unsigned int k = 1; k < LIBMESH_DIM; ++k) + value += tensor_get_component(left, row, k) * tensor_get_component(right, k, col); + tensor_set_component(out, row, col, value); + } + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t tensor_multiply(const LeftTensor & left, const RightTensor & right) +{ + return tensor_multiply>(left, right); +} + +// Tensor/vector conversions + +template +LIBMESH_DEVICE_INLINE +ResultVector tensor_row(const 
TensorLike & T_in, const unsigned int row) +{ + ResultVector out; + out.zero(); + + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + vector_set_component(out, col, tensor_get_component(T_in, row, col)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +libMesh::TypeVector> +tensor_row(const TensorLike & T_in, const unsigned int row) +{ + return tensor_row>>(T_in, row); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector tensor_column(const TensorLike & T_in, const unsigned int col) +{ + ResultVector out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + vector_set_component(out, row, tensor_get_component(T_in, row, col)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +libMesh::TypeVector> +tensor_column(const TensorLike & T_in, const unsigned int col) +{ + return tensor_column>>(T_in, col); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector tensor_vector_multiply(const TensorLike & T_in, const VectorLike & v) +{ + ResultVector out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + { + auto value = tensor_get_component(T_in, row, 0) * vector_get_component(v, 0); + for (unsigned int col = 1; col < LIBMESH_DIM; ++col) + value += tensor_get_component(T_in, row, col) * vector_get_component(v, col); + vector_set_component(out, row, value); + } + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t tensor_vector_multiply(const TensorLike & T_in, const VectorLike & v) +{ + return tensor_vector_multiply>(T_in, v); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_tensor_multiply(const VectorLike & v, const TensorLike & T_in) +{ + ResultVector out; + out.zero(); + + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + auto value = vector_get_component(v, 0) * tensor_get_component(T_in, 0, col); + for (unsigned int row = 1; row < LIBMESH_DIM; ++row) + value += vector_get_component(v, row) * 
tensor_get_component(T_in, row, col); + vector_set_component(out, col, value); + } + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_tensor_multiply(const VectorLike & v, const TensorLike & T_in) +{ + return vector_tensor_multiply>(v, T_in); +} + +// libMesh-like convenience wrappers + +template +LIBMESH_DEVICE_INLINE +auto contract(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v, + decltype(tensor_contract(left, right))> +{ + return tensor_contract(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto norm_sq(const TensorLike & T_in) + -> std::enable_if_t, decltype(tensor_norm_sq(T_in))> +{ + return tensor_norm_sq(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto norm(const TensorLike & T_in) + -> std::enable_if_t, decltype(tensor_norm(T_in))> +{ + return tensor_norm(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto is_zero(const TensorLike & T_in) + -> std::enable_if_t, bool> +{ + return tensor_is_zero(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto outer_product(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v, + libMesh::TypeTensor>> +{ + return tensor_outer_product(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto transpose(const TensorLike & T_in) + -> std::enable_if_t, tensor_semantic_type_t> +{ + return tensor_transpose(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto det(const TensorLike & T_in) + -> std::enable_if_t, decltype(tensor_determinant(T_in))> +{ + return tensor_determinant(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) + -> std::enable_if_t, tensor_semantic_type_t> +{ + return tensor_inverse(T_in, dim); +} + +template +LIBMESH_DEVICE_INLINE +auto row(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, libMesh::TypeVector>> +{ + return tensor_row(T_in, i); +} + +template 
+LIBMESH_DEVICE_INLINE +auto column(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, libMesh::TypeVector>> +{ + return tensor_column(T_in, i); +} + +template +LIBMESH_DEVICE_INLINE +auto multiply(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v, + tensor_semantic_type_t> +{ + return tensor_multiply(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto multiply(const TensorLike & T_in, const VectorLike & v) + -> std::enable_if_t && is_vector_like_v, + vector_semantic_type_t> +{ + return tensor_vector_multiply(T_in, v); +} + +template +LIBMESH_DEVICE_INLINE +auto multiply(const VectorLike & v, const TensorLike & T_in) + -> std::enable_if_t && is_tensor_like_v, + vector_semantic_type_t> +{ + return vector_tensor_multiply(v, T_in); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::assign(const RightTensor & right) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(*this, row, col, tensor_get_component(right, row, col)); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::add(const RightTensor & right) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(*this, + row, + col, + tensor_get_component(*this, row, col) + tensor_get_component(right, row, col)); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::add_scaled(const RightTensor & right, const value_type & factor) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(*this, + row, + col, + tensor_get_component(*this, row, col) + + factor * tensor_get_component(right, row, col)); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::subtract(const RightTensor & right) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int 
col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(*this, + row, + col, + tensor_get_component(*this, row, col) - tensor_get_component(right, row, col)); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::subtract_scaled(const RightTensor & right, const value_type & factor) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(*this, + row, + col, + tensor_get_component(*this, row, col) - + factor * tensor_get_component(right, row, col)); +} + +template +LIBMESH_DEVICE_INLINE +void tensor_ref::zero() +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(*this, row, col, value_type(0)); +} + +template +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::contract(const RightTensor & right) const +{ + return tensor_contract(*this, right); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::norm() const +{ + return tensor_norm(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::norm_sq() const +{ + return tensor_norm_sq(*this); +} + +template +LIBMESH_DEVICE_INLINE +bool tensor_ref::is_zero() const +{ + return tensor_is_zero(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::transpose() const +{ + return tensor_transpose(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::det(const unsigned int dim) const +{ + return tensor_determinant(*this, dim); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::tr() const +{ + return tensor_trace(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::inverse(const unsigned int dim) const +{ + return tensor_inverse(*this, dim); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::solve(const VectorLike & b, ResultVector & x) const +{ + const auto solution = tensor_vector_multiply>(this->inverse(), b); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + 
vector_set_component(x, component, vector_get_component(solution, component)); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::row(const unsigned int i) const +{ + return tensor_row(*this, i); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::column(const unsigned int i) const +{ + return tensor_column(*this, i); +} + +template +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::left_multiply(const VectorLike & v) const +{ + return vector_tensor_multiply(v, *this); +} + +// Operator-compatible wrappers for storage-backed refs and mixed ref/owning math. + +template +LIBMESH_DEVICE_INLINE +auto operator-(const TensorLike & T_in) + -> std::enable_if_t && is_tensor_ref_v, + tensor_semantic_type_t> +{ + return tensor_scale(tensor_value_type_t(-1), T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + tensor_semantic_type_t> +{ + return tensor_add(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator-(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + tensor_semantic_type_t> +{ + return tensor_subtract(left, right); +} + +template && !is_tensor_like_v && + is_tensor_like_v && is_tensor_ref_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const Scalar & alpha, const TensorLike & T_in) +{ + return tensor_scale(alpha, T_in); +} + +template && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const TensorLike & T_in, const Scalar & alpha) +{ + return tensor_scale(alpha, T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto operator/(const TensorLike & T_in, const Scalar & alpha) + -> std::enable_if_t && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + tensor_semantic_type_t> +{ + return tensor_divide(T_in, alpha); +} + +template && 
is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const LeftTensor & left, const RightTensor & right) +{ + return tensor_multiply(left, right); +} + +template && is_vector_like_v && + (is_tensor_ref_v || is_vector_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const TensorLike & T_in, const VectorLike & v) +{ + return tensor_vector_multiply(T_in, v); +} + +template && is_tensor_like_v && + (is_vector_ref_v || is_tensor_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const VectorLike & v, const TensorLike & T_in) +{ + return vector_tensor_multiply(v, T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto operator==(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + bool> +{ + return tensor_equal(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator!=(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + bool> +{ + return tensor_not_equal(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+=(LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + LeftTensor &> +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); + + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator-=(LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + LeftTensor &> +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) 
- tensor_get_component(right, row, col)); + + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator*=(LeftTensor & left, const Scalar & alpha) + -> std::enable_if_t && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftTensor &> +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, row, col, tensor_get_component(left, row, col) * alpha); + + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator/=(LeftTensor & left, const Scalar & alpha) + -> std::enable_if_t && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftTensor &> +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, row, col, tensor_get_component(left, row, col) / alpha); + + return left; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_TENSOR_OPS_H diff --git a/include/gpu/kokkos_vector_ops.h b/include/gpu/kokkos_vector_ops.h new file mode 100644 index 00000000000..3fb14068661 --- /dev/null +++ b/include/gpu/kokkos_vector_ops.h @@ -0,0 +1,746 @@ +// libMesh Kokkos generic vector operations. +// +// These free functions build vector algebra on top of the primitive +// access/materialization layer in kokkos_linalg_base.h. They are written +// against vector-like inputs so both libMesh owning types and storage-backed +// refs can participate in the same math. 
+ +#ifndef LIBMESH_KOKKOS_VECTOR_OPS_H +#define LIBMESH_KOKKOS_VECTOR_OPS_H + +#include "libmesh/kokkos_linalg_base.h" + +#include "libmesh/tensor_tools.h" + +#include + +namespace libMesh::Kokkos +{ + +// Construction and materialization + +template +LIBMESH_DEVICE_INLINE +ResultVector zero_vector_value() +{ + ResultVector out; + out.zero(); + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector copy_vector(const VectorLike & v) +{ + return materialize_vector(v); +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t copy_vector(const VectorLike & v) +{ + return copy_vector>(v); +} + +// Reductions and predicates + +template +LIBMESH_DEVICE_INLINE +auto vector_dot(const LeftVector & left, const RightVector & right) +{ + static_assert(is_vector_like_v, "vector_dot() requires a vector-like left input"); + static_assert(is_vector_like_v, "vector_dot() requires a vector-like right input"); + + using sum_type = + detail::remove_cvref_t; + + sum_type sum = sum_type(0); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + sum += vector_get_component(left, component) * vector_get_component(right, component); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto vector_contract(const LeftVector & left, const RightVector & right) +{ + return vector_dot(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_norm_sq(const VectorLike & v) +{ + static_assert(is_vector_like_v, "vector_norm_sq() requires a vector-like input"); + + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + sum += libMesh::TensorTools::norm_sq(vector_get_component(v, component)); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto vector_norm(const VectorLike & v) +{ + using std::sqrt; + return sqrt(vector_norm_sq(v)); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_l1_norm(const VectorLike & v) +{ + 
static_assert(is_vector_like_v, "vector_l1_norm() requires a vector-like input"); + + using std::abs; + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + sum += abs(vector_get_component(v, component)); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +bool vector_is_zero(const VectorLike & v) +{ + static_assert(is_vector_like_v, "vector_is_zero() requires a vector-like input"); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + if (vector_get_component(v, component) != vector_value_type_t(0)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +bool vector_equal(const LeftVector & left, const RightVector & right) +{ + static_assert(is_vector_like_v, "vector_equal() requires a vector-like left input"); + static_assert(is_vector_like_v, "vector_equal() requires a vector-like right input"); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + if (vector_get_component(left, component) != vector_get_component(right, component)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +bool vector_not_equal(const LeftVector & left, const RightVector & right) +{ + return !vector_equal(left, right); +} + +// Arithmetic + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_add(const LeftVector & left, const RightVector & right) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, + component, + vector_get_component(left, component) + vector_get_component(right, component)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_add(const LeftVector & left, const RightVector & right) +{ + return vector_add>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_subtract(const LeftVector & left, const RightVector & right) +{ + 
ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, + component, + vector_get_component(left, component) - vector_get_component(right, component)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_subtract(const LeftVector & left, const RightVector & right) +{ + return vector_subtract>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_scale(const Scalar & alpha, const VectorLike & v) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, alpha * vector_get_component(v, component)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_scale(const Scalar & alpha, const VectorLike & v) +{ + return vector_scale>(alpha, v); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_divide(const VectorLike & v, const Scalar & alpha) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, vector_get_component(v, component) / alpha); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_divide(const VectorLike & v, const Scalar & alpha) +{ + return vector_divide>(v, alpha); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_linear_combination(const ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, + component, + alpha * vector_get_component(a, component) + + beta * vector_get_component(b, component)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_linear_combination(const 
ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b) +{ + return vector_linear_combination>(alpha, a, beta, b); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_linear_combination(const ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b, + const ScalarC & gamma, + const VectorC & c) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, + component, + alpha * vector_get_component(a, component) + + beta * vector_get_component(b, component) + + gamma * vector_get_component(c, component)); + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_linear_combination(const ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b, + const ScalarC & gamma, + const VectorC & c) +{ + return vector_linear_combination>(alpha, a, beta, b, gamma, c); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_unit(const VectorLike & v) +{ + const auto length = vector_norm(v); + libmesh_assert_not_equal_to(length, static_cast(0.)); + return vector_divide(v, length); +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_unit(const VectorLike & v) +{ + return vector_unit>(v); +} + +// Geometry + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_cross(const LeftVector & left, const RightVector & right) +{ + ResultVector out; + out.zero(); + +#if LIBMESH_DIM == 3 + vector_set_component(out, + 0, + vector_get_component(left, 1) * vector_get_component(right, 2) - + vector_get_component(left, 2) * vector_get_component(right, 1)); + vector_set_component(out, + 1, + -vector_get_component(left, 0) * vector_get_component(right, 2) + + vector_get_component(left, 2) * vector_get_component(right, 0)); + vector_set_component(out, + 2, + vector_get_component(left, 0) * vector_get_component(right, 1) - + 
vector_get_component(left, 1) * vector_get_component(right, 0)); +#else + libmesh_ignore(left); + libmesh_ignore(right); +#endif + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_cross(const LeftVector & left, const RightVector & right) +{ + return vector_cross>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_triple_product(const LeftVector & left, + const MiddleVector & middle, + const RightVector & right) +{ +#if LIBMESH_DIM == 3 + return vector_get_component(left, 0) * + (vector_get_component(middle, 1) * vector_get_component(right, 2) - + vector_get_component(middle, 2) * vector_get_component(right, 1)) - + vector_get_component(left, 1) * + (vector_get_component(middle, 0) * vector_get_component(right, 2) - + vector_get_component(middle, 2) * vector_get_component(right, 0)) + + vector_get_component(left, 2) * + (vector_get_component(middle, 0) * vector_get_component(right, 1) - + vector_get_component(middle, 1) * vector_get_component(right, 0)); +#else + libmesh_ignore(left, middle, right); + using value_type = + detail::remove_cvref_t; + return value_type(0); +#endif +} + +template +LIBMESH_DEVICE_INLINE +auto vector_cross_norm_sq(const LeftVector & left, const RightVector & right) +{ + const auto z = vector_get_component(left, 0) * vector_get_component(right, 1) - + vector_get_component(left, 1) * vector_get_component(right, 0); + +#if LIBMESH_DIM == 3 + const auto x = vector_get_component(left, 1) * vector_get_component(right, 2) - + vector_get_component(left, 2) * vector_get_component(right, 1); + const auto y = vector_get_component(left, 0) * vector_get_component(right, 2) - + vector_get_component(left, 2) * vector_get_component(right, 0); + return x * x + y * y + z * z; +#else + return z * z; +#endif +} + +template +LIBMESH_DEVICE_INLINE +auto vector_solid_angle(const VectorA & v01, const VectorB & v02, const VectorC & v03) +{ + using std::atan; + + const auto norm01 = 
vector_norm(v01); + const auto norm02 = vector_norm(v02); + const auto norm03 = vector_norm(v03); + const auto tan_half_angle = + vector_triple_product(v01, v02, v03) / + (vector_dot(v01, v02) * norm03 + + vector_dot(v01, v03) * norm02 + + vector_dot(v02, v03) * norm01 + + norm01 * norm02 * norm03); + + return Real(2) * atan(tan_half_angle); +} + +// libMesh-like convenience wrappers + +template +LIBMESH_DEVICE_INLINE +auto contract(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v, + decltype(vector_contract(left, right))> +{ + return vector_contract(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto norm_sq(const VectorLike & v) + -> std::enable_if_t, decltype(vector_norm_sq(v))> +{ + return vector_norm_sq(v); +} + +template +LIBMESH_DEVICE_INLINE +auto norm(const VectorLike & v) + -> std::enable_if_t, decltype(vector_norm(v))> +{ + return vector_norm(v); +} + +template +LIBMESH_DEVICE_INLINE +auto is_zero(const VectorLike & v) + -> std::enable_if_t, bool> +{ + return vector_is_zero(v); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::assign(const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(*this, component, vector_get_component(right, component)); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::add(const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(*this, + component, + vector_get_component(*this, component) + vector_get_component(right, component)); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::add_scaled(const RightVector & right, const value_type & factor) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(*this, + component, + vector_get_component(*this, component) + + factor * vector_get_component(right, component)); +} + +template +template 
+LIBMESH_DEVICE_INLINE +void vector_ref::subtract(const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(*this, + component, + vector_get_component(*this, component) - vector_get_component(right, component)); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::subtract_scaled(const RightVector & right, const value_type & factor) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(*this, + component, + vector_get_component(*this, component) - + factor * vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void vector_ref::zero() +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(*this, component, value_type(0)); +} + +template +template +LIBMESH_DEVICE_INLINE +auto vector_ref::contract(const RightVector & right) const +{ + return vector_contract(*this, right); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::norm() const +{ + return vector_norm(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::norm_sq() const +{ + return vector_norm_sq(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::l1_norm() const +{ + return vector_l1_norm(*this); +} + +template +LIBMESH_DEVICE_INLINE +bool vector_ref::is_zero() const +{ + return vector_is_zero(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::unit() const +{ + return vector_unit(*this); +} + +template +template +LIBMESH_DEVICE_INLINE +auto vector_ref::cross(const RightVector & right) const +{ + return vector_cross(*this, right); +} + +// Operator-compatible wrappers for storage-backed refs and mixed ref/owning math. 
+ +template +LIBMESH_DEVICE_INLINE +auto operator-(const VectorLike & v) + -> std::enable_if_t && is_vector_ref_v, + vector_semantic_type_t> +{ + return vector_scale(vector_value_type_t(-1), v); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + vector_semantic_type_t> +{ + return vector_add(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator-(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + vector_semantic_type_t> +{ + return vector_subtract(left, right); +} + +template && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const LeftVector & left, const RightVector & right) +{ + return vector_dot(left, right); +} + +template && !is_tensor_like_v && + is_vector_like_v && is_vector_ref_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const Scalar & alpha, const VectorLike & v) +{ + return vector_scale(alpha, v); +} + +template && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const VectorLike & v, const Scalar & alpha) +{ + return vector_scale(alpha, v); +} + +template && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator/(const VectorLike & v, const Scalar & alpha) +{ + return vector_divide(v, alpha); +} + +template +LIBMESH_DEVICE_INLINE +auto operator==(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + bool> +{ + return vector_equal(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator!=(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || 
is_vector_ref_v), + bool> +{ + return vector_not_equal(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+=(LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + LeftVector &> +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) + vector_get_component(right, component)); + + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator-=(LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + LeftVector &> +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) - vector_get_component(right, component)); + + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator*=(LeftVector & left, const Scalar & alpha) + -> std::enable_if_t && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftVector &> +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, component, vector_get_component(left, component) * alpha); + + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator/=(LeftVector & left, const Scalar & alpha) + -> std::enable_if_t && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftVector &> +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, component, vector_get_component(left, component) / alpha); + + return left; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_VECTOR_OPS_H diff --git a/include/include_HEADERS b/include/include_HEADERS index 115b473ba2e..a08484a3083 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -28,6 +28,7 @@ include_HEADERS = \ base/libmesh_abort.h \ base/libmesh_base.h \ 
base/libmesh_common.h \ + base/libmesh_device.h \ base/libmesh_documentation.h \ base/libmesh_exceptions.h \ base/libmesh_logging.h \ @@ -174,6 +175,11 @@ include_HEADERS = \ geom/sphere.h \ geom/stored_range.h \ geom/surface.h \ + gpu/kokkos_linalg_base.h \ + gpu/kokkos_storage.h \ + gpu/kokkos_storage_policy.h \ + gpu/kokkos_tensor_ops.h \ + gpu/kokkos_vector_ops.h \ ghosting/default_coupling.h \ ghosting/ghost_point_neighbors.h \ ghosting/ghosting_functor.h \ diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index 7b8880c3a42..25470b1ea0e 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -19,6 +19,7 @@ BUILT_SOURCES = \ libmesh_augment_std_namespace.h \ libmesh_base.h \ libmesh_common.h \ + libmesh_device.h \ libmesh_documentation.h \ libmesh_exceptions.h \ libmesh_logging.h \ @@ -172,6 +173,11 @@ BUILT_SOURCES = \ overlap_coupling.h \ point_neighbor_coupling.h \ sibling_coupling.h \ + kokkos_linalg_base.h \ + kokkos_storage.h \ + kokkos_storage_policy.h \ + kokkos_tensor_ops.h \ + kokkos_vector_ops.h \ abaqus_io.h \ boundary_info.h \ boundary_mesh.h \ @@ -657,6 +663,9 @@ libmesh_base.h: $(top_srcdir)/include/base/libmesh_base.h libmesh_common.h: $(top_srcdir)/include/base/libmesh_common.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +libmesh_device.h: $(top_srcdir)/include/base/libmesh_device.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + libmesh_documentation.h: $(top_srcdir)/include/base/libmesh_documentation.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1116,6 +1125,21 @@ point_neighbor_coupling.h: $(top_srcdir)/include/ghosting/point_neighbor_couplin sibling_coupling.h: $(top_srcdir)/include/ghosting/sibling_coupling.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_linalg_base.h: $(top_srcdir)/include/gpu/kokkos_linalg_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage.h: $(top_srcdir)/include/gpu/kokkos_storage.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage_policy.h: 
$(top_srcdir)/include/gpu/kokkos_storage_policy.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_tensor_ops.h: $(top_srcdir)/include/gpu/kokkos_tensor_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_vector_ops.h: $(top_srcdir)/include/gpu/kokkos_vector_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + abaqus_io.h: $(top_srcdir)/include/mesh/abaqus_io.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -2138,4 +2162,3 @@ xdr_cxx.h: $(top_srcdir)/include/utils/xdr_cxx.h parallel_communicator_specializations: $(top_srcdir)/include/timpi_shims/parallel_communicator_specializations $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - From de66cbdb1e51d4cb1000c855c252a297074a7e66 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Fri, 8 May 2026 15:04:52 -0600 Subject: [PATCH 12/48] Add Kokkos numerics oracle test infrastructure --- tests/Makefile.am | 41 +- .../kokkos_numerics_oracle_test_utils.h | 90 ++++ .../kokkos_tensor_ops_oracle_fixtures.h | 161 ++++++ .../kokkos_tensor_ops_oracle_runners.h | 482 ++++++++++++++++++ .../kokkos_vector_ops_oracle_fixtures.h | 223 ++++++++ .../kokkos_vector_ops_oracle_runners.h | 327 ++++++++++++ 6 files changed, 1323 insertions(+), 1 deletion(-) create mode 100644 tests/numerics/kokkos_numerics_oracle_test_utils.h create mode 100644 tests/numerics/kokkos_tensor_ops_oracle_fixtures.h create mode 100644 tests/numerics/kokkos_tensor_ops_oracle_runners.h create mode 100644 tests/numerics/kokkos_vector_ops_oracle_fixtures.h create mode 100644 tests/numerics/kokkos_vector_ops_oracle_runners.h diff --git a/tests/Makefile.am b/tests/Makefile.am index bb12f424833..6efb6d23e88 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -7,6 +7,7 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) -I$(top_builddir)/include \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) LIBS = $(libmesh_optional_LIBS) $(CPPUNIT_LIBS) +KOKKOS_TEST_CPPFLAGS = # We might have turned on -Werror and/or paranoid warnings CXXFLAGS_DBG += 
$(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) @@ -248,6 +249,26 @@ if LIBMESH_ENABLE_FPARSER endif check_PROGRAMS = # empty, append below +TESTS = + +if LIBMESH_ENABLE_KOKKOS + KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) + + check_PROGRAMS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit + TESTS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit + + kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K + kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K + kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +endif # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. Some cppunit versions work fine for us, @@ -358,9 +379,27 @@ $(top_builddir)/libmesh_oprof.la: FORCE if LIBMESH_ENABLE_CPPUNIT -TESTS = run_unit_tests.sh +TESTS += run_unit_tests.sh endif +# Compile .K translation units with the Kokkos device compiler. +# $(MPI_INCLUDES) is needed because KOKKOS_CXX may be nvcc/hipcc +# instead of the MPI compiler wrapper, so mpi.h won't be found implicitly. 
+.K.o: + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ + +# Custom link rules so the Kokkos compiler drives the final link step. +kokkos_vector_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ + +kokkos_tensor_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_tensor_ops_oracle_unit_LDFLAGS) -o $@ + CLEANFILES = cube_mesh.xda \ slit_mesh.xda \ slit_solution.xda \ diff --git a/tests/numerics/kokkos_numerics_oracle_test_utils.h b/tests/numerics/kokkos_numerics_oracle_test_utils.h new file mode 100644 index 00000000000..c25ce2a056e --- /dev/null +++ b/tests/numerics/kokkos_numerics_oracle_test_utils.h @@ -0,0 +1,90 @@ +#ifndef KOKKOS_NUMERICS_ORACLE_TEST_UTILS_H +#define KOKKOS_NUMERICS_ORACLE_TEST_UTILS_H + +#include "libmesh/libmesh.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include + +namespace libMeshTest +{ +namespace KokkosOracle +{ + +using libMesh::Real; + +template +inline ::Kokkos::View +upload_objects(const std::vector & values, const char * label) +{ + ::Kokkos::View d(std::string(label), values.size()); + auto h = ::Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + ::Kokkos::deep_copy(d, h); + return d; +} + +inline int +compare_device_scalars(const ::Kokkos::View & d_values, + const std::vector & ref_values, + const double tol) +{ + auto h_values = ::Kokkos::create_mirror_view(d_values); + ::Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (std::fabs(h_values(i) - ref_values[i]) > tol) + ++fail; + + return fail; +} + +template +inline int 
+compare_device_vectors(const ViewType & d_values, + const std::vector & ref_values, + const double tol) +{ + auto h_values = ::Kokkos::create_mirror_view(d_values); + ::Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + if (std::fabs(h_values(i, d) - ref_values[i](d)) > tol) + ++fail; + + return fail; +} + +template +inline int +compare_device_tensors(const ViewType & d_values, + const std::vector & ref_values, + const double tol) +{ + auto h_values = ::Kokkos::create_mirror_view(d_values); + ::Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (std::fabs(h_values(i, row, col) - ref_values[i](row, col)) > tol) + ++fail; + + return fail; +} + +} // namespace KokkosOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_tensor_ops_oracle_fixtures.h b/tests/numerics/kokkos_tensor_ops_oracle_fixtures.h new file mode 100644 index 00000000000..8a53f37bd83 --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_fixtures.h @@ -0,0 +1,161 @@ +#ifndef KOKKOS_TENSOR_OPS_ORACLE_FIXTURES_H +#define KOKKOS_TENSOR_OPS_ORACLE_FIXTURES_H + +#include "libmesh/libmesh.h" +#include "libmesh/point.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_n_tensor.h" +#include "libmesh/vector_value.h" +#include "gpu/kokkos_tensor_ops.h" +#include "gpu/kokkos_storage.h" +#include "gpu/kokkos_storage_policy.h" + +#include "kokkos_numerics_oracle_test_utils.h" + +#include +#include + +namespace libMeshTest +{ +namespace KokkosTensorOracle +{ + +using libMesh::Real; + +static constexpr double tol = 2.0e-13; + +using oracle_vector = libMesh::TypeVector; +using oracle_tensor = libMesh::TypeTensor; + +inline oracle_vector +make_host_vector(const Real x, const Real y = 0, const 
Real z = 0) +{ + oracle_vector v; + v.zero(); + v(0) = x; +#if LIBMESH_DIM > 1 + v(1) = y; +#endif +#if LIBMESH_DIM > 2 + v(2) = z; +#endif + return v; +} + +inline oracle_tensor +make_host_tensor(const Real xx, + const Real xy = 0, + const Real xz = 0, + const Real yx = 0, + const Real yy = 0, + const Real yz = 0, + const Real zx = 0, + const Real zy = 0, + const Real zz = 0) +{ + oracle_tensor T; + T.zero(); + T(0, 0) = xx; +#if LIBMESH_DIM > 1 + T(0, 1) = xy; + T(1, 0) = yx; + T(1, 1) = yy; +#endif +#if LIBMESH_DIM > 2 + T(0, 2) = xz; + T(1, 2) = yz; + T(2, 0) = zx; + T(2, 1) = zy; + T(2, 2) = zz; +#endif + return T; +} + +struct tensor_dim_case +{ + oracle_tensor J; + unsigned int dim; + const char * name; +}; + +static const tensor_dim_case dim_cases[] = { + { make_host_tensor(1.7, -0.2, 0.5, + 0.3, 1.1, -0.4, + -0.6, 0.8, 0.9), + 1, + "leading_1d" }, +#if LIBMESH_DIM > 1 + { make_host_tensor(2.5, -0.75, 0.4, + 1.2, 1.8, -0.6, + -0.3, 0.9, 1.4), + 2, + "leading_2d" }, +#endif +#if LIBMESH_DIM > 2 + { make_host_tensor(9.08973348886179e-01, 3.36455579239923e-01, 5.16389236893863e-01, + 9.44156071777472e-01, 1.35610910092516e-01, 1.49881119060538e-02, + 1.15988384086146e-01, 6.79845197685518e-03, 3.77028969454745e-01), + 3, + "leading_3d" } +#endif +}; + +inline oracle_tensor +build_identity_tensor(const unsigned int dim) +{ + oracle_tensor I; + I.zero(); + for (unsigned int i = 0; i < dim; ++i) + I(i, i) = Real(1); + return I; +} + +inline Real +host_leading_determinant(const oracle_tensor & J, const unsigned int dim) +{ + if (dim == 0) + return Real(1); + if (dim == 1) + return J(0, 0); + if (dim == 2) + return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); +#if LIBMESH_DIM > 2 + return J.det(); +#else + return Real(0); +#endif +} + +inline oracle_tensor +host_leading_inverse(const oracle_tensor & J, const unsigned int dim) +{ + oracle_tensor inv; + inv.zero(); + + if (dim == 1) + { + inv(0, 0) = Real(1) / J(0, 0); + return inv; + } + + if (dim == 2) + { + const Real 
det = host_leading_determinant(J, dim); + inv(0, 0) = J(1, 1) / det; + inv(0, 1) = -J(0, 1) / det; + inv(1, 0) = -J(1, 0) / det; + inv(1, 1) = J(0, 0) / det; + return inv; + } + +#if LIBMESH_DIM > 2 + return oracle_tensor(J.inverse()); +#else + return inv; +#endif +} + +} // namespace KokkosTensorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_tensor_ops_oracle_runners.h b/tests/numerics/kokkos_tensor_ops_oracle_runners.h new file mode 100644 index 00000000000..cad772919a0 --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_runners.h @@ -0,0 +1,482 @@ +#ifndef KOKKOS_TENSOR_OPS_ORACLE_RUNNERS_H +#define KOKKOS_TENSOR_OPS_ORACLE_RUNNERS_H + +#include "kokkos_tensor_ops_oracle_fixtures.h" + +#include +#include + +namespace libMeshTest +{ +namespace KokkosTensorOracle +{ + +template +static int +test_dim_ops() +{ + const unsigned int ncases = sizeof(dim_cases) / sizeof(dim_cases[0]); + + std::vector J_values(ncases); + std::vector dims(ncases); + std::vector ref_det(ncases); + std::vector ref_inv(ncases); + std::vector ref_I(ncases); + std::vector ref_prod_left(ncases); + std::vector ref_prod_right(ncases); + + for (unsigned int c = 0; c < ncases; ++c) + { + const auto & info = dim_cases[c]; + J_values[c] = info.J; + dims[c] = info.dim; + + ref_det[c] = host_leading_determinant(info.J, info.dim); + ref_inv[c] = host_leading_inverse(info.J, info.dim); + ref_I[c] = build_identity_tensor(info.dim); + ref_prod_left[c] = info.J * ref_inv[c]; + ref_prod_right[c] = ref_inv[c] * info.J; + } + + auto d_J = libMesh::Kokkos::upload_tensor_storage(J_values, "tensor_dim_ops_J"); + auto d_dims = libMeshTest::KokkosOracle::upload_objects(dims, "tensor_dim_ops_dim"); + ::Kokkos::View d_det("tensor_dim_ops_det", ncases); + auto d_inv = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_inv", ncases); + auto d_I = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_I", ncases); + auto d_prod_left = 
libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_prod_left", ncases); + auto d_prod_right = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_prod_right", ncases); + + ::Kokkos::parallel_for( + static_cast(ncases), + KOKKOS_LAMBDA(int c) { + const auto J_ref = libMesh::Kokkos::make_tensor_ref(d_J, c); + const unsigned int dim = d_dims(c); + const Real det = libMesh::Kokkos::tensor_determinant(J_ref, dim); + const auto inv = J_ref.inverse(dim); + const auto I = libMesh::Kokkos::tensor_identity(dim); + const auto prod_left = J_ref * inv; + const auto prod_right = inv * J_ref; + + d_det(c) = det; + libMesh::Kokkos::store_tensor(d_inv, c, inv); + libMesh::Kokkos::store_tensor(d_I, c, I); + libMesh::Kokkos::store_tensor(d_prod_left, c, prod_left); + libMesh::Kokkos::store_tensor(d_prod_right, c, prod_right); + }); + ::Kokkos::fence(); + + return libMeshTest::KokkosOracle::compare_device_scalars(d_det, ref_det, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_inv, ref_inv, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_I, ref_I, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_prod_left, ref_prod_left, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_prod_right, ref_prod_right, tol); +} + +template +static int +test_tensor_ops() +{ + const auto A = make_host_tensor(1.1, -0.4, 0.7, + 0.3, 1.9, -1.2, + -0.8, 0.5, 2.2); + const auto a = make_host_vector(2.0, 3.0, 4.0); + const auto b = make_host_vector(5.0, -6.0, 7.0); + const auto c = make_host_vector(1.25, -0.5, 2.0); + + const auto outer = libMesh::outer_product(a, b); + const auto transpose = A.transpose(); + const auto mix = 1.5 * A - 0.25 * outer; + const auto right = A * c; + const auto left = c * A; + const Real contract = A.contract(outer); + const Real norm = A.norm(); + const auto zero = libMesh::Kokkos::zero_tensor_value(); + + std::vector ref_outer(1, outer); + std::vector ref_transpose(1, transpose); + std::vector ref_mix(1, mix); + std::vector 
ref_rows(LIBMESH_DIM); + std::vector ref_columns(LIBMESH_DIM); + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + { + ref_rows[i] = A.row(i); + ref_columns[i] = A.column(i); + } + std::vector ref_right(1, right); + std::vector ref_left(1, left); + std::vector ref_scalars = {contract, norm, zero.is_zero() ? 1.0 : 0.0, A.is_zero() ? 1.0 : 0.0}; + + auto d_A = libMesh::Kokkos::upload_tensor_storage(std::vector{A}, "tensor_ops_A"); + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "tensor_ops_a"); + auto d_b = libMesh::Kokkos::upload_vector_storage(std::vector{b}, "tensor_ops_b"); + auto d_c = libMesh::Kokkos::upload_vector_storage(std::vector{c}, "tensor_ops_c"); + auto d_outer = libMesh::Kokkos::make_tensor_storage("tensor_ops_outer", 1); + auto d_transpose = libMesh::Kokkos::make_tensor_storage("tensor_ops_transpose", 1); + auto d_mix = libMesh::Kokkos::make_tensor_storage("tensor_ops_mix", 1); + auto d_rows = libMesh::Kokkos::make_vector_storage("tensor_ops_rows", LIBMESH_DIM); + auto d_columns = libMesh::Kokkos::make_vector_storage("tensor_ops_columns", LIBMESH_DIM); + auto d_right = libMesh::Kokkos::make_vector_storage("tensor_ops_right", 1); + auto d_left = libMesh::Kokkos::make_vector_storage("tensor_ops_left", 1); + ::Kokkos::View d_scalars("tensor_ops_scalars", 4); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto A_ref = libMesh::Kokkos::make_tensor_ref(d_A, 0); + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); + const auto c_ref = libMesh::Kokkos::make_vector_ref(d_c, 0); + const auto outer_d = libMesh::Kokkos::tensor_outer_product(a_ref, b_ref); + const auto transpose_d = A_ref.transpose(); + const auto mix_d = Real(1.5) * A_ref - Real(0.25) * outer_d; + const auto right_d = A_ref * c_ref; + const auto left_d = c_ref * A_ref; + const Real contract_d = A_ref.contract(outer_d); + const Real norm_d = A_ref.norm(); + const bool zero_is_zero_d = 
libMesh::Kokkos::zero_tensor_value().is_zero(); + const bool A_is_zero_d = A_ref.is_zero(); + + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + { + libMesh::Kokkos::store_vector(d_rows, i, A_ref.row(i)); + libMesh::Kokkos::store_vector(d_columns, i, A_ref.column(i)); + } + + libMesh::Kokkos::store_tensor(d_outer, 0, outer_d); + libMesh::Kokkos::store_tensor(d_transpose, 0, transpose_d); + libMesh::Kokkos::store_tensor(d_mix, 0, mix_d); + libMesh::Kokkos::store_vector(d_right, 0, right_d); + libMesh::Kokkos::store_vector(d_left, 0, left_d); + d_scalars(0) = contract_d; + d_scalars(1) = norm_d; + d_scalars(2) = zero_is_zero_d ? 1.0 : 0.0; + d_scalars(3) = A_is_zero_d ? 1.0 : 0.0; + }); + ::Kokkos::fence(); + + return libMeshTest::KokkosOracle::compare_device_tensors(d_outer, ref_outer, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_transpose, ref_transpose, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_mix, ref_mix, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_rows, ref_rows, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_columns, ref_columns, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_right, ref_right, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_left, ref_left, tol) + + libMeshTest::KokkosOracle::compare_device_scalars(d_scalars, ref_scalars, tol); +} + +inline int +test_tensor_host_only_ops() +{ + int fail = 0; + +#if LIBMESH_DIM > 2 + { + libMesh::TensorValue tensor(2., 1., 0., + 1., 2., 1., + 0., 1., 2.); + fail += tensor.is_hpd(/*rel_tol=*/0.) ? 0 : 1; + } + + { + libMesh::TensorValue tensor(1., 0., 0., + 0., 0., 1., + 0., 1., 0.); + fail += tensor.is_hpd() ? 1 : 0; + } + + { + const libMesh::Point x(1., 0., 0.); + const auto R = libMesh::RealTensorValue::extrinsic_rotation_matrix(90., 0., 0.); + const auto rotated = R * x; + fail += (std::fabs(rotated(0)) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(1) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(2)) <= tol) ? 
0 : 1; + + const auto invR = libMesh::RealTensorValue::inverse_extrinsic_rotation_matrix(90., 0., 0.); + const auto unrotated = invR * rotated; + fail += (std::fabs(unrotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(1)) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(2)) <= tol) ? 0 : 1; + } + + { + const libMesh::Point x(1., 1., 1.); + const auto R = libMesh::RealTensorValue::extrinsic_rotation_matrix(90., 90., 90.); + const auto rotated = R * x; + fail += (std::fabs(rotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(1) + 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(2) - 1.) <= tol) ? 0 : 1; + + const auto invR = libMesh::RealTensorValue::inverse_extrinsic_rotation_matrix(90., 90., 90.); + const auto unrotated = invR * rotated; + fail += (std::fabs(unrotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(1) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(2) - 1.) <= tol) ? 0 : 1; + } +#endif + +#ifdef LIBMESH_HAVE_METAPHYSICL + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedType; + constexpr bool assertion = + std::is_same>>::value; + fail += assertion ? 
0 : 1; +#endif + + return fail; +} + +template +static int +test_linalg_foundation_storage_roundtrip() +{ + int fail = 0; + + auto d_vector = libMesh::Kokkos::make_vector_storage("foundation_vector", 1); + auto d_tensor = libMesh::Kokkos::make_tensor_storage("foundation_tensor", 1); + + { + auto h_vector = ::Kokkos::create_mirror_view(d_vector); + auto h_tensor = ::Kokkos::create_mirror_view(d_tensor); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + h_vector(0, d) = Real(d + 1) * Real(0.5); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + h_tensor(0, row, col) = Real(10 * row + col + 1) * Real(0.25); + + ::Kokkos::deep_copy(d_vector, h_vector); + ::Kokkos::deep_copy(d_tensor, h_tensor); + } + + const auto vector_in = libMesh::Kokkos::make_vector_ref(d_vector, 0); + const auto tensor_in = libMesh::Kokkos::make_tensor_ref(d_tensor, 0); + + const auto as_point = libMesh::Kokkos::materialize_vector(vector_in); + const auto as_vector_value = + libMesh::Kokkos::materialize_vector>(vector_in); + const auto as_type_vector = + libMesh::Kokkos::materialize_vector>(vector_in); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + { + const Real expected = Real(d + 1) * Real(0.5); + fail += (std::fabs(as_point(d) - expected) <= tol) ? 0 : 1; + fail += (std::fabs(as_vector_value(d) - expected) <= tol) ? 0 : 1; + fail += (std::fabs(as_type_vector(d) - expected) <= tol) ? 0 : 1; + } + + const auto as_tensor_value = + libMesh::Kokkos::materialize_tensor>(tensor_in); + const auto as_type_tensor = + libMesh::Kokkos::materialize_tensor>(tensor_in); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + const Real expected = Real(10 * row + col + 1) * Real(0.25); + fail += (std::fabs(as_tensor_value(row, col) - expected) <= tol) ? 0 : 1; + fail += (std::fabs(as_type_tensor(row, col) - expected) <= tol) ? 
0 : 1; + } + + auto d_vector_out = libMesh::Kokkos::make_vector_storage("foundation_vector_out", 1); + auto d_tensor_out = libMesh::Kokkos::make_tensor_storage("foundation_tensor_out", 1); + + auto vector_out = libMesh::Kokkos::make_vector_ref(d_vector_out, 0); + auto tensor_out = libMesh::Kokkos::make_tensor_ref(d_tensor_out, 0); + + vector_out.zero(); + vector_out.assign(as_vector_value); + vector_out.add_scaled(as_type_vector, Real(0)); + vector_out.subtract_scaled(as_type_vector, Real(0)); + + tensor_out.zero(); + tensor_out.assign(as_tensor_value); + tensor_out.add_scaled(as_type_tensor, Real(0)); + tensor_out.subtract_scaled(as_type_tensor, Real(0)); + + { + auto h_vector_out = ::Kokkos::create_mirror_view(d_vector_out); + auto h_tensor_out = ::Kokkos::create_mirror_view(d_tensor_out); + ::Kokkos::deep_copy(h_vector_out, d_vector_out); + ::Kokkos::deep_copy(h_tensor_out, d_tensor_out); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + fail += (std::fabs(h_vector_out(0, d) - as_vector_value(d)) <= tol) ? 0 : 1; + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + fail += (std::fabs(h_tensor_out(0, row, col) - as_tensor_value(row, col)) <= tol) ? 
0 : 1; + } + + return fail; +} + +template +static int +test_mixed_representation_ops() +{ + int fail = 0; + + const auto a = make_host_vector(2.0, 3.0, 4.0); + const auto b = make_host_vector(5.0, -6.0, 7.0); + const auto c = make_host_vector(1.25, -0.5, 2.0); + const auto A = make_host_tensor(1.1, -0.4, 0.7, + 0.3, 1.9, -1.2, + -0.8, 0.5, 2.2); + + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "mixed_ops_a"); + auto d_A = libMesh::Kokkos::upload_tensor_storage(std::vector{A}, "mixed_ops_A"); + + ::Kokkos::View d_scalars("mixed_ops_scalars", 8); + auto d_vectors = libMesh::Kokkos::make_vector_storage("mixed_ops_vectors", 5); + auto d_tensors = libMesh::Kokkos::make_tensor_storage("mixed_ops_tensors", 4); + + const auto ref_dot = a * b; + const auto ref_contract = A.contract(libMesh::outer_product(a, b)); + const auto ref_det = host_leading_determinant(A, LIBMESH_DIM); + const auto ref_right = A * c; + const auto ref_left = A.left_multiply(c); + const auto ref_mix = a + b; + const auto ref_row0 = A.row(0); + const auto ref_col0 = A.column(0); + const auto ref_transpose = A.transpose(); + const auto ref_inverse = host_leading_inverse(A, LIBMESH_DIM); + const auto ref_add = A + ref_transpose; + const auto ref_scaled = 0.5 * A; + const auto ref_trace = A.tr(); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto A_ref = libMesh::Kokkos::make_tensor_ref(d_A, 0); + + const auto mix = a_ref + b; + const auto right = A_ref * c; + const auto left = A_ref.left_multiply(c); + const auto row0 = A_ref.row(0); + const auto col0 = A_ref.column(0); + const auto transpose = A_ref.transpose(); + const auto inverse = A_ref.inverse(); + const auto add = A_ref + ref_transpose; + const auto scaled = Real(0.5) * A_ref; + const auto outer = libMesh::Kokkos::tensor_outer_product(a_ref, b); + + d_scalars(0) = a_ref * b; + d_scalars(1) = A_ref.contract(outer); + d_scalars(2) = A_ref.det(); + 
d_scalars(3) = (A_ref == A) ? 1.0 : 0.0; + d_scalars(4) = (A_ref != inverse) ? 1.0 : 0.0; + d_scalars(5) = libMesh::Kokkos::vector_equal(row0, ref_row0) ? 1.0 : 0.0; + d_scalars(6) = libMesh::Kokkos::vector_equal(col0, ref_col0) ? 1.0 : 0.0; + d_scalars(7) = A_ref.tr(); + + libMesh::Kokkos::store_vector(d_vectors, 0, right); + libMesh::Kokkos::store_vector(d_vectors, 1, left); + libMesh::Kokkos::store_vector(d_vectors, 2, mix); + libMesh::Kokkos::store_vector(d_vectors, 3, row0); + libMesh::Kokkos::store_vector(d_vectors, 4, col0); + libMesh::Kokkos::store_tensor(d_tensors, 0, transpose); + libMesh::Kokkos::store_tensor(d_tensors, 1, inverse); + libMesh::Kokkos::store_tensor(d_tensors, 2, add); + libMesh::Kokkos::store_tensor(d_tensors, 3, scaled); + }); + ::Kokkos::fence(); + + fail += libMeshTest::KokkosOracle::compare_device_scalars( + d_scalars, + std::vector{ref_dot, ref_contract, ref_det, 1.0, 1.0, 1.0, 1.0, ref_trace}, + tol); + fail += libMeshTest::KokkosOracle::compare_device_vectors( + d_vectors, + std::vector{ref_right, ref_left, ref_mix, ref_row0, ref_col0}, + tol); + fail += libMeshTest::KokkosOracle::compare_device_tensors( + d_tensors, + std::vector{ref_transpose, ref_inverse, ref_add, ref_scaled}, + tol); + + return fail; +} + +inline int +run_all_oracles() +{ + int total_fail = 0; + + const int dim_fail_left = test_dim_ops(); + std::printf("[tensor_dim_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + dim_fail_left ? "FAIL" : "PASS", + dim_fail_left); + total_fail += dim_fail_left; + + const int dim_fail_right = test_dim_ops(); + std::printf("[tensor_dim_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + dim_fail_right ? "FAIL" : "PASS", + dim_fail_right); + total_fail += dim_fail_right; + + const int tensor_fail_left = test_tensor_ops(); + std::printf("[tensor_ops_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + tensor_fail_left ? 
"FAIL" : "PASS", + tensor_fail_left); + total_fail += tensor_fail_left; + + const int tensor_fail_right = test_tensor_ops(); + std::printf("[tensor_ops_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + tensor_fail_right ? "FAIL" : "PASS", + tensor_fail_right); + total_fail += tensor_fail_right; + + const int host_fail = test_tensor_host_only_ops(); + std::printf("[tensor_host_ops_oracle] %s (%d failures)\n", + host_fail ? "FAIL" : "PASS", + host_fail); + total_fail += host_fail; + + const int foundation_fail_left = + test_linalg_foundation_storage_roundtrip(); + std::printf("[kokkos_linalg_foundation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + foundation_fail_left ? "FAIL" : "PASS", + foundation_fail_left); + total_fail += foundation_fail_left; + + const int foundation_fail_right = + test_linalg_foundation_storage_roundtrip(); + std::printf("[kokkos_linalg_foundation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + foundation_fail_right ? "FAIL" : "PASS", + foundation_fail_right); + total_fail += foundation_fail_right; + + const int mixed_fail_left = test_mixed_representation_ops(); + std::printf("[kokkos_linalg_mixed_representation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_fail_left ? "FAIL" : "PASS", + mixed_fail_left); + total_fail += mixed_fail_left; + + const int mixed_fail_right = test_mixed_representation_ops(); + std::printf("[kokkos_linalg_mixed_representation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_fail_right ? 
"FAIL" : "PASS", + mixed_fail_right); + total_fail += mixed_fail_right; + + return total_fail; +} + +} // namespace KokkosTensorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_vector_ops_oracle_fixtures.h b/tests/numerics/kokkos_vector_ops_oracle_fixtures.h new file mode 100644 index 00000000000..5bce52de341 --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_fixtures.h @@ -0,0 +1,223 @@ +#ifndef KOKKOS_VECTOR_OPS_ORACLE_FIXTURES_H +#define KOKKOS_VECTOR_OPS_ORACLE_FIXTURES_H + +#include "libmesh/libmesh.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_vector.h" +#include "libmesh/vector_value.h" +#include "gpu/kokkos_vector_ops.h" +#include "gpu/kokkos_storage.h" +#include "gpu/kokkos_storage_policy.h" + +#include "kokkos_numerics_oracle_test_utils.h" + +#include +#include + +namespace libMeshTest +{ +namespace KokkosVectorOracle +{ + +using libMesh::Real; + +static constexpr double tol = 2.0e-13; +static constexpr double unit_tol = 1.0e-14; +static constexpr Real golden_ratio = 1.6180339887498948482; +static constexpr unsigned int solid_angle_results = + 1 + ((LIBMESH_DIM > 1) ? 2u : 0u) + ((LIBMESH_DIM > 2) ? 4u : 0u); +static constexpr unsigned int vector_results = + 11 + ((LIBMESH_DIM > 2) ? 
2u : 0u); +static constexpr unsigned int scalar_results = 11 + solid_angle_results; + +template +LIBMESH_DEVICE_INLINE +Vec +make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + Vec v; + v.zero(); + v(0) = x; +#if LIBMESH_DIM > 1 + v(1) = y; +#endif +#if LIBMESH_DIM > 2 + v(2) = z; +#endif + return v; +} + +inline libMesh::TypeVector +as_type_vector(const libMesh::TypeVector & v) +{ + return v; +} + +inline libMesh::TypeVector +as_type_vector(const libMesh::VectorValue & v) +{ + return make_vector>(v(0) +#if LIBMESH_DIM > 1 + , + v(1) +#endif +#if LIBMESH_DIM > 2 + , + v(2) +#endif + ); +} + +template +struct host_oracle +{ + std::vector vectors; + std::vector scalars; +}; + +struct vector_case +{ + const char * name; + Real ax, ay, az; + Real bx, by, bz; + Real cx, cy, cz; +}; + +static const vector_case cases[] = { +#if LIBMESH_DIM >= 1 + { "line_case_a", 2.0, 0.0, 0.0, -3.0, 0.0, 0.0, 0.5, 0.0, 0.0 }, + { "line_case_b", -1.25, 0.0, 0.0, 4.5, 0.0, 0.0, -2.0, 0.0, 0.0 }, +#endif +#if LIBMESH_DIM >= 2 + { "plane_case_a", 2.0, 3.0, 0.0, 5.0, -6.0, 0.0, 1.25, -0.5, 0.0 }, + { "plane_case_b", -1.0, 4.0, 0.0, 0.5, 2.5, 0.0, -3.0, 1.5, 0.0 }, +#endif +#if LIBMESH_DIM >= 3 + { "space_case_a", 2.0, 3.0, 4.0, 5.0, -6.0, 7.0, 1.25, -0.5, 2.0 }, + { "space_case_b", -1.0, 4.0, 0.75, 0.5, 2.5, -3.5, -3.0, 1.5, 2.25 }, +#endif +}; + +template +inline host_oracle +build_host_oracle(const Vec & a, const Vec & b, const Vec & c) +{ + host_oracle result; + result.vectors.reserve(vector_results); + result.scalars.reserve(scalar_results); + + const auto copied = a; + + Vec mix = a + b; + mix -= c; + + Vec scaled = 1.25 * a; + scaled += (-0.5) * b; + scaled += (0.25) * c; + + Vec plus_assign = a; + plus_assign += b; + + Vec minus_assign = a; + minus_assign -= b; + + Vec accum; + accum.zero(); + accum.add_scaled(a, 1.25); + accum.add_scaled(b, -0.5); + accum.subtract_scaled(c, -0.25); + + const auto divided = a / 5.0; + const auto outer_right = libMesh::outer_product(a, 
5.0); + const auto outer_left = libMesh::outer_product(5.0, a); + + Vec mult_assign = a; + mult_assign *= 5.0; + + Vec div_assign = a; + div_assign /= 5.0; + + Vec assign_zero = a; + assign_zero = 0.0; + + result.vectors.push_back(copied); + result.vectors.push_back(mix); + result.vectors.push_back(scaled); + result.vectors.push_back(accum); + result.vectors.push_back(plus_assign); + result.vectors.push_back(minus_assign); + result.vectors.push_back(divided); + result.vectors.push_back(outer_right); + result.vectors.push_back(outer_left); + result.vectors.push_back(mult_assign); + result.vectors.push_back(div_assign); + + result.scalars.push_back(a * b); + result.scalars.push_back(a.contract(b)); + result.scalars.push_back(mix.norm()); + result.scalars.push_back(mix.norm_sq()); + result.scalars.push_back(make_vector(0.0, 0.0, 0.0).is_zero() ? 1.0 : 0.0); + result.scalars.push_back(mix.is_zero() ? 1.0 : 0.0); + result.scalars.push_back((a == a) ? 1.0 : 0.0); + result.scalars.push_back((a == b) ? 1.0 : 0.0); + result.scalars.push_back((a != a) ? 1.0 : 0.0); + result.scalars.push_back((a != b) ? 1.0 : 0.0); + result.scalars.push_back(assign_zero.is_zero() ? 
1.0 : 0.0); + + const auto xvec = make_vector(1.3); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(xvec), + as_type_vector(xvec))); + +#if LIBMESH_DIM > 1 + const auto yvec = make_vector(0.0, 2.7); + const auto xydiag = make_vector(3.1, 3.1); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(xvec), + as_type_vector(yvec))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(yvec), + as_type_vector(xydiag))); +#endif + +#if LIBMESH_DIM > 2 + const auto xypdiag = make_vector(0.8, -0.8); + const auto zvec = make_vector(0.0, 0.0, 1.1); + const auto xzdiag = make_vector(0.0, 0.7, 0.7); + const auto icosa1 = make_vector(1.0, golden_ratio, 0.0); + const auto icosa2 = make_vector(-1.0, golden_ratio, 0.0); + const auto icosa3 = make_vector(0.0, 1.0, golden_ratio); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xydiag), + as_type_vector(yvec), + as_type_vector(zvec))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(yvec), + as_type_vector(xzdiag))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xypdiag), + as_type_vector(xydiag), + as_type_vector(zvec))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(icosa1), + as_type_vector(icosa2), + as_type_vector(icosa3))); +#endif + +#if LIBMESH_DIM > 2 + const auto cross = a.cross(b); + auto unit_cross = cross; + if (cross.norm() > unit_tol) + unit_cross = cross.unit(); + + result.vectors.push_back(cross); + result.vectors.push_back(unit_cross); +#endif + + libmesh_assert_equal_to(result.vectors.size(), vector_results); + libmesh_assert_equal_to(result.scalars.size(), scalar_results); + + return result; +} + +} // namespace KokkosVectorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_vector_ops_oracle_runners.h b/tests/numerics/kokkos_vector_ops_oracle_runners.h new file mode 100644 index 
00000000000..73fbbe7834a --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_runners.h @@ -0,0 +1,327 @@ +#ifndef KOKKOS_VECTOR_OPS_ORACLE_RUNNERS_H +#define KOKKOS_VECTOR_OPS_ORACLE_RUNNERS_H + +#include "kokkos_vector_ops_oracle_fixtures.h" + +#include + +namespace libMeshTest +{ +namespace KokkosVectorOracle +{ + +template +static int +test_vector_ops_case(const vector_case & info) +{ + const auto a = make_vector(info.ax, info.ay, info.az); + const auto b = make_vector(info.bx, info.by, info.bz); + const auto c = make_vector(info.cx, info.cy, info.cz); + + const auto expected = build_host_oracle(a, b, c); + + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "vector_ops_a"); + auto d_b = libMesh::Kokkos::upload_vector_storage(std::vector{b}, "vector_ops_b"); + auto d_c = libMesh::Kokkos::upload_vector_storage(std::vector{c}, "vector_ops_c"); + auto d_vectors = libMesh::Kokkos::make_vector_storage("vector_ops_vectors", vector_results); + ::Kokkos::View d_scalars("vector_ops_scalars", scalar_results); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); + const auto c_ref = libMesh::Kokkos::make_vector_ref(d_c, 0); + + const Vec copied = libMesh::Kokkos::copy_vector(a_ref); + const Vec mix = libMesh::Kokkos::vector_linear_combination( + Real(1), a_ref, Real(1), b_ref, Real(-1), c_ref); + const Vec scaled = libMesh::Kokkos::vector_linear_combination( + Real(1.25), a_ref, Real(-0.5), b_ref, Real(0.25), c_ref); + const Vec plus_assign = a_ref + b_ref; + const Vec minus_assign = a_ref - b_ref; + const Vec accum = libMesh::Kokkos::vector_linear_combination( + Real(1.25), a_ref, Real(-0.5), b_ref, Real(0.25), c_ref); + const Vec divided = a_ref / Real(5.0); + const Vec outer_right = Real(5.0) * a_ref; + const Vec outer_left = a_ref * Real(5.0); + const Vec mult_assign = a_ref * Real(5.0); + const Vec div_assign = a_ref 
/ Real(5.0); + const Vec assign_zero = libMesh::Kokkos::zero_vector_value(); + + const Real dot = libMesh::Kokkos::vector_dot(a_ref, b_ref); + const Real contract = a_ref.contract(b_ref); + const Real norm = mix.norm(); + const Real norm_sq = mix.norm_sq(); + const Vec zero = libMesh::Kokkos::zero_vector_value(); + + unsigned int vector_offset = 0; + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, copied); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, mix); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, scaled); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, accum); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, plus_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, minus_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, divided); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, outer_right); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, outer_left); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, mult_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, div_assign); + + unsigned int scalar_offset = 0; + d_scalars(scalar_offset++) = a_ref * b_ref; + d_scalars(scalar_offset++) = contract; + d_scalars(scalar_offset++) = norm; + d_scalars(scalar_offset++) = norm_sq; + d_scalars(scalar_offset++) = zero.is_zero() ? 1.0 : 0.0; + d_scalars(scalar_offset++) = mix.is_zero() ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref == a_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref == b_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref != a_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref != b_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = assign_zero.is_zero() ? 
1.0 : 0.0; + + const Vec xvec = make_vector(1.3); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, xvec, xvec); + +#if LIBMESH_DIM > 1 + const Vec yvec = make_vector(0.0, 2.7); + const Vec xydiag = make_vector(3.1, 3.1); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, xvec, yvec); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, yvec, xydiag); +#endif + +#if LIBMESH_DIM > 2 + const Vec xypdiag = make_vector(0.8, -0.8); + const Vec zvec = make_vector(0.0, 0.0, 1.1); + const Vec xzdiag = make_vector(0.0, 0.7, 0.7); + const Vec icosa1 = make_vector(1.0, golden_ratio, 0.0); + const Vec icosa2 = make_vector(-1.0, golden_ratio, 0.0); + const Vec icosa3 = make_vector(0.0, 1.0, golden_ratio); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xydiag, yvec, zvec); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, yvec, xzdiag); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xypdiag, xydiag, zvec); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(icosa1, icosa2, icosa3); +#endif + +#if LIBMESH_DIM > 2 + const Vec cross = a_ref.cross(b_ref); + Vec unit_cross = cross; + if (libMesh::Kokkos::vector_norm(cross) > unit_tol) + unit_cross = libMesh::Kokkos::vector_unit(cross); + + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, cross); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, unit_cross); +#endif + + libmesh_assert_equal_to(vector_offset, vector_results); + libmesh_assert_equal_to(scalar_offset, scalar_results); + }); + ::Kokkos::fence(); + + return libMeshTest::KokkosOracle::compare_device_vectors(d_vectors, expected.vectors, tol) + + libMeshTest::KokkosOracle::compare_device_scalars(d_scalars, expected.scalars, tol); +} + +template +int +run_vector_cases(const char * suite_name) +{ + int fail = 0; + + for (const auto & info : cases) + { + const int f = test_vector_ops_case(info); + std::printf("[%s] [%s] 
[%s] %s (%d failures)\n", + suite_name, + libMesh::Kokkos::storage_policy_name(), + info.name, + f ? "FAIL" : "PASS", + f); + fail += f; + } + + return fail; +} + +inline int +test_vector_host_only_traits() +{ + int fail = 0; + +#ifdef LIBMESH_HAVE_METAPHYSICL + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedType; + constexpr bool typevector_assertion = + std::is_same>>::value; + fail += typevector_assertion ? 0 : 1; + + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedValueType; + constexpr bool vectorvalue_assertion = + std::is_same>>::value; + fail += vectorvalue_assertion ? 0 : 1; +#endif + + return fail; +} + +template +static int +test_mixed_representation_ops() +{ + int fail = 0; + + const auto a = make_vector(2.0, 3.0, 4.0); + const auto b = make_vector(5.0, -6.0, 7.0); + const auto c = make_vector(1.25, -0.5, 2.0); + + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "mixed_vector_a"); + auto d_b = libMesh::Kokkos::upload_vector_storage(std::vector{b}, "mixed_vector_b"); + + auto d_vectors = + libMesh::Kokkos::make_vector_storage("mixed_vector_vectors", (LIBMESH_DIM > 2) ? 5 : 3); + ::Kokkos::View d_scalars("mixed_vector_scalars", (LIBMESH_DIM > 2) ? 
7 : 5); + + const auto ref_sum = a + b; + const auto ref_diff = a - b; + const auto ref_scaled = 1.5 * a; + const auto ref_dot = a * b; + const auto ref_contract = a.contract(b); + const auto ref_solid_angle = + libMesh::solid_angle(as_type_vector(a), as_type_vector(b), as_type_vector(c)); + const auto ref_cross_norm_sq = libMesh::cross_norm_sq(as_type_vector(a), as_type_vector(b)); + +#if LIBMESH_DIM > 2 + const auto ref_cross = a.cross(b); + auto ref_unit_cross = ref_cross; + if (ref_cross.norm() > unit_tol) + ref_unit_cross = ref_cross.unit(); +#endif + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); + + const auto sum = a_ref + b; + const auto diff = a - b_ref; + const auto scaled = Real(1.5) * a_ref; + + libMesh::Kokkos::store_vector(d_vectors, 0, sum); + libMesh::Kokkos::store_vector(d_vectors, 1, diff); + libMesh::Kokkos::store_vector(d_vectors, 2, scaled); + + d_scalars(0) = a_ref * b; + d_scalars(1) = b_ref.contract(a); + d_scalars(2) = (a_ref == a) ? 1.0 : 0.0; + d_scalars(3) = (a_ref != b) ? 1.0 : 0.0; + d_scalars(4) = libMesh::Kokkos::vector_solid_angle(a_ref, b, c); + +#if LIBMESH_DIM > 2 + const auto cross = a_ref.cross(b); + Vec unit_cross = cross; + if (libMesh::Kokkos::vector_norm(cross) > unit_tol) + unit_cross = libMesh::Kokkos::vector_unit(cross); + + libMesh::Kokkos::store_vector(d_vectors, 3, cross); + libMesh::Kokkos::store_vector(d_vectors, 4, unit_cross); + d_scalars(5) = libMesh::Kokkos::vector_cross_norm_sq(a_ref, b); + d_scalars(6) = (cross == libMesh::Kokkos::vector_cross(a, b_ref)) ? 
1.0 : 0.0; +#endif + }); + ::Kokkos::fence(); + + fail += libMeshTest::KokkosOracle::compare_device_vectors( + d_vectors, + [&]() { + std::vector ref = {ref_sum, ref_diff, ref_scaled}; +#if LIBMESH_DIM > 2 + ref.push_back(ref_cross); + ref.push_back(ref_unit_cross); +#endif + return ref; + }(), + tol); + + fail += libMeshTest::KokkosOracle::compare_device_scalars( + d_scalars, + [&]() { + std::vector ref = {ref_dot, ref_contract, 1.0, 1.0, ref_solid_angle}; +#if LIBMESH_DIM > 2 + ref.push_back(ref_cross_norm_sq); + ref.push_back(1.0); +#endif + return ref; + }(), + tol); + + return fail; +} + +inline int +run_all_oracles() +{ + int total_fail = 0; + + total_fail += run_vector_cases>( + "typevector_kernel_oracle"); + total_fail += run_vector_cases>( + "typevector_kernel_oracle"); + total_fail += run_vector_cases>( + "vectorvalue_kernel_oracle"); + total_fail += run_vector_cases>( + "vectorvalue_kernel_oracle"); + + const int mixed_typevector_left = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [typevector] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_typevector_left ? "FAIL" : "PASS", + mixed_typevector_left); + total_fail += mixed_typevector_left; + + const int mixed_typevector_right = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [typevector] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_typevector_right ? "FAIL" : "PASS", + mixed_typevector_right); + total_fail += mixed_typevector_right; + + const int mixed_vectorvalue_left = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [vectorvalue] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_vectorvalue_left ? 
"FAIL" : "PASS", + mixed_vectorvalue_left); + total_fail += mixed_vectorvalue_left; + + const int mixed_vectorvalue_right = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [vectorvalue] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_vectorvalue_right ? "FAIL" : "PASS", + mixed_vectorvalue_right); + total_fail += mixed_vectorvalue_right; + + const int host_fail = test_vector_host_only_traits(); + std::printf("[vector_host_traits_oracle] %s (%d failures)\n", + host_fail ? "FAIL" : "PASS", + host_fail); + total_fail += host_fail; + + return total_fail; +} + +} // namespace KokkosVectorOracle +} // namespace libMeshTest + +#endif From acb22ee6f68ffc504e707c0ce0dca19668141141 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Fri, 8 May 2026 15:04:55 -0600 Subject: [PATCH 13/48] Add Kokkos vector and tensor oracle tests --- .../numerics/kokkos_tensor_ops_oracle_test.K | 20 +++++++++++++++++++ .../numerics/kokkos_vector_ops_oracle_test.K | 20 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tests/numerics/kokkos_tensor_ops_oracle_test.K create mode 100644 tests/numerics/kokkos_vector_ops_oracle_test.K diff --git a/tests/numerics/kokkos_tensor_ops_oracle_test.K b/tests/numerics/kokkos_tensor_ops_oracle_test.K new file mode 100644 index 00000000000..858d4773690 --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_test.K @@ -0,0 +1,20 @@ +#include "libmesh/libmesh_config.h" +#include "kokkos_tensor_ops_oracle_runners.h" + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + const int total_fail = libMeshTest::KokkosTensorOracle::run_all_oracles(); + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 
1 : 0; +} diff --git a/tests/numerics/kokkos_vector_ops_oracle_test.K b/tests/numerics/kokkos_vector_ops_oracle_test.K new file mode 100644 index 00000000000..fedc7651ff5 --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_test.K @@ -0,0 +1,20 @@ +#include "libmesh/libmesh_config.h" +#include "kokkos_vector_ops_oracle_runners.h" + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + const int total_fail = libMeshTest::KokkosVectorOracle::run_all_oracles(); + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} From 63d0098bcd104cae7d68988c26ab06558d18c454 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Fri, 8 May 2026 15:05:03 -0600 Subject: [PATCH 14/48] Regenerate configure and Makefile.in files --- Makefile.in | 8 + configure | 366 ++++++++++++++++++ contrib/Makefile.in | 8 + contrib/capnproto/Makefile.in | 8 + contrib/eigen/gitshim/Makefile.in | 8 + contrib/exodusii/5.22b/exodus/Makefile.in | 8 + contrib/exodusii/5.22b/nemesis/Makefile.in | 8 + contrib/exodusii/Lib/Makefile.in | 8 + contrib/exodusii/v8.11/exodus/Makefile.in | 8 + contrib/exodusii/v8.11/nemesis/Makefile.in | 8 + contrib/fparser/Makefile.in | 8 + contrib/fparser/extrasrc/Makefile.in | 8 + contrib/gmv/Makefile.in | 8 + contrib/gzstream/Makefile.in | 8 + contrib/laspack/Makefile.in | 8 + contrib/libHilbert/Makefile.in | 8 + contrib/metis/Makefile.in | 8 + contrib/nanoflann/Makefile.in | 8 + contrib/nemesis/Lib/Makefile.in | 8 + contrib/netgen/Makefile.in | 10 +- contrib/parmetis/Makefile.in | 8 + contrib/poly2tri/modified/Makefile.in | 8 + contrib/qhull/2012.1/Makefile.in | 8 + contrib/sfcurves/Makefile.in | 8 + contrib/tecplot/binary/Makefile.in | 8 + contrib/tecplot/tecio/Makefile.in | 8 + contrib/tetgen/Makefile.in | 8 + contrib/triangle/Makefile.in | 8 + doc/Makefile.in | 8 + doc/html/Makefile.in | 8 + 
examples/Makefile.in | 8 + .../adaptivity/adaptivity_ex1/Makefile.in | 8 + .../adaptivity/adaptivity_ex2/Makefile.in | 8 + .../adaptivity/adaptivity_ex3/Makefile.in | 8 + .../adaptivity/adaptivity_ex4/Makefile.in | 8 + .../adaptivity/adaptivity_ex5/Makefile.in | 8 + examples/adjoints/adjoints_ex1/Makefile.in | 8 + examples/adjoints/adjoints_ex2/Makefile.in | 8 + examples/adjoints/adjoints_ex3/Makefile.in | 8 + examples/adjoints/adjoints_ex4/Makefile.in | 8 + examples/adjoints/adjoints_ex5/Makefile.in | 8 + examples/adjoints/adjoints_ex6/Makefile.in | 8 + examples/adjoints/adjoints_ex7/Makefile.in | 8 + .../eigenproblems_ex1/Makefile.in | 8 + .../eigenproblems_ex2/Makefile.in | 8 + .../eigenproblems_ex3/Makefile.in | 8 + .../eigenproblems_ex4/Makefile.in | 8 + .../fem_system/fem_system_ex1/Makefile.in | 8 + .../fem_system/fem_system_ex2/Makefile.in | 8 + .../fem_system/fem_system_ex3/Makefile.in | 8 + .../fem_system/fem_system_ex4/Makefile.in | 8 + .../fem_system/fem_system_ex5/Makefile.in | 8 + .../introduction/introduction_ex1/Makefile.in | 8 + .../introduction/introduction_ex2/Makefile.in | 8 + .../introduction/introduction_ex3/Makefile.in | 8 + .../introduction/introduction_ex4/Makefile.in | 8 + .../introduction/introduction_ex5/Makefile.in | 8 + .../miscellaneous_ex1/Makefile.in | 8 + .../miscellaneous_ex10/Makefile.in | 8 + .../miscellaneous_ex11/Makefile.in | 8 + .../miscellaneous_ex12/Makefile.in | 8 + .../miscellaneous_ex13/Makefile.in | 8 + .../miscellaneous_ex14/Makefile.in | 8 + .../miscellaneous_ex15/Makefile.in | 8 + .../miscellaneous_ex16/Makefile.in | 8 + .../miscellaneous_ex17/Makefile.in | 8 + .../miscellaneous_ex2/Makefile.in | 8 + .../miscellaneous_ex3/Makefile.in | 8 + .../miscellaneous_ex4/Makefile.in | 8 + .../miscellaneous_ex5/Makefile.in | 8 + .../miscellaneous_ex6/Makefile.in | 8 + .../miscellaneous_ex7/Makefile.in | 8 + .../miscellaneous_ex8/Makefile.in | 8 + .../miscellaneous_ex9/Makefile.in | 8 + 
.../optimization/optimization_ex1/Makefile.in | 8 + .../optimization/optimization_ex2/Makefile.in | 8 + .../reduced_basis_ex1/Makefile.in | 8 + .../reduced_basis_ex2/Makefile.in | 8 + .../reduced_basis_ex3/Makefile.in | 8 + .../reduced_basis_ex4/Makefile.in | 8 + .../reduced_basis_ex5/Makefile.in | 8 + .../reduced_basis_ex6/Makefile.in | 8 + .../reduced_basis_ex7/Makefile.in | 8 + .../solution_transfer_ex1/Makefile.in | 8 + .../subdomains/subdomains_ex1/Makefile.in | 8 + .../subdomains/subdomains_ex2/Makefile.in | 8 + .../subdomains/subdomains_ex3/Makefile.in | 8 + .../systems_of_equations_ex1/Makefile.in | 8 + .../systems_of_equations_ex2/Makefile.in | 8 + .../systems_of_equations_ex3/Makefile.in | 8 + .../systems_of_equations_ex4/Makefile.in | 8 + .../systems_of_equations_ex5/Makefile.in | 8 + .../systems_of_equations_ex6/Makefile.in | 8 + .../systems_of_equations_ex7/Makefile.in | 8 + .../systems_of_equations_ex8/Makefile.in | 8 + .../systems_of_equations_ex9/Makefile.in | 8 + examples/transient/transient_ex1/Makefile.in | 8 + examples/transient/transient_ex2/Makefile.in | 8 + examples/transient/transient_ex3/Makefile.in | 8 + examples/vector_fe/vector_fe_ex1/Makefile.in | 8 + examples/vector_fe/vector_fe_ex10/Makefile.in | 8 + examples/vector_fe/vector_fe_ex2/Makefile.in | 8 + examples/vector_fe/vector_fe_ex3/Makefile.in | 8 + examples/vector_fe/vector_fe_ex4/Makefile.in | 8 + examples/vector_fe/vector_fe_ex5/Makefile.in | 8 + examples/vector_fe/vector_fe_ex6/Makefile.in | 8 + examples/vector_fe/vector_fe_ex7/Makefile.in | 8 + examples/vector_fe/vector_fe_ex8/Makefile.in | 8 + examples/vector_fe/vector_fe_ex9/Makefile.in | 8 + include/Makefile.in | 14 + include/libmesh/Makefile.in | 39 +- tests/Makefile.in | 136 +++++-- 112 files changed, 1387 insertions(+), 34 deletions(-) diff --git a/Makefile.in b/Makefile.in index cec62a434fa..186be47b152 100644 --- a/Makefile.in +++ b/Makefile.in @@ -7517,11 +7517,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ 
HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -7569,6 +7576,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/configure b/configure index 33ff470d60a..91b72819aca 100755 --- a/configure +++ b/configure @@ -672,6 +672,16 @@ libmesh_contrib_LDFLAGS libmesh_contrib_INCLUDES libmesh_optional_LIBS libmesh_optional_INCLUDES +LIBMESH_ENABLE_KOKKOS_FALSE +LIBMESH_ENABLE_KOKKOS_TRUE +KOKKOS_LIBS +KOKKOS_LDFLAGS +KOKKOS_CXXFLAGS +KOKKOS_CPPFLAGS +KOKKOS_CXX +ICPX +HIPCC +NVCC LIBMESH_ENABLE_METAPHYSICL_FALSE LIBMESH_ENABLE_METAPHYSICL_TRUE METAPHYSICL_INCLUDE @@ -1347,6 +1357,8 @@ enable_metaphysicl with_metaphysicl with_metaphysicl_include enable_metaphysicl_required +with_kokkos +with_kokkos_backend ' ac_precious_vars='build_alias host_alias @@ -2273,6 +2285,10 @@ Optional Packages: internal: build from contrib --with-metaphysicl-include= + --with-kokkos=DIR Enable Kokkos support using the installation at DIR + --with-kokkos-backend=BACKEND + cuda|hip|sycl|openmp|serial (default: auto-detect + from KokkosCore_config.h) Some influential environment variables: PETSC_DIR path to PETSc installation @@ -63913,6 +63929,352 @@ fi +# ------------------------------------------------------------- +# Kokkos -- optional, enables the native Kokkos FE math path +# ------------------------------------------------------------- + +# Check whether --with-kokkos was 
given. +if test ${with_kokkos+y} +then : + withval=$with_kokkos; KOKKOS_DIR="$withval" +else case e in #( + e) KOKKOS_DIR="no" ;; +esac +fi + + + +# Check whether --with-kokkos-backend was given. +if test ${with_kokkos_backend+y} +then : + withval=$with_kokkos_backend; KOKKOS_BACKEND="$withval" +else case e in #( + e) KOKKOS_BACKEND="auto" ;; +esac +fi + + + +if test "x$KOKKOS_DIR" != "xno" +then : + + as_ac_File=`printf "%s\n" "ac_cv_file_$KOKKOS_DIR/include/Kokkos_Core.hpp" | sed "$as_sed_sh"` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $KOKKOS_DIR/include/Kokkos_Core.hpp" >&5 +printf %s "checking for $KOKKOS_DIR/include/Kokkos_Core.hpp... " >&6; } +if eval test \${$as_ac_File+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "$KOKKOS_DIR/include/Kokkos_Core.hpp"; then + eval "$as_ac_File=yes" +else + eval "$as_ac_File=no" +fi ;; +esac +fi +eval ac_res=\$$as_ac_File + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_ac_File"\" = x"yes" +then : + + enablekokkos=yes + libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES -I$KOKKOS_DIR/include" + libmesh_optional_LIBS="$libmesh_optional_LIBS -L$KOKKOS_DIR/lib -lkokkoscore" + + if test "x$KOKKOS_CXX" = "x" +then : + + KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h" + + if test "x$KOKKOS_BACKEND" = "xauto" +then : + + if test -r "$KOKKOS_CFG" +then : + + if grep -q 'KOKKOS_ENABLE_CUDA' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=cuda +else case e in #( + e) if grep -q 'KOKKOS_ENABLE_HIP' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=hip +else case e in #( + e) if grep -q 'KOKKOS_ENABLE_SYCL' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=sycl +else case e in #( + e) if grep -q 'KOKKOS_ENABLE_OPENMP' "$KOKKOS_CFG" +then : + KOKKOS_BACKEND=openmp +else case e in #( + e) KOKKOS_BACKEND=serial ;; +esac +fi ;; 
+esac +fi ;; +esac +fi ;; +esac +fi + +else case e in #( + e) KOKKOS_BACKEND=serial ;; +esac +fi + +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Kokkos backend: $KOKKOS_BACKEND" >&5 +printf "%s\n" "Kokkos backend: $KOKKOS_BACKEND" >&6; } + + have_kokkos_openmp=no + if test -r "$KOKKOS_CFG" +then : + if grep -q 'KOKKOS_ENABLE_OPENMP' "$KOKKOS_CFG" +then : + have_kokkos_openmp=yes +fi +fi + + case "$KOKKOS_BACKEND" in + cuda) + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="no" + ;; +esac ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$NVCC" = "xno" +then : + as_fn_error $? 
"nvcc not found but Kokkos CUDA backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$NVCC" + KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --disable-warnings -x cu -ccbin $CXX" + KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler -L$KOKKOS_DIR/lib" + if test "x$have_kokkos_openmp" = "xyes" +then : + + KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp" + KOKKOS_LDFLAGS="$KOKKOS_LDFLAGS -fopenmp" + +fi + ;; + hip) + # Extract the first word of "hipcc", so it can be a program name with args. +set dummy hipcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_HIPCC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $HIPCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_HIPCC="$HIPCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_HIPCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_HIPCC" && ac_cv_path_HIPCC="no" + ;; +esac ;; +esac +fi +HIPCC=$ac_cv_path_HIPCC +if test -n "$HIPCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCC" >&5 +printf "%s\n" "$HIPCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$HIPCC" = "xno" +then : + as_fn_error $? "hipcc not found but Kokkos HIP backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$HIPCC" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + sycl) + # Extract the first word of "icpx", so it can be a program name with args. 
+set dummy icpx; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ICPX+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $ICPX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ICPX="$ICPX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ICPX="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_ICPX" && ac_cv_path_ICPX="no" + ;; +esac ;; +esac +fi +ICPX=$ac_cv_path_ICPX +if test -n "$ICPX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ICPX" >&5 +printf "%s\n" "$ICPX" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$ICPX" = "xno" +then : + as_fn_error $? 
"icpx not found but Kokkos SYCL backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$ICPX" + KOKKOS_CXXFLAGS="-fsycl" + KOKKOS_LDFLAGS="-fsycl -L$KOKKOS_DIR/lib" + ;; + openmp) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-fopenmp -x c++" + KOKKOS_LDFLAGS="-fopenmp -L$KOKKOS_DIR/lib" + ;; + serial|*) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-x c++" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + esac + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Using caller-provided KOKKOS_CXX=$KOKKOS_CXX" >&5 +printf "%s\n" "Using caller-provided KOKKOS_CXX=$KOKKOS_CXX" >&6; } ;; +esac +fi + + KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION -I$KOKKOS_DIR/include}" + KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" + KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" + + +printf "%s\n" "#define HAVE_KOKKOS 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: <<< Configuring library with Kokkos support >>>" >&5 +printf "%s\n" "<<< Configuring library with Kokkos support >>>" >&6; } + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support" >&5 +printf "%s\n" "$as_me: WARNING: Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support" >&2;} + enablekokkos=no + ;; +esac +fi + + +else case e in #( + e) enablekokkos=no ;; +esac +fi + + + + + + + if test x$enablekokkos = xyes; then + LIBMESH_ENABLE_KOKKOS_TRUE= + LIBMESH_ENABLE_KOKKOS_FALSE='#' +else + LIBMESH_ENABLE_KOKKOS_TRUE='#' + LIBMESH_ENABLE_KOKKOS_FALSE= +fi + +# ------------------------------------------------------------- + + + if test "$enableoptional" != no then : @@ -65182,6 +65544,10 @@ if test -z "${LIBMESH_ENABLE_METAPHYSICL_TRUE}" && test -z "${LIBMESH_ENABLE_MET as_fn_error $? "conditional \"LIBMESH_ENABLE_METAPHYSICL\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 fi +if test -z "${LIBMESH_ENABLE_KOKKOS_TRUE}" && test -z "${LIBMESH_ENABLE_KOKKOS_FALSE}"; then + as_fn_error $? "conditional \"LIBMESH_ENABLE_KOKKOS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${GIT_CHECKOUT_TRUE}" && test -z "${GIT_CHECKOUT_FALSE}"; then as_fn_error $? "conditional \"GIT_CHECKOUT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/contrib/Makefile.in b/contrib/Makefile.in index 6c8d8649cdc..668594d8c04 100644 --- a/contrib/Makefile.in +++ b/contrib/Makefile.in @@ -604,11 +604,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -656,6 +663,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/capnproto/Makefile.in b/contrib/capnproto/Makefile.in index 43bfdcb44d5..56c7844f40d 100644 --- a/contrib/capnproto/Makefile.in +++ b/contrib/capnproto/Makefile.in @@ -453,11 +453,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ 
+KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -505,6 +512,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/eigen/gitshim/Makefile.in b/contrib/eigen/gitshim/Makefile.in index cd83617aabb..bab4d953ac5 100644 --- a/contrib/eigen/gitshim/Makefile.in +++ b/contrib/eigen/gitshim/Makefile.in @@ -337,11 +337,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -389,6 +396,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/5.22b/exodus/Makefile.in b/contrib/exodusii/5.22b/exodus/Makefile.in index d6f576d1a90..66a5e6a0357 100644 --- a/contrib/exodusii/5.22b/exodus/Makefile.in +++ b/contrib/exodusii/5.22b/exodus/Makefile.in @@ -3320,11 +3320,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = 
@KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -3372,6 +3379,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/5.22b/nemesis/Makefile.in b/contrib/exodusii/5.22b/nemesis/Makefile.in index 8538cdc184f..17636fa0144 100644 --- a/contrib/exodusii/5.22b/nemesis/Makefile.in +++ b/contrib/exodusii/5.22b/nemesis/Makefile.in @@ -399,11 +399,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -451,6 +458,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/Lib/Makefile.in b/contrib/exodusii/Lib/Makefile.in index 4c6ef829ed5..59c360d8928 100644 --- a/contrib/exodusii/Lib/Makefile.in +++ b/contrib/exodusii/Lib/Makefile.in @@ -1955,11 +1955,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2007,6 +2014,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/v8.11/exodus/Makefile.in b/contrib/exodusii/v8.11/exodus/Makefile.in index 826c62c23e0..6c21419de79 100644 --- a/contrib/exodusii/v8.11/exodus/Makefile.in +++ b/contrib/exodusii/v8.11/exodus/Makefile.in @@ -4248,11 +4248,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -4300,6 +4307,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/v8.11/nemesis/Makefile.in b/contrib/exodusii/v8.11/nemesis/Makefile.in index d1909c9f1d5..715ed90b4fe 100644 --- a/contrib/exodusii/v8.11/nemesis/Makefile.in +++ b/contrib/exodusii/v8.11/nemesis/Makefile.in @@ -409,11 +409,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ 
INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -461,6 +468,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/fparser/Makefile.in b/contrib/fparser/Makefile.in index a9a20542d31..dd6e31b0e76 100644 --- a/contrib/fparser/Makefile.in +++ b/contrib/fparser/Makefile.in @@ -867,11 +867,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -919,6 +926,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/fparser/extrasrc/Makefile.in b/contrib/fparser/extrasrc/Makefile.in index f257d59051c..3eafacaf7a0 100644 --- a/contrib/fparser/extrasrc/Makefile.in +++ b/contrib/fparser/extrasrc/Makefile.in @@ -339,11 +339,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ 
INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -391,6 +398,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/gmv/Makefile.in b/contrib/gmv/Makefile.in index 1043f694cf0..249f658088a 100644 --- a/contrib/gmv/Makefile.in +++ b/contrib/gmv/Makefile.in @@ -394,11 +394,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -446,6 +453,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/gzstream/Makefile.in b/contrib/gzstream/Makefile.in index 26d9c6a99cd..d7694ab76c8 100644 --- a/contrib/gzstream/Makefile.in +++ b/contrib/gzstream/Makefile.in @@ -446,11 +446,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = 
@INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -498,6 +505,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/laspack/Makefile.in b/contrib/laspack/Makefile.in index a921519ee01..125376d4347 100644 --- a/contrib/laspack/Makefile.in +++ b/contrib/laspack/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/libHilbert/Makefile.in b/contrib/libHilbert/Makefile.in index 92b7a8a5c6e..35d049071ec 100644 --- a/contrib/libHilbert/Makefile.in +++ b/contrib/libHilbert/Makefile.in @@ -477,11 +477,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ 
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -529,6 +536,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/metis/Makefile.in b/contrib/metis/Makefile.in index 2167e22fd72..5e8047805b9 100644 --- a/contrib/metis/Makefile.in +++ b/contrib/metis/Makefile.in @@ -1021,11 +1021,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -1073,6 +1080,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/nanoflann/Makefile.in b/contrib/nanoflann/Makefile.in index cb6fb5b1e25..94694c9b03c 100644 --- a/contrib/nanoflann/Makefile.in +++ b/contrib/nanoflann/Makefile.in @@ -443,11 +443,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -495,6 +502,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/nemesis/Lib/Makefile.in b/contrib/nemesis/Lib/Makefile.in index 30e196af739..722bf4b86f0 100644 --- a/contrib/nemesis/Lib/Makefile.in +++ b/contrib/nemesis/Lib/Makefile.in @@ -789,11 +789,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -841,6 +848,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/netgen/Makefile.in b/contrib/netgen/Makefile.in index f7db0d91967..931be784598 100644 --- a/contrib/netgen/Makefile.in +++ b/contrib/netgen/Makefile.in @@ -341,11 +341,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -393,6 +400,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -934,7 +942,7 @@ vtkversion = @vtkversion@ @LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib.h \ @LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib_occ.cpp \ @LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib_occ.h \ -@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/parallelfunc.cpp +@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/parallelfunc.cpp @LIBMESH_ENABLE_NETGEN_TRUE@netgenincludedir = $(includedir)/netgen @LIBMESH_ENABLE_NETGEN_TRUE@nglibincludedir = $(includedir)/netgen/nglib diff --git a/contrib/parmetis/Makefile.in b/contrib/parmetis/Makefile.in index 70875e6ff39..cde4b12aab9 100644 --- a/contrib/parmetis/Makefile.in +++ b/contrib/parmetis/Makefile.in @@ -855,11 +855,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -907,6 +914,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/poly2tri/modified/Makefile.in 
b/contrib/poly2tri/modified/Makefile.in index abb0c8cffa6..514fd6a38a6 100644 --- a/contrib/poly2tri/modified/Makefile.in +++ b/contrib/poly2tri/modified/Makefile.in @@ -541,11 +541,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -593,6 +600,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/qhull/2012.1/Makefile.in b/contrib/qhull/2012.1/Makefile.in index d9910cea212..03f2cbcb819 100644 --- a/contrib/qhull/2012.1/Makefile.in +++ b/contrib/qhull/2012.1/Makefile.in @@ -1164,11 +1164,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -1216,6 +1223,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/contrib/sfcurves/Makefile.in b/contrib/sfcurves/Makefile.in index 453e333f449..18be38e4660 100644 --- a/contrib/sfcurves/Makefile.in +++ b/contrib/sfcurves/Makefile.in @@ -414,11 +414,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -466,6 +473,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tecplot/binary/Makefile.in b/contrib/tecplot/binary/Makefile.in index 9c918125c59..9a13af38818 100644 --- a/contrib/tecplot/binary/Makefile.in +++ b/contrib/tecplot/binary/Makefile.in @@ -384,11 +384,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -436,6 +443,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/contrib/tecplot/tecio/Makefile.in b/contrib/tecplot/tecio/Makefile.in index 0ca8d9faaf2..f937b27b6db 100644 --- a/contrib/tecplot/tecio/Makefile.in +++ b/contrib/tecplot/tecio/Makefile.in @@ -623,11 +623,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -675,6 +682,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tetgen/Makefile.in b/contrib/tetgen/Makefile.in index b3035e72b2b..725d7a30d36 100644 --- a/contrib/tetgen/Makefile.in +++ b/contrib/tetgen/Makefile.in @@ -429,11 +429,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -481,6 +488,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/contrib/triangle/Makefile.in b/contrib/triangle/Makefile.in index a63cb31454c..f87cb42296f 100644 --- a/contrib/triangle/Makefile.in +++ b/contrib/triangle/Makefile.in @@ -424,11 +424,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -476,6 +483,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/doc/Makefile.in b/doc/Makefile.in index 42ba76ef15b..812994568fe 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -347,11 +347,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -399,6 +406,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index ea0e18aacb4..d3e9103668c 100644 
--- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -307,11 +307,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -359,6 +366,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/Makefile.in b/examples/Makefile.in index d16d580d3a2..27a8db4d760 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -375,11 +375,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -427,6 +434,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex1/Makefile.in b/examples/adaptivity/adaptivity_ex1/Makefile.in index 156523e9faa..3dc4c77a9dd 100644 --- 
a/examples/adaptivity/adaptivity_ex1/Makefile.in +++ b/examples/adaptivity/adaptivity_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex2/Makefile.in b/examples/adaptivity/adaptivity_ex2/Makefile.in index be5308ddd74..04c82fb9137 100644 --- a/examples/adaptivity/adaptivity_ex2/Makefile.in +++ b/examples/adaptivity/adaptivity_ex2/Makefile.in @@ -484,11 +484,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -536,6 +543,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/examples/adaptivity/adaptivity_ex3/Makefile.in b/examples/adaptivity/adaptivity_ex3/Makefile.in index 0a64440aa41..7e31802fa14 100644 --- a/examples/adaptivity/adaptivity_ex3/Makefile.in +++ b/examples/adaptivity/adaptivity_ex3/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex4/Makefile.in b/examples/adaptivity/adaptivity_ex4/Makefile.in index b25aca31264..e155bc31268 100644 --- a/examples/adaptivity/adaptivity_ex4/Makefile.in +++ b/examples/adaptivity/adaptivity_ex4/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = 
@NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex5/Makefile.in b/examples/adaptivity/adaptivity_ex5/Makefile.in index b419dc98312..50cfa24eefd 100644 --- a/examples/adaptivity/adaptivity_ex5/Makefile.in +++ b/examples/adaptivity/adaptivity_ex5/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex1/Makefile.in b/examples/adjoints/adjoints_ex1/Makefile.in index a174eb3b2fe..f658f98c231 100644 --- a/examples/adjoints/adjoints_ex1/Makefile.in +++ b/examples/adjoints/adjoints_ex1/Makefile.in @@ -559,11 +559,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -611,6 +618,7 @@ NODEPRECATEDFLAG 
= @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex2/Makefile.in b/examples/adjoints/adjoints_ex2/Makefile.in index d0ac7f06aeb..1583e06ba69 100644 --- a/examples/adjoints/adjoints_ex2/Makefile.in +++ b/examples/adjoints/adjoints_ex2/Makefile.in @@ -527,11 +527,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -579,6 +586,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex3/Makefile.in b/examples/adjoints/adjoints_ex3/Makefile.in index af52a508f53..8e744ca96ee 100644 --- a/examples/adjoints/adjoints_ex3/Makefile.in +++ b/examples/adjoints/adjoints_ex3/Makefile.in @@ -562,11 +562,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = 
@LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +621,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex4/Makefile.in b/examples/adjoints/adjoints_ex4/Makefile.in index 6721b984eb7..8ff3dceb0f3 100644 --- a/examples/adjoints/adjoints_ex4/Makefile.in +++ b/examples/adjoints/adjoints_ex4/Makefile.in @@ -562,11 +562,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +621,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex5/Makefile.in b/examples/adjoints/adjoints_ex5/Makefile.in index 5dcc545ab49..f4bdc7d11ae 100644 --- a/examples/adjoints/adjoints_ex5/Makefile.in +++ b/examples/adjoints/adjoints_ex5/Makefile.in @@ -562,11 +562,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ 
+KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +621,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex6/Makefile.in b/examples/adjoints/adjoints_ex6/Makefile.in index f8568140952..43ada1d6034 100644 --- a/examples/adjoints/adjoints_ex6/Makefile.in +++ b/examples/adjoints/adjoints_ex6/Makefile.in @@ -527,11 +527,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -579,6 +586,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex7/Makefile.in b/examples/adjoints/adjoints_ex7/Makefile.in index 793d69f1149..c807bd13682 100644 --- a/examples/adjoints/adjoints_ex7/Makefile.in +++ b/examples/adjoints/adjoints_ex7/Makefile.in @@ -577,11 +577,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = 
@KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -629,6 +636,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex1/Makefile.in b/examples/eigenproblems/eigenproblems_ex1/Makefile.in index a23b7ec4684..b2aeeba0c70 100644 --- a/examples/eigenproblems/eigenproblems_ex1/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex2/Makefile.in b/examples/eigenproblems/eigenproblems_ex2/Makefile.in index 5ef2c3bc3ec..e4584dfbd8f 100644 --- a/examples/eigenproblems/eigenproblems_ex2/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex2/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ 
INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex3/Makefile.in b/examples/eigenproblems/eigenproblems_ex3/Makefile.in index 4a060dc7358..1bb7ca59f97 100644 --- a/examples/eigenproblems/eigenproblems_ex3/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex3/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex4/Makefile.in b/examples/eigenproblems/eigenproblems_ex4/Makefile.in index eb92f3d91f8..becef7ee553 100644 --- a/examples/eigenproblems/eigenproblems_ex4/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex4/Makefile.in @@ -465,11 +465,18 
@@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex1/Makefile.in b/examples/fem_system/fem_system_ex1/Makefile.in index 47f9d7e206f..ab49a448096 100644 --- a/examples/fem_system/fem_system_ex1/Makefile.in +++ b/examples/fem_system/fem_system_ex1/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex2/Makefile.in b/examples/fem_system/fem_system_ex2/Makefile.in index d1357da5ca8..5990a9ddcc7 100644 --- 
a/examples/fem_system/fem_system_ex2/Makefile.in +++ b/examples/fem_system/fem_system_ex2/Makefile.in @@ -514,11 +514,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +573,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex3/Makefile.in b/examples/fem_system/fem_system_ex3/Makefile.in index 6783edf17bd..614048a0e48 100644 --- a/examples/fem_system/fem_system_ex3/Makefile.in +++ b/examples/fem_system/fem_system_ex3/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/examples/fem_system/fem_system_ex4/Makefile.in b/examples/fem_system/fem_system_ex4/Makefile.in index 8e6296abb4e..1a31f0ddab4 100644 --- a/examples/fem_system/fem_system_ex4/Makefile.in +++ b/examples/fem_system/fem_system_ex4/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex5/Makefile.in b/examples/fem_system/fem_system_ex5/Makefile.in index 00224c8d1c2..2526a04fe16 100644 --- a/examples/fem_system/fem_system_ex5/Makefile.in +++ b/examples/fem_system/fem_system_ex5/Makefile.in @@ -514,11 +514,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +573,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = 
@NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex1/Makefile.in b/examples/introduction/introduction_ex1/Makefile.in index 59920135dfb..13087163eff 100644 --- a/examples/introduction/introduction_ex1/Makefile.in +++ b/examples/introduction/introduction_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex2/Makefile.in b/examples/introduction/introduction_ex2/Makefile.in index 4168b619aa4..5e11b336ef9 100644 --- a/examples/introduction/introduction_ex2/Makefile.in +++ b/examples/introduction/introduction_ex2/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ 
LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex3/Makefile.in b/examples/introduction/introduction_ex3/Makefile.in index 859921ed0fa..bb24117c806 100644 --- a/examples/introduction/introduction_ex3/Makefile.in +++ b/examples/introduction/introduction_ex3/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex4/Makefile.in b/examples/introduction/introduction_ex4/Makefile.in index 3572fea8ef5..efdd5b9b127 100644 --- a/examples/introduction/introduction_ex4/Makefile.in +++ b/examples/introduction/introduction_ex4/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = 
@KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex5/Makefile.in b/examples/introduction/introduction_ex5/Makefile.in index 3ae3f4020fe..b08a51822e9 100644 --- a/examples/introduction/introduction_ex5/Makefile.in +++ b/examples/introduction/introduction_ex5/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex1/Makefile.in b/examples/miscellaneous/miscellaneous_ex1/Makefile.in index d0de4bad619..fa1d6604c10 100644 --- a/examples/miscellaneous/miscellaneous_ex1/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT 
= @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex10/Makefile.in b/examples/miscellaneous/miscellaneous_ex10/Makefile.in index 1afb2f88961..4ceaf8ccf94 100644 --- a/examples/miscellaneous/miscellaneous_ex10/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex10/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex11/Makefile.in b/examples/miscellaneous/miscellaneous_ex11/Makefile.in index de80da2aab8..e36b21636b2 100644 --- a/examples/miscellaneous/miscellaneous_ex11/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex11/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = 
@HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex12/Makefile.in b/examples/miscellaneous/miscellaneous_ex12/Makefile.in index 99775b9b0b9..2c4c4fc193d 100644 --- a/examples/miscellaneous/miscellaneous_ex12/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex12/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex13/Makefile.in b/examples/miscellaneous/miscellaneous_ex13/Makefile.in index d6f6a103170..4f0080367ed 100644 --- 
a/examples/miscellaneous/miscellaneous_ex13/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex13/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex14/Makefile.in b/examples/miscellaneous/miscellaneous_ex14/Makefile.in index feb36eea1f3..768948775af 100644 --- a/examples/miscellaneous/miscellaneous_ex14/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex14/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = 
@OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex15/Makefile.in b/examples/miscellaneous/miscellaneous_ex15/Makefile.in index 504236ef024..438762128b7 100644 --- a/examples/miscellaneous/miscellaneous_ex15/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex15/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex16/Makefile.in b/examples/miscellaneous/miscellaneous_ex16/Makefile.in index 816dc5777b4..acaecda9d82 100644 --- a/examples/miscellaneous/miscellaneous_ex16/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex16/Makefile.in @@ -481,11 +481,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -533,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ 
NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex17/Makefile.in b/examples/miscellaneous/miscellaneous_ex17/Makefile.in index d7264bce057..8ab2dc797f8 100644 --- a/examples/miscellaneous/miscellaneous_ex17/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex17/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex2/Makefile.in b/examples/miscellaneous/miscellaneous_ex2/Makefile.in index bc5b0d64089..5e2d6279049 100644 --- a/examples/miscellaneous/miscellaneous_ex2/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex2/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = 
@KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex3/Makefile.in b/examples/miscellaneous/miscellaneous_ex3/Makefile.in index 2f5e956a6ae..d58bfb75a85 100644 --- a/examples/miscellaneous/miscellaneous_ex3/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex3/Makefile.in @@ -470,11 +470,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -522,6 +529,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex4/Makefile.in b/examples/miscellaneous/miscellaneous_ex4/Makefile.in index d9d6f954cbc..f35a439ba5f 100644 --- a/examples/miscellaneous/miscellaneous_ex4/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex4/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ 
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex5/Makefile.in b/examples/miscellaneous/miscellaneous_ex5/Makefile.in index 719d522a31a..dc7dbeeef7b 100644 --- a/examples/miscellaneous/miscellaneous_ex5/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex5/Makefile.in @@ -479,11 +479,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -531,6 +538,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex6/Makefile.in b/examples/miscellaneous/miscellaneous_ex6/Makefile.in index a5a756000c3..a985950b56d 100644 --- a/examples/miscellaneous/miscellaneous_ex6/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex6/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = 
@HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex7/Makefile.in b/examples/miscellaneous/miscellaneous_ex7/Makefile.in index d0115c3cac9..e3515b9f785 100644 --- a/examples/miscellaneous/miscellaneous_ex7/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex7/Makefile.in @@ -508,11 +508,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -560,6 +567,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex8/Makefile.in b/examples/miscellaneous/miscellaneous_ex8/Makefile.in index 270aa93b899..d990641f148 100644 --- a/examples/miscellaneous/miscellaneous_ex8/Makefile.in +++ 
b/examples/miscellaneous/miscellaneous_ex8/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex9/Makefile.in b/examples/miscellaneous/miscellaneous_ex9/Makefile.in index 46f5b5339d5..3860b7d8388 100644 --- a/examples/miscellaneous/miscellaneous_ex9/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex9/Makefile.in @@ -505,11 +505,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -557,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/optimization/optimization_ex1/Makefile.in 
b/examples/optimization/optimization_ex1/Makefile.in index cf5012ab431..fb3baf00e2d 100644 --- a/examples/optimization/optimization_ex1/Makefile.in +++ b/examples/optimization/optimization_ex1/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/optimization/optimization_ex2/Makefile.in b/examples/optimization/optimization_ex2/Makefile.in index f3065b739c7..c7816e72654 100644 --- a/examples/optimization/optimization_ex2/Makefile.in +++ b/examples/optimization/optimization_ex2/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ 
+NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex1/Makefile.in b/examples/reduced_basis/reduced_basis_ex1/Makefile.in index 0879171980e..fa1d195a31a 100644 --- a/examples/reduced_basis/reduced_basis_ex1/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex1/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex2/Makefile.in b/examples/reduced_basis/reduced_basis_ex2/Makefile.in index 986069c3c25..2c34d349aeb 100644 --- a/examples/reduced_basis/reduced_basis_ex2/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex2/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = 
@LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex3/Makefile.in b/examples/reduced_basis/reduced_basis_ex3/Makefile.in index f68264db04e..41d6871eef3 100644 --- a/examples/reduced_basis/reduced_basis_ex3/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex3/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex4/Makefile.in b/examples/reduced_basis/reduced_basis_ex4/Makefile.in index 2c3343c83e8..a7bd31cb843 100644 --- a/examples/reduced_basis/reduced_basis_ex4/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex4/Makefile.in @@ -494,11 +494,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ 
+KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -546,6 +553,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex5/Makefile.in b/examples/reduced_basis/reduced_basis_ex5/Makefile.in index 238b469fe39..82b5b2fbc05 100644 --- a/examples/reduced_basis/reduced_basis_ex5/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex5/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex6/Makefile.in b/examples/reduced_basis/reduced_basis_ex6/Makefile.in index 50076f35515..dbc24072e30 100644 --- a/examples/reduced_basis/reduced_basis_ex6/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex6/Makefile.in @@ -494,11 +494,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = 
@INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -546,6 +553,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex7/Makefile.in b/examples/reduced_basis/reduced_basis_ex7/Makefile.in index 3817bee46df..a8446610838 100644 --- a/examples/reduced_basis/reduced_basis_ex7/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex7/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/solution_transfer/solution_transfer_ex1/Makefile.in b/examples/solution_transfer/solution_transfer_ex1/Makefile.in index cd7613f44f7..5a0d14c6136 100644 --- a/examples/solution_transfer/solution_transfer_ex1/Makefile.in +++ b/examples/solution_transfer/solution_transfer_ex1/Makefile.in @@ -465,11 +465,18 @@ HDF5_DIR = 
@HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +524,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex1/Makefile.in b/examples/subdomains/subdomains_ex1/Makefile.in index f6238ba3205..52deb3d0ade 100644 --- a/examples/subdomains/subdomains_ex1/Makefile.in +++ b/examples/subdomains/subdomains_ex1/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex2/Makefile.in b/examples/subdomains/subdomains_ex2/Makefile.in index 5c20167218c..b32dcd0e3c9 100644 --- 
a/examples/subdomains/subdomains_ex2/Makefile.in +++ b/examples/subdomains/subdomains_ex2/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex3/Makefile.in b/examples/subdomains/subdomains_ex3/Makefile.in index 3a83f502b61..a3d5c4a101d 100644 --- a/examples/subdomains/subdomains_ex3/Makefile.in +++ b/examples/subdomains/subdomains_ex3/Makefile.in @@ -474,11 +474,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -526,6 +533,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in index 3add79bf28a..c0fc74e97bf 100644 --- a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in index cd44f4a5603..a89c3ec3026 100644 --- a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in @@ -471,11 +471,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = 
@LASPACK_LIB@ LD = @LD@ @@ -523,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in index f5c4ff69de1..f0afcad3be3 100644 --- a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in index 0fbdb133bf1..dcdbbe495d2 100644 --- a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = 
@INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in index d1f25e7dc8b..ff6e7676c11 100644 --- a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in index 5712834b887..da837f22e98 100644 --- a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in +++ 
b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in index 15445020704..4ef11376ea4 100644 --- a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in @@ -472,11 +472,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -524,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = 
@OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in index abcd909b40d..e5c7279beb3 100644 --- a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in @@ -510,11 +510,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -562,6 +569,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in index a497c06f581..495f1b401f7 100644 --- a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in @@ -472,11 +472,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = 
@LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -524,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex1/Makefile.in b/examples/transient/transient_ex1/Makefile.in index 288208b4c6c..cb1f7123cd7 100644 --- a/examples/transient/transient_ex1/Makefile.in +++ b/examples/transient/transient_ex1/Makefile.in @@ -480,11 +480,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex2/Makefile.in b/examples/transient/transient_ex2/Makefile.in index bae2b2fcc21..3a5648a9715 100644 --- a/examples/transient/transient_ex2/Makefile.in +++ b/examples/transient/transient_ex2/Makefile.in @@ -466,11 +466,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = 
@KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex3/Makefile.in b/examples/transient/transient_ex3/Makefile.in index 031ab6ae7a6..418ce7dfed8 100644 --- a/examples/transient/transient_ex3/Makefile.in +++ b/examples/transient/transient_ex3/Makefile.in @@ -514,11 +514,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +573,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex1/Makefile.in b/examples/vector_fe/vector_fe_ex1/Makefile.in index c474401c7ad..66462dd6f04 100644 --- a/examples/vector_fe/vector_fe_ex1/Makefile.in +++ b/examples/vector_fe/vector_fe_ex1/Makefile.in @@ -478,11 +478,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -530,6 +537,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex10/Makefile.in b/examples/vector_fe/vector_fe_ex10/Makefile.in index c2a3aa0dd15..97e297f7568 100644 --- a/examples/vector_fe/vector_fe_ex10/Makefile.in +++ b/examples/vector_fe/vector_fe_ex10/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex2/Makefile.in b/examples/vector_fe/vector_fe_ex2/Makefile.in index 1aca800f63c..ad09c3743fa 100644 --- a/examples/vector_fe/vector_fe_ex2/Makefile.in +++ b/examples/vector_fe/vector_fe_ex2/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = 
@INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex3/Makefile.in b/examples/vector_fe/vector_fe_ex3/Makefile.in index b033d436f7d..5994d90a2bf 100644 --- a/examples/vector_fe/vector_fe_ex3/Makefile.in +++ b/examples/vector_fe/vector_fe_ex3/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex4/Makefile.in b/examples/vector_fe/vector_fe_ex4/Makefile.in index b2320fc3c58..91be49794d4 100644 --- a/examples/vector_fe/vector_fe_ex4/Makefile.in +++ b/examples/vector_fe/vector_fe_ex4/Makefile.in @@ -504,11 +504,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ 
HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +563,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex5/Makefile.in b/examples/vector_fe/vector_fe_ex5/Makefile.in index 21a71638e6e..62b45033645 100644 --- a/examples/vector_fe/vector_fe_ex5/Makefile.in +++ b/examples/vector_fe/vector_fe_ex5/Makefile.in @@ -491,11 +491,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -543,6 +550,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex6/Makefile.in b/examples/vector_fe/vector_fe_ex6/Makefile.in index cb550f280e4..d092712abe2 100644 --- a/examples/vector_fe/vector_fe_ex6/Makefile.in +++ b/examples/vector_fe/vector_fe_ex6/Makefile.in @@ 
-489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex7/Makefile.in b/examples/vector_fe/vector_fe_ex7/Makefile.in index 6ae32888b36..65dce847a17 100644 --- a/examples/vector_fe/vector_fe_ex7/Makefile.in +++ b/examples/vector_fe/vector_fe_ex7/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex8/Makefile.in b/examples/vector_fe/vector_fe_ex8/Makefile.in index fe881610825..d5c542a648e 100644 --- 
a/examples/vector_fe/vector_fe_ex8/Makefile.in +++ b/examples/vector_fe/vector_fe_ex8/Makefile.in @@ -489,11 +489,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +548,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex9/Makefile.in b/examples/vector_fe/vector_fe_ex9/Makefile.in index 34e5092c976..0e362c7a3f8 100644 --- a/examples/vector_fe/vector_fe_ex9/Makefile.in +++ b/examples/vector_fe/vector_fe_ex9/Makefile.in @@ -499,11 +499,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +558,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/include/Makefile.in 
b/include/Makefile.in index 15e2ded0d9e..a68272a0d25 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -380,11 +380,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -432,6 +439,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -635,6 +643,7 @@ include_HEADERS = \ base/libmesh_abort.h \ base/libmesh_base.h \ base/libmesh_common.h \ + base/libmesh_device.h \ base/libmesh_documentation.h \ base/libmesh_exceptions.h \ base/libmesh_logging.h \ @@ -781,6 +790,11 @@ include_HEADERS = \ geom/sphere.h \ geom/stored_range.h \ geom/surface.h \ + gpu/kokkos_linalg_base.h \ + gpu/kokkos_storage.h \ + gpu/kokkos_storage_policy.h \ + gpu/kokkos_tensor_ops.h \ + gpu/kokkos_vector_ops.h \ ghosting/default_coupling.h \ ghosting/ghost_point_neighbors.h \ ghosting/ghosting_functor.h \ diff --git a/include/libmesh/Makefile.in b/include/libmesh/Makefile.in index 0e95a2a8ef6..c2ff4cf1b7c 100644 --- a/include/libmesh/Makefile.in +++ b/include/libmesh/Makefile.in @@ -309,11 +309,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = 
@KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -361,6 +368,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -530,10 +538,11 @@ EXTRA_DIST = rebuild_makefile.sh BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ dof_object.h factory.h float128_shims.h getpot.h id_types.h \ libmesh.h libmesh_abort.h libmesh_augment_std_namespace.h \ - libmesh_base.h libmesh_common.h libmesh_documentation.h \ - libmesh_exceptions.h libmesh_logging.h libmesh_singleton.h \ - libmesh_version.h multi_predicates.h periodic_boundaries.h \ - periodic_boundary.h periodic_boundary_base.h print_trace.h \ + libmesh_base.h libmesh_common.h libmesh_device.h \ + libmesh_documentation.h libmesh_exceptions.h libmesh_logging.h \ + libmesh_singleton.h libmesh_version.h multi_predicates.h \ + periodic_boundaries.h periodic_boundary.h \ + periodic_boundary_base.h print_trace.h \ reference_counted_object.h reference_counter.h \ single_predicates.h sparsity_pattern.h variable.h \ variant_filter_iterator.h enum_convergence_flags.h \ @@ -580,7 +589,9 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ remote_elem.h sphere.h stored_range.h surface.h \ default_coupling.h ghost_point_neighbors.h ghosting_functor.h \ non_manifold_coupling.h overlap_coupling.h \ - point_neighbor_coupling.h sibling_coupling.h abaqus_io.h \ + point_neighbor_coupling.h sibling_coupling.h \ + kokkos_linalg_base.h kokkos_storage.h kokkos_storage_policy.h \ + kokkos_tensor_ops.h kokkos_vector_ops.h abaqus_io.h \ boundary_info.h boundary_mesh.h checkpoint_io.h \ distributed_mesh.h dyna_io.h ensight_io.h exodusII_io.h \ exodusII_io_helper.h exodus_header_info.h 
fro_io.h gmsh_io.h \ @@ -992,6 +1003,9 @@ libmesh_base.h: $(top_srcdir)/include/base/libmesh_base.h libmesh_common.h: $(top_srcdir)/include/base/libmesh_common.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +libmesh_device.h: $(top_srcdir)/include/base/libmesh_device.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + libmesh_documentation.h: $(top_srcdir)/include/base/libmesh_documentation.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1451,6 +1465,21 @@ point_neighbor_coupling.h: $(top_srcdir)/include/ghosting/point_neighbor_couplin sibling_coupling.h: $(top_srcdir)/include/ghosting/sibling_coupling.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_linalg_base.h: $(top_srcdir)/include/gpu/kokkos_linalg_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage.h: $(top_srcdir)/include/gpu/kokkos_storage.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage_policy.h: $(top_srcdir)/include/gpu/kokkos_storage_policy.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_tensor_ops.h: $(top_srcdir)/include/gpu/kokkos_tensor_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_vector_ops.h: $(top_srcdir)/include/gpu/kokkos_vector_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + abaqus_io.h: $(top_srcdir)/include/mesh/abaqus_io.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/tests/Makefile.in b/tests/Makefile.in index 59ed2e7641e..d94f4b9384d 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -95,25 +95,31 @@ target_triplet = @target@ @LIBMESH_ENABLE_FPARSER_TRUE@ fparser/autodiff.C check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ - $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) + $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) \ + $(am__EXEEXT_7) +TESTS = $(am__EXEEXT_1) $(am__append_11) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_2 = -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_3 = kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_4 = 
kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. Some cppunit versions work fine for us, # others segfault and/or hang. By default we will not run # GLIBCXX-debugging builds with cppunit unless specifically # configured to. -@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_2 = unit_tests-dbg -@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_3 = unit_tests-dbg -@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_4 = unit_tests-devel -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__append_5 = unit_tests-prof -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__append_6 = unit_tests-oprof -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__append_7 = unit_tests-opt -@LIBMESH_VPATH_BUILD_TRUE@am__append_8 = .linkstamp +@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_5 = unit_tests-dbg +@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_6 = unit_tests-dbg +@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_7 = unit_tests-devel +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__append_8 = unit_tests-prof +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__append_9 = unit_tests-oprof +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__append_10 = unit_tests-opt +@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_11 = run_unit_tests.sh +@LIBMESH_VPATH_BUILD_TRUE@am__append_12 = .linkstamp ###################################################################### # # Don't leave code coverage outputs lying around -@CODE_COVERAGE_ENABLED_TRUE@am__append_9 = */*.gcda */*.gcno +@CODE_COVERAGE_ENABLED_TRUE@am__append_13 = */*.gcda 
*/*.gcno subdir = tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = \ @@ -182,12 +188,34 @@ mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/include/libmesh_config.h.tmp CONFIG_CLEAN_FILES = run_unit_tests.sh CONFIG_CLEAN_VPATH_FILES = -@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_1 = unit_tests-dbg$(EXEEXT) -@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) -@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-devel$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_4 = unit_tests-prof$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-oprof$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_6 = unit_tests-opt$(EXEEXT) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__EXEEXT_1 = kokkos_vector_ops_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit$(EXEEXT) +@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) +@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-dbg$(EXEEXT) +@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_4 = unit_tests-devel$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-prof$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_6 = unit_tests-oprof$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_7 = unit_tests-opt$(EXEEXT) +am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST = \ + numerics/kokkos_tensor_ops_oracle_test.K +am__dirstamp = $(am__leading_dot)dirstamp +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_tensor_ops_oracle_unit_OBJECTS = 
numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT) +kokkos_tensor_ops_oracle_unit_OBJECTS = \ + $(am_kokkos_tensor_ops_oracle_unit_OBJECTS) +am__DEPENDENCIES_1 = +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_vector_ops_oracle_unit_SOURCES_DIST = \ + numerics/kokkos_vector_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_vector_ops_oracle_unit_OBJECTS = numerics/kokkos_vector_ops_oracle_test.$(OBJEXT) +kokkos_vector_ops_oracle_unit_OBJECTS = \ + $(am_kokkos_vector_ops_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ stream_redirector.h test_comm.h base/dof_object_test.h \ base/dof_map_test.C base/default_coupling_test.C \ @@ -262,7 +290,6 @@ am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ utils/parameters_test.C utils/point_locator_test.C \ utils/rb_parameters_test.C utils/transparent_comparator.C \ utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C -am__dirstamp = $(am__leading_dot)dirstamp @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_1 = fparser/unit_tests_dbg-autodiff.$(OBJEXT) am__objects_2 = unit_tests_dbg-driver.$(OBJEXT) \ base/unit_tests_dbg-dof_map_test.$(OBJEXT) \ @@ -1860,10 +1887,14 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ +SOURCES = $(kokkos_tensor_ops_oracle_unit_SOURCES) \ + $(kokkos_vector_ops_oracle_unit_SOURCES) \ + $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ 
$(unit_tests_oprof_SOURCES) $(unit_tests_opt_SOURCES) \ $(unit_tests_prof_SOURCES) -DIST_SOURCES = $(am__unit_tests_dbg_SOURCES_DIST) \ +DIST_SOURCES = $(am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_vector_ops_oracle_unit_SOURCES_DIST) \ + $(am__unit_tests_dbg_SOURCES_DIST) \ $(am__unit_tests_devel_SOURCES_DIST) \ $(am__unit_tests_oprof_SOURCES_DIST) \ $(am__unit_tests_opt_SOURCES_DIST) \ @@ -2064,11 +2095,18 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2116,6 +2154,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -2288,6 +2327,7 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) -I$(top_builddir)/include \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) +KOKKOS_TEST_CPPFLAGS = $(am__append_2) unit_tests_sources = driver.C libmesh_cppunit.h stream_redirector.h \ test_comm.h base/dof_object_test.h base/dof_map_test.C \ base/default_coupling_test.C base/getpot_test.C \ @@ -2450,6 +2490,16 @@ unit_tests_data = $(data) # Why isn't this working automatically? 
EXTRA_DIST = $(data) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_SOURCES = $(unit_tests_sources) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_CXXFLAGS = $(CXXFLAGS_DBG) @@ -2480,7 +2530,16 @@ EXTRA_DIST = $(data) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_LDADD = $(top_builddir)/libmesh_opt.la @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_optdir = $(datadir) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_DATA = $(data) -@LIBMESH_ENABLE_CPPUNIT_TRUE@TESTS = run_unit_tests.sh + +# Custom link rules so the Kokkos compiler drives the final link step. 
+kokkos_vector_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ + +kokkos_tensor_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_tensor_ops_oracle_unit_LDFLAGS) -o $@ + CLEANFILES = cube_mesh.xda slit_mesh.xda slit_solution.xda out.e \ mesh_with_soln.e elemental_from_nodal.e write_elemset_data.e \ write_sideset_data.e write_nodeset_data.e write_edgeset_data.e \ @@ -2514,8 +2573,8 @@ CLEANFILES = cube_mesh.xda slit_mesh.xda slit_solution.xda out.e \ write_exodus_QUADSHELL9.e write_exodus_TET10.e \ write_exodus_TET14.e write_exodus_TET4.e write_exodus_TRI3.e \ write_exodus_TRI6.e write_exodus_TRI7.e \ - write_exodus_TRISHELL3.e smoother.out $(am__append_8) \ - $(am__append_9) + write_exodus_TRISHELL3.e smoother.out $(am__append_12) \ + $(am__append_13) # need to link any data files for VPATH builds @LIBMESH_VPATH_BUILD_TRUE@BUILT_SOURCES = .linkstamp @@ -2523,7 +2582,7 @@ all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: -.SUFFIXES: .C .lo .o .obj +.SUFFIXES: .C .K .lo .o .obj $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ @@ -2559,6 +2618,24 @@ run_unit_tests.sh: $(top_builddir)/config.status $(srcdir)/run_unit_tests.sh.in clean-checkPROGRAMS: $(am__rm_f) $(check_PROGRAMS) test -z "$(EXEEXT)" || $(am__rm_f) $(check_PROGRAMS:$(EXEEXT)=) +numerics/$(am__dirstamp): + @$(MKDIR_P) numerics + @: >>numerics/$(am__dirstamp) +numerics/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) numerics/$(DEPDIR) + @: >>numerics/$(DEPDIR)/$(am__dirstamp) +numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT): \ + numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) + +kokkos_tensor_ops_oracle_unit$(EXEEXT): $(kokkos_tensor_ops_oracle_unit_OBJECTS) $(kokkos_tensor_ops_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_tensor_ops_oracle_unit_DEPENDENCIES) + 
@rm -f kokkos_tensor_ops_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_tensor_ops_oracle_unit_LINK) $(kokkos_tensor_ops_oracle_unit_OBJECTS) $(kokkos_tensor_ops_oracle_unit_LDADD) $(LIBS) +numerics/kokkos_vector_ops_oracle_test.$(OBJEXT): \ + numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) + +kokkos_vector_ops_oracle_unit$(EXEEXT): $(kokkos_vector_ops_oracle_unit_OBJECTS) $(kokkos_vector_ops_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_vector_ops_oracle_unit_DEPENDENCIES) + @rm -f kokkos_vector_ops_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_vector_ops_oracle_unit_LINK) $(kokkos_vector_ops_oracle_unit_OBJECTS) $(kokkos_vector_ops_oracle_unit_LDADD) $(LIBS) base/$(am__dirstamp): @$(MKDIR_P) base @: >>base/$(am__dirstamp) @@ -2727,12 +2804,6 @@ mesh/unit_tests_dbg-project_solution_test.$(OBJEXT): \ mesh/$(am__dirstamp) mesh/$(DEPDIR)/$(am__dirstamp) mesh/unit_tests_dbg-xdrio_test.$(OBJEXT): mesh/$(am__dirstamp) \ mesh/$(DEPDIR)/$(am__dirstamp) -numerics/$(am__dirstamp): - @$(MKDIR_P) numerics - @: >>numerics/$(am__dirstamp) -numerics/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) numerics/$(DEPDIR) - @: >>numerics/$(DEPDIR)/$(am__dirstamp) numerics/unit_tests_dbg-composite_function_test.$(OBJEXT): \ numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) numerics/unit_tests_dbg-coupling_matrix_test.$(OBJEXT): \ @@ -14717,6 +14788,15 @@ $(top_builddir)/libmesh_prof.la: FORCE $(top_builddir)/libmesh_oprof.la: FORCE (cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) libmesh_oprof.la) +# Compile .K translation units with the Kokkos device compiler. +# $(MPI_INCLUDES) is needed because KOKKOS_CXX may be nvcc/hipcc +# instead of the MPI compiler wrapper, so mpi.h won't be found implicitly. 
+.K.o: + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ + @LIBMESH_VPATH_BUILD_TRUE@.linkstamp: @LIBMESH_VPATH_BUILD_TRUE@ -rm -f solutions && $(LN_S) -f $(srcdir)/solutions . @LIBMESH_VPATH_BUILD_TRUE@ -rm -f meshes && $(LN_S) -f $(srcdir)/meshes . From 7daa2e616a5bc42abe10d0fd2db6e24b5f834939 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Mon, 11 May 2026 12:45:53 -0600 Subject: [PATCH 15/48] Include method CXX flags in pkg-config cflags --- contrib/bin/test_installed_examples.sh | 8 ++++++-- contrib/bin/test_installed_headers.sh | 6 +++++- contrib/utils/libmesh-dbg.pc.in | 1 + contrib/utils/libmesh-devel.pc.in | 1 + contrib/utils/libmesh-oprof.pc.in | 1 + contrib/utils/libmesh-opt.pc.in | 1 + contrib/utils/libmesh-prof.pc.in | 1 + 7 files changed, 16 insertions(+), 3 deletions(-) diff --git a/contrib/bin/test_installed_examples.sh b/contrib/bin/test_installed_examples.sh index 8be94eccde2..c255be44304 100755 --- a/contrib/bin/test_installed_examples.sh +++ b/contrib/bin/test_installed_examples.sh @@ -29,9 +29,13 @@ fi echo "Testing examples in $examples_install_path" +if test "$PKG_CONFIG" = ""; then + PKG_CONFIG=pkg-config +fi + if test "$PKG_CONFIG" != "no"; then - installed_CXXFLAGS=$(pkg-config libmesh --cflags) - installed_LIBS=$(pkg-config libmesh --libs) + installed_CXXFLAGS=$($PKG_CONFIG libmesh --cflags) + installed_LIBS=$($PKG_CONFIG libmesh --libs) elif test -x $LIBMESH_CONFIG_PATH/libmesh-config; then installed_CXXFLAGS=$($LIBMESH_CONFIG_PATH/libmesh-config --cppflags --cxxflags --include) diff --git a/contrib/bin/test_installed_headers.sh b/contrib/bin/test_installed_headers.sh index 2154bc19c20..645ba266a49 100755 --- a/contrib/bin/test_installed_headers.sh +++ b/contrib/bin/test_installed_headers.sh @@ -49,12 +49,16 @@ fi # Variable is set but not used # testing_installed_tree="no" +if test "$PKG_CONFIG" = 
""; then + PKG_CONFIG=pkg-config +fi + if test "$test_CXXFLAGS" = ""; then # testing_installed_tree="yes" if test "$PKG_CONFIG" != "no"; then - test_CXXFLAGS=$(pkg-config libmesh --cflags) + test_CXXFLAGS=$($PKG_CONFIG libmesh --cflags) elif test -x $LIBMESH_CONFIG_PATH/libmesh-config; then test_CXXFLAGS=$($LIBMESH_CONFIG_PATH/libmesh-config --cppflags --cxxflags --include) diff --git a/contrib/utils/libmesh-dbg.pc.in b/contrib/utils/libmesh-dbg.pc.in index ab532d664c1..b4a29f1156f 100644 --- a/contrib/utils/libmesh-dbg.pc.in +++ b/contrib/utils/libmesh-dbg.pc.in @@ -12,5 +12,6 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_dbg -ltimpi_dbg \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_DBG@ \ + ${cxxflags_extra} \ -I${includedir} \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-devel.pc.in b/contrib/utils/libmesh-devel.pc.in index 63b3d6c8097..4614467f5d9 100644 --- a/contrib/utils/libmesh-devel.pc.in +++ b/contrib/utils/libmesh-devel.pc.in @@ -12,5 +12,6 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_devel -ltimpi_devel \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_DEVEL@ \ + ${cxxflags_extra} \ -I${includedir} \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-oprof.pc.in b/contrib/utils/libmesh-oprof.pc.in index 1184f433eb5..0a1fbd47e25 100644 --- a/contrib/utils/libmesh-oprof.pc.in +++ b/contrib/utils/libmesh-oprof.pc.in @@ -12,5 +12,6 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_oprof -ltimpi_oprof \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_OPROF@ \ + ${cxxflags_extra} \ -I${includedir} \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-opt.pc.in b/contrib/utils/libmesh-opt.pc.in index 10deb13ec50..cbb9529da5b 100644 --- a/contrib/utils/libmesh-opt.pc.in +++ b/contrib/utils/libmesh-opt.pc.in @@ -12,5 +12,6 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_opt -ltimpi_opt \ @libmesh_installed_LIBS@ 
@libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_OPT@ \ + ${cxxflags_extra} \ -I${includedir} \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-prof.pc.in b/contrib/utils/libmesh-prof.pc.in index 213601d795a..333dcaa92b6 100644 --- a/contrib/utils/libmesh-prof.pc.in +++ b/contrib/utils/libmesh-prof.pc.in @@ -12,5 +12,6 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_prof -ltimpi_prof \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_PROF@ \ + ${cxxflags_extra} \ -I${includedir} \ @libmesh_optional_INCLUDES@ From 57b4840cd01c185c6ce56ca4a187a2eda44a61dd Mon Sep 17 00:00:00 2001 From: rochi00 Date: Mon, 11 May 2026 15:44:23 -0600 Subject: [PATCH 16/48] Run tensor foundation oracle on device --- .../kokkos_tensor_ops_oracle_runners.h | 103 ++++++++++-------- 1 file changed, 60 insertions(+), 43 deletions(-) diff --git a/tests/numerics/kokkos_tensor_ops_oracle_runners.h b/tests/numerics/kokkos_tensor_ops_oracle_runners.h index cad772919a0..07670821baa 100644 --- a/tests/numerics/kokkos_tensor_ops_oracle_runners.h +++ b/tests/numerics/kokkos_tensor_ops_oracle_runners.h @@ -233,8 +233,6 @@ template static int test_linalg_foundation_storage_roundtrip() { - int fail = 0; - auto d_vector = libMesh::Kokkos::make_vector_storage("foundation_vector", 1); auto d_tensor = libMesh::Kokkos::make_tensor_storage("foundation_tensor", 1); @@ -253,51 +251,64 @@ test_linalg_foundation_storage_roundtrip() ::Kokkos::deep_copy(d_tensor, h_tensor); } - const auto vector_in = libMesh::Kokkos::make_vector_ref(d_vector, 0); - const auto tensor_in = libMesh::Kokkos::make_tensor_ref(d_tensor, 0); - - const auto as_point = libMesh::Kokkos::materialize_vector(vector_in); - const auto as_vector_value = - libMesh::Kokkos::materialize_vector>(vector_in); - const auto as_type_vector = - libMesh::Kokkos::materialize_vector>(vector_in); - - for (unsigned int d = 0; d < LIBMESH_DIM; ++d) - { - const Real expected = Real(d + 1) * Real(0.5); - fail += 
(std::fabs(as_point(d) - expected) <= tol) ? 0 : 1; - fail += (std::fabs(as_vector_value(d) - expected) <= tol) ? 0 : 1; - fail += (std::fabs(as_type_vector(d) - expected) <= tol) ? 0 : 1; - } + auto d_vector_out = libMesh::Kokkos::make_vector_storage("foundation_vector_out", 1); + auto d_tensor_out = libMesh::Kokkos::make_tensor_storage("foundation_tensor_out", 1); + ::Kokkos::View d_fail("foundation_fail"); - const auto as_tensor_value = - libMesh::Kokkos::materialize_tensor>(tensor_in); - const auto as_type_tensor = - libMesh::Kokkos::materialize_tensor>(tensor_in); + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + int local_fail = 0; - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - { - const Real expected = Real(10 * row + col + 1) * Real(0.25); - fail += (std::fabs(as_tensor_value(row, col) - expected) <= tol) ? 0 : 1; - fail += (std::fabs(as_type_tensor(row, col) - expected) <= tol) ? 0 : 1; - } + const auto vector_in = libMesh::Kokkos::make_vector_ref(d_vector, 0); + const auto tensor_in = libMesh::Kokkos::make_tensor_ref(d_tensor, 0); - auto d_vector_out = libMesh::Kokkos::make_vector_storage("foundation_vector_out", 1); - auto d_tensor_out = libMesh::Kokkos::make_tensor_storage("foundation_tensor_out", 1); + const auto as_point = libMesh::Kokkos::materialize_vector(vector_in); + const auto as_vector_value = + libMesh::Kokkos::materialize_vector>(vector_in); + const auto as_type_vector = + libMesh::Kokkos::materialize_vector>(vector_in); - auto vector_out = libMesh::Kokkos::make_vector_ref(d_vector_out, 0); - auto tensor_out = libMesh::Kokkos::make_tensor_ref(d_tensor_out, 0); + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + { + const Real expected = Real(d + 1) * Real(0.5); + local_fail += (std::fabs(as_point(d) - expected) <= tol) ? 0 : 1; + local_fail += (std::fabs(as_vector_value(d) - expected) <= tol) ? 0 : 1; + local_fail += (std::fabs(as_type_vector(d) - expected) <= tol) ? 
0 : 1; + } - vector_out.zero(); - vector_out.assign(as_vector_value); - vector_out.add_scaled(as_type_vector, Real(0)); - vector_out.subtract_scaled(as_type_vector, Real(0)); + const auto as_tensor_value = + libMesh::Kokkos::materialize_tensor>(tensor_in); + const auto as_type_tensor = + libMesh::Kokkos::materialize_tensor>(tensor_in); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + const Real expected = Real(10 * row + col + 1) * Real(0.25); + local_fail += (std::fabs(as_tensor_value(row, col) - expected) <= tol) ? 0 : 1; + local_fail += (std::fabs(as_type_tensor(row, col) - expected) <= tol) ? 0 : 1; + } + + auto vector_out = libMesh::Kokkos::make_vector_ref(d_vector_out, 0); + auto tensor_out = libMesh::Kokkos::make_tensor_ref(d_tensor_out, 0); + + vector_out.zero(); + vector_out.assign(as_vector_value); + vector_out.add_scaled(as_type_vector, Real(0)); + vector_out.subtract_scaled(as_type_vector, Real(0)); + + tensor_out.zero(); + tensor_out.assign(as_tensor_value); + tensor_out.add_scaled(as_type_tensor, Real(0)); + tensor_out.subtract_scaled(as_type_tensor, Real(0)); + + d_fail() = local_fail; + }); + ::Kokkos::fence(); - tensor_out.zero(); - tensor_out.assign(as_tensor_value); - tensor_out.add_scaled(as_type_tensor, Real(0)); - tensor_out.subtract_scaled(as_type_tensor, Real(0)); + int fail = 0; + ::Kokkos::deep_copy(fail, d_fail); { auto h_vector_out = ::Kokkos::create_mirror_view(d_vector_out); @@ -306,11 +317,17 @@ test_linalg_foundation_storage_roundtrip() ::Kokkos::deep_copy(h_tensor_out, d_tensor_out); for (unsigned int d = 0; d < LIBMESH_DIM; ++d) - fail += (std::fabs(h_vector_out(0, d) - as_vector_value(d)) <= tol) ? 0 : 1; + { + const Real expected = Real(d + 1) * Real(0.5); + fail += (std::fabs(h_vector_out(0, d) - expected) <= tol) ? 
0 : 1; + } for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - fail += (std::fabs(h_tensor_out(0, row, col) - as_tensor_value(row, col)) <= tol) ? 0 : 1; + { + const Real expected = Real(10 * row + col + 1) * Real(0.25); + fail += (std::fabs(h_tensor_out(0, row, col) - expected) <= tol) ? 0 : 1; + } } return fail; From 93fee21645efffa368e5bc27e94ee1875b014564 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Mon, 11 May 2026 16:01:50 -0600 Subject: [PATCH 17/48] Make Point constructors device-callable --- include/geom/point.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/geom/point.h b/include/geom/point.h index a305deea3a7..57796757869 100644 --- a/include/geom/point.h +++ b/include/geom/point.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/hashing.h" +#include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" namespace libMesh @@ -44,6 +45,7 @@ class Point : public TypeVector * Constructor. By default sets all entries to 0. Gives the point * 0 in \p LIBMESH_DIM dimensions. */ + LIBMESH_DEVICE_INLINE Point (const Real x=0., const Real y=0., const Real z=0.) : @@ -53,11 +55,13 @@ class Point : public TypeVector /** * Trivial copy-constructor. */ + LIBMESH_DEVICE_INLINE Point (const Point & p) = default; /** * Copy-constructor from non-point Typevector. */ + LIBMESH_DEVICE_INLINE Point (const TypeVector & p) : TypeVector (p) {} @@ -65,6 +69,7 @@ class Point : public TypeVector /** * Copy-assignment operator. 
*/ + LIBMESH_DEVICE_INLINE Point& operator=(const Point & p) = default; /** @@ -73,6 +78,7 @@ class Point : public TypeVector template ::value,void>::type> + LIBMESH_DEVICE_INLINE Point (const T x) : TypeVector (x,0,0) {} From 77d03330221e65bc364bb8ebac50563a2391617b Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 07:50:39 -0600 Subject: [PATCH 18/48] Inline tensor equality into constrained operators --- include/gpu/kokkos_tensor_ops.h | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index a08080405d6..f251b6d9e4e 100644 --- a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -130,28 +130,6 @@ bool tensor_is_zero(const TensorLike & T_in) return true; } -template -LIBMESH_DEVICE_INLINE -bool tensor_equal(const LeftTensor & left, const RightTensor & right) -{ - static_assert(is_tensor_like_v, "tensor_equal() requires a tensor-like left input"); - static_assert(is_tensor_like_v, "tensor_equal() requires a tensor-like right input"); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - if (tensor_get_component(left, row, col) != tensor_get_component(right, row, col)) - return false; - - return true; -} - -template -LIBMESH_DEVICE_INLINE -bool tensor_not_equal(const LeftTensor & left, const RightTensor & right) -{ - return !tensor_equal(left, right); -} - // Tensor arithmetic template @@ -953,7 +931,12 @@ auto operator==(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), bool> { - return tensor_equal(left, right); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (tensor_get_component(left, row, col) != tensor_get_component(right, row, col)) + return false; + + return true; } template @@ -963,7 +946,7 @@ auto operator!=(const LeftTensor & left, const 
RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), bool> { - return tensor_not_equal(left, right); + return !(left == right); } template From d26c43ac01b5a532160a1679f5af6854ee6544ea Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 07:52:10 -0600 Subject: [PATCH 19/48] Differentiate leading tensor determinant helper --- include/gpu/kokkos_tensor_ops.h | 13 +++++++------ tests/numerics/kokkos_tensor_ops_oracle_runners.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index f251b6d9e4e..b124fd212d6 100644 --- a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -264,9 +264,10 @@ tensor_semantic_type_t tensor_divide(const TensorLike & T_in, const template LIBMESH_DEVICE_INLINE -auto tensor_determinant(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +auto tensor_leading_determinant(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) { - static_assert(is_tensor_like_v, "tensor_determinant() requires a tensor-like input"); + static_assert(is_tensor_like_v, + "tensor_leading_determinant() requires a tensor-like input"); if (dim == 0) return tensor_value_type_t(1); @@ -316,7 +317,7 @@ ResultTensor tensor_inverse(const TensorLike & T_in, const unsigned int dim = LI return out; } - const auto det = tensor_determinant(T_in, dim); + const auto det = tensor_leading_determinant(T_in, dim); if (dim == 2) { @@ -610,9 +611,9 @@ auto transpose(const TensorLike & T_in) template LIBMESH_DEVICE_INLINE auto det(const TensorLike & T_in) - -> std::enable_if_t, decltype(tensor_determinant(T_in))> + -> std::enable_if_t, decltype(T_in.det())> { - return tensor_determinant(T_in); + return T_in.det(); } template @@ -779,7 +780,7 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::det(const unsigned int dim) const { - return tensor_determinant(*this, dim); + return tensor_leading_determinant(*this, dim); } template diff --git 
a/tests/numerics/kokkos_tensor_ops_oracle_runners.h b/tests/numerics/kokkos_tensor_ops_oracle_runners.h index 07670821baa..a12cec13ab9 100644 --- a/tests/numerics/kokkos_tensor_ops_oracle_runners.h +++ b/tests/numerics/kokkos_tensor_ops_oracle_runners.h @@ -51,7 +51,7 @@ test_dim_ops() KOKKOS_LAMBDA(int c) { const auto J_ref = libMesh::Kokkos::make_tensor_ref(d_J, c); const unsigned int dim = d_dims(c); - const Real det = libMesh::Kokkos::tensor_determinant(J_ref, dim); + const Real det = J_ref.det(dim); const auto inv = J_ref.inverse(dim); const auto I = libMesh::Kokkos::tensor_identity(dim); const auto prod_left = J_ref * inv; From ac897f283ad46d0ff5d54bf14afc2285bfdfaf3b Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 08:08:04 -0600 Subject: [PATCH 20/48] Inline vector equality and remove contract shim --- include/gpu/kokkos_vector_ops.h | 42 ++++--------------- .../kokkos_tensor_ops_oracle_runners.h | 4 +- 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/include/gpu/kokkos_vector_ops.h b/include/gpu/kokkos_vector_ops.h index 3fb14068661..fffbf789da7 100644 --- a/include/gpu/kokkos_vector_ops.h +++ b/include/gpu/kokkos_vector_ops.h @@ -63,13 +63,6 @@ auto vector_dot(const LeftVector & left, const RightVector & right) return sum; } -template -LIBMESH_DEVICE_INLINE -auto vector_contract(const LeftVector & left, const RightVector & right) -{ - return vector_dot(left, right); -} - template LIBMESH_DEVICE_INLINE auto vector_norm_sq(const VectorLike & v) @@ -122,27 +115,6 @@ bool vector_is_zero(const VectorLike & v) return true; } -template -LIBMESH_DEVICE_INLINE -bool vector_equal(const LeftVector & left, const RightVector & right) -{ - static_assert(is_vector_like_v, "vector_equal() requires a vector-like left input"); - static_assert(is_vector_like_v, "vector_equal() requires a vector-like right input"); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - if (vector_get_component(left, component) != 
vector_get_component(right, component)) - return false; - - return true; -} - -template -LIBMESH_DEVICE_INLINE -bool vector_not_equal(const LeftVector & left, const RightVector & right) -{ - return !vector_equal(left, right); -} - // Arithmetic template @@ -447,9 +419,9 @@ template LIBMESH_DEVICE_INLINE auto contract(const LeftVector & left, const RightVector & right) -> std::enable_if_t && is_vector_like_v, - decltype(vector_contract(left, right))> + decltype(vector_dot(left, right))> { - return vector_contract(left, right); + return vector_dot(left, right); } template @@ -544,7 +516,7 @@ template LIBMESH_DEVICE_INLINE auto vector_ref::contract(const RightVector & right) const { - return vector_contract(*this, right); + return vector_dot(*this, right); } template @@ -672,7 +644,11 @@ auto operator==(const LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), bool> { - return vector_equal(left, right); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + if (vector_get_component(left, component) != vector_get_component(right, component)) + return false; + + return true; } template @@ -682,7 +658,7 @@ auto operator!=(const LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), bool> { - return vector_not_equal(left, right); + return !(left == right); } template diff --git a/tests/numerics/kokkos_tensor_ops_oracle_runners.h b/tests/numerics/kokkos_tensor_ops_oracle_runners.h index a12cec13ab9..59aadda6a4f 100644 --- a/tests/numerics/kokkos_tensor_ops_oracle_runners.h +++ b/tests/numerics/kokkos_tensor_ops_oracle_runners.h @@ -389,8 +389,8 @@ test_mixed_representation_ops() d_scalars(2) = A_ref.det(); d_scalars(3) = (A_ref == A) ? 1.0 : 0.0; d_scalars(4) = (A_ref != inverse) ? 1.0 : 0.0; - d_scalars(5) = libMesh::Kokkos::vector_equal(row0, ref_row0) ? 1.0 : 0.0; - d_scalars(6) = libMesh::Kokkos::vector_equal(col0, ref_col0) ? 1.0 : 0.0; + d_scalars(5) = (row0 == ref_row0) ? 
1.0 : 0.0; + d_scalars(6) = (col0 == ref_col0) ? 1.0 : 0.0; d_scalars(7) = A_ref.tr(); libMesh::Kokkos::store_vector(d_vectors, 0, right); From e83887cd9deaf1a88c407920f57e2c45cb3159cd Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 08:19:45 -0600 Subject: [PATCH 21/48] Hide tensor helper API and share in-place Kokkos ops --- include/gpu/kokkos_tensor_ops.h | 756 +++++++++--------- include/gpu/kokkos_vector_ops.h | 127 ++- .../kokkos_tensor_ops_oracle_runners.h | 4 +- 3 files changed, 485 insertions(+), 402 deletions(-) diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index b124fd212d6..c20ade2661e 100644 --- a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -58,386 +58,389 @@ tensor_semantic_type_t copy_tensor(const TensorLike & T_in) return copy_tensor>(T_in); } -// Tensor reductions and predicates +namespace detail +{ -template +template LIBMESH_DEVICE_INLINE -auto tensor_contract(const LeftTensor & left, const RightTensor & right) +auto leading_determinant(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) { - static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like left input"); - static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like right input"); + static_assert(is_tensor_like_v, + "detail::leading_determinant() requires a tensor-like input"); - using sum_type = - detail::remove_cvref_t; + if (dim == 0) + return tensor_value_type_t(1); - sum_type sum = sum_type(0); - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - sum += tensor_get_component(left, row, col) * tensor_get_component(right, row, col); + if (dim == 1) + return tensor_get_component(T_in, 0, 0); - return sum; + if (dim == 2) + return tensor_get_component(T_in, 0, 0) * tensor_get_component(T_in, 1, 1) - + tensor_get_component(T_in, 0, 1) * tensor_get_component(T_in, 1, 0); + +#if LIBMESH_DIM > 2 + const auto a00 = 
tensor_get_component(T_in, 0, 0); + const auto a01 = tensor_get_component(T_in, 0, 1); + const auto a02 = tensor_get_component(T_in, 0, 2); + const auto a10 = tensor_get_component(T_in, 1, 0); + const auto a11 = tensor_get_component(T_in, 1, 1); + const auto a12 = tensor_get_component(T_in, 1, 2); + const auto a20 = tensor_get_component(T_in, 2, 0); + const auto a21 = tensor_get_component(T_in, 2, 1); + const auto a22 = tensor_get_component(T_in, 2, 2); + + return a00 * (a11 * a22 - a12 * a21) - + a01 * (a10 * a22 - a12 * a20) + + a02 * (a10 * a21 - a11 * a20); +#else + libmesh_ignore(T_in); + return tensor_value_type_t(0); +#endif } -template +template LIBMESH_DEVICE_INLINE -auto tensor_norm_sq(const TensorLike & T_in) +ResultTensor outer_product(const LeftVector & left, const RightVector & right) { - static_assert(is_tensor_like_v, "tensor_norm_sq() requires a tensor-like input"); - - using norm_type = detail::remove_cvref_t; + ResultTensor out; + out.zero(); - norm_type sum = norm_type(0); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - sum += libMesh::TensorTools::norm_sq(tensor_get_component(T_in, row, col)); + tensor_set_component(out, + row, + col, + vector_get_component(left, row) * libmesh_conj(vector_get_component(right, col))); - return sum; + return out; } -template +template LIBMESH_DEVICE_INLINE -auto tensor_norm(const TensorLike & T_in) +ResultTensor inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) { - using std::sqrt; - return sqrt(tensor_norm_sq(T_in)); -} + static_assert(is_tensor_like_v, "detail::inverse() requires a tensor-like input"); -template -LIBMESH_DEVICE_INLINE -auto tensor_trace(const TensorLike & T_in) -{ - static_assert(is_tensor_like_v, "tensor_trace() requires a tensor-like input"); + ResultTensor out; + out.zero(); - using trace_type = detail::remove_cvref_t; - trace_type sum = trace_type(0); - for (unsigned int i = 0; i < LIBMESH_DIM; ++i) - sum += 
tensor_get_component(T_in, i, i); + if (dim == 0) + return out; - return sum; + if (dim == 1) + { + tensor_set_component(out, 0, 0, tensor_value_type_t(1) / tensor_get_component(T_in, 0, 0)); + return out; + } + + const auto det = leading_determinant(T_in, dim); + + if (dim == 2) + { + tensor_set_component(out, 0, 0, tensor_get_component(T_in, 1, 1) / det); + tensor_set_component(out, 0, 1, -tensor_get_component(T_in, 0, 1) / det); + tensor_set_component(out, 1, 0, -tensor_get_component(T_in, 1, 0) / det); + tensor_set_component(out, 1, 1, tensor_get_component(T_in, 0, 0) / det); + return out; + } + +#if LIBMESH_DIM > 2 + const auto a00 = tensor_get_component(T_in, 0, 0); + const auto a01 = tensor_get_component(T_in, 0, 1); + const auto a02 = tensor_get_component(T_in, 0, 2); + const auto a10 = tensor_get_component(T_in, 1, 0); + const auto a11 = tensor_get_component(T_in, 1, 1); + const auto a12 = tensor_get_component(T_in, 1, 2); + const auto a20 = tensor_get_component(T_in, 2, 0); + const auto a21 = tensor_get_component(T_in, 2, 1); + const auto a22 = tensor_get_component(T_in, 2, 2); + + tensor_set_component(out, 0, 0, (a11 * a22 - a12 * a21) / det); + tensor_set_component(out, 0, 1, (a02 * a21 - a01 * a22) / det); + tensor_set_component(out, 0, 2, (a01 * a12 - a02 * a11) / det); + tensor_set_component(out, 1, 0, (a12 * a20 - a10 * a22) / det); + tensor_set_component(out, 1, 1, (a00 * a22 - a02 * a20) / det); + tensor_set_component(out, 1, 2, (a02 * a10 - a00 * a12) / det); + tensor_set_component(out, 2, 0, (a10 * a21 - a11 * a20) / det); + tensor_set_component(out, 2, 1, (a01 * a20 - a00 * a21) / det); + tensor_set_component(out, 2, 2, (a00 * a11 - a01 * a10) / det); +#else + libmesh_ignore(T_in); +#endif + + return out; } -template +template LIBMESH_DEVICE_INLINE -bool tensor_is_zero(const TensorLike & T_in) +ResultTensor transpose(const TensorLike & T_in) { - static_assert(is_tensor_like_v, "tensor_is_zero() requires a tensor-like input"); + ResultTensor 
out; + out.zero(); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - if (tensor_get_component(T_in, row, col) != tensor_value_type_t(0)) - return false; + tensor_set_component(out, row, col, tensor_get_component(T_in, col, row)); - return true; + return out; } -// Tensor arithmetic - -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_outer_product(const LeftVector & left, const RightVector & right) +ResultTensor multiply(const LeftTensor & left, const RightTensor & right) { ResultTensor out; out.zero(); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - vector_get_component(left, row) * libmesh_conj(vector_get_component(right, col))); + { + auto value = tensor_get_component(left, row, 0) * tensor_get_component(right, 0, col); + for (unsigned int k = 1; k < LIBMESH_DIM; ++k) + value += tensor_get_component(left, row, k) * tensor_get_component(right, k, col); + tensor_set_component(out, row, col, value); + } return out; } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -libMesh::TypeTensor> -tensor_outer_product(const LeftVector & left, const RightVector & right) +ResultVector row(const TensorLike & T_in, const unsigned int row_index) { - return tensor_outer_product>>(left, right); + ResultVector out; + out.zero(); + + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + vector_set_component(out, col, tensor_get_component(T_in, row_index, col)); + + return out; } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_add(const LeftTensor & left, const RightTensor & right) +ResultVector column(const TensorLike & T_in, const unsigned int col_index) { - ResultTensor out; + ResultVector out; out.zero(); - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - 
tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); + for (unsigned int row_index = 0; row_index < LIBMESH_DIM; ++row_index) + vector_set_component(out, row_index, tensor_get_component(T_in, row_index, col_index)); return out; } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_add(const LeftTensor & left, const RightTensor & right) +ResultVector multiply(const TensorLike & T_in, const VectorLike & v) { - return tensor_add>(left, right); + ResultVector out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + { + auto value = tensor_get_component(T_in, row, 0) * vector_get_component(v, 0); + for (unsigned int col = 1; col < LIBMESH_DIM; ++col) + value += tensor_get_component(T_in, row, col) * vector_get_component(v, col); + vector_set_component(out, row, value); + } + + return out; } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_subtract(const LeftTensor & left, const RightTensor & right) +ResultVector multiply(const VectorLike & v, const TensorLike & T_in) { - ResultTensor out; + ResultVector out; out.zero(); - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + auto value = vector_get_component(v, 0) * tensor_get_component(T_in, 0, col); + for (unsigned int row = 1; row < LIBMESH_DIM; ++row) + value += vector_get_component(v, row) * tensor_get_component(T_in, row, col); + vector_set_component(out, col, value); + } return out; } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_subtract(const LeftTensor & left, const RightTensor & right) +void assign_tensor_components(LeftTensor & left, const RightTensor & right) { - return tensor_subtract>(left, right); + for (unsigned 
int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, row, col, tensor_get_component(right, row, col)); } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_scale(const Scalar & alpha, const TensorLike & T_in) +void add_tensor_components(LeftTensor & left, const RightTensor & right) { - ResultTensor out; - out.zero(); - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, row, col, alpha * tensor_get_component(T_in, row, col)); - - return out; + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_scale(const Scalar & alpha, const TensorLike & T_in) +void add_scaled_tensor_components(LeftTensor & left, const RightTensor & right, const Scalar & factor) { - return tensor_scale>(alpha, T_in); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) + + factor * tensor_get_component(right, row, col)); } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_divide(const TensorLike & T_in, const Scalar & alpha) +void subtract_tensor_components(LeftTensor & left, const RightTensor & right) { - ResultTensor out; - out.zero(); - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, row, col, tensor_get_component(T_in, row, col) / alpha); - - return out; + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_divide(const TensorLike & T_in, const Scalar & alpha) 
+void subtract_scaled_tensor_components(LeftTensor & left, const RightTensor & right, const Scalar & factor) { - return tensor_divide>(T_in, alpha); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) - + factor * tensor_get_component(right, row, col)); } template LIBMESH_DEVICE_INLINE -auto tensor_leading_determinant(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +void zero_tensor_components(TensorLike & T_in) { - static_assert(is_tensor_like_v, - "tensor_leading_determinant() requires a tensor-like input"); - - if (dim == 0) - return tensor_value_type_t(1); - - if (dim == 1) - return tensor_get_component(T_in, 0, 0); - - if (dim == 2) - return tensor_get_component(T_in, 0, 0) * tensor_get_component(T_in, 1, 1) - - tensor_get_component(T_in, 0, 1) * tensor_get_component(T_in, 1, 0); - -#if LIBMESH_DIM > 2 - const auto a00 = tensor_get_component(T_in, 0, 0); - const auto a01 = tensor_get_component(T_in, 0, 1); - const auto a02 = tensor_get_component(T_in, 0, 2); - const auto a10 = tensor_get_component(T_in, 1, 0); - const auto a11 = tensor_get_component(T_in, 1, 1); - const auto a12 = tensor_get_component(T_in, 1, 2); - const auto a20 = tensor_get_component(T_in, 2, 0); - const auto a21 = tensor_get_component(T_in, 2, 1); - const auto a22 = tensor_get_component(T_in, 2, 2); - - return a00 * (a11 * a22 - a12 * a21) - - a01 * (a10 * a22 - a12 * a20) + - a02 * (a10 * a21 - a11 * a20); -#else - libmesh_ignore(T_in); - return tensor_value_type_t(0); -#endif + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(T_in, row, col, tensor_value_type_t(0)); } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +void scale_tensor_components(TensorLike & T_in, const 
Scalar & alpha) { - static_assert(is_tensor_like_v, "tensor_inverse() requires a tensor-like input"); - - ResultTensor out; - out.zero(); - - if (dim == 0) - return out; + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(T_in, row, col, tensor_get_component(T_in, row, col) * alpha); +} - if (dim == 1) - { - tensor_set_component(out, 0, 0, tensor_value_type_t(1) / tensor_get_component(T_in, 0, 0)); - return out; - } +template +LIBMESH_DEVICE_INLINE +void divide_tensor_components(TensorLike & T_in, const Scalar & alpha) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(T_in, row, col, tensor_get_component(T_in, row, col) / alpha); +} - const auto det = tensor_leading_determinant(T_in, dim); +} // namespace detail - if (dim == 2) - { - tensor_set_component(out, 0, 0, tensor_get_component(T_in, 1, 1) / det); - tensor_set_component(out, 0, 1, -tensor_get_component(T_in, 0, 1) / det); - tensor_set_component(out, 1, 0, -tensor_get_component(T_in, 1, 0) / det); - tensor_set_component(out, 1, 1, tensor_get_component(T_in, 0, 0) / det); - return out; - } +// Tensor reductions and predicates -#if LIBMESH_DIM > 2 - const auto a00 = tensor_get_component(T_in, 0, 0); - const auto a01 = tensor_get_component(T_in, 0, 1); - const auto a02 = tensor_get_component(T_in, 0, 2); - const auto a10 = tensor_get_component(T_in, 1, 0); - const auto a11 = tensor_get_component(T_in, 1, 1); - const auto a12 = tensor_get_component(T_in, 1, 2); - const auto a20 = tensor_get_component(T_in, 2, 0); - const auto a21 = tensor_get_component(T_in, 2, 1); - const auto a22 = tensor_get_component(T_in, 2, 2); +template +LIBMESH_DEVICE_INLINE +auto tensor_contract(const LeftTensor & left, const RightTensor & right) +{ + static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like left input"); + static_assert(is_tensor_like_v, 
"tensor_contract() requires a tensor-like right input"); - tensor_set_component(out, 0, 0, (a11 * a22 - a12 * a21) / det); - tensor_set_component(out, 0, 1, (a02 * a21 - a01 * a22) / det); - tensor_set_component(out, 0, 2, (a01 * a12 - a02 * a11) / det); - tensor_set_component(out, 1, 0, (a12 * a20 - a10 * a22) / det); - tensor_set_component(out, 1, 1, (a00 * a22 - a02 * a20) / det); - tensor_set_component(out, 1, 2, (a02 * a10 - a00 * a12) / det); - tensor_set_component(out, 2, 0, (a10 * a21 - a11 * a20) / det); - tensor_set_component(out, 2, 1, (a01 * a20 - a00 * a21) / det); - tensor_set_component(out, 2, 2, (a00 * a11 - a01 * a10) / det); -#else - libmesh_ignore(T_in); -#endif + using sum_type = + detail::remove_cvref_t; - return out; -} + sum_type sum = sum_type(0); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + sum += tensor_get_component(left, row, col) * tensor_get_component(right, row, col); -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) -{ - return tensor_inverse>(T_in, dim); + return sum; } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_transpose(const TensorLike & T_in) +auto tensor_norm_sq(const TensorLike & T_in) { - ResultTensor out; - out.zero(); + static_assert(is_tensor_like_v, "tensor_norm_sq() requires a tensor-like input"); + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, row, col, tensor_get_component(T_in, col, row)); + sum += libMesh::TensorTools::norm_sq(tensor_get_component(T_in, row, col)); - return out; + return sum; } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_transpose(const TensorLike & T_in) +auto tensor_norm(const TensorLike & 
T_in) { - return tensor_transpose>(T_in); + using std::sqrt; + return sqrt(tensor_norm_sq(T_in)); } -template +template LIBMESH_DEVICE_INLINE -ResultTensor tensor_linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) +auto tensor_trace(const TensorLike & T_in) { - ResultTensor out; - out.zero(); + static_assert(is_tensor_like_v, "tensor_trace() requires a tensor-like input"); - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - alpha * tensor_get_component(A, row, col) + - beta * tensor_get_component(B, row, col)); + using trace_type = detail::remove_cvref_t; + trace_type sum = trace_type(0); + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + sum += tensor_get_component(T_in, i, i); - return out; + return sum; } -template ::value, int>::type = 0> +template LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) +bool tensor_is_zero(const TensorLike & T_in) { - return tensor_linear_combination>(alpha, A, beta, B); + static_assert(is_tensor_like_v, "tensor_is_zero() requires a tensor-like input"); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (tensor_get_component(T_in, row, col) != tensor_value_type_t(0)) + return false; + + return true; } +// Tensor arithmetic + template LIBMESH_DEVICE_INLINE -ResultTensor tensor_multiply(const LeftTensor & left, const RightTensor & right) +ResultTensor tensor_add(const LeftTensor & left, const RightTensor & right) { ResultTensor out; out.zero(); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - { - auto value = tensor_get_component(left, row, 0) * tensor_get_component(right, 0, col); - for (unsigned int k = 1; k < LIBMESH_DIM; ++k) - value += 
tensor_get_component(left, row, k) * tensor_get_component(right, k, col); - tensor_set_component(out, row, col, value); - } + tensor_set_component(out, + row, + col, + tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); return out; } @@ -447,115 +450,124 @@ template ::value, int>::type = 0> LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_multiply(const LeftTensor & left, const RightTensor & right) +tensor_semantic_type_t tensor_add(const LeftTensor & left, const RightTensor & right) { - return tensor_multiply>(left, right); + return tensor_add>(left, right); } -// Tensor/vector conversions - -template +template LIBMESH_DEVICE_INLINE -ResultVector tensor_row(const TensorLike & T_in, const unsigned int row) +ResultTensor tensor_subtract(const LeftTensor & left, const RightTensor & right) { - ResultVector out; + ResultTensor out; out.zero(); - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - vector_set_component(out, col, tensor_get_component(T_in, row, col)); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); return out; } template ::value, int>::type = 0> LIBMESH_DEVICE_INLINE -libMesh::TypeVector> -tensor_row(const TensorLike & T_in, const unsigned int row) +tensor_semantic_type_t tensor_subtract(const LeftTensor & left, const RightTensor & right) { - return tensor_row>>(T_in, row); + return tensor_subtract>(left, right); } -template +template LIBMESH_DEVICE_INLINE -ResultVector tensor_column(const TensorLike & T_in, const unsigned int col) +ResultTensor tensor_scale(const Scalar & alpha, const TensorLike & T_in) { - ResultVector out; + ResultTensor out; out.zero(); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - vector_set_component(out, row, tensor_get_component(T_in, row, col)); + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + 
tensor_set_component(out, row, col, alpha * tensor_get_component(T_in, row, col)); return out; } template ::value, int>::type = 0> LIBMESH_DEVICE_INLINE -libMesh::TypeVector> -tensor_column(const TensorLike & T_in, const unsigned int col) +tensor_semantic_type_t tensor_scale(const Scalar & alpha, const TensorLike & T_in) { - return tensor_column>>(T_in, col); + return tensor_scale>(alpha, T_in); } -template +template LIBMESH_DEVICE_INLINE -ResultVector tensor_vector_multiply(const TensorLike & T_in, const VectorLike & v) +ResultTensor tensor_divide(const TensorLike & T_in, const Scalar & alpha) { - ResultVector out; + ResultTensor out; out.zero(); for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - { - auto value = tensor_get_component(T_in, row, 0) * vector_get_component(v, 0); - for (unsigned int col = 1; col < LIBMESH_DIM; ++col) - value += tensor_get_component(T_in, row, col) * vector_get_component(v, col); - vector_set_component(out, row, value); - } + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, row, col) / alpha); return out; } template ::value, int>::type = 0> LIBMESH_DEVICE_INLINE -vector_semantic_type_t tensor_vector_multiply(const TensorLike & T_in, const VectorLike & v) +tensor_semantic_type_t tensor_divide(const TensorLike & T_in, const Scalar & alpha) { - return tensor_vector_multiply>(T_in, v); + return tensor_divide>(T_in, alpha); } -template +template LIBMESH_DEVICE_INLINE -ResultVector vector_tensor_multiply(const VectorLike & v, const TensorLike & T_in) +ResultTensor tensor_linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) { - ResultVector out; + ResultTensor out; out.zero(); - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - { - auto value = vector_get_component(v, 0) * tensor_get_component(T_in, 0, col); - for (unsigned int row = 1; row < LIBMESH_DIM; ++row) - value += vector_get_component(v, row) * 
tensor_get_component(T_in, row, col); - vector_set_component(out, col, value); - } + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + alpha * tensor_get_component(A, row, col) + + beta * tensor_get_component(B, row, col)); return out; } template ::value, int>::type = 0> LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_tensor_multiply(const VectorLike & v, const TensorLike & T_in) +tensor_semantic_type_t tensor_linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) { - return vector_tensor_multiply>(v, T_in); + return tensor_linear_combination>(alpha, A, beta, B); } +// Tensor/vector conversions + // libMesh-like convenience wrappers template @@ -591,13 +603,29 @@ auto is_zero(const TensorLike & T_in) return tensor_is_zero(T_in); } +template +LIBMESH_DEVICE_INLINE +auto outer_product(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v, ResultTensor> +{ + return detail::outer_product(left, right); +} + template LIBMESH_DEVICE_INLINE auto outer_product(const LeftVector & left, const RightVector & right) -> std::enable_if_t && is_vector_like_v, libMesh::TypeTensor>> { - return tensor_outer_product(left, right); + return outer_product>>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto transpose(const TensorLike & T_in) + -> std::enable_if_t, ResultTensor> +{ + return detail::transpose(T_in); } template @@ -605,7 +633,7 @@ LIBMESH_DEVICE_INLINE auto transpose(const TensorLike & T_in) -> std::enable_if_t, tensor_semantic_type_t> { - return tensor_transpose(T_in); + return transpose>(T_in); } template @@ -616,12 +644,28 @@ auto det(const TensorLike & T_in) return T_in.det(); } +template +LIBMESH_DEVICE_INLINE +auto inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) + -> std::enable_if_t, ResultTensor> +{ + return detail::inverse(T_in, dim); +} + 
template LIBMESH_DEVICE_INLINE auto inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) -> std::enable_if_t, tensor_semantic_type_t> { - return tensor_inverse(T_in, dim); + return inverse>(T_in, dim); +} + +template +LIBMESH_DEVICE_INLINE +auto row(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, ResultVector> +{ + return detail::row(T_in, i); } template @@ -629,7 +673,15 @@ LIBMESH_DEVICE_INLINE auto row(const TensorLike & T_in, const unsigned int i) -> std::enable_if_t, libMesh::TypeVector>> { - return tensor_row(T_in, i); + return row>>(T_in, i); +} + +template +LIBMESH_DEVICE_INLINE +auto column(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, ResultVector> +{ + return detail::column(T_in, i); } template @@ -637,7 +689,15 @@ LIBMESH_DEVICE_INLINE auto column(const TensorLike & T_in, const unsigned int i) -> std::enable_if_t, libMesh::TypeVector>> { - return tensor_column(T_in, i); + return column>>(T_in, i); +} + +template +LIBMESH_DEVICE_INLINE +auto multiply(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v, ResultTensor> +{ + return detail::multiply(left, right); } template @@ -646,7 +706,15 @@ auto multiply(const LeftTensor & left, const RightTensor & right) -> std::enable_if_t && is_tensor_like_v, tensor_semantic_type_t> { - return tensor_multiply(left, right); + return multiply>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto multiply(const TensorLike & T_in, const VectorLike & v) + -> std::enable_if_t && is_vector_like_v, ResultVector> +{ + return detail::multiply(T_in, v); } template @@ -655,7 +723,15 @@ auto multiply(const TensorLike & T_in, const VectorLike & v) -> std::enable_if_t && is_vector_like_v, vector_semantic_type_t> { - return tensor_vector_multiply(T_in, v); + return multiply>(T_in, v); +} + +template +LIBMESH_DEVICE_INLINE +auto multiply(const VectorLike & v, const TensorLike & T_in) + -> std::enable_if_t && is_tensor_like_v, 
ResultVector> +{ + return detail::multiply(v, T_in); } template @@ -664,7 +740,7 @@ auto multiply(const VectorLike & v, const TensorLike & T_in) -> std::enable_if_t && is_tensor_like_v, vector_semantic_type_t> { - return vector_tensor_multiply(v, T_in); + return multiply>(v, T_in); } template @@ -672,9 +748,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::assign(const RightTensor & right) { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(*this, row, col, tensor_get_component(right, row, col)); + detail::assign_tensor_components(*this, right); } template @@ -682,12 +756,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::add(const RightTensor & right) { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(*this, - row, - col, - tensor_get_component(*this, row, col) + tensor_get_component(right, row, col)); + detail::add_tensor_components(*this, right); } template @@ -695,13 +764,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::add_scaled(const RightTensor & right, const value_type & factor) { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(*this, - row, - col, - tensor_get_component(*this, row, col) + - factor * tensor_get_component(right, row, col)); + detail::add_scaled_tensor_components(*this, right, factor); } template @@ -709,12 +772,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::subtract(const RightTensor & right) { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(*this, - row, - col, - tensor_get_component(*this, row, col) - tensor_get_component(right, row, col)); + detail::subtract_tensor_components(*this, right); } template @@ -722,22 +780,14 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::subtract_scaled(const 
RightTensor & right, const value_type & factor) { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(*this, - row, - col, - tensor_get_component(*this, row, col) - - factor * tensor_get_component(right, row, col)); + detail::subtract_scaled_tensor_components(*this, right, factor); } template LIBMESH_DEVICE_INLINE void tensor_ref::zero() { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(*this, row, col, value_type(0)); + detail::zero_tensor_components(*this); } template @@ -773,14 +823,14 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::transpose() const { - return tensor_transpose(*this); + return libMesh::Kokkos::transpose(*this); } template LIBMESH_DEVICE_INLINE auto tensor_ref::det(const unsigned int dim) const { - return tensor_leading_determinant(*this, dim); + return detail::leading_determinant(*this, dim); } template @@ -794,7 +844,7 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::inverse(const unsigned int dim) const { - return tensor_inverse(*this, dim); + return libMesh::Kokkos::inverse(*this, dim); } template @@ -802,7 +852,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::solve(const VectorLike & b, ResultVector & x) const { - const auto solution = tensor_vector_multiply>(this->inverse(), b); + const auto solution = libMesh::Kokkos::multiply>(this->inverse(), b); for (unsigned int component = 0; component < LIBMESH_DIM; ++component) vector_set_component(x, component, vector_get_component(solution, component)); } @@ -811,14 +861,14 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::row(const unsigned int i) const { - return tensor_row(*this, i); + return libMesh::Kokkos::row(*this, i); } template LIBMESH_DEVICE_INLINE auto tensor_ref::column(const unsigned int i) const { - return tensor_column(*this, i); + return libMesh::Kokkos::column(*this, i); } template @@ -826,7 +876,7 @@ 
template LIBMESH_DEVICE_INLINE auto tensor_ref::left_multiply(const VectorLike & v) const { - return vector_tensor_multiply(v, *this); + return libMesh::Kokkos::multiply(v, *this); } // Operator-compatible wrappers for storage-backed refs and mixed ref/owning math. @@ -900,7 +950,7 @@ template @@ -957,13 +1007,7 @@ auto operator+=(LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), LeftTensor &> { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, - row, - col, - tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); - + detail::add_tensor_components(left, right); return left; } @@ -974,13 +1018,7 @@ auto operator-=(LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), LeftTensor &> { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, - row, - col, - tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); - + detail::subtract_tensor_components(left, right); return left; } @@ -991,10 +1029,7 @@ auto operator*=(LeftTensor & left, const Scalar & alpha) !is_vector_like_v && !is_tensor_like_v, LeftTensor &> { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, row, col, tensor_get_component(left, row, col) * alpha); - + detail::scale_tensor_components(left, alpha); return left; } @@ -1005,10 +1040,7 @@ auto operator/=(LeftTensor & left, const Scalar & alpha) !is_vector_like_v && !is_tensor_like_v, LeftTensor &> { - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, row, col, tensor_get_component(left, row, col) / alpha); - + detail::divide_tensor_components(left, alpha); return left; } diff --git 
a/include/gpu/kokkos_vector_ops.h b/include/gpu/kokkos_vector_ops.h index fffbf789da7..591b4d051b1 100644 --- a/include/gpu/kokkos_vector_ops.h +++ b/include/gpu/kokkos_vector_ops.h @@ -44,6 +44,85 @@ vector_semantic_type_t copy_vector(const VectorLike & v) return copy_vector>(v); } +namespace detail +{ + +template +LIBMESH_DEVICE_INLINE +void assign_vector_components(LeftVector & left, const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, component, vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void add_vector_components(LeftVector & left, const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) + vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void add_scaled_vector_components(LeftVector & left, const RightVector & right, const Scalar & factor) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) + + factor * vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void subtract_vector_components(LeftVector & left, const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) - vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void subtract_scaled_vector_components(LeftVector & left, const RightVector & right, const Scalar & factor) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) - + factor * vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void zero_vector_components(VectorLike & 
v) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(v, component, vector_value_type_t(0)); +} + +template +LIBMESH_DEVICE_INLINE +void scale_vector_components(VectorLike & v, const Scalar & alpha) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(v, component, vector_get_component(v, component) * alpha); +} + +template +LIBMESH_DEVICE_INLINE +void divide_vector_components(VectorLike & v, const Scalar & alpha) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(v, component, vector_get_component(v, component) / alpha); +} + +} // namespace detail + // Reductions and predicates template @@ -453,8 +532,7 @@ template LIBMESH_DEVICE_INLINE void vector_ref::assign(const RightVector & right) { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(*this, component, vector_get_component(right, component)); + detail::assign_vector_components(*this, right); } template @@ -462,10 +540,7 @@ template LIBMESH_DEVICE_INLINE void vector_ref::add(const RightVector & right) { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(*this, - component, - vector_get_component(*this, component) + vector_get_component(right, component)); + detail::add_vector_components(*this, right); } template @@ -473,11 +548,7 @@ template LIBMESH_DEVICE_INLINE void vector_ref::add_scaled(const RightVector & right, const value_type & factor) { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(*this, - component, - vector_get_component(*this, component) + - factor * vector_get_component(right, component)); + detail::add_scaled_vector_components(*this, right, factor); } template @@ -485,10 +556,7 @@ template LIBMESH_DEVICE_INLINE void vector_ref::subtract(const RightVector & right) { - for (unsigned int component = 0; component < 
LIBMESH_DIM; ++component) - vector_set_component(*this, - component, - vector_get_component(*this, component) - vector_get_component(right, component)); + detail::subtract_vector_components(*this, right); } template @@ -496,19 +564,14 @@ template LIBMESH_DEVICE_INLINE void vector_ref::subtract_scaled(const RightVector & right, const value_type & factor) { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(*this, - component, - vector_get_component(*this, component) - - factor * vector_get_component(right, component)); + detail::subtract_scaled_vector_components(*this, right, factor); } template LIBMESH_DEVICE_INLINE void vector_ref::zero() { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(*this, component, value_type(0)); + detail::zero_vector_components(*this); } template @@ -668,11 +731,7 @@ auto operator+=(LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), LeftVector &> { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, - component, - vector_get_component(left, component) + vector_get_component(right, component)); - + detail::add_vector_components(left, right); return left; } @@ -683,11 +742,7 @@ auto operator-=(LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), LeftVector &> { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, - component, - vector_get_component(left, component) - vector_get_component(right, component)); - + detail::subtract_vector_components(left, right); return left; } @@ -698,9 +753,7 @@ auto operator*=(LeftVector & left, const Scalar & alpha) !is_vector_like_v && !is_tensor_like_v, LeftVector &> { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, component, vector_get_component(left, component) * alpha); - + 
detail::scale_vector_components(left, alpha); return left; } @@ -711,9 +764,7 @@ auto operator/=(LeftVector & left, const Scalar & alpha) !is_vector_like_v && !is_tensor_like_v, LeftVector &> { - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, component, vector_get_component(left, component) / alpha); - + detail::divide_vector_components(left, alpha); return left; } diff --git a/tests/numerics/kokkos_tensor_ops_oracle_runners.h b/tests/numerics/kokkos_tensor_ops_oracle_runners.h index 59aadda6a4f..de867d59f6d 100644 --- a/tests/numerics/kokkos_tensor_ops_oracle_runners.h +++ b/tests/numerics/kokkos_tensor_ops_oracle_runners.h @@ -126,7 +126,7 @@ test_tensor_ops() const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); const auto c_ref = libMesh::Kokkos::make_vector_ref(d_c, 0); - const auto outer_d = libMesh::Kokkos::tensor_outer_product(a_ref, b_ref); + const auto outer_d = libMesh::Kokkos::outer_product(a_ref, b_ref); const auto transpose_d = A_ref.transpose(); const auto mix_d = Real(1.5) * A_ref - Real(0.25) * outer_d; const auto right_d = A_ref * c_ref; @@ -382,7 +382,7 @@ test_mixed_representation_ops() const auto inverse = A_ref.inverse(); const auto add = A_ref + ref_transpose; const auto scaled = Real(0.5) * A_ref; - const auto outer = libMesh::Kokkos::tensor_outer_product(a_ref, b); + const auto outer = libMesh::Kokkos::outer_product(a_ref, b); d_scalars(0) = a_ref * b; d_scalars(1) = A_ref.contract(outer); From b4d13365dc0b77ab9b1320df16d7f7c1f3768478 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 08:32:34 -0600 Subject: [PATCH 22/48] Make remaining tensor helpers internal-only --- include/gpu/kokkos_tensor_ops.h | 64 +++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index c20ade2661e..c820eacd616 100644 --- 
a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -354,6 +354,9 @@ void divide_tensor_components(TensorLike & T_in, const Scalar & alpha) } // namespace detail +namespace detail +{ + // Tensor reductions and predicates template @@ -568,31 +571,33 @@ tensor_semantic_type_t tensor_linear_combination(const ScalarA & alpha, // Tensor/vector conversions +} // namespace detail + // libMesh-like convenience wrappers template LIBMESH_DEVICE_INLINE auto contract(const LeftTensor & left, const RightTensor & right) -> std::enable_if_t && is_tensor_like_v, - decltype(tensor_contract(left, right))> + decltype(detail::tensor_contract(left, right))> { - return tensor_contract(left, right); + return detail::tensor_contract(left, right); } template LIBMESH_DEVICE_INLINE auto norm_sq(const TensorLike & T_in) - -> std::enable_if_t, decltype(tensor_norm_sq(T_in))> + -> std::enable_if_t, decltype(detail::tensor_norm_sq(T_in))> { - return tensor_norm_sq(T_in); + return detail::tensor_norm_sq(T_in); } template LIBMESH_DEVICE_INLINE auto norm(const TensorLike & T_in) - -> std::enable_if_t, decltype(tensor_norm(T_in))> + -> std::enable_if_t, decltype(detail::tensor_norm(T_in))> { - return tensor_norm(T_in); + return detail::tensor_norm(T_in); } template @@ -600,7 +605,7 @@ LIBMESH_DEVICE_INLINE auto is_zero(const TensorLike & T_in) -> std::enable_if_t, bool> { - return tensor_is_zero(T_in); + return detail::tensor_is_zero(T_in); } template @@ -743,6 +748,29 @@ auto multiply(const VectorLike & v, const TensorLike & T_in) return multiply>(v, T_in); } +template +LIBMESH_DEVICE_INLINE +auto linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) + -> std::enable_if_t && is_tensor_like_v, ResultTensor> +{ + return detail::tensor_linear_combination(alpha, A, beta, B); +} + +template +LIBMESH_DEVICE_INLINE +auto linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) + 
-> std::enable_if_t && is_tensor_like_v, + tensor_semantic_type_t> +{ + return linear_combination>(alpha, A, beta, B); +} + template template LIBMESH_DEVICE_INLINE @@ -795,28 +823,28 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::contract(const RightTensor & right) const { - return tensor_contract(*this, right); + return detail::tensor_contract(*this, right); } template LIBMESH_DEVICE_INLINE auto tensor_ref::norm() const { - return tensor_norm(*this); + return detail::tensor_norm(*this); } template LIBMESH_DEVICE_INLINE auto tensor_ref::norm_sq() const { - return tensor_norm_sq(*this); + return detail::tensor_norm_sq(*this); } template LIBMESH_DEVICE_INLINE bool tensor_ref::is_zero() const { - return tensor_is_zero(*this); + return detail::tensor_is_zero(*this); } template @@ -837,7 +865,7 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::tr() const { - return tensor_trace(*this); + return detail::tensor_trace(*this); } template @@ -887,7 +915,7 @@ auto operator-(const TensorLike & T_in) -> std::enable_if_t && is_tensor_ref_v, tensor_semantic_type_t> { - return tensor_scale(tensor_value_type_t(-1), T_in); + return detail::tensor_scale(tensor_value_type_t(-1), T_in); } template @@ -897,7 +925,7 @@ auto operator+(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), tensor_semantic_type_t> { - return tensor_add(left, right); + return detail::tensor_add(left, right); } template @@ -907,7 +935,7 @@ auto operator-(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), tensor_semantic_type_t> { - return tensor_subtract(left, right); + return detail::tensor_subtract(left, right); } template @@ -939,7 +967,7 @@ auto operator/(const TensorLike & T_in, const Scalar & alpha) !is_vector_like_v && !is_tensor_like_v, tensor_semantic_type_t> { - return tensor_divide(T_in, alpha); + return detail::tensor_divide(T_in, alpha); } template Date: Tue, 12 May 2026 09:19:25 -0600 Subject: [PATCH 23/48] Reduce 
Kokkos algebra wrapper layers --- include/gpu/kokkos_tensor_ops.h | 320 ++++++-------------------------- include/gpu/kokkos_vector_ops.h | 282 +++++++--------------------- 2 files changed, 128 insertions(+), 474 deletions(-) diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index c820eacd616..2d94cb53443 100644 --- a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -187,7 +187,7 @@ ResultTensor transpose(const TensorLike & T_in) template LIBMESH_DEVICE_INLINE -ResultTensor multiply(const LeftTensor & left, const RightTensor & right) +ResultTensor multiply_tensors(const LeftTensor & left, const RightTensor & right) { ResultTensor out; out.zero(); @@ -232,7 +232,7 @@ ResultVector column(const TensorLike & T_in, const unsigned int col_index) template LIBMESH_DEVICE_INLINE -ResultVector multiply(const TensorLike & T_in, const VectorLike & v) +ResultVector multiply_tensor_vector(const TensorLike & T_in, const VectorLike & v) { ResultVector out; out.zero(); @@ -250,7 +250,7 @@ ResultVector multiply(const TensorLike & T_in, const VectorLike & v) template LIBMESH_DEVICE_INLINE -ResultVector multiply(const VectorLike & v, const TensorLike & T_in) +ResultVector multiply_vector_tensor(const VectorLike & v, const TensorLike & T_in) { ResultVector out; out.zero(); @@ -275,21 +275,18 @@ void assign_tensor_components(LeftTensor & left, const RightTensor & right) tensor_set_component(left, row, col, tensor_get_component(right, row, col)); } -template +template LIBMESH_DEVICE_INLINE -void add_tensor_components(LeftTensor & left, const RightTensor & right) +void fill_tensor_components(TensorLike & T_in, const Scalar & value) { for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, - row, - col, - tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); + tensor_set_component(T_in, row, col, value); } template LIBMESH_DEVICE_INLINE 
-void add_scaled_tensor_components(LeftTensor & left, const RightTensor & right, const Scalar & factor) +void update_tensor_components(LeftTensor & left, const RightTensor & right, const Scalar & factor) { for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) @@ -300,38 +297,53 @@ void add_scaled_tensor_components(LeftTensor & left, const RightTensor & right, factor * tensor_get_component(right, row, col)); } -template +template LIBMESH_DEVICE_INLINE -void subtract_tensor_components(LeftTensor & left, const RightTensor & right) +ResultTensor linear_combination(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) { + ResultTensor out; + out.zero(); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, + tensor_set_component(out, row, col, - tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); + alpha * tensor_get_component(A, row, col) + + beta * tensor_get_component(B, row, col)); + + return out; } -template +template LIBMESH_DEVICE_INLINE -void subtract_scaled_tensor_components(LeftTensor & left, const RightTensor & right, const Scalar & factor) +ResultTensor scale_tensor(const Scalar & alpha, const TensorLike & T_in) { + ResultTensor out; + out.zero(); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(left, - row, - col, - tensor_get_component(left, row, col) - - factor * tensor_get_component(right, row, col)); + tensor_set_component(out, row, col, alpha * tensor_get_component(T_in, row, col)); + + return out; } -template +template LIBMESH_DEVICE_INLINE -void zero_tensor_components(TensorLike & T_in) +ResultTensor divide_tensor(const TensorLike & T_in, const Scalar & alpha) { + ResultTensor out; + out.zero(); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) for (unsigned int col = 0; 
col < LIBMESH_DIM; ++col) - tensor_set_component(T_in, row, col, tensor_value_type_t(0)); + tensor_set_component(out, row, col, tensor_get_component(T_in, row, col) / alpha); + + return out; } template @@ -352,11 +364,6 @@ void divide_tensor_components(TensorLike & T_in, const Scalar & alpha) tensor_set_component(T_in, row, col, tensor_get_component(T_in, row, col) / alpha); } -} // namespace detail - -namespace detail -{ - // Tensor reductions and predicates template @@ -429,148 +436,6 @@ bool tensor_is_zero(const TensorLike & T_in) return true; } -// Tensor arithmetic - -template -LIBMESH_DEVICE_INLINE -ResultTensor tensor_add(const LeftTensor & left, const RightTensor & right) -{ - ResultTensor out; - out.zero(); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - tensor_get_component(left, row, col) + tensor_get_component(right, row, col)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_add(const LeftTensor & left, const RightTensor & right) -{ - return tensor_add>(left, right); -} - -template -LIBMESH_DEVICE_INLINE -ResultTensor tensor_subtract(const LeftTensor & left, const RightTensor & right) -{ - ResultTensor out; - out.zero(); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - tensor_get_component(left, row, col) - tensor_get_component(right, row, col)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_subtract(const LeftTensor & left, const RightTensor & right) -{ - return tensor_subtract>(left, right); -} - -template -LIBMESH_DEVICE_INLINE -ResultTensor tensor_scale(const Scalar & alpha, const TensorLike & T_in) -{ - ResultTensor out; - out.zero(); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int 
col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, row, col, alpha * tensor_get_component(T_in, row, col)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_scale(const Scalar & alpha, const TensorLike & T_in) -{ - return tensor_scale>(alpha, T_in); -} - -template -LIBMESH_DEVICE_INLINE -ResultTensor tensor_divide(const TensorLike & T_in, const Scalar & alpha) -{ - ResultTensor out; - out.zero(); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, row, col, tensor_get_component(T_in, row, col) / alpha); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_divide(const TensorLike & T_in, const Scalar & alpha) -{ - return tensor_divide>(T_in, alpha); -} - -template -LIBMESH_DEVICE_INLINE -ResultTensor tensor_linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) -{ - ResultTensor out; - out.zero(); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - tensor_set_component(out, - row, - col, - alpha * tensor_get_component(A, row, col) + - beta * tensor_get_component(B, row, col)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -tensor_semantic_type_t tensor_linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) -{ - return tensor_linear_combination>(alpha, A, beta, B); -} - -// Tensor/vector conversions - } // namespace detail // libMesh-like convenience wrappers @@ -697,80 +562,6 @@ auto column(const TensorLike & T_in, const unsigned int i) return column>>(T_in, i); } -template -LIBMESH_DEVICE_INLINE -auto multiply(const LeftTensor & left, const RightTensor & right) - -> std::enable_if_t && is_tensor_like_v, ResultTensor> -{ - return detail::multiply(left, 
right); -} - -template -LIBMESH_DEVICE_INLINE -auto multiply(const LeftTensor & left, const RightTensor & right) - -> std::enable_if_t && is_tensor_like_v, - tensor_semantic_type_t> -{ - return multiply>(left, right); -} - -template -LIBMESH_DEVICE_INLINE -auto multiply(const TensorLike & T_in, const VectorLike & v) - -> std::enable_if_t && is_vector_like_v, ResultVector> -{ - return detail::multiply(T_in, v); -} - -template -LIBMESH_DEVICE_INLINE -auto multiply(const TensorLike & T_in, const VectorLike & v) - -> std::enable_if_t && is_vector_like_v, - vector_semantic_type_t> -{ - return multiply>(T_in, v); -} - -template -LIBMESH_DEVICE_INLINE -auto multiply(const VectorLike & v, const TensorLike & T_in) - -> std::enable_if_t && is_tensor_like_v, ResultVector> -{ - return detail::multiply(v, T_in); -} - -template -LIBMESH_DEVICE_INLINE -auto multiply(const VectorLike & v, const TensorLike & T_in) - -> std::enable_if_t && is_tensor_like_v, - vector_semantic_type_t> -{ - return multiply>(v, T_in); -} - -template -LIBMESH_DEVICE_INLINE -auto linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) - -> std::enable_if_t && is_tensor_like_v, ResultTensor> -{ - return detail::tensor_linear_combination(alpha, A, beta, B); -} - -template -LIBMESH_DEVICE_INLINE -auto linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) - -> std::enable_if_t && is_tensor_like_v, - tensor_semantic_type_t> -{ - return linear_combination>(alpha, A, beta, B); -} - template template LIBMESH_DEVICE_INLINE @@ -784,7 +575,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::add(const RightTensor & right) { - detail::add_tensor_components(*this, right); + detail::update_tensor_components(*this, right, value_type(1)); } template @@ -792,7 +583,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::add_scaled(const RightTensor & right, const value_type & factor) { - 
detail::add_scaled_tensor_components(*this, right, factor); + detail::update_tensor_components(*this, right, factor); } template @@ -800,7 +591,7 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::subtract(const RightTensor & right) { - detail::subtract_tensor_components(*this, right); + detail::update_tensor_components(*this, right, value_type(-1)); } template @@ -808,14 +599,14 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::subtract_scaled(const RightTensor & right, const value_type & factor) { - detail::subtract_scaled_tensor_components(*this, right, factor); + detail::update_tensor_components(*this, right, -factor); } template LIBMESH_DEVICE_INLINE void tensor_ref::zero() { - detail::zero_tensor_components(*this); + detail::fill_tensor_components(*this, value_type(0)); } template @@ -880,7 +671,8 @@ template LIBMESH_DEVICE_INLINE void tensor_ref::solve(const VectorLike & b, ResultVector & x) const { - const auto solution = libMesh::Kokkos::multiply>(this->inverse(), b); + const auto solution = + detail::multiply_tensor_vector>(this->inverse(), b); for (unsigned int component = 0; component < LIBMESH_DIM; ++component) vector_set_component(x, component, vector_get_component(solution, component)); } @@ -904,7 +696,7 @@ template LIBMESH_DEVICE_INLINE auto tensor_ref::left_multiply(const VectorLike & v) const { - return libMesh::Kokkos::multiply(v, *this); + return v * *this; } // Operator-compatible wrappers for storage-backed refs and mixed ref/owning math. 
@@ -915,7 +707,7 @@ auto operator-(const TensorLike & T_in) -> std::enable_if_t && is_tensor_ref_v, tensor_semantic_type_t> { - return detail::tensor_scale(tensor_value_type_t(-1), T_in); + return detail::scale_tensor>(tensor_value_type_t(-1), T_in); } template @@ -925,7 +717,8 @@ auto operator+(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), tensor_semantic_type_t> { - return detail::tensor_add(left, right); + return detail::linear_combination>( + tensor_value_type_t(1), left, tensor_value_type_t(1), right); } template @@ -935,7 +728,8 @@ auto operator-(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), tensor_semantic_type_t> { - return detail::tensor_subtract(left, right); + return detail::linear_combination>( + tensor_value_type_t(1), left, tensor_value_type_t(-1), right); } template >(alpha, T_in); } template >(alpha, T_in); } template @@ -967,7 +761,7 @@ auto operator/(const TensorLike & T_in, const Scalar & alpha) !is_vector_like_v && !is_tensor_like_v, tensor_semantic_type_t> { - return detail::tensor_divide(T_in, alpha); + return detail::divide_tensor>(T_in, alpha); } template >(left, right); } template >(T_in, v); } template >(v, T_in); } template @@ -1035,7 +829,7 @@ auto operator+=(LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), LeftTensor &> { - detail::add_tensor_components(left, right); + detail::update_tensor_components(left, right, tensor_value_type_t(1)); return left; } @@ -1046,7 +840,7 @@ auto operator-=(LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), LeftTensor &> { - detail::subtract_tensor_components(left, right); + detail::update_tensor_components(left, right, tensor_value_type_t(-1)); return left; } diff --git a/include/gpu/kokkos_vector_ops.h b/include/gpu/kokkos_vector_ops.h index 591b4d051b1..2c68a0341f2 100644 --- a/include/gpu/kokkos_vector_ops.h +++ b/include/gpu/kokkos_vector_ops.h 
@@ -55,19 +55,17 @@ void assign_vector_components(LeftVector & left, const RightVector & right) vector_set_component(left, component, vector_get_component(right, component)); } -template +template LIBMESH_DEVICE_INLINE -void add_vector_components(LeftVector & left, const RightVector & right) +void fill_vector_components(VectorLike & v, const Scalar & value) { for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, - component, - vector_get_component(left, component) + vector_get_component(right, component)); + vector_set_component(v, component, value); } template LIBMESH_DEVICE_INLINE -void add_scaled_vector_components(LeftVector & left, const RightVector & right, const Scalar & factor) +void update_vector_components(LeftVector & left, const RightVector & right, const Scalar & factor) { for (unsigned int component = 0; component < LIBMESH_DIM; ++component) vector_set_component(left, @@ -76,33 +74,72 @@ void add_scaled_vector_components(LeftVector & left, const RightVector & right, factor * vector_get_component(right, component)); } -template +template LIBMESH_DEVICE_INLINE -void subtract_vector_components(LeftVector & left, const RightVector & right) +ResultVector linear_combination(const ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b) { + ResultVector out; + out.zero(); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, + vector_set_component(out, component, - vector_get_component(left, component) - vector_get_component(right, component)); + alpha * vector_get_component(a, component) + + beta * vector_get_component(b, component)); + + return out; } -template +template LIBMESH_DEVICE_INLINE -void subtract_scaled_vector_components(LeftVector & left, const RightVector & right, const Scalar & factor) +ResultVector linear_combination(const ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b, + const ScalarC & 
gamma, + const VectorC & c) { + ResultVector out; + out.zero(); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(left, + vector_set_component(out, component, - vector_get_component(left, component) - - factor * vector_get_component(right, component)); + alpha * vector_get_component(a, component) + + beta * vector_get_component(b, component) + + gamma * vector_get_component(c, component)); + + return out; } -template +template LIBMESH_DEVICE_INLINE -void zero_vector_components(VectorLike & v) +ResultVector scale_vector(const Scalar & alpha, const VectorLike & v) { + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, alpha * vector_get_component(v, component)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector divide_vector(const VectorLike & v, const Scalar & alpha) +{ + ResultVector out; + out.zero(); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(v, component, vector_value_type_t(0)); + vector_set_component(out, component, vector_get_component(v, component) / alpha); + + return out; } template @@ -194,192 +231,13 @@ bool vector_is_zero(const VectorLike & v) return true; } -// Arithmetic - -template -LIBMESH_DEVICE_INLINE -ResultVector vector_add(const LeftVector & left, const RightVector & right) -{ - ResultVector out; - out.zero(); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(out, - component, - vector_get_component(left, component) + vector_get_component(right, component)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_add(const LeftVector & left, const RightVector & right) -{ - return vector_add>(left, right); -} - -template -LIBMESH_DEVICE_INLINE -ResultVector vector_subtract(const LeftVector & left, const RightVector & right) -{ - 
ResultVector out; - out.zero(); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(out, - component, - vector_get_component(left, component) - vector_get_component(right, component)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_subtract(const LeftVector & left, const RightVector & right) -{ - return vector_subtract>(left, right); -} - -template -LIBMESH_DEVICE_INLINE -ResultVector vector_scale(const Scalar & alpha, const VectorLike & v) -{ - ResultVector out; - out.zero(); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(out, component, alpha * vector_get_component(v, component)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_scale(const Scalar & alpha, const VectorLike & v) -{ - return vector_scale>(alpha, v); -} - -template -LIBMESH_DEVICE_INLINE -ResultVector vector_divide(const VectorLike & v, const Scalar & alpha) -{ - ResultVector out; - out.zero(); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(out, component, vector_get_component(v, component) / alpha); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_divide(const VectorLike & v, const Scalar & alpha) -{ - return vector_divide>(v, alpha); -} - -template -LIBMESH_DEVICE_INLINE -ResultVector vector_linear_combination(const ScalarA & alpha, - const VectorA & a, - const ScalarB & beta, - const VectorB & b) -{ - ResultVector out; - out.zero(); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(out, - component, - alpha * vector_get_component(a, component) + - beta * vector_get_component(b, component)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_linear_combination(const 
ScalarA & alpha, - const VectorA & a, - const ScalarB & beta, - const VectorB & b) -{ - return vector_linear_combination>(alpha, a, beta, b); -} - -template -LIBMESH_DEVICE_INLINE -ResultVector vector_linear_combination(const ScalarA & alpha, - const VectorA & a, - const ScalarB & beta, - const VectorB & b, - const ScalarC & gamma, - const VectorC & c) -{ - ResultVector out; - out.zero(); - - for (unsigned int component = 0; component < LIBMESH_DIM; ++component) - vector_set_component(out, - component, - alpha * vector_get_component(a, component) + - beta * vector_get_component(b, component) + - gamma * vector_get_component(c, component)); - - return out; -} - -template ::value, int>::type = 0> -LIBMESH_DEVICE_INLINE -vector_semantic_type_t vector_linear_combination(const ScalarA & alpha, - const VectorA & a, - const ScalarB & beta, - const VectorB & b, - const ScalarC & gamma, - const VectorC & c) -{ - return vector_linear_combination>(alpha, a, beta, b, gamma, c); -} - template LIBMESH_DEVICE_INLINE ResultVector vector_unit(const VectorLike & v) { const auto length = vector_norm(v); libmesh_assert_not_equal_to(length, static_cast(0.)); - return vector_divide(v, length); + return detail::divide_vector(v, length); } template LIBMESH_DEVICE_INLINE void vector_ref::add(const RightVector & right) { - detail::add_vector_components(*this, right); + detail::update_vector_components(*this, right, value_type(1)); } template @@ -548,7 +406,7 @@ template LIBMESH_DEVICE_INLINE void vector_ref::add_scaled(const RightVector & right, const value_type & factor) { - detail::add_scaled_vector_components(*this, right, factor); + detail::update_vector_components(*this, right, factor); } template @@ -556,7 +414,7 @@ template LIBMESH_DEVICE_INLINE void vector_ref::subtract(const RightVector & right) { - detail::subtract_vector_components(*this, right); + detail::update_vector_components(*this, right, value_type(-1)); } template @@ -564,14 +422,14 @@ template LIBMESH_DEVICE_INLINE void 
vector_ref::subtract_scaled(const RightVector & right, const value_type & factor) { - detail::subtract_scaled_vector_components(*this, right, factor); + detail::update_vector_components(*this, right, -factor); } template LIBMESH_DEVICE_INLINE void vector_ref::zero() { - detail::zero_vector_components(*this); + detail::fill_vector_components(*this, value_type(0)); } template @@ -633,7 +491,7 @@ auto operator-(const VectorLike & v) -> std::enable_if_t && is_vector_ref_v, vector_semantic_type_t> { - return vector_scale(vector_value_type_t(-1), v); + return detail::scale_vector>(vector_value_type_t(-1), v); } template @@ -643,7 +501,8 @@ auto operator+(const LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), vector_semantic_type_t> { - return vector_add(left, right); + return detail::linear_combination>( + vector_value_type_t(1), left, vector_value_type_t(1), right); } template @@ -653,7 +512,8 @@ auto operator-(const LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), vector_semantic_type_t> { - return vector_subtract(left, right); + return detail::linear_combination>( + vector_value_type_t(1), left, vector_value_type_t(-1), right); } template >(alpha, v); } template >(alpha, v); } template >(v, alpha); } template @@ -731,7 +591,7 @@ auto operator+=(LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), LeftVector &> { - detail::add_vector_components(left, right); + detail::update_vector_components(left, right, vector_value_type_t(1)); return left; } @@ -742,7 +602,7 @@ auto operator-=(LeftVector & left, const RightVector & right) (is_vector_ref_v || is_vector_ref_v), LeftVector &> { - detail::subtract_vector_components(left, right); + detail::update_vector_components(left, right, vector_value_type_t(-1)); return left; } From b93e873c351f78d92fa51195b21d26554d86c15c Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 09:42:50 -0600 Subject: [PATCH 24/48] Update 
vector oracle to use Kokkos ref operators --- tests/numerics/kokkos_vector_ops_oracle_runners.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/numerics/kokkos_vector_ops_oracle_runners.h b/tests/numerics/kokkos_vector_ops_oracle_runners.h index 73fbbe7834a..5fcd7e45a5e 100644 --- a/tests/numerics/kokkos_vector_ops_oracle_runners.h +++ b/tests/numerics/kokkos_vector_ops_oracle_runners.h @@ -34,14 +34,11 @@ test_vector_ops_case(const vector_case & info) const auto c_ref = libMesh::Kokkos::make_vector_ref(d_c, 0); const Vec copied = libMesh::Kokkos::copy_vector(a_ref); - const Vec mix = libMesh::Kokkos::vector_linear_combination( - Real(1), a_ref, Real(1), b_ref, Real(-1), c_ref); - const Vec scaled = libMesh::Kokkos::vector_linear_combination( - Real(1.25), a_ref, Real(-0.5), b_ref, Real(0.25), c_ref); + const Vec mix = a_ref + b_ref - c_ref; + const Vec scaled = Real(1.25) * a_ref + Real(-0.5) * b_ref + Real(0.25) * c_ref; const Vec plus_assign = a_ref + b_ref; const Vec minus_assign = a_ref - b_ref; - const Vec accum = libMesh::Kokkos::vector_linear_combination( - Real(1.25), a_ref, Real(-0.5), b_ref, Real(0.25), c_ref); + const Vec accum = Real(1.25) * a_ref + Real(-0.5) * b_ref + Real(0.25) * c_ref; const Vec divided = a_ref / Real(5.0); const Vec outer_right = Real(5.0) * a_ref; const Vec outer_left = a_ref * Real(5.0); From c8427b115dac751acb76f5151c7614f5292f18f1 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 09:48:58 -0600 Subject: [PATCH 25/48] Rename tensor combination kernel --- include/gpu/kokkos_tensor_ops.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index 2d94cb53443..71926b92b1f 100644 --- a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -299,10 +299,10 @@ void update_tensor_components(LeftTensor & left, const RightTensor & right, cons template LIBMESH_DEVICE_INLINE 
-ResultTensor linear_combination(const ScalarA & alpha, - const TensorA & A, - const ScalarB & beta, - const TensorB & B) +ResultTensor combine_tensors(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) { ResultTensor out; out.zero(); @@ -717,7 +717,7 @@ auto operator+(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), tensor_semantic_type_t> { - return detail::linear_combination>( + return detail::combine_tensors>( tensor_value_type_t(1), left, tensor_value_type_t(1), right); } @@ -728,7 +728,7 @@ auto operator-(const LeftTensor & left, const RightTensor & right) (is_tensor_ref_v || is_tensor_ref_v), tensor_semantic_type_t> { - return detail::linear_combination>( + return detail::combine_tensors>( tensor_value_type_t(1), left, tensor_value_type_t(-1), right); } From 37230df2d73ab13488614ef83cc840b7e62bb18f Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 10:27:16 -0600 Subject: [PATCH 26/48] Forward libMesh error macros into device code --- include/base/libmesh_common.h | 9 ++++++++- include/base/libmesh_device.h | 2 -- include/base/libmesh_exceptions.h | 4 ++++ include/numerics/type_tensor.h | 4 ++-- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/base/libmesh_common.h b/include/base/libmesh_common.h index 32f8820b4d6..7e663098df1 100644 --- a/include/base/libmesh_common.h +++ b/include/base/libmesh_common.h @@ -309,7 +309,7 @@ extern bool warned_about_auto_ptr; #define libmesh_assert_less_equal_msg(expr1,expr2, msg) ((void) 0) #define libmesh_assert_greater_equal_msg(expr1,expr2, msg) ((void) 0) -#elif defined(LIBMESH_DEVICE_ASSERT) +#elif defined(LIBMESH_KOKKOS_COMPILATION) // Kokkos compilation: use the device-safe assert from libmesh_device.h. 
#define libmesh_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted) @@ -426,6 +426,12 @@ struct casting_compare { // // The libmesh_terminate() macro prints a message and throws a // TerminationException exception +#if LIBMESH_IN_DEVICE_CODE +#define libmesh_error_msg(msg) \ + do { \ + LIBMESH_DEVICE_ERROR_MSG(msg); \ + } while (0) +#else #define libmesh_error_msg(msg) \ do { \ std::stringstream message_stream; \ @@ -433,6 +439,7 @@ struct casting_compare { libMesh::MacroFunctions::report_error(__FILE__, __LINE__, LIBMESH_DATE, LIBMESH_TIME, message_stream); \ LIBMESH_THROW(libMesh::LogicError(message_stream.str())); \ } while (0) +#endif #define libmesh_error() libmesh_error_msg("") diff --git a/include/base/libmesh_device.h b/include/base/libmesh_device.h index f41d4c70b01..87388289b80 100644 --- a/include/base/libmesh_device.h +++ b/include/base/libmesh_device.h @@ -67,8 +67,6 @@ #else # define LIBMESH_DEVICE_INLINE inline # define LIBMESH_IN_DEVICE_CODE 0 -# define LIBMESH_DEVICE_ERROR_MSG(msg) libmesh_error_msg(msg) -# define LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg) libmesh_error_msg_if(cond, msg) #endif #endif // LIBMESH_LIBMESH_DEVICE_H diff --git a/include/base/libmesh_exceptions.h b/include/base/libmesh_exceptions.h index 65237e1478f..8db27cd83d2 100644 --- a/include/base/libmesh_exceptions.h +++ b/include/base/libmesh_exceptions.h @@ -226,7 +226,11 @@ class TerminationException #else +#if LIBMESH_IN_DEVICE_CODE +#define LIBMESH_THROW(e) do { LIBMESH_DEVICE_ERROR_MSG((e).what()); } while (0) +#else #define LIBMESH_THROW(e) do { libMesh::err << e.what(); libMesh::libmesh_abort(); } while (0) +#endif #define libmesh_rethrow #define libmesh_try #define libmesh_catch(e) if (0) diff --git a/include/numerics/type_tensor.h b/include/numerics/type_tensor.h index ac6dc145428..04f4b5bad8f 100644 --- a/include/numerics/type_tensor.h +++ b/include/numerics/type_tensor.h @@ -756,8 +756,8 @@ T & TypeTensor::operator () (const unsigned int i, { #if LIBMESH_DIM < 3 - 
LIBMESH_DEVICE_ERROR_MSG_IF(i >= LIBMESH_DIM || j >= LIBMESH_DIM, - "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); + libmesh_error_msg_if(i >= LIBMESH_DIM || j >= LIBMESH_DIM, + "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); #endif From 1b773eaa80604e4a1a5da4a3db594043ff7ab118 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 10:31:09 -0600 Subject: [PATCH 27/48] Probe Kokkos toolchain configuration --- m4/libmesh_optional_packages.m4 | 81 +++++++++++++++++++++++++++++++++ tests/Makefile.am | 7 +-- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index 9fb641d1945..64e0863398c 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -968,6 +968,86 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" + dnl If KOKKOS_CXX differs from the main compiler, it may not be the MPI + dnl wrapper and thus may need the wrapper's compile flags explicitly in + dnl order to find mpi.h. Query the primary CXX wrapper for compile-time + dnl flags and fall back to MPI_INCLUDES when probing is unavailable. 
+ KOKKOS_MPI_CPPFLAGS="" + AS_IF([test "x$enablempi" = "xyes" && test "x$KOKKOS_CXX" != "x$CXX"], + [ + AC_MSG_CHECKING([for MPI compile flags usable with KOKKOS_CXX]) + KOKKOS_MPI_CPPFLAGS=`$CXX -showme:compile 2>/dev/null` + AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"], + [KOKKOS_MPI_CPPFLAGS=`$CXX -compile_info 2>/dev/null | sed 's/^[[^ ]]* //'`]) dnl drop the leading compiler name printed by the wrapper + AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"], + [KOKKOS_MPI_CPPFLAGS=`$CXX -show 2>/dev/null | sed 's/^[[^ ]]* //'`]) dnl brackets are doubled because m4 strips one quote level + AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"], + [KOKKOS_MPI_CPPFLAGS="$MPI_INCLUDES"]) + AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"], + [AC_MSG_RESULT([not found])], + [AC_MSG_RESULT([$KOKKOS_MPI_CPPFLAGS])]) + ]) + + dnl Fail configure early if the chosen Kokkos compiler/flags/libs cannot + dnl actually compile and link a minimal Kokkos program. + AC_MSG_CHECKING([whether the Kokkos compiler configuration works]) + libmesh_save_CXX="$CXX" + libmesh_save_CPPFLAGS="$CPPFLAGS" + libmesh_save_CXXFLAGS="$CXXFLAGS" + libmesh_save_LDFLAGS="$LDFLAGS" + libmesh_save_LIBS="$LIBS" + + CXX="$KOKKOS_CXX" + CPPFLAGS="$CPPFLAGS $KOKKOS_CPPFLAGS $KOKKOS_MPI_CPPFLAGS" + CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS" + LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS" + LIBS="$LIBS $KOKKOS_LIBS" + + AS_IF([test "x$enablempi" = "xyes"], + [ + LDFLAGS="$LDFLAGS $MPI_LDFLAGS" + LIBS="$LIBS $MPI_LIBS" + AC_LINK_IFELSE( + [AC_LANG_SOURCE([[ +#include <mpi.h> +#include <Kokkos_Core.hpp> +int main(int argc, char ** argv) +{ + MPI_Init(&argc, &argv); + Kokkos::initialize(argc, argv); + Kokkos::finalize(); + MPI_Finalize(); + return 0; +} +]])], + [kokkos_config_works=yes], + [kokkos_config_works=no]) + ], + [ + AC_LINK_IFELSE( + [AC_LANG_SOURCE([[ +#include <Kokkos_Core.hpp> +int main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + Kokkos::finalize(); + return 0; +} +]])], + [kokkos_config_works=yes], + [kokkos_config_works=no]) + ]) + + CXX="$libmesh_save_CXX" + CPPFLAGS="$libmesh_save_CPPFLAGS" + CXXFLAGS="$libmesh_save_CXXFLAGS" + LDFLAGS="$libmesh_save_LDFLAGS" + LIBS="$libmesh_save_LIBS" + + 
AS_IF([test "x$kokkos_config_works" = "xyes"], + [AC_MSG_RESULT([yes])], + [AC_MSG_ERROR([configured Kokkos compiler/flags failed to compile and link a minimal test program])]) + AC_DEFINE([HAVE_KOKKOS], [1], [Define if Kokkos support is enabled in libMesh]) AC_MSG_RESULT(<<< Configuring library with Kokkos support >>>) @@ -984,6 +1064,7 @@ AC_SUBST([KOKKOS_CPPFLAGS]) AC_SUBST([KOKKOS_CXXFLAGS]) AC_SUBST([KOKKOS_LDFLAGS]) AC_SUBST([KOKKOS_LIBS]) +AC_SUBST([KOKKOS_MPI_CPPFLAGS]) AM_CONDITIONAL(LIBMESH_ENABLE_KOKKOS, test x$enablekokkos = xyes) # ------------------------------------------------------------- diff --git a/tests/Makefile.am b/tests/Makefile.am index 6efb6d23e88..85d686408c7 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -383,11 +383,12 @@ TESTS += run_unit_tests.sh endif # Compile .K translation units with the Kokkos device compiler. -# $(MPI_INCLUDES) is needed because KOKKOS_CXX may be nvcc/hipcc -# instead of the MPI compiler wrapper, so mpi.h won't be found implicitly. +# If KOKKOS_CXX is not the MPI wrapper, configure populates +# $(KOKKOS_MPI_CPPFLAGS) from the wrapper's compile flags so mpi.h and +# any wrapper-provided defines remain visible. 
.K.o: $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ -c $< -o $@ From 2ca983df55fcc5f20ef9922ea130cb117b5c229a Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 10:31:27 -0600 Subject: [PATCH 28/48] Regenerate configure and tests Makefile.in --- configure | 125 +++++++++++++++++++++++++++++++++++++++++++++- tests/Makefile.in | 8 +-- 2 files changed, 129 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 91b72819aca..35af44a56c6 100755 --- a/configure +++ b/configure @@ -678,6 +678,7 @@ KOKKOS_LIBS KOKKOS_LDFLAGS KOKKOS_CXXFLAGS KOKKOS_CPPFLAGS +KOKKOS_MPI_CPPFLAGS KOKKOS_CXX ICPX HIPCC @@ -64237,6 +64238,129 @@ fi KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" + KOKKOS_MPI_CPPFLAGS="" + if test "x$enablempi" = "xyes" && test "x$KOKKOS_CXX" != "x$CXX" +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MPI compile flags usable with KOKKOS_CXX" >&5 +printf %s "checking for MPI compile flags usable with KOKKOS_CXX... 
" >&6; } + KOKKOS_MPI_CPPFLAGS=`$CXX -showme:compile 2>/dev/null` + if test "x$KOKKOS_MPI_CPPFLAGS" = "x" +then : + KOKKOS_MPI_CPPFLAGS=`$CXX -compile_info 2>/dev/null` +fi + if test "x$KOKKOS_MPI_CPPFLAGS" = "x" +then : + KOKKOS_MPI_CPPFLAGS=`$CXX -show 2>/dev/null | sed 's/^[^ ]* //'` +fi + if test "x$KOKKOS_MPI_CPPFLAGS" = "x" +then : + KOKKOS_MPI_CPPFLAGS="$MPI_INCLUDES" +fi + if test "x$KOKKOS_MPI_CPPFLAGS" = "x" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: not found" >&5 +printf "%s\n" "not found" >&6; } +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $KOKKOS_MPI_CPPFLAGS" >&5 +printf "%s\n" "$KOKKOS_MPI_CPPFLAGS" >&6; } ;; +esac +fi + +fi + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the Kokkos compiler configuration works" >&5 +printf %s "checking whether the Kokkos compiler configuration works... " >&6; } + libmesh_save_CXX="$CXX" + libmesh_save_CPPFLAGS="$CPPFLAGS" + libmesh_save_CXXFLAGS="$CXXFLAGS" + libmesh_save_LDFLAGS="$LDFLAGS" + libmesh_save_LIBS="$LIBS" + + CXX="$KOKKOS_CXX" + CPPFLAGS="$CPPFLAGS $KOKKOS_CPPFLAGS $KOKKOS_MPI_CPPFLAGS" + CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS" + LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS" + LIBS="$LIBS $KOKKOS_LIBS" + + if test "x$enablempi" = "xyes" +then : + + LDFLAGS="$LDFLAGS $MPI_LDFLAGS" + LIBS="$LIBS $MPI_LIBS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include +int +main (int argc, char ** argv) +{ + + MPI_Init(&argc, &argv); + Kokkos::initialize(argc, argv); + Kokkos::finalize(); + MPI_Finalize(); + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + kokkos_config_works=yes +else case e in #( + e) kokkos_config_works=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +else case e in #( + e) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +#include +int +main (int argc, char ** argv) +{ + + Kokkos::initialize(argc, argv); + Kokkos::finalize(); + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + kokkos_config_works=yes +else case e in #( + e) kokkos_config_works=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ;; +esac +fi + + CXX="$libmesh_save_CXX" + CPPFLAGS="$libmesh_save_CPPFLAGS" + CXXFLAGS="$libmesh_save_CXXFLAGS" + LDFLAGS="$libmesh_save_LDFLAGS" + LIBS="$libmesh_save_LIBS" + + if test "x$kokkos_config_works" = "xyes" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else case e in #( + e) as_fn_error $? "configured Kokkos compiler/flags failed to compile and link a minimal test program" "$LINENO" 5 ;; +esac +fi + printf "%s\n" "#define HAVE_KOKKOS 1" >>confdefs.h @@ -68934,4 +69058,3 @@ printf "%s\n" "--------- Done Configuring libMesh ----------" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ---------------------------------------------" >&5 printf "%s\n" "---------------------------------------------" >&6; } # rm -f -r conftest* config.cache include/libmesh/libmesh_config.h.tmp - diff --git a/tests/Makefile.in b/tests/Makefile.in index d94f4b9384d..96c87de5252 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -2103,6 +2103,7 @@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ @@ -14789,11 +14790,12 @@ $(top_builddir)/libmesh_oprof.la: FORCE (cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) libmesh_oprof.la) # Compile .K translation units with the Kokkos device compiler. 
-# $(MPI_INCLUDES) is needed because KOKKOS_CXX may be nvcc/hipcc -# instead of the MPI compiler wrapper, so mpi.h won't be found implicitly. +# If KOKKOS_CXX is not the MPI wrapper, configure populates +# $(KOKKOS_MPI_CPPFLAGS) from the wrapper's compile flags so mpi.h and +# any wrapper-provided defines remain visible. .K.o: $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ -c $< -o $@ From 3a89c4c9955a94f8b9b348a0b97b5952fb0f664a Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 11:00:02 -0600 Subject: [PATCH 29/48] Force C++ mode for Kokkos configure probe --- configure | 39 +++++++++++++++++++-------------- m4/libmesh_optional_packages.m4 | 2 ++ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/configure b/configure index 35af44a56c6..481367f38f0 100755 --- a/configure +++ b/configure @@ -674,11 +674,11 @@ libmesh_optional_LIBS libmesh_optional_INCLUDES LIBMESH_ENABLE_KOKKOS_FALSE LIBMESH_ENABLE_KOKKOS_TRUE +KOKKOS_MPI_CPPFLAGS KOKKOS_LIBS KOKKOS_LDFLAGS KOKKOS_CXXFLAGS KOKKOS_CPPFLAGS -KOKKOS_MPI_CPPFLAGS KOKKOS_CXX ICPX HIPCC @@ -64238,7 +64238,7 @@ fi KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" - KOKKOS_MPI_CPPFLAGS="" + KOKKOS_MPI_CPPFLAGS="" if test "x$enablempi" = "xyes" && test "x$KOKKOS_CXX" != "x$CXX" then : @@ -64251,7 +64251,7 @@ then : fi if test "x$KOKKOS_MPI_CPPFLAGS" = "x" then : - KOKKOS_MPI_CPPFLAGS=`$CXX -show 2>/dev/null | sed 's/^[^ ]* //'` + KOKKOS_MPI_CPPFLAGS=`$CXX -show 2>/dev/null | sed 's/^^ * //'` fi if test "x$KOKKOS_MPI_CPPFLAGS" = "x" then : @@ -64269,7 +64269,7 @@ fi fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the Kokkos compiler configuration works" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the Kokkos 
compiler configuration works" >&5 printf %s "checking whether the Kokkos compiler configuration works... " >&6; } libmesh_save_CXX="$CXX" libmesh_save_CPPFLAGS="$CPPFLAGS" @@ -64282,6 +64282,11 @@ printf %s "checking whether the Kokkos compiler configuration works... " >&6; } CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS" LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS" LIBS="$LIBS $KOKKOS_LIBS" + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test "x$enablempi" = "xyes" then : @@ -64293,20 +64298,17 @@ then : #include #include -int -main (int argc, char ** argv) +int main(int argc, char ** argv) { - MPI_Init(&argc, &argv); Kokkos::initialize(argc, argv); Kokkos::finalize(); MPI_Finalize(); - - ; return 0; } + _ACEOF -if ac_fn_cxx_try_link "$LINENO" +if ac_fn_c_try_link "$LINENO" then : kokkos_config_works=yes else case e in #( @@ -64322,18 +64324,15 @@ else case e in #( /* end confdefs.h. 
*/ #include -int -main (int argc, char ** argv) +int main(int argc, char ** argv) { - Kokkos::initialize(argc, argv); Kokkos::finalize(); - - ; return 0; } + _ACEOF -if ac_fn_cxx_try_link "$LINENO" +if ac_fn_c_try_link "$LINENO" then : kokkos_config_works=yes else case e in #( @@ -64342,7 +64341,7 @@ esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext - ;; + ;; esac fi @@ -64351,6 +64350,11 @@ fi CXXFLAGS="$libmesh_save_CXXFLAGS" LDFLAGS="$libmesh_save_LDFLAGS" LIBS="$libmesh_save_LIBS" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "x$kokkos_config_works" = "xyes" then : @@ -64387,6 +64391,7 @@ fi + if test x$enablekokkos = xyes; then LIBMESH_ENABLE_KOKKOS_TRUE= LIBMESH_ENABLE_KOKKOS_FALSE='#' diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index 64e0863398c..190a90c824e 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -1002,6 +1002,7 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS" LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS" LIBS="$LIBS $KOKKOS_LIBS" + AC_LANG_PUSH([C++]) AS_IF([test "x$enablempi" = "xyes"], [ @@ -1037,6 +1038,7 @@ int main(int argc, char ** argv) [kokkos_config_works=yes], [kokkos_config_works=no]) ]) + AC_LANG_POP([C++]) CXX="$libmesh_save_CXX" CPPFLAGS="$libmesh_save_CPPFLAGS" From e6ddbe650763e62f838043c2e565f34aab024e1b Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 5 May 2026 10:28:23 -0600 Subject: [PATCH 30/48] Add Kokkos FE implementation headers --- include/Makefile.am | 18 + include/enums/enum_fe_elem_class.h | 50 ++ include/gpu/kokkos_fe_base.h | 43 ++ include/gpu/kokkos_fe_evaluator.h | 368 +++++++++++ include/gpu/kokkos_fe_face_map.h | 160 +++++ include/gpu/kokkos_fe_lagrange_1d.h | 92 +++ 
include/gpu/kokkos_fe_lagrange_2d.h | 253 +++++++ include/gpu/kokkos_fe_lagrange_3d.h | 367 +++++++++++ include/gpu/kokkos_fe_map.h | 235 +++++++ include/gpu/kokkos_fe_monomial.h | 941 +++++++++++++++++++++++++++ include/gpu/kokkos_fe_types.h | 681 +++++++++++++++++++ include/gpu/kokkos_quadrature.h | 652 +++++++++++++++++++ include/gpu/kokkos_scalar_types.h | 118 ++++ src/quadrature/quadrature_gauss_3D.C | 2 - 14 files changed, 3978 insertions(+), 2 deletions(-) create mode 100644 include/enums/enum_fe_elem_class.h create mode 100644 include/gpu/kokkos_fe_base.h create mode 100644 include/gpu/kokkos_fe_evaluator.h create mode 100644 include/gpu/kokkos_fe_face_map.h create mode 100644 include/gpu/kokkos_fe_lagrange_1d.h create mode 100644 include/gpu/kokkos_fe_lagrange_2d.h create mode 100644 include/gpu/kokkos_fe_lagrange_3d.h create mode 100644 include/gpu/kokkos_fe_map.h create mode 100644 include/gpu/kokkos_fe_monomial.h create mode 100644 include/gpu/kokkos_fe_types.h create mode 100644 include/gpu/kokkos_quadrature.h create mode 100644 include/gpu/kokkos_scalar_types.h diff --git a/include/Makefile.am b/include/Makefile.am index a8ace90467c..1eb5f275748 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,5 +1,23 @@ SUBDIRS = libmesh +# GPU (Kokkos) FE math headers — installed preserving the gpu/ subdirectory so +# downstream code can use #include "libmesh/gpu/kokkos_fe_types.h" etc. +# nobase_ is used instead of the standard flat install to keep the namespace. 
+if LIBMESH_ENABLE_KOKKOS +nobase_include_HEADERS = \ + gpu/kokkos_scalar_types.h \ + gpu/kokkos_fe_types.h \ + gpu/kokkos_fe_base.h \ + gpu/kokkos_fe_evaluator.h \ + gpu/kokkos_fe_lagrange_1d.h \ + gpu/kokkos_fe_lagrange_2d.h \ + gpu/kokkos_fe_lagrange_3d.h \ + gpu/kokkos_fe_monomial.h \ + gpu/kokkos_fe_face_map.h \ + gpu/kokkos_fe_map.h \ + gpu/kokkos_quadrature.h +endif + # special handholding for prefix_config.m4 generated files # so that 'make clean ; make' works as does 'make distcheck' # libmesh_config.h is made by ./configure, so it should get diff --git a/include/enums/enum_fe_elem_class.h b/include/enums/enum_fe_elem_class.h new file mode 100644 index 00000000000..2b1b2e96d12 --- /dev/null +++ b/include/enums/enum_fe_elem_class.h @@ -0,0 +1,50 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + +#ifndef LIBMESH_ENUM_FE_ELEM_CLASS_H +#define LIBMESH_ENUM_FE_ELEM_CLASS_H + +namespace libMesh { + +/** + * \enum libMesh::FEElemClass groups element types by topological class, + * independent of polynomial order. + * + * e.g. QUAD4, QUAD8, QUAD9 all map to QUAD; TRI3, TRI6, TRI7 all map to TRI. 
+ * Used together with FEFamily and polynomial order to uniquely identify a + * physics finite element space. + * + * The fixed type allows forward declaration as: + * enum class FEElemClass : unsigned int; + */ +enum class FEElemClass : unsigned int +{ + EDGE = 0, + TRI = 1, + QUAD = 2, + TET = 3, + HEX = 4, + PRISM = 5, + PYRAMID = 6, + N_CLASSES +}; + +} // namespace libMesh + +#endif // LIBMESH_ENUM_FE_ELEM_CLASS_H diff --git a/include/gpu/kokkos_fe_base.h b/include/gpu/kokkos_fe_base.h new file mode 100644 index 00000000000..07664e627eb --- /dev/null +++ b/include/gpu/kokkos_fe_base.h @@ -0,0 +1,43 @@ +// Primary FEEvaluator template for Kokkos device-compatible shape functions. +// +// Uses libMesh's own ElemType and FEFamily enums as non-type template +// parameters — no separate tag structs are needed. +// +// All uses must be explicit specializations defined in the kokkos_fe_lagrange_*.h +// and kokkos_fe_monomial.h headers. Every specialization must provide: +// +// static constexpr unsigned int n_dofs() +// +// LIBMESH_DEVICE_INLINE +// static Real shape(unsigned int i, Real xi, Real eta, Real zeta) +// +// LIBMESH_DEVICE_INLINE +// static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) +// +// Reference-element coordinate conventions (matching libMesh): +// Edge: xi in [-1, 1] +// Quad: (xi, eta) in [-1,1]^2 +// Hex: (xi, eta, zeta) in [-1,1]^3 +// Tri: (xi, eta) in unit triangle, xi >= 0, eta >= 0, xi+eta <= 1 +// Tet: (xi, eta, zeta) in unit tetrahedron +// +// Unused coordinate arguments (e.g. zeta on a 2D element) are accepted but +// ignored, so call sites can always pass all three without special-casing. 
+// +#ifndef LIBMESH_KOKKOS_FE_BASE_H +#define LIBMESH_KOKKOS_FE_BASE_H + +#include "gpu/kokkos_scalar_types.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" + +namespace libMesh::Kokkos +{ + +template +struct FEEvaluator; // forward declaration only; instantiation requires a specialization + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_BASE_H diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h new file mode 100644 index 00000000000..118880c614d --- /dev/null +++ b/include/gpu/kokkos_fe_evaluator.h @@ -0,0 +1,368 @@ +// Kokkos on-device FE shape function dispatch (fe_evaluator.h). +// +// Provides: +// map_shape — isoparametric Lagrange shape (topology-based) +// grad_map_shape — isoparametric Lagrange gradient (topology-based) +// shape — physics FE shape (FEShapeKey-based) +// grad_shape — physics FE gradient (FEShapeKey-based) +// +// All functions are LIBMESH_DEVICE_INLINE and dispatch via switch statements +// that compile to fast GPU branch logic. +// +// These helpers are intended for Kokkos-enabled code paths. Device execution +// happens from .K translation units, but the header is also parsed by host code. 
+ +#ifndef LIBMESH_KOKKOS_FE_EVALUATOR_H +#define LIBMESH_KOKKOS_FE_EVALUATOR_H + +#include "gpu/kokkos_fe_base.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_fe_lagrange_1d.h" +#include "gpu/kokkos_fe_lagrange_2d.h" +#include "gpu/kokkos_fe_lagrange_3d.h" +#include "gpu/kokkos_fe_monomial.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" + +namespace libMesh::Kokkos +{ + +// ── On-device helpers: element class -> spatial dimension ───────────────────── + +LIBMESH_DEVICE_INLINE unsigned int +dim_from_class(FEElemClass cls) +{ + switch (cls) + { + case FEElemClass::EDGE: + return 1; + case FEElemClass::TRI: + case FEElemClass::QUAD: + return 2; + case FEElemClass::TET: + case FEElemClass::HEX: + case FEElemClass::PRISM: + case FEElemClass::PYRAMID: + return 3; + default: + detail::abort_unsupported("dim_from_class(): unsupported element class"); + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +dim_from_topology(libMesh::ElemType topo) +{ + return dim_from_class(class_from_topology(topo)); +} + +// ── On-device helper: exact libMesh Lagrange key -> evaluator topology ───────── + +LIBMESH_DEVICE_INLINE libMesh::ElemType +lagrange_shape_topology_for_key(FEShapeKey key) +{ + const libMesh::ElemType topo = lagrange_shape_topology_or_invalid(key); + + if (topo == libMesh::INVALID_ELEM) + { + detail::abort_unsupported("lagrange_shape_topology_for_key(): unsupported LAGRANGE key for current Kokkos evaluator support boundary"); + return libMesh::INVALID_ELEM; + } + + return topo; +} + +LIBMESH_DEVICE_INLINE Real +eval_lagrange_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (topo) + { + case libMesh::EDGE2: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::EDGE3: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TRI3: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TRI6: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::QUAD4: 
+ return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::QUAD8: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::QUAD9: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TET4: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::TET10: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::HEX8: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::HEX20: + return FEEvaluator::shape(i, xi, eta, zeta); + case libMesh::HEX27: + return FEEvaluator::shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("eval_lagrange_shape(): unsupported evaluator topology"); + return Real(0); + } +} + +LIBMESH_DEVICE_INLINE RealVector +eval_lagrange_grad_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (topo) + { + case libMesh::EDGE2: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::EDGE3: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TRI3: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TRI6: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::QUAD4: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::QUAD8: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::QUAD9: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TET4: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::TET10: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::HEX8: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::HEX20: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + case libMesh::HEX27: + return FEEvaluator::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("eval_lagrange_grad_shape(): unsupported evaluator topology"); + return zero_vector(); + } +} + +// ── Geometry-only shape dispatch (mapping-type + topology) ──────────────────── +// +// Used by map_face_qp_to_parent() for the isoparametric 
mapping from face reference +// coordinates to parent reference coordinates. +// +// The mapping_type parameter selects the geometric map family. Currently only +// LAGRANGE_MAP is supported; RATIONAL_BERNSTEIN_MAP requires additional +// rational-weight data that is not yet threaded through the device path. + +// ── Compile-time topology versions (preferred for GPU) ─────────────────── +// Template on FEFamily and ElemType so gpu compiler only instantiates the specific +// FEEvaluator specialization. No topology switch means no stack pressure. + +/// Compile-time map shape evaluation. +template +LIBMESH_DEVICE_INLINE Real +map_shape(unsigned int i, Real xi, Real eta, Real zeta) +{ + return FEEvaluator::shape(i, xi, eta, zeta); +} + +/// Compile-time map gradient evaluation. +template +LIBMESH_DEVICE_INLINE RealVector +grad_map_shape(unsigned int i, Real xi, Real eta, Real zeta) +{ + return FEEvaluator::grad_shape(i, xi, eta, zeta); +} + +// ── Runtime topology versions (larger GPU stack usage) ─────────────────── + +/// Evaluate the i-th geometric map shape function at (xi, eta, zeta). +LIBMESH_DEVICE_INLINE Real +map_shape(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (mapping_type) + { + case libMesh::LAGRANGE_MAP: + return eval_lagrange_shape(topo, i, xi, eta, zeta); + default: + detail::abort_unsupported("map_shape(): only LAGRANGE_MAP is implemented"); + return Real(0); + } +} + +/// Evaluate the reference-space gradient of the i-th geometric map shape function. 
+LIBMESH_DEVICE_INLINE RealVector +grad_map_shape(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (mapping_type) + { + case libMesh::LAGRANGE_MAP: + return eval_lagrange_grad_shape(topo, i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_map_shape(): only LAGRANGE_MAP is implemented"); + return zero_vector(); + } +} + +// ── Physics shape dispatch (FEShapeKey-based) ───────────────────────────────── + +/// Evaluate the i-th physics shape function at (xi, eta, zeta). +LIBMESH_DEVICE_INLINE Real +shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) +{ + if (!supports_shape(key)) + { + detail::abort_unsupported("shape(): unsupported FE key for current Kokkos evaluator support boundary"); + return Real(0); + } + + switch (key.family) + { + case libMesh::LAGRANGE: + return eval_lagrange_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + + case libMesh::MONOMIAL: + { + switch (monomial_evaluator_dim_or_zero(key.elem_type)) + { + case 1: + switch (key.order) + { + case 0: return MonomialImpl1D<0>::shape(i, xi, eta, zeta); + case 1: return MonomialImpl1D<1>::shape(i, xi, eta, zeta); + case 2: return MonomialImpl1D<2>::shape(i, xi, eta, zeta); + case 3: return MonomialImpl1D<3>::shape(i, xi, eta, zeta); + case 4: return MonomialImpl1D<4>::shape(i, xi, eta, zeta); + case 5: return MonomialImpl1D<5>::shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("shape(): unsupported 1D MONOMIAL order"); + return Real(0); + } + case 2: + switch (key.order) + { + case 0: return MonomialImpl2D<0>::shape(i, xi, eta, zeta); + case 1: return MonomialImpl2D<1>::shape(i, xi, eta, zeta); + case 2: return MonomialImpl2D<2>::shape(i, xi, eta, zeta); + case 3: return MonomialImpl2D<3>::shape(i, xi, eta, zeta); + case 4: return MonomialImpl2D<4>::shape(i, xi, eta, zeta); + case 5: return MonomialImpl2D<5>::shape(i, xi, eta, zeta); + default: + 
detail::abort_unsupported("shape(): unsupported 2D MONOMIAL order"); + return Real(0); + } + case 3: + switch (key.order) + { + case 0: return MonomialImpl3D<0>::shape(i, xi, eta, zeta); + case 1: return MonomialImpl3D<1>::shape(i, xi, eta, zeta); + case 2: return MonomialImpl3D<2>::shape(i, xi, eta, zeta); + case 3: return MonomialImpl3D<3>::shape(i, xi, eta, zeta); + case 4: return MonomialImpl3D<4>::shape(i, xi, eta, zeta); + case 5: return MonomialImpl3D<5>::shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("shape(): unsupported 3D MONOMIAL order"); + return Real(0); + } + default: + detail::abort_unsupported("shape(): unsupported MONOMIAL element topology"); + return Real(0); + } + } + + default: + detail::abort_unsupported("shape(): unsupported FE family"); + return Real(0); + } +} + +/// Evaluate the reference-space gradient of the i-th physics shape function. +/// With J from jacobian(), rows are reference derivatives, so +/// grad_ref = J * grad_phys and grad_phys = J.inverse(dim) * grad_ref. 
+LIBMESH_DEVICE_INLINE RealVector +grad_shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) +{ + if (!supports_grad_shape(key)) + { + detail::abort_unsupported("grad_shape(): unsupported FE key for current Kokkos evaluator support boundary"); + return zero_vector(); + } + + switch (key.family) + { + case libMesh::LAGRANGE: + return eval_lagrange_grad_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + + case libMesh::MONOMIAL: + { + switch (monomial_evaluator_dim_or_zero(key.elem_type)) + { + case 1: + switch (key.order) + { + case 0: return MonomialImpl1D<0>::grad_shape(i, xi, eta, zeta); + case 1: return MonomialImpl1D<1>::grad_shape(i, xi, eta, zeta); + case 2: return MonomialImpl1D<2>::grad_shape(i, xi, eta, zeta); + case 3: return MonomialImpl1D<3>::grad_shape(i, xi, eta, zeta); + case 4: return MonomialImpl1D<4>::grad_shape(i, xi, eta, zeta); + case 5: return MonomialImpl1D<5>::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_shape(): unsupported 1D MONOMIAL order"); + return zero_vector(); + } + case 2: + switch (key.order) + { + case 0: return MonomialImpl2D<0>::grad_shape(i, xi, eta, zeta); + case 1: return MonomialImpl2D<1>::grad_shape(i, xi, eta, zeta); + case 2: return MonomialImpl2D<2>::grad_shape(i, xi, eta, zeta); + case 3: return MonomialImpl2D<3>::grad_shape(i, xi, eta, zeta); + case 4: return MonomialImpl2D<4>::grad_shape(i, xi, eta, zeta); + case 5: return MonomialImpl2D<5>::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_shape(): unsupported 2D MONOMIAL order"); + return zero_vector(); + } + case 3: + switch (key.order) + { + case 0: return MonomialImpl3D<0>::grad_shape(i, xi, eta, zeta); + case 1: return MonomialImpl3D<1>::grad_shape(i, xi, eta, zeta); + case 2: return MonomialImpl3D<2>::grad_shape(i, xi, eta, zeta); + case 3: return MonomialImpl3D<3>::grad_shape(i, xi, eta, zeta); + case 4: return MonomialImpl3D<4>::grad_shape(i, xi, eta, zeta); + case 5: return 
MonomialImpl3D<5>::grad_shape(i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_shape(): unsupported 3D MONOMIAL order"); + return zero_vector(); + } + default: + detail::abort_unsupported("grad_shape(): unsupported MONOMIAL element topology"); + return zero_vector(); + } + } + + default: + detail::abort_unsupported("grad_shape(): unsupported FE family"); + return zero_vector(); + } +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_EVALUATOR_H diff --git a/include/gpu/kokkos_fe_face_map.h b/include/gpu/kokkos_fe_face_map.h new file mode 100644 index 00000000000..822ce4da406 --- /dev/null +++ b/include/gpu/kokkos_fe_face_map.h @@ -0,0 +1,160 @@ +#ifndef LIBMESH_KOKKOS_FE_FACE_MAP_H +#define LIBMESH_KOKKOS_FE_FACE_MAP_H + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "gpu/kokkos_fe_evaluator.h" +#include "libmesh/elem.h" + +namespace libMesh::Kokkos +{ + +LIBMESH_DEVICE_INLINE +RealVector point_to_real_vector(const libMesh::Point & pt) +{ +#if LIBMESH_DIM == 1 + return make_vector(pt(0)); +#elif LIBMESH_DIM == 2 + return make_vector(pt(0), pt(1)); +#else + return make_vector(pt(0), pt(1), pt(2)); +#endif +} + +inline unsigned int +recover_parent_side(const libMesh::Elem & parent, + const libMesh::Elem & side_in_parent) +{ + for (unsigned int side = 0; side < parent.n_sides(); ++side) + { + auto candidate = parent.build_side_ptr(side); + + if (candidate->type() != side_in_parent.type() || + candidate->n_nodes() != side_in_parent.n_nodes()) + continue; + + bool same_side = true; + for (unsigned int k = 0; k < candidate->n_nodes(); ++k) + if (candidate->node_ptr(k) != side_in_parent.node_ptr(k)) + { + same_side = false; + break; + } + + if (same_side) + return side; + } + + return libMesh::invalid_uint; +} + +inline libMesh::Point +parent_refspace_node(const libMesh::Elem & parent, unsigned int node) +{ + switch (parent.type()) + { + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + switch (node) + { + case 9: + return libMesh::Point(-0.5, 
-0.5, 0.5); + case 10: + return libMesh::Point(0.5, -0.5, 0.5); + case 11: + return libMesh::Point(0.5, 0.5, 0.5); + case 12: + return libMesh::Point(-0.5, 0.5, 0.5); + default: + return parent.master_point(node); + } + + case libMesh::PYRAMID18: + switch (node) + { + case 9: + return libMesh::Point(-0.5, -0.5, 0.5); + case 10: + return libMesh::Point(0.5, -0.5, 0.5); + case 11: + return libMesh::Point(0.5, 0.5, 0.5); + case 12: + return libMesh::Point(-0.5, 0.5, 0.5); + case 14: + return libMesh::Point(-2. / 3., 0.0, 1. / 3.); + case 15: + return libMesh::Point(0.0, 2. / 3., 1. / 3.); + case 16: + return libMesh::Point(2. / 3., 0.0, 1. / 3.); + case 17: + return libMesh::Point(0.0, -2. / 3., 1. / 3.); + default: + return parent.master_point(node); + } + + default: + return parent.master_point(node); + } +} + +/** + * Map a face quadrature point from the side element's reference coordinate system + * to the parent element's reference coordinate system. + * + * side_in_parent must be obtained via build_side_ptr() (not side_ptr()), so that + * second-order sides carry their midpoint nodes. Parent reference coordinates + * are reconstructed from the FE reference-space node convention used by + * FE::side_map(), not from side_in_parent.point(k), which lives in physical + * space, and not from Elem::master_point() on pyramids, where those node + * coordinates differ. 
+ * + * @param side_in_parent The side element as embedded in the parent (from build_side_ptr()) + * @param mapping_type Geometric mapping type (LAGRANGE_MAP, RATIONAL_BERNSTEIN_MAP) + * @param side_topo Topology of the side element (libMesh::ElemType) + * @param face_qpt Quadrature point in the side element's reference coordinates + * @returns Corresponding point in the parent element's reference coordinates + */ +inline RealVector +map_face_qp_to_parent(const libMesh::Elem & side_in_parent, + libMesh::ElemMappingType mapping_type, + libMesh::ElemType side_topo, + RealVector face_qpt) +{ + const libMesh::Elem * parent = side_in_parent.interior_parent(); + libmesh_error_msg_if(!parent, + "map_face_qp_to_parent(): side element must carry an interior_parent() from build_side_ptr()"); + + const unsigned int side = recover_parent_side(*parent, side_in_parent); + libmesh_error_msg_if(side == libMesh::invalid_uint, + "map_face_qp_to_parent(): could not recover parent side for the provided side element"); + + const unsigned int n = side_in_parent.n_nodes(); + RealVector parent_pt = zero_vector(); + + // 1-D elements: the "side" is a single vertex node. There is only one + // point-side reference coordinate, (0,0,0), so we map directly to the + // corresponding parent vertex in the parent reference element. 
+ if (n == 1) + { + const libMesh::Point pt = parent_refspace_node(*parent, parent->local_side_node(side, 0)); + return point_to_real_vector(pt); + } + + for (unsigned int k = 0; k < n; ++k) + { + const Real s = face_qpt(0); + const Real t = face_qpt(1); + const Real psi = map_shape(mapping_type, side_topo, k, s, t, 0.0); + + const libMesh::Point pt = parent_refspace_node(*parent, parent->local_side_node(side, k)); + parent_pt.add_scaled(point_to_real_vector(pt), psi); + } + + return parent_pt; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_FE_FACE_MAP_H diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h new file mode 100644 index 00000000000..72e78692882 --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -0,0 +1,92 @@ +// Kokkos FEEvaluator specializations for 1-D Lagrange elements. +// +// Covers EDGE2 (linear) and EDGE3 (quadratic). +// Reference-element coordinate convention (libMesh-compatible): +// EDGE2/EDGE3: xi in [-1, 1] +// +// EDGE3 node ordering (libMesh non-sequential): +// index 0 -> xi = -1 (left node) +// index 1 -> xi = +1 (right node) +// index 2 -> xi = 0 (midpoint) + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_1D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_1D_H + +#include "gpu/kokkos_fe_base.h" + +namespace libMesh::Kokkos +{ + +// ── EDGE2 (linear edge, 2 nodes) ───────────────────────────────────────────── + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 2; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.5 * (1.0 - xi); + case 1: return 0.5 * (1.0 + xi); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-0.5, 0.0, 0.0); + case 1: return make_vector( 0.5, 0.0, 
0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── EDGE3 (quadratic edge, 3 nodes) ────────────────────────────────────────── +// Node ordering matches libMesh: 0->left(-1), 1->right(+1), 2->mid(0) +// L_0(xi) = 0.5*xi*(xi-1) dL_0/dxi = xi - 0.5 +// L_1(xi) = 0.5*xi*(xi+1) dL_1/dxi = xi + 0.5 +// L_2(xi) = 1 - xi² dL_2/dxi = -2*xi + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 3; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.5 * xi * (xi - 1.0); + case 1: return 0.5 * xi * (xi + 1.0); + case 2: return 1.0 - xi * xi; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(xi - 0.5, 0.0, 0.0); + case 1: return make_vector(xi + 0.5, 0.0, 0.0); + case 2: return make_vector(-2.0 * xi, 0.0, 0.0); + default: return zero_vector(); + } + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_1D_H diff --git a/include/gpu/kokkos_fe_lagrange_2d.h b/include/gpu/kokkos_fe_lagrange_2d.h new file mode 100644 index 00000000000..08d1e2f5ba6 --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_2d.h @@ -0,0 +1,253 @@ +// Kokkos FEEvaluator specializations for 2-D Lagrange elements. +// +// Covers TRI3, TRI6, QUAD4, QUAD8, QUAD9. 
+// Reference-element coordinate conventions (libMesh-compatible): +// Tri: xi >= 0, eta >= 0, xi+eta <= 1 (unit triangle) +// Quad: (xi, eta) in [-1,1]² + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_2D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_2D_H + +#include "gpu/kokkos_fe_base.h" + +namespace libMesh::Kokkos +{ + +// ── TRI3 (linear triangle, 3 nodes) ────────────────────────────────────────── +// Barycentric: zeta0 = 1-xi-eta, zeta1 = xi, zeta2 = eta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 3; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 1.0 - xi - eta; + case 1: return xi; + case 2: return eta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-1.0, -1.0, 0.0); + case 1: return make_vector( 1.0, 0.0, 0.0); + case 2: return make_vector( 0.0, 1.0, 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── TRI6 (quadratic triangle, 6 nodes) ─────────────────────────────────────── +// Barycentric: z0=1-xi-eta, z1=xi, z2=eta +// phi_0 = z0*(2*z0-1) = (1-xi-eta)*(1-2*xi-2*eta) +// phi_1 = z1*(2*z1-1) = xi*(2*xi-1) +// phi_2 = z2*(2*z2-1) = eta*(2*eta-1) +// phi_3 = 4*z0*z1 = 4*(1-xi-eta)*xi +// phi_4 = 4*z1*z2 = 4*xi*eta +// phi_5 = 4*z2*z0 = 4*eta*(1-xi-eta) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 6; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + const Real z0 = 1.0 - xi - eta; + switch (i) + { + case 0: return z0 * (2.0 * z0 - 1.0); + case 1: return xi * (2.0 * xi - 1.0); + case 2: return eta * (2.0 * eta - 1.0); + case 3: return 4.0 * z0 * xi; + case 4: return 4.0 * xi * eta; + case 5: return 4.0 * eta * z0; + default: return 
0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(4.0*xi + 4.0*eta - 3.0, 4.0*xi + 4.0*eta - 3.0, 0.0); + case 1: return make_vector(4.0*xi - 1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 4.0*eta - 1.0, 0.0); + case 3: return make_vector(4.0*(1.0 - 2.0*xi - eta), -4.0*xi, 0.0); + case 4: return make_vector(4.0*eta, 4.0*xi, 0.0); + case 5: return make_vector(-4.0*eta, 4.0*(1.0 - xi - 2.0*eta), 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── QUAD4 (bilinear quadrilateral, 4 nodes) ─────────────────────────────────── +// Tensor product of two EDGE2 bases. libMesh node ordering: +// node 0: (-1,-1) node 1: (+1,-1) +// node 2: (+1,+1) node 3: (-1,+1) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.25 * (1.0 - xi) * (1.0 - eta); + case 1: return 0.25 * (1.0 + xi) * (1.0 - eta); + case 2: return 0.25 * (1.0 + xi) * (1.0 + eta); + case 3: return 0.25 * (1.0 - xi) * (1.0 + eta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-0.25*(1.0-eta), -0.25*(1.0-xi), 0.0); + case 1: return make_vector( 0.25*(1.0-eta), -0.25*(1.0+xi), 0.0); + case 2: return make_vector( 0.25*(1.0+eta), 0.25*(1.0+xi), 0.0); + case 3: return make_vector(-0.25*(1.0+eta), 0.25*(1.0-xi), 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── QUAD8 (serendipity quadrilateral, 8 nodes) ──────────────────────────────── +// Node ordering: +// 0: (-1,-1) 1: (+1,-1) 2: (+1,+1) 3: (-1,+1) +// 4: ( 0,-1) 5: (+1, 0) 6: ( 0,+1) 7: (-1, 0) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { 
return 8; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return 0.25 * (1.0-xi) * (1.0-eta) * (-1.0-xi-eta); + case 1: return 0.25 * (1.0+xi) * (1.0-eta) * (-1.0+xi-eta); + case 2: return 0.25 * (1.0+xi) * (1.0+eta) * (-1.0+xi+eta); + case 3: return 0.25 * (1.0-xi) * (1.0+eta) * (-1.0-xi+eta); + case 4: return 0.5 * (1.0-xi*xi) * (1.0-eta); + case 5: return 0.5 * (1.0+xi) * (1.0-eta*eta); + case 6: return 0.5 * (1.0-xi*xi) * (1.0+eta); + case 7: return 0.5 * (1.0-xi) * (1.0-eta*eta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(0.25*(1.0-eta)*(2.0*xi+eta), + 0.25*(1.0-xi)*(xi+2.0*eta), + 0.0); + case 1: return make_vector(0.25*(1.0-eta)*(2.0*xi-eta), + 0.25*(1.0+xi)*(2.0*eta-xi), + 0.0); + case 2: return make_vector(0.25*(1.0+eta)*(2.0*xi+eta), + 0.25*(1.0+xi)*(xi+2.0*eta), + 0.0); + case 3: return make_vector(0.25*(1.0+eta)*(2.0*xi-eta), + 0.25*(1.0-xi)*(2.0*eta-xi), + 0.0); + case 4: return make_vector(-xi*(1.0-eta), -0.5*(1.0-xi*xi), 0.0); + case 5: return make_vector(0.5*(1.0-eta*eta), -eta*(1.0+xi), 0.0); + case 6: return make_vector(-xi*(1.0+eta), 0.5*(1.0-xi*xi), 0.0); + case 7: return make_vector(-0.5*(1.0-eta*eta), -eta*(1.0-xi), 0.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── QUAD9 (biquadratic quadrilateral, 9 nodes) ──────────────────────────────── +// Tensor product of two EDGE3 bases. 
libMesh node ordering:
+// i0[] = {0,1,1,0, 2,1,2,0, 2}
+// i1[] = {0,0,1,1, 0,2,1,2, 2}
+// shape()/grad_shape() index these 9-entry tables by i; i >= 9 yields 0 / zero_vector().
+// 1D basis (libMesh non-sequential ordering):
+// L_0(t) = 0.5*t*(t-1) dL_0/dt = t - 0.5
+// L_1(t) = 0.5*t*(t+1) dL_1/dt = t + 0.5
+// L_2(t) = 1 - t² dL_2/dt = -2*t
+
+template <>
+struct FEEvaluator
+{
+  static constexpr unsigned int n_dofs() { return 9; }
+
+#ifdef LIBMESH_HAVE_KOKKOS
+  LIBMESH_DEVICE_INLINE static Real L(unsigned int k, Real t)
+  {
+    switch (k)
+    {
+      case 0: return 0.5 * t * (t - 1.0);
+      case 1: return 0.5 * t * (t + 1.0);
+      case 2: return 1.0 - t * t;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static Real dL(unsigned int k, Real t)
+  {
+    switch (k)
+    {
+      case 0: return t - 0.5;
+      case 1: return t + 0.5;
+      case 2: return -2.0 * t;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2};
+    static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2};
+    return (i < 9) ? L(i0[i], xi) * L(i1[i], eta) : Real(0.0); // never read past the 9-entry tables
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2};
+    static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2};
+    if (i >= 9) return zero_vector(); // out-of-range i: zero, as in the switch-based evaluators
+    return make_vector(dL(i0[i], xi) * L(i1[i], eta),
+                       L(i0[i], xi) * dL(i1[i], eta), 0.0);
+  }
+#endif
+};
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_FE_LAGRANGE_2D_H
diff --git a/include/gpu/kokkos_fe_lagrange_3d.h b/include/gpu/kokkos_fe_lagrange_3d.h
new file mode 100644
index 00000000000..5f2fbb203c3
--- /dev/null
+++ b/include/gpu/kokkos_fe_lagrange_3d.h
@@ -0,0 +1,367 @@
+// Kokkos FEEvaluator specializations for 3-D Lagrange elements.
+//
+// Covers TET4, TET10, HEX8, HEX20, HEX27.
+// Reference-element coordinate conventions (libMesh-compatible): +// Tet: xi >= 0, eta >= 0, zeta >= 0, xi+eta+zeta <= 1 (unit tetrahedron) +// Hex: (xi, eta, zeta) in [-1,1]³ + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_3D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_3D_H + +#include "gpu/kokkos_fe_base.h" + +namespace libMesh::Kokkos +{ + +// ── TET4 (linear tetrahedron, 4 nodes) ─────────────────────────────────────── +// Barycentric: z0=1-xi-eta-zeta, z1=xi, z2=eta, z3=zeta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 1.0 - xi - eta - zeta; + case 1: return xi; + case 2: return eta; + case 3: return zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + switch (i) + { + case 0: return make_vector(-1.0, -1.0, -1.0); + case 1: return make_vector( 1.0, 0.0, 0.0); + case 2: return make_vector( 0.0, 1.0, 0.0); + case 3: return make_vector( 0.0, 0.0, 1.0); + default: return zero_vector(); + } + } +#endif +}; + +// ── TET10 (quadratic tetrahedron, 10 nodes) ─────────────────────────────────── +// Barycentric: z0=1-xi-eta-zeta, z1=xi, z2=eta, z3=zeta +// phi_0 = z0*(2*z0-1) +// phi_1 = z1*(2*z1-1) = xi*(2*xi-1) +// phi_2 = z2*(2*z2-1) = eta*(2*eta-1) +// phi_3 = z3*(2*z3-1) = zeta*(2*zeta-1) +// phi_4 = 4*z0*z1 = 4*(1-xi-eta-zeta)*xi +// phi_5 = 4*z1*z2 = 4*xi*eta +// phi_6 = 4*z2*z0 = 4*eta*(1-xi-eta-zeta) +// phi_7 = 4*z0*z3 = 4*(1-xi-eta-zeta)*zeta +// phi_8 = 4*z1*z3 = 4*xi*zeta +// phi_9 = 4*z2*z3 = 4*eta*zeta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 10; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + const Real z0 = 1.0 - xi - eta - zeta; + switch 
(i) + { + case 0: return z0 * (2.0*z0 - 1.0); + case 1: return xi * (2.0*xi - 1.0); + case 2: return eta * (2.0*eta - 1.0); + case 3: return zeta* (2.0*zeta - 1.0); + case 4: return 4.0 * z0 * xi; + case 5: return 4.0 * xi * eta; + case 6: return 4.0 * eta * z0; + case 7: return 4.0 * z0 * zeta; + case 8: return 4.0 * xi * zeta; + case 9: return 4.0 * eta * zeta; + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: + { + const Real v = 4.0*(xi + eta + zeta) - 3.0; + return make_vector(v, v, v); + } + case 1: return make_vector(4.0*xi - 1.0, 0.0, 0.0); + case 2: return make_vector(0.0, 4.0*eta - 1.0, 0.0); + case 3: return make_vector(0.0, 0.0, 4.0*zeta - 1.0); + case 4: return make_vector( 4.0*(1.0-2.0*xi-eta-zeta), -4.0*xi, -4.0*xi); + case 5: return make_vector( 4.0*eta, 4.0*xi, 0.0); + case 6: return make_vector(-4.0*eta, 4.0*(1.0-xi-2.0*eta-zeta), -4.0*eta); + case 7: return make_vector(-4.0*zeta, -4.0*zeta, 4.0*(1.0-xi-eta-2.0*zeta)); + case 8: return make_vector(4.0*zeta, 0.0, 4.0*xi); + case 9: return make_vector(0.0, 4.0*zeta, 4.0*eta); + default: return zero_vector(); + } + } +#endif +}; + +// ── HEX8 (trilinear hexahedron, 8 nodes) ───────────────────────────────────── +// Tensor product of three EDGE2 bases. 
+// Node ordering (same as libMesh): +// 0:(-1,-1,-1) 1:(+1,-1,-1) 2:(+1,+1,-1) 3:(-1,+1,-1) +// 4:(-1,-1,+1) 5:(+1,-1,+1) 6:(+1,+1,+1) 7:(-1,+1,+1) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 8; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 0.125*(1.0-xi)*(1.0-eta)*(1.0-zeta); + case 1: return 0.125*(1.0+xi)*(1.0-eta)*(1.0-zeta); + case 2: return 0.125*(1.0+xi)*(1.0+eta)*(1.0-zeta); + case 3: return 0.125*(1.0-xi)*(1.0+eta)*(1.0-zeta); + case 4: return 0.125*(1.0-xi)*(1.0-eta)*(1.0+zeta); + case 5: return 0.125*(1.0+xi)*(1.0-eta)*(1.0+zeta); + case 6: return 0.125*(1.0+xi)*(1.0+eta)*(1.0+zeta); + case 7: return 0.125*(1.0-xi)*(1.0+eta)*(1.0+zeta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return make_vector(-0.125*(1.0-eta)*(1.0-zeta), + -0.125*(1.0-xi) *(1.0-zeta), + -0.125*(1.0-xi) *(1.0-eta)); + case 1: return make_vector( 0.125*(1.0-eta)*(1.0-zeta), + -0.125*(1.0+xi) *(1.0-zeta), + -0.125*(1.0+xi) *(1.0-eta)); + case 2: return make_vector( 0.125*(1.0+eta)*(1.0-zeta), + 0.125*(1.0+xi) *(1.0-zeta), + -0.125*(1.0+xi) *(1.0+eta)); + case 3: return make_vector(-0.125*(1.0+eta)*(1.0-zeta), + 0.125*(1.0-xi) *(1.0-zeta), + -0.125*(1.0-xi) *(1.0+eta)); + case 4: return make_vector(-0.125*(1.0-eta)*(1.0+zeta), + -0.125*(1.0-xi) *(1.0+zeta), + 0.125*(1.0-xi) *(1.0-eta)); + case 5: return make_vector( 0.125*(1.0-eta)*(1.0+zeta), + -0.125*(1.0+xi) *(1.0+zeta), + 0.125*(1.0+xi) *(1.0-eta)); + case 6: return make_vector( 0.125*(1.0+eta)*(1.0+zeta), + 0.125*(1.0+xi) *(1.0+zeta), + 0.125*(1.0+xi) *(1.0+eta)); + case 7: return make_vector(-0.125*(1.0+eta)*(1.0+zeta), + 0.125*(1.0-xi) *(1.0+zeta), + 0.125*(1.0-xi) *(1.0+eta)); + default: return zero_vector(); + } + } +#endif +}; + +// ── HEX20 
(serendipity hexahedron, 20 nodes) ───────────────────────────────── +// Corner nodes: phi = 0.125*(1+sx*xi)*(1+sy*eta)*(1+sz*zeta)*(sx*xi+sy*eta+sz*zeta-2) +// Node ordering follows libMesh (nodes 0-7 corners, 8-19 midside). + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 20; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return 0.125*(1.0-xi)*(1.0-eta)*(1.0-zeta)*(-xi-eta-zeta-2.0); + case 1: return 0.125*(1.0+xi)*(1.0-eta)*(1.0-zeta)*( xi-eta-zeta-2.0); + case 2: return 0.125*(1.0+xi)*(1.0+eta)*(1.0-zeta)*( xi+eta-zeta-2.0); + case 3: return 0.125*(1.0-xi)*(1.0+eta)*(1.0-zeta)*(-xi+eta-zeta-2.0); + case 4: return 0.125*(1.0-xi)*(1.0-eta)*(1.0+zeta)*(-xi-eta+zeta-2.0); + case 5: return 0.125*(1.0+xi)*(1.0-eta)*(1.0+zeta)*( xi-eta+zeta-2.0); + case 6: return 0.125*(1.0+xi)*(1.0+eta)*(1.0+zeta)*( xi+eta+zeta-2.0); + case 7: return 0.125*(1.0-xi)*(1.0+eta)*(1.0+zeta)*(-xi+eta+zeta-2.0); + case 8: return 0.25*(1.0-xi*xi)*(1.0-eta)*(1.0-zeta); + case 10: return 0.25*(1.0-xi*xi)*(1.0+eta)*(1.0-zeta); + case 16: return 0.25*(1.0-xi*xi)*(1.0-eta)*(1.0+zeta); + case 18: return 0.25*(1.0-xi*xi)*(1.0+eta)*(1.0+zeta); + case 9: return 0.25*(1.0+xi)*(1.0-eta*eta)*(1.0-zeta); + case 11: return 0.25*(1.0-xi)*(1.0-eta*eta)*(1.0-zeta); + case 17: return 0.25*(1.0+xi)*(1.0-eta*eta)*(1.0+zeta); + case 19: return 0.25*(1.0-xi)*(1.0-eta*eta)*(1.0+zeta); + case 12: return 0.25*(1.0-xi)*(1.0-eta)*(1.0-zeta*zeta); + case 13: return 0.25*(1.0+xi)*(1.0-eta)*(1.0-zeta*zeta); + case 14: return 0.25*(1.0+xi)*(1.0+eta)*(1.0-zeta*zeta); + case 15: return 0.25*(1.0-xi)*(1.0+eta)*(1.0-zeta*zeta); + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + switch (i) + { + case 0: return make_vector( + -0.125*(1.0-eta)*(1.0-zeta)*(-2.0*xi-eta-zeta-1.0), + -0.125*(1.0-xi) 
*(1.0-zeta)*(-xi-2.0*eta-zeta-1.0), + -0.125*(1.0-xi) *(1.0-eta) *(-xi-eta-2.0*zeta-1.0)); + case 1: return make_vector( + 0.125*(1.0-eta)*(1.0-zeta)*(2.0*xi-eta-zeta-1.0), + -0.125*(1.0+xi) *(1.0-zeta)*(xi-2.0*eta-zeta-1.0), + -0.125*(1.0+xi) *(1.0-eta) *(xi-eta-2.0*zeta-1.0)); + case 2: return make_vector( + 0.125*(1.0+eta)*(1.0-zeta)*(2.0*xi+eta-zeta-1.0), + 0.125*(1.0+xi) *(1.0-zeta)*(xi+2.0*eta-zeta-1.0), + -0.125*(1.0+xi) *(1.0+eta) *(xi+eta-2.0*zeta-1.0)); + case 3: return make_vector( + -0.125*(1.0+eta)*(1.0-zeta)*(-2.0*xi+eta-zeta-1.0), + 0.125*(1.0-xi) *(1.0-zeta)*(-xi+2.0*eta-zeta-1.0), + -0.125*(1.0-xi) *(1.0+eta) *(-xi+eta-2.0*zeta-1.0)); + case 4: return make_vector( + -0.125*(1.0-eta)*(1.0+zeta)*(-2.0*xi-eta+zeta-1.0), + -0.125*(1.0-xi) *(1.0+zeta)*(-xi-2.0*eta+zeta-1.0), + 0.125*(1.0-xi) *(1.0-eta) *(-xi-eta+2.0*zeta-1.0)); + case 5: return make_vector( + 0.125*(1.0-eta)*(1.0+zeta)*(2.0*xi-eta+zeta-1.0), + -0.125*(1.0+xi) *(1.0+zeta)*(xi-2.0*eta+zeta-1.0), + 0.125*(1.0+xi) *(1.0-eta) *(xi-eta+2.0*zeta-1.0)); + case 6: return make_vector( + 0.125*(1.0+eta)*(1.0+zeta)*(2.0*xi+eta+zeta-1.0), + 0.125*(1.0+xi) *(1.0+zeta)*(xi+2.0*eta+zeta-1.0), + 0.125*(1.0+xi) *(1.0+eta) *(xi+eta+2.0*zeta-1.0)); + case 7: return make_vector( + -0.125*(1.0+eta)*(1.0+zeta)*(-2.0*xi+eta+zeta-1.0), + 0.125*(1.0-xi) *(1.0+zeta)*(-xi+2.0*eta+zeta-1.0), + 0.125*(1.0-xi) *(1.0+eta) *(-xi+eta+2.0*zeta-1.0)); + case 8: return make_vector(-0.5*xi*(1.0-eta)*(1.0-zeta), + -0.25*(1.0-xi*xi)*(1.0-zeta), + -0.25*(1.0-xi*xi)*(1.0-eta)); + case 10: return make_vector(-0.5*xi*(1.0+eta)*(1.0-zeta), + 0.25*(1.0-xi*xi)*(1.0-zeta), + -0.25*(1.0-xi*xi)*(1.0+eta)); + case 16: return make_vector(-0.5*xi*(1.0-eta)*(1.0+zeta), + -0.25*(1.0-xi*xi)*(1.0+zeta), + 0.25*(1.0-xi*xi)*(1.0-eta)); + case 18: return make_vector(-0.5*xi*(1.0+eta)*(1.0+zeta), + 0.25*(1.0-xi*xi)*(1.0+zeta), + 0.25*(1.0-xi*xi)*(1.0+eta)); + case 9: return make_vector( 0.25*(1.0-eta*eta)*(1.0-zeta), + 
-0.5*eta*(1.0+xi)*(1.0-zeta), + -0.25*(1.0+xi)*(1.0-eta*eta)); + case 11: return make_vector(-0.25*(1.0-eta*eta)*(1.0-zeta), + -0.5*eta*(1.0-xi)*(1.0-zeta), + -0.25*(1.0-xi)*(1.0-eta*eta)); + case 17: return make_vector( 0.25*(1.0-eta*eta)*(1.0+zeta), + -0.5*eta*(1.0+xi)*(1.0+zeta), + 0.25*(1.0+xi)*(1.0-eta*eta)); + case 19: return make_vector(-0.25*(1.0-eta*eta)*(1.0+zeta), + -0.5*eta*(1.0-xi)*(1.0+zeta), + 0.25*(1.0-xi)*(1.0-eta*eta)); + case 12: return make_vector(-0.25*(1.0-eta)*(1.0-zeta*zeta), + -0.25*(1.0-xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0-xi)*(1.0-eta)); + case 13: return make_vector( 0.25*(1.0-eta)*(1.0-zeta*zeta), + -0.25*(1.0+xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0+xi)*(1.0-eta)); + case 14: return make_vector( 0.25*(1.0+eta)*(1.0-zeta*zeta), + 0.25*(1.0+xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0+xi)*(1.0+eta)); + case 15: return make_vector(-0.25*(1.0+eta)*(1.0-zeta*zeta), + 0.25*(1.0-xi)*(1.0-zeta*zeta), + -0.5*zeta*(1.0-xi)*(1.0+eta)); + default: return zero_vector(); + } + } +#endif +}; + +// ── HEX27 (triquadratic hexahedron, 27 nodes) ───────────────────────────────── +// Tensor product of three EDGE3 bases. 
+// Index tables (libMesh fe_lagrange_shape_3D.C):
+// i0[] = {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2}
+// i1[] = {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2}
+// i2[] = {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2}
+// shape()/grad_shape() index these 27-entry tables by i; i >= 27 yields 0 / zero_vector().
+template <>
+struct FEEvaluator
+{
+  static constexpr unsigned int n_dofs() { return 27; }
+
+#ifdef LIBMESH_HAVE_KOKKOS
+  LIBMESH_DEVICE_INLINE static Real L(unsigned int k, Real t)
+  {
+    switch (k)
+    {
+      case 0: return 0.5 * t * (t - 1.0);
+      case 1: return 0.5 * t * (t + 1.0);
+      case 2: return 1.0 - t * t;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static Real dL(unsigned int k, Real t)
+  {
+    switch (k)
+    {
+      case 0: return t - 0.5;
+      case 1: return t + 0.5;
+      case 2: return -2.0 * t;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    static const unsigned int i0[] =
+      {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2};
+    static const unsigned int i1[] =
+      {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2};
+    static const unsigned int i2[] =
+      {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2};
+    return (i < 27) ? L(i0[i], xi) * L(i1[i], eta) * L(i2[i], zeta) : Real(0.0); // never read past the 27-entry tables
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    static const unsigned int i0[] =
+      {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2};
+    static const unsigned int i1[] =
+      {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2};
+    static const unsigned int i2[] =
+      {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2};
+    if (i >= 27) return zero_vector(); // out-of-range i: zero, as in the switch-based evaluators
+    const Real lxi = L(i0[i], xi);
+    const Real leta = L(i1[i], eta);
+    const Real lzeta = L(i2[i], zeta);
+    return make_vector(dL(i0[i], xi) * leta * lzeta,
+                       lxi * dL(i1[i], eta) * lzeta, lxi * leta * dL(i2[i], zeta));
+  }
+#endif
+};
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_FE_LAGRANGE_3D_H
diff --git
a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h new file mode 100644 index 00000000000..d71f81c931a --- /dev/null +++ b/include/gpu/kokkos_fe_map.h @@ -0,0 +1,235 @@ +// Kokkos device-compatible physical map evaluation. +// +// All functions are LIBMESH_DEVICE_INLINE — callable from both host and GPU. +// +// Two API levels: +// 1. Template on ElemType (preferred): eliminates the topology switch at +// compile time, producing small inlined functions with no stack pressure. +// 2. Runtime ElemType dispatch: convenient but requires increased CUDA +// stack size due to the large switch in map_shape. +// +// Given node coordinates and a reference-space point, these functions compute: +// - Physical coordinates (xyz) +// - Jacobian matrix (reference -> physical) +// - Jacobian measures and JxW +// - Outward normal helpers for face/edge integrals + +#ifndef LIBMESH_KOKKOS_FE_MAP_H +#define LIBMESH_KOKKOS_FE_MAP_H + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_scalar_types.h" + +namespace libMesh::Kokkos +{ + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(i, xi, eta, zeta) * nodes[i]; + return xyz; +} + +// ========================================================================= +// Compile-time dispatch (preferred for GPU — no switch overhead) +// +// Template on FEFamily and ElemType so nvcc only instantiates the specific +// FEEvaluator specialization. No topology switch means no stack pressure. 
+// ========================================================================= + +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), nodes[k]); + return J; +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(k, xi, eta, zeta); + const RealVector grad = grad_map_shape(k, xi, eta, zeta); + xyz += phi * nodes[k]; + J += libMesh::outer_product(grad, nodes[k]); + } +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(const RealVector * face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), + face_nodes[k]); + return J; +} + +// ========================================================================= +// Runtime topology dispatch (convenient, but larger GPU stack usage) +// ========================================================================= + +/// Compute physical coordinate (runtime topology). +LIBMESH_DEVICE_INLINE RealVector +physical_point(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(mapping_type, topo, i, xi, eta, zeta) * nodes[i]; + return xyz; +} + +/// Compute Jacobian matrix (runtime topology), with rows d(x)/d(xi_r). 
+LIBMESH_DEVICE_INLINE RealTensor +jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, topo, k, xi, eta, zeta), + nodes[k]); + return J; +} + +/// Compute physical point and Jacobian together (runtime topology). +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(mapping_type, topo, k, xi, eta, zeta); + const RealVector grad = grad_map_shape(mapping_type, topo, k, xi, eta, zeta); + xyz += phi * nodes[k]; + J += libMesh::outer_product(grad, nodes[k]); + } +} + +/// Face Jacobian (runtime topology). +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const RealVector * face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, face_topo, k, xi, eta, zeta), + face_nodes[k]); + return J; +} + +// ========================================================================= +// Geometry helpers (topology-independent) +// ========================================================================= + +/// libMesh FEMap-compatible volume measure * quadrature_weight. 
+///   3D: det(J) * weight
+///   2D: ||J_row0 x J_row1|| * weight
+///   1D: ||J_row0|| * weight
+///   0D: weight
+LIBMESH_DEVICE_INLINE Real
+volume_jxw(const RealTensor & J, unsigned int dim, Real quad_weight)
+{
+  // Any dim other than 1, 2 or 3 is handled like a point: the weight alone.
+  switch (dim)
+  {
+    case 3:
+      return leading_determinant(J, 3) * quad_weight;
+    case 2:
+      return J.row(0).cross(J.row(1)).norm() * quad_weight;
+    case 1:
+      return J.row(0).norm() * quad_weight;
+    default:
+      return quad_weight;
+  }
+}
+
+/// Face JxW: measure of a side of the parent element * quadrature_weight.
+///   parent_dim 3: ||J_row0 x J_row1|| * weight
+///   parent_dim 2: ||J_row0|| * weight
+///   otherwise:    weight (face is a point)
+LIBMESH_DEVICE_INLINE Real
+face_jxw(const RealTensor & J, unsigned int parent_dim, Real quad_weight)
+{
+  switch (parent_dim)
+  {
+    case 3:
+      return J.row(0).cross(J.row(1)).norm() * quad_weight;
+    case 2:
+      return J.row(0).norm() * quad_weight;
+    default:
+      return quad_weight;
+  }
+}
+
+/// Unit normal of a face of a 3D element, built from the face Jacobian as the
+/// normalised cross product of its first two rows. A zero-length cross product
+/// is returned unnormalised. For any parent_dim other than 3 the call is
+/// reported through detail::abort_unsupported().
+LIBMESH_DEVICE_INLINE RealVector
+face_normal(const RealTensor & J, unsigned int parent_dim)
+{
+  if (parent_dim == 3)
+  {
+    RealVector normal = J.row(0).cross(J.row(1));
+
+    const Real magnitude = normal.norm();
+    if (magnitude > 0.0)
+      normal *= 1.0 / magnitude;
+    return normal;
+  }
+
+  detail::abort_unsupported("face_normal(): only 3D face normals are defined from face Jacobians alone; use edge_normal_on_parent_surface() for 2D parent elements");
+  // Only reached if abort_unsupported() returns.
+  return zero_vector();
+}
+
+/// Outward edge normal for a 2D parent element embedded in 3D.
+/// Requires the edge Jacobian and the parent surface Jacobian at the mapped
+/// parent-reference point.
+LIBMESH_DEVICE_INLINE RealVector
+edge_normal_on_parent_surface(const RealTensor & edge_J,
+                              const RealTensor & parent_J)
+{
+  // Normal of the parent surface: cross product of the first two rows of the
+  // parent Jacobian, normalised unless it has zero length.
+  RealVector parent_normal = parent_J.row(0).cross(parent_J.row(1));
+  const Real parent_mag = parent_normal.norm();
+  if (parent_mag > 0.0)
+    parent_normal *= 1.0 / parent_mag;
+
+  // In-surface edge normal: first row of the edge Jacobian crossed with the
+  // surface normal, again normalised unless it has zero length.
+  RealVector edge_normal = edge_J.row(0).cross(parent_normal);
+  const Real edge_mag = edge_normal.norm();
+  if (edge_mag > 0.0)
+    edge_normal *= 1.0 / edge_mag;
+  return edge_normal;
+}
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_FE_MAP_H
diff --git a/include/gpu/kokkos_fe_monomial.h b/include/gpu/kokkos_fe_monomial.h
new file mode 100644
index 00000000000..b68289c72eb
--- /dev/null
+++ b/include/gpu/kokkos_fe_monomial.h
@@ -0,0 +1,941 @@
+// Kokkos FEEvaluator specializations for MONOMIAL elements.
+//
+// MONOMIAL uses the complete total-degree polynomial space P_p. Following
+// libMesh's FE<Dim, MONOMIAL>, the basis is parameterised by spatial dimension,
+// not element class — TRI and QUAD share MonomialImpl2D; TET/HEX/PRISM/PYRAMID
+// share MonomialImpl3D. This gives 3 x 6 = 18 impl specializations (dims 1/2/3,
+// orders 0-5), then per-topology FEEvaluator delegating specializations wire each
+// libMesh::ElemType to the matching impl.
+//
+// Basis ordering: graded-lex (total degree first, then lexicographic by
+// decreasing xi exponent). Matches libMesh::FE<Dim, MONOMIAL>::shape ordering.
+
+#ifndef LIBMESH_KOKKOS_FE_MONOMIAL_H
+#define LIBMESH_KOKKOS_FE_MONOMIAL_H
+
+#include "gpu/kokkos_fe_base.h"
+#include "libmesh/enum_elem_type.h"
+
+namespace libMesh::Kokkos
+{
+
+// ═══════════════════════════════════════════════════════════════════════════
+// MonomialImpl1D — 1-D MONOMIAL basis, order N
+//   n_dofs = N + 1
+//   Basis: {1, xi, xi², xi³, ...}
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Contract shared by every Impl specialization in this file:
+//   n_dofs()                     number of basis functions for the order
+//   shape(i, xi, eta, zeta)      value of basis function i
+//   grad_shape(i, xi, eta, zeta) reference-space gradient of basis function i
+// An index i outside [0, n_dofs()) falls through to the `default` label and
+// yields 0.0 / zero_vector() (order-0 specializations ignore i entirely).
+// Coordinates a given dimension does not use are left unnamed.
+//
+// Only declared here; each supported order is an explicit specialization.
+// NOTE(review): the parameter list after `template` (presumably the order N)
+// is absent in this copy of the patch -- restore from upstream.
+template
+struct MonomialImpl1D;
+
+// Order 0: the constant function.
+template <>
+struct MonomialImpl1D<0>
+{
+  static constexpr unsigned int n_dofs() { return 1; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    return 1.0;
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    return zero_vector();
+  }
+};
+
+// Order 1: {1, xi}.
+template <>
+struct MonomialImpl1D<1>
+{
+  static constexpr unsigned int n_dofs() { return 2; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 2: adds xi².
+template <>
+struct MonomialImpl1D<2>
+{
+  static constexpr unsigned int n_dofs() { return 3; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return xi * xi;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(2.0 * xi, 0.0, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 3: adds xi³.
+template <>
+struct MonomialImpl1D<3>
+{
+  static constexpr unsigned int n_dofs() { return 4; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return xi * xi;
+      case 3: return xi * xi * xi;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 4: adds xi⁴.
+template <>
+struct MonomialImpl1D<4>
+{
+  static constexpr unsigned int n_dofs() { return 5; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return xi * xi;
+      case 3: return xi * xi * xi;
+      case 4: return xi * xi * xi * xi;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 4: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 5: adds xi⁵.
+template <>
+struct MonomialImpl1D<5>
+{
+  static constexpr unsigned int n_dofs() { return 6; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return xi * xi;
+      case 3: return xi * xi * xi;
+      case 4: return xi * xi * xi * xi;
+      case 5: return xi * xi * xi * xi * xi;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 4: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0);
+      case 5: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// MonomialImpl2D — 2-D MONOMIAL basis, order N
+//   n_dofs = (N+1)(N+2)/2
+//   Graded-lex basis: {1, xi, eta, xi², xi·eta, eta², ...}
+//   Shared by TRI and QUAD element classes.
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Only declared here; each supported order is an explicit specialization.
+// Every order repeats the entries of the previous one and appends the
+// monomials of the next total degree, so indices are stable across orders.
+// NOTE(review): the parameter list after `template` (presumably the order N)
+// is absent in this copy of the patch -- restore from upstream.
+template
+struct MonomialImpl2D;
+
+// Order 0: the constant function.
+template <>
+struct MonomialImpl2D<0>
+{
+  static constexpr unsigned int n_dofs() { return 1; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    return 1.0;
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    return zero_vector();
+  }
+};
+
+// Order 1: {1, xi, eta}.
+template <>
+struct MonomialImpl2D<1>
+{
+  static constexpr unsigned int n_dofs() { return 3; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 2: adds the three degree-2 monomials (indices 3-5).
+template <>
+struct MonomialImpl2D<2>
+{
+  static constexpr unsigned int n_dofs() { return 6; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return xi * xi;
+      case 4: return xi * eta;
+      case 5: return eta * eta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 4: return make_vector(eta, xi, 0.0);
+      case 5: return make_vector(0.0, 2.0 * eta, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 3: adds the four degree-3 monomials (indices 6-9).
+template <>
+struct MonomialImpl2D<3>
+{
+  static constexpr unsigned int n_dofs() { return 10; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return xi * xi;
+      case 4: return xi * eta;
+      case 5: return eta * eta;
+      case 6: return xi * xi * xi;
+      case 7: return xi * xi * eta;
+      case 8: return xi * eta * eta;
+      case 9: return eta * eta * eta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 4: return make_vector(eta, xi, 0.0);
+      case 5: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0);
+      case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0);
+      case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 4: adds the five degree-4 monomials (indices 10-14).
+template <>
+struct MonomialImpl2D<4>
+{
+  static constexpr unsigned int n_dofs() { return 15; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return xi * xi;
+      case 4: return xi * eta;
+      case 5: return eta * eta;
+      case 6: return xi * xi * xi;
+      case 7: return xi * xi * eta;
+      case 8: return xi * eta * eta;
+      case 9: return eta * eta * eta;
+      case 10: return xi * xi * xi * xi;
+      case 11: return xi * xi * xi * eta;
+      case 12: return xi * xi * eta * eta;
+      case 13: return xi * eta * eta * eta;
+      case 14: return eta * eta * eta * eta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 4: return make_vector(eta, xi, 0.0);
+      case 5: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0);
+      case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0);
+      case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0);
+      case 10: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0);
+      case 11: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0);
+      case 12: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0);
+      case 13: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0);
+      case 14: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 5: adds the six degree-5 monomials (indices 15-20).
+template <>
+struct MonomialImpl2D<5>
+{
+  static constexpr unsigned int n_dofs() { return 21; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return xi * xi;
+      case 4: return xi * eta;
+      case 5: return eta * eta;
+      case 6: return xi * xi * xi;
+      case 7: return xi * xi * eta;
+      case 8: return xi * eta * eta;
+      case 9: return eta * eta * eta;
+      case 10: return xi * xi * xi * xi;
+      case 11: return xi * xi * xi * eta;
+      case 12: return xi * xi * eta * eta;
+      case 13: return xi * eta * eta * eta;
+      case 14: return eta * eta * eta * eta;
+      case 15: return xi * xi * xi * xi * xi;
+      case 16: return xi * xi * xi * xi * eta;
+      case 17: return xi * xi * xi * eta * eta;
+      case 18: return xi * xi * eta * eta * eta;
+      case 19: return xi * eta * eta * eta * eta;
+      case 20: return eta * eta * eta * eta * eta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 4: return make_vector(eta, xi, 0.0);
+      case 5: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0);
+      case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0);
+      case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0);
+      case 10: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0);
+      case 11: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0);
+      case 12: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0);
+      case 13: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0);
+      case 14: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0);
+      case 15: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0);
+      case 16: return make_vector(4.0 * xi * xi * xi * eta, xi * xi * xi * xi, 0.0);
+      case 17: return make_vector(3.0 * xi * xi * eta * eta, 2.0 * xi * xi * xi * eta, 0.0);
+      case 18: return make_vector(2.0 * xi * eta * eta * eta, 3.0 * xi * xi * eta * eta, 0.0);
+      case 19: return make_vector(eta * eta * eta * eta, 4.0 * xi * eta * eta * eta, 0.0);
+      case 20: return make_vector(0.0, 5.0 * eta * eta * eta * eta, 0.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// MonomialImpl3D — 3-D MONOMIAL basis, order N
+//   n_dofs = (N+1)(N+2)(N+3)/6
+//   Basis ordering: graded-lex; for each total degree d, iterate c (zeta
+//   exponent) from 0 to d, then a (xi exponent) from d-c down to 0 (b=d-c-a).
+//   Shared by TET, HEX, PRISM, and PYRAMID element classes.
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Only declared here; each supported order is an explicit specialization.
+// Every order repeats the entries of the previous one and appends the
+// monomials of the next total degree, so indices are stable across orders.
+// NOTE(review): the parameter list after `template` (presumably the order N)
+// is absent in this copy of the patch -- restore from upstream.
+template
+struct MonomialImpl3D;
+
+// Order 0: the constant function.
+template <>
+struct MonomialImpl3D<0>
+{
+  static constexpr unsigned int n_dofs() { return 1; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    return 1.0;
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    return zero_vector();
+  }
+};
+
+// Order 1: {1, xi, eta, zeta}.
+template <>
+struct MonomialImpl3D<1>
+{
+  static constexpr unsigned int n_dofs() { return 4; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return zeta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(0.0, 0.0, 1.0);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 2: adds the six degree-2 monomials (indices 4-9).
+template <>
+struct MonomialImpl3D<2>
+{
+  static constexpr unsigned int n_dofs() { return 10; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return zeta;
+      case 4: return xi * xi;
+      case 5: return xi * eta;
+      case 6: return eta * eta;
+      case 7: return xi * zeta;
+      case 8: return eta * zeta;
+      case 9: return zeta * zeta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(0.0, 0.0, 1.0);
+      case 4: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 5: return make_vector(eta, xi, 0.0);
+      case 6: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 7: return make_vector(zeta, 0.0, xi);
+      case 8: return make_vector(0.0, zeta, eta);
+      case 9: return make_vector(0.0, 0.0, 2.0 * zeta);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 3: adds the ten degree-3 monomials (indices 10-19).
+template <>
+struct MonomialImpl3D<3>
+{
+  static constexpr unsigned int n_dofs() { return 20; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return zeta;
+      case 4: return xi * xi;
+      case 5: return xi * eta;
+      case 6: return eta * eta;
+      case 7: return xi * zeta;
+      case 8: return eta * zeta;
+      case 9: return zeta * zeta;
+      case 10: return xi * xi * xi;
+      case 11: return xi * xi * eta;
+      case 12: return xi * eta * eta;
+      case 13: return eta * eta * eta;
+      case 14: return xi * xi * zeta;
+      case 15: return xi * eta * zeta;
+      case 16: return eta * eta * zeta;
+      case 17: return xi * zeta * zeta;
+      case 18: return eta * zeta * zeta;
+      case 19: return zeta * zeta * zeta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(0.0, 0.0, 1.0);
+      case 4: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 5: return make_vector(eta, xi, 0.0);
+      case 6: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 7: return make_vector(zeta, 0.0, xi);
+      case 8: return make_vector(0.0, zeta, eta);
+      case 9: return make_vector(0.0, 0.0, 2.0 * zeta);
+      case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0);
+      case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0);
+      case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0);
+      case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi);
+      case 15: return make_vector(eta * zeta, xi * zeta, xi * eta);
+      case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta);
+      case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta);
+      case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta);
+      case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 4: adds the fifteen degree-4 monomials (indices 20-34).
+template <>
+struct MonomialImpl3D<4>
+{
+  static constexpr unsigned int n_dofs() { return 35; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return zeta;
+      case 4: return xi * xi;
+      case 5: return xi * eta;
+      case 6: return eta * eta;
+      case 7: return xi * zeta;
+      case 8: return eta * zeta;
+      case 9: return zeta * zeta;
+      case 10: return xi * xi * xi;
+      case 11: return xi * xi * eta;
+      case 12: return xi * eta * eta;
+      case 13: return eta * eta * eta;
+      case 14: return xi * xi * zeta;
+      case 15: return xi * eta * zeta;
+      case 16: return eta * eta * zeta;
+      case 17: return xi * zeta * zeta;
+      case 18: return eta * zeta * zeta;
+      case 19: return zeta * zeta * zeta;
+      case 20: return xi * xi * xi * xi;
+      case 21: return xi * xi * xi * eta;
+      case 22: return xi * xi * eta * eta;
+      case 23: return xi * eta * eta * eta;
+      case 24: return eta * eta * eta * eta;
+      case 25: return xi * xi * xi * zeta;
+      case 26: return xi * xi * eta * zeta;
+      case 27: return xi * eta * eta * zeta;
+      case 28: return eta * eta * eta * zeta;
+      case 29: return xi * xi * zeta * zeta;
+      case 30: return xi * eta * zeta * zeta;
+      case 31: return eta * eta * zeta * zeta;
+      case 32: return xi * zeta * zeta * zeta;
+      case 33: return eta * zeta * zeta * zeta;
+      case 34: return zeta * zeta * zeta * zeta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(0.0, 0.0, 1.0);
+      case 4: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 5: return make_vector(eta, xi, 0.0);
+      case 6: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 7: return make_vector(zeta, 0.0, xi);
+      case 8: return make_vector(0.0, zeta, eta);
+      case 9: return make_vector(0.0, 0.0, 2.0 * zeta);
+      case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0);
+      case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0);
+      case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0);
+      case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi);
+      case 15: return make_vector(eta * zeta, xi * zeta, xi * eta);
+      case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta);
+      case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta);
+      case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta);
+      case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta);
+      case 20: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0);
+      case 21: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0);
+      case 22: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0);
+      case 23: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0);
+      case 24: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0);
+      case 25: return make_vector(3.0 * xi * xi * zeta, 0.0, xi * xi * xi);
+      case 26: return make_vector(2.0 * xi * eta * zeta, xi * xi * zeta, xi * xi * eta);
+      case 27: return make_vector(eta * eta * zeta, 2.0 * xi * eta * zeta, xi * eta * eta);
+      case 28: return make_vector(0.0, 3.0 * eta * eta * zeta, eta * eta * eta);
+      case 29: return make_vector(2.0 * xi * zeta * zeta, 0.0, 2.0 * xi * xi * zeta);
+      case 30: return make_vector(eta * zeta * zeta, xi * zeta * zeta, 2.0 * xi * eta * zeta);
+      case 31: return make_vector(0.0, 2.0 * eta * zeta * zeta, 2.0 * eta * eta * zeta);
+      case 32: return make_vector(zeta * zeta * zeta, 0.0, 3.0 * xi * zeta * zeta);
+      case 33: return make_vector(0.0, zeta * zeta * zeta, 3.0 * eta * zeta * zeta);
+      case 34: return make_vector(0.0, 0.0, 4.0 * zeta * zeta * zeta);
+      default: return zero_vector();
+    }
+  }
+};
+
+// Order 5: adds the twenty-one degree-5 monomials (indices 35-55).
+template <>
+struct MonomialImpl3D<5>
+{
+  static constexpr unsigned int n_dofs() { return 56; }
+
+  LIBMESH_DEVICE_INLINE static Real
+  shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return 1.0;
+      case 1: return xi;
+      case 2: return eta;
+      case 3: return zeta;
+      case 4: return xi*xi;
+      case 5: return xi*eta;
+      case 6: return eta*eta;
+      case 7: return xi*zeta;
+      case 8: return eta*zeta;
+      case 9: return zeta*zeta;
+      case 10: return xi*xi*xi;
+      case 11: return xi*xi*eta;
+      case 12: return xi*eta*eta;
+      case 13: return eta*eta*eta;
+      case 14: return xi*xi*zeta;
+      case 15: return xi*eta*zeta;
+      case 16: return eta*eta*zeta;
+      case 17: return xi*zeta*zeta;
+      case 18: return eta*zeta*zeta;
+      case 19: return zeta*zeta*zeta;
+      case 20: return xi*xi*xi*xi;
+      case 21: return xi*xi*xi*eta;
+      case 22: return xi*xi*eta*eta;
+      case 23: return xi*eta*eta*eta;
+      case 24: return eta*eta*eta*eta;
+      case 25: return xi*xi*xi*zeta;
+      case 26: return xi*xi*eta*zeta;
+      case 27: return xi*eta*eta*zeta;
+      case 28: return eta*eta*eta*zeta;
+      case 29: return xi*xi*zeta*zeta;
+      case 30: return xi*eta*zeta*zeta;
+      case 31: return eta*eta*zeta*zeta;
+      case 32: return xi*zeta*zeta*zeta;
+      case 33: return eta*zeta*zeta*zeta;
+      case 34: return zeta*zeta*zeta*zeta;
+      case 35: return xi*xi*xi*xi*xi;
+      case 36: return xi*xi*xi*xi*eta;
+      case 37: return xi*xi*xi*eta*eta;
+      case 38: return xi*xi*eta*eta*eta;
+      case 39: return xi*eta*eta*eta*eta;
+      case 40: return eta*eta*eta*eta*eta;
+      case 41: return xi*xi*xi*xi*zeta;
+      case 42: return xi*xi*xi*eta*zeta;
+      case 43: return xi*xi*eta*eta*zeta;
+      case 44: return xi*eta*eta*eta*zeta;
+      case 45: return eta*eta*eta*eta*zeta;
+      case 46: return xi*xi*xi*zeta*zeta;
+      case 47: return xi*xi*eta*zeta*zeta;
+      case 48: return xi*eta*eta*zeta*zeta;
+      case 49: return eta*eta*eta*zeta*zeta;
+      case 50: return xi*xi*zeta*zeta*zeta;
+      case 51: return xi*eta*zeta*zeta*zeta;
+      case 52: return eta*eta*zeta*zeta*zeta;
+      case 53: return xi*zeta*zeta*zeta*zeta;
+      case 54: return eta*zeta*zeta*zeta*zeta;
+      case 55: return zeta*zeta*zeta*zeta*zeta;
+      default: return 0.0;
+    }
+  }
+
+  LIBMESH_DEVICE_INLINE static RealVector
+  grad_shape(unsigned int i, Real xi, Real eta, Real zeta)
+  {
+    switch (i)
+    {
+      case 0: return zero_vector();
+      case 1: return make_vector(1.0, 0.0, 0.0);
+      case 2: return make_vector(0.0, 1.0, 0.0);
+      case 3: return make_vector(0.0, 0.0, 1.0);
+      case 4: return make_vector(2.0 * xi, 0.0, 0.0);
+      case 5: return make_vector(eta, xi, 0.0);
+      case 6: return make_vector(0.0, 2.0 * eta, 0.0);
+      case 7: return make_vector(zeta, 0.0, xi);
+      case 8: return make_vector(0.0, zeta, eta);
+      case 9: return make_vector(0.0, 0.0, 2.0 * zeta);
+      case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0);
+      case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0);
+      case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0);
+      case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0);
+      case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi);
+      case 15: return make_vector(eta * zeta, xi * zeta, xi * eta);
+      case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta);
+      case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta);
+      case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta);
+      case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta);
+      case 20: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0);
+      case 21: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0);
+      case 22: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0);
+      case 23: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0);
+      case 24: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0);
+      case 25: return make_vector(3.0 * xi * xi * zeta, 0.0, xi * xi * xi);
+      case 26: return make_vector(2.0 * xi * eta * zeta, xi * xi * zeta, xi * xi * eta);
+      case 27: return make_vector(eta * eta * zeta, 2.0 * xi * eta * zeta, xi * eta * eta);
+      case 28: return make_vector(0.0, 3.0 * eta * eta * zeta, eta * eta * eta);
+      case 29: return make_vector(2.0 * xi * zeta * zeta, 0.0, 2.0 * xi * xi * zeta);
+      case 30: return make_vector(eta * zeta * zeta, xi * zeta * zeta, 2.0 * xi * eta * zeta);
+      case 31: return make_vector(0.0, 2.0 * eta * zeta * zeta, 2.0 * eta * eta * zeta);
+      case 32: return make_vector(zeta * zeta * zeta, 0.0, 3.0 * xi * zeta * zeta);
+      case 33: return make_vector(0.0, zeta * zeta * zeta, 3.0 * eta * zeta * zeta);
+      case 34: return make_vector(0.0, 0.0, 4.0 * zeta * zeta * zeta);
+      case 35: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0);
+      case 36: return make_vector(4.0 * xi * xi * xi * eta, xi * xi * xi * xi, 0.0);
+      case 37: return make_vector(3.0 * xi * xi * eta * eta, 2.0 * xi * xi * xi * eta, 0.0);
+      case 38: return make_vector(2.0 * xi * eta * eta * eta, 3.0 * xi * xi * eta * eta, 0.0);
+      case 39: return make_vector(eta * eta * eta * eta, 4.0 * xi * eta * eta * eta, 0.0);
+      case 40: return make_vector(0.0, 5.0 * eta * eta * eta * eta, 0.0);
+      case 41: return make_vector(4.0 * xi * xi * xi * zeta, 0.0, xi * xi * xi * xi);
+      case 42: return make_vector(3.0 * xi * xi * eta * zeta, xi * xi * xi * zeta, xi * xi * xi * eta);
+      case 43: return make_vector(2.0 * xi * eta * eta * zeta, 2.0 * xi * xi * eta * zeta, xi * xi * eta * eta);
+      case 44: return make_vector(eta * eta * eta * zeta, 3.0 * xi * eta * eta * zeta, xi * eta * eta * eta);
+      case 45: return make_vector(0.0, 4.0 * eta * eta * eta * zeta, eta * eta * eta * eta);
+      case 46: return make_vector(3.0 * xi * xi * zeta * zeta, 0.0, 2.0 * xi * xi * xi * zeta);
+      case 47: return make_vector(2.0 * xi * eta * zeta * zeta, xi * xi * zeta * zeta, 2.0 * xi * xi * eta * zeta);
+      case 48: return make_vector(eta * eta * zeta * zeta, 2.0 * xi * eta * zeta * zeta, 2.0 * xi * eta * eta * zeta);
+      case 49: return make_vector(0.0, 3.0 * eta * eta * zeta * zeta, 2.0 * eta * eta * eta * zeta);
+      case 50: return make_vector(2.0 * xi * zeta * zeta * zeta, 0.0, 3.0 * xi * xi * zeta * zeta);
+      case 51: return make_vector(eta * zeta * zeta * zeta, xi * zeta * zeta * zeta, 3.0 * xi * eta * zeta * zeta);
+      case 52: return make_vector(0.0, 2.0 * eta * zeta * zeta * zeta, 3.0 * eta * eta * zeta * zeta);
+      case 53: return make_vector(zeta * zeta * zeta * zeta, 0.0, 4.0 * xi * zeta * zeta * zeta);
+      case 54: return make_vector(0.0, zeta * zeta * zeta * zeta, 4.0 * eta * zeta * zeta * zeta);
+      case 55: return make_vector(0.0, 0.0, 5.0 * zeta * zeta * zeta * zeta);
+      default: return zero_vector();
+    }
+  }
+};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Per-topology FEEvaluator delegating specializations
+//
+// Each partial specialization fixes family=MONOMIAL and elem_type, leaving the
+// polynomial Order as a template parameter, then inherits the matching impl.
+// ═══════════════════════════════════════════════════════════════════════════
+
+// NOTE(review): in this copy of the patch every specialization below has lost
+// its template parameter list, its FEEvaluator argument list and the argument
+// of its MonomialImpl base, so the 20 declarations are textually
+// indistinguishable and will not compile as shown. Presumably each one names a
+// distinct libMesh::ElemType -- restore the argument lists from upstream.
+
+// ── 1-D ──────────────────────────────────────────────────────────────────────
+
+// Two specializations delegating to MonomialImpl1D.
+template
+struct FEEvaluator : MonomialImpl1D {};
+
+template
+struct FEEvaluator : MonomialImpl1D {};
+
+// ── 2-D ──────────────────────────────────────────────────────────────────────
+
+// Six specializations delegating to MonomialImpl2D.
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+template
+struct FEEvaluator : MonomialImpl2D {};
+
+// ── 3-D ──────────────────────────────────────────────────────────────────────
+
+// Twelve specializations delegating to MonomialImpl3D.
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+template
+struct FEEvaluator : MonomialImpl3D {};
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_FE_MONOMIAL_H
diff --git a/include/gpu/kokkos_fe_types.h b/include/gpu/kokkos_fe_types.h
new file mode 100644
index 00000000000..c266e6dc15a
--- /dev/null
+++ b/include/gpu/kokkos_fe_types.h
@@ -0,0 +1,681 @@
+// Kokkos FE type helpers.
+//
+// Defines the FEShapeKey aggregate and device-callable dispatch functions used
+// by both host-side assembly setup and device-side evaluation.
+//
+// Uses libMesh's own ElemType, FEFamily, and FEElemClass enums directly —
+// no wrapper enums are needed.
+ +#ifndef LIBMESH_KOKKOS_FE_TYPES_H +#define LIBMESH_KOKKOS_FE_TYPES_H + +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_fe_elem_class.h" +#include "libmesh/enum_order.h" +// ElemMappingType (LAGRANGE_MAP, RATIONAL_BERNSTEIN_MAP) is defined in enum_elem_type.h +#include "libmesh/libmesh_device.h" +#ifndef LIBMESH_KOKKOS_COMPILATION +# include "libmesh/libmesh_common.h" +#endif + +namespace libMesh::Kokkos +{ + +// Bring FEElemClass into this namespace so existing unqualified uses compile. +using libMesh::FEElemClass; + +namespace detail +{ + +LIBMESH_DEVICE_INLINE void +abort_unsupported(const char * msg) +{ +#ifdef LIBMESH_KOKKOS_COMPILATION + ::Kokkos::abort(msg); +#else + libmesh_error_msg(msg); +#endif +} + +} // namespace detail + +LIBMESH_DEVICE_INLINE bool +is_monomial_2d_elem_type(libMesh::ElemType elem_type) +{ + switch (elem_type) + { + case libMesh::C0POLYGON: + case libMesh::TRI3: + case libMesh::TRISHELL3: + case libMesh::TRI6: + case libMesh::TRI7: + case libMesh::QUAD4: + case libMesh::QUADSHELL4: + case libMesh::QUAD8: + case libMesh::QUADSHELL8: + case libMesh::QUAD9: + case libMesh::QUADSHELL9: + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +is_monomial_3d_elem_type(libMesh::ElemType elem_type, + bool include_pyramid18 = true) +{ + switch (elem_type) + { + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::C0POLYHEDRON: + return true; + case libMesh::PYRAMID18: + return include_pyramid18; + default: + return false; + } +} + +// ── Shape function space key ────────────────────────────────────────────────── +// Uniquely identifies a libMesh FE space, including the exact 
element topology. +// This must be exact for LAGRANGE spaces, since libMesh distinguishes e.g. +// QUAD8 from QUAD9 and HEX20 from HEX27 at the same polynomial order. +// +// Trivially copyable; fits in a register (enum + enum + enum, no heap). + +struct FEShapeKey +{ + libMesh::FEFamily family; + libMesh::ElemType elem_type; + libMesh::Order order; +}; + +// ── Device-callable conversion helpers ─────────────────────────────────────── + +/// Return the Kokkos side topology used for dispatch for any side of parent +/// element type \p parent. +/// This helper is valid only for elements whose side topology is uniform. +/// Mixed-face elements such as prisms and pyramids require side-specific logic. +/// In 1D, libMesh sides are NODEELEM objects; this helper returns EDGE2 as the +/// internal surrogate topology used by the Kokkos map/shape path. +LIBMESH_DEVICE_INLINE libMesh::ElemType +get_side_topology(libMesh::ElemType parent) +{ + switch (parent) + { + // 1D: libMesh sides are NodeElem, but Kokkos dispatches them through + // a degenerate EDGE2 surrogate. 
+ case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return libMesh::EDGE2; + + // 2D first-order: sides are linear edges + case libMesh::TRI3: + case libMesh::QUAD4: + return libMesh::EDGE2; + + // 2D second-order: sides are quadratic edges + case libMesh::TRI6: + case libMesh::TRI7: + case libMesh::QUAD8: + case libMesh::QUAD9: + return libMesh::EDGE3; + + // 3D first-order: uniform-side-topology elements only + case libMesh::TET4: + return libMesh::TRI3; + case libMesh::HEX8: + return libMesh::QUAD4; + + // 3D second-order: uniform-side-topology elements only + case libMesh::TET10: + return libMesh::TRI6; + case libMesh::TET14: + return libMesh::TRI7; + case libMesh::HEX20: + return libMesh::QUAD8; + case libMesh::HEX27: + return libMesh::QUAD9; + + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PRISM6: + case libMesh::PRISM20: + case libMesh::PRISM21: + case libMesh::PYRAMID5: + case libMesh::PYRAMID18: + detail::abort_unsupported("get_side_topology(): mixed-face elements require side-specific topology"); + return libMesh::INVALID_ELEM; + + default: + detail::abort_unsupported("get_side_topology(): unsupported element type"); + return libMesh::INVALID_ELEM; // unreachable after abort + } +} + +/// Map an ElemType to its base geometric class (order-independent). +/// e.g. QUAD4 / QUAD8 / QUAD9 all return FEElemClass::QUAD. 
+LIBMESH_DEVICE_INLINE libMesh::FEElemClass +class_from_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return libMesh::FEElemClass::EDGE; + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + return libMesh::FEElemClass::TRI; + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return libMesh::FEElemClass::QUAD; + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + return libMesh::FEElemClass::TET; + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return libMesh::FEElemClass::HEX; + + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + return libMesh::FEElemClass::PRISM; + + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: + return libMesh::FEElemClass::PYRAMID; + + default: + detail::abort_unsupported("class_from_topology(): unsupported element type"); + return libMesh::FEElemClass::N_CLASSES; // unreachable after abort + } +} + +LIBMESH_DEVICE_INLINE libMesh::ElemType +lagrange_shape_topology_or_invalid(FEShapeKey key) +{ + switch (key.order) + { + case libMesh::FIRST: + switch (key.elem_type) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return libMesh::EDGE2; + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + return libMesh::TRI3; + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return libMesh::QUAD4; + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + return libMesh::TET4; + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return libMesh::HEX8; + + default: + return libMesh::INVALID_ELEM; + } + + case libMesh::SECOND: + switch (key.elem_type) + { + case libMesh::EDGE3: + return libMesh::EDGE3; + + case libMesh::TRI6: + case libMesh::TRI7: + return libMesh::TRI6; + + case 
libMesh::QUAD8: + return libMesh::QUAD8; + + case libMesh::QUAD9: + return libMesh::QUAD9; + + case libMesh::TET10: + case libMesh::TET14: + return libMesh::TET10; + + case libMesh::HEX20: + return libMesh::HEX20; + + case libMesh::HEX27: + return libMesh::HEX27; + + default: + return libMesh::INVALID_ELEM; + } + + default: + return libMesh::INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +lagrange_exact_n_dofs_or_zero(libMesh::ElemType elem_type, + libMesh::Order order) +{ + switch (order) + { + case libMesh::CONSTANT: + return (elem_type == libMesh::NODEELEM) ? 1u : 0u; + + case libMesh::FIRST: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return 2; + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + return 3; + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return 4; + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + return 4; + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return 8; + + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + return 6; + + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: + return 5; + + default: + return 0; + } + + case libMesh::SECOND: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE3: + return 3; + + case libMesh::TRI6: + case libMesh::TRI7: + return 6; + + case libMesh::QUAD8: + return 8; + + case libMesh::QUAD9: + return 9; + + case libMesh::TET10: + case libMesh::TET14: + return 10; + + case libMesh::HEX20: + return 20; + + case libMesh::HEX27: + return 27; + + case libMesh::PRISM15: + return 15; + + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + return 18; + + case libMesh::PYRAMID13: + return 13; + + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: 
+ return 14; + + default: + return 0; + } + + case libMesh::THIRD: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE4: + return 4; + + case libMesh::TRI7: + return 7; + + case libMesh::TET14: + return 14; + + case libMesh::PRISM20: + return 20; + + case libMesh::PRISM21: + return 21; + + case libMesh::PYRAMID18: + return 18; + + default: + return 0; + } + + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +monomial_exact_n_dofs_or_zero(libMesh::ElemType elem_type, + libMesh::Order order) +{ + if (elem_type == libMesh::INVALID_ELEM) + return 0; + if (order < libMesh::CONSTANT) + return 0; + + switch (order) + { + case libMesh::CONSTANT: + return 1; + + case libMesh::FIRST: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return 2; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 3; + if (is_monomial_3d_elem_type(elem_type)) + return 4; + return 0; + + case libMesh::SECOND: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return 3; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 6; + if (is_monomial_3d_elem_type(elem_type)) + return 10; + return 0; + + case libMesh::THIRD: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return 4; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 10; + if (is_monomial_3d_elem_type(elem_type)) + return 20; + return 0; + + case libMesh::FOURTH: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + return 5; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 15; + if (is_monomial_3d_elem_type(elem_type, false)) + return 35; + return 0; + + case 
libMesh::FIFTH: + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + return 6; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 21; + if (is_monomial_3d_elem_type(elem_type, false)) + return 56; + return 0; + + default: + { + const unsigned int p = static_cast(order); + + switch (elem_type) + { + case libMesh::NODEELEM: + return 1; + + case libMesh::EDGE2: + case libMesh::EDGE3: + return p + 1; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return (p + 1) * (p + 2) / 2; + if (is_monomial_3d_elem_type(elem_type, false)) + return (p + 1) * (p + 2) * (p + 3) / 6; + return 0; + } + } +} + +LIBMESH_DEVICE_INLINE unsigned int +monomial_evaluator_dim_or_zero(libMesh::ElemType elem_type) +{ + switch (elem_type) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return 1; + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return 2; + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: + return 3; + + default: + return 0; + } +} + +/// Return true iff the current Kokkos physics evaluators can evaluate \p key. +/// This boundary is the intersection of: +/// 1. exact libMesh-valid (family, elem_type, order) keys, and +/// 2. currently implemented Kokkos evaluator topologies/orders. 
+LIBMESH_DEVICE_INLINE bool +supports_shape(FEShapeKey key) +{ + switch (key.family) + { + case libMesh::LAGRANGE: + return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && + lagrange_shape_topology_or_invalid(key) != libMesh::INVALID_ELEM; + + case libMesh::MONOMIAL: + return monomial_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && + monomial_evaluator_dim_or_zero(key.elem_type) != 0 && + key.order >= libMesh::CONSTANT && + key.order <= libMesh::FIFTH; + + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_grad_shape(FEShapeKey key) +{ + return supports_shape(key); +} + +LIBMESH_DEVICE_INLINE bool +supports_n_dofs(FEShapeKey key) +{ + return supports_shape(key); +} + +/// Return the number of DOFs for a physics FE space described by \p key, +/// restricted to the current Kokkos evaluator support boundary. +LIBMESH_DEVICE_INLINE unsigned int +n_dofs(FEShapeKey key) +{ + if (!supports_n_dofs(key)) + { + detail::abort_unsupported("n_dofs(FEShapeKey): unsupported FE key for current Kokkos evaluator support boundary"); + return 0; + } + + switch (key.family) + { + case libMesh::LAGRANGE: + return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order); + + case libMesh::MONOMIAL: + return monomial_exact_n_dofs_or_zero(key.elem_type, key.order); + + default: + detail::abort_unsupported("n_dofs(FEShapeKey): unsupported FE family"); + return 0; + } +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_TYPES_H diff --git a/include/gpu/kokkos_quadrature.h b/include/gpu/kokkos_quadrature.h new file mode 100644 index 00000000000..4c2e8750bab --- /dev/null +++ b/include/gpu/kokkos_quadrature.h @@ -0,0 +1,652 @@ +// Kokkos device-compatible Gauss quadrature rules. +// +// All evaluation functions are LIBMESH_DEVICE_INLINE — callable from both +// host and GPU device code. +// +// GaussLegendre1D: 1-D Gauss-Legendre on [-1,1], 1-7 point rules. +// GaussQuadrature: Full quadrature dispatcher for all supported topologies. 
+// - n_points(topo, order): number of quadrature points +// - point(topo, order, qp): reference coordinate of qp-th point +// - weight(topo, order, qp): weight of qp-th point +// +// Values match the libMesh QGauss implementation. + +#ifndef LIBMESH_KOKKOS_QUADRATURE_H +#define LIBMESH_KOKKOS_QUADRATURE_H + +#include "gpu/kokkos_scalar_types.h" +#include "libmesh/enum_elem_type.h" +#include +#include + +namespace libMesh::Kokkos +{ + +// --------------------------------------------------------------------------- +// 1-D Gauss-Legendre quadrature on [-1, 1] +// --------------------------------------------------------------------------- + +struct GaussLegendre1D +{ + LIBMESH_DEVICE_INLINE static unsigned int n_points(unsigned int alg_order) + { + const unsigned int n = (alg_order + 2u) / 2u; + return (n < 1u) ? 1u : (n > 7u ? 7u : n); + } + + LIBMESH_DEVICE_INLINE static Real point(unsigned int n, unsigned int i) + { + switch (n) + { + case 1: return 0.0; + case 2: + switch (i) + { + case 0: return -5.7735026918962576450914878050196e-01; + case 1: return 5.7735026918962576450914878050196e-01; + default: return 0.0; + } + case 3: + switch (i) + { + case 0: return -7.7459666924148337703585307995648e-01; + case 1: return 0.0; + case 2: return 7.7459666924148337703585307995648e-01; + default: return 0.0; + } + case 4: + switch (i) + { + case 0: return -8.6113631159405257522394648889281e-01; + case 1: return -3.3998104358485626480266575910324e-01; + case 2: return 3.3998104358485626480266575910324e-01; + case 3: return 8.6113631159405257522394648889281e-01; + default: return 0.0; + } + case 5: + switch (i) + { + case 0: return -9.0617984593866399279762687829939e-01; + case 1: return -5.3846931010568309103631442070021e-01; + case 2: return 0.0; + case 3: return 5.3846931010568309103631442070021e-01; + case 4: return 9.0617984593866399279762687829939e-01; + default: return 0.0; + } + case 6: + switch (i) + { + case 0: return -9.3246951420315202781230155449399e-01; + case 
1: return -6.6120938646626451366139959501991e-01; + case 2: return -2.3861918608319690863050172168071e-01; + case 3: return 2.3861918608319690863050172168071e-01; + case 4: return 6.6120938646626451366139959501991e-01; + case 5: return 9.3246951420315202781230155449399e-01; + default: return 0.0; + } + case 7: + switch (i) + { + case 0: return -9.4910791234275852452618968404785e-01; + case 1: return -7.4153118559939443986386477328079e-01; + case 2: return -4.0584515137739716690660641207696e-01; + case 3: return 0.0; + case 4: return 4.0584515137739716690660641207696e-01; + case 5: return 7.4153118559939443986386477328079e-01; + case 6: return 9.4910791234275852452618968404785e-01; + default: return 0.0; + } + default: return 0.0; + } + } + + LIBMESH_DEVICE_INLINE static Real weight(unsigned int n, unsigned int i) + { + switch (n) + { + case 1: return 2.0; + case 2: return 1.0; + case 3: + switch (i) + { + case 0: case 2: return 5.5555555555555555555555555555556e-01; + case 1: return 8.8888888888888888888888888888889e-01; + default: return 0.0; + } + case 4: + switch (i) + { + case 0: case 3: return 3.4785484513745385737306394922200e-01; + case 1: case 2: return 6.5214515486254614262693605077800e-01; + default: return 0.0; + } + case 5: + switch (i) + { + case 0: case 4: return 2.3692688505618908751426404071992e-01; + case 1: case 3: return 4.7862867049936646804129151483564e-01; + case 2: return 5.6888888888888888888888888888889e-01; + default: return 0.0; + } + case 6: + switch (i) + { + case 0: case 5: return 1.7132449237917034504029614217273e-01; + case 1: case 4: return 3.6076157304813860756983351383772e-01; + case 2: case 3: return 4.6791393457269104738987034398955e-01; + default: return 0.0; + } + case 7: + switch (i) + { + case 0: case 6: return 1.2948496616886969327061143267908e-01; + case 1: case 5: return 2.7970539148927666790146777142378e-01; + case 2: case 4: return 3.8183005050511894495036977548898e-01; + case 3: return 
4.1795918367346938775510204081633e-01; + default: return 0.0; + } + default: return 0.0; + } + } +}; + +// --------------------------------------------------------------------------- +// GaussQuadrature — device-callable quadrature for all supported topologies +// +// Coordinate conventions (same as libMesh): +// EDGE: xi in [-1,1] +// QUAD: (xi,eta) in [-1,1]^2, tensor product +// HEX: (xi,eta,zeta) in [-1,1]^3, tensor product +// TRI: (x,y) on unit triangle {(0,0),(1,0),(0,1)} +// TET: (x,y,z) on unit tet {(0,0,0),(1,0,0),(0,1,0),(0,0,1)} +// --------------------------------------------------------------------------- + +struct GaussQuadrature +{ + /// Number of quadrature points for a given topology and polynomial order. + LIBMESH_DEVICE_INLINE static unsigned int + n_points(libMesh::ElemType topo, unsigned int order) + { + switch (topo) + { + case libMesh::EDGE2: case libMesh::EDGE3: + return GaussLegendre1D::n_points(order); + + case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return n * n; + } + + case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return n * n * n; + } + + case libMesh::TRI3: case libMesh::TRI6: + switch (order) + { + case 0: case 1: return 1; + case 2: return 3; + case 3: return 4; + case 4: return 6; + case 5: return 7; + default: return 12; + } + + case libMesh::TET4: case libMesh::TET10: + switch (order) + { + case 0: case 1: return 1; + case 2: return 4; + case 3: return 5; + case 4: return 11; + case 5: return 14; + default: return 24; + } + + default: return 0; + } + } + + /// Reference coordinate of the qp-th quadrature point. 
+ LIBMESH_DEVICE_INLINE static RealVector + point(libMesh::ElemType topo, unsigned int order, unsigned int qp) + { + switch (topo) + { + case libMesh::EDGE2: case libMesh::EDGE3: + return make_vector(GaussLegendre1D::point(GaussLegendre1D::n_points(order), qp), 0.0, 0.0); + + case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + { + const unsigned int n = GaussLegendre1D::n_points(order); + const unsigned int i = qp % n; + const unsigned int j = qp / n; + return make_vector(GaussLegendre1D::point(n, i), + GaussLegendre1D::point(n, j), 0.0); + } + + case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + { + const unsigned int n = GaussLegendre1D::n_points(order); + const unsigned int i = qp % n; + const unsigned int j = (qp / n) % n; + const unsigned int k = qp / (n * n); + return make_vector(GaussLegendre1D::point(n, i), + GaussLegendre1D::point(n, j), + GaussLegendre1D::point(n, k)); + } + + case libMesh::TRI3: case libMesh::TRI6: + return tri_point(order, qp); + + case libMesh::TET4: case libMesh::TET10: + return tet_point(order, qp); + + default: return zero_vector(); + } + } + + /// Weight of the qp-th quadrature point. 
+ LIBMESH_DEVICE_INLINE static Real + weight(libMesh::ElemType topo, unsigned int order, unsigned int qp) + { + switch (topo) + { + case libMesh::EDGE2: case libMesh::EDGE3: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return GaussLegendre1D::weight(n, qp); + } + + case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return GaussLegendre1D::weight(n, qp % n) * + GaussLegendre1D::weight(n, qp / n); + } + + case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return GaussLegendre1D::weight(n, qp % n) * + GaussLegendre1D::weight(n, (qp / n) % n) * + GaussLegendre1D::weight(n, qp / (n * n)); + } + + case libMesh::TRI3: case libMesh::TRI6: + return tri_weight(order, qp); + + case libMesh::TET4: case libMesh::TET10: + return tet_weight(order, qp); + + default: return 0.0; + } + } + +private: + // ── Triangle rules ──────────────────────────────────────────────────────── + + LIBMESH_DEVICE_INLINE static RealVector + tri_point(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: + return make_vector(1.0 / 3.0, 1.0 / 3.0, 0.0); + + case 2: + switch (qp) + { + case 0: return make_vector(2.0 / 3.0, 1.0 / 6.0, 0.0); + case 1: return make_vector(1.0 / 6.0, 2.0 / 3.0, 0.0); + case 2: return make_vector(1.0 / 6.0, 1.0 / 6.0, 0.0); + default: return zero_vector(); + } + + case 3: + switch (qp) + { + case 0: return make_vector(1.5505102572168219018e-01, 1.7855872826361642312e-01, 0.0); + case 1: return make_vector(6.4494897427831780982e-01, 7.5031110222608118177e-02, 0.0); + case 2: return make_vector(1.5505102572168219018e-01, 6.6639024601470138670e-01, 0.0); + case 3: return make_vector(6.4494897427831780982e-01, 2.8001991549907407200e-01, 0.0); + default: return zero_vector(); + } + + case 4: + { + constexpr Real a1 = 4.4594849091596488632e-01, b1 = 1.0 - 2.0 * a1; + constexpr Real a2 = 
9.1576213509770743460e-02, b2 = 1.0 - 2.0 * a2; + switch (qp) + { + case 0: return make_vector(a1, a1, 0.0); + case 1: return make_vector(a1, b1, 0.0); + case 2: return make_vector(b1, a1, 0.0); + case 3: return make_vector(a2, a2, 0.0); + case 4: return make_vector(a2, b2, 0.0); + case 5: return make_vector(b2, a2, 0.0); + default: return zero_vector(); + } + } + + case 5: + { + const Real sq15 = 3.872983346207417; // sqrt(15) + const Real a1 = 2.0 / 7.0 + sq15 / 21.0; + const Real a2 = 2.0 / 7.0 - sq15 / 21.0; + const Real b1 = 1.0 - 2.0 * a1, b2 = 1.0 - 2.0 * a2; + switch (qp) + { + case 0: return make_vector(1.0 / 3.0, 1.0 / 3.0, 0.0); + case 1: return make_vector(a1, a1, 0.0); + case 2: return make_vector(a1, b1, 0.0); + case 3: return make_vector(b1, a1, 0.0); + case 4: return make_vector(a2, a2, 0.0); + case 5: return make_vector(a2, b2, 0.0); + case 6: return make_vector(b2, a2, 0.0); + default: return zero_vector(); + } + } + + case 6: + { + constexpr Real a1 = 2.4928674517091042129163855310701908e-01; + constexpr Real a2 = 6.3089014491502228340331602870819157e-02; + constexpr Real a3 = 3.1035245103378440541660773395655215e-01; + constexpr Real b1 = 1.0 - 2.0 * a1; + constexpr Real b2 = 1.0 - 2.0 * a2; + constexpr Real b3 = 6.3650249912139864723014259441204970e-01; + constexpr Real c3 = 1.0 - a3 - b3; + switch (qp) + { + case 0: return make_vector(a1, a1, 0.0); + case 1: return make_vector(a1, b1, 0.0); + case 2: return make_vector(b1, a1, 0.0); + case 3: return make_vector(a2, a2, 0.0); + case 4: return make_vector(a2, b2, 0.0); + case 5: return make_vector(b2, a2, 0.0); + case 6: return make_vector(a3, b3, 0.0); + case 7: return make_vector(b3, a3, 0.0); + case 8: return make_vector(a3, c3, 0.0); + case 9: return make_vector(c3, a3, 0.0); + case 10: return make_vector(b3, c3, 0.0); + case 11: return make_vector(c3, b3, 0.0); + default: return zero_vector(); + } + } + + default: // order >= 7: 12-point Ro3-invariant rule + { + constexpr Real rd[4][2] = { 
+ {6.2382265094402118174e-02, 6.7517867073916085443e-02}, + {5.5225456656926611737e-02, 3.2150249385198182267e-01}, + {3.4324302945097146470e-02, 6.6094919618673565761e-01}, + {5.1584233435359177926e-01, 2.7771616697639178257e-01} + }; + const unsigned int row = qp / 3; + const unsigned int sub = qp % 3; + if (row >= 4) + return zero_vector(); + const Real z1 = rd[row][0], z2 = rd[row][1], z3 = 1.0 - z1 - z2; + switch (sub) + { + case 0: return make_vector(z1, z2, 0.0); + case 1: return make_vector(z3, z1, 0.0); + case 2: return make_vector(z2, z3, 0.0); + default: return zero_vector(); + } + } + } + } + + LIBMESH_DEVICE_INLINE static Real + tri_weight(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: return 0.5; + case 2: return 1.0 / 6.0; + case 3: return (qp % 2 == 0) ? 1.5902069087198858470e-01 : 9.0979309128011415303e-02; + case 4: return (qp < 3) ? 1.1169079483900573285e-01 : 5.4975871827660933819e-02; + case 5: + { + if (qp == 0) + return 9.0 / 80.0; + const Real sq15 = 3.872983346207417; + return (qp <= 3) ? (31.0 / 480.0 + sq15 / 2400.0) : (31.0 / 480.0 - sq15 / 2400.0); + } + case 6: + { + if (qp <= 2) + return 5.8393137863189683012644805692789721e-02; + if (qp <= 5) + return 2.5422453185103408460468404553434492e-02; + return 4.1425537809186787596776728210221227e-02; + } + default: + { + constexpr Real wts[4] = { + 2.6517028157436251429e-02, 4.3881408714446055037e-02, + 2.8775042784981585738e-02, 6.7493187009802774463e-02 + }; + return (qp / 3 < 4) ? 
wts[qp / 3] : 0.0; + } + } + } + + // ── Tetrahedral rules ───────────────────────────────────────────────────── + + LIBMESH_DEVICE_INLINE static RealVector + tet_point(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: + return make_vector(0.25, 0.25, 0.25); + + case 2: + { + const Real b = 0.25 * (1.0 - 1.0 / 2.2360679774997896964); // 1/sqrt(5) + const Real a = 1.0 - 3.0 * b; + switch (qp) + { + case 0: return make_vector(a, b, b); + case 1: return make_vector(b, a, b); + case 2: return make_vector(b, b, a); + case 3: return make_vector(b, b, b); + default: return zero_vector(); + } + } + + case 3: + switch (qp) + { + case 0: return make_vector(0.25, 0.25, 0.25); + case 1: return make_vector(0.5, 1.0 / 6.0, 1.0 / 6.0); + case 2: return make_vector(1.0 / 6.0, 0.5, 1.0 / 6.0); + case 3: return make_vector(1.0 / 6.0, 1.0 / 6.0, 0.5); + case 4: return make_vector(1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0); + default: return zero_vector(); + } + + case 4: + { + constexpr Real a1 = 2.5e-01; + constexpr Real a2 = 7.85714285714285714e-01, b2 = 7.14285714285714285e-02; + constexpr Real a3 = 3.99403576166799219e-01, b3 = 1.00596423833200785e-01; + switch (qp) + { + case 0: return make_vector(a1, a1, a1); + case 1: return make_vector(a2, b2, b2); + case 2: return make_vector(b2, a2, b2); + case 3: return make_vector(b2, b2, a2); + case 4: return make_vector(b2, b2, b2); + case 5: return make_vector(a3, a3, b3); + case 6: return make_vector(a3, b3, b3); + case 7: return make_vector(b3, b3, a3); + case 8: return make_vector(b3, a3, b3); + case 9: return make_vector(b3, a3, a3); + case 10: return make_vector(a3, b3, a3); + default: return zero_vector(); + } + } + + case 5: + { + constexpr Real af[3] = {3.1088591926330060980e-01, + 9.2735250310891226402e-02, + 4.5503704125649649492e-02}; + if (qp < 8) + { + const unsigned int g = qp / 4; + const unsigned int sub = qp % 4; + const Real ag = af[g], bg = 1.0 - 3.0 * ag; + switch (sub) + { + case 0: return 
make_vector(ag, ag, ag); + case 1: return make_vector(ag, bg, ag); + case 2: return make_vector(bg, ag, ag); + case 3: return make_vector(ag, ag, bg); + default: return zero_vector(); + } + } + else + { + const Real a2 = af[2], b2 = 0.5 * (1.0 - 2.0 * a2); + switch (qp - 8) + { + case 0: return make_vector(b2, b2, a2); + case 1: return make_vector(b2, a2, a2); + case 2: return make_vector(a2, a2, b2); + case 3: return make_vector(a2, b2, a2); + case 4: return make_vector(b2, a2, b2); + case 5: return make_vector(a2, b2, b2); + default: return zero_vector(); + } + } + } + + default: // order >= 6: 24-point Keast rule + { + constexpr Real data[4][3] = { + {3.56191386222544953e-01, 2.14602871259151684e-01, 0.0}, + {8.77978124396165982e-01, 4.06739585346113397e-02, 0.0}, + {3.29863295731730594e-02, 3.22337890142275646e-01, 0.0}, + {0.0, 0.0, 0.0} // 12-perm group handled separately + }; + + if (qp < 12) + { + // Three 4-permutation groups + const unsigned int grp = qp / 4; + const unsigned int sub = qp % 4; + const Real a = data[grp][0], b = data[grp][1]; + switch (sub) + { + case 0: return make_vector(a, b, b); + case 1: return make_vector(b, a, b); + case 2: return make_vector(b, b, a); + case 3: return make_vector(b, b, b); + default: return zero_vector(); + } + } + else + { + // 12-permutation group + constexpr Real a4 = 6.36610018750175299e-02; + constexpr Real b4 = 2.69672331458315867e-01; + constexpr Real c4 = 6.03005664791649076e-01; + switch (qp - 12) + { + case 0: return make_vector(a4, a4, b4); + case 1: return make_vector(a4, a4, c4); + case 2: return make_vector(b4, a4, a4); + case 3: return make_vector(c4, a4, a4); + case 4: return make_vector(a4, b4, a4); + case 5: return make_vector(a4, c4, a4); + case 6: return make_vector(a4, b4, c4); + case 7: return make_vector(a4, c4, b4); + case 8: return make_vector(b4, a4, c4); + case 9: return make_vector(b4, c4, a4); + case 10: return make_vector(c4, a4, b4); + case 11: return make_vector(c4, b4, a4); + 
default: return zero_vector(); + } + } + } + } + } + + LIBMESH_DEVICE_INLINE static Real + tet_weight(unsigned int order, unsigned int qp) + { + switch (order) + { + case 0: case 1: return 1.0 / 6.0; + case 2: return 1.0 / 24.0; + case 3: return (qp == 0) ? -2.0 / 15.0 : 0.075; + case 4: + { + if (qp == 0) + return -1.31555555555555556e-02; + if (qp <= 4) + return 7.62222222222222222e-03; + return 2.48888888888888889e-02; + } + case 5: + { + constexpr Real wf[3] = {1.8781320953002641800e-02, + 1.2248840519393658257e-02, + 7.0910034628469110730e-03}; + if (qp < 4) + return wf[0]; + if (qp < 8) + return wf[1]; + return wf[2]; + } + default: + { + constexpr Real wts[4] = {6.65379170969464506e-03, + 1.67953517588677620e-03, + 9.22619692394239843e-03, + 8.03571428571428248e-03}; + if (qp < 4) + return wts[0]; + if (qp < 8) + return wts[1]; + if (qp < 12) + return wts[2]; + return wts[3]; + } + } + } +}; + +// --------------------------------------------------------------------------- +// fill_quadrature — host-side convenience wrapper +// +// Fills std::vectors using the device-callable GaussQuadrature functions. +// --------------------------------------------------------------------------- + +inline void +fill_quadrature(libMesh::ElemType topo, + unsigned int order, + std::vector & qpts, + std::vector & weights) +{ + const unsigned int nqp = GaussQuadrature::n_points(topo, order); + qpts.resize(nqp); + weights.resize(nqp); + for (unsigned int q = 0; q < nqp; ++q) + { + qpts[q] = GaussQuadrature::point(topo, order, q); + weights[q] = GaussQuadrature::weight(topo, order, q); + } +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_QUADRATURE_H diff --git a/include/gpu/kokkos_scalar_types.h b/include/gpu/kokkos_scalar_types.h new file mode 100644 index 00000000000..b7386cf900f --- /dev/null +++ b/include/gpu/kokkos_scalar_types.h @@ -0,0 +1,118 @@ +// libMesh Kokkos device-compatible scalar types. 
+//
+// This header provides dimension-aware Kokkos aliases/helpers that mirror
+// libMesh host numerics at LIBMESH_DIM=1/2/3.
+
+#ifndef LIBMESH_KOKKOS_SCALAR_TYPES_H
+#define LIBMESH_KOKKOS_SCALAR_TYPES_H
+
+#include "libmesh/libmesh_common.h"
+#include "libmesh/libmesh_device.h"
+#include "libmesh/type_vector.h"
+#include "libmesh/type_tensor.h"
+namespace libMesh::Kokkos
+{
+
+// Scalar, vector and tensor types shared by the Kokkos helpers.  TypeVector
+// and TypeTensor are class templates, so the aliases must name the Real
+// instantiation explicitly.
+using Real = libMesh::Real;
+using RealVector = libMesh::TypeVector<Real>;
+using RealTensor = libMesh::TypeTensor<Real>;
+
+// Returns a vector with every component set to zero.
+LIBMESH_DEVICE_INLINE
+RealVector zero_vector()
+{
+  RealVector v;
+  v.zero();
+  return v;
+}
+
+// Builds a vector from up to three components.  Components beyond
+// LIBMESH_DIM cannot be stored; in those builds the corresponding argument is
+// asserted to be zero instead of being silently dropped.
+LIBMESH_DEVICE_INLINE
+RealVector make_vector(const Real x, const Real y = 0, const Real z = 0)
+{
+  RealVector v = zero_vector();
+
+  v(0) = x;
+
+#if LIBMESH_DIM > 1
+  v(1) = y;
+#else
+  libmesh_assert_equal_to(y, Real(0));
+#endif
+
+#if LIBMESH_DIM > 2
+  v(2) = z;
+#else
+  libmesh_assert_equal_to(z, Real(0));
+#endif
+
+  return v;
+}
+
+// Returns a tensor with every entry set to zero.
+LIBMESH_DEVICE_INLINE
+RealTensor zero_tensor()
+{
+  RealTensor J;
+  J.zero();
+  return J;
+}
+
+// Returns a tensor whose leading dim x dim block is the identity and whose
+// remaining entries are zero.  `dim` must not exceed LIBMESH_DIM.
+LIBMESH_DEVICE_INLINE
+RealTensor leading_identity(const unsigned int dim = LIBMESH_DIM)
+{
+  libmesh_assert_less_equal(dim, LIBMESH_DIM);
+
+  RealTensor I = zero_tensor();
+  for (unsigned int i = 0; i < dim; ++i)
+    I(i, i) = Real(1);
+
+  return I;
+}
+
+// Determinant of the leading dim x dim block of J.  A 0 x 0 block yields 1
+// (the empty product); dim values other than 0, 1 and 2 fall through to the
+// tensor's own det().
+LIBMESH_DEVICE_INLINE
+Real leading_determinant(const RealTensor & J, const unsigned int dim = LIBMESH_DIM)
+{
+  libmesh_assert_less_equal(dim, LIBMESH_DIM);
+
+  if (dim == 0)
+    return Real(1);
+
+  if (dim == 1)
+    return J(0, 0);
+
+  if (dim == 2)
+    return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0);
+
+  return J.det();
+}
+
+// Inverse of the leading dim x dim block of J, embedded in an otherwise zero
+// tensor for dim 1 and 2; other non-zero dims fall through to the tensor's
+// own inverse().  No check is made for a singular block: the 1x1 and 2x2
+// branches divide by J(0,0) / the 2x2 determinant unconditionally.
+LIBMESH_DEVICE_INLINE
+RealTensor leading_inverse(const RealTensor & J, const unsigned int dim = LIBMESH_DIM)
+{
+  libmesh_assert_less_equal(dim, LIBMESH_DIM);
+
+  if (dim == 0)
+    return leading_identity(0);
+
+  if (dim == 1)
+  {
+    RealTensor inv = zero_tensor();
+    inv(0, 0) = Real(1) / J(0, 0);
+    return inv;
+  }
+
+  if (dim == 2)
+  {
+    // Adjugate of the 2x2 block scaled by 1/det.
+    const Real inv_det = Real(1) / leading_determinant(J, dim);
+    RealTensor inv = zero_tensor();
+    inv(0, 0) = J(1, 1) * inv_det;
+    inv(0, 1) = -J(0, 1) * inv_det;
+    inv(1, 0) = -J(1, 0) * inv_det;
+    inv(1, 1) = J(0, 0) * inv_det;
+    return inv;
+  }
+
+  return J.inverse();
+}
+
+} // namespace libMesh::Kokkos
+
+#endif // LIBMESH_KOKKOS_SCALAR_TYPES_H
diff --git a/src/quadrature/quadrature_gauss_3D.C b/src/quadrature/quadrature_gauss_3D.C index e9986c216cb..8e2f96ed5cc 100644 --- a/src/quadrature/quadrature_gauss_3D.C +++ b/src/quadrature/quadrature_gauss_3D.C @@ -181,8 +181,6 @@ void QGauss::init_3D() // Note: if !allow_rules_with_negative_weights, fall through to next case. } - - // Originally a Keast rule, // Patrick Keast, // Moderate Degree Tetrahedral Quadrature Formulas, From c21018bd1445863337515904831f1b992d849dc5 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 5 May 2026 10:28:33 -0600 Subject: [PATCH 31/48] Add Kokkos FE oracle test suite --- tests/Makefile.am | 110 +- tests/fe/kokkos_fe_contract_test.K | 333 ++++ tests/fe/kokkos_fe_invariant_test.K | 418 +++++ tests/fe/kokkos_fe_map_oracle_test.K | 529 +++++++ tests/fe/kokkos_fe_oracle_test_utils.h | 1386 +++++++++++++++++ tests/fe/kokkos_fe_permuted_map_oracle_test.K | 512 ++++++ .../fe/kokkos_fe_reconstruction_oracle_test.K | 320 ++++ tests/fe/kokkos_fe_shape_oracle_test.K | 629 ++++++++ tests/fe/kokkos_fe_side_trace_oracle_test.K | 342 ++++ tests/fe/kokkos_fe_types_oracle_test.K | 509 ++++++ tests/fe/kokkos_quadrature_oracle_test.K | 747 +++++++++ tests/quadrature/quadrature_exactness.h | 176 +++ tests/quadrature/quadrature_test.C | 108 +- 13 files changed, 6028 insertions(+), 91 deletions(-) create mode 100644 tests/fe/kokkos_fe_contract_test.K create mode 100644 tests/fe/kokkos_fe_invariant_test.K create mode 100644 tests/fe/kokkos_fe_map_oracle_test.K create mode 100644 tests/fe/kokkos_fe_oracle_test_utils.h create mode 100644 tests/fe/kokkos_fe_permuted_map_oracle_test.K create mode 100644
tests/fe/kokkos_fe_reconstruction_oracle_test.K create mode 100644 tests/fe/kokkos_fe_shape_oracle_test.K create mode 100644 tests/fe/kokkos_fe_side_trace_oracle_test.K create mode 100644 tests/fe/kokkos_fe_types_oracle_test.K create mode 100644 tests/fe/kokkos_quadrature_oracle_test.K create mode 100644 tests/quadrature/quadrature_exactness.h diff --git a/tests/Makefile.am b/tests/Makefile.am index 85d686408c7..d752bcfd487 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -135,6 +135,7 @@ unit_tests_sources = \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ + quadrature/quadrature_exactness.h \ quadrature/quadrature_test.C \ solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ @@ -241,7 +242,7 @@ data = matrices/geom_1_extraction_op.h5 \ unit_tests_data = $(data) # Why isn't this working automatically? -EXTRA_DIST = $(data) +EXTRA_DIST = $(data) fe/kokkos_fe_oracle_test_utils.h if LIBMESH_ENABLE_FPARSER unit_tests_sources += \ @@ -254,8 +255,75 @@ TESTS = if LIBMESH_ENABLE_KOKKOS KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) - check_PROGRAMS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit - TESTS += kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit + check_PROGRAMS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ + kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ + kokkos_fe_contract_unit kokkos_fe_permuted_map_oracle_unit \ + kokkos_fe_reconstruction_oracle_unit \ + kokkos_fe_side_trace_oracle_unit + TESTS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ + kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ + kokkos_fe_contract_unit kokkos_fe_permuted_map_oracle_unit \ + kokkos_fe_reconstruction_oracle_unit \ + kokkos_fe_side_trace_oracle_unit + + kokkos_fe_types_oracle_unit_SOURCES = fe/kokkos_fe_types_oracle_test.K + kokkos_fe_types_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) 
$(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_shape_oracle_unit_SOURCES = fe/kokkos_fe_shape_oracle_test.K + kokkos_fe_shape_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_map_oracle_unit_SOURCES = fe/kokkos_fe_map_oracle_test.K + kokkos_fe_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_invariant_unit_SOURCES = fe/kokkos_fe_invariant_test.K + kokkos_fe_invariant_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_contract_unit_SOURCES = fe/kokkos_fe_contract_test.K + kokkos_fe_contract_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_permuted_map_oracle_unit_SOURCES = fe/kokkos_fe_permuted_map_oracle_test.K + 
kokkos_fe_permuted_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_reconstruction_oracle_unit_SOURCES = fe/kokkos_fe_reconstruction_oracle_test.K + kokkos_fe_reconstruction_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + kokkos_fe_side_trace_oracle_unit_SOURCES = fe/kokkos_fe_side_trace_oracle_test.K + kokkos_fe_side_trace_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + + check_PROGRAMS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ + kokkos_tensor_ops_oracle_unit + TESTS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ + kokkos_tensor_ops_oracle_unit + + kokkos_quadrature_oracle_unit_SOURCES = fe/kokkos_quadrature_oracle_test.K + kokkos_quadrature_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) + kokkos_quadrature_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) + kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K 
kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) @@ -393,6 +461,42 @@ endif -c $< -o $@ # Custom link rules so the Kokkos compiler drives the final link step. +kokkos_fe_types_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_types_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_shape_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_shape_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_invariant_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_invariant_unit_LDFLAGS) -o $@ + +kokkos_fe_contract_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_contract_unit_LDFLAGS) -o $@ + +kokkos_fe_permuted_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_permuted_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_reconstruction_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_reconstruction_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_side_trace_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_side_trace_oracle_unit_LDFLAGS) -o $@ + +kokkos_quadrature_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_quadrature_oracle_unit_LDFLAGS) -o $@ + kokkos_vector_ops_oracle_unit_LINK = \ $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ diff --git a/tests/fe/kokkos_fe_contract_test.K b/tests/fe/kokkos_fe_contract_test.K new file mode 100644 index 00000000000..826693a2e93 --- /dev/null +++ b/tests/fe/kokkos_fe_contract_test.K @@ -0,0 +1,333 @@ +// Host-side contract tests for libMesh::Kokkos hard-fail paths. 
+//
+// This executable self-spawns child processes that intentionally invoke
+// unsupported Kokkos FE entry points. A child succeeds only if the call
+// returns normally; the parent test expects those calls to terminate with a
+// non-zero exit status instead.
+//
+// Child exit codes 2 (fixture could not be set up) and 3 (unknown case name)
+// are reserved for harness problems and are never accepted as a hard fail.
+//
+// Returns 0 on success, non-zero on failure.
+
+#include "gpu/kokkos_fe_evaluator.h"
+#include "gpu/kokkos_fe_face_map.h"
+#include "gpu/kokkos_fe_map.h"
+#include "gpu/kokkos_fe_types.h"
+
+#include "libmesh/elem.h"
+#include "libmesh/fe_base.h"
+#include "libmesh/node.h"
+
+// NOTE(review): header names reconstructed from the facilities used below
+// (printf/fflush, system/exit, unique_ptr, string, vector, WIF* macros).
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+#include <sys/wait.h>
+
+using libMesh::Kokkos::RealTensor;
+using libMesh::Kokkos::RealVector;
+
+namespace
+{
+
+// One entry of the hard-fail table: the name passed to `--child`.
+struct contract_case
+{
+  const char * name;
+};
+
+// An element together with the nodes it points at; the nodes are owned here
+// so they outlive the element's raw node pointers.
+struct element_fixture
+{
+  std::unique_ptr<libMesh::Elem> elem;
+  std::vector<std::unique_ptr<libMesh::Node>> nodes;
+};
+
+} // anonymous namespace
+
+// Builds an element of `elem_type` whose nodes sit at the reference-space
+// node locations reported by FEBase::get_refspace_nodes().
+static element_fixture
+build_master_fixture(libMesh::ElemType elem_type)
+{
+  element_fixture fixture;
+  fixture.elem = libMesh::Elem::build(elem_type);
+  fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP);
+  fixture.nodes.reserve(fixture.elem->n_nodes());
+
+  std::vector<libMesh::Point> refspace_nodes;
+  libMesh::FEBase::get_refspace_nodes(elem_type, refspace_nodes);
+
+  for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i)
+  {
+    const libMesh::Point refspace = refspace_nodes[i];
+    fixture.nodes.push_back(libMesh::Node::build(refspace(0), refspace(1), refspace(2), i));
+    fixture.elem->set_node(i, fixture.nodes.back().get());
+  }
+
+  return fixture;
+}
+
+// Returns the first side of `parent` whose type is `desired_type` and stores
+// its index in `side_id`; returns nullptr (and invalid_uint) if none exists.
+static std::unique_ptr<libMesh::Elem>
+find_side_of_type(const libMesh::Elem & parent,
+                  libMesh::ElemType desired_type,
+                  unsigned int & side_id)
+{
+  for (unsigned int s = 0; s < parent.n_sides(); ++s)
+  {
+    auto side = parent.build_side_ptr(s);
+    if (side->type() == desired_type)
+    {
+      side_id = s;
+      return side;
+    }
+  }
+
+  side_id = libMesh::invalid_uint;
+  return nullptr;
+}
+
+// Calls face_jacobian() at the reference origin using the nodes of `side`.
+static void
+invoke_face_jacobian_on_side(const libMesh::Elem & side)
+{
+  constexpr unsigned int max_face_nodes = 9;
+
+  // The node buffer is a fixed-size stack array; refuse sides that do not fit
+  // rather than writing past its end.  Exit code 2 marks a harness problem.
+  if (side.n_nodes() > max_face_nodes)
+    std::exit(2);
+
+  RealVector face_nodes[max_face_nodes];
+  for (unsigned int i = 0; i < side.n_nodes(); ++i)
+    face_nodes[i] = libMesh::Kokkos::point_to_real_vector(side.point(i));
+
+  (void)libMesh::Kokkos::face_jacobian(
+    libMesh::LAGRANGE_MAP, side.type(), face_nodes, side.n_nodes(), 0.0, 0.0, 0.0);
+}
+
+// Calls map_face_qp_to_parent() for the zero reference point on `side`.
+static void
+invoke_map_face_qp_to_parent_on_side(libMesh::Elem & side)
+{
+  (void)libMesh::Kokkos::map_face_qp_to_parent(
+    side, libMesh::LAGRANGE_MAP, side.type(), libMesh::Kokkos::zero_vector());
+}
+
+// Builds a reference element of `parent_type`, finds its first TRI7 side and
+// passes that side to `invoke`.  Returns 2 if the parent has no TRI7 side,
+// 0 if `invoke` returns normally.
+template <typename Invoke>
+static int
+run_on_tri7_side(libMesh::ElemType parent_type, Invoke invoke)
+{
+  auto fixture = build_master_fixture(parent_type);
+  unsigned int side_id = libMesh::invalid_uint;
+  auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id);
+  if (!side)
+    return 2;
+  invoke(*side);
+  return 0;
+}
+
+// Child-process entry point: runs the call named by `case_name`.  Returning
+// 0 means the call came back normally, which the parent treats as a failure
+// for every case except "noop".
+static int
+run_child_case(const std::string & case_name)
+{
+  using libMesh::Kokkos::FEShapeKey;
+
+  if (case_name == "noop")
+    return 0;
+
+  if (case_name == "get_side_topology_prism6")
+  {
+    (void)libMesh::Kokkos::get_side_topology(libMesh::PRISM6);
+    return 0;
+  }
+
+  if (case_name == "get_side_topology_pyramid5")
+  {
+    (void)libMesh::Kokkos::get_side_topology(libMesh::PYRAMID5);
+    return 0;
+  }
+
+  if (case_name == "shape_lagrange_edge4_third")
+  {
+    (void)libMesh::Kokkos::shape(
+      FEShapeKey{libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD}, 0, 0.0, 0.0, 0.0);
+    return 0;
+  }
+
+  if (case_name == "grad_shape_lagrange_prism6_first")
+  {
+    (void)libMesh::Kokkos::grad_shape(
+      FEShapeKey{libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST}, 0, 0.0, 0.0, 0.0);
+    return 0;
+  }
+
+  if (case_name == "shape_monomial_hex27_sixth")
+  {
+    (void)libMesh::Kokkos::shape(
+      FEShapeKey{libMesh::MONOMIAL, libMesh::HEX27, libMesh::SIXTH}, 0, 0.0, 0.0, 0.0);
+    return 0;
+  }
+
+  if (case_name == "grad_shape_monomial_tri7_sixth")
+  {
+    (void)libMesh::Kokkos::grad_shape(
+      FEShapeKey{libMesh::MONOMIAL, libMesh::TRI7, libMesh::SIXTH}, 0, 0.0, 0.0, 0.0);
+    return 0;
+  }
+
+  if (case_name == "ndofs_lagrange_prism6_first")
+  {
+    (void)libMesh::Kokkos::n_dofs(
+      FEShapeKey{libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST});
+    return 0;
+  }
+
+  if (case_name == "map_shape_rational")
+  {
+    (void)libMesh::Kokkos::map_shape(
+      libMesh::RATIONAL_BERNSTEIN_MAP, libMesh::QUAD4, 0, 0.0, 0.0, 0.0);
+    return 0;
+  }
+
+  if (case_name == "grad_map_shape_rational")
+  {
+    (void)libMesh::Kokkos::grad_map_shape(
+      libMesh::RATIONAL_BERNSTEIN_MAP, libMesh::QUAD4, 0, 0.0, 0.0, 0.0);
+    return 0;
+  }
+
+  if (case_name == "face_normal_parent_dim2")
+  {
+    RealTensor J = libMesh::Kokkos::zero_tensor();
+    J(0, 0) = 1.0;
+    J(0, 1) = 2.0;
+    (void)libMesh::Kokkos::face_normal(J, 2);
+    return 0;
+  }
+
+  if (case_name == "map_face_qp_to_parent_prism20_tri7")
+    return run_on_tri7_side(libMesh::PRISM20, invoke_map_face_qp_to_parent_on_side);
+
+  if (case_name == "map_face_qp_to_parent_prism21_tri7")
+    return run_on_tri7_side(libMesh::PRISM21, invoke_map_face_qp_to_parent_on_side);
+
+  if (case_name == "map_face_qp_to_parent_pyramid18_tri7")
+    return run_on_tri7_side(libMesh::PYRAMID18, invoke_map_face_qp_to_parent_on_side);
+
+  if (case_name == "face_jacobian_prism20_tri7")
+    return run_on_tri7_side(libMesh::PRISM20, invoke_face_jacobian_on_side);
+
+  if (case_name == "face_jacobian_prism21_tri7")
+    return run_on_tri7_side(libMesh::PRISM21, invoke_face_jacobian_on_side);
+
+  if (case_name == "face_jacobian_pyramid18_tri7")
+    return run_on_tri7_side(libMesh::PYRAMID18, invoke_face_jacobian_on_side);
+
+  std::fprintf(stderr, "Unknown child case: %s\n", case_name.c_str());
+  return 3;
+}
+
+// Flushes all open output streams, then runs `command` through std::system()
+// and returns its raw wait status.
+static int
+run_command(const std::string & command)
+{
+  std::fflush(nullptr);
+  return std::system(command.c_str());
+}
+
+// Wraps `text` in single quotes for the shell, escaping embedded single
+// quotes, so an executable path containing spaces or shell metacharacters is
+// passed through as one word.
+static std::string
+shell_quote(const std::string & text)
+{
+  std::string quoted = "'";
+  for (const char c : text)
+  {
+    if (c == '\'')
+      quoted += "'\\''";
+    else
+      quoted += c;
+  }
+  quoted += '\'';
+  return quoted;
+}
+
+// Command line that re-runs this executable as a silent child for one case.
+static std::string
+child_command(const char * argv0, const char * case_name)
+{
+  return shell_quote(argv0) + " --child " + case_name + " >/dev/null 2>&1";
+}
+
+// True if the child for `case_name` ran and reported a zero wait status.
+static bool
+expect_child_success(const char * argv0, const char * case_name)
+{
+  const int status = run_command(child_command(argv0, case_name));
+  return status == 0;
+}
+
+// True if the child for `case_name` hard-failed: it was killed by a signal,
+// or exited with a non-zero code other than the harness codes 2 and 3.
+static bool
+expect_child_abort(const char * argv0, const char * case_name)
+{
+  const int status = run_command(child_command(argv0, case_name));
+  if (status == -1)
+    return false; // the child could not be started at all
+
+  if (WIFSIGNALED(status))
+    return true;
+
+  if (!WIFEXITED(status))
+    return false;
+
+  const int exit_code = WEXITSTATUS(status);
+  return exit_code != 0 && exit_code != 2 && exit_code != 3;
+}
+
+int
+main(int argc, char ** argv)
+{
+  // Child mode: run exactly one case and report through the exit status.
+  if (argc == 3 && std::string(argv[1]) == "--child")
+    return run_child_case(argv[2]);
+
+  // Sanity check: if the executable cannot even respawn itself, every abort
+  // expectation below would "pass" for the wrong reason.
+  if (!expect_child_success(argv[0], "noop"))
+  {
+    std::printf("[contract_spawn] FAIL (could not successfully respawn test executable)\n");
+    return 1;
+  }
+
+  const contract_case cases[] = {
+    { "get_side_topology_prism6" },
+    { "get_side_topology_pyramid5" },
+    { "shape_lagrange_edge4_third" },
+    { "grad_shape_lagrange_prism6_first" },
+    { "shape_monomial_hex27_sixth" },
+    { "grad_shape_monomial_tri7_sixth" },
+    { "ndofs_lagrange_prism6_first" },
+    { "map_shape_rational" },
+    { "grad_map_shape_rational" },
+    { "face_normal_parent_dim2" },
+    { "face_jacobian_prism20_tri7" },
+    { "face_jacobian_prism21_tri7" },
+    { "face_jacobian_pyramid18_tri7" },
+    { "map_face_qp_to_parent_prism20_tri7" },
+    { "map_face_qp_to_parent_prism21_tri7" },
+    { "map_face_qp_to_parent_pyramid18_tri7" }
+  };
+
+  int total_fail = 0;
+  for (const auto & info : cases)
+  {
+    const bool passed = expect_child_abort(argv[0], info.name);
+    const int fail = passed ? 0 : 1;
+    std::printf("[contract_abort] [%s] %s (%d failures)\n",
+                info.name,
+                passed ? "PASS" : "FAIL",
+                fail);
+    total_fail += fail;
+  }
+
+  if (total_fail == 0)
+    std::printf("ALL TESTS PASSED\n");
+  else
+    std::printf("%d TEST(S) FAILED\n", total_fail);
+
+  return total_fail ? 1 : 0;
+}
diff --git a/tests/fe/kokkos_fe_invariant_test.K b/tests/fe/kokkos_fe_invariant_test.K new file mode 100644 index 00000000000..da089f74737 --- /dev/null +++ b/tests/fe/kokkos_fe_invariant_test.K @@ -0,0 +1,418 @@
+// Kokkos kernel regression tests for libMesh::Kokkos FE invariants and quadrature exactness.
+// The test suite covers:
+// A. Partition of unity for all implemented LAGRANGE map topologies.
+// B. Zero-sum gradients for the same map topologies.
+// C. Nodal Kronecker-delta behavior at master nodes for the same map topologies.
+// D. Quadrature exactness sweeps for the canonical tensor-product and simplex
+// reference topologies using analytic monomial integrals on the reference
+// element.
+//
+// Returns 0 on success, non-zero on failure.
+
+#include "gpu/kokkos_fe_evaluator.h"
+#include "gpu/kokkos_quadrature.h"
+#include "../quadrature/quadrature_exactness.h"
+
+#include "libmesh/elem.h"
+#include "libmesh/libmesh.h"
+#include "libmesh/quadrature_gauss.h"
+
+// NOTE(review): every <...> span in this file (system header names and
+// template argument lists) is missing as found here; the code below is kept
+// token-for-token and needs those spans restored from the upstream patch.
+
+// Avoid conflicting complex operators between CUDA and PETSc
+#define PETSC_SKIP_CXX_COMPLEX_FIX 1
+#include
+#undef __CUDACC_VER__
+
+#include "kokkos_fe_oracle_test_utils.h"
+
+#include
+#include
+#include
+#include
+#include
+
+using libMesh::Kokkos::GaussQuadrature;
+using libMesh::Kokkos::Real;
+using libMesh::Kokkos::RealVector;
+using kokkos_test_utils::build_qps;
+using kokkos_test_utils::build_reference_elem;
+using kokkos_test_utils::compare_device_values;
+using kokkos_test_utils::dispatch_supported_lagrange_map_topology;
+using kokkos_test_utils::upload_real;
+
+// Tolerance for the shape-function invariants (tests A-C).
+static constexpr double invariant_tol = 1.0e-13;
+// Tolerance for |device integral - analytic integral| (test D).
+static constexpr double exactness_tol = 2.0e-12;
+// Order passed to build_qps() when sampling points for tests A and B.
+static constexpr unsigned int quadrature_order = 4;
+
+namespace
+{
+
+// One map topology under test: element type, dimension, number of map
+// shape functions, and the label used in the report lines.
+struct map_elem_info
+{
+  libMesh::ElemType topo;
+  unsigned int dim;
+  unsigned int n_dofs;
+  const char * name;
+};
+
+// One quadrature sweep: reference topology, its dimension, the highest
+// order to test, and the label used in the report lines.
+struct quadrature_exactness_case
+{
+  libMesh::ElemType topo;
+  unsigned int dim;
+  unsigned int max_order;
+  const char * name;
+};
+
+static const map_elem_info map_elems[] = {
+  { libMesh::EDGE2, 1, 2, "EDGE2" },
+  { libMesh::EDGE3, 1, 3, "EDGE3" },
+  { libMesh::TRI3, 2, 3, "TRI3" },
+  { libMesh::TRI6, 2, 6, "TRI6" },
+  { libMesh::QUAD4, 2, 4, "QUAD4" },
+  { libMesh::QUAD8, 2, 8, "QUAD8" },
+  { libMesh::QUAD9, 2, 9, "QUAD9" },
+  { libMesh::TET4, 3, 4, "TET4" },
+  { libMesh::TET10, 3, 10, "TET10" },
+  { libMesh::HEX8, 3, 8, "HEX8" },
+  { libMesh::HEX20, 3, 20, "HEX20" },
+  { libMesh::HEX27, 3, 27, "HEX27" }
+};
+
+// These cases sweep the full exactness range provided by the current Kokkos
+// QGauss helper:
+// - tensor-product rules: through order 13 (7-point 1D Gauss-Legendre)
+// - simplex rules: through order 6 (highest explicit triangle/tet tables)
+static const quadrature_exactness_case quadrature_cases[] = {
+  { libMesh::EDGE2, 1, 13, "EDGE2" },
+  { libMesh::TRI3, 2, 6, "TRI3" },
+  { libMesh::QUAD4, 2, 13, "QUAD4" },
+  { libMesh::TET4, 3, 6, "TET4" },
+  { libMesh::HEX8, 3, 13, "HEX8" }
+};
+
+} // anonymous namespace
+
+// Returns x raised to the non-negative integer power p by repeated
+// multiplication (p == 0 yields 1).
+LIBMESH_DEVICE_INLINE Real
+int_pow(Real x, unsigned int p)
+{
+  Real result = 1.0;
+  for (unsigned int i = 0; i < p; ++i)
+    result *= x;
+  return result;
+}
+
+// Test A: at every sample point the map shape functions must sum to 1.
+// Returns whatever compare_device_values() reports (printed by main as a
+// failure count).
+template
+static int
+test_partition_of_unity_impl(const map_elem_info & info)
+{
+  std::vector xi_h, eta_h, zeta_h;
+  const unsigned int nqp = build_qps(info.topo, info.dim, quadrature_order, xi_h, eta_h, zeta_h);
+
+  auto d_xi = upload_real(xi_h, "unity_xi");
+  auto d_eta = upload_real(eta_h, "unity_eta");
+  auto d_zeta = upload_real(zeta_h, "unity_zeta");
+
+  Kokkos::View d_sum(std::string("unity_sum"), nqp);
+
+  // Copy the count into a local so the device lambda does not capture `info`.
+  const unsigned int n_dofs = info.n_dofs;
+  Kokkos::parallel_for(
+    nqp,
+    KOKKOS_LAMBDA(int q) {
+      Real sum = 0.0;
+      for (unsigned int i = 0; i < n_dofs; ++i)
+        sum += libMesh::Kokkos::map_shape(i, d_xi(q), d_eta(q), d_zeta(q));
+      d_sum(q) = sum;
+    });
+  Kokkos::fence();
+
+  std::vector ref_values(nqp, 1.0);
+  return compare_device_values(d_sum, ref_values, invariant_tol);
+}
+
+// Functor handed to dispatch_supported_lagrange_map_topology(): forwards to
+// the templated test, or reports an unexpected topology as one failure.
+struct partition_of_unity_dispatch
+{
+  explicit partition_of_unity_dispatch(const map_elem_info & in_info) : info(in_info) {}
+
+  template
+  int operator()() const
+  {
+    return test_partition_of_unity_impl(info);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const
+  {
+    std::printf(" unexpected unsupported partition topology: %s type=%d\n",
+                info.name,
+                static_cast(topo));
+    return 1;
+  }
+
+  const map_elem_info & info;
+};
+
+// Runs test A for the topology described by `info`.
+static int
+test_partition_of_unity(const map_elem_info & info)
+{
+  const partition_of_unity_dispatch dispatch(info);
+  return dispatch_supported_lagrange_map_topology(info.topo, dispatch);
+}
+
+// Test B: at every sample point the map shape-function gradients must sum
+// to the zero vector.  Results are stored as dim consecutive components per
+// point.
+template
+static int
+test_zero_sum_gradients_impl(const map_elem_info & info)
+{
+  std::vector xi_h, eta_h, zeta_h;
+  const unsigned int nqp = build_qps(info.topo, info.dim, quadrature_order, xi_h, eta_h, zeta_h);
+
+  auto d_xi = upload_real(xi_h, "gradsum_xi");
+  auto d_eta = upload_real(eta_h, "gradsum_eta");
+  auto d_zeta = upload_real(zeta_h, "gradsum_zeta");
+
+  Kokkos::View d_sum(std::string("gradsum"), info.dim * nqp);
+
+  const unsigned int n_dofs = info.n_dofs;
+  const unsigned int dim = info.dim;
+  Kokkos::parallel_for(
+    nqp,
+    KOKKOS_LAMBDA(int q) {
+      RealVector sum = libMesh::Kokkos::zero_vector();
+      for (unsigned int i = 0; i < n_dofs; ++i)
+        sum += libMesh::Kokkos::grad_map_shape(
+          i, d_xi(q), d_eta(q), d_zeta(q));
+
+      for (unsigned int d = 0; d < dim; ++d)
+        d_sum(dim * q + d) = sum(d);
+    });
+  Kokkos::fence();
+
+  std::vector ref_values(info.dim * nqp, 0.0);
+  return compare_device_values(d_sum, ref_values, invariant_tol);
+}
+
+// Dispatch functor for test B; same shape as partition_of_unity_dispatch.
+struct zero_sum_gradients_dispatch
+{
+  explicit zero_sum_gradients_dispatch(const map_elem_info & in_info) : info(in_info) {}
+
+  template
+  int operator()() const
+  {
+    return test_zero_sum_gradients_impl(info);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const
+  {
+    std::printf(" unexpected unsupported zero-sum gradient topology: %s type=%d\n",
+                info.name,
+                static_cast(topo));
+    return 1;
+  }
+
+  const map_elem_info & info;
+};
+
+// Runs test B for the topology described by `info`.
+static int
+test_zero_sum_gradients(const map_elem_info & info)
+{
+  const zero_sum_gradients_dispatch dispatch(info);
+  return dispatch_supported_lagrange_map_topology(info.topo, dispatch);
+}
+
+// Test C: map shape function i evaluated at master node j must equal 1 when
+// i == j and 0 otherwise.  The n_dofs x n_dofs table is stored row-major
+// (index = i * n_dofs + j).
+template
+static int
+test_kronecker_delta_impl(const map_elem_info & info)
+{
+  auto elem = build_reference_elem(Topo);
+
+  // Evaluation points are the element's own master-node coordinates.
+  std::vector xi_h(info.n_dofs), eta_h(info.n_dofs), zeta_h(info.n_dofs);
+  for (unsigned int j = 0; j < info.n_dofs; ++j)
+  {
+    const libMesh::Point p = elem->master_point(j);
+    xi_h[j] = p(0);
+    eta_h[j] = p(1);
+    zeta_h[j] = p(2);
+  }
+
+  auto d_xi = upload_real(xi_h, "delta_xi");
+  auto d_eta = upload_real(eta_h, "delta_eta");
+  auto d_zeta = upload_real(zeta_h, "delta_zeta");
+
+  Kokkos::View d_values(std::string("delta_values"), info.n_dofs * info.n_dofs);
+
+  const unsigned int n_dofs = info.n_dofs;
+  Kokkos::parallel_for(
+    n_dofs * n_dofs,
+    KOKKOS_LAMBDA(int idx) {
+      const unsigned int i = static_cast(idx) / n_dofs;
+      const unsigned int j = static_cast(idx) % n_dofs;
+      d_values(idx) = libMesh::Kokkos::map_shape(
+        i, d_xi(j), d_eta(j), d_zeta(j));
+    });
+  Kokkos::fence();
+
+  // Reference table: identity matrix in the same row-major layout.
+  std::vector ref_values(info.n_dofs * info.n_dofs, 0.0);
+  for (unsigned int i = 0; i < info.n_dofs; ++i)
+    ref_values[i * info.n_dofs + i] = 1.0;
+
+  return compare_device_values(d_values, ref_values, invariant_tol);
+}
+
+// Dispatch functor for test C; same shape as partition_of_unity_dispatch.
+struct kronecker_delta_dispatch
+{
+  explicit kronecker_delta_dispatch(const map_elem_info & in_info) : info(in_info) {}
+
+  template
+  int operator()() const
+  {
+    return test_kronecker_delta_impl(info);
+  }
+
+  int unsupported_topology(libMesh::ElemType topo) const
+  {
+    std::printf(" unexpected unsupported kronecker topology: %s type=%d\n",
+                info.name,
+                static_cast(topo));
+    return 1;
+  }
+
+  const map_elem_info & info;
+};
+
+// Runs test C for the topology described by `info`.
+static int
+test_kronecker_delta(const map_elem_info & info)
+{
+  const kronecker_delta_dispatch dispatch(info);
+  return dispatch_supported_lagrange_map_topology(info.topo, dispatch);
+}
+
+// Integrates the monomial x^a y^b z^c over the reference element of
+// `info.topo` with the rule of the given `order`, summing weight * monomial
+// over all quadrature points in a parallel_reduce.  In builds with
+// LIBMESH_DIM < 2 (< 3) the exponent b (c) is asserted to be zero instead of
+// being applied.
+static double
+integrate_monomial_on_device(const quadrature_exactness_case & info,
+                             unsigned int order,
+                             unsigned int a,
+                             unsigned int b,
+                             unsigned int c)
+{
+  const unsigned int nqp = GaussQuadrature::n_points(info.topo, order);
+  const libMesh::ElemType topo = info.topo;
+
+  double integral = 0.0;
+  Kokkos::parallel_reduce(
+    nqp,
+    KOKKOS_LAMBDA(int qp, double & local_sum) {
+      const RealVector pt = GaussQuadrature::point(topo, order, qp);
+      const Real weight = GaussQuadrature::weight(topo, order, qp);
+      Real monomial = int_pow(pt(0), a);
+
+#if LIBMESH_DIM > 1
+      monomial *= int_pow(pt(1), b);
+#else
+      libmesh_assert_equal_to(b, 0);
+#endif
+
+#if LIBMESH_DIM > 2
+      monomial *= int_pow(pt(2), c);
+#else
+      libmesh_assert_equal_to(c, 0);
+#endif
+
+      local_sum += static_cast(weight) * static_cast(monomial);
+    },
+    integral);
+
+  return integral;
+}
+
+// Test D: for every order 0..max_order, every monomial of total degree
+// <= order must be integrated to within exactness_tol of the analytic value
+// from quadrature_exactness::monomial_integral().  Returns the number of
+// (order, monomial) pairs that miss; an unexpected `dim` counts as one miss
+// per order.  Individual misses are not printed.
+static int
+test_quadrature_exactness(const quadrature_exactness_case & info)
+{
+  int fail = 0;
+
+  for (unsigned int order = 0; order <= info.max_order; ++order)
+    switch (info.dim)
+    {
+      case 1:
+        for (unsigned int a = 0; a <= order; ++a)
+        {
+          const double actual = integrate_monomial_on_device(info, order, a, 0, 0);
+          const double expected = quadrature_exactness::monomial_integral(info.topo, a, 0, 0);
+          if (std::fabs(actual - expected) > exactness_tol)
+            ++fail;
+        }
+        break;
+
+      case 2:
+        for (unsigned int a = 0; a <= order; ++a)
+          for (unsigned int b = 0; a + b <= order; ++b)
+          {
+            const double actual = integrate_monomial_on_device(info, order, a, b, 0);
+            const double expected = quadrature_exactness::monomial_integral(info.topo, a, b, 0);
+            if (std::fabs(actual - expected) > exactness_tol)
+              ++fail;
+          }
+        break;
+
+      case 3:
+        for (unsigned int a = 0; a <= order; ++a)
+          for (unsigned int b = 0; a + b <= order; ++b)
+            for (unsigned int c = 0; a + b + c <= order; ++c)
+            {
+              const double actual = integrate_monomial_on_device(info, order, a, b, c);
+              const double expected = quadrature_exactness::monomial_integral(info.topo, a, b, c);
+              if (std::fabs(actual - expected) > exactness_tol)
+                ++fail;
+            }
+        break;
+
+      default:
+        ++fail;
+        break;
+    }
+
+  return fail;
+}
+
+// Runs tests A-C for every entry of map_elems and test D for every entry of
+// quadrature_cases, printing one PASS/FAIL line per test and a final summary.
+int
+main(int argc, char ** argv)
+{
+  // NOTE(review): Kokkos is initialized before LibMeshInit here. If
+  // LibMeshInit is what initializes MPI in this build, the Kokkos docs ask
+  // for the opposite order (MPI first) — confirm.
+  Kokkos::initialize(argc, argv);
+  libMesh::LibMeshInit init(argc, argv);
+
+  int total_fail = 0;
+
+  for (const auto & info : map_elems)
+  {
+    {
+      const int f = test_partition_of_unity(info);
+      std::printf("[partition_of_unity] [%s] %s (%d failures)\n",
+                  info.name, f ? "FAIL" : "PASS", f);
+      total_fail += f;
+    }
+    {
+      const int f = test_zero_sum_gradients(info);
+      std::printf("[zero_sum_gradients] [%s] %s (%d failures)\n",
+                  info.name, f ? "FAIL" : "PASS", f);
+      total_fail += f;
+    }
+    {
+      const int f = test_kronecker_delta(info);
+      std::printf("[kronecker_delta] [%s] %s (%d failures)\n",
+                  info.name, f ? "FAIL" : "PASS", f);
+      total_fail += f;
+    }
+  }
+
+  for (const auto & info : quadrature_cases)
+  {
+    const int f = test_quadrature_exactness(info);
+    std::printf("[quadrature_exactness] [%s] %s (%d failures)\n",
+                info.name, f ? "FAIL" : "PASS", f);
+    total_fail += f;
+  }
+
+  // Finalize Kokkos explicitly while `init` is still alive.
+  Kokkos::finalize();
+
+  if (total_fail == 0)
+    std::printf("ALL TESTS PASSED\n");
+  else
+    std::printf("%d TEST(S) FAILED\n", total_fail);
+
+  return total_fail ? 1 : 0;
+}
diff --git a/tests/fe/kokkos_fe_map_oracle_test.K b/tests/fe/kokkos_fe_map_oracle_test.K new file mode 100644 index 00000000000..afde3b315ff --- /dev/null +++ b/tests/fe/kokkos_fe_map_oracle_test.K @@ -0,0 +1,529 @@
+// GPU kernel tests for libMesh::Kokkos map helpers across broad topology coverage.
+//
+// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that
+// FEMap, FEBase, and FEBase::side_map are available for oracle values.
+//
+// The test suite covers:
+// A. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase
+// for all implemented LAGRANGE map topologies.
+// B. face_jacobian(), face_jxw(), face_normal(), and
+// edge_normal_on_parent_surface() against libMesh FE oracles for all
+// sides of the implemented 2D and 3D parent topologies.
+// C. map_face_qp_to_parent() against libMesh FEBase::side_map() for all
+// sides and multiple side quadrature points, including supported
+// mixed-face prism and pyramid element types.
+//
+// Returns 0 on success, non-zero on failure.
+ +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_reference_fixture; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::is_supported_lagrange_face_map_topology; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; + +namespace +{ + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_qp_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +} // anonymous namespace + +template +static int +test_map_helpers_case_impl(const map_helper_case & info) +{ + auto fixture = build_reference_fixture(Topo); + const auto context = build_map_helper_context(fixture, info.topo, "map_oracle"); + const int fail = evaluate_map_helper_context(context, "map_oracle_results", tol); + if (fail) + std::printf(" map helper mismatch: topo=%s (%d failures)\n", info.name, fail); + + return 
fail; +} + +struct map_helper_dispatch +{ + explicit map_helper_dispatch(const map_helper_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_helpers_case_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-helper topology: topo=%s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; +}; + +static int +test_map_helpers_case(const map_helper_case & info) +{ + const map_helper_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_face_helper_side_case_3d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, "face_oracle_results", tol); + if (fail) + std::printf(" face helper mismatch: parent=%s side_id=%u side_type=%d (%d failures)\n", + parent_name, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_face_helper_side_case_2d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + libMesh::ElemType side_topo) +{ + const int fail = + evaluate_face_helper_context_2d(context, "face_oracle_results", tol); + if (fail) + std::printf(" face helper mismatch: parent=%s side_id=%u side_type=%d (%d failures)\n", + parent_name, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct face_helper_side_dispatch_3d +{ + face_helper_side_dispatch_3d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_helper_side_case_3d_impl(context, side_id, parent_name, side_topo); + } + 
+ int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper side: parent=%s side_id=%u side_type=%d\n", + parent_name, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + libMesh::ElemType side_topo; +}; + +template +struct face_helper_side_dispatch_2d +{ + face_helper_side_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_helper_side_case_2d_impl( + context, side_id, parent_name, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper side: parent=%s side_id=%u side_type=%d\n", + parent_name, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + libMesh::ElemType side_topo; +}; + +struct face_helper_parent_dispatch_2d +{ + face_helper_parent_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const face_helper_side_dispatch_2d dispatch( + context, side_id, parent_name, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper parent: parent=%s parent_type=%d side_id=%u\n", + parent_name, + static_cast(topo), + side_id); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const 
char * parent_name; + libMesh::ElemType side_topo; +}; + +static int +test_face_helper_side_case(const element_fixture & fixture, + unsigned int side_id, + const char * parent_name) +{ + auto side = fixture.elem->build_side_ptr(side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, side_id, "face_oracle"); + + if (context.parent_dim == 3) + { + const face_helper_side_dispatch_3d dispatch(context, side_id, parent_name, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const face_helper_parent_dispatch_2d dispatch(context, side_id, parent_name, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported face-helper parent dimension: parent=%s dim=%u side_id=%u\n", + parent_name, + context.parent_dim, + side_id); + return 1; +} + +static int +test_face_helpers_for_parent(const face_parent_case & info) +{ + auto fixture = build_reference_fixture(info.topo); + + int fail = 0; + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + fail += test_face_helper_side_case(fixture, side_id, info.name); + + return fail; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + + // FE::side_map() relies on FEMap::psi_map, which is only populated after + // some mapping quantity (e.g. xyz) has been requested on the FE object. 
+ fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_face_qp_to_parent_case(const char * parent_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + { + std::vector refspace_nodes; + libMesh::FEBase::get_refspace_nodes(parent.type(), refspace_nodes); + + std::printf(" face_qp mismatch: parent=%s side_id=%u parent_type=%d side_type=%d\n", + parent_name, + side_id, + static_cast(parent.type()), + static_cast(side.type())); + std::printf(" face_qpt=(%.17g, %.17g, %.17g)\n", + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + std::printf(" host =(%.17g, %.17g, %.17g)\n", + vector_component(host, 0), vector_component(host, 1), vector_component(host, 2)); + std::printf(" kokkos =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0), vector_component(kokkos, 1), vector_component(kokkos, 2)); + std::printf(" side nodes / parent refspace nodes:\n"); + + for (unsigned int k = 0; k < side.n_nodes(); ++k) + { + const unsigned int parent_node = parent.local_side_node(side_id, k); + const libMesh::Point parent_refspace = refspace_nodes[parent_node]; + std::printf(" k=%u side_node_id=%llu parent_node=%u 
parent_refspace=(%.17g, %.17g, %.17g)\n", + k, + libMesh::cast_int(side.node_id(k)), + parent_node, + parent_refspace(0), + parent_refspace(1), + parent_refspace(2)); + } + } + + return fail; +} + +static int +test_face_qp_to_parent_for_parent(const face_qp_parent_case & info) +{ + auto fixture = build_reference_fixture(info.topo); + + int fail = 0; + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + + if (side->n_nodes() == 1) + { + fail += + check_face_qp_to_parent_case(info.name, *fixture.elem, *side, side_id, libMesh::Kokkos::zero_vector()); + continue; + } + + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected unsupported face_qp side: parent=%s side_id=%u side_type=%d\n", + info.name, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + libMesh::QGauss qr(side->dim(), libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + for (unsigned int q = 0; q < qr.n_points(); ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? 
qr.qp(q)(2) : Real(0)); + fail += check_face_qp_to_parent_case(info.name, *fixture.elem, *side, side_id, face_qpt); + } + } + + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const map_helper_case cases[] = { + { libMesh::EDGE2, "EDGE2" }, + { libMesh::EDGE3, "EDGE3" }, + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" } + }; + + for (const auto & info : cases) + { + const int f = test_map_helpers_case(info); + std::printf("[map_helper_breadth] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + { + const face_parent_case cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } + }; + + for (const auto & info : cases) + { + const int f = test_face_helpers_for_parent(info); + std::printf("[face_helper_breadth] [%s] %s (%d failures)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + } + + { + const face_qp_parent_case cases[] = { + { libMesh::EDGE2, "EDGE2" }, + { libMesh::EDGE3, "EDGE3" }, + { libMesh::EDGE4, "EDGE4" }, + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } + }; + + for (const auto & info : cases) + { + const int f = test_face_qp_to_parent_for_parent(info); + std::printf("[face_qp_parent_breadth] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_oracle_test_utils.h b/tests/fe/kokkos_fe_oracle_test_utils.h new file mode 100644 index 00000000000..5d6f53e0920 --- /dev/null +++ b/tests/fe/kokkos_fe_oracle_test_utils.h @@ -0,0 +1,1386 @@ +#ifndef LIBMESH_TESTS_FE_KOKKOS_FE_ORACLE_TEST_UTILS_H +#define LIBMESH_TESTS_FE_KOKKOS_FE_ORACLE_TEST_UTILS_H + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +#include +#include +#include +#include + +// This header is intended for the standalone Kokkos test executables, which +// include Kokkos before pulling in these helpers. 
+ +namespace kokkos_test_utils +{ + +LIBMESH_DEVICE_INLINE libMesh::Real +vector_component(const libMesh::Kokkos::RealVector & v, unsigned int component) +{ + switch (component) + { + case 0: + return v(0); + case 1: +#if LIBMESH_DIM > 1 + return v(1); +#else + return 0.0; +#endif + case 2: +#if LIBMESH_DIM > 2 + return v(2); +#else + return 0.0; +#endif + default: + return 0.0; + } +} + +LIBMESH_DEVICE_INLINE libMesh::Real +tensor_component(const libMesh::Kokkos::RealTensor & T, unsigned int i, unsigned int j) +{ +#if LIBMESH_DIM > 2 + return T(i, j); +#elif LIBMESH_DIM > 1 + if (i < 2 && j < 2) + return T(i, j); + return 0.0; +#else + if (i == 0 && j == 0) + return T(0, 0); + return 0.0; +#endif +} + +struct element_fixture +{ + std::unique_ptr elem; + std::vector> nodes; +}; + +struct map_helper_context +{ + std::vector ref_values; + Kokkos::View d_coords; + Kokkos::View d_xi; + Kokkos::View d_eta; + Kokkos::View d_zeta; + Kokkos::View d_w; + unsigned int nqp; + unsigned int dim; + unsigned int n_nodes; +}; + +struct face_helper_context +{ + std::vector ref_values; + Kokkos::View d_face_coords; + Kokkos::View d_parent_coords; + Kokkos::View d_xi; + Kokkos::View d_eta; + Kokkos::View d_zeta; + Kokkos::View d_w; + Kokkos::View d_parent_xi; + Kokkos::View d_parent_eta; + Kokkos::View d_parent_zeta; + unsigned int nqp; + unsigned int parent_dim; + unsigned int n_parent_nodes; + unsigned int n_face_nodes; +}; + +template +struct lagrange_evaluator_topology +{ + static const libMesh::ElemType value = libMesh::INVALID_ELEM; +}; + +#define KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(exact_topo, exact_order, evaluator_topo) \ + template <> \ + struct lagrange_evaluator_topology \ + { \ + static const libMesh::ElemType value = libMesh::evaluator_topo; \ + } + +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE2, FIRST, EDGE2); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE3, FIRST, EDGE2); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE3, SECOND, EDGE3); 
+KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE4, FIRST, EDGE2); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI3, FIRST, TRI3); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI6, FIRST, TRI3); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI6, SECOND, TRI6); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI7, FIRST, TRI3); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI7, SECOND, TRI6); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD4, FIRST, QUAD4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD8, FIRST, QUAD4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD8, SECOND, QUAD8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD9, FIRST, QUAD4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD9, SECOND, QUAD9); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET4, FIRST, TET4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET10, FIRST, TET4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET10, SECOND, TET10); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET14, FIRST, TET4); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET14, SECOND, TET10); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX8, FIRST, HEX8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX20, FIRST, HEX8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX20, SECOND, HEX20); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX27, FIRST, HEX8); +KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX27, SECOND, HEX27); + +#undef KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE + +template +struct monomial_evaluator_dim +{ + static const unsigned int value = 0; +}; + +#define KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(exact_topo, dim_value) \ + template <> \ + struct monomial_evaluator_dim \ + { \ + static const unsigned int value = dim_value; \ + } + +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE2, 1); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE3, 1); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE4, 1); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI3, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI6, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI7, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD4, 2); 
+KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD8, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD9, 2); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET4, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET10, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET14, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX8, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX20, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX27, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM6, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM15, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM18, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM20, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM21, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID5, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID13, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID14, 3); +KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID18, 3); + +#undef KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE + +template +struct monomial_order_evaluator; + +#define KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(dim_value, exact_order, impl_suffix, impl_order) \ + template <> \ + struct monomial_order_evaluator \ + { \ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, \ + libMesh::Real xi, \ + libMesh::Real eta, \ + libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::shape(i, xi, eta, zeta); \ + } \ + \ + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, \ + libMesh::Real xi, \ + libMesh::Real eta, \ + libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::grad_shape(i, xi, eta, zeta); \ + } \ + } + +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, CONSTANT, MonomialImpl1D, 0); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FIRST, MonomialImpl1D, 1); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, SECOND, MonomialImpl1D, 2); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, THIRD, MonomialImpl1D, 3); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FOURTH, MonomialImpl1D, 4); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FIFTH, MonomialImpl1D, 5); 
+KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, CONSTANT, MonomialImpl2D, 0); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FIRST, MonomialImpl2D, 1); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, SECOND, MonomialImpl2D, 2); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, THIRD, MonomialImpl2D, 3); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FOURTH, MonomialImpl2D, 4); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FIFTH, MonomialImpl2D, 5); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, CONSTANT, MonomialImpl3D, 0); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FIRST, MonomialImpl3D, 1); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, SECOND, MonomialImpl3D, 2); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, THIRD, MonomialImpl3D, 3); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FOURTH, MonomialImpl3D, 4); +KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FIFTH, MonomialImpl3D, 5); + +#undef KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE + +template +struct exact_shape_evaluator; + +template +struct exact_shape_evaluator +{ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return libMesh::Kokkos::map_shape::value>( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return libMesh::Kokkos::grad_map_shape::value>( + i, xi, eta, zeta); + } +}; + +template +struct exact_shape_evaluator +{ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::value, ExactOrder>::shape( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::value, ExactOrder>::grad_shape( + i, xi, eta, zeta); + } +}; + +template +LIBMESH_DEVICE_INLINE libMesh::Real 
+shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::shape(i, xi, eta, zeta); +} + +template +LIBMESH_DEVICE_INLINE libMesh::Kokkos::RealVector +grad_shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::grad_shape(i, xi, eta, zeta); +} + +template +inline int +dispatch_supported_monomial_order(libMesh::Order order, const Dispatcher & dispatcher) +{ + switch (order) + { + case libMesh::CONSTANT: + return dispatcher.template operator()(); + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + case libMesh::FOURTH: + return dispatcher.template operator()(); + case libMesh::FIFTH: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(libMesh::Kokkos::FEShapeKey{ libMesh::MONOMIAL, ExactTopo, order }); + } +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo); + +template +inline int +dispatch_supported_lagrange_shape_key(libMesh::Kokkos::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (key.family != libMesh::LAGRANGE || !libMesh::Kokkos::supports_shape(key)) + return dispatcher.unsupported_key(key); + + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + 
switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI7: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET14: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return 
dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_supported_lagrange_shape_key_with_map(libMesh::Kokkos::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (key.family != libMesh::LAGRANGE || + !libMesh::Kokkos::supports_shape(key) || + !is_supported_lagrange_map_topology(key.elem_type)) + return dispatcher.unsupported_key(key); + + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch 
(key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_supported_shape_key(libMesh::Kokkos::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (!libMesh::Kokkos::supports_shape(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_supported_lagrange_shape_key(key, dispatcher); + + case libMesh::MONOMIAL: + switch (key.elem_type) + { + case libMesh::EDGE2: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI7: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM15: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM18: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM21: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID5: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID13: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID14: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID18: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + case libMesh::TET4: + case libMesh::TET10: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return true; + + default: + return false; + } +} + +inline bool +supports_shape_key_with_lagrange_map(libMesh::Kokkos::FEShapeKey key) +{ + return libMesh::Kokkos::supports_shape(key) && + is_supported_lagrange_map_topology(key.elem_type); +} + +template +inline int +dispatch_supported_shape_key_with_lagrange_map(libMesh::Kokkos::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (!supports_shape_key_with_lagrange_map(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_supported_lagrange_shape_key_with_map(key, dispatcher); + + case libMesh::MONOMIAL: + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_face_map_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return true; + + default: + return false; + } +} + +template +inline int +dispatch_supported_lagrange_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + switch (topo) + { + case libMesh::EDGE2: + return dispatcher.template operator()(); + case libMesh::EDGE3: + return dispatcher.template operator()(); + case libMesh::TRI3: + return dispatcher.template operator()(); + case libMesh::TRI6: + return dispatcher.template operator()(); + case libMesh::QUAD4: + return dispatcher.template operator()(); + case libMesh::QUAD8: + return dispatcher.template operator()(); + case libMesh::QUAD9: + return dispatcher.template operator()(); + case libMesh::TET4: + return dispatcher.template operator()(); + case libMesh::TET10: + return dispatcher.template operator()(); + case libMesh::HEX8: + return dispatcher.template operator()(); + case libMesh::HEX20: + return dispatcher.template operator()(); + case libMesh::HEX27: + return dispatcher.template operator()(); + + default: + return dispatcher.unsupported_topology(topo); + } +} + +template +inline int +dispatch_supported_lagrange_face_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + if 
(!is_supported_lagrange_face_map_topology(topo)) + return dispatcher.unsupported_topology(topo); + + return dispatch_supported_lagrange_map_topology(topo, dispatcher); +} + +inline int +compare_device_values(const Kokkos::View & d_values, + const std::vector & ref_values, + double tol = 1.0e-13) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (!(std::fabs(h_values(i) - ref_values[i]) <= tol)) // negated <= so a NaN difference is counted as a failure + ++fail; + + return fail; +} + +inline std::unique_ptr +build_reference_elem(libMesh::ElemType elem_type) +{ + auto elem = libMesh::Elem::build(elem_type); + elem->set_mapping_type(libMesh::LAGRANGE_MAP); + return elem; +} + +inline unsigned int +build_qps(libMesh::ElemType elem_type, + unsigned int dim, + unsigned int quadrature_order, + std::vector & xi_h, + std::vector & eta_h, + std::vector & zeta_h) +{ + libMesh::QGauss qr(dim, static_cast(quadrature_order)); + qr.allow_rules_with_negative_weights = true; + qr.init(elem_type); + + const unsigned int nqp = qr.n_points(); + xi_h.resize(nqp); + eta_h.resize(nqp); + zeta_h.resize(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = (dim >= 3) ? 
qr.qp(q)(2) : libMesh::Real(0); + } + + return nqp; +} + +inline unsigned int +build_qps(libMesh::ElemType elem_type, + unsigned int dim, + std::vector & xi_h, + std::vector & eta_h, + std::vector & zeta_h) +{ + return build_qps(elem_type, dim, /*quadrature_order=*/4, xi_h, eta_h, zeta_h); +} + +inline unsigned int +build_host_qgauss(libMesh::ElemType topo, + unsigned int dim, + unsigned int order, + std::vector & x_ref, + std::vector & y_ref, + std::vector & z_ref, + std::vector & w_ref) +{ + libMesh::QGauss qr(dim, static_cast(order)); + qr.allow_rules_with_negative_weights = true; + qr.init(topo); + + const unsigned int nqp = qr.n_points(); + x_ref.resize(nqp); + y_ref.resize(nqp); + z_ref.resize(nqp); + w_ref.resize(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + x_ref[q] = qr.qp(q)(0); + y_ref[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + z_ref[q] = (dim >= 3) ? qr.qp(q)(2) : libMesh::Real(0); + w_ref[q] = qr.w(q); + } + + return nqp; +} + +inline Kokkos::View +upload_real(const std::vector & values, const char * label) +{ + Kokkos::View d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + Kokkos::deep_copy(d, h); + return d; +} + +inline Kokkos::View +upload_point_coordinates(const libMesh::Elem & elem, const char * label) +{ + Kokkos::View d(std::string(label), 3 * elem.n_nodes()); + auto h = Kokkos::create_mirror_view(d); + for (unsigned int i = 0; i < elem.n_nodes(); ++i) + { + h(3 * i + 0) = elem.point(i)(0); + h(3 * i + 1) = elem.point(i)(1); + h(3 * i + 2) = elem.point(i)(2); + } + Kokkos::deep_copy(d, h); + return d; +} + +inline std::string +make_label(const char * prefix, const char * suffix) +{ + return std::string(prefix) + suffix; +} + +inline element_fixture +build_reference_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = build_reference_elem(elem_type); + 
fixture.nodes.reserve(fixture.elem->n_nodes()); + + const unsigned int dim = fixture.elem->dim(); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point master = fixture.elem->master_point(i); + const libMesh::Real xi = master(0); + const libMesh::Real eta = master(1); + const libMesh::Real zeta = master(2); + + libMesh::Point xyz; + switch (dim) + { + case 1: + xyz = libMesh::Point( + 0.7 + 0.8 * xi + 0.06 * xi * xi, + -0.3 + 0.25 * xi + 0.04 * xi * xi, + 0.2 + 0.1 * xi - 0.03 * xi * xi); + break; + + case 2: + xyz = libMesh::Point( + 0.4 + 0.9 * xi + 0.15 * eta + 0.04 * xi * eta + 0.03 * eta * eta, + -0.2 + 0.2 * xi + 0.85 * eta + 0.05 * xi * xi + 0.03 * xi * eta, + 0.1 + 0.12 * xi - 0.08 * eta + 0.02 * xi * eta); + break; + + case 3: + xyz = libMesh::Point( + 0.3 + 0.9 * xi + 0.12 * eta + 0.08 * zeta + 0.03 * xi * eta + 0.02 * zeta * zeta, + -0.1 + 0.18 * xi + 0.8 * eta + 0.11 * zeta + 0.02 * eta * zeta, + 0.2 + 0.10 * xi + 0.14 * eta + 0.85 * zeta + 0.02 * xi * zeta + 0.01 * xi * eta); + break; + + default: + xyz = libMesh::Point(); + break; + } + + fixture.nodes.push_back(libMesh::Node::build(xyz(0), xyz(1), xyz(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +inline element_fixture +build_flat_reference_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = build_reference_elem(elem_type); + fixture.nodes.reserve(fixture.elem->n_nodes()); + + const unsigned int dim = fixture.elem->dim(); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point master = fixture.elem->master_point(i); + const libMesh::Real xi = master(0); + const libMesh::Real eta = master(1); + const libMesh::Real zeta = master(2); + + libMesh::Point xyz; + switch (dim) + { + case 1: + xyz = libMesh::Point(0.7 + 0.8 * xi + 0.06 * xi * xi, + 0.0, + 0.0); + break; + + case 2: + xyz = libMesh::Point(0.4 + 0.9 * xi + 0.15 * eta + 0.04 * xi * eta + 0.03 * 
eta * eta, + -0.2 + 0.2 * xi + 0.85 * eta + 0.05 * xi * xi + 0.03 * xi * eta, + 0.0); + break; + + case 3: + xyz = libMesh::Point( + 0.3 + 0.9 * xi + 0.12 * eta + 0.08 * zeta + 0.03 * xi * eta + 0.02 * zeta * zeta, + -0.1 + 0.18 * xi + 0.8 * eta + 0.11 * zeta + 0.02 * eta * zeta, + 0.2 + 0.10 * xi + 0.14 * eta + 0.85 * zeta + 0.02 * xi * zeta + 0.01 * xi * eta); + break; + + default: + xyz = libMesh::Point(); + break; + } + + fixture.nodes.push_back(libMesh::Node::build(xyz(0), xyz(1), xyz(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +inline element_fixture +build_permuted_reference_fixture(libMesh::ElemType elem_type, + unsigned int perm_num) +{ + element_fixture fixture = build_reference_fixture(elem_type); + fixture.elem->permute(perm_num); + return fixture; +} + +inline map_helper_context +build_map_helper_context(const element_fixture & fixture, + libMesh::ElemType topo, + const char * label_prefix) +{ + map_helper_context context; + + const unsigned int dim = fixture.elem->dim(); + const unsigned int n_nodes = fixture.elem->n_nodes(); + const libMesh::FEType fe_type(fixture.elem->default_order(), + libMesh::FEMap::map_fe_type(*fixture.elem)); + auto fe = libMesh::FEBase::build(dim, fe_type); + + libMesh::QGauss qr(dim, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(topo); + + fe->attach_quadrature_rule(&qr); + fe->get_xyz(); + fe->get_dxyzdxi(); + if (dim >= 2) + fe->get_dxyzdeta(); + if (dim >= 3) + fe->get_dxyzdzeta(); + fe->get_JxW(); + fe->reinit(fixture.elem.get()); + + const unsigned int nqp = qr.n_points(); + const auto & xyz = fe->get_xyz(); + const auto & dxyzdxi = fe->get_dxyzdxi(); + const auto & jxw = fe->get_JxW(); + + context.ref_values.resize(13 * nqp); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp), w_h(nqp); + for (unsigned int q = 0; q < nqp; ++q) + { + libMesh::RealGradient dxyzdeta(0.0); + libMesh::RealGradient dxyzdzeta(0.0); + if (dim >= 2) + dxyzdeta = 
fe->get_dxyzdeta()[q]; + if (dim >= 3) + dxyzdzeta = fe->get_dxyzdzeta()[q]; + + const unsigned int base = 13 * q; + context.ref_values[base + 0] = xyz[q](0); + context.ref_values[base + 1] = xyz[q](1); + context.ref_values[base + 2] = xyz[q](2); + context.ref_values[base + 3] = dxyzdxi[q](0); + context.ref_values[base + 4] = dxyzdxi[q](1); + context.ref_values[base + 5] = dxyzdxi[q](2); + context.ref_values[base + 6] = dxyzdeta(0); + context.ref_values[base + 7] = dxyzdeta(1); + context.ref_values[base + 8] = dxyzdeta(2); + context.ref_values[base + 9] = dxyzdzeta(0); + context.ref_values[base + 10] = dxyzdzeta(1); + context.ref_values[base + 11] = dxyzdzeta(2); + context.ref_values[base + 12] = jxw[q]; + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = (dim >= 3) ? qr.qp(q)(2) : libMesh::Real(0); + w_h[q] = qr.w(q); + } + + context.d_coords = upload_point_coordinates(*fixture.elem, make_label(label_prefix, "_coords").c_str()); + context.d_xi = upload_real(xi_h, make_label(label_prefix, "_xi").c_str()); + context.d_eta = upload_real(eta_h, make_label(label_prefix, "_eta").c_str()); + context.d_zeta = upload_real(zeta_h, make_label(label_prefix, "_zeta").c_str()); + context.d_w = upload_real(w_h, make_label(label_prefix, "_w").c_str()); + context.nqp = nqp; + context.dim = dim; + context.n_nodes = n_nodes; + + return context; +} + +template +inline int +evaluate_map_helper_context(const map_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + constexpr unsigned int max_nodes = 27; + + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_coords = context.d_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const unsigned int dim_ = context.dim; + const unsigned int n_nodes_ = context.n_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int 
q) { + libMesh::Kokkos::RealVector nodes[max_nodes]; + for (unsigned int i = 0; i < n_nodes_; ++i) + nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + libMesh::Kokkos::RealVector xyz; + libMesh::Kokkos::RealTensor J; + libMesh::Kokkos::physical_point_and_jacobian( + nodes, n_nodes_, d_xi(q), d_eta(q), d_zeta(q), xyz, J); + + const libMesh::Real jxw_q = libMesh::Kokkos::volume_jxw(J, dim_, d_w(q)); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = vector_component(xyz, 0); + d_results(base + 1) = vector_component(xyz, 1); + d_results(base + 2) = vector_component(xyz, 2); + d_results(base + 3) = tensor_component(J, 0, 0); + d_results(base + 4) = tensor_component(J, 0, 1); + d_results(base + 5) = tensor_component(J, 0, 2); + d_results(base + 6) = tensor_component(J, 1, 0); + d_results(base + 7) = tensor_component(J, 1, 1); + d_results(base + 8) = tensor_component(J, 1, 2); + d_results(base + 9) = tensor_component(J, 2, 0); + d_results(base + 10) = tensor_component(J, 2, 1); + d_results(base + 11) = tensor_component(J, 2, 2); + d_results(base + 12) = jxw_q; + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +inline face_helper_context +build_face_helper_context(const element_fixture & fixture, + const libMesh::Elem & side, + unsigned int side_id, + const char * label_prefix) +{ + face_helper_context context; + const unsigned int parent_dim = fixture.elem->dim(); + const libMesh::FEType fe_type(fixture.elem->default_order(), + libMesh::FEMap::map_fe_type(*fixture.elem)); + const unsigned int side_dim = side.dim(); + auto side_fe = libMesh::FEBase::build(parent_dim, fe_type); + + libMesh::QGauss qr(parent_dim - 1, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side.type()); + + side_fe->attach_quadrature_rule(&qr); + side_fe->get_JxW(); + side_fe->get_normals(); + side_fe->get_dxyzdxi(); + if (parent_dim >= 3) 
+ side_fe->get_dxyzdeta(); + side_fe->reinit(fixture.elem.get(), side_id); + + const unsigned int nqp = qr.n_points(); + const unsigned int n_parent_nodes = fixture.elem->n_nodes(); + const unsigned int n_face_nodes = side.n_nodes(); + + std::vector side_ref_points(nqp); + for (unsigned int q = 0; q < nqp; ++q) + side_ref_points[q] = qr.qp(q); + + std::vector parent_ref_points; + if (parent_dim == 2) + { + auto side_map_fe = libMesh::FEBase::build(parent_dim, fe_type); + side_map_fe->get_xyz(); + side_map_fe->side_map(fixture.elem.get(), &side, side_id, side_ref_points, parent_ref_points); + } + + context.ref_values.resize(13 * nqp); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp), w_h(nqp); + std::vector parent_xi_h(nqp, 0.0), parent_eta_h(nqp, 0.0), parent_zeta_h(nqp, 0.0); + for (unsigned int q = 0; q < nqp; ++q) + { + const libMesh::Point row0 = libMesh::FEMap::map_deriv(side_dim, &side, 0, side_ref_points[q]); + libMesh::Point row1(0.0); + if (side_dim >= 2) + row1 = libMesh::FEMap::map_deriv(side_dim, &side, 1, side_ref_points[q]); + const auto & normal = side_fe->get_normals()[q]; + const unsigned int base = 13 * q; + + context.ref_values[base + 0] = row0(0); + context.ref_values[base + 1] = row0(1); + context.ref_values[base + 2] = row0(2); + context.ref_values[base + 3] = row1(0); + context.ref_values[base + 4] = row1(1); + context.ref_values[base + 5] = row1(2); + context.ref_values[base + 6] = 0.0; + context.ref_values[base + 7] = 0.0; + context.ref_values[base + 8] = 0.0; + context.ref_values[base + 9] = side_fe->get_JxW()[q]; + context.ref_values[base + 10] = normal(0); + context.ref_values[base + 11] = normal(1); + context.ref_values[base + 12] = normal(2); + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (parent_dim >= 3) ? 
qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = 0.0; + w_h[q] = qr.w(q); + + if (parent_dim == 2) + { + parent_xi_h[q] = parent_ref_points[q](0); + parent_eta_h[q] = parent_ref_points[q](1); + parent_zeta_h[q] = parent_ref_points[q](2); + } + } + + context.d_face_coords = upload_point_coordinates(side, make_label(label_prefix, "_coords").c_str()); + context.d_parent_coords = upload_point_coordinates(*fixture.elem, make_label(label_prefix, "_parent_coords").c_str()); + context.d_xi = upload_real(xi_h, make_label(label_prefix, "_xi").c_str()); + context.d_eta = upload_real(eta_h, make_label(label_prefix, "_eta").c_str()); + context.d_zeta = upload_real(zeta_h, make_label(label_prefix, "_zeta").c_str()); + context.d_w = upload_real(w_h, make_label(label_prefix, "_w").c_str()); + context.d_parent_xi = upload_real(parent_xi_h, make_label(label_prefix, "_parent_xi").c_str()); + context.d_parent_eta = upload_real(parent_eta_h, make_label(label_prefix, "_parent_eta").c_str()); + context.d_parent_zeta = upload_real(parent_zeta_h, make_label(label_prefix, "_parent_zeta").c_str()); + context.nqp = nqp; + context.parent_dim = parent_dim; + context.n_parent_nodes = n_parent_nodes; + context.n_face_nodes = n_face_nodes; + + return context; +} + +template +inline int +evaluate_face_helper_context_2d(const face_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + constexpr unsigned int max_face_nodes = 9; + constexpr unsigned int max_parent_nodes = 27; + + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_face_coords = context.d_face_coords; + const auto d_parent_coords = context.d_parent_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const auto d_parent_xi = context.d_parent_xi; + const auto d_parent_eta = context.d_parent_eta; + const auto d_parent_zeta = context.d_parent_zeta; + const unsigned int 
n_parent_nodes_ = context.n_parent_nodes; + const unsigned int n_face_nodes_ = context.n_face_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + libMesh::Kokkos::RealVector face_nodes[max_face_nodes]; + libMesh::Kokkos::RealVector parent_nodes[max_parent_nodes]; + for (unsigned int i = 0; i < n_face_nodes_; ++i) + face_nodes[i] = libMesh::Kokkos::make_vector( + d_face_coords(3 * i + 0), d_face_coords(3 * i + 1), d_face_coords(3 * i + 2)); + for (unsigned int i = 0; i < n_parent_nodes_; ++i) + parent_nodes[i] = libMesh::Kokkos::make_vector( + d_parent_coords(3 * i + 0), d_parent_coords(3 * i + 1), d_parent_coords(3 * i + 2)); + + const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( + face_nodes, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + const libMesh::Kokkos::RealTensor parent_J = libMesh::Kokkos::jacobian( + parent_nodes, n_parent_nodes_, d_parent_xi(q), d_parent_eta(q), d_parent_zeta(q)); + const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/2u, d_w(q)); + const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::edge_normal_on_parent_surface(J, parent_J); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = tensor_component(J, 0, 0); + d_results(base + 1) = tensor_component(J, 0, 1); + d_results(base + 2) = tensor_component(J, 0, 2); + d_results(base + 3) = tensor_component(J, 1, 0); + d_results(base + 4) = tensor_component(J, 1, 1); + d_results(base + 5) = tensor_component(J, 1, 2); + d_results(base + 6) = tensor_component(J, 2, 0); + d_results(base + 7) = tensor_component(J, 2, 1); + d_results(base + 8) = tensor_component(J, 2, 2); + d_results(base + 9) = jxw_q; + d_results(base + 10) = vector_component(normal_q, 0); + d_results(base + 11) = vector_component(normal_q, 1); + d_results(base + 12) = vector_component(normal_q, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +template +inline int 
+evaluate_face_helper_context_3d(const face_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + constexpr unsigned int max_face_nodes = 9; + + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_face_coords = context.d_face_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const unsigned int n_face_nodes_ = context.n_face_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + libMesh::Kokkos::RealVector face_nodes[max_face_nodes]; + for (unsigned int i = 0; i < n_face_nodes_; ++i) + face_nodes[i] = libMesh::Kokkos::make_vector( + d_face_coords(3 * i + 0), d_face_coords(3 * i + 1), d_face_coords(3 * i + 2)); + + const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( + face_nodes, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/3u, d_w(q)); + const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::face_normal(J, /*parent_dim=*/3u); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = tensor_component(J, 0, 0); + d_results(base + 1) = tensor_component(J, 0, 1); + d_results(base + 2) = tensor_component(J, 0, 2); + d_results(base + 3) = tensor_component(J, 1, 0); + d_results(base + 4) = tensor_component(J, 1, 1); + d_results(base + 5) = tensor_component(J, 1, 2); + d_results(base + 6) = tensor_component(J, 2, 0); + d_results(base + 7) = tensor_component(J, 2, 1); + d_results(base + 8) = tensor_component(J, 2, 2); + d_results(base + 9) = jxw_q; + d_results(base + 10) = vector_component(normal_q, 0); + d_results(base + 11) = vector_component(normal_q, 1); + d_results(base + 12) = vector_component(normal_q, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +} // namespace kokkos_test_utils + +#endif diff --git 
a/tests/fe/kokkos_fe_permuted_map_oracle_test.K b/tests/fe/kokkos_fe_permuted_map_oracle_test.K new file mode 100644 index 00000000000..d988bc9543e --- /dev/null +++ b/tests/fe/kokkos_fe_permuted_map_oracle_test.K @@ -0,0 +1,512 @@ +// GPU kernel tests for permuted libMesh::Kokkos map helpers. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEMap, FEBase, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase +// for every non-identity element permutation of the implemented +// LAGRANGE map topologies. +// B. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles for every +// non-identity parent permutation of the supported 2D and 3D parent +// topologies, including mixed-face prism and pyramid parents. +// C. map_face_qp_to_parent() against libMesh FEBase::side_map() for every +// non-identity permutation of those same parent topologies. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_permuted_reference_fixture; +using kokkos_test_utils::build_reference_elem; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::is_supported_lagrange_face_map_topology; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; + +namespace +{ + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +static const map_helper_case map_cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" } +}; + +static const face_parent_case face_cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { 
libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } +}; + +} // anonymous namespace + +template +static int +test_permuted_map_case_impl(const map_helper_case & info, unsigned int perm_num) +{ + auto fixture = build_permuted_reference_fixture(Topo, perm_num); + const auto context = build_map_helper_context(fixture, info.topo, "perm_map"); + const int fail = evaluate_map_helper_context(context, "perm_map_results", tol); + if (fail) + std::printf(" permuted map mismatch: topo=%s perm=%u (%d failures)\n", + info.name, perm_num, fail); + + return fail; +} + +struct permuted_map_dispatch +{ + permuted_map_dispatch(const map_helper_case & in_info, unsigned int in_perm_num) + : info(in_info), perm_num(in_perm_num) + { + } + + template + int operator()() const + { + return test_permuted_map_case_impl(info, perm_num); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted map topology: topo=%s perm=%u type=%d\n", + info.name, + perm_num, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; + unsigned int perm_num; +}; + +static int +test_permuted_map_case(const map_helper_case & info, unsigned int perm_num) +{ + const permuted_map_dispatch dispatch(info, perm_num); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_permuted_face_helper_side_case_3d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + unsigned int perm_num, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, 
"perm_face_results", tol); + if (fail) + std::printf(" permuted face mismatch: parent=%s perm=%u side_id=%u side_type=%d (%d failures)\n", + parent_name, + perm_num, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_permuted_face_helper_side_case_2d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + unsigned int perm_num, + libMesh::ElemType side_topo) +{ + const int fail = + evaluate_face_helper_context_2d(context, "perm_face_results", tol); + if (fail) + std::printf(" permuted face mismatch: parent=%s perm=%u side_id=%u side_type=%d (%d failures)\n", + parent_name, + perm_num, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct permuted_face_side_dispatch_3d +{ + permuted_face_side_dispatch_3d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + unsigned int in_perm_num, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + perm_num(in_perm_num), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_permuted_face_helper_side_case_3d_impl( + context, side_id, parent_name, perm_num, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + unsigned int perm_num; + libMesh::ElemType side_topo; +}; + +template +struct permuted_face_side_dispatch_2d +{ + permuted_face_side_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + unsigned int in_perm_num, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + 
parent_name(in_parent_name), + perm_num(in_perm_num), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_permuted_face_helper_side_case_2d_impl( + context, side_id, parent_name, perm_num, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + unsigned int perm_num; + libMesh::ElemType side_topo; +}; + +struct permuted_face_parent_dispatch_2d +{ + permuted_face_parent_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + unsigned int in_perm_num, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + perm_num(in_perm_num), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const permuted_face_side_dispatch_2d dispatch( + context, side_id, parent_name, perm_num, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted face-helper parent: parent=%s perm=%u side_id=%u parent_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + unsigned int perm_num; + libMesh::ElemType side_topo; +}; + +static int +test_permuted_face_helper_side_case(const element_fixture & fixture, + unsigned int side_id, + const char * parent_name, + unsigned int perm_num) +{ + auto side = fixture.elem->build_side_ptr(side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, side_id, "perm_face"); + + if 
(context.parent_dim == 3) + { + const permuted_face_side_dispatch_3d dispatch( + context, side_id, parent_name, perm_num, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const permuted_face_parent_dispatch_2d dispatch( + context, side_id, parent_name, perm_num, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported permuted face-helper parent dimension: parent=%s perm=%u side_id=%u dim=%u\n", + parent_name, + perm_num, + side_id, + context.parent_dim); + return 1; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_permuted_face_qp_case(const char * parent_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + unsigned int perm_num, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + std::printf(" permuted face_qp 
mismatch: parent=%s perm=%u side_id=%u side_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(side.type())); + + return fail; +} + +static int +test_map_helpers_for_all_permutations() +{ + int fail = 0; + for (const auto & info : map_cases) + { + const auto elem = build_reference_elem(info.topo); + for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm) + fail += test_permuted_map_case(info, perm); + } + return fail; +} + +static int +test_face_helpers_for_all_permutations() +{ + int fail = 0; + for (const auto & info : face_cases) + { + const auto elem = build_reference_elem(info.topo); + for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm) + { + auto fixture = build_permuted_reference_fixture(info.topo, perm); + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n", + info.name, + perm, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + fail += test_permuted_face_helper_side_case(fixture, side_id, info.name, perm); + } + } + } + return fail; +} + +static int +test_face_qp_to_parent_for_all_permutations() +{ + int fail = 0; + for (const auto & info : face_cases) + { + const auto elem = build_reference_elem(info.topo); + for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm) + { + auto fixture = build_permuted_reference_fixture(info.topo, perm); + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + if (side->n_nodes() == 1) + { + fail += check_permuted_face_qp_case( + info.name, *fixture.elem, *side, side_id, perm, libMesh::Kokkos::zero_vector()); + continue; + } + + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected 
unsupported permuted face_qp side: parent=%s perm=%u side_id=%u side_type=%d\n", + info.name, + perm, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + libMesh::QGauss qr(side->dim(), libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + for (unsigned int q = 0; q < qr.n_points(); ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? qr.qp(q)(2) : Real(0)); + fail += check_permuted_face_qp_case(info.name, *fixture.elem, *side, side_id, perm, face_qpt); + } + } + } + } + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int fail = 0; + + const int map_fail = test_map_helpers_for_all_permutations(); + fail += map_fail; + std::printf("[permuted_map_helpers] %s (%d failures)\n", map_fail ? "FAIL" : "OK", map_fail); + + const int face_fail = test_face_helpers_for_all_permutations(); + fail += face_fail; + std::printf("[permuted_face_helpers] %s (%d failures)\n", face_fail ? "FAIL" : "OK", face_fail); + + const int face_qp_fail = test_face_qp_to_parent_for_all_permutations(); + fail += face_qp_fail; + std::printf("[permuted_face_qp] %s (%d failures)\n", face_qp_fail ? "FAIL" : "OK", face_qp_fail); + + Kokkos::finalize(); + return fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_reconstruction_oracle_test.K b/tests/fe/kokkos_fe_reconstruction_oracle_test.K new file mode 100644 index 00000000000..a00947f0ebf --- /dev/null +++ b/tests/fe/kokkos_fe_reconstruction_oracle_test.K @@ -0,0 +1,320 @@ +// GPU kernel tests for libMesh::Kokkos FE reconstruction on physical elements. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEBase provides the host physical-space oracle values and gradients. +// +// The test suite covers: +// A. 
Reconstruction of solution values on distorted physical elements for +// every exact LAGRANGE key currently supported by the Kokkos evaluator. +// B. Reconstruction of physical-space gradients on the same elements by +// pulling reference-space gradients through the element Jacobian. +// C. The same value/gradient reconstruction parity for representative exact +// MONOMIAL keys across all supported dimensions and orders. +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_scalar_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using libMesh::Kokkos::FEShapeKey; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_flat_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::dispatch_supported_shape_key_with_lagrange_map; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double value_tol = 5.0e-13; +static constexpr double grad_tol = 5.0e-12; +static constexpr unsigned int quad_order = 4; + +namespace +{ + +struct reconstruction_case +{ + FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order 
order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +static const reconstruction_case lagrange_cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, 1, 2, "LAGRANGE/EDGE2/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "LAGRANGE/EDGE3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "LAGRANGE/EDGE3/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "LAGRANGE/TRI6/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "LAGRANGE/QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "LAGRANGE/QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "LAGRANGE/TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "LAGRANGE/TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "LAGRANGE/TET10/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "LAGRANGE/HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX27/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, 
"LAGRANGE/HEX27/SECOND" } +}; + +static const reconstruction_case monomial_cases[] = { + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::CONSTANT }, 1, monomial_n_dofs_for_dim(1, libMesh::CONSTANT), "MONOMIAL/EDGE2/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::FIRST }, 1, monomial_n_dofs_for_dim(1, libMesh::FIRST), "MONOMIAL/EDGE2/FIRST" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::SECOND }, 1, monomial_n_dofs_for_dim(1, libMesh::SECOND), "MONOMIAL/EDGE3/SECOND" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FOURTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FOURTH), "MONOMIAL/EDGE3/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FIFTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FIFTH), "MONOMIAL/EDGE3/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, 
libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" } +}; + +} // anonymous namespace + +static std::vector +build_coefficients(const reconstruction_case & info) +{ + std::vector coeffs(info.n_dofs); + const Real family_bias = (info.key.family == libMesh::MONOMIAL) ? Real(0.19) : Real(0.07); + const Real order_bias = Real(static_cast(info.key.order) + 1u) * Real(0.013); + + for (unsigned int i = 0; i < info.n_dofs; ++i) + { + const Real sign = (i % 2) ? Real(-1.0) : Real(1.0); + coeffs[i] = sign * (Real(0.17) + Real(0.041) * Real(i + 1u) + family_bias + order_bias); + } + + return coeffs; +} + +template +static int +test_reconstruction_case_impl(const reconstruction_case & info) +{ + constexpr unsigned int max_geom_nodes = 27; + + element_fixture fixture = build_flat_reference_fixture(ExactTopo); + const unsigned int n_geom_nodes = fixture.elem->n_nodes(); + + const libMesh::FEType fe_type(info.key.order, info.key.family); + auto fe = libMesh::FEBase::build(info.dim, fe_type); + + libMesh::QGauss qr(info.dim, static_cast(quad_order)); + qr.allow_rules_with_negative_weights = true; + qr.init(info.key.elem_type); + + fe->attach_quadrature_rule(&qr); + fe->get_phi(); + fe->get_dphi(); + fe->reinit(fixture.elem.get()); + + const auto & phi = fe->get_phi(); + const auto & dphi = fe->get_dphi(); + const unsigned int nqp = qr.n_points(); + + const std::vector coeffs = build_coefficients(info); + std::vector ref_u(nqp, 0.0); + std::vector ref_gx(nqp, 0.0); + std::vector ref_gy(nqp, 0.0); + std::vector ref_gz(nqp, 0.0); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + for (unsigned int i = 0; i < info.n_dofs; ++i) 
+ { + ref_u[q] += phi[i][q] * coeffs[i]; + ref_gx[q] += dphi[i][q](0) * coeffs[i]; + ref_gy[q] += dphi[i][q](1) * coeffs[i]; + ref_gz[q] += dphi[i][q](2) * coeffs[i]; + } + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (info.dim >= 2) ? qr.qp(q)(1) : Real(0); + zeta_h[q] = (info.dim >= 3) ? qr.qp(q)(2) : Real(0); + } + + auto d_coords = upload_point_coordinates(*fixture.elem, "recon_coords"); + auto d_coeffs = upload_real(coeffs, "recon_coeffs"); + auto d_xi = upload_real(xi_h, "recon_xi"); + auto d_eta = upload_real(eta_h, "recon_eta"); + auto d_zeta = upload_real(zeta_h, "recon_zeta"); + + Kokkos::View d_u(std::string("recon_u"), nqp); + Kokkos::View d_gx(std::string("recon_gx"), nqp); + Kokkos::View d_gy(std::string("recon_gy"), nqp); + Kokkos::View d_gz(std::string("recon_gz"), nqp); + + const unsigned int dim = info.dim; + const unsigned int n_dofs = info.n_dofs; + const unsigned int n_geom_nodes_ = n_geom_nodes; + + Kokkos::parallel_for( + nqp, + KOKKOS_LAMBDA(int q) { + RealVector geom_nodes[max_geom_nodes]; + for (unsigned int i = 0; i < n_geom_nodes_; ++i) + geom_nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + const RealTensor J = libMesh::Kokkos::jacobian( + geom_nodes, n_geom_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + + Real u = 0.0; + RealVector grad_ref_sum = libMesh::Kokkos::zero_vector(); + for (unsigned int i = 0; i < n_dofs; ++i) + { + const Real coeff = d_coeffs(i); + u += coeff * shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q)); + grad_ref_sum += + coeff * grad_shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q)); + } + + const RealTensor invJ = libMesh::Kokkos::leading_inverse(J, dim); + const RealVector grad_phys = invJ * grad_ref_sum; + + d_u(q) = u; + d_gx(q) = vector_component(grad_phys, 0); + d_gy(q) = vector_component(grad_phys, 1); + d_gz(q) = vector_component(grad_phys, 2); + }); + Kokkos::fence(); + + int fail = 0; + fail += compare_device_values(d_u, ref_u, value_tol); + fail += 
compare_device_values(d_gx, ref_gx, grad_tol); + fail += compare_device_values(d_gy, ref_gy, grad_tol); + fail += compare_device_values(d_gz, ref_gz, grad_tol); + + if (fail) + std::printf(" reconstruction mismatch: %s (%d failures)\n", info.name, fail); + + return fail; +} + +struct reconstruction_dispatch +{ + explicit reconstruction_dispatch(const reconstruction_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_reconstruction_case_impl(info); + } + + int unsupported_key(FEShapeKey key) const + { + std::printf(" unexpected unsupported reconstruction key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const reconstruction_case & info; +}; + +static int +test_reconstruction_case(const reconstruction_case & info) +{ + const reconstruction_dispatch dispatch(info); + return dispatch_supported_shape_key_with_lagrange_map(info.key, dispatch); +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : lagrange_cases) + { + const int f = test_reconstruction_case(info); + std::printf("[reconstruction_lagrange] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + for (const auto & info : monomial_cases) + { + const int f = test_reconstruction_case(info); + std::printf("[reconstruction_monomial] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 
1 : 0; +} diff --git a/tests/fe/kokkos_fe_shape_oracle_test.K b/tests/fe/kokkos_fe_shape_oracle_test.K new file mode 100644 index 00000000000..d98de26dce8 --- /dev/null +++ b/tests/fe/kokkos_fe_shape_oracle_test.K @@ -0,0 +1,629 @@ +// GPU kernel oracle tests for libMesh::Kokkos FE shape functions. +// The test suite covers: +// A. Geometry-map shape parity against the libMesh FE map oracle for the +// 12 implemented LAGRANGE map topologies. +// B. Geometry-map gradient parity against the libMesh FE map oracle for the +// same topologies. +// C. Physics FE parity for exact libMesh LAGRANGE keys that the Kokkos +// evaluator currently supports. +// D. Physics FE gradient parity for the same exact LAGRANGE keys. +// E. Physics FE parity for Kokkos-supported exact MONOMIAL keys. +// F. Physics FE gradient parity for the same exact MONOMIAL keys. +// +// Unsupported exact keys are expected to hard-abort in the Kokkos path and +// are therefore intentionally not invoked here. + +#include "libmesh/elem.h" +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_scalar_types.h" + +#include "libmesh/fe.h" +#include "libmesh/fe_interface.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_order.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_qps; +using kokkos_test_utils::build_reference_elem; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using 
kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; +static constexpr unsigned int quad_order = 4; + +namespace +{ + +struct map_elem_info +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +struct physics_shape_info +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +static const map_elem_info map_elems[] = { + { libMesh::EDGE2, 1, 2, "EDGE2" }, + { libMesh::EDGE3, 1, 3, "EDGE3" }, + { libMesh::TRI3, 2, 3, "TRI3" }, + { libMesh::TRI6, 2, 6, "TRI6" }, + { libMesh::QUAD4, 2, 4, "QUAD4" }, + { libMesh::QUAD8, 2, 8, "QUAD8" }, + { libMesh::QUAD9, 2, 9, "QUAD9" }, + { libMesh::TET4, 3, 4, "TET4" }, + { libMesh::TET10, 3, 10, "TET10" }, + { libMesh::HEX8, 3, 8, "HEX8" }, + { libMesh::HEX20, 3, 20, "HEX20" }, + { libMesh::HEX27, 3, 27, "HEX27" }, +}; +static constexpr unsigned int n_map_elems = sizeof(map_elems) / sizeof(map_elems[0]); + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +// Only exact libMesh LAGRANGE keys whose evaluator topology is implemented in +// the current Kokkos path are included here. 
+static const physics_shape_info lagrange_physics_cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, 1, 2, "EDGE2/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "EDGE3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "EDGE3/SECOND" }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST }, 1, 2, "EDGE4/FIRST" }, + + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "TRI6/SECOND" }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::FIRST }, 2, 3, "TRI7/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, 2, 6, "TRI7/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "TET10/SECOND" }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::FIRST }, 3, 4, "TET14/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, 3, 10, "TET14/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "HEX27/FIRST" }, + { { libMesh::LAGRANGE, 
libMesh::HEX27, libMesh::SECOND }, 3, 27, "HEX27/SECOND" }, +}; +static constexpr unsigned int n_lagrange_physics_cases = + sizeof(lagrange_physics_cases) / sizeof(lagrange_physics_cases[0]); + +// These MONOMIAL cases cover all implemented MonomialImpl paths +// for orders 0..5, plus representative non-simplex exact keys. +static const physics_shape_info monomial_physics_cases[] = { + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::CONSTANT }, 1, monomial_n_dofs_for_dim(1, libMesh::CONSTANT), "MONOMIAL/EDGE2/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::FIRST }, 1, monomial_n_dofs_for_dim(1, libMesh::FIRST), "MONOMIAL/EDGE2/FIRST" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::SECOND }, 1, monomial_n_dofs_for_dim(1, libMesh::SECOND), "MONOMIAL/EDGE3/SECOND" }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::THIRD }, 1, monomial_n_dofs_for_dim(1, libMesh::THIRD), "MONOMIAL/EDGE4/THIRD" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FOURTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FOURTH), "MONOMIAL/EDGE3/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FIFTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FIFTH), "MONOMIAL/EDGE3/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::THIRD }, 2, monomial_n_dofs_for_dim(2, libMesh::THIRD), "MONOMIAL/TRI7/THIRD" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FOURTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FOURTH), "MONOMIAL/TRI7/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/TRI7/FIFTH" }, + { { 
libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::THIRD }, 3, monomial_n_dofs_for_dim(3, libMesh::THIRD), "MONOMIAL/TET14/THIRD" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FOURTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FOURTH), "MONOMIAL/TET14/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/TET14/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::PRISM6, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/PRISM6/FIRST" }, + { { libMesh::MONOMIAL, libMesh::PRISM21, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/PRISM21/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::PYRAMID5, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/PYRAMID5/FIRST" 
}, + { { libMesh::MONOMIAL, libMesh::PYRAMID14, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/PYRAMID14/FIFTH" }, +}; +static constexpr unsigned int n_monomial_physics_cases = + sizeof(monomial_physics_cases) / sizeof(monomial_physics_cases[0]); + +} // anonymous namespace + +static Real +host_map_shape(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return libMesh::FEInterface::shape(fe_type, 0, &elem, i, libMesh::Point(xi, eta, zeta)); +} + +static RealVector +host_map_grad(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int dim, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + const libMesh::Point p(xi, eta, zeta); + const Real gx = libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 0, p); + const Real gy = (dim >= 2) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 1, p) + : Real(0); + const Real gz = (dim >= 3) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 2, p) + : Real(0); + return libMesh::Kokkos::make_vector(gx, gy, gz); +} + +static Real +host_physics_shape(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return libMesh::FEInterface::shape(fe_type, 0, &elem, i, libMesh::Point(xi, eta, zeta)); +} + +static RealVector +host_physics_grad(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int dim, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + const libMesh::Point p(xi, eta, zeta); + const Real gx = libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 0, p); + const Real gy = (dim >= 2) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 1, p) + : Real(0); + const Real gz = (dim >= 3) ? 
libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 2, p) + : Real(0); + return libMesh::Kokkos::make_vector(gx, gy, gz); +} + +// --------------------------------------------------------------------------- +// Test A: Geometry-map shape parity against libMesh FE map dispatch. +// --------------------------------------------------------------------------- +template +static int +test_map_shape_parity_impl(const map_elem_info & e) +{ + auto elem = build_reference_elem(Topo); + const libMesh::FEType fe_type(elem->default_order(), libMesh::FEMap::map_fe_type(*elem)); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(e.topo, e.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = e.n_dofs; + + std::vector ref_phi(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + ref_phi[i * nqp + q] = + host_map_shape(*elem, fe_type, i, xi_h[q], eta_h[q], zeta_h[q]); + + auto d_xi = upload_real(xi_h, "xi"); + auto d_eta = upload_real(eta_h, "eta"); + auto d_zeta = upload_real(zeta_h, "zeta"); + + Kokkos::View d_phi(std::string("dev_phi"), nd * nqp); + + const unsigned int nd_ = nd; + const unsigned int nqp_ = nqp; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + d_phi(idx) = + libMesh::Kokkos::map_shape(static_cast(i), + d_xi(q), + d_eta(q), + d_zeta(q)); + }); + Kokkos::fence(); + + return compare_device_values(d_phi, ref_phi, tol); +} + +struct map_shape_dispatch +{ + explicit map_shape_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_shape_parity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-shape topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_map_shape_parity(const 
map_elem_info & e) +{ + const map_shape_dispatch dispatch(e); + return dispatch_supported_lagrange_map_topology(e.topo, dispatch); +} + +// --------------------------------------------------------------------------- +// Test B: Geometry-map gradient parity against libMesh FE map dispatch. +// --------------------------------------------------------------------------- +template +static int +test_map_grad_parity_impl(const map_elem_info & e) +{ + auto elem = build_reference_elem(Topo); + const libMesh::FEType fe_type(elem->default_order(), libMesh::FEMap::map_fe_type(*elem)); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(e.topo, e.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = e.n_dofs; + const unsigned int dim = e.dim; + + std::vector ref_gx(nd * nqp); + std::vector ref_gy(nd * nqp); + std::vector ref_gz(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector g = host_map_grad(*elem, fe_type, dim, i, xi_h[q], eta_h[q], zeta_h[q]); + ref_gx[i * nqp + q] = vector_component(g, 0); + ref_gy[i * nqp + q] = vector_component(g, 1); + ref_gz[i * nqp + q] = vector_component(g, 2); + } + + auto d_xi = upload_real(xi_h, "xi"); + auto d_eta = upload_real(eta_h, "eta"); + auto d_zeta = upload_real(zeta_h, "zeta"); + + Kokkos::View d_gx(std::string("map_gx"), nd * nqp); + Kokkos::View d_gy(std::string("map_gy"), nd * nqp); + Kokkos::View d_gz(std::string("map_gz"), nd * nqp); + + const unsigned int nd_ = nd; + const unsigned int nqp_ = nqp; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + const RealVector g = + libMesh::Kokkos::grad_map_shape(static_cast(i), + d_xi(q), + d_eta(q), + d_zeta(q)); + d_gx(idx) = vector_component(g, 0); + d_gy(idx) = vector_component(g, 1); + d_gz(idx) = vector_component(g, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_gx, ref_gx, 
tol) + + compare_device_values(d_gy, ref_gy, tol) + + compare_device_values(d_gz, ref_gz, tol); +} + +struct map_grad_dispatch +{ + explicit map_grad_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_grad_parity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-grad topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_map_grad_parity(const map_elem_info & e) +{ + const map_grad_dispatch dispatch(e); + return dispatch_supported_lagrange_map_topology(e.topo, dispatch); +} + +// --------------------------------------------------------------------------- +// Test C: Exact-key physics shape parity against libMesh CPU FE dispatch. +// --------------------------------------------------------------------------- +template +static int +test_shape_parity_impl(const physics_shape_info & info) +{ + auto elem = build_reference_elem(info.key.elem_type); + const libMesh::FEType fe_type(info.key.order, info.key.family); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.key.elem_type, info.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = info.n_dofs; + + std::vector ref_phi(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + ref_phi[i * nqp + q] = + host_physics_shape(*elem, fe_type, i, xi_h[q], eta_h[q], zeta_h[q]); + + auto d_xi = upload_real(xi_h, "phys_xi"); + auto d_eta = upload_real(eta_h, "phys_eta"); + auto d_zeta = upload_real(zeta_h, "phys_zeta"); + + Kokkos::View d_phi(std::string("phys_phi"), nd * nqp); + + const unsigned int nqp_ = nqp; + const unsigned int nd_ = nd; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + d_phi(idx) = shape_for_key( + static_cast(i), d_xi(q), 
d_eta(q), d_zeta(q)); + }); + Kokkos::fence(); + + return compare_device_values(d_phi, ref_phi, tol); +} + +// --------------------------------------------------------------------------- +// Test D: Exact-key physics gradient parity against libMesh CPU FE dispatch. +// --------------------------------------------------------------------------- +template +static int +test_grad_shape_parity_impl(const physics_shape_info & info) +{ + auto elem = build_reference_elem(info.key.elem_type); + const libMesh::FEType fe_type(info.key.order, info.key.family); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.key.elem_type, info.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = info.n_dofs; + + std::vector ref_gx(nd * nqp); + std::vector ref_gy(nd * nqp); + std::vector ref_gz(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector g = + host_physics_grad(*elem, fe_type, info.dim, i, xi_h[q], eta_h[q], zeta_h[q]); + ref_gx[i * nqp + q] = vector_component(g, 0); + ref_gy[i * nqp + q] = vector_component(g, 1); + ref_gz[i * nqp + q] = vector_component(g, 2); + } + + auto d_xi = upload_real(xi_h, "grad_xi"); + auto d_eta = upload_real(eta_h, "grad_eta"); + auto d_zeta = upload_real(zeta_h, "grad_zeta"); + + Kokkos::View d_gx(std::string("phys_gx"), nd * nqp); + Kokkos::View d_gy(std::string("phys_gy"), nd * nqp); + Kokkos::View d_gz(std::string("phys_gz"), nd * nqp); + + const unsigned int nqp_ = nqp; + const unsigned int nd_ = nd; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + const RealVector g = grad_shape_for_key( + static_cast(i), d_xi(q), d_eta(q), d_zeta(q)); + d_gx(idx) = vector_component(g, 0); + d_gy(idx) = vector_component(g, 1); + d_gz(idx) = vector_component(g, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_gx, ref_gx, tol) + + 
compare_device_values(d_gy, ref_gy, tol) + + compare_device_values(d_gz, ref_gz, tol); +} + +struct shape_dispatch +{ + explicit shape_dispatch(const physics_shape_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_shape_parity_impl(info); + } + + int unsupported_key(libMesh::Kokkos::FEShapeKey key) const + { + std::printf(" unexpected unsupported shape key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const physics_shape_info & info; +}; + +struct grad_shape_dispatch +{ + explicit grad_shape_dispatch(const physics_shape_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_grad_shape_parity_impl(info); + } + + int unsupported_key(libMesh::Kokkos::FEShapeKey key) const + { + std::printf(" unexpected unsupported grad-shape key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const physics_shape_info & info; +}; + +static int +test_shape_parity(const physics_shape_info & info) +{ + const shape_dispatch dispatch(info); + return dispatch_supported_shape_key(info.key, dispatch); +} + +static int +test_grad_shape_parity(const physics_shape_info & info) +{ + const grad_shape_dispatch dispatch(info); + return dispatch_supported_shape_key(info.key, dispatch); +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (unsigned int e = 0; e < n_map_elems; ++e) + { + const map_elem_info & info = map_elems[e]; + + { + const int f = test_map_shape_parity(info); + std::printf("[map_shape_parity ][%s] %s (%d)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_map_grad_parity(info); + std::printf("[map_grad_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (unsigned int c = 0; c < n_lagrange_physics_cases; ++c) + { + const physics_shape_info & info = lagrange_physics_cases[c]; + + { + const int f = test_shape_parity(info); + std::printf("[shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_grad_shape_parity(info); + std::printf("[grad_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (unsigned int c = 0; c < n_monomial_physics_cases; ++c) + { + const physics_shape_info & info = monomial_physics_cases[c]; + + { + const int f = test_shape_parity(info); + std::printf("[shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_grad_shape_parity(info); + std::printf("[grad_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + Kokkos::finalize(); + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_side_trace_oracle_test.K b/tests/fe/kokkos_fe_side_trace_oracle_test.K new file mode 100644 index 00000000000..802545ccf31 --- /dev/null +++ b/tests/fe/kokkos_fe_side_trace_oracle_test.K @@ -0,0 +1,342 @@ +// GPU kernel tests for libMesh::Kokkos side-trace FE parity. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEBase side reinit provides host side-trace oracle values. +// +// The test suite covers: +// A. Side-restricted shape values for supported exact LAGRANGE keys on +// distorted physical elements. +// B. Tangential physical gradients on those same sides, using host FEBase +// side traces as the oracle. +// C. 
The same side value and tangential-gradient parity for representative +// exact MONOMIAL keys whose parent geometry topology is supported by the +// current Kokkos map implementation. +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_scalar_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using libMesh::Kokkos::FEShapeKey; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_flat_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::dispatch_supported_shape_key_with_lagrange_map; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double value_tol = 5.0e-13; +static constexpr double grad_tol = 5.0e-12; + +namespace +{ + +struct side_trace_case +{ + FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +// Restrict to parent geometries currently handled by the Kokkos map 
path. +static const side_trace_case lagrange_cases[] = { + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "LAGRANGE/TRI6/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "LAGRANGE/QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "LAGRANGE/QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "LAGRANGE/TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "LAGRANGE/TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "LAGRANGE/TET10/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "LAGRANGE/HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX27/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, "LAGRANGE/HEX27/SECOND" } +}; + +static const side_trace_case monomial_cases[] = { + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { 
libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" } +}; + +} // anonymous namespace + +LIBMESH_DEVICE_INLINE RealVector +tangential_component(const RealVector & v, const RealVector & normal) +{ + return v - ((v * normal) * normal); +} + +template +static int +test_side_trace_case_impl(const side_trace_case & info) +{ + constexpr unsigned int max_geom_nodes = 27; + + auto fixture = build_flat_reference_fixture(ExactTopo); + const unsigned int n_geom_nodes = fixture.elem->n_nodes(); + const unsigned int parent_dim = fixture.elem->dim(); + + const libMesh::FEType fe_type(info.key.order, info.key.family); + auto side_fe = libMesh::FEBase::build(parent_dim, fe_type); + + int fail = 0; + + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + + libMesh::QGauss 
qr(parent_dim - 1, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + side_fe->attach_quadrature_rule(&qr); + side_fe->get_phi(); + side_fe->get_dphi(); + side_fe->get_normals(); + side_fe->reinit(fixture.elem.get(), side_id); + + const auto & phi = side_fe->get_phi(); + const auto & dphi = side_fe->get_dphi(); + const auto & normals = side_fe->get_normals(); + + if (phi.size() != info.n_dofs || dphi.size() != info.n_dofs) + { + std::printf(" unexpected host side FE size: %s side_id=%u phi=%llu dphi=%llu expected=%u\n", + info.name, + side_id, + libMesh::cast_int(phi.size()), + libMesh::cast_int(dphi.size()), + info.n_dofs); + fail += 1; + continue; + } + + const unsigned int nqp = qr.n_points(); + std::vector ref_phi(info.n_dofs * nqp); + std::vector ref_tgx(info.n_dofs * nqp); + std::vector ref_tgy(info.n_dofs * nqp); + std::vector ref_tgz(info.n_dofs * nqp); + std::vector parent_xi_h(nqp), parent_eta_h(nqp), parent_zeta_h(nqp); + std::vector normal_x_h(nqp), normal_y_h(nqp), normal_z_h(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? 
qr.qp(q)(2) : Real(0)); + const RealVector parent_qpt = + libMesh::Kokkos::map_face_qp_to_parent(*side, libMesh::LAGRANGE_MAP, side->type(), face_qpt); + + parent_xi_h[q] = vector_component(parent_qpt, 0); + parent_eta_h[q] = vector_component(parent_qpt, 1); + parent_zeta_h[q] = vector_component(parent_qpt, 2); + + const auto & n = normals[q]; + normal_x_h[q] = n(0); + normal_y_h[q] = n(1); + normal_z_h[q] = n(2); + + for (unsigned int i = 0; i < info.n_dofs; ++i) + { + const unsigned int idx = q * info.n_dofs + i; + const RealVector host_tg = tangential_component( + libMesh::Kokkos::make_vector(dphi[i][q](0), dphi[i][q](1), dphi[i][q](2)), + libMesh::Kokkos::make_vector(n(0), n(1), n(2))); + + ref_phi[idx] = phi[i][q]; + ref_tgx[idx] = vector_component(host_tg, 0); + ref_tgy[idx] = vector_component(host_tg, 1); + ref_tgz[idx] = vector_component(host_tg, 2); + } + } + + auto d_coords = upload_point_coordinates(*fixture.elem, "side_trace_coords"); + auto d_parent_xi = upload_real(parent_xi_h, "side_trace_parent_xi"); + auto d_parent_eta = upload_real(parent_eta_h, "side_trace_parent_eta"); + auto d_parent_zeta = upload_real(parent_zeta_h, "side_trace_parent_zeta"); + auto d_normal_x = upload_real(normal_x_h, "side_trace_normal_x"); + auto d_normal_y = upload_real(normal_y_h, "side_trace_normal_y"); + auto d_normal_z = upload_real(normal_z_h, "side_trace_normal_z"); + + Kokkos::View d_phi(std::string("side_trace_phi"), ref_phi.size()); + Kokkos::View d_tgx(std::string("side_trace_tgx"), ref_tgx.size()); + Kokkos::View d_tgy(std::string("side_trace_tgy"), ref_tgy.size()); + Kokkos::View d_tgz(std::string("side_trace_tgz"), ref_tgz.size()); + + const unsigned int n_dofs = info.n_dofs; + const unsigned int n_geom_nodes_ = n_geom_nodes; + + Kokkos::parallel_for( + static_cast(ref_phi.size()), + KOKKOS_LAMBDA(int idx) { + const unsigned int q = static_cast(idx) / n_dofs; + const unsigned int i = static_cast(idx) % n_dofs; + + RealVector geom_nodes[max_geom_nodes]; + for 
(unsigned int k = 0; k < n_geom_nodes_; ++k) + geom_nodes[k] = libMesh::Kokkos::make_vector( + d_coords(3 * k + 0), d_coords(3 * k + 1), d_coords(3 * k + 2)); + + const Real xi = d_parent_xi(q); + const Real eta = d_parent_eta(q); + const Real zeta = d_parent_zeta(q); + const RealTensor J = + libMesh::Kokkos::jacobian(geom_nodes, n_geom_nodes_, xi, eta, zeta); + const RealVector grad_ref = grad_shape_for_key(i, xi, eta, zeta); + const RealVector grad_phys = libMesh::Kokkos::leading_inverse(J, parent_dim) * grad_ref; + const RealVector normal = libMesh::Kokkos::make_vector(d_normal_x(q), d_normal_y(q), d_normal_z(q)); + const RealVector tangential_grad = tangential_component(grad_phys, normal); + + d_phi(idx) = shape_for_key(i, xi, eta, zeta); + d_tgx(idx) = vector_component(tangential_grad, 0); + d_tgy(idx) = vector_component(tangential_grad, 1); + d_tgz(idx) = vector_component(tangential_grad, 2); + }); + Kokkos::fence(); + + const int side_fail = + compare_device_values(d_phi, ref_phi, value_tol) + + compare_device_values(d_tgx, ref_tgx, grad_tol) + + compare_device_values(d_tgy, ref_tgy, grad_tol) + + compare_device_values(d_tgz, ref_tgz, grad_tol); + + if (side_fail) + std::printf(" side-trace mismatch: %s side_id=%u side_type=%d (%d failures)\n", + info.name, + side_id, + static_cast(side->type()), + side_fail); + + fail += side_fail; + } + + return fail; +} + +struct side_trace_dispatch +{ + explicit side_trace_dispatch(const side_trace_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_side_trace_case_impl(info); + } + + int unsupported_key(FEShapeKey key) const + { + std::printf(" unexpected unsupported side-trace key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const side_trace_case & info; +}; + +static int +test_side_trace_case(const side_trace_case & info) +{ + const side_trace_dispatch dispatch(info); + 
return dispatch_supported_shape_key_with_lagrange_map(info.key, dispatch); +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : lagrange_cases) + { + const int f = test_side_trace_case(info); + std::printf("[side_trace_lagrange] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + for (const auto & info : monomial_cases) + { + const int f = test_side_trace_case(info); + std::printf("[side_trace_monomial] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_types_oracle_test.K b/tests/fe/kokkos_fe_types_oracle_test.K new file mode 100644 index 00000000000..cc6b3cfd640 --- /dev/null +++ b/tests/fe/kokkos_fe_types_oracle_test.K @@ -0,0 +1,509 @@ +// GPU kernel oracle tests for libMesh::Kokkos FE type helpers. +// The test suite covers: +// A. get_side_topology() against libMesh side topology, with the 1D +// NODEELEM -> EDGE2 surrogate used by the Kokkos path. +// B. class_from_topology() against a class inferred from libMesh +// first-order LAGRANGE FE spaces. +// C. n_dofs(FEShapeKey) for Kokkos-supported exact LAGRANGE keys against +// libMesh::FEInterface::n_dofs(). +// D. n_dofs(FEShapeKey) for Kokkos-supported MONOMIAL keys against +// libMesh::FEInterface::n_dofs(). +// E. supports_shape()/supports_grad_shape()/supports_n_dofs() agree on the +// current Kokkos evaluator support boundary. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_interface.h" +#include "libmesh/fe_type.h" +#include "libmesh/libmesh.h" + +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_order.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using kokkos_test_utils::build_reference_elem; + +namespace +{ + +struct side_topology_case +{ + libMesh::ElemType parent; + libMesh::ElemType expected; +}; + +struct class_from_topology_case +{ + libMesh::ElemType topo; + libMesh::FEElemClass expected; +}; + +struct n_dof_case +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int expected; +}; + +struct support_case +{ + libMesh::Kokkos::FEShapeKey key; + bool expected; +}; + +static libMesh::ElemType +host_side_topology_oracle(libMesh::ElemType parent_type) +{ + auto elem = build_reference_elem(parent_type); + + if (elem->dim() == 1) + return libMesh::EDGE2; + + auto first_side = elem->build_side_ptr(0); + const libMesh::ElemType side_topo = first_side->type(); + + for (unsigned int s = 1; s < elem->n_sides(); ++s) + { + auto side = elem->build_side_ptr(s); + if (side->type() != side_topo) + return libMesh::INVALID_ELEM; + } + + return side_topo; +} + +static libMesh::FEElemClass +host_class_from_topology_oracle(libMesh::ElemType topo) +{ + auto elem = build_reference_elem(topo); + + if (elem->dim() == 1) + return libMesh::FEElemClass::EDGE; + + const libMesh::FEType fe_type(libMesh::FIRST, libMesh::LAGRANGE); + const unsigned int ndofs = libMesh::FEInterface::n_dofs(fe_type, 0, elem.get()); + + switch (elem->dim()) + { + case 2: + switch (ndofs) + { + case 3: return libMesh::FEElemClass::TRI; + case 4: return libMesh::FEElemClass::QUAD; + default: return libMesh::FEElemClass::N_CLASSES; + } + + case 3: + switch (ndofs) + { + case 4: 
return libMesh::FEElemClass::TET; + case 8: return libMesh::FEElemClass::HEX; + case 6: return libMesh::FEElemClass::PRISM; + case 5: return libMesh::FEElemClass::PYRAMID; + default: return libMesh::FEElemClass::N_CLASSES; + } + + default: + return libMesh::FEElemClass::N_CLASSES; + } +} + +static unsigned int +host_n_dofs_oracle(libMesh::Kokkos::FEShapeKey key) +{ + auto elem = build_reference_elem(key.elem_type); + return libMesh::FEInterface::n_dofs(libMesh::FEType(key.order, key.family), 0, elem.get()); +} + +} // anonymous namespace + +// --------------------------------------------------------------------------- +// Test 1: get_side_topology() against libMesh side topology. +// --------------------------------------------------------------------------- +static int +test_side_topology() +{ + static const libMesh::ElemType parents[] = { + libMesh::EDGE2, + libMesh::EDGE3, + libMesh::EDGE4, + libMesh::TRI3, + libMesh::TRI7, + libMesh::QUAD4, + libMesh::TRI6, + libMesh::QUAD8, + libMesh::QUAD9, + libMesh::TET4, + libMesh::HEX8, + libMesh::TET10, + libMesh::TET14, + libMesh::HEX20, + libMesh::HEX27 + }; + constexpr int n_cases = sizeof(parents) / sizeof(parents[0]); + + Kokkos::View d_cases(std::string("side_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + { + h(i).parent = parents[i]; + h(i).expected = host_side_topology_oracle(parents[i]); + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("side_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (get_side_topology(d_cases(i).parent) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 2: class_from_topology() against libMesh FE oracle. 
+// --------------------------------------------------------------------------- +static int +test_class_from_topology() +{ + static const libMesh::ElemType topos[] = { + libMesh::EDGE2, + libMesh::EDGE3, + libMesh::EDGE4, + libMesh::TRI3, + libMesh::TRI6, + libMesh::TRI7, + libMesh::QUAD4, + libMesh::QUAD8, + libMesh::QUAD9, + libMesh::TET4, + libMesh::TET10, + libMesh::TET14, + libMesh::HEX8, + libMesh::HEX20, + libMesh::HEX27, + libMesh::PRISM6, + libMesh::PRISM15, + libMesh::PRISM18, + libMesh::PRISM20, + libMesh::PRISM21, + libMesh::PYRAMID5, + libMesh::PYRAMID13, + libMesh::PYRAMID14, + libMesh::PYRAMID18 + }; + constexpr int n_cases = sizeof(topos) / sizeof(topos[0]); + + Kokkos::View d_cases(std::string("class_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + { + h(i).topo = topos[i]; + h(i).expected = host_class_from_topology_oracle(topos[i]); + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("class_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (class_from_topology(d_cases(i).topo) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 3: n_dofs() for Kokkos-supported exact LAGRANGE keys against +// libMesh FEInterface. 
+// --------------------------------------------------------------------------- +static int +test_n_dofs_lagrange() +{ + using libMesh::Kokkos::FEShapeKey; + + static const FEShapeKey keys[] = { + { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST }, + + { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::TRI7, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, + + { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::TET14, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND } + }; + constexpr int n_cases = sizeof(keys) / sizeof(keys[0]); + + Kokkos::View d_cases(std::string("lagrange_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + { + h(i).key = keys[i]; + h(i).expected = host_n_dofs_oracle(keys[i]); + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("lagrange_fail")); + Kokkos::deep_copy(d_fail, 0); + + 
Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (n_dofs(d_cases(i).key) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 4: n_dofs() for Kokkos-supported MONOMIAL keys against libMesh +// FEInterface. +// --------------------------------------------------------------------------- +static int +test_n_dofs_monomial() +{ + using libMesh::Kokkos::FEShapeKey; + + static const libMesh::Order orders[] = { + libMesh::CONSTANT, + libMesh::FIRST, + libMesh::SECOND, + libMesh::THIRD, + libMesh::FOURTH, + libMesh::FIFTH + }; + static const libMesh::ElemType higher_dim_topos[] = { + libMesh::TRI7, + libMesh::QUAD9, + libMesh::TET14, + libMesh::HEX27, + libMesh::PRISM21 + }; + + constexpr int n_cases = sizeof(orders) / sizeof(orders[0]) * + (2 + sizeof(higher_dim_topos) / sizeof(higher_dim_topos[0])); + + Kokkos::View d_cases(std::string("monomial_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + int i = 0; + for (const auto order : orders) + { + const libMesh::ElemType edge_topo = + (order <= libMesh::THIRD) ? libMesh::EDGE4 : libMesh::EDGE3; + const libMesh::ElemType pyramid_topo = + (order <= libMesh::THIRD) ? 
libMesh::PYRAMID18 : libMesh::PYRAMID14; + + h(i).key = { libMesh::MONOMIAL, edge_topo, order }; + h(i).expected = host_n_dofs_oracle(h(i).key); + ++i; + + h(i).key = { libMesh::MONOMIAL, pyramid_topo, order }; + h(i).expected = host_n_dofs_oracle(h(i).key); + ++i; + + for (const auto topo : higher_dim_topos) + { + h(i).key = { libMesh::MONOMIAL, topo, order }; + h(i).expected = host_n_dofs_oracle(h(i).key); + ++i; + } + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("monomial_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (n_dofs(d_cases(i).key) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 5: support predicates agree on the Kokkos evaluator boundary. +// --------------------------------------------------------------------------- +static int +test_support_contract() +{ + using libMesh::Kokkos::FEShapeKey; + + static const support_case cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, true }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::CONSTANT }, false }, + { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::FIRST }, false }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST }, 
false }, + { { libMesh::LAGRANGE, libMesh::PRISM15, libMesh::SECOND }, false }, + { { libMesh::LAGRANGE, libMesh::PYRAMID5, libMesh::FIRST }, false }, + { { libMesh::LAGRANGE, libMesh::PYRAMID14, libMesh::SECOND }, false }, + { { libMesh::LAGRANGE, libMesh::PYRAMID18, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::INVALID_ORDER }, false }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::THIRD }, true }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::PRISM21, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::PYRAMID14, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::NODEELEM, libMesh::CONSTANT }, false }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::FOURTH }, false }, + { { libMesh::MONOMIAL, libMesh::PYRAMID18, libMesh::FOURTH }, false }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::SIXTH }, false }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SIXTH }, false }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::INVALID_ORDER }, false } + }; + constexpr int n_cases = sizeof(cases) / sizeof(cases[0]); + + Kokkos::View d_cases(std::string("support_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + h(i) = cases[i]; + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("support_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + + const bool shape_supported = supports_shape(d_cases(i).key); + const bool grad_supported = supports_grad_shape(d_cases(i).key); + const bool ndofs_supported = supports_n_dofs(d_cases(i).key); + + if (shape_supported != d_cases(i).expected || + grad_supported != d_cases(i).expected || + ndofs_supported != d_cases(i).expected || + 
shape_supported != grad_supported || + shape_supported != ndofs_supported) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const int f = test_side_topology(); + std::printf("[side_topology_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_class_from_topology(); + std::printf("[class_topology_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_n_dofs_lagrange(); + std::printf("[lagrange_ndofs_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_n_dofs_monomial(); + std::printf("[monomial_ndofs_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_support_contract(); + std::printf("[support_contract_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_quadrature_oracle_test.K b/tests/fe/kokkos_quadrature_oracle_test.K new file mode 100644 index 00000000000..96ebfe71640 --- /dev/null +++ b/tests/fe/kokkos_quadrature_oracle_test.K @@ -0,0 +1,747 @@ +// GPU kernel oracle tests for libMesh::Kokkos quadrature and map helpers. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// QGauss, FEMap, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. 
GaussQuadrature point and weight tables against libMesh QGauss. +// B. physical_point() and jacobian() against libMesh FEMap::map() and +// FEMap::map_deriv(). +// C. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase. +// D. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles. +// E. map_face_qp_to_parent() against libMesh FEBase::side_map(). +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_quadrature.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::GaussQuadrature; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_host_qgauss; +using kokkos_test_utils::build_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::tensor_component; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::vector_component; + +static constexpr 
double tol = 1.0e-13; + +namespace +{ + +struct quadrature_case +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int order; +}; + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_helper_case +{ + libMesh::ElemType parent_topo; + unsigned int side_id; + const char * name; +}; + +} // anonymous namespace + +static int +test_quadrature_case(const quadrature_case & info) +{ + std::vector x_ref, y_ref, z_ref, w_ref; + const unsigned int host_nqp = + build_host_qgauss(info.topo, info.dim, info.order, x_ref, y_ref, z_ref, w_ref); + + Kokkos::View d_nqp(std::string("nqp")); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + d_nqp() = GaussQuadrature::n_points(info.topo, info.order); + }); + Kokkos::fence(); + + unsigned int device_nqp = 0; + Kokkos::deep_copy(device_nqp, d_nqp); + + int fail = 0; + if (device_nqp != host_nqp) + ++fail; + + Kokkos::View d_x(std::string("qx"), host_nqp); + Kokkos::View d_y(std::string("qy"), host_nqp); + Kokkos::View d_z(std::string("qz"), host_nqp); + Kokkos::View d_w(std::string("qw"), host_nqp); + + Kokkos::parallel_for( + host_nqp, + KOKKOS_LAMBDA(int qp) { + const RealVector pt = GaussQuadrature::point(info.topo, info.order, qp); + d_x(qp) = vector_component(pt, 0); + d_y(qp) = vector_component(pt, 1); + d_z(qp) = vector_component(pt, 2); + d_w(qp) = GaussQuadrature::weight(info.topo, info.order, qp); + }); + Kokkos::fence(); + + fail += compare_device_values(d_x, x_ref); + fail += compare_device_values(d_y, y_ref); + fail += compare_device_values(d_z, z_ref); + fail += compare_device_values(d_w, w_ref); + + if (fail) + std::printf(" quadrature mismatch: topo=%d dim=%u order=%u (%d failures)\n", + static_cast(info.topo), info.dim, info.order, fail); + + return fail; +} + +static int +test_quadrature_against_qgauss() +{ + int fail = 0; + + const libMesh::ElemType edge_topos[] = { libMesh::EDGE2, libMesh::EDGE3 }; + const libMesh::ElemType quad_topos[] = { libMesh::QUAD4, 
libMesh::QUAD8, libMesh::QUAD9 }; + const libMesh::ElemType hex_topos[] = { libMesh::HEX8, libMesh::HEX20, libMesh::HEX27 }; + const libMesh::ElemType tri_topos[] = { libMesh::TRI3, libMesh::TRI6 }; + const libMesh::ElemType tet_topos[] = { libMesh::TET4, libMesh::TET10 }; + + for (const auto topo : edge_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 1, order }); + + for (const auto topo : quad_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 2, order }); + + for (const auto topo : hex_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 3, order }); + + for (const auto topo : tri_topos) + for (unsigned int order = 0; order <= 6; ++order) + fail += test_quadrature_case({ topo, 2, order }); + + for (const auto topo : tet_topos) + for (unsigned int order = 0; order <= 6; ++order) + fail += test_quadrature_case({ topo, 3, order }); + + return fail; +} + + +static element_fixture +build_hex8_fixture() +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(libMesh::HEX8); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(8); + + static const double coords[8][3] = { + {0.0, 0.0, 0.0}, + {1.0, 0.0, 0.0}, + {1.0, 1.0, 0.0}, + {0.0, 1.0, 0.0}, + {0.0, 0.0, 1.0}, + {1.0, 0.0, 1.0}, + {1.0, 1.0, 1.0}, + {0.0, 1.0, 1.0} + }; + + for (unsigned int i = 0; i < 8; ++i) + { + fixture.nodes.push_back(libMesh::Node::build(coords[i][0], coords[i][1], coords[i][2], i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static element_fixture +build_tri3_fixture() +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(libMesh::TRI3); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(3); + + static const double coords[3][3] = { + {0.0, 0.0, 0.0}, + {1.0, 0.0, 0.0}, + {0.0, 1.0, 0.0} + }; + + for (unsigned int i = 0; i < 
3; ++i) + { + fixture.nodes.push_back(libMesh::Node::build(coords[i][0], coords[i][1], coords[i][2], i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static int +test_physical_map_hex8() +{ + auto fixture = build_hex8_fixture(); + + const libMesh::Point ref_center(0.0, 0.0, 0.0); + const libMesh::Point ref_corner(-1.0, -1.0, -1.0); + + const libMesh::Point host_center = libMesh::FEMap::map(3, fixture.elem.get(), ref_center); + const libMesh::Point host_corner = libMesh::FEMap::map(3, fixture.elem.get(), ref_corner); + const libMesh::Point host_dxi = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 0, ref_center); + const libMesh::Point host_deta = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 1, ref_center); + const libMesh::Point host_dzeta = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 2, ref_center); + + std::vector ref_values = { + host_center(0), host_center(1), host_center(2), + host_dxi(0), host_dxi(1), host_dxi(2), + host_deta(0), host_deta(1), host_deta(2), + host_dzeta(0), host_dzeta(1), host_dzeta(2), + host_corner(0), host_corner(1), host_corner(2) + }; + + auto d_coords = upload_point_coordinates(*fixture.elem, "hex_coords"); + + Kokkos::View d_results(std::string("hex_results"), ref_values.size()); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + RealVector nodes[8]; + for (unsigned int i = 0; i < 8; ++i) + nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + const RealVector xyz_center = + libMesh::Kokkos::physical_point(nodes, 8, 0.0, 0.0, 0.0); + const RealTensor J_center = + libMesh::Kokkos::jacobian(nodes, 8, 0.0, 0.0, 0.0); + const RealVector xyz_corner = libMesh::Kokkos::physical_point( + nodes, 8, -1.0, -1.0, -1.0); + + d_results(0) = vector_component(xyz_center, 0); + d_results(1) = vector_component(xyz_center, 1); + d_results(2) = vector_component(xyz_center, 2); + d_results(3) = tensor_component(J_center, 0, 0); + d_results(4) = 
tensor_component(J_center, 0, 1); + d_results(5) = tensor_component(J_center, 0, 2); + d_results(6) = tensor_component(J_center, 1, 0); + d_results(7) = tensor_component(J_center, 1, 1); + d_results(8) = tensor_component(J_center, 1, 2); + d_results(9) = tensor_component(J_center, 2, 0); + d_results(10) = tensor_component(J_center, 2, 1); + d_results(11) = tensor_component(J_center, 2, 2); + d_results(12) = vector_component(xyz_corner, 0); + d_results(13) = vector_component(xyz_corner, 1); + d_results(14) = vector_component(xyz_corner, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, ref_values); +} + +static int +test_physical_map_tri3() +{ + auto fixture = build_tri3_fixture(); + + const libMesh::Point ref_pt(1.0 / 3.0, 1.0 / 3.0, 0.0); + + const libMesh::Point host_xyz = libMesh::FEMap::map(2, fixture.elem.get(), ref_pt); + const libMesh::Point host_dxi = libMesh::FEMap::map_deriv(2, fixture.elem.get(), 0, ref_pt); + const libMesh::Point host_deta = libMesh::FEMap::map_deriv(2, fixture.elem.get(), 1, ref_pt); + + std::vector ref_values = { + host_xyz(0), host_xyz(1), host_xyz(2), + host_dxi(0), host_dxi(1), host_dxi(2), + host_deta(0), host_deta(1), host_deta(2) + }; + + auto d_coords = upload_point_coordinates(*fixture.elem, "tri_coords"); + + Kokkos::View d_results(std::string("tri_results"), ref_values.size()); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + RealVector nodes[3]; + for (unsigned int i = 0; i < 3; ++i) + nodes[i] = libMesh::Kokkos::make_vector( + d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); + + const RealVector xyz = libMesh::Kokkos::physical_point( + nodes, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + const RealTensor J = libMesh::Kokkos::jacobian( + nodes, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + + d_results(0) = vector_component(xyz, 0); + d_results(1) = vector_component(xyz, 1); + d_results(2) = vector_component(xyz, 2); + d_results(3) = tensor_component(J, 0, 0); + d_results(4) = tensor_component(J, 0, 1); + 
d_results(5) = tensor_component(J, 0, 2); + d_results(6) = tensor_component(J, 1, 0); + d_results(7) = tensor_component(J, 1, 1); + d_results(8) = tensor_component(J, 1, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, ref_values); +} + +template +static int +test_map_helpers_case_impl(const map_helper_case & info) +{ + auto fixture = build_reference_fixture(Topo); + const auto context = build_map_helper_context(fixture, info.topo, "map_helper"); + const int fail = evaluate_map_helper_context(context, "map_helper_results", tol); + if (fail) + std::printf(" quadrature map-helper mismatch: %s (%d failures)\n", + info.name, + fail); + return fail; +} + +struct quadrature_map_helper_dispatch +{ + explicit quadrature_map_helper_dispatch(const map_helper_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_helpers_case_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature map-helper topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; +}; + +static int +test_map_helpers_case(const map_helper_case & info) +{ + const quadrature_map_helper_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_face_map_helpers_case_3d_impl(const face_helper_context & context, + const face_helper_case & info, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, "face_helper_results", tol); + if (fail) + std::printf(" quadrature face-helper mismatch: %s parent_type=%d side_id=%u side_type=%d (%d failures)\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_face_map_helpers_case_2d_impl(const face_helper_context & context, + const face_helper_case & info, + libMesh::ElemType side_topo) +{ 
+ const int fail = + evaluate_face_helper_context_2d(context, "face_helper_results", tol); + if (fail) + std::printf(" quadrature face-helper mismatch: %s parent_type=%d side_id=%u side_type=%d (%d failures)\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct quadrature_face_side_dispatch_3d +{ + quadrature_face_side_dispatch_3d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_map_helpers_case_3d_impl(context, info, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper side: %s parent_type=%d side_id=%u side_type=%d\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +template +struct quadrature_face_side_dispatch_2d +{ + quadrature_face_side_dispatch_2d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_map_helpers_case_2d_impl(context, info, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper side: %s parent_type=%d side_id=%u side_type=%d\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +struct quadrature_face_parent_dispatch_2d +{ + quadrature_face_parent_dispatch_2d(const face_helper_context & in_context, + 
const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const quadrature_face_side_dispatch_2d dispatch(context, info, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper parent: %s parent_type=%d side_id=%u\n", + info.name, + static_cast(topo), + info.side_id); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +static int +test_face_map_helpers_case(const face_helper_case & info) +{ + auto fixture = build_reference_fixture(info.parent_topo); + auto side = fixture.elem->build_side_ptr(info.side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, info.side_id, "face_helper"); + + if (context.parent_dim == 3) + { + const quadrature_face_side_dispatch_3d dispatch(context, info, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const quadrature_face_parent_dispatch_2d dispatch(context, info, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported quadrature face-helper parent dimension: %s parent_type=%d side_id=%u dim=%u\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + context.parent_dim); + return 1; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + + // FE::side_map() relies on FEMap::psi_map, which is only 
populated after + // some mapping quantity (e.g. xyz) has been requested on the FE object. + fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_face_qp_to_parent_case(const char * case_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + { + std::vector refspace_nodes; + libMesh::FEBase::get_refspace_nodes(parent.type(), refspace_nodes); + + std::printf(" face_qp mismatch: case=%s parent_type=%d side_type=%d side_id=%u\n", + case_name, + static_cast(parent.type()), + static_cast(side.type()), + side_id); + std::printf(" face_qpt=(%.17g, %.17g, %.17g)\n", + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + std::printf(" host =(%.17g, %.17g, %.17g)\n", + vector_component(host, 0), vector_component(host, 1), vector_component(host, 2)); + std::printf(" kokkos =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0), vector_component(kokkos, 1), vector_component(kokkos, 2)); + std::printf(" diff =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0) - vector_component(host, 0), + vector_component(kokkos, 1) - vector_component(host, 1), + vector_component(kokkos, 2) - vector_component(host, 2)); + std::printf(" 
side nodes / parent refspace nodes:\n"); + + for (unsigned int k = 0; k < side.n_nodes(); ++k) + { + const unsigned int parent_node = parent.local_side_node(side_id, k); + const libMesh::Point parent_refspace = refspace_nodes[parent_node]; + std::printf(" k=%u side_node_id=%llu parent_node=%u parent_refspace=(%.17g, %.17g, %.17g)\n", + k, + libMesh::cast_int(side.node_id(k)), + parent_node, + parent_refspace(0), + parent_refspace(1), + parent_refspace(2)); + } + } + + return fail; +} + +static int +test_face_qp_to_parent_ref_coords() +{ + using libMesh::Elem; + using libMesh::Node; + + int fail = 0; + + { + auto edge = Elem::build(libMesh::EDGE2); + edge->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(3.25, -2.0, 5.0, 0); + auto n1 = Node::build(9.50, 4.0, -1.0, 1); + edge->set_node(0, n0.get()); + edge->set_node(1, n1.get()); + + auto side0 = edge->build_side_ptr(0); + auto side1 = edge->build_side_ptr(1); + + fail += check_face_qp_to_parent_case("edge2_side0", *edge, *side0, 0, libMesh::Kokkos::zero_vector()); + fail += check_face_qp_to_parent_case("edge2_side1", *edge, *side1, 1, libMesh::Kokkos::zero_vector()); + } + + { + auto tri3 = Elem::build(libMesh::TRI3); + tri3->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(10.0, 20.0, 0.0, 0); + auto n1 = Node::build(14.0, 20.0, 0.0, 1); + auto n2 = Node::build(10.0, 23.0, 0.0, 2); + tri3->set_node(0, n0.get()); + tri3->set_node(1, n1.get()); + tri3->set_node(2, n2.get()); + + auto side0 = tri3->build_side_ptr(0); + + fail += check_face_qp_to_parent_case("tri3_side0", *tri3, *side0, 0, libMesh::Kokkos::zero_vector()); + } + + { + auto tri6 = Elem::build(libMesh::TRI6); + tri6->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(4.0, 1.0, 0.0, 0); + auto n1 = Node::build(9.0, 2.0, 0.0, 1); + auto n2 = Node::build(3.0, 8.0, 0.0, 2); + auto n3 = Node::build(42.0, -17.0, 5.0, 3); + auto n4 = Node::build(11.0, 11.0, 1.0, 4); + auto n5 = Node::build(-7.0, 4.0, 2.0, 5); + 
tri6->set_node(0, n0.get()); + tri6->set_node(1, n1.get()); + tri6->set_node(2, n2.get()); + tri6->set_node(3, n3.get()); + tri6->set_node(4, n4.get()); + tri6->set_node(5, n5.get()); + + auto side0 = tri6->build_side_ptr(0); + + fail += check_face_qp_to_parent_case("tri6_side0", *tri6, *side0, 0, libMesh::Kokkos::zero_vector()); + } + + return fail; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const int f = test_quadrature_against_qgauss(); + std::printf("[quadrature_qgauss_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_physical_map_hex8(); + std::printf("[physical_map_hex8] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_physical_map_tri3(); + std::printf("[physical_map_tri3] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const map_helper_case cases[] = { + { libMesh::EDGE3, "EDGE3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX20, "HEX20" } + }; + + for (const auto & info : cases) + { + const int f = test_map_helpers_case(info); + std::printf("[map_helper_oracle] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + { + const face_helper_case cases[] = { + { libMesh::TRI6, 0, "TRI6/side0" }, + { libMesh::TET10, 0, "TET10/side0" }, + { libMesh::HEX20, 0, "HEX20/side0" } + }; + + for (const auto & info : cases) + { + const int f = test_face_map_helpers_case(info); + std::printf("[face_helper_oracle] [%s] %s (%d failures)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + } + { + const int f = test_face_qp_to_parent_ref_coords(); + std::printf("[face_qp_parent_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/quadrature/quadrature_exactness.h b/tests/quadrature/quadrature_exactness.h new file mode 100644 index 00000000000..5db510f09ef --- /dev/null +++ b/tests/quadrature/quadrature_exactness.h @@ -0,0 +1,176 @@ +#ifndef LIBMESH_QUADRATURE_EXACTNESS_H +#define LIBMESH_QUADRATURE_EXACTNESS_H + +#include +#include +#include + +#include +#include +#include + +namespace quadrature_exactness +{ + +inline libMesh::Real +axis_integral(const unsigned int power) +{ + return (power % 2) ? libMesh::Real(0) : (libMesh::Real(2) / (power + 1)); +} + +inline libMesh::Real +edge_integral(const unsigned int x_power) +{ + return axis_integral(x_power); +} + +inline libMesh::Real +quad_integral(const unsigned int x_power, + const unsigned int y_power) +{ + return axis_integral(x_power) * axis_integral(y_power); +} + +inline libMesh::Real +tri_integral(const unsigned int x_power, + const unsigned int y_power) +{ + libMesh::Real analytical = 1.0; + + const unsigned int larger_power = std::max(x_power, y_power); + const unsigned int smaller_power = std::min(x_power, y_power); + + std::vector numerator(smaller_power > 1 ? 
smaller_power - 1 : 0); + std::vector denominator(2 + smaller_power); + + std::iota(numerator.begin(), numerator.end(), 2); + std::iota(denominator.begin(), denominator.end(), larger_power + 1); + + for (std::size_t i = 0; i < denominator.size(); ++i) + { + if (i < numerator.size()) + analytical *= numerator[i]; + + analytical /= denominator[i]; + } + + return analytical; +} + +inline libMesh::Real +hex_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + return axis_integral(x_power) * axis_integral(y_power) * axis_integral(z_power); +} + +inline libMesh::Real +tet_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + libMesh::Real analytical = 1.0; + + unsigned int sorted_powers[3] = {x_power, y_power, z_power}; + std::sort(sorted_powers, sorted_powers + 3); + + std::vector numerator_1(sorted_powers[0] > 1 ? sorted_powers[0] - 1 : 0); + std::vector numerator_2(sorted_powers[1] > 1 ? sorted_powers[1] - 1 : 0); + std::vector denominator(3 + sorted_powers[0] + sorted_powers[1]); + + std::iota(numerator_1.begin(), numerator_1.end(), 2); + std::iota(numerator_2.begin(), numerator_2.end(), 2); + std::iota(denominator.begin(), denominator.end(), sorted_powers[2] + 1); + + for (std::size_t i = 0; i < denominator.size(); ++i) + { + if (i < numerator_1.size()) + analytical *= numerator_1[i]; + + if (i < numerator_2.size()) + analytical *= numerator_2[i]; + + analytical /= denominator[i]; + } + + return analytical; +} + +inline libMesh::Real +prism_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + return tri_integral(x_power, y_power) * axis_integral(z_power); +} + +inline libMesh::Real +pyramid_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + if (x_power % 2 || y_power % 2) + return libMesh::Real(0); + + const unsigned int binom = + libMesh::Utility::binomial(x_power + 
y_power + z_power + 3, z_power); + + return libMesh::Real(4) / + ((x_power + 1) * (y_power + 1) * binom * (x_power + y_power + z_power + 3)); +} + +inline libMesh::Real +monomial_integral(const libMesh::ElemType elem_type, + const unsigned int x_power, + const unsigned int y_power = 0, + const unsigned int z_power = 0) +{ + switch (elem_type) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return edge_integral(x_power); + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + return tri_integral(x_power, y_power); + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return quad_integral(x_power, y_power); + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + return tet_integral(x_power, y_power, z_power); + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return hex_integral(x_power, y_power, z_power); + + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case libMesh::PRISM20: + case libMesh::PRISM21: + return prism_integral(x_power, y_power, z_power); + + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: + return pyramid_integral(x_power, y_power, z_power); + + default: + return libMesh::Real(0); + } +} + +} // namespace quadrature_exactness + +#endif // LIBMESH_QUADRATURE_EXACTNESS_H diff --git a/tests/quadrature/quadrature_test.C b/tests/quadrature/quadrature_test.C index 1dd39a01832..f72440f417e 100644 --- a/tests/quadrature/quadrature_test.C +++ b/tests/quadrature/quadrature_test.C @@ -2,10 +2,10 @@ #include #include #include -#include #include -#include // std::iota + +#include "quadrature_exactness.h" #include "libmesh_cppunit.h" @@ -205,115 +205,47 @@ private: const std::function edge_integrals = [](int mode, int, int) { - return (mode % 2) ? 
0 : (Real(2.0) / (mode+1)); + return quadrature_exactness::edge_integral(static_cast(mode)); }; const std::function quad_integrals = [](int modex, int modey, int) { - const Real exactx = (modex % 2) ? - 0 : (Real(2.0) / (modex+1)); - - const Real exacty = (modey % 2) ? - 0 : (Real(2.0) / (modey+1)); - - return exactx*exacty; + return quadrature_exactness::quad_integral(static_cast(modex), + static_cast(modey)); }; const std::function tri_integrals = [](int x_power, int y_power, int) { - // Compute the true integral, a! b! / (a + b + 2)! - Real analytical = 1.0; - - unsigned - larger_power = std::max(x_power, y_power), - smaller_power = std::min(x_power, y_power); - - // Cancel the larger of the two numerator terms with the - // denominator, and fill in the remaining entries. - std::vector - numerator(smaller_power > 1 ? smaller_power-1 : 0), - denominator(2+smaller_power); - - // Fill up the vectors with sequences starting at the right values. - std::iota(numerator.begin(), numerator.end(), 2); - std::iota(denominator.begin(), denominator.end(), larger_power+1); - - // The denominator is guaranteed to have more terms... - for (std::size_t i=0; i(x_power), + static_cast(y_power)); }; const std::function hex_integrals = [](int modex, int modey, int modez) { - const Real exactx = (modex % 2) ? - 0 : (Real(2.0) / (modex+1)); - - const Real exacty = (modey % 2) ? - 0 : (Real(2.0) / (modey+1)); - - const Real exactz = (modez % 2) ? - 0 : (Real(2.0) / (modez+1)); - - return exactx*exacty*exactz; + return quadrature_exactness::hex_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; const std::function tet_integrals = [](int x_power, int y_power, int z_power) { - // Compute the true integral, a! b! c! / (a + b + c + 3)! 
- Real analytical = 1.0; - - // Sort the a, b, c values - int sorted_powers[3] = {x_power, y_power, z_power}; - std::sort(sorted_powers, sorted_powers+3); - - // Cancel the largest power with the denominator, fill in the - // entries for the remaining numerator terms and the denominator. - std::vector - numerator_1(sorted_powers[0] > 1 ? sorted_powers[0]-1 : 0), - numerator_2(sorted_powers[1] > 1 ? sorted_powers[1]-1 : 0), - denominator(3 + sorted_powers[0] + sorted_powers[1]); - - // Fill up the vectors with sequences starting at the right values. - std::iota(numerator_1.begin(), numerator_1.end(), 2); - std::iota(numerator_2.begin(), numerator_2.end(), 2); - std::iota(denominator.begin(), denominator.end(), sorted_powers[2]+1); - - // The denominator is guaranteed to have the most terms... - for (std::size_t i=0; i(x_power), + static_cast(y_power), + static_cast(z_power)); }; const std::function prism_integrals = - [this](int modex, int modey, int modez) { - const Real exactz = (modez % 2) ? 
- 0 : (Real(2.0) / (modez+1)); - - return exactz * tri_integrals(modex, modey, 0); + [](int modex, int modey, int modez) { + return quadrature_exactness::prism_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; const std::function pyramid_integrals = [](int modex, int modey, int modez) { - - const int binom = Utility::binomial(modex+modey+modez+3, modez); - - if (modex%2 || modey%2) - return Real(0); - - return Real(4)/((modex+1)*(modey+1)*binom*(modex+modey+modez+3)); + return quadrature_exactness::pyramid_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; From 5ebe357661e65ab0c38e3e68089ed152a655a453 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 6 May 2026 13:20:30 -0600 Subject: [PATCH 32/48] Fix installed Kokkos FE header support --- contrib/bin/libmesh-config.in | 2 +- contrib/utils/libmesh-dbg.pc.in | 1 + contrib/utils/libmesh-devel.pc.in | 1 + contrib/utils/libmesh-oprof.pc.in | 1 + contrib/utils/libmesh-opt.pc.in | 1 + contrib/utils/libmesh-prof.pc.in | 1 + include/gpu/kokkos_fe_base.h | 2 +- include/gpu/kokkos_fe_evaluator.h | 12 ++--- include/gpu/kokkos_fe_face_map.h | 2 +- include/gpu/kokkos_fe_lagrange_1d.h | 2 +- include/gpu/kokkos_fe_lagrange_2d.h | 2 +- include/gpu/kokkos_fe_lagrange_3d.h | 2 +- include/gpu/kokkos_fe_map.h | 4 +- include/gpu/kokkos_fe_monomial.h | 2 +- include/gpu/kokkos_quadrature.h | 2 +- include/gpu/kokkos_scalar_types.h | 68 +++++++++++++++++++++++++++++ 16 files changed, 89 insertions(+), 16 deletions(-) diff --git a/contrib/bin/libmesh-config.in b/contrib/bin/libmesh-config.in index b935b5b334b..e75de90ed95 100644 --- a/contrib/bin/libmesh-config.in +++ b/contrib/bin/libmesh-config.in @@ -106,7 +106,7 @@ while [ "x$1" != "x" ]; do ;; "--cppflags") - return_val="${CPPFLAGS} $return_val" + return_val="${CPPFLAGS} @KOKKOS_CPPFLAGS@ $return_val" ;; "--cxxflags") diff --git a/contrib/utils/libmesh-dbg.pc.in b/contrib/utils/libmesh-dbg.pc.in index b4a29f1156f..8e366deaf74 100644 
--- a/contrib/utils/libmesh-dbg.pc.in +++ b/contrib/utils/libmesh-dbg.pc.in @@ -14,4 +14,5 @@ Libs.private: Cflags: @CPPFLAGS_DBG@ \ ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-devel.pc.in b/contrib/utils/libmesh-devel.pc.in index 4614467f5d9..62f74ccad3a 100644 --- a/contrib/utils/libmesh-devel.pc.in +++ b/contrib/utils/libmesh-devel.pc.in @@ -14,4 +14,5 @@ Libs.private: Cflags: @CPPFLAGS_DEVEL@ \ ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-oprof.pc.in b/contrib/utils/libmesh-oprof.pc.in index 0a1fbd47e25..896f849bbd0 100644 --- a/contrib/utils/libmesh-oprof.pc.in +++ b/contrib/utils/libmesh-oprof.pc.in @@ -14,4 +14,5 @@ Libs.private: Cflags: @CPPFLAGS_OPROF@ \ ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-opt.pc.in b/contrib/utils/libmesh-opt.pc.in index cbb9529da5b..6beb7b6a87f 100644 --- a/contrib/utils/libmesh-opt.pc.in +++ b/contrib/utils/libmesh-opt.pc.in @@ -14,4 +14,5 @@ Libs.private: Cflags: @CPPFLAGS_OPT@ \ ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-prof.pc.in b/contrib/utils/libmesh-prof.pc.in index 333dcaa92b6..8f24579e6f6 100644 --- a/contrib/utils/libmesh-prof.pc.in +++ b/contrib/utils/libmesh-prof.pc.in @@ -14,4 +14,5 @@ Libs.private: Cflags: @CPPFLAGS_PROF@ \ ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/include/gpu/kokkos_fe_base.h b/include/gpu/kokkos_fe_base.h index 07664e627eb..4526ebdc67a 100644 --- a/include/gpu/kokkos_fe_base.h +++ b/include/gpu/kokkos_fe_base.h @@ -27,7 +27,7 @@ #ifndef LIBMESH_KOKKOS_FE_BASE_H #define LIBMESH_KOKKOS_FE_BASE_H -#include "gpu/kokkos_scalar_types.h" +#include "kokkos_scalar_types.h" #include "libmesh/libmesh_device.h" #include 
"libmesh/enum_elem_type.h" #include "libmesh/enum_fe_family.h" diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h index 118880c614d..5fb7c1d1dc4 100644 --- a/include/gpu/kokkos_fe_evaluator.h +++ b/include/gpu/kokkos_fe_evaluator.h @@ -15,12 +15,12 @@ #ifndef LIBMESH_KOKKOS_FE_EVALUATOR_H #define LIBMESH_KOKKOS_FE_EVALUATOR_H -#include "gpu/kokkos_fe_base.h" -#include "gpu/kokkos_fe_types.h" -#include "gpu/kokkos_fe_lagrange_1d.h" -#include "gpu/kokkos_fe_lagrange_2d.h" -#include "gpu/kokkos_fe_lagrange_3d.h" -#include "gpu/kokkos_fe_monomial.h" +#include "kokkos_fe_base.h" +#include "kokkos_fe_types.h" +#include "kokkos_fe_lagrange_1d.h" +#include "kokkos_fe_lagrange_2d.h" +#include "kokkos_fe_lagrange_3d.h" +#include "kokkos_fe_monomial.h" #include "libmesh/enum_elem_type.h" #include "libmesh/enum_fe_family.h" diff --git a/include/gpu/kokkos_fe_face_map.h b/include/gpu/kokkos_fe_face_map.h index 822ce4da406..5cefdd2402b 100644 --- a/include/gpu/kokkos_fe_face_map.h +++ b/include/gpu/kokkos_fe_face_map.h @@ -3,7 +3,7 @@ #ifdef LIBMESH_HAVE_KOKKOS -#include "gpu/kokkos_fe_evaluator.h" +#include "kokkos_fe_evaluator.h" #include "libmesh/elem.h" namespace libMesh::Kokkos diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h index 72e78692882..3ade1ee709c 100644 --- a/include/gpu/kokkos_fe_lagrange_1d.h +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -12,7 +12,7 @@ #ifndef LIBMESH_KOKKOS_FE_LAGRANGE_1D_H #define LIBMESH_KOKKOS_FE_LAGRANGE_1D_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_lagrange_2d.h b/include/gpu/kokkos_fe_lagrange_2d.h index 08d1e2f5ba6..f58097cbe21 100644 --- a/include/gpu/kokkos_fe_lagrange_2d.h +++ b/include/gpu/kokkos_fe_lagrange_2d.h @@ -8,7 +8,7 @@ #ifndef LIBMESH_KOKKOS_FE_LAGRANGE_2D_H #define LIBMESH_KOKKOS_FE_LAGRANGE_2D_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" namespace 
libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_lagrange_3d.h b/include/gpu/kokkos_fe_lagrange_3d.h index 5f2fbb203c3..48afbccaa28 100644 --- a/include/gpu/kokkos_fe_lagrange_3d.h +++ b/include/gpu/kokkos_fe_lagrange_3d.h @@ -8,7 +8,7 @@ #ifndef LIBMESH_KOKKOS_FE_LAGRANGE_3D_H #define LIBMESH_KOKKOS_FE_LAGRANGE_3D_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h index d71f81c931a..6e237997f81 100644 --- a/include/gpu/kokkos_fe_map.h +++ b/include/gpu/kokkos_fe_map.h @@ -17,8 +17,8 @@ #ifndef LIBMESH_KOKKOS_FE_MAP_H #define LIBMESH_KOKKOS_FE_MAP_H -#include "gpu/kokkos_fe_evaluator.h" -#include "gpu/kokkos_scalar_types.h" +#include "kokkos_fe_evaluator.h" +#include "kokkos_scalar_types.h" namespace libMesh::Kokkos { diff --git a/include/gpu/kokkos_fe_monomial.h b/include/gpu/kokkos_fe_monomial.h index b68289c72eb..2dde44785f6 100644 --- a/include/gpu/kokkos_fe_monomial.h +++ b/include/gpu/kokkos_fe_monomial.h @@ -13,7 +13,7 @@ #ifndef LIBMESH_KOKKOS_FE_MONOMIAL_H #define LIBMESH_KOKKOS_FE_MONOMIAL_H -#include "gpu/kokkos_fe_base.h" +#include "kokkos_fe_base.h" #include "libmesh/enum_elem_type.h" namespace libMesh::Kokkos diff --git a/include/gpu/kokkos_quadrature.h b/include/gpu/kokkos_quadrature.h index 4c2e8750bab..d8b94c56a5c 100644 --- a/include/gpu/kokkos_quadrature.h +++ b/include/gpu/kokkos_quadrature.h @@ -14,7 +14,7 @@ #ifndef LIBMESH_KOKKOS_QUADRATURE_H #define LIBMESH_KOKKOS_QUADRATURE_H -#include "gpu/kokkos_scalar_types.h" +#include "kokkos_scalar_types.h" #include "libmesh/enum_elem_type.h" #include #include diff --git a/include/gpu/kokkos_scalar_types.h b/include/gpu/kokkos_scalar_types.h index b7386cf900f..7584819413b 100644 --- a/include/gpu/kokkos_scalar_types.h +++ b/include/gpu/kokkos_scalar_types.h @@ -10,6 +10,7 @@ #include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" #include "libmesh/type_tensor.h" + namespace 
libMesh::Kokkos { @@ -17,6 +18,73 @@ using Real = libMesh::Real; using RealVector = libMesh::TypeVector; using RealTensor = libMesh::TypeTensor; +template +LIBMESH_DEVICE_INLINE +VectorType load_vector(const ViewType & view, const unsigned int i) +{ + VectorType v; + v.zero(); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + v(d) = view(i, d); + + return v; +} + +template +LIBMESH_DEVICE_INLINE +void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) +{ + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + view(i, d) = v(d); +} + +template +LIBMESH_DEVICE_INLINE +Real vector_component(const ViewType & view, const unsigned int i, const unsigned int component) +{ + if (component < LIBMESH_DIM) + return view(i, component); + + return Real(0); +} + +template +LIBMESH_DEVICE_INLINE +TensorType load_tensor(const ViewType & view, const unsigned int i) +{ + TensorType T; + T.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + T(row, col) = view(i, row, col); + + return T; +} + +template +LIBMESH_DEVICE_INLINE +void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + view(i, row, col) = T(row, col); +} + +template +LIBMESH_DEVICE_INLINE +Real tensor_component(const ViewType & view, + const unsigned int i, + const unsigned int row, + const unsigned int col) +{ + if (row < LIBMESH_DIM && col < LIBMESH_DIM) + return view(i, row, col); + + return Real(0); +} + LIBMESH_DEVICE_INLINE RealVector zero_vector() { From 0f30e7b4883cee5df82246a816d176074851eeb6 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 6 May 2026 15:03:32 -0600 Subject: [PATCH 33/48] Export enum_fe_elem_class header --- include/include_HEADERS | 1 + include/libmesh/Makefile.am | 4 ++++ include/libmesh/Makefile.in | 17 ++++++++++------- 3 files changed, 15 insertions(+), 7 
deletions(-) diff --git a/include/include_HEADERS b/include/include_HEADERS index a08484a3083..a17fe5f508e 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -50,6 +50,7 @@ include_HEADERS = \ enums/enum_elem_quality.h \ enums/enum_elem_type.h \ enums/enum_error_estimator_type.h \ + enums/enum_fe_elem_class.h \ enums/enum_fe_family.h \ enums/enum_inf_map_type.h \ enums/enum_io_package.h \ diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index 25470b1ea0e..f0bba052ded 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -41,6 +41,7 @@ BUILT_SOURCES = \ enum_elem_quality.h \ enum_elem_type.h \ enum_error_estimator_type.h \ + enum_fe_elem_class.h \ enum_fe_family.h \ enum_inf_map_type.h \ enum_io_package.h \ @@ -729,6 +730,9 @@ enum_elem_type.h: $(top_srcdir)/include/enums/enum_elem_type.h enum_error_estimator_type.h: $(top_srcdir)/include/enums/enum_error_estimator_type.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +enum_fe_elem_class.h: $(top_srcdir)/include/enums/enum_fe_elem_class.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + enum_fe_family.h: $(top_srcdir)/include/enums/enum_fe_family.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/include/libmesh/Makefile.in b/include/libmesh/Makefile.in index c2ff4cf1b7c..823e23c2643 100644 --- a/include/libmesh/Makefile.in +++ b/include/libmesh/Makefile.in @@ -547,13 +547,13 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ single_predicates.h sparsity_pattern.h variable.h \ variant_filter_iterator.h enum_convergence_flags.h \ enum_eigen_solver_type.h enum_elem_quality.h enum_elem_type.h \ - enum_error_estimator_type.h enum_fe_family.h \ - enum_inf_map_type.h enum_io_package.h enum_matrix_build_type.h \ - enum_norm_type.h enum_order.h enum_parallel_type.h \ - enum_partitioner_type.h enum_point_locator_type.h \ - enum_preconditioner_type.h enum_quadrature_type.h \ - enum_solver_package.h enum_solver_type.h \ - enum_subset_solve_mode.h 
enum_xdr_mode.h \ + enum_error_estimator_type.h enum_fe_elem_class.h \ + enum_fe_family.h enum_inf_map_type.h enum_io_package.h \ + enum_matrix_build_type.h enum_norm_type.h enum_order.h \ + enum_parallel_type.h enum_partitioner_type.h \ + enum_point_locator_type.h enum_preconditioner_type.h \ + enum_quadrature_type.h enum_solver_package.h \ + enum_solver_type.h enum_subset_solve_mode.h enum_xdr_mode.h \ adjoint_refinement_estimator.h \ adjoint_residual_error_estimator.h discontinuity_measure.h \ error_estimator.h exact_error_estimator.h exact_solution.h \ @@ -1069,6 +1069,9 @@ enum_elem_type.h: $(top_srcdir)/include/enums/enum_elem_type.h enum_error_estimator_type.h: $(top_srcdir)/include/enums/enum_error_estimator_type.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +enum_fe_elem_class.h: $(top_srcdir)/include/enums/enum_fe_elem_class.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + enum_fe_family.h: $(top_srcdir)/include/enums/enum_fe_family.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ From f7fdf896b4de278fb050cabc9f4626e344b42787 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 15:47:37 -0600 Subject: [PATCH 34/48] Share FE kernels and reference traits with Kokkos FE --- include/Makefile.am | 1 - include/fe/fe_lagrange_shape_1D.h | 23 +- include/fe/fe_reference_element_traits.h | 897 ++++++++ include/fe/fe_serendipity_lagrange.h | 417 ++++ include/fe/fe_shape_traits.h | 628 ++++++ include/fe/fe_simplex_lagrange.h | 462 +++++ include/fe/fe_tensor_product_lagrange.h | 320 +++ include/gpu/kokkos_fe_base.h | 43 +- include/gpu/kokkos_fe_evaluator.h | 324 ++- include/gpu/kokkos_fe_face_map.h | 85 +- include/gpu/kokkos_fe_lagrange_1d.h | 33 +- include/gpu/kokkos_fe_lagrange_2d.h | 136 +- include/gpu/kokkos_fe_lagrange_3d.h | 249 +-- include/gpu/kokkos_fe_map.h | 3 +- include/gpu/kokkos_fe_monomial.h | 901 +-------- include/gpu/kokkos_fe_types.h | 648 +----- include/gpu/kokkos_quadrature.h | 634 +----- include/gpu/kokkos_scalar_types.h | 186 -- include/include_HEADERS 
| 6 + include/quadrature/quadrature_gauss_rules.h | 373 ++++ src/fe/fe_lagrange_shape_2D.C | 854 +------- src/fe/fe_lagrange_shape_3D.C | 1795 +---------------- src/quadrature/quadrature_gauss_1D.C | 17 + src/quadrature/quadrature_gauss_2D.C | 19 + src/quadrature/quadrature_gauss_3D.C | 21 + .../fe/kokkos_fe_reconstruction_oracle_test.K | 4 +- tests/fe/kokkos_fe_shape_oracle_test.K | 2 +- tests/fe/kokkos_fe_side_trace_oracle_test.K | 4 +- 28 files changed, 3777 insertions(+), 5308 deletions(-) create mode 100644 include/fe/fe_reference_element_traits.h create mode 100644 include/fe/fe_serendipity_lagrange.h create mode 100644 include/fe/fe_shape_traits.h create mode 100644 include/fe/fe_simplex_lagrange.h create mode 100644 include/fe/fe_tensor_product_lagrange.h delete mode 100644 include/gpu/kokkos_scalar_types.h create mode 100644 include/quadrature/quadrature_gauss_rules.h diff --git a/include/Makefile.am b/include/Makefile.am index 1eb5f275748..ee7b5e1b61a 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -5,7 +5,6 @@ SUBDIRS = libmesh # nobase_ is used instead of the standard flat install to keep the namespace. if LIBMESH_ENABLE_KOKKOS nobase_include_HEADERS = \ - gpu/kokkos_scalar_types.h \ gpu/kokkos_fe_types.h \ gpu/kokkos_fe_base.h \ gpu/kokkos_fe_evaluator.h \ diff --git a/include/fe/fe_lagrange_shape_1D.h b/include/fe/fe_lagrange_shape_1D.h index e72ad564723..f9949090043 100644 --- a/include/fe/fe_lagrange_shape_1D.h +++ b/include/fe/fe_lagrange_shape_1D.h @@ -20,6 +20,7 @@ #define LIBMESH_FE_LAGRANGE_SHAPE_1D_H // Local includes +#include "libmesh/libmesh_device.h" #include "libmesh/enum_order.h" // FIRST, SECOND, etc. 
#include "libmesh/point.h" @@ -28,7 +29,7 @@ namespace libMesh { -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_linear_shape(const unsigned int i, const Real xi) { @@ -47,7 +48,7 @@ Real fe_lagrange_1D_linear_shape(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_quadratic_shape(const unsigned int i, const Real xi) { @@ -69,7 +70,7 @@ Real fe_lagrange_1D_quadratic_shape(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_cubic_shape(const unsigned int i, const Real xi) { @@ -94,7 +95,7 @@ Real fe_lagrange_1D_cubic_shape(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_shape(const Order order, const unsigned int i, const Real xi) @@ -120,7 +121,7 @@ Real fe_lagrange_1D_shape(const Order order, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_linear_shape_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real) @@ -142,7 +143,7 @@ Real fe_lagrange_1D_linear_shape_deriv(const unsigned int i, } -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_quadratic_shape_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real xi) @@ -167,7 +168,7 @@ Real fe_lagrange_1D_quadratic_shape_deriv(const unsigned int i, } -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_cubic_shape_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real xi) @@ -196,7 +197,7 @@ Real fe_lagrange_1D_cubic_shape_deriv(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_shape_deriv(const Order order, const unsigned int i, const unsigned int j, @@ -224,7 +225,7 @@ Real fe_lagrange_1D_shape_deriv(const Order order, // fe_lagrange_1D_linear_shape_second_deriv is 0 -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_quadratic_shape_second_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real) @@ -249,7 +250,7 @@ Real fe_lagrange_1D_quadratic_shape_second_deriv(const unsigned int i, } -inline +LIBMESH_DEVICE_INLINE Real 
fe_lagrange_1D_cubic_shape_second_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real xi) @@ -278,7 +279,7 @@ Real fe_lagrange_1D_cubic_shape_second_deriv(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_shape_second_deriv(const Order order, const unsigned int i, const unsigned int j, diff --git a/include/fe/fe_reference_element_traits.h b/include/fe/fe_reference_element_traits.h new file mode 100644 index 00000000000..e79364923bc --- /dev/null +++ b/include/fe/fe_reference_element_traits.h @@ -0,0 +1,897 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +#ifndef LIBMESH_FE_REFERENCE_ELEMENT_TRAITS_H +#define LIBMESH_FE_REFERENCE_ELEMENT_TRAITS_H + +#include "libmesh/enum_elem_type.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/point.h" + +namespace libMesh +{ + +constexpr unsigned int edge2_side_node_counts[2] = {1, 1}; +constexpr unsigned int edge3_side_node_counts[2] = {1, 1}; + +constexpr unsigned int tri3_side_node_counts[3] = {2, 2, 2}; +constexpr unsigned int tri6_side_node_counts[3] = {3, 3, 3}; +constexpr unsigned int tri7_side_node_counts[3] = {3, 3, 3}; + +constexpr unsigned int quad4_side_node_counts[4] = {2, 2, 2, 2}; +constexpr unsigned int quad8_side_node_counts[4] = {3, 3, 3, 3}; +constexpr unsigned int quad9_side_node_counts[4] = {3, 3, 3, 3}; + +constexpr unsigned int tet4_side_node_counts[4] = {3, 3, 3, 3}; +constexpr unsigned int tet10_side_node_counts[4] = {6, 6, 6, 6}; +constexpr unsigned int tet14_side_node_counts[4] = {7, 7, 7, 7}; + +constexpr unsigned int hex8_side_node_counts[6] = {4, 4, 4, 4, 4, 4}; +constexpr unsigned int hex20_side_node_counts[6] = {8, 8, 8, 8, 8, 8}; +constexpr unsigned int hex27_side_node_counts[6] = {9, 9, 9, 9, 9, 9}; + +constexpr unsigned int prism6_side_node_counts[5] = {3, 4, 4, 4, 3}; +constexpr unsigned int prism15_side_node_counts[5] = {6, 8, 8, 8, 6}; +constexpr unsigned int 
prism18_side_node_counts[5] = {6, 9, 9, 9, 6}; +constexpr unsigned int prism20_side_node_counts[5] = {7, 9, 9, 9, 7}; +constexpr unsigned int prism21_side_node_counts[5] = {7, 9, 9, 9, 7}; + +constexpr unsigned int pyramid5_side_node_counts[5] = {3, 3, 3, 3, 4}; +constexpr unsigned int pyramid13_side_node_counts[5] = {6, 6, 6, 6, 8}; +constexpr unsigned int pyramid14_side_node_counts[5] = {6, 6, 6, 6, 9}; +constexpr unsigned int pyramid18_side_node_counts[5] = {7, 7, 7, 7, 9}; + +constexpr unsigned int prism6_side_nodes[5][4] = + { + {0, 2, 1, 99}, + {0, 1, 4, 3}, + {1, 2, 5, 4}, + {2, 0, 3, 5}, + {3, 4, 5, 99} + }; + +constexpr unsigned int prism15_side_nodes[5][8] = + { + {0, 2, 1, 8, 7, 6, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9}, + {1, 2, 5, 4, 7, 11, 13, 10}, + {2, 0, 3, 5, 8, 9, 14, 11}, + {3, 4, 5, 12, 13, 14, 99, 99} + }; + +constexpr unsigned int prism18_side_nodes[5][9] = + { + {0, 2, 1, 8, 7, 6, 99, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9, 15}, + {1, 2, 5, 4, 7, 11, 13, 10, 16}, + {2, 0, 3, 5, 8, 9, 14, 11, 17}, + {3, 4, 5, 12, 13, 14, 99, 99, 99} + }; + +constexpr unsigned int prism20_side_nodes[5][9] = + { + {0, 2, 1, 8, 7, 6, 18, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9, 15}, + {1, 2, 5, 4, 7, 11, 13, 10, 16}, + {2, 0, 3, 5, 8, 9, 14, 11, 17}, + {3, 4, 5, 12, 13, 14, 19, 99, 99} + }; + +constexpr unsigned int prism21_side_nodes[5][9] = + { + {0, 2, 1, 8, 7, 6, 18, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9, 15}, + {1, 2, 5, 4, 7, 11, 13, 10, 16}, + {2, 0, 3, 5, 8, 9, 14, 11, 17}, + {3, 4, 5, 12, 13, 14, 19, 99, 99} + }; + +constexpr unsigned int pyramid5_side_nodes[5][4] = + { + {0, 1, 4, 99}, + {1, 2, 4, 99}, + {2, 3, 4, 99}, + {3, 0, 4, 99}, + {0, 3, 2, 1} + }; + +constexpr unsigned int pyramid13_side_nodes[5][8] = + { + {0, 1, 4, 5, 10, 9, 99, 99}, + {1, 2, 4, 6, 11, 10, 99, 99}, + {2, 3, 4, 7, 12, 11, 99, 99}, + {3, 0, 4, 8, 9, 12, 99, 99}, + {0, 3, 2, 1, 8, 7, 6, 5} + }; + +constexpr unsigned int pyramid14_side_nodes[5][9] = + { + {0, 1, 4, 5, 10, 9, 99, 99, 
99}, + {1, 2, 4, 6, 11, 10, 99, 99, 99}, + {2, 3, 4, 7, 12, 11, 99, 99, 99}, + {3, 0, 4, 8, 9, 12, 99, 99, 99}, + {0, 3, 2, 1, 8, 7, 6, 5, 13} + }; + +constexpr unsigned int pyramid18_side_nodes[5][9] = + { + {0, 1, 4, 5, 10, 9, 14, 99, 99}, + {1, 2, 4, 6, 11, 10, 15, 99, 99}, + {2, 3, 4, 7, 12, 11, 16, 99, 99}, + {3, 0, 4, 8, 9, 12, 17, 99, 99}, + {0, 3, 2, 1, 8, 7, 6, 5, 13} + }; + +constexpr unsigned int tri3_side_nodes[3][2] = + { + {0, 1}, + {1, 2}, + {2, 0} + }; + +constexpr unsigned int tri6_side_nodes[3][3] = + { + {0, 1, 3}, + {1, 2, 4}, + {2, 0, 5} + }; + +constexpr unsigned int tri7_side_nodes[3][3] = + { + {0, 1, 3}, + {1, 2, 4}, + {2, 0, 5} + }; + +constexpr unsigned int quad4_side_nodes[4][2] = + { + {0, 1}, + {1, 2}, + {2, 3}, + {3, 0} + }; + +constexpr unsigned int quad8_side_nodes[4][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {2, 3, 6}, + {3, 0, 7} + }; + +constexpr unsigned int quad9_side_nodes[4][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {2, 3, 6}, + {3, 0, 7} + }; + +constexpr unsigned int tet4_side_nodes[4][3] = + { + {0, 2, 1}, + {0, 1, 3}, + {1, 2, 3}, + {2, 0, 3} + }; + +constexpr unsigned int tet10_side_nodes[4][6] = + { + {0, 2, 1, 6, 5, 4}, + {0, 1, 3, 4, 8, 7}, + {1, 2, 3, 5, 9, 8}, + {2, 0, 3, 6, 7, 9} + }; + +constexpr unsigned int tet14_side_nodes[4][7] = + { + {0, 2, 1, 6, 5, 4, 10}, + {0, 1, 3, 4, 8, 7, 11}, + {1, 2, 3, 5, 9, 8, 12}, + {2, 0, 3, 6, 7, 9, 13} + }; + +constexpr unsigned int hex8_side_nodes[6][4] = + { + {0, 3, 2, 1}, + {0, 1, 5, 4}, + {1, 2, 6, 5}, + {2, 3, 7, 6}, + {3, 0, 4, 7}, + {4, 5, 6, 7} + }; + +constexpr unsigned int hex20_side_nodes[6][8] = + { + {0, 3, 2, 1, 11, 10, 9, 8}, + {0, 1, 5, 4, 8, 13, 16, 12}, + {1, 2, 6, 5, 9, 14, 17, 13}, + {2, 3, 7, 6, 10, 15, 18, 14}, + {3, 0, 4, 7, 11, 12, 19, 15}, + {4, 5, 6, 7, 16, 17, 18, 19} + }; + +constexpr unsigned int hex27_side_nodes[6][9] = + { + {0, 3, 2, 1, 11, 10, 9, 8, 20}, + {0, 1, 5, 4, 8, 13, 16, 12, 21}, + {1, 2, 6, 5, 9, 14, 17, 13, 22}, + {2, 3, 7, 6, 10, 15, 18, 14, 
23}, + {3, 0, 4, 7, 11, 12, 19, 15, 24}, + {4, 5, 6, 7, 16, 17, 18, 19, 25} + }; + +constexpr unsigned int edge2_side_nodes[2][1] = + { + {0}, + {1} + }; + +constexpr unsigned int edge3_side_nodes[2][1] = + { + {0}, + {1} + }; + +LIBMESH_DEVICE_INLINE bool +requires_side_specific_topology(ElemType parent) +{ + switch (parent) + { + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE ElemType +side_topology_or_invalid(ElemType parent, + unsigned int side) +{ + switch (parent) + { + case PRISM6: + switch (side) + { + case 0: + case 4: + return TRI3; + case 1: + case 2: + case 3: + return QUAD4; + default: + return INVALID_ELEM; + } + + case PRISM15: + switch (side) + { + case 0: + case 4: + return TRI6; + case 1: + case 2: + case 3: + return QUAD8; + default: + return INVALID_ELEM; + } + + case PRISM18: + switch (side) + { + case 0: + case 4: + return TRI6; + case 1: + case 2: + case 3: + return QUAD9; + default: + return INVALID_ELEM; + } + + case PRISM20: + case PRISM21: + switch (side) + { + case 0: + case 4: + return TRI7; + case 1: + case 2: + case 3: + return QUAD9; + default: + return INVALID_ELEM; + } + + case PYRAMID5: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI3; + case 4: + return QUAD4; + default: + return INVALID_ELEM; + } + + case PYRAMID13: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI6; + case 4: + return QUAD8; + default: + return INVALID_ELEM; + } + + case PYRAMID14: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI6; + case 4: + return QUAD9; + default: + return INVALID_ELEM; + } + + case PYRAMID18: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI7; + case 4: + return QUAD9; + default: + return INVALID_ELEM; + } + + default: + return INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE 
unsigned int +side_node_count_or_zero(ElemType parent, + unsigned int side) +{ + switch (parent) + { + case EDGE2: + return side < 2 ? edge2_side_node_counts[side] : 0; + case EDGE3: + return side < 2 ? edge3_side_node_counts[side] : 0; + case TRI3: + return side < 3 ? tri3_side_node_counts[side] : 0; + case TRI6: + return side < 3 ? tri6_side_node_counts[side] : 0; + case TRI7: + return side < 3 ? tri7_side_node_counts[side] : 0; + case QUAD4: + return side < 4 ? quad4_side_node_counts[side] : 0; + case QUAD8: + return side < 4 ? quad8_side_node_counts[side] : 0; + case QUAD9: + return side < 4 ? quad9_side_node_counts[side] : 0; + case TET4: + return side < 4 ? tet4_side_node_counts[side] : 0; + case TET10: + return side < 4 ? tet10_side_node_counts[side] : 0; + case TET14: + return side < 4 ? tet14_side_node_counts[side] : 0; + case HEX8: + return side < 6 ? hex8_side_node_counts[side] : 0; + case HEX20: + return side < 6 ? hex20_side_node_counts[side] : 0; + case HEX27: + return side < 6 ? hex27_side_node_counts[side] : 0; + case PRISM6: + return side < 5 ? prism6_side_node_counts[side] : 0; + case PRISM15: + return side < 5 ? prism15_side_node_counts[side] : 0; + case PRISM18: + return side < 5 ? prism18_side_node_counts[side] : 0; + case PRISM20: + return side < 5 ? prism20_side_node_counts[side] : 0; + case PRISM21: + return side < 5 ? prism21_side_node_counts[side] : 0; + case PYRAMID5: + return side < 5 ? pyramid5_side_node_counts[side] : 0; + case PYRAMID13: + return side < 5 ? pyramid13_side_node_counts[side] : 0; + case PYRAMID14: + return side < 5 ? pyramid14_side_node_counts[side] : 0; + case PYRAMID18: + return side < 5 ? 
pyramid18_side_node_counts[side] : 0; + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE bool +try_local_side_node(ElemType parent, + unsigned int side, + unsigned int side_node, + unsigned int & node) +{ + const unsigned int count = side_node_count_or_zero(parent, side); + if (!count || side_node >= count) + return false; + + switch (parent) + { + case EDGE2: + node = edge2_side_nodes[side][side_node]; + return true; + case EDGE3: + node = edge3_side_nodes[side][side_node]; + return true; + case TRI3: + node = tri3_side_nodes[side][side_node]; + return true; + case TRI6: + node = tri6_side_nodes[side][side_node]; + return true; + case TRI7: + node = tri7_side_nodes[side][side_node]; + return true; + case QUAD4: + node = quad4_side_nodes[side][side_node]; + return true; + case QUAD8: + node = quad8_side_nodes[side][side_node]; + return true; + case QUAD9: + node = quad9_side_nodes[side][side_node]; + return true; + case TET4: + node = tet4_side_nodes[side][side_node]; + return true; + case TET10: + node = tet10_side_nodes[side][side_node]; + return true; + case TET14: + node = tet14_side_nodes[side][side_node]; + return true; + case HEX8: + node = hex8_side_nodes[side][side_node]; + return true; + case HEX20: + node = hex20_side_nodes[side][side_node]; + return true; + case HEX27: + node = hex27_side_nodes[side][side_node]; + return true; + case PRISM6: + node = prism6_side_nodes[side][side_node]; + return true; + case PRISM15: + node = prism15_side_nodes[side][side_node]; + return true; + case PRISM18: + node = prism18_side_nodes[side][side_node]; + return true; + case PRISM20: + node = prism20_side_nodes[side][side_node]; + return true; + case PRISM21: + node = prism21_side_nodes[side][side_node]; + return true; + case PYRAMID5: + node = pyramid5_side_nodes[side][side_node]; + return true; + case PYRAMID13: + node = pyramid13_side_nodes[side][side_node]; + return true; + case PYRAMID14: + node = pyramid14_side_nodes[side][side_node]; + return true; + case 
PYRAMID18: + node = pyramid18_side_nodes[side][side_node]; + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +try_reference_node(ElemType type, + unsigned int node, + Point & pt) +{ + switch (type) + { + case EDGE2: + case EDGE3: + switch (node) + { + case 0: + pt = Point(-1.0); + return true; + case 1: + pt = Point(1.0); + return true; + case 2: + if (type == EDGE3) + { + pt = Point(0.0); + return true; + } + return false; + default: + return false; + } + + case TRI3: + case TRI6: + case TRI7: + switch (node) + { + case 0: + pt = Point(0.0, 0.0); + return true; + case 1: + pt = Point(1.0, 0.0); + return true; + case 2: + pt = Point(0.0, 1.0); + return true; + case 3: + pt = Point(0.5, 0.0); + return true; + case 4: + pt = Point(0.5, 0.5); + return true; + case 5: + pt = Point(0.0, 0.5); + return true; + case 6: + if (type == TRI7) + { + pt = Point(1. / 3., 1. / 3.); + return true; + } + return false; + default: + return false; + } + + case QUAD4: + case QUAD8: + case QUAD9: + switch (node) + { + case 0: + pt = Point(-1.0, -1.0); + return true; + case 1: + pt = Point(1.0, -1.0); + return true; + case 2: + pt = Point(1.0, 1.0); + return true; + case 3: + pt = Point(-1.0, 1.0); + return true; + case 4: + pt = Point(0.0, -1.0); + return true; + case 5: + pt = Point(1.0, 0.0); + return true; + case 6: + pt = Point(0.0, 1.0); + return true; + case 7: + pt = Point(-1.0, 0.0); + return true; + case 8: + if (type == QUAD9) + { + pt = Point(0.0, 0.0); + return true; + } + return false; + default: + return false; + } + + case TET4: + case TET10: + case TET14: + switch (node) + { + case 0: + pt = Point(0.0, 0.0, 0.0); + return true; + case 1: + pt = Point(1.0, 0.0, 0.0); + return true; + case 2: + pt = Point(0.0, 1.0, 0.0); + return true; + case 3: + pt = Point(0.0, 0.0, 1.0); + return true; + case 4: + pt = Point(0.5, 0.0, 0.0); + return true; + case 5: + pt = Point(0.5, 0.5, 0.0); + return true; + case 6: + pt = Point(0.0, 0.5, 0.0); + return 
true; + case 7: + pt = Point(0.0, 0.0, 0.5); + return true; + case 8: + pt = Point(0.5, 0.0, 0.5); + return true; + case 9: + pt = Point(0.0, 0.5, 0.5); + return true; + case 10: + if (type == TET14) + { + pt = Point(1. / 3., 1. / 3., 0.0); + return true; + } + return false; + case 11: + if (type == TET14) + { + pt = Point(1. / 3., 0.0, 1. / 3.); + return true; + } + return false; + case 12: + if (type == TET14) + { + pt = Point(1. / 3., 1. / 3., 1. / 3.); + return true; + } + return false; + case 13: + if (type == TET14) + { + pt = Point(0.0, 1. / 3., 1. / 3.); + return true; + } + return false; + default: + return false; + } + + case HEX8: + case HEX20: + case HEX27: + switch (node) + { + case 0: + pt = Point(-1.0, -1.0, -1.0); + return true; + case 1: + pt = Point(1.0, -1.0, -1.0); + return true; + case 2: + pt = Point(1.0, 1.0, -1.0); + return true; + case 3: + pt = Point(-1.0, 1.0, -1.0); + return true; + case 4: + pt = Point(-1.0, -1.0, 1.0); + return true; + case 5: + pt = Point(1.0, -1.0, 1.0); + return true; + case 6: + pt = Point(1.0, 1.0, 1.0); + return true; + case 7: + pt = Point(-1.0, 1.0, 1.0); + return true; + case 8: + pt = Point(0.0, -1.0, -1.0); + return true; + case 9: + pt = Point(1.0, 0.0, -1.0); + return true; + case 10: + pt = Point(0.0, 1.0, -1.0); + return true; + case 11: + pt = Point(-1.0, 0.0, -1.0); + return true; + case 12: + pt = Point(-1.0, -1.0, 0.0); + return true; + case 13: + pt = Point(1.0, -1.0, 0.0); + return true; + case 14: + pt = Point(1.0, 1.0, 0.0); + return true; + case 15: + pt = Point(-1.0, 1.0, 0.0); + return true; + case 16: + pt = Point(0.0, -1.0, 1.0); + return true; + case 17: + pt = Point(1.0, 0.0, 1.0); + return true; + case 18: + pt = Point(0.0, 1.0, 1.0); + return true; + case 19: + pt = Point(-1.0, 0.0, 1.0); + return true; + case 20: + if (type == HEX27) + { + pt = Point(0.0, 0.0, -1.0); + return true; + } + return false; + case 21: + if (type == HEX27) + { + pt = Point(0.0, -1.0, 0.0); + return true; + } + 
return false; + case 22: + if (type == HEX27) + { + pt = Point(1.0, 0.0, 0.0); + return true; + } + return false; + case 23: + if (type == HEX27) + { + pt = Point(0.0, 1.0, 0.0); + return true; + } + return false; + case 24: + if (type == HEX27) + { + pt = Point(-1.0, 0.0, 0.0); + return true; + } + return false; + case 25: + if (type == HEX27) + { + pt = Point(0.0, 0.0, 1.0); + return true; + } + return false; + case 26: + if (type == HEX27) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + default: + return false; + } + + case PYRAMID13: + case PYRAMID14: + switch (node) + { + case 9: + pt = Point(-0.5, -0.5, 0.5); + return true; + case 10: + pt = Point(0.5, -0.5, 0.5); + return true; + case 11: + pt = Point(0.5, 0.5, 0.5); + return true; + case 12: + pt = Point(-0.5, 0.5, 0.5); + return true; + default: + return false; + } + + case PYRAMID18: + switch (node) + { + case 9: + pt = Point(-0.5, -0.5, 0.5); + return true; + case 10: + pt = Point(0.5, -0.5, 0.5); + return true; + case 11: + pt = Point(0.5, 0.5, 0.5); + return true; + case 12: + pt = Point(-0.5, 0.5, 0.5); + return true; + case 14: + pt = Point(-2. / 3., 0.0, 1. / 3.); + return true; + case 15: + pt = Point(0.0, 2. / 3., 1. / 3.); + return true; + case 16: + pt = Point(2. / 3., 0.0, 1. / 3.); + return true; + case 17: + pt = Point(0.0, -2. / 3., 1. / 3.); + return true; + default: + return false; + } + + case PRISM20: + switch (node) + { + case 18: + pt = Point(1. / 3., 1. / 3., -1.0); + return true; + case 19: + pt = Point(1. / 3., 1. / 3., 1.0); + return true; + default: + return false; + } + + case PRISM21: + switch (node) + { + case 18: + pt = Point(1. / 3., 1. / 3., -1.0); + return true; + case 19: + pt = Point(1. / 3., 1. / 3., 1.0); + return true; + case 20: + pt = Point(1. / 3., 1. 
/ 3., 0.0); + return true; + default: + return false; + } + + default: + return false; + } +} + +} // namespace libMesh + +#endif // LIBMESH_FE_REFERENCE_ELEMENT_TRAITS_H diff --git a/include/fe/fe_serendipity_lagrange.h b/include/fe/fe_serendipity_lagrange.h new file mode 100644 index 00000000000..f1a44f6cd1f --- /dev/null +++ b/include/fe/fe_serendipity_lagrange.h @@ -0,0 +1,417 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +#ifndef LIBMESH_FE_SERENDIPITY_LAGRANGE_H +#define LIBMESH_FE_SERENDIPITY_LAGRANGE_H + +#include "libmesh/point.h" + +namespace libMesh +{ +namespace detail +{ + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad8_shape(const unsigned int i, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 8); + + switch (i) + { + case 0: return 0.25 * (1.0 - xi) * (1.0 - eta) * (-1.0 - xi - eta); + case 1: return 0.25 * (1.0 + xi) * (1.0 - eta) * (-1.0 + xi - eta); + case 2: return 0.25 * (1.0 + xi) * (1.0 + eta) * (-1.0 + xi + eta); + case 3: return 0.25 * (1.0 - xi) * (1.0 + eta) * (-1.0 - xi + eta); + case 4: return 0.5 * (1.0 - xi * xi) * (1.0 - eta); + case 5: return 0.5 * (1.0 + xi) * (1.0 - eta * eta); + case 6: return 0.5 * (1.0 - xi * xi) * (1.0 + eta); + default: return 0.5 * (1.0 - xi) * (1.0 - eta * eta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad8_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 8); + libmesh_assert_less(j, 2); + + switch (j) + { + case 0: + switch (i) + { + case 0: return 0.25 * (1.0 - eta) * (2.0 * xi + eta); + case 1: return 0.25 * (1.0 - eta) * (2.0 * xi - eta); + case 2: return 0.25 * (1.0 + eta) * (2.0 * xi + eta); + case 3: return 0.25 * (1.0 + eta) * (2.0 * xi - eta); + case 4: return -xi * (1.0 - eta); + case 5: return 0.5 * (1.0 - eta * eta); + case 6: return -xi * (1.0 + eta); + default: return -0.5 * (1.0 - eta * eta); + } + + default: + switch (i) 
+ { + case 0: return 0.25 * (1.0 - xi) * (xi + 2.0 * eta); + case 1: return 0.25 * (1.0 + xi) * (2.0 * eta - xi); + case 2: return 0.25 * (1.0 + xi) * (xi + 2.0 * eta); + case 3: return 0.25 * (1.0 - xi) * (2.0 * eta - xi); + case 4: return -0.5 * (1.0 - xi * xi); + case 5: return -eta * (1.0 + xi); + case 6: return 0.5 * (1.0 - xi * xi); + default: return -eta * (1.0 - xi); + } + } +} + +#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad8_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 8); + libmesh_assert_less(j, 3); + + switch (j) + { + case 0: + switch (i) + { + case 0: + case 1: + return 0.5 * (1.0 - eta); + case 2: + case 3: + return 0.5 * (1.0 + eta); + case 4: + return eta - 1.0; + case 6: + return -1.0 - eta; + default: + return 0.0; + } + + case 1: + switch (i) + { + case 0: return 0.25 * (1.0 - 2.0 * xi - 2.0 * eta); + case 1: return 0.25 * (-1.0 - 2.0 * xi + 2.0 * eta); + case 2: return 0.25 * (1.0 + 2.0 * xi + 2.0 * eta); + case 3: return 0.25 * (-1.0 + 2.0 * xi - 2.0 * eta); + case 4: return xi; + case 5: return -eta; + case 6: return -xi; + default: return eta; + } + + default: + switch (i) + { + case 0: + case 3: + return 0.5 * (1.0 - xi); + case 1: + case 2: + return 0.5 * (1.0 + xi); + case 5: + return -1.0 - xi; + case 7: + return xi - 1.0; + default: + return 0.0; + } + } +} +#endif + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex20_shape(const unsigned int i, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 20); + + const Real x = 0.5 * (xi + 1.0); + const Real y = 0.5 * (eta + 1.0); + const Real z = 0.5 * (zeta + 1.0); + + switch (i) + { + case 0: return (1.0 - x) * (1.0 - y) * (1.0 - z) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z); + case 1: return x * (1.0 - y) * (1.0 - z) * (2.0 * x - 2.0 * y - 2.0 * z - 1.0); + case 2: return x * y * (1.0 - z) * (2.0 * x + 2.0 * y - 2.0 * z - 3.0); + case 3: return 
(1.0 - x) * y * (1.0 - z) * (2.0 * y - 2.0 * x - 2.0 * z - 1.0); + case 4: return (1.0 - x) * (1.0 - y) * z * (2.0 * z - 2.0 * x - 2.0 * y - 1.0); + case 5: return x * (1.0 - y) * z * (2.0 * x - 2.0 * y + 2.0 * z - 3.0); + case 6: return x * y * z * (2.0 * x + 2.0 * y + 2.0 * z - 5.0); + case 7: return (1.0 - x) * y * z * (2.0 * y - 2.0 * x + 2.0 * z - 3.0); + case 8: return 4.0 * x * (1.0 - x) * (1.0 - y) * (1.0 - z); + case 9: return 4.0 * x * y * (1.0 - y) * (1.0 - z); + case 10: return 4.0 * x * (1.0 - x) * y * (1.0 - z); + case 11: return 4.0 * (1.0 - x) * y * (1.0 - y) * (1.0 - z); + case 12: return 4.0 * (1.0 - x) * (1.0 - y) * z * (1.0 - z); + case 13: return 4.0 * x * (1.0 - y) * z * (1.0 - z); + case 14: return 4.0 * x * y * z * (1.0 - z); + case 15: return 4.0 * (1.0 - x) * y * z * (1.0 - z); + case 16: return 4.0 * x * (1.0 - x) * (1.0 - y) * z; + case 17: return 4.0 * x * y * (1.0 - y) * z; + case 18: return 4.0 * x * (1.0 - x) * y * z; + default: return 4.0 * (1.0 - x) * y * (1.0 - y) * z; + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex20_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 20); + libmesh_assert_less(j, 3); + + const Real x = 0.5 * (xi + 1.0); + const Real y = 0.5 * (eta + 1.0); + const Real z = 0.5 * (zeta + 1.0); + + switch (j) + { + case 0: + switch (i) + { + case 0: return 0.5 * (1.0 - y) * (1.0 - z) * ((1.0 - x) * (-2.0) + (-1.0) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z)); + case 1: return 0.5 * (1.0 - y) * (1.0 - z) * (x * 2.0 + (2.0 * x - 2.0 * y - 2.0 * z - 1.0)); + case 2: return 0.5 * y * (1.0 - z) * (x * 2.0 + (2.0 * x + 2.0 * y - 2.0 * z - 3.0)); + case 3: return 0.5 * y * (1.0 - z) * ((1.0 - x) * (-2.0) + (-1.0) * (2.0 * y - 2.0 * x - 2.0 * z - 1.0)); + case 4: return 0.5 * (1.0 - y) * z * ((1.0 - x) * (-2.0) + (-1.0) * (2.0 * z - 2.0 * x - 2.0 * y - 1.0)); + case 5: return 0.5 * (1.0 - y) * z * (x * 2.0 + (2.0 * x - 2.0 * y + 2.0 * z 
- 3.0)); + case 6: return 0.5 * y * z * (x * 2.0 + (2.0 * x + 2.0 * y + 2.0 * z - 5.0)); + case 7: return 0.5 * y * z * ((1.0 - x) * (-2.0) + (-1.0) * (2.0 * y - 2.0 * x + 2.0 * z - 3.0)); + case 8: return 2.0 * (1.0 - y) * (1.0 - z) * (1.0 - 2.0 * x); + case 9: return 2.0 * y * (1.0 - y) * (1.0 - z); + case 10: return 2.0 * y * (1.0 - z) * (1.0 - 2.0 * x); + case 11: return -2.0 * y * (1.0 - y) * (1.0 - z); + case 12: return -2.0 * (1.0 - y) * z * (1.0 - z); + case 13: return 2.0 * (1.0 - y) * z * (1.0 - z); + case 14: return 2.0 * y * z * (1.0 - z); + case 15: return -2.0 * y * z * (1.0 - z); + case 16: return 2.0 * (1.0 - y) * z * (1.0 - 2.0 * x); + case 17: return 2.0 * y * (1.0 - y) * z; + case 18: return 2.0 * y * z * (1.0 - 2.0 * x); + default: return -2.0 * y * (1.0 - y) * z; + } + + case 1: + switch (i) + { + case 0: return 0.5 * (1.0 - x) * (1.0 - z) * ((1.0 - y) * (-2.0) + (-1.0) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z)); + case 1: return 0.5 * x * (1.0 - z) * ((1.0 - y) * (-2.0) + (-1.0) * (2.0 * x - 2.0 * y - 2.0 * z - 1.0)); + case 2: return 0.5 * x * (1.0 - z) * (y * 2.0 + (2.0 * x + 2.0 * y - 2.0 * z - 3.0)); + case 3: return 0.5 * (1.0 - x) * (1.0 - z) * (y * 2.0 + (2.0 * y - 2.0 * x - 2.0 * z - 1.0)); + case 4: return 0.5 * (1.0 - x) * z * ((1.0 - y) * (-2.0) + (-1.0) * (2.0 * z - 2.0 * x - 2.0 * y - 1.0)); + case 5: return 0.5 * x * z * ((1.0 - y) * (-2.0) + (-1.0) * (2.0 * x - 2.0 * y + 2.0 * z - 3.0)); + case 6: return 0.5 * x * z * (y * 2.0 + (2.0 * x + 2.0 * y + 2.0 * z - 5.0)); + case 7: return 0.5 * (1.0 - x) * z * (y * 2.0 + (2.0 * y - 2.0 * x + 2.0 * z - 3.0)); + case 8: return -2.0 * x * (1.0 - x) * (1.0 - z); + case 9: return 2.0 * x * (1.0 - z) * (1.0 - 2.0 * y); + case 10: return 2.0 * x * (1.0 - x) * (1.0 - z); + case 11: return 2.0 * (1.0 - x) * (1.0 - z) * (1.0 - 2.0 * y); + case 12: return -2.0 * (1.0 - x) * z * (1.0 - z); + case 13: return -2.0 * x * z * (1.0 - z); + case 14: return 2.0 * x * z * (1.0 - z); + case 15: return 2.0 * 
(1.0 - x) * z * (1.0 - z); + case 16: return -2.0 * x * (1.0 - x) * z; + case 17: return 2.0 * x * z * (1.0 - 2.0 * y); + case 18: return 2.0 * x * (1.0 - x) * z; + default: return 2.0 * (1.0 - x) * z * (1.0 - 2.0 * y); + } + + default: + switch (i) + { + case 0: return 0.5 * (1.0 - x) * (1.0 - y) * ((1.0 - z) * (-2.0) + (-1.0) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z)); + case 1: return 0.5 * x * (1.0 - y) * ((1.0 - z) * (-2.0) + (-1.0) * (2.0 * x - 2.0 * y - 2.0 * z - 1.0)); + case 2: return 0.5 * x * y * ((1.0 - z) * (-2.0) + (-1.0) * (2.0 * x + 2.0 * y - 2.0 * z - 3.0)); + case 3: return 0.5 * (1.0 - x) * y * ((1.0 - z) * (-2.0) + (-1.0) * (2.0 * y - 2.0 * x - 2.0 * z - 1.0)); + case 4: return 0.5 * (1.0 - x) * (1.0 - y) * (z * 2.0 + (2.0 * z - 2.0 * x - 2.0 * y - 1.0)); + case 5: return 0.5 * x * (1.0 - y) * (z * 2.0 + (2.0 * x - 2.0 * y + 2.0 * z - 3.0)); + case 6: return 0.5 * x * y * (z * 2.0 + (2.0 * x + 2.0 * y + 2.0 * z - 5.0)); + case 7: return 0.5 * (1.0 - x) * y * (z * 2.0 + (2.0 * y - 2.0 * x + 2.0 * z - 3.0)); + case 8: return -2.0 * x * (1.0 - x) * (1.0 - y); + case 9: return -2.0 * x * y * (1.0 - y); + case 10: return -2.0 * x * (1.0 - x) * y; + case 11: return -2.0 * (1.0 - x) * y * (1.0 - y); + case 12: return 2.0 * (1.0 - x) * (1.0 - y) * (1.0 - 2.0 * z); + case 13: return 2.0 * x * (1.0 - y) * (1.0 - 2.0 * z); + case 14: return 2.0 * x * y * (1.0 - 2.0 * z); + case 15: return 2.0 * (1.0 - x) * y * (1.0 - 2.0 * z); + case 16: return 2.0 * x * (1.0 - x) * (1.0 - y); + case 17: return 2.0 * x * y * (1.0 - y); + case 18: return 2.0 * x * (1.0 - x) * y; + default: return 2.0 * (1.0 - x) * y * (1.0 - y); + } + } +} + +#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex20_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 20); + libmesh_assert_less(j, 6); + + const Real x = 0.5 * (xi + 1.0); + const Real y = 0.5 * (eta + 1.0); 
+ const Real z = 0.5 * (zeta + 1.0); + + switch (j) + { + case 0: + switch (i) + { + case 0: + case 1: return (1.0 - y) * (1.0 - z); + case 2: + case 3: return y * (1.0 - z); + case 4: + case 5: return (1.0 - y) * z; + case 6: + case 7: return y * z; + case 8: return -2.0 * (1.0 - y) * (1.0 - z); + case 10: return -2.0 * y * (1.0 - z); + case 16: return -2.0 * (1.0 - y) * z; + case 18: return -2.0 * y * z; + default: return 0.0; + } + + case 1: + switch (i) + { + case 0: return (1.25 - x - y - 0.5 * z) * (1.0 - z); + case 1: return (-x + y + 0.5 * z - 0.25) * (1.0 - z); + case 2: return (x + y - 0.5 * z - 0.75) * (1.0 - z); + case 3: return (-y + x + 0.5 * z - 0.25) * (1.0 - z); + case 4: return -0.25 * z * (4.0 * x + 4.0 * y - 2.0 * z - 3.0); + case 5: return -0.25 * z * (-4.0 * y + 4.0 * x + 2.0 * z - 1.0); + case 6: return 0.25 * z * (-5.0 + 4.0 * x + 4.0 * y + 2.0 * z); + case 7: return 0.25 * z * (4.0 * x - 4.0 * y - 2.0 * z + 1.0); + case 8: return (-1.0 + 2.0 * x) * (1.0 - z); + case 9: return (1.0 - 2.0 * y) * (1.0 - z); + case 10: return (1.0 - 2.0 * x) * (1.0 - z); + case 11: return (-1.0 + 2.0 * y) * (1.0 - z); + case 12: return z * (1.0 - z); + case 13: return -z * (1.0 - z); + case 14: return z * (1.0 - z); + case 15: return -z * (1.0 - z); + case 16: return (-1.0 + 2.0 * x) * z; + case 17: return (1.0 - 2.0 * y) * z; + case 18: return (1.0 - 2.0 * x) * z; + default: return (-1.0 + 2.0 * y) * z; + } + + case 2: + switch (i) + { + case 0: + case 3: return (1.0 - x) * (1.0 - z); + case 1: + case 2: return x * (1.0 - z); + case 4: + case 7: return (1.0 - x) * z; + case 5: + case 6: return x * z; + case 9: return -2.0 * x * (1.0 - z); + case 11: return -2.0 * (1.0 - x) * (1.0 - z); + case 17: return -2.0 * x * z; + case 19: return -2.0 * (1.0 - x) * z; + default: return 0.0; + } + + case 3: + switch (i) + { + case 0: return (1.25 - x - 0.5 * y - z) * (1.0 - y); + case 1: return (-x + 0.5 * y + z - 0.25) * (1.0 - y); + case 2: return -0.25 * y * (2.0 * y + 
4.0 * x - 4.0 * z - 1.0); + case 3: return -0.25 * y * (-2.0 * y + 4.0 * x + 4.0 * z - 3.0); + case 4: return (-z + x + 0.5 * y - 0.25) * (1.0 - y); + case 5: return (x - 0.5 * y + z - 0.75) * (1.0 - y); + case 6: return 0.25 * y * (2.0 * y + 4.0 * x + 4.0 * z - 5.0); + case 7: return 0.25 * y * (-2.0 * y + 4.0 * x - 4.0 * z + 1.0); + case 8: return (-1.0 + 2.0 * x) * (1.0 - y); + case 9: return -y * (1.0 - y); + case 10: return (-1.0 + 2.0 * x) * y; + case 11: return y * (1.0 - y); + case 12: return (-1.0 + 2.0 * z) * (1.0 - y); + case 13: return (1.0 - 2.0 * z) * (1.0 - y); + case 14: return (1.0 - 2.0 * z) * y; + case 15: return (-1.0 + 2.0 * z) * y; + case 16: return (1.0 - 2.0 * x) * (1.0 - y); + case 17: return y * (1.0 - y); + case 18: return (1.0 - 2.0 * x) * y; + default: return -y * (1.0 - y); + } + + case 4: + switch (i) + { + case 0: return (1.25 - 0.5 * x - y - z) * (1.0 - x); + case 1: return 0.25 * x * (2.0 * x - 4.0 * y - 4.0 * z + 3.0); + case 2: return -0.25 * x * (2.0 * x + 4.0 * y - 4.0 * z - 1.0); + case 3: return (-y + 0.5 * x + z - 0.25) * (1.0 - x); + case 4: return (-z + 0.5 * x + y - 0.25) * (1.0 - x); + case 5: return -0.25 * x * (2.0 * x - 4.0 * y + 4.0 * z - 1.0); + case 6: return 0.25 * x * (2.0 * x + 4.0 * y + 4.0 * z - 5.0); + case 7: return (y - 0.5 * x + z - 0.75) * (1.0 - x); + case 8: return x * (1.0 - x); + case 9: return (-1.0 + 2.0 * y) * x; + case 10: return -x * (1.0 - x); + case 11: return (-1.0 + 2.0 * y) * (1.0 - x); + case 12: return (-1.0 + 2.0 * z) * (1.0 - x); + case 13: return (-1.0 + 2.0 * z) * x; + case 14: return (1.0 - 2.0 * z) * x; + case 15: return (1.0 - 2.0 * z) * (1.0 - x); + case 16: return -x * (1.0 - x); + case 17: return (1.0 - 2.0 * y) * x; + case 18: return x * (1.0 - x); + default: return (1.0 - 2.0 * y) * (1.0 - x); + } + + default: + switch (i) + { + case 0: + case 4: return (1.0 - x) * (1.0 - y); + case 1: + case 5: return x * (1.0 - y); + case 2: + case 6: return x * y; + case 3: + case 7: return 
(1.0 - x) * y; + case 12: return -2.0 * (1.0 - x) * (1.0 - y); + case 13: return -2.0 * x * (1.0 - y); + case 14: return -2.0 * x * y; + case 15: return -2.0 * (1.0 - x) * y; + default: return 0.0; + } + } +} +#endif + +} // namespace detail +} // namespace libMesh + +#endif // LIBMESH_FE_SERENDIPITY_LAGRANGE_H diff --git a/include/fe/fe_shape_traits.h b/include/fe/fe_shape_traits.h new file mode 100644 index 00000000000..b90b561fbc1 --- /dev/null +++ b/include/fe/fe_shape_traits.h @@ -0,0 +1,628 @@ +#ifndef LIBMESH_FE_SHAPE_TRAITS_H +#define LIBMESH_FE_SHAPE_TRAITS_H + +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_elem_class.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_order.h" +#include "libmesh/libmesh_device.h" + +namespace libMesh +{ + +struct FEShapeKey +{ + FEFamily family; + ElemType elem_type; + Order order; +}; + +LIBMESH_DEVICE_INLINE bool +is_monomial_2d_elem_type(ElemType elem_type) +{ + switch (elem_type) + { + case C0POLYGON: + case TRI3: + case TRISHELL3: + case TRI6: + case TRI7: + case QUAD4: + case QUADSHELL4: + case QUAD8: + case QUADSHELL8: + case QUAD9: + case QUADSHELL9: + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +is_monomial_3d_elem_type(ElemType elem_type, + bool include_pyramid18 = true) +{ + switch (elem_type) + { + case TET4: + case TET10: + case TET14: + case HEX8: + case HEX20: + case HEX27: + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case C0POLYHEDRON: + return true; + case PYRAMID18: + return include_pyramid18; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE ElemType +side_topology_or_invalid(ElemType parent) +{ + switch (parent) + { + case EDGE2: + case EDGE3: + case EDGE4: + return EDGE2; + + case TRI3: + case QUAD4: + return EDGE2; + + case TRI6: + case TRI7: + case QUAD8: + case QUAD9: + return EDGE3; + + case TET4: + return TRI3; + case HEX8: + return QUAD4; 
+ + case TET10: + return TRI6; + case TET14: + return TRI7; + case HEX20: + return QUAD8; + case HEX27: + return QUAD9; + + default: + return INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE FEElemClass +class_from_topology_or_invalid(ElemType topo) +{ + switch (topo) + { + case EDGE2: + case EDGE3: + case EDGE4: + return FEElemClass::EDGE; + + case TRI3: + case TRI6: + case TRI7: + return FEElemClass::TRI; + + case QUAD4: + case QUAD8: + case QUAD9: + return FEElemClass::QUAD; + + case TET4: + case TET10: + case TET14: + return FEElemClass::TET; + + case HEX8: + case HEX20: + case HEX27: + return FEElemClass::HEX; + + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + return FEElemClass::PRISM; + + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return FEElemClass::PYRAMID; + + default: + return FEElemClass::N_CLASSES; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +elem_class_dim_or_zero(FEElemClass cls) +{ + switch (cls) + { + case FEElemClass::EDGE: + return 1; + case FEElemClass::TRI: + case FEElemClass::QUAD: + return 2; + case FEElemClass::TET: + case FEElemClass::HEX: + case FEElemClass::PRISM: + case FEElemClass::PYRAMID: + return 3; + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +topology_dim_or_zero(ElemType topo) +{ + return elem_class_dim_or_zero(class_from_topology_or_invalid(topo)); +} + +LIBMESH_DEVICE_INLINE ElemType +lagrange_shape_topology_or_invalid(FEShapeKey key) +{ + switch (key.order) + { + case CONSTANT: + case FIRST: + switch (key.elem_type) + { + case EDGE2: + case EDGE3: + case EDGE4: + return EDGE2; + + case TRI3: + case TRI6: + case TRI7: + return TRI3; + + case QUAD4: + case QUAD8: + case QUAD9: + return QUAD4; + + case TET4: + case TET10: + case TET14: + return TET4; + + case HEX8: + case HEX20: + case HEX27: + return HEX8; + + default: + return INVALID_ELEM; + } + + case SECOND: + switch (key.elem_type) + { + case EDGE3: + return EDGE3; + + case TRI6: + case TRI7: + 
return TRI6; + + case QUAD8: + return QUAD8; + + case QUAD9: + return QUAD9; + + case TET10: + case TET14: + return TET10; + + case HEX20: + return HEX20; + + case HEX27: + return HEX27; + + default: + return INVALID_ELEM; + } + + default: + return INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +lagrange_exact_n_dofs_or_zero(ElemType elem_type, + Order order) +{ + switch (order) + { + case CONSTANT: + return (elem_type == NODEELEM) ? 1u : 0u; + + case FIRST: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 2; + + case TRI3: + case TRI6: + case TRI7: + return 3; + + case QUAD4: + case QUAD8: + case QUAD9: + return 4; + + case TET4: + case TET10: + case TET14: + return 4; + + case HEX8: + case HEX20: + case HEX27: + return 8; + + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + return 6; + + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return 5; + + default: + return 0; + } + + case SECOND: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE3: + return 3; + + case TRI6: + case TRI7: + return 6; + + case QUAD8: + return 8; + + case QUAD9: + return 9; + + case TET10: + case TET14: + return 10; + + case HEX20: + return 20; + + case HEX27: + return 27; + + case PRISM15: + return 15; + + case PRISM18: + case PRISM20: + case PRISM21: + return 18; + + case PYRAMID13: + return 13; + + case PYRAMID14: + case PYRAMID18: + return 14; + + default: + return 0; + } + + case THIRD: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE4: + return 4; + + case TRI7: + return 7; + + case TET14: + return 14; + + case PRISM20: + return 20; + + case PRISM21: + return 21; + + case PYRAMID18: + return 18; + + default: + return 0; + } + + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +monomial_exact_n_dofs_or_zero(ElemType elem_type, + Order order) +{ + if (elem_type == INVALID_ELEM) + return 0; + if (order < CONSTANT) 
+ return 0; + + switch (order) + { + case CONSTANT: + return 1; + + case FIRST: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 2; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 3; + if (is_monomial_3d_elem_type(elem_type)) + return 4; + return 0; + + case SECOND: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 3; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 6; + if (is_monomial_3d_elem_type(elem_type)) + return 10; + return 0; + + case THIRD: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 4; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 10; + if (is_monomial_3d_elem_type(elem_type)) + return 20; + return 0; + + case FOURTH: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + return 5; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 15; + if (is_monomial_3d_elem_type(elem_type, false)) + return 35; + return 0; + + case FIFTH: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + return 6; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 21; + if (is_monomial_3d_elem_type(elem_type, false)) + return 56; + return 0; + + default: + { + const unsigned int p = static_cast(order); + + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + return p + 1; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return (p + 1) * (p + 2) / 2; + if (is_monomial_3d_elem_type(elem_type, false)) + return (p + 1) * (p + 2) * (p + 3) / 6; + return 0; + } + } +} + +LIBMESH_DEVICE_INLINE unsigned int +monomial_evaluator_dim_or_zero(ElemType elem_type) +{ + switch (elem_type) + { + case EDGE2: + case EDGE3: + case EDGE4: + return 1; + + 
case TRI3: + case TRI6: + case TRI7: + case QUAD4: + case QUAD8: + case QUAD9: + return 2; + + case TET4: + case TET10: + case TET14: + case HEX8: + case HEX20: + case HEX27: + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return 3; + + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_shape(FEShapeKey key) +{ + switch (key.family) + { + case LAGRANGE: + return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && + lagrange_shape_topology_or_invalid(key) != INVALID_ELEM; + + case MONOMIAL: + return monomial_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && + monomial_evaluator_dim_or_zero(key.elem_type) != 0 && + key.order >= CONSTANT && + key.order <= FIFTH; + + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_grad_shape(FEShapeKey key) +{ + return supports_shape(key); +} + +LIBMESH_DEVICE_INLINE bool +supports_n_dofs(FEShapeKey key) +{ + return supports_shape(key); +} + +LIBMESH_DEVICE_INLINE unsigned int +n_dofs_or_zero(FEShapeKey key) +{ + switch (key.family) + { + case LAGRANGE: + return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order); + + case MONOMIAL: + return monomial_exact_n_dofs_or_zero(key.elem_type, key.order); + + default: + return 0; + } +} + +} // namespace libMesh + +#endif // LIBMESH_FE_SHAPE_TRAITS_H diff --git a/include/fe/fe_simplex_lagrange.h b/include/fe/fe_simplex_lagrange.h new file mode 100644 index 00000000000..ce29d0605fb --- /dev/null +++ b/include/fe/fe_simplex_lagrange.h @@ -0,0 +1,462 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner + +#ifndef LIBMESH_FE_SIMPLEX_LAGRANGE_H +#define LIBMESH_FE_SIMPLEX_LAGRANGE_H + +#include "libmesh/point.h" + +namespace libMesh +{ +namespace detail +{ + +constexpr Real tri_dzeta[3][2] = + { + {-1., -1.}, + { 1., 0.}, + { 0., 1.} + }; + +constexpr unsigned short tri6_zeta_indices[6][2] = + { + {0, 0}, + {1, 1}, + {2, 2}, + {0, 1}, + {1, 2}, + {2, 0} + }; + +constexpr unsigned short tri7_bubble_zeta_indices[1][3] = + { + {0, 1, 2} + }; + +constexpr Real tet_dzeta[4][3] = + { + {-1., -1., -1.}, + { 1., 0., 0.}, + { 0., 1., 0.}, + { 0., 0., 1.} + }; + +constexpr unsigned short tet10_zeta_indices[10][2] = + { + {0, 0}, + {1, 1}, + {2, 2}, + {3, 3}, + {0, 1}, + {1, 2}, + {2, 0}, + {0, 3}, + {1, 3}, + {2, 3} + }; + +constexpr unsigned short tet14_bubble_zeta_indices[4][3] = + { + {0, 1, 2}, + {0, 1, 3}, + {1, 2, 3}, + {0, 2, 3} + }; + +constexpr unsigned short tet14_vertex_bubble_indices[4][3] = + { + {0, 1, 3}, + {0, 1, 2}, + {0, 2, 3}, + {1, 2, 3} + }; + +constexpr unsigned short tet14_edge_bubble_indices[6][2] = + { + {0, 1}, + {0, 2}, + {0, 3}, + {1, 3}, + {1, 2}, + {3, 2} + }; + +#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES +constexpr unsigned short tet_second_deriv_indices[6][2] = + { + {0, 0}, + {0, 1}, + {1, 1}, + {0, 2}, + {1, 2}, + {2, 2} + }; +#endif + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri3_shape(const unsigned int i, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 3); + + switch (i) + { + case 0: return 1. - xi - eta; + case 1: return xi; + default: return eta; + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri3_shape_deriv(const unsigned int i, + const unsigned int j) +{ + libmesh_assert_less(i, 3); + libmesh_assert_less(j, 2); + + return tri_dzeta[i][j]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri6_shape(const unsigned int i, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 6); + + const Real bary[3] = {1. 
- xi - eta, xi, eta}; + const unsigned short m = tri6_zeta_indices[i][0]; + const unsigned short n = tri6_zeta_indices[i][1]; + + if (i < 3) + return bary[m] * (2. * bary[m] - 1.); + + return 4. * bary[m] * bary[n]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri6_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 6); + libmesh_assert_less(j, 2); + + const Real bary[3] = {1. - xi - eta, xi, eta}; + const unsigned short m = tri6_zeta_indices[i][0]; + const unsigned short n = tri6_zeta_indices[i][1]; + + if (i < 3) + return (4. * bary[m] - 1.) * tri_dzeta[m][j]; + + return 4. * bary[n] * tri_dzeta[m][j] + 4. * bary[m] * tri_dzeta[n][j]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri7_shape(const unsigned int i, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 7); + + const Real bary[3] = {1. - xi - eta, xi, eta}; + const auto & bubble_indices = tri7_bubble_zeta_indices[0]; + const Real bubble = + bary[bubble_indices[0]] * bary[bubble_indices[1]] * bary[bubble_indices[2]]; + + if (i < 3) + return fe_lagrange_tri6_shape(i, xi, eta) + 3. * bubble; + + if (i < 6) + return fe_lagrange_tri6_shape(i, xi, eta) - 12. * bubble; + + return 27. * bubble; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri7_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 7); + libmesh_assert_less(j, 2); + + const Real bary[3] = {1. - xi - eta, xi, eta}; + const auto & bubble_indices = tri7_bubble_zeta_indices[0]; + const Real bubble_deriv = + tri_dzeta[bubble_indices[0]][j] * bary[bubble_indices[1]] * bary[bubble_indices[2]] + + bary[bubble_indices[0]] * tri_dzeta[bubble_indices[1]][j] * bary[bubble_indices[2]] + + bary[bubble_indices[0]] * bary[bubble_indices[1]] * tri_dzeta[bubble_indices[2]][j]; + + if (i < 3) + return fe_lagrange_tri6_shape_deriv(i, j, xi, eta) + 3. 
* bubble_deriv; + + if (i < 6) + return fe_lagrange_tri6_shape_deriv(i, j, xi, eta) - 12. * bubble_deriv; + + return 27. * bubble_deriv; +} + +#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri6_shape_second_deriv(const unsigned int i, + const unsigned int j) +{ + libmesh_assert_less(i, 6); + libmesh_assert_less(j, 3); + + const unsigned short my_j = j == 2 ? 1 : 0; + const unsigned short my_k = j == 0 ? 0 : 1; + + if (i < 3) + return 4. * tri_dzeta[i][my_j] * tri_dzeta[i][my_k]; + + const unsigned short m = tri6_zeta_indices[i][0]; + const unsigned short n = tri6_zeta_indices[i][1]; + + return 4. * (tri_dzeta[n][my_j] * tri_dzeta[m][my_k] + + tri_dzeta[m][my_j] * tri_dzeta[n][my_k]); +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tri7_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 7); + libmesh_assert_less(j, 3); + + const unsigned short my_j = j == 2 ? 1 : 0; + const unsigned short my_k = j == 0 ? 0 : 1; + const Real bary[3] = {1. - xi - eta, xi, eta}; + const auto & bubble_indices = tri7_bubble_zeta_indices[0]; + const Real bubble_second_deriv = + tri_dzeta[bubble_indices[0]][my_j] * tri_dzeta[bubble_indices[1]][my_k] * bary[bubble_indices[2]] + + tri_dzeta[bubble_indices[0]][my_j] * bary[bubble_indices[1]] * tri_dzeta[bubble_indices[2]][my_k] + + bary[bubble_indices[0]] * tri_dzeta[bubble_indices[1]][my_j] * tri_dzeta[bubble_indices[2]][my_k] + + tri_dzeta[bubble_indices[0]][my_k] * tri_dzeta[bubble_indices[1]][my_j] * bary[bubble_indices[2]] + + tri_dzeta[bubble_indices[0]][my_k] * bary[bubble_indices[1]] * tri_dzeta[bubble_indices[2]][my_j] + + bary[bubble_indices[0]] * tri_dzeta[bubble_indices[1]][my_k] * tri_dzeta[bubble_indices[2]][my_j]; + + if (i < 3) + return fe_lagrange_tri6_shape_second_deriv(i, j) + 3. * bubble_second_deriv; + + if (i < 6) + return fe_lagrange_tri6_shape_second_deriv(i, j) - 12. * bubble_second_deriv; + + return 27. 
* bubble_second_deriv; +} +#endif + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet4_shape(const unsigned int i, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 4); + + switch (i) + { + case 0: return 1. - xi - eta - zeta; + case 1: return xi; + case 2: return eta; + default: return zeta; + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet4_shape_deriv(const unsigned int i, + const unsigned int j) +{ + libmesh_assert_less(i, 4); + libmesh_assert_less(j, 3); + + return tet_dzeta[i][j]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet10_shape(const unsigned int i, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 10); + + const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta}; + const unsigned short m = tet10_zeta_indices[i][0]; + const unsigned short n = tet10_zeta_indices[i][1]; + + if (i < 4) + return bary[m] * (2. * bary[m] - 1.); + + return 4. * bary[m] * bary[n]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet10_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 10); + libmesh_assert_less(j, 3); + + const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta}; + const unsigned short m = tet10_zeta_indices[i][0]; + const unsigned short n = tet10_zeta_indices[i][1]; + + if (i < 4) + return (4. * bary[m] - 1.) * tet_dzeta[m][j]; + + return 4. * bary[n] * tet_dzeta[m][j] + 4. * bary[m] * tet_dzeta[n][j]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet14_shape(const unsigned int i, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 14); + + const Real bary[4] = {1. 
- xi - eta - zeta, xi, eta, zeta}; + Real bubble[4]; + + for (unsigned short b = 0; b != 4; ++b) + bubble[b] = + bary[tet14_bubble_zeta_indices[b][0]] * + bary[tet14_bubble_zeta_indices[b][1]] * + bary[tet14_bubble_zeta_indices[b][2]]; + + if (i < 4) + { + const auto & bubble_ids = tet14_vertex_bubble_indices[i]; + return fe_lagrange_tet10_shape(i, xi, eta, zeta) + + 3. * (bubble[bubble_ids[0]] + bubble[bubble_ids[1]] + bubble[bubble_ids[2]]); + } + + if (i < 10) + { + const auto & bubble_ids = tet14_edge_bubble_indices[i - 4]; + return fe_lagrange_tet10_shape(i, xi, eta, zeta) - + 12. * (bubble[bubble_ids[0]] + bubble[bubble_ids[1]]); + } + + return 27. * bubble[i - 10]; +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet14_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 14); + libmesh_assert_less(j, 3); + + const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta}; + Real bubble_deriv[4]; + + for (unsigned short b = 0; b != 4; ++b) + { + const auto & bubble_ids = tet14_bubble_zeta_indices[b]; + bubble_deriv[b] = + tet_dzeta[bubble_ids[0]][j] * bary[bubble_ids[1]] * bary[bubble_ids[2]] + + bary[bubble_ids[0]] * tet_dzeta[bubble_ids[1]][j] * bary[bubble_ids[2]] + + bary[bubble_ids[0]] * bary[bubble_ids[1]] * tet_dzeta[bubble_ids[2]][j]; + } + + if (i < 4) + { + const auto & bubble_ids = tet14_vertex_bubble_indices[i]; + return fe_lagrange_tet10_shape_deriv(i, j, xi, eta, zeta) + + 3. * (bubble_deriv[bubble_ids[0]] + bubble_deriv[bubble_ids[1]] + bubble_deriv[bubble_ids[2]]); + } + + if (i < 10) + { + const auto & bubble_ids = tet14_edge_bubble_indices[i - 4]; + return fe_lagrange_tet10_shape_deriv(i, j, xi, eta, zeta) - + 12. * (bubble_deriv[bubble_ids[0]] + bubble_deriv[bubble_ids[1]]); + } + + return 27. 
* bubble_deriv[i - 10]; +} + +#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet10_shape_second_deriv(const unsigned int i, + const unsigned int j) +{ + libmesh_assert_less(i, 10); + libmesh_assert_less(j, 6); + + const unsigned short my_j = tet_second_deriv_indices[j][0]; + const unsigned short my_k = tet_second_deriv_indices[j][1]; + + if (i < 4) + return 4. * tet_dzeta[i][my_j] * tet_dzeta[i][my_k]; + + const unsigned short m = tet10_zeta_indices[i][0]; + const unsigned short n = tet10_zeta_indices[i][1]; + + return 4. * (tet_dzeta[n][my_j] * tet_dzeta[m][my_k] + + tet_dzeta[m][my_j] * tet_dzeta[n][my_k]); +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_tet14_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 14); + libmesh_assert_less(j, 6); + + const unsigned short my_j = tet_second_deriv_indices[j][0]; + const unsigned short my_k = tet_second_deriv_indices[j][1]; + const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta}; + Real bubble_second_deriv[4]; + + for (unsigned short b = 0; b != 4; ++b) + { + const auto & bubble_ids = tet14_bubble_zeta_indices[b]; + bubble_second_deriv[b] = + tet_dzeta[bubble_ids[0]][my_j] * tet_dzeta[bubble_ids[1]][my_k] * bary[bubble_ids[2]] + + tet_dzeta[bubble_ids[0]][my_j] * bary[bubble_ids[1]] * tet_dzeta[bubble_ids[2]][my_k] + + bary[bubble_ids[0]] * tet_dzeta[bubble_ids[1]][my_j] * tet_dzeta[bubble_ids[2]][my_k] + + tet_dzeta[bubble_ids[0]][my_k] * tet_dzeta[bubble_ids[1]][my_j] * bary[bubble_ids[2]] + + tet_dzeta[bubble_ids[0]][my_k] * bary[bubble_ids[1]] * tet_dzeta[bubble_ids[2]][my_j] + + bary[bubble_ids[0]] * tet_dzeta[bubble_ids[1]][my_k] * tet_dzeta[bubble_ids[2]][my_j]; + } + + if (i < 4) + { + const auto & bubble_ids = tet14_vertex_bubble_indices[i]; + return fe_lagrange_tet10_shape_second_deriv(i, j) + + 3. 
* (bubble_second_deriv[bubble_ids[0]] + bubble_second_deriv[bubble_ids[1]] + bubble_second_deriv[bubble_ids[2]]); + } + + if (i < 10) + { + const auto & bubble_ids = tet14_edge_bubble_indices[i - 4]; + return fe_lagrange_tet10_shape_second_deriv(i, j) - + 12. * (bubble_second_deriv[bubble_ids[0]] + bubble_second_deriv[bubble_ids[1]]); + } + + return 27. * bubble_second_deriv[i - 10]; +} +#endif + +} // namespace detail +} // namespace libMesh + +#endif // LIBMESH_FE_SIMPLEX_LAGRANGE_H diff --git a/include/fe/fe_tensor_product_lagrange.h b/include/fe/fe_tensor_product_lagrange.h new file mode 100644 index 00000000000..2e4efa2dfcc --- /dev/null +++ b/include/fe/fe_tensor_product_lagrange.h @@ -0,0 +1,320 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +#ifndef LIBMESH_FE_TENSOR_PRODUCT_LAGRANGE_H +#define LIBMESH_FE_TENSOR_PRODUCT_LAGRANGE_H + +#include "libmesh/fe_lagrange_shape_1D.h" + +namespace libMesh +{ +namespace detail +{ + +constexpr unsigned int quad4_i0[4] = {0, 1, 1, 0}; +constexpr unsigned int quad4_i1[4] = {0, 0, 1, 1}; + +constexpr unsigned int quad9_i0[9] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; +constexpr unsigned int quad9_i1[9] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; + +constexpr unsigned int hex8_i0[8] = {0, 1, 1, 0, 0, 1, 1, 0}; +constexpr unsigned int hex8_i1[8] = {0, 0, 1, 1, 0, 0, 1, 1}; +constexpr unsigned int hex8_i2[8] = {0, 0, 0, 0, 1, 1, 1, 1}; + +constexpr unsigned int hex27_i0[27] = + {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; +constexpr unsigned int hex27_i1[27] = + {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; +constexpr unsigned int hex27_i2[27] = + {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad4_shape(const unsigned int i, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 4); + + return fe_lagrange_1D_linear_shape(quad4_i0[i], xi) * + fe_lagrange_1D_linear_shape(quad4_i1[i], eta); +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad4_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 4); + libmesh_assert_less(j, 2); + + switch (j) + { + case 0: + return fe_lagrange_1D_linear_shape_deriv(quad4_i0[i], 0, xi) * + fe_lagrange_1D_linear_shape(quad4_i1[i], eta); + + default: + return fe_lagrange_1D_linear_shape(quad4_i0[i], xi) * + fe_lagrange_1D_linear_shape_deriv(quad4_i1[i], 0, eta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad9_shape(const unsigned int i, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 9); + + return fe_lagrange_1D_quadratic_shape(quad9_i0[i], xi) * + fe_lagrange_1D_quadratic_shape(quad9_i1[i], eta); +} + 
+LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad9_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 9); + libmesh_assert_less(j, 2); + + switch (j) + { + case 0: + return fe_lagrange_1D_quadratic_shape_deriv(quad9_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape(quad9_i1[i], eta); + + default: + return fe_lagrange_1D_quadratic_shape(quad9_i0[i], xi) * + fe_lagrange_1D_quadratic_shape_deriv(quad9_i1[i], 0, eta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex8_shape(const unsigned int i, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 8); + + return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) * + fe_lagrange_1D_linear_shape(hex8_i1[i], eta) * + fe_lagrange_1D_linear_shape(hex8_i2[i], zeta); +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex8_shape_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 8); + libmesh_assert_less(j, 3); + + switch (j) + { + case 0: + return fe_lagrange_1D_linear_shape_deriv(hex8_i0[i], 0, xi) * + fe_lagrange_1D_linear_shape(hex8_i1[i], eta) * + fe_lagrange_1D_linear_shape(hex8_i2[i], zeta); + + case 1: + return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) * + fe_lagrange_1D_linear_shape_deriv(hex8_i1[i], 0, eta) * + fe_lagrange_1D_linear_shape(hex8_i2[i], zeta); + + default: + return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) * + fe_lagrange_1D_linear_shape(hex8_i1[i], eta) * + fe_lagrange_1D_linear_shape_deriv(hex8_i2[i], 0, zeta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex27_shape(const unsigned int i, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 27); + + return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) * + fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) * + fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta); +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex27_shape_deriv(const unsigned int i, + 
const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 27); + libmesh_assert_less(j, 3); + + switch (j) + { + case 0: + return fe_lagrange_1D_quadratic_shape_deriv(hex27_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) * + fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta); + + case 1: + return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) * + fe_lagrange_1D_quadratic_shape_deriv(hex27_i1[i], 0, eta) * + fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta); + + default: + return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) * + fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) * + fe_lagrange_1D_quadratic_shape_deriv(hex27_i2[i], 0, zeta); + } +} + +#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad4_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 4); + libmesh_assert_less(j, 3); + + switch (j) + { + case 0: + case 2: + return 0.; + + default: + return fe_lagrange_1D_linear_shape_deriv(quad4_i0[i], 0, xi) * + fe_lagrange_1D_linear_shape_deriv(quad4_i1[i], 0, eta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_quad9_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta) +{ + libmesh_assert_less(i, 9); + libmesh_assert_less(j, 3); + + switch (j) + { + case 0: + return fe_lagrange_1D_quadratic_shape_second_deriv(quad9_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape(quad9_i1[i], eta); + + case 1: + return fe_lagrange_1D_quadratic_shape_deriv(quad9_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape_deriv(quad9_i1[i], 0, eta); + + default: + return fe_lagrange_1D_quadratic_shape(quad9_i0[i], xi) * + fe_lagrange_1D_quadratic_shape_second_deriv(quad9_i1[i], 0, eta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex8_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + 
libmesh_assert_less(i, 8); + libmesh_assert_less(j, 6); + + switch (j) + { + case 0: + case 2: + case 5: + return 0.; + + case 1: + return fe_lagrange_1D_linear_shape_deriv(hex8_i0[i], 0, xi) * + fe_lagrange_1D_linear_shape_deriv(hex8_i1[i], 0, eta) * + fe_lagrange_1D_linear_shape(hex8_i2[i], zeta); + + case 3: + return fe_lagrange_1D_linear_shape_deriv(hex8_i0[i], 0, xi) * + fe_lagrange_1D_linear_shape(hex8_i1[i], eta) * + fe_lagrange_1D_linear_shape_deriv(hex8_i2[i], 0, zeta); + + default: + return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) * + fe_lagrange_1D_linear_shape_deriv(hex8_i1[i], 0, eta) * + fe_lagrange_1D_linear_shape_deriv(hex8_i2[i], 0, zeta); + } +} + +LIBMESH_DEVICE_INLINE +Real fe_lagrange_hex27_shape_second_deriv(const unsigned int i, + const unsigned int j, + const Real xi, + const Real eta, + const Real zeta) +{ + libmesh_assert_less(i, 27); + libmesh_assert_less(j, 6); + + switch (j) + { + case 0: + return fe_lagrange_1D_quadratic_shape_second_deriv(hex27_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) * + fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta); + + case 1: + return fe_lagrange_1D_quadratic_shape_deriv(hex27_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape_deriv(hex27_i1[i], 0, eta) * + fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta); + + case 2: + return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) * + fe_lagrange_1D_quadratic_shape_second_deriv(hex27_i1[i], 0, eta) * + fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta); + + case 3: + return fe_lagrange_1D_quadratic_shape_deriv(hex27_i0[i], 0, xi) * + fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) * + fe_lagrange_1D_quadratic_shape_deriv(hex27_i2[i], 0, zeta); + + case 4: + return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) * + fe_lagrange_1D_quadratic_shape_deriv(hex27_i1[i], 0, eta) * + fe_lagrange_1D_quadratic_shape_deriv(hex27_i2[i], 0, zeta); + + default: + return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) * + 
fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) * + fe_lagrange_1D_quadratic_shape_second_deriv(hex27_i2[i], 0, zeta); + } +} + +#endif // LIBMESH_ENABLE_SECOND_DERIVATIVES + +} // namespace detail +} // namespace libMesh + +#endif // LIBMESH_FE_TENSOR_PRODUCT_LAGRANGE_H diff --git a/include/gpu/kokkos_fe_base.h b/include/gpu/kokkos_fe_base.h index 4526ebdc67a..140c47c7d3d 100644 --- a/include/gpu/kokkos_fe_base.h +++ b/include/gpu/kokkos_fe_base.h @@ -27,14 +27,55 @@ #ifndef LIBMESH_KOKKOS_FE_BASE_H #define LIBMESH_KOKKOS_FE_BASE_H -#include "kokkos_scalar_types.h" #include "libmesh/libmesh_device.h" #include "libmesh/enum_elem_type.h" #include "libmesh/enum_fe_family.h" +#include "libmesh/kokkos_tensor_ops.h" +#include "libmesh/kokkos_vector_ops.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" namespace libMesh::Kokkos { +using Real = libMesh::Real; +using RealVector = libMesh::TypeVector; +using RealTensor = libMesh::TypeTensor; + +LIBMESH_DEVICE_INLINE +RealVector zero_vector() +{ + return zero_vector_value(); +} + +LIBMESH_DEVICE_INLINE +RealVector make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + RealVector v = zero_vector(); + + v(0) = x; + +#if LIBMESH_DIM > 1 + v(1) = y; +#else + libmesh_assert_equal_to(y, Real(0)); +#endif + +#if LIBMESH_DIM > 2 + v(2) = z; +#else + libmesh_assert_equal_to(z, Real(0)); +#endif + + return v; +} + +LIBMESH_DEVICE_INLINE +RealTensor zero_tensor() +{ + return zero_tensor_value(); +} + template struct FEEvaluator; // forward declaration only; instantiation requires a specialization diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h index 5fb7c1d1dc4..8b802d9dbe4 100644 --- a/include/gpu/kokkos_fe_evaluator.h +++ b/include/gpu/kokkos_fe_evaluator.h @@ -27,33 +27,165 @@ namespace libMesh::Kokkos { +namespace detail +{ + +template +LIBMESH_DEVICE_INLINE auto +dispatch_lagrange_topology(libMesh::ElemType topo, const Op & op) + -> decltype(op.template 
operator()()) +{ + switch (topo) + { + case libMesh::EDGE2: return op.template operator()(); + case libMesh::EDGE3: return op.template operator()(); + case libMesh::TRI3: return op.template operator()(); + case libMesh::TRI6: return op.template operator()(); + case libMesh::QUAD4: return op.template operator()(); + case libMesh::QUAD8: return op.template operator()(); + case libMesh::QUAD9: return op.template operator()(); + case libMesh::TET4: return op.template operator()(); + case libMesh::TET10: return op.template operator()(); + case libMesh::HEX8: return op.template operator()(); + case libMesh::HEX20: return op.template operator()(); + case libMesh::HEX27: return op.template operator()(); + default: + detail::abort_unsupported("dispatch_lagrange_topology(): unsupported evaluator topology"); + return op.template operator()(); + } +} + +template +LIBMESH_DEVICE_INLINE auto +dispatch_monomial_order(libMesh::Order order, const Op & op) + -> decltype(op.template operator()()) +{ + switch (order) + { + case libMesh::CONSTANT: return op.template operator()(); + case libMesh::FIRST: return op.template operator()(); + case libMesh::SECOND: return op.template operator()(); + case libMesh::THIRD: return op.template operator()(); + case libMesh::FOURTH: return op.template operator()(); + case libMesh::FIFTH: return op.template operator()(); + default: + detail::abort_unsupported("dispatch_monomial_order(): unsupported MONOMIAL order"); + return op.template operator()(); + } +} + +template +LIBMESH_DEVICE_INLINE auto +dispatch_monomial(libMesh::ElemType elem_type, libMesh::Order order, const Op & op) + -> decltype(op.template operator()<1, 0>()) +{ + switch (monomial_evaluator_dim_or_zero(elem_type)) + { + case 1: return dispatch_monomial_order<1>(order, op); + case 2: return dispatch_monomial_order<2>(order, op); + case 3: return dispatch_monomial_order<3>(order, op); + default: + detail::abort_unsupported("dispatch_monomial(): unsupported MONOMIAL element topology"); + 
return op.template operator()<1, 0>(); + } +} + +struct LagrangeShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE Real operator()() const + { + return FEEvaluator::shape(i, xi, eta, zeta); + } +}; + +struct LagrangeGradShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + return FEEvaluator::grad_shape(i, xi, eta, zeta); + } +}; + +struct MonomialShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE Real operator()() const + { + if constexpr (Dim == 1) + return MonomialImpl1D::shape(i, xi, eta, zeta); + else if constexpr (Dim == 2) + return MonomialImpl2D::shape(i, xi, eta, zeta); + else + return MonomialImpl3D::shape(i, xi, eta, zeta); + } +}; + +struct MonomialGradShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + if constexpr (Dim == 1) + return MonomialImpl1D::grad_shape(i, xi, eta, zeta); + else if constexpr (Dim == 2) + return MonomialImpl2D::grad_shape(i, xi, eta, zeta); + else + return MonomialImpl3D::grad_shape(i, xi, eta, zeta); + } +}; + +} // namespace detail + // ── On-device helpers: element class -> spatial dimension ───────────────────── LIBMESH_DEVICE_INLINE unsigned int dim_from_class(FEElemClass cls) { - switch (cls) + const unsigned int dim = libMesh::elem_class_dim_or_zero(cls); + + if (!dim) { - case FEElemClass::EDGE: - return 1; - case FEElemClass::TRI: - case FEElemClass::QUAD: - return 2; - case FEElemClass::TET: - case FEElemClass::HEX: - case FEElemClass::PRISM: - case FEElemClass::PYRAMID: - return 3; - default: - detail::abort_unsupported("dim_from_class(): unsupported element class"); - return 0; + detail::abort_unsupported("dim_from_class(): unsupported element class"); + return 0; } + + return dim; } LIBMESH_DEVICE_INLINE unsigned int 
dim_from_topology(libMesh::ElemType topo) { - return dim_from_class(class_from_topology(topo)); + const unsigned int dim = libMesh::topology_dim_or_zero(topo); + + if (!dim) + { + detail::abort_unsupported("dim_from_topology(): unsupported element type"); + return 0; + } + + return dim; } // ── On-device helper: exact libMesh Lagrange key -> evaluator topology ───────── @@ -79,36 +211,7 @@ eval_lagrange_shape(libMesh::ElemType topo, Real eta, Real zeta) { - switch (topo) - { - case libMesh::EDGE2: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::EDGE3: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::TRI3: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::TRI6: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::QUAD4: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::QUAD8: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::QUAD9: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::TET4: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::TET10: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::HEX8: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::HEX20: - return FEEvaluator::shape(i, xi, eta, zeta); - case libMesh::HEX27: - return FEEvaluator::shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("eval_lagrange_shape(): unsupported evaluator topology"); - return Real(0); - } + return detail::dispatch_lagrange_topology(topo, detail::LagrangeShapeOp{i, xi, eta, zeta}); } LIBMESH_DEVICE_INLINE RealVector @@ -118,36 +221,7 @@ eval_lagrange_grad_shape(libMesh::ElemType topo, Real eta, Real zeta) { - switch (topo) - { - case libMesh::EDGE2: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::EDGE3: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::TRI3: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::TRI6: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::QUAD4: - 
return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::QUAD8: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::QUAD9: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::TET4: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::TET10: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::HEX8: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::HEX20: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - case libMesh::HEX27: - return FEEvaluator::grad_shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("eval_lagrange_grad_shape(): unsupported evaluator topology"); - return zero_vector(); - } + return detail::dispatch_lagrange_topology(topo, detail::LagrangeGradShapeOp{i, xi, eta, zeta}); } // ── Geometry-only shape dispatch (mapping-type + topology) ──────────────────── @@ -237,53 +311,9 @@ shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) return eval_lagrange_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); case libMesh::MONOMIAL: - { - switch (monomial_evaluator_dim_or_zero(key.elem_type)) - { - case 1: - switch (key.order) - { - case 0: return MonomialImpl1D<0>::shape(i, xi, eta, zeta); - case 1: return MonomialImpl1D<1>::shape(i, xi, eta, zeta); - case 2: return MonomialImpl1D<2>::shape(i, xi, eta, zeta); - case 3: return MonomialImpl1D<3>::shape(i, xi, eta, zeta); - case 4: return MonomialImpl1D<4>::shape(i, xi, eta, zeta); - case 5: return MonomialImpl1D<5>::shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("shape(): unsupported 1D MONOMIAL order"); - return Real(0); - } - case 2: - switch (key.order) - { - case 0: return MonomialImpl2D<0>::shape(i, xi, eta, zeta); - case 1: return MonomialImpl2D<1>::shape(i, xi, eta, zeta); - case 2: return MonomialImpl2D<2>::shape(i, xi, eta, zeta); - case 3: return MonomialImpl2D<3>::shape(i, xi, eta, zeta); - case 4: return MonomialImpl2D<4>::shape(i, xi, eta, zeta); - case 
5: return MonomialImpl2D<5>::shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("shape(): unsupported 2D MONOMIAL order"); - return Real(0); - } - case 3: - switch (key.order) - { - case 0: return MonomialImpl3D<0>::shape(i, xi, eta, zeta); - case 1: return MonomialImpl3D<1>::shape(i, xi, eta, zeta); - case 2: return MonomialImpl3D<2>::shape(i, xi, eta, zeta); - case 3: return MonomialImpl3D<3>::shape(i, xi, eta, zeta); - case 4: return MonomialImpl3D<4>::shape(i, xi, eta, zeta); - case 5: return MonomialImpl3D<5>::shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("shape(): unsupported 3D MONOMIAL order"); - return Real(0); - } - default: - detail::abort_unsupported("shape(): unsupported MONOMIAL element topology"); - return Real(0); - } - } + return detail::dispatch_monomial(key.elem_type, + key.order, + detail::MonomialShapeOp{i, xi, eta, zeta}); default: detail::abort_unsupported("shape(): unsupported FE family"); @@ -309,53 +339,9 @@ grad_shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) return eval_lagrange_grad_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); case libMesh::MONOMIAL: - { - switch (monomial_evaluator_dim_or_zero(key.elem_type)) - { - case 1: - switch (key.order) - { - case 0: return MonomialImpl1D<0>::grad_shape(i, xi, eta, zeta); - case 1: return MonomialImpl1D<1>::grad_shape(i, xi, eta, zeta); - case 2: return MonomialImpl1D<2>::grad_shape(i, xi, eta, zeta); - case 3: return MonomialImpl1D<3>::grad_shape(i, xi, eta, zeta); - case 4: return MonomialImpl1D<4>::grad_shape(i, xi, eta, zeta); - case 5: return MonomialImpl1D<5>::grad_shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("grad_shape(): unsupported 1D MONOMIAL order"); - return zero_vector(); - } - case 2: - switch (key.order) - { - case 0: return MonomialImpl2D<0>::grad_shape(i, xi, eta, zeta); - case 1: return MonomialImpl2D<1>::grad_shape(i, xi, eta, zeta); - case 2: return MonomialImpl2D<2>::grad_shape(i, xi, 
eta, zeta); - case 3: return MonomialImpl2D<3>::grad_shape(i, xi, eta, zeta); - case 4: return MonomialImpl2D<4>::grad_shape(i, xi, eta, zeta); - case 5: return MonomialImpl2D<5>::grad_shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("grad_shape(): unsupported 2D MONOMIAL order"); - return zero_vector(); - } - case 3: - switch (key.order) - { - case 0: return MonomialImpl3D<0>::grad_shape(i, xi, eta, zeta); - case 1: return MonomialImpl3D<1>::grad_shape(i, xi, eta, zeta); - case 2: return MonomialImpl3D<2>::grad_shape(i, xi, eta, zeta); - case 3: return MonomialImpl3D<3>::grad_shape(i, xi, eta, zeta); - case 4: return MonomialImpl3D<4>::grad_shape(i, xi, eta, zeta); - case 5: return MonomialImpl3D<5>::grad_shape(i, xi, eta, zeta); - default: - detail::abort_unsupported("grad_shape(): unsupported 3D MONOMIAL order"); - return zero_vector(); - } - default: - detail::abort_unsupported("grad_shape(): unsupported MONOMIAL element topology"); - return zero_vector(); - } - } + return detail::dispatch_monomial(key.elem_type, + key.order, + detail::MonomialGradShapeOp{i, xi, eta, zeta}); default: detail::abort_unsupported("grad_shape(): unsupported FE family"); diff --git a/include/gpu/kokkos_fe_face_map.h b/include/gpu/kokkos_fe_face_map.h index 5cefdd2402b..b09abbd4833 100644 --- a/include/gpu/kokkos_fe_face_map.h +++ b/include/gpu/kokkos_fe_face_map.h @@ -5,6 +5,7 @@ #include "kokkos_fe_evaluator.h" #include "libmesh/elem.h" +#include "libmesh/fe_reference_element_traits.h" namespace libMesh::Kokkos { @@ -21,21 +22,33 @@ RealVector point_to_real_vector(const libMesh::Point & pt) #endif } +inline unsigned int +parent_local_side_node(const libMesh::Elem & parent, + unsigned int side, + unsigned int side_node) +{ + unsigned int node = libMesh::invalid_uint; + if (libMesh::try_local_side_node(parent.type(), side, side_node, node)) + return node; + + detail::abort_unsupported("map_face_qp_to_parent(): unsupported parent element type in local side-node lookup"); 
+ return libMesh::invalid_uint; +} + inline unsigned int recover_parent_side(const libMesh::Elem & parent, const libMesh::Elem & side_in_parent) { for (unsigned int side = 0; side < parent.n_sides(); ++side) { - auto candidate = parent.build_side_ptr(side); - - if (candidate->type() != side_in_parent.type() || - candidate->n_nodes() != side_in_parent.n_nodes()) + if (get_side_topology(parent.type(), side) != side_in_parent.type() || + ((libMesh::side_node_count_or_zero(parent.type(), side) && + libMesh::side_node_count_or_zero(parent.type(), side) != side_in_parent.n_nodes()))) continue; bool same_side = true; - for (unsigned int k = 0; k < candidate->n_nodes(); ++k) - if (candidate->node_ptr(k) != side_in_parent.node_ptr(k)) + for (unsigned int k = 0; k < side_in_parent.n_nodes(); ++k) + if (parent.node_ptr(parent_local_side_node(parent, side, k)) != side_in_parent.node_ptr(k)) { same_side = false; break; @@ -51,50 +64,12 @@ recover_parent_side(const libMesh::Elem & parent, inline libMesh::Point parent_refspace_node(const libMesh::Elem & parent, unsigned int node) { - switch (parent.type()) - { - case libMesh::PYRAMID13: - case libMesh::PYRAMID14: - switch (node) - { - case 9: - return libMesh::Point(-0.5, -0.5, 0.5); - case 10: - return libMesh::Point(0.5, -0.5, 0.5); - case 11: - return libMesh::Point(0.5, 0.5, 0.5); - case 12: - return libMesh::Point(-0.5, 0.5, 0.5); - default: - return parent.master_point(node); - } + libMesh::Point pt; + if (libMesh::try_reference_node(parent.type(), node, pt)) + return pt; - case libMesh::PYRAMID18: - switch (node) - { - case 9: - return libMesh::Point(-0.5, -0.5, 0.5); - case 10: - return libMesh::Point(0.5, -0.5, 0.5); - case 11: - return libMesh::Point(0.5, 0.5, 0.5); - case 12: - return libMesh::Point(-0.5, 0.5, 0.5); - case 14: - return libMesh::Point(-2. / 3., 0.0, 1. / 3.); - case 15: - return libMesh::Point(0.0, 2. / 3., 1. / 3.); - case 16: - return libMesh::Point(2. / 3., 0.0, 1. 
/ 3.); - case 17: - return libMesh::Point(0.0, -2. / 3., 1. / 3.); - default: - return parent.master_point(node); - } - - default: - return parent.master_point(node); - } + detail::abort_unsupported("map_face_qp_to_parent(): unsupported parent element type in reference-node lookup"); + return libMesh::Point(); } /** @@ -103,10 +78,10 @@ parent_refspace_node(const libMesh::Elem & parent, unsigned int node) * * side_in_parent must be obtained via build_side_ptr() (not side_ptr()), so that * second-order sides carry their midpoint nodes. Parent reference coordinates - * are reconstructed from the FE reference-space node convention used by - * FE::side_map(), not from side_in_parent.point(k), which lives in physical - * space, and not from Elem::master_point() on pyramids, where those node - * coordinates differ. + * are reconstructed from shared libMesh reference-element traits. They are not + * reconstructed from side_in_parent.point(k), which lives in physical space. + * Element types outside the Kokkos FE support boundary are rejected rather + * than silently falling back to generic Elem runtime helpers. * * @param side_in_parent The side element as embedded in the parent (from build_side_ptr()) * @param mapping_type Geometric mapping type (LAGRANGE_MAP, RATIONAL_BERNSTEIN_MAP) @@ -136,7 +111,7 @@ map_face_qp_to_parent(const libMesh::Elem & side_in_parent, // corresponding parent vertex in the parent reference element. 
if (n == 1) { - const libMesh::Point pt = parent_refspace_node(*parent, parent->local_side_node(side, 0)); + const libMesh::Point pt = parent_refspace_node(*parent, parent_local_side_node(*parent, side, 0)); return point_to_real_vector(pt); } @@ -146,7 +121,7 @@ map_face_qp_to_parent(const libMesh::Elem & side_in_parent, const Real t = face_qpt(1); const Real psi = map_shape(mapping_type, side_topo, k, s, t, 0.0); - const libMesh::Point pt = parent_refspace_node(*parent, parent->local_side_node(side, k)); + const libMesh::Point pt = parent_refspace_node(*parent, parent_local_side_node(*parent, side, k)); parent_pt.add_scaled(point_to_real_vector(pt), psi); } diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h index 3ade1ee709c..9a47ffa4b18 100644 --- a/include/gpu/kokkos_fe_lagrange_1d.h +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -13,6 +13,7 @@ #define LIBMESH_KOKKOS_FE_LAGRANGE_1D_H #include "kokkos_fe_base.h" +#include "libmesh/fe_lagrange_shape_1D.h" namespace libMesh::Kokkos { @@ -28,23 +29,13 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return 0.5 * (1.0 - xi); - case 1: return 0.5 * (1.0 + xi); - default: return 0.0; - } + return libMesh::fe_lagrange_1D_linear_shape(i, xi); } LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return make_vector(-0.5, 0.0, 0.0); - case 1: return make_vector( 0.5, 0.0, 0.0); - default: return zero_vector(); - } + return make_vector(libMesh::fe_lagrange_1D_linear_shape_deriv(i, 0, xi), 0.0, 0.0); } #endif }; @@ -64,25 +55,13 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return 0.5 * xi * (xi - 1.0); - case 1: return 0.5 * xi * (xi + 1.0); - 
case 2: return 1.0 - xi * xi; - default: return 0.0; - } + return libMesh::fe_lagrange_1D_quadratic_shape(i, xi); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return make_vector(xi - 0.5, 0.0, 0.0); - case 1: return make_vector(xi + 0.5, 0.0, 0.0); - case 2: return make_vector(-2.0 * xi, 0.0, 0.0); - default: return zero_vector(); - } + return make_vector(libMesh::fe_lagrange_1D_quadratic_shape_deriv(i, 0, xi), 0.0, 0.0); } #endif }; diff --git a/include/gpu/kokkos_fe_lagrange_2d.h b/include/gpu/kokkos_fe_lagrange_2d.h index f58097cbe21..fda9baacc8a 100644 --- a/include/gpu/kokkos_fe_lagrange_2d.h +++ b/include/gpu/kokkos_fe_lagrange_2d.h @@ -9,6 +9,9 @@ #define LIBMESH_KOKKOS_FE_LAGRANGE_2D_H #include "kokkos_fe_base.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" namespace libMesh::Kokkos { @@ -25,25 +28,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return 1.0 - xi - eta; - case 1: return xi; - case 2: return eta; - default: return 0.0; - } + return libMesh::detail::fe_lagrange_tri3_shape(i, xi, eta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return make_vector(-1.0, -1.0, 0.0); - case 1: return make_vector( 1.0, 0.0, 0.0); - case 2: return make_vector( 0.0, 1.0, 0.0); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_tri3_shape_deriv(i, 0), + libMesh::detail::fe_lagrange_tri3_shape_deriv(i, 1), + 0.0); } #endif }; @@ -66,32 +59,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - const Real z0 = 1.0 - xi - eta; - switch (i) - { - case 0: return z0 * (2.0 * z0 - 1.0); - case 1: return xi * 
(2.0 * xi - 1.0); - case 2: return eta * (2.0 * eta - 1.0); - case 3: return 4.0 * z0 * xi; - case 4: return 4.0 * xi * eta; - case 5: return 4.0 * eta * z0; - default: return 0.0; - } + return libMesh::detail::fe_lagrange_tri6_shape(i, xi, eta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return make_vector(4.0*xi + 4.0*eta - 3.0, 4.0*xi + 4.0*eta - 3.0, 0.0); - case 1: return make_vector(4.0*xi - 1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 4.0*eta - 1.0, 0.0); - case 3: return make_vector(4.0*(1.0 - 2.0*xi - eta), -4.0*xi, 0.0); - case 4: return make_vector(4.0*eta, 4.0*xi, 0.0); - case 5: return make_vector(-4.0*eta, 4.0*(1.0 - xi - 2.0*eta), 0.0); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_tri6_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_tri6_shape_deriv(i, 1, xi, eta), + 0.0); } #endif }; @@ -110,27 +86,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return 0.25 * (1.0 - xi) * (1.0 - eta); - case 1: return 0.25 * (1.0 + xi) * (1.0 - eta); - case 2: return 0.25 * (1.0 + xi) * (1.0 + eta); - case 3: return 0.25 * (1.0 - xi) * (1.0 + eta); - default: return 0.0; - } + return libMesh::detail::fe_lagrange_quad4_shape(i, xi, eta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return make_vector(-0.25*(1.0-eta), -0.25*(1.0-xi), 0.0); - case 1: return make_vector( 0.25*(1.0-eta), -0.25*(1.0+xi), 0.0); - case 2: return make_vector( 0.25*(1.0+eta), 0.25*(1.0+xi), 0.0); - case 3: return make_vector(-0.25*(1.0+eta), 0.25*(1.0-xi), 0.0); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_quad4_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_quad4_shape_deriv(i, 1, xi, eta), + 0.0); } #endif }; @@ 
-149,43 +113,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return 0.25 * (1.0-xi) * (1.0-eta) * (-1.0-xi-eta); - case 1: return 0.25 * (1.0+xi) * (1.0-eta) * (-1.0+xi-eta); - case 2: return 0.25 * (1.0+xi) * (1.0+eta) * (-1.0+xi+eta); - case 3: return 0.25 * (1.0-xi) * (1.0+eta) * (-1.0-xi+eta); - case 4: return 0.5 * (1.0-xi*xi) * (1.0-eta); - case 5: return 0.5 * (1.0+xi) * (1.0-eta*eta); - case 6: return 0.5 * (1.0-xi*xi) * (1.0+eta); - case 7: return 0.5 * (1.0-xi) * (1.0-eta*eta); - default: return 0.0; - } + return libMesh::detail::fe_lagrange_quad8_shape(i, xi, eta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return make_vector(0.25*(1.0-eta)*(2.0*xi+eta), - 0.25*(1.0-xi)*(xi+2.0*eta), - 0.0); - case 1: return make_vector(0.25*(1.0-eta)*(2.0*xi-eta), - 0.25*(1.0+xi)*(2.0*eta-xi), - 0.0); - case 2: return make_vector(0.25*(1.0+eta)*(2.0*xi+eta), - 0.25*(1.0+xi)*(xi+2.0*eta), - 0.0); - case 3: return make_vector(0.25*(1.0+eta)*(2.0*xi-eta), - 0.25*(1.0-xi)*(2.0*eta-xi), - 0.0); - case 4: return make_vector(-xi*(1.0-eta), -0.5*(1.0-xi*xi), 0.0); - case 5: return make_vector(0.5*(1.0-eta*eta), -eta*(1.0+xi), 0.0); - case 6: return make_vector(-xi*(1.0+eta), 0.5*(1.0-xi*xi), 0.0); - case 7: return make_vector(-0.5*(1.0-eta*eta), -eta*(1.0-xi), 0.0); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_quad8_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_quad8_shape_deriv(i, 1, xi, eta), + 0.0); } #endif }; @@ -206,44 +142,18 @@ struct FEEvaluator static constexpr unsigned int n_dofs() { return 9; } #ifdef LIBMESH_HAVE_KOKKOS - LIBMESH_DEVICE_INLINE static Real L(unsigned int k, Real t) - { - switch (k) - { - case 0: return 0.5 * t * (t - 1.0); - case 1: return 0.5 * t * (t + 1.0); - case 2: return 1.0 - t * t; - default: return 0.0; - 
} - } - - LIBMESH_DEVICE_INLINE static Real dL(unsigned int k, Real t) - { - switch (k) - { - case 0: return t - 0.5; - case 1: return t + 0.5; - case 2: return -2.0 * t; - default: return 0.0; - } - } - LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - return L(i0[i], xi) * L(i1[i], eta); + return libMesh::detail::fe_lagrange_quad9_shape(i, xi, eta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - const Real dxi = dL(i0[i], xi) * L(i1[i], eta); - const Real deta = L(i0[i], xi) * dL(i1[i], eta); - return make_vector(dxi, deta, 0.0); + return make_vector(libMesh::detail::fe_lagrange_quad9_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_quad9_shape_deriv(i, 1, xi, eta), + 0.0); } #endif }; diff --git a/include/gpu/kokkos_fe_lagrange_3d.h b/include/gpu/kokkos_fe_lagrange_3d.h index 48afbccaa28..cde05985aa0 100644 --- a/include/gpu/kokkos_fe_lagrange_3d.h +++ b/include/gpu/kokkos_fe_lagrange_3d.h @@ -9,6 +9,9 @@ #define LIBMESH_KOKKOS_FE_LAGRANGE_3D_H #include "kokkos_fe_base.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" namespace libMesh::Kokkos { @@ -25,27 +28,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return 1.0 - xi - eta - zeta; - case 1: return xi; - case 2: return eta; - case 3: return zeta; - default: return 0.0; - } + return libMesh::detail::fe_lagrange_tet4_shape(i, xi, eta, zeta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) { - switch (i) - { 
- case 0: return make_vector(-1.0, -1.0, -1.0); - case 1: return make_vector( 1.0, 0.0, 0.0); - case 2: return make_vector( 0.0, 1.0, 0.0); - case 3: return make_vector( 0.0, 0.0, 1.0); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_tet4_shape_deriv(i, 0), + libMesh::detail::fe_lagrange_tet4_shape_deriv(i, 1), + libMesh::detail::fe_lagrange_tet4_shape_deriv(i, 2)); } #endif }; @@ -72,44 +63,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real zeta) { - const Real z0 = 1.0 - xi - eta - zeta; - switch (i) - { - case 0: return z0 * (2.0*z0 - 1.0); - case 1: return xi * (2.0*xi - 1.0); - case 2: return eta * (2.0*eta - 1.0); - case 3: return zeta* (2.0*zeta - 1.0); - case 4: return 4.0 * z0 * xi; - case 5: return 4.0 * xi * eta; - case 6: return 4.0 * eta * z0; - case 7: return 4.0 * z0 * zeta; - case 8: return 4.0 * xi * zeta; - case 9: return 4.0 * eta * zeta; - default: return 0.0; - } + return libMesh::detail::fe_lagrange_tet10_shape(i, xi, eta, zeta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: - { - const Real v = 4.0*(xi + eta + zeta) - 3.0; - return make_vector(v, v, v); - } - case 1: return make_vector(4.0*xi - 1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 4.0*eta - 1.0, 0.0); - case 3: return make_vector(0.0, 0.0, 4.0*zeta - 1.0); - case 4: return make_vector( 4.0*(1.0-2.0*xi-eta-zeta), -4.0*xi, -4.0*xi); - case 5: return make_vector( 4.0*eta, 4.0*xi, 0.0); - case 6: return make_vector(-4.0*eta, 4.0*(1.0-xi-2.0*eta-zeta), -4.0*eta); - case 7: return make_vector(-4.0*zeta, -4.0*zeta, 4.0*(1.0-xi-eta-2.0*zeta)); - case 8: return make_vector(4.0*zeta, 0.0, 4.0*xi); - case 9: return make_vector(0.0, 4.0*zeta, 4.0*eta); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_tet10_shape_deriv(i, 0, xi, eta, zeta), + 
libMesh::detail::fe_lagrange_tet10_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_tet10_shape_deriv(i, 2, xi, eta, zeta)); } #endif }; @@ -129,51 +91,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return 0.125*(1.0-xi)*(1.0-eta)*(1.0-zeta); - case 1: return 0.125*(1.0+xi)*(1.0-eta)*(1.0-zeta); - case 2: return 0.125*(1.0+xi)*(1.0+eta)*(1.0-zeta); - case 3: return 0.125*(1.0-xi)*(1.0+eta)*(1.0-zeta); - case 4: return 0.125*(1.0-xi)*(1.0-eta)*(1.0+zeta); - case 5: return 0.125*(1.0+xi)*(1.0-eta)*(1.0+zeta); - case 6: return 0.125*(1.0+xi)*(1.0+eta)*(1.0+zeta); - case 7: return 0.125*(1.0-xi)*(1.0+eta)*(1.0+zeta); - default: return 0.0; - } + return libMesh::detail::fe_lagrange_hex8_shape(i, xi, eta, zeta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return make_vector(-0.125*(1.0-eta)*(1.0-zeta), - -0.125*(1.0-xi) *(1.0-zeta), - -0.125*(1.0-xi) *(1.0-eta)); - case 1: return make_vector( 0.125*(1.0-eta)*(1.0-zeta), - -0.125*(1.0+xi) *(1.0-zeta), - -0.125*(1.0+xi) *(1.0-eta)); - case 2: return make_vector( 0.125*(1.0+eta)*(1.0-zeta), - 0.125*(1.0+xi) *(1.0-zeta), - -0.125*(1.0+xi) *(1.0+eta)); - case 3: return make_vector(-0.125*(1.0+eta)*(1.0-zeta), - 0.125*(1.0-xi) *(1.0-zeta), - -0.125*(1.0-xi) *(1.0+eta)); - case 4: return make_vector(-0.125*(1.0-eta)*(1.0+zeta), - -0.125*(1.0-xi) *(1.0+zeta), - 0.125*(1.0-xi) *(1.0-eta)); - case 5: return make_vector( 0.125*(1.0-eta)*(1.0+zeta), - -0.125*(1.0+xi) *(1.0+zeta), - 0.125*(1.0+xi) *(1.0-eta)); - case 6: return make_vector( 0.125*(1.0+eta)*(1.0+zeta), - 0.125*(1.0+xi) *(1.0+zeta), - 0.125*(1.0+xi) *(1.0+eta)); - case 7: return make_vector(-0.125*(1.0+eta)*(1.0+zeta), - 0.125*(1.0-xi) *(1.0+zeta), - 0.125*(1.0-xi) *(1.0+eta)); - default: return zero_vector(); - } + return 
make_vector(libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 2, xi, eta, zeta)); } #endif }; @@ -191,107 +117,15 @@ struct FEEvaluator LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return 0.125*(1.0-xi)*(1.0-eta)*(1.0-zeta)*(-xi-eta-zeta-2.0); - case 1: return 0.125*(1.0+xi)*(1.0-eta)*(1.0-zeta)*( xi-eta-zeta-2.0); - case 2: return 0.125*(1.0+xi)*(1.0+eta)*(1.0-zeta)*( xi+eta-zeta-2.0); - case 3: return 0.125*(1.0-xi)*(1.0+eta)*(1.0-zeta)*(-xi+eta-zeta-2.0); - case 4: return 0.125*(1.0-xi)*(1.0-eta)*(1.0+zeta)*(-xi-eta+zeta-2.0); - case 5: return 0.125*(1.0+xi)*(1.0-eta)*(1.0+zeta)*( xi-eta+zeta-2.0); - case 6: return 0.125*(1.0+xi)*(1.0+eta)*(1.0+zeta)*( xi+eta+zeta-2.0); - case 7: return 0.125*(1.0-xi)*(1.0+eta)*(1.0+zeta)*(-xi+eta+zeta-2.0); - case 8: return 0.25*(1.0-xi*xi)*(1.0-eta)*(1.0-zeta); - case 10: return 0.25*(1.0-xi*xi)*(1.0+eta)*(1.0-zeta); - case 16: return 0.25*(1.0-xi*xi)*(1.0-eta)*(1.0+zeta); - case 18: return 0.25*(1.0-xi*xi)*(1.0+eta)*(1.0+zeta); - case 9: return 0.25*(1.0+xi)*(1.0-eta*eta)*(1.0-zeta); - case 11: return 0.25*(1.0-xi)*(1.0-eta*eta)*(1.0-zeta); - case 17: return 0.25*(1.0+xi)*(1.0-eta*eta)*(1.0+zeta); - case 19: return 0.25*(1.0-xi)*(1.0-eta*eta)*(1.0+zeta); - case 12: return 0.25*(1.0-xi)*(1.0-eta)*(1.0-zeta*zeta); - case 13: return 0.25*(1.0+xi)*(1.0-eta)*(1.0-zeta*zeta); - case 14: return 0.25*(1.0+xi)*(1.0+eta)*(1.0-zeta*zeta); - case 15: return 0.25*(1.0-xi)*(1.0+eta)*(1.0-zeta*zeta); - default: return 0.0; - } + return libMesh::detail::fe_lagrange_hex20_shape(i, xi, eta, zeta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return make_vector( - -0.125*(1.0-eta)*(1.0-zeta)*(-2.0*xi-eta-zeta-1.0), - -0.125*(1.0-xi) 
*(1.0-zeta)*(-xi-2.0*eta-zeta-1.0), - -0.125*(1.0-xi) *(1.0-eta) *(-xi-eta-2.0*zeta-1.0)); - case 1: return make_vector( - 0.125*(1.0-eta)*(1.0-zeta)*(2.0*xi-eta-zeta-1.0), - -0.125*(1.0+xi) *(1.0-zeta)*(xi-2.0*eta-zeta-1.0), - -0.125*(1.0+xi) *(1.0-eta) *(xi-eta-2.0*zeta-1.0)); - case 2: return make_vector( - 0.125*(1.0+eta)*(1.0-zeta)*(2.0*xi+eta-zeta-1.0), - 0.125*(1.0+xi) *(1.0-zeta)*(xi+2.0*eta-zeta-1.0), - -0.125*(1.0+xi) *(1.0+eta) *(xi+eta-2.0*zeta-1.0)); - case 3: return make_vector( - -0.125*(1.0+eta)*(1.0-zeta)*(-2.0*xi+eta-zeta-1.0), - 0.125*(1.0-xi) *(1.0-zeta)*(-xi+2.0*eta-zeta-1.0), - -0.125*(1.0-xi) *(1.0+eta) *(-xi+eta-2.0*zeta-1.0)); - case 4: return make_vector( - -0.125*(1.0-eta)*(1.0+zeta)*(-2.0*xi-eta+zeta-1.0), - -0.125*(1.0-xi) *(1.0+zeta)*(-xi-2.0*eta+zeta-1.0), - 0.125*(1.0-xi) *(1.0-eta) *(-xi-eta+2.0*zeta-1.0)); - case 5: return make_vector( - 0.125*(1.0-eta)*(1.0+zeta)*(2.0*xi-eta+zeta-1.0), - -0.125*(1.0+xi) *(1.0+zeta)*(xi-2.0*eta+zeta-1.0), - 0.125*(1.0+xi) *(1.0-eta) *(xi-eta+2.0*zeta-1.0)); - case 6: return make_vector( - 0.125*(1.0+eta)*(1.0+zeta)*(2.0*xi+eta+zeta-1.0), - 0.125*(1.0+xi) *(1.0+zeta)*(xi+2.0*eta+zeta-1.0), - 0.125*(1.0+xi) *(1.0+eta) *(xi+eta+2.0*zeta-1.0)); - case 7: return make_vector( - -0.125*(1.0+eta)*(1.0+zeta)*(-2.0*xi+eta+zeta-1.0), - 0.125*(1.0-xi) *(1.0+zeta)*(-xi+2.0*eta+zeta-1.0), - 0.125*(1.0-xi) *(1.0+eta) *(-xi+eta+2.0*zeta-1.0)); - case 8: return make_vector(-0.5*xi*(1.0-eta)*(1.0-zeta), - -0.25*(1.0-xi*xi)*(1.0-zeta), - -0.25*(1.0-xi*xi)*(1.0-eta)); - case 10: return make_vector(-0.5*xi*(1.0+eta)*(1.0-zeta), - 0.25*(1.0-xi*xi)*(1.0-zeta), - -0.25*(1.0-xi*xi)*(1.0+eta)); - case 16: return make_vector(-0.5*xi*(1.0-eta)*(1.0+zeta), - -0.25*(1.0-xi*xi)*(1.0+zeta), - 0.25*(1.0-xi*xi)*(1.0-eta)); - case 18: return make_vector(-0.5*xi*(1.0+eta)*(1.0+zeta), - 0.25*(1.0-xi*xi)*(1.0+zeta), - 0.25*(1.0-xi*xi)*(1.0+eta)); - case 9: return make_vector( 0.25*(1.0-eta*eta)*(1.0-zeta), - 
-0.5*eta*(1.0+xi)*(1.0-zeta), - -0.25*(1.0+xi)*(1.0-eta*eta)); - case 11: return make_vector(-0.25*(1.0-eta*eta)*(1.0-zeta), - -0.5*eta*(1.0-xi)*(1.0-zeta), - -0.25*(1.0-xi)*(1.0-eta*eta)); - case 17: return make_vector( 0.25*(1.0-eta*eta)*(1.0+zeta), - -0.5*eta*(1.0+xi)*(1.0+zeta), - 0.25*(1.0+xi)*(1.0-eta*eta)); - case 19: return make_vector(-0.25*(1.0-eta*eta)*(1.0+zeta), - -0.5*eta*(1.0-xi)*(1.0+zeta), - 0.25*(1.0-xi)*(1.0-eta*eta)); - case 12: return make_vector(-0.25*(1.0-eta)*(1.0-zeta*zeta), - -0.25*(1.0-xi)*(1.0-zeta*zeta), - -0.5*zeta*(1.0-xi)*(1.0-eta)); - case 13: return make_vector( 0.25*(1.0-eta)*(1.0-zeta*zeta), - -0.25*(1.0+xi)*(1.0-zeta*zeta), - -0.5*zeta*(1.0+xi)*(1.0-eta)); - case 14: return make_vector( 0.25*(1.0+eta)*(1.0-zeta*zeta), - 0.25*(1.0+xi)*(1.0-zeta*zeta), - -0.5*zeta*(1.0+xi)*(1.0+eta)); - case 15: return make_vector(-0.25*(1.0+eta)*(1.0-zeta*zeta), - 0.25*(1.0-xi)*(1.0-zeta*zeta), - -0.5*zeta*(1.0-xi)*(1.0+eta)); - default: return zero_vector(); - } + return make_vector(libMesh::detail::fe_lagrange_hex20_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex20_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex20_shape_deriv(i, 2, xi, eta, zeta)); } #endif }; @@ -309,55 +143,18 @@ struct FEEvaluator static constexpr unsigned int n_dofs() { return 27; } #ifdef LIBMESH_HAVE_KOKKOS - LIBMESH_DEVICE_INLINE static Real L(unsigned int k, Real t) - { - switch (k) - { - case 0: return 0.5 * t * (t - 1.0); - case 1: return 0.5 * t * (t + 1.0); - case 2: return 1.0 - t * t; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static Real dL(unsigned int k, Real t) - { - switch (k) - { - case 0: return t - 0.5; - case 1: return t + 0.5; - case 2: return -2.0 * t; - default: return 0.0; - } - } - LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real zeta) { - static const unsigned int i0[] = - {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2}; - static const unsigned int 
i1[] = - {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2}; - static const unsigned int i2[] = - {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2}; - return L(i0[i], xi) * L(i1[i], eta) * L(i2[i], zeta); + return libMesh::detail::fe_lagrange_hex27_shape(i, xi, eta, zeta); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) { - static const unsigned int i0[] = - {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2}; - static const unsigned int i1[] = - {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2}; - static const unsigned int i2[] = - {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2}; - const Real lxi = L(i0[i], xi); - const Real leta = L(i1[i], eta); - const Real lzeta = L(i2[i], zeta); - return make_vector(dL(i0[i], xi) * leta * lzeta, - lxi * dL(i1[i], eta) * lzeta, - lxi * leta * dL(i2[i], zeta)); + return make_vector(libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 2, xi, eta, zeta)); } #endif }; diff --git a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h index 6e237997f81..dd6f10cc642 100644 --- a/include/gpu/kokkos_fe_map.h +++ b/include/gpu/kokkos_fe_map.h @@ -18,7 +18,6 @@ #define LIBMESH_KOKKOS_FE_MAP_H #include "kokkos_fe_evaluator.h" -#include "kokkos_scalar_types.h" namespace libMesh::Kokkos { @@ -168,7 +167,7 @@ LIBMESH_DEVICE_INLINE Real volume_jxw(const RealTensor & J, unsigned int dim, Real quad_weight) { if (dim == 3) - return leading_determinant(J, 3) * quad_weight; + return detail::leading_determinant(J, 3) * quad_weight; else if (dim == 2) return J.row(0).cross(J.row(1)).norm() * quad_weight; else if (dim == 1) diff --git a/include/gpu/kokkos_fe_monomial.h b/include/gpu/kokkos_fe_monomial.h index 2dde44785f6..b6e97436b6f 100644 --- a/include/gpu/kokkos_fe_monomial.h +++ 
b/include/gpu/kokkos_fe_monomial.h @@ -1,14 +1,13 @@ // Kokkos FEEvaluator specializations for MONOMIAL elements. // -// MONOMIAL uses the complete total-degree polynomial space P_p. Following -// libMesh's FE, the basis is parameterised by spatial dimension, -// not element class — TRI and QUAD share MonomialImpl2D; TET/HEX/PRISM/PYRAMID -// share MonomialImpl3D. This gives 3 x 6 = 18 impl specializations (dims 1/2/3, -// orders 0-5), then per-topology FEEvaluator delegating specializations wire each -// libMesh::ElemType to the matching impl. +// MONOMIAL uses the complete total-degree polynomial space P_p. Following +// libMesh's FE, the basis is parameterized by spatial +// dimension, not element class: TRI and QUAD share the 2-D implementation, +// while TET/HEX/PRISM/PYRAMID share the 3-D implementation. // -// Basis ordering: graded-lex (total degree first, then lexicographic by -// decreasing xi exponent). Matches libMesh::FE::shape ordering. +// The implementation below mirrors the generic index-to-exponent decoding used +// by libMesh's host-side MONOMIAL FE code, so the Kokkos layer reuses the same +// basis ordering without hand-expanding every order into bespoke tables. 
#ifndef LIBMESH_KOKKOS_FE_MONOMIAL_H #define LIBMESH_KOKKOS_FE_MONOMIAL_H @@ -19,858 +18,170 @@ namespace libMesh::Kokkos { -// ═══════════════════════════════════════════════════════════════════════════ -// MonomialImpl1D — 1-D MONOMIAL basis, order N -// n_dofs = N + 1 -// Basis: {1, xi, xi², xi³, ...} -// ═══════════════════════════════════════════════════════════════════════════ - -template -struct MonomialImpl1D; - -template <> -struct MonomialImpl1D<0> +namespace detail { - static constexpr unsigned int n_dofs() { return 1; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - return 1.0; - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - return zero_vector(); - } -}; -template <> -struct MonomialImpl1D<1> +LIBMESH_DEVICE_INLINE Real +pow_unsigned(Real base, unsigned int exponent) { - static constexpr unsigned int n_dofs() { return 2; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - default: return 0.0; - } - } + Real value = 1; + for (unsigned int i = 0; i < exponent; ++i) + value *= base; + return value; +} - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - default: return zero_vector(); - } - } -}; +template +struct monomial_exponents; template <> -struct MonomialImpl1D<2> +struct monomial_exponents<1> { - static constexpr unsigned int n_dofs() { return 3; } + unsigned int nx; - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + LIBMESH_DEVICE_INLINE static monomial_exponents decode(unsigned int i) { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return xi * xi; - default: 
return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(2.0 * xi, 0.0, 0.0); - default: return zero_vector(); - } + return {i}; } }; template <> -struct MonomialImpl1D<3> +struct monomial_exponents<2> { - static constexpr unsigned int n_dofs() { return 4; } + unsigned int nx; + unsigned int ny; - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + LIBMESH_DEVICE_INLINE static monomial_exponents decode(unsigned int i) { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return xi * xi; - case 3: return xi * xi * xi; - default: return 0.0; - } - } + unsigned int degree = 0; + for (; i >= (degree + 1) * (degree + 2) / 2; ++degree) {} - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(2.0 * xi, 0.0, 0.0); - case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0); - default: return zero_vector(); - } + const unsigned int ny = i - (degree * (degree + 1) / 2); + const unsigned int nx = degree - ny; + return {nx, ny}; } }; template <> -struct MonomialImpl1D<4> +struct monomial_exponents<3> { - static constexpr unsigned int n_dofs() { return 5; } + unsigned int nx; + unsigned int ny; + unsigned int nz; - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + LIBMESH_DEVICE_INLINE static monomial_exponents decode(unsigned int i) { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return xi * xi; - case 3: return xi * xi * xi; - case 4: return xi * xi * xi * xi; - default: return 0.0; - } - } + unsigned int degree = 0; + for (; i >= (degree + 1) * (degree + 2) * (degree + 3) 
/ 6; ++degree) {} - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(2.0 * xi, 0.0, 0.0); - case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 4: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); - default: return zero_vector(); - } - } -}; + const unsigned int degree_offset = degree * (degree + 1) * (degree + 2) / 6; + const unsigned int local_index = i - degree_offset; -template <> -struct MonomialImpl1D<5> -{ - static constexpr unsigned int n_dofs() { return 6; } + unsigned int block = degree; + unsigned int nz = 0; + for (; block < local_index; block += (degree - nz + 1)) + ++nz; - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return xi * xi; - case 3: return xi * xi * xi; - case 4: return xi * xi * xi * xi; - case 5: return xi * xi * xi * xi * xi; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(2.0 * xi, 0.0, 0.0); - case 3: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 4: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); - case 5: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0); - default: return zero_vector(); - } + const unsigned int nx = block - local_index; + const unsigned int ny = degree - nx - nz; + return {nx, ny, nz}; } }; -// ═══════════════════════════════════════════════════════════════════════════ -// MonomialImpl2D — 2-D MONOMIAL basis, order N -// n_dofs = (N+1)(N+2)/2 -// Graded-lex basis: {1, xi, eta, xi², xi·eta, eta², ...} -// Shared by TRI and QUAD element classes. 
-// ═══════════════════════════════════════════════════════════════════════════ +} // namespace detail template -struct MonomialImpl2D; - -template <> -struct MonomialImpl2D<0> -{ - static constexpr unsigned int n_dofs() { return 1; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - return 1.0; - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - return zero_vector(); - } -}; - -template <> -struct MonomialImpl2D<1> -{ - static constexpr unsigned int n_dofs() { return 3; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - default: return zero_vector(); - } - } -}; - -template <> -struct MonomialImpl2D<2> -{ - static constexpr unsigned int n_dofs() { return 6; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return xi * xi; - case 4: return xi * eta; - case 5: return eta * eta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(2.0 * xi, 0.0, 0.0); - case 4: return make_vector(eta, xi, 0.0); - case 5: return make_vector(0.0, 2.0 * eta, 0.0); - default: return zero_vector(); - } - } -}; - -template <> -struct 
MonomialImpl2D<3> +struct MonomialImpl1D { - static constexpr unsigned int n_dofs() { return 10; } + static constexpr unsigned int n_dofs() { return N + 1; } LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return xi * xi; - case 4: return xi * eta; - case 5: return eta * eta; - case 6: return xi * xi * xi; - case 7: return xi * xi * eta; - case 8: return xi * eta * eta; - case 9: return eta * eta * eta; - default: return 0.0; - } + return detail::pow_unsigned(xi, i); } LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(2.0 * xi, 0.0, 0.0); - case 4: return make_vector(eta, xi, 0.0); - case 5: return make_vector(0.0, 2.0 * eta, 0.0); - case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0); - case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); - case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0); - default: return zero_vector(); - } - } -}; - -template <> -struct MonomialImpl2D<4> -{ - static constexpr unsigned int n_dofs() { return 15; } + if (!i) + return zero_vector(); - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return xi * xi; - case 4: return xi * eta; - case 5: return eta * eta; - case 6: return xi * xi * xi; - case 7: return xi * xi * eta; - case 8: return xi * eta * eta; - case 9: return eta * eta * eta; - case 10: return xi * xi * xi * xi; - case 11: return xi * xi * 
xi * eta; - case 12: return xi * xi * eta * eta; - case 13: return xi * eta * eta * eta; - case 14: return eta * eta * eta * eta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(2.0 * xi, 0.0, 0.0); - case 4: return make_vector(eta, xi, 0.0); - case 5: return make_vector(0.0, 2.0 * eta, 0.0); - case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0); - case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); - case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0); - case 10: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); - case 11: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); - case 12: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); - case 13: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); - case 14: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); - default: return zero_vector(); - } + return make_vector(i * detail::pow_unsigned(xi, i - 1), 0.0, 0.0); } }; -template <> -struct MonomialImpl2D<5> +template +struct MonomialImpl2D { - static constexpr unsigned int n_dofs() { return 21; } + static constexpr unsigned int n_dofs() { return (N + 1) * (N + 2) / 2; } LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return xi * xi; - case 4: return xi * eta; - case 5: return eta * eta; - case 6: return xi * xi * xi; - case 7: return xi * xi * eta; - case 8: return xi * eta * eta; - case 9: return eta * eta * eta; - case 10: return xi * xi * xi * xi; - case 11: return xi * xi * xi * eta; - case 12: return xi * xi * eta * eta; - case 13: return xi * eta * eta * eta; - 
case 14: return eta * eta * eta * eta; - case 15: return xi * xi * xi * xi * xi; - case 16: return xi * xi * xi * xi * eta; - case 17: return xi * xi * xi * eta * eta; - case 18: return xi * xi * eta * eta * eta; - case 19: return xi * eta * eta * eta * eta; - case 20: return eta * eta * eta * eta * eta; - default: return 0.0; - } + const auto exponents = detail::monomial_exponents<2>::decode(i); + return detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(2.0 * xi, 0.0, 0.0); - case 4: return make_vector(eta, xi, 0.0); - case 5: return make_vector(0.0, 2.0 * eta, 0.0); - case 6: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 7: return make_vector(2.0 * xi * eta, xi * xi, 0.0); - case 8: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); - case 9: return make_vector(0.0, 3.0 * eta * eta, 0.0); - case 10: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); - case 11: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); - case 12: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); - case 13: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); - case 14: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); - case 15: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0); - case 16: return make_vector(4.0 * xi * xi * xi * eta, xi * xi * xi * xi, 0.0); - case 17: return make_vector(3.0 * xi * xi * eta * eta, 2.0 * xi * xi * xi * eta, 0.0); - case 18: return make_vector(2.0 * xi * eta * eta * eta, 3.0 * xi * xi * eta * eta, 0.0); - case 19: return make_vector(eta * eta * eta * eta, 4.0 * xi * eta * eta * eta, 0.0); - case 20: return make_vector(0.0, 5.0 * eta * eta * eta * eta, 0.0); - default: return zero_vector(); - } + const auto 
exponents = detail::monomial_exponents<2>::decode(i); + const Real dx = exponents.nx + ? exponents.nx * + detail::pow_unsigned(xi, exponents.nx - 1) * + detail::pow_unsigned(eta, exponents.ny) + : 0.0; + const Real dy = exponents.ny + ? exponents.ny * + detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny - 1) + : 0.0; + return make_vector(dx, dy, 0.0); } }; -// ═══════════════════════════════════════════════════════════════════════════ -// MonomialImpl3D — 3-D MONOMIAL basis, order N -// n_dofs = (N+1)(N+2)(N+3)/6 -// Basis ordering: graded-lex; for each total degree d, iterate c (zeta -// exponent) from 0 to d, then a (xi exponent) from d-c down to 0 (b=d-c-a). -// Shared by TET, HEX, PRISM, and PYRAMID element classes. -// ═══════════════════════════════════════════════════════════════════════════ - template -struct MonomialImpl3D; - -template <> -struct MonomialImpl3D<0> +struct MonomialImpl3D { - static constexpr unsigned int n_dofs() { return 1; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - return 1.0; - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int /*i*/, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - return zero_vector(); - } -}; - -template <> -struct MonomialImpl3D<1> -{ - static constexpr unsigned int n_dofs() { return 4; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return zeta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(0.0, 0.0, 1.0); - default: return zero_vector(); - } - } -}; - -template <> -struct 
MonomialImpl3D<2> -{ - static constexpr unsigned int n_dofs() { return 10; } + static constexpr unsigned int n_dofs() { return (N + 1) * (N + 2) * (N + 3) / 6; } LIBMESH_DEVICE_INLINE static Real shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return zeta; - case 4: return xi * xi; - case 5: return xi * eta; - case 6: return eta * eta; - case 7: return xi * zeta; - case 8: return eta * zeta; - case 9: return zeta * zeta; - default: return 0.0; - } + const auto exponents = detail::monomial_exponents<3>::decode(i); + return detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny) * + detail::pow_unsigned(zeta, exponents.nz); } LIBMESH_DEVICE_INLINE static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(0.0, 0.0, 1.0); - case 4: return make_vector(2.0 * xi, 0.0, 0.0); - case 5: return make_vector(eta, xi, 0.0); - case 6: return make_vector(0.0, 2.0 * eta, 0.0); - case 7: return make_vector(zeta, 0.0, xi); - case 8: return make_vector(0.0, zeta, eta); - case 9: return make_vector(0.0, 0.0, 2.0 * zeta); - default: return zero_vector(); - } + const auto exponents = detail::monomial_exponents<3>::decode(i); + const Real dx = exponents.nx + ? exponents.nx * + detail::pow_unsigned(xi, exponents.nx - 1) * + detail::pow_unsigned(eta, exponents.ny) * + detail::pow_unsigned(zeta, exponents.nz) + : 0.0; + const Real dy = exponents.ny + ? exponents.ny * + detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny - 1) * + detail::pow_unsigned(zeta, exponents.nz) + : 0.0; + const Real dz = exponents.nz + ? 
exponents.nz * + detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny) * + detail::pow_unsigned(zeta, exponents.nz - 1) + : 0.0; + return make_vector(dx, dy, dz); } }; -template <> -struct MonomialImpl3D<3> -{ - static constexpr unsigned int n_dofs() { return 20; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return zeta; - case 4: return xi * xi; - case 5: return xi * eta; - case 6: return eta * eta; - case 7: return xi * zeta; - case 8: return eta * zeta; - case 9: return zeta * zeta; - case 10: return xi * xi * xi; - case 11: return xi * xi * eta; - case 12: return xi * eta * eta; - case 13: return eta * eta * eta; - case 14: return xi * xi * zeta; - case 15: return xi * eta * zeta; - case 16: return eta * eta * zeta; - case 17: return xi * zeta * zeta; - case 18: return eta * zeta * zeta; - case 19: return zeta * zeta * zeta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(0.0, 0.0, 1.0); - case 4: return make_vector(2.0 * xi, 0.0, 0.0); - case 5: return make_vector(eta, xi, 0.0); - case 6: return make_vector(0.0, 2.0 * eta, 0.0); - case 7: return make_vector(zeta, 0.0, xi); - case 8: return make_vector(0.0, zeta, eta); - case 9: return make_vector(0.0, 0.0, 2.0 * zeta); - case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0); - case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); - case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0); - case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi); - case 15: return make_vector(eta * zeta, xi * zeta, xi * eta); - case 16: 
return make_vector(0.0, 2.0 * eta * zeta, eta * eta); - case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta); - case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta); - case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta); - default: return zero_vector(); - } - } -}; - -template <> -struct MonomialImpl3D<4> -{ - static constexpr unsigned int n_dofs() { return 35; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return zeta; - case 4: return xi * xi; - case 5: return xi * eta; - case 6: return eta * eta; - case 7: return xi * zeta; - case 8: return eta * zeta; - case 9: return zeta * zeta; - case 10: return xi * xi * xi; - case 11: return xi * xi * eta; - case 12: return xi * eta * eta; - case 13: return eta * eta * eta; - case 14: return xi * xi * zeta; - case 15: return xi * eta * zeta; - case 16: return eta * eta * zeta; - case 17: return xi * zeta * zeta; - case 18: return eta * zeta * zeta; - case 19: return zeta * zeta * zeta; - case 20: return xi * xi * xi * xi; - case 21: return xi * xi * xi * eta; - case 22: return xi * xi * eta * eta; - case 23: return xi * eta * eta * eta; - case 24: return eta * eta * eta * eta; - case 25: return xi * xi * xi * zeta; - case 26: return xi * xi * eta * zeta; - case 27: return xi * eta * eta * zeta; - case 28: return eta * eta * eta * zeta; - case 29: return xi * xi * zeta * zeta; - case 30: return xi * eta * zeta * zeta; - case 31: return eta * eta * zeta * zeta; - case 32: return xi * zeta * zeta * zeta; - case 33: return eta * zeta * zeta * zeta; - case 34: return zeta * zeta * zeta * zeta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 
0.0); - case 3: return make_vector(0.0, 0.0, 1.0); - case 4: return make_vector(2.0 * xi, 0.0, 0.0); - case 5: return make_vector(eta, xi, 0.0); - case 6: return make_vector(0.0, 2.0 * eta, 0.0); - case 7: return make_vector(zeta, 0.0, xi); - case 8: return make_vector(0.0, zeta, eta); - case 9: return make_vector(0.0, 0.0, 2.0 * zeta); - case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0); - case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); - case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0); - case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi); - case 15: return make_vector(eta * zeta, xi * zeta, xi * eta); - case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta); - case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta); - case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta); - case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta); - case 20: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); - case 21: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); - case 22: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); - case 23: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); - case 24: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); - case 25: return make_vector(3.0 * xi * xi * zeta, 0.0, xi * xi * xi); - case 26: return make_vector(2.0 * xi * eta * zeta, xi * xi * zeta, xi * xi * eta); - case 27: return make_vector(eta * eta * zeta, 2.0 * xi * eta * zeta, xi * eta * eta); - case 28: return make_vector(0.0, 3.0 * eta * eta * zeta, eta * eta * eta); - case 29: return make_vector(2.0 * xi * zeta * zeta, 0.0, 2.0 * xi * xi * zeta); - case 30: return make_vector(eta * zeta * zeta, xi * zeta * zeta, 2.0 * xi * eta * zeta); - case 31: return make_vector(0.0, 2.0 * eta * zeta * zeta, 2.0 * eta * eta * zeta); - case 32: return make_vector(zeta * zeta * zeta, 0.0, 3.0 * xi * zeta * zeta); - case 33: return 
make_vector(0.0, zeta * zeta * zeta, 3.0 * eta * zeta * zeta); - case 34: return make_vector(0.0, 0.0, 4.0 * zeta * zeta * zeta); - default: return zero_vector(); - } - } -}; - -template <> -struct MonomialImpl3D<5> -{ - static constexpr unsigned int n_dofs() { return 56; } - - LIBMESH_DEVICE_INLINE static Real - shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return 1.0; - case 1: return xi; - case 2: return eta; - case 3: return zeta; - case 4: return xi*xi; - case 5: return xi*eta; - case 6: return eta*eta; - case 7: return xi*zeta; - case 8: return eta*zeta; - case 9: return zeta*zeta; - case 10: return xi*xi*xi; - case 11: return xi*xi*eta; - case 12: return xi*eta*eta; - case 13: return eta*eta*eta; - case 14: return xi*xi*zeta; - case 15: return xi*eta*zeta; - case 16: return eta*eta*zeta; - case 17: return xi*zeta*zeta; - case 18: return eta*zeta*zeta; - case 19: return zeta*zeta*zeta; - case 20: return xi*xi*xi*xi; - case 21: return xi*xi*xi*eta; - case 22: return xi*xi*eta*eta; - case 23: return xi*eta*eta*eta; - case 24: return eta*eta*eta*eta; - case 25: return xi*xi*xi*zeta; - case 26: return xi*xi*eta*zeta; - case 27: return xi*eta*eta*zeta; - case 28: return eta*eta*eta*zeta; - case 29: return xi*xi*zeta*zeta; - case 30: return xi*eta*zeta*zeta; - case 31: return eta*eta*zeta*zeta; - case 32: return xi*zeta*zeta*zeta; - case 33: return eta*zeta*zeta*zeta; - case 34: return zeta*zeta*zeta*zeta; - case 35: return xi*xi*xi*xi*xi; - case 36: return xi*xi*xi*xi*eta; - case 37: return xi*xi*xi*eta*eta; - case 38: return xi*xi*eta*eta*eta; - case 39: return xi*eta*eta*eta*eta; - case 40: return eta*eta*eta*eta*eta; - case 41: return xi*xi*xi*xi*zeta; - case 42: return xi*xi*xi*eta*zeta; - case 43: return xi*xi*eta*eta*zeta; - case 44: return xi*eta*eta*eta*zeta; - case 45: return eta*eta*eta*eta*zeta; - case 46: return xi*xi*xi*zeta*zeta; - case 47: return xi*xi*eta*zeta*zeta; - case 48: return xi*eta*eta*zeta*zeta; - case 
49: return eta*eta*eta*zeta*zeta; - case 50: return xi*xi*zeta*zeta*zeta; - case 51: return xi*eta*zeta*zeta*zeta; - case 52: return eta*eta*zeta*zeta*zeta; - case 53: return xi*zeta*zeta*zeta*zeta; - case 54: return eta*zeta*zeta*zeta*zeta; - case 55: return zeta*zeta*zeta*zeta*zeta; - default: return 0.0; - } - } - - LIBMESH_DEVICE_INLINE static RealVector - grad_shape(unsigned int i, Real xi, Real eta, Real zeta) - { - switch (i) - { - case 0: return zero_vector(); - case 1: return make_vector(1.0, 0.0, 0.0); - case 2: return make_vector(0.0, 1.0, 0.0); - case 3: return make_vector(0.0, 0.0, 1.0); - case 4: return make_vector(2.0 * xi, 0.0, 0.0); - case 5: return make_vector(eta, xi, 0.0); - case 6: return make_vector(0.0, 2.0 * eta, 0.0); - case 7: return make_vector(zeta, 0.0, xi); - case 8: return make_vector(0.0, zeta, eta); - case 9: return make_vector(0.0, 0.0, 2.0 * zeta); - case 10: return make_vector(3.0 * xi * xi, 0.0, 0.0); - case 11: return make_vector(2.0 * xi * eta, xi * xi, 0.0); - case 12: return make_vector(eta * eta, 2.0 * xi * eta, 0.0); - case 13: return make_vector(0.0, 3.0 * eta * eta, 0.0); - case 14: return make_vector(2.0 * xi * zeta, 0.0, xi * xi); - case 15: return make_vector(eta * zeta, xi * zeta, xi * eta); - case 16: return make_vector(0.0, 2.0 * eta * zeta, eta * eta); - case 17: return make_vector(zeta * zeta, 0.0, 2.0 * xi * zeta); - case 18: return make_vector(0.0, zeta * zeta, 2.0 * eta * zeta); - case 19: return make_vector(0.0, 0.0, 3.0 * zeta * zeta); - case 20: return make_vector(4.0 * xi * xi * xi, 0.0, 0.0); - case 21: return make_vector(3.0 * xi * xi * eta, xi * xi * xi, 0.0); - case 22: return make_vector(2.0 * xi * eta * eta, 2.0 * xi * xi * eta, 0.0); - case 23: return make_vector(eta * eta * eta, 3.0 * xi * eta * eta, 0.0); - case 24: return make_vector(0.0, 4.0 * eta * eta * eta, 0.0); - case 25: return make_vector(3.0 * xi * xi * zeta, 0.0, xi * xi * xi); - case 26: return make_vector(2.0 * xi * eta * zeta, xi * 
xi * zeta, xi * xi * eta); - case 27: return make_vector(eta * eta * zeta, 2.0 * xi * eta * zeta, xi * eta * eta); - case 28: return make_vector(0.0, 3.0 * eta * eta * zeta, eta * eta * eta); - case 29: return make_vector(2.0 * xi * zeta * zeta, 0.0, 2.0 * xi * xi * zeta); - case 30: return make_vector(eta * zeta * zeta, xi * zeta * zeta, 2.0 * xi * eta * zeta); - case 31: return make_vector(0.0, 2.0 * eta * zeta * zeta, 2.0 * eta * eta * zeta); - case 32: return make_vector(zeta * zeta * zeta, 0.0, 3.0 * xi * zeta * zeta); - case 33: return make_vector(0.0, zeta * zeta * zeta, 3.0 * eta * zeta * zeta); - case 34: return make_vector(0.0, 0.0, 4.0 * zeta * zeta * zeta); - case 35: return make_vector(5.0 * xi * xi * xi * xi, 0.0, 0.0); - case 36: return make_vector(4.0 * xi * xi * xi * eta, xi * xi * xi * xi, 0.0); - case 37: return make_vector(3.0 * xi * xi * eta * eta, 2.0 * xi * xi * xi * eta, 0.0); - case 38: return make_vector(2.0 * xi * eta * eta * eta, 3.0 * xi * xi * eta * eta, 0.0); - case 39: return make_vector(eta * eta * eta * eta, 4.0 * xi * eta * eta * eta, 0.0); - case 40: return make_vector(0.0, 5.0 * eta * eta * eta * eta, 0.0); - case 41: return make_vector(4.0 * xi * xi * xi * zeta, 0.0, xi * xi * xi * xi); - case 42: return make_vector(3.0 * xi * xi * eta * zeta, xi * xi * xi * zeta, xi * xi * xi * eta); - case 43: return make_vector(2.0 * xi * eta * eta * zeta, 2.0 * xi * xi * eta * zeta, xi * xi * eta * eta); - case 44: return make_vector(eta * eta * eta * zeta, 3.0 * xi * eta * eta * zeta, xi * eta * eta * eta); - case 45: return make_vector(0.0, 4.0 * eta * eta * eta * zeta, eta * eta * eta * eta); - case 46: return make_vector(3.0 * xi * xi * zeta * zeta, 0.0, 2.0 * xi * xi * xi * zeta); - case 47: return make_vector(2.0 * xi * eta * zeta * zeta, xi * xi * zeta * zeta, 2.0 * xi * xi * eta * zeta); - case 48: return make_vector(eta * eta * zeta * zeta, 2.0 * xi * eta * zeta * zeta, 2.0 * xi * eta * eta * zeta); - case 49: return 
make_vector(0.0, 3.0 * eta * eta * zeta * zeta, 2.0 * eta * eta * eta * zeta); - case 50: return make_vector(2.0 * xi * zeta * zeta * zeta, 0.0, 3.0 * xi * xi * zeta * zeta); - case 51: return make_vector(eta * zeta * zeta * zeta, xi * zeta * zeta * zeta, 3.0 * xi * eta * zeta * zeta); - case 52: return make_vector(0.0, 2.0 * eta * zeta * zeta * zeta, 3.0 * eta * eta * zeta * zeta); - case 53: return make_vector(zeta * zeta * zeta * zeta, 0.0, 4.0 * xi * zeta * zeta * zeta); - case 54: return make_vector(0.0, zeta * zeta * zeta * zeta, 4.0 * eta * zeta * zeta * zeta); - case 55: return make_vector(0.0, 0.0, 5.0 * zeta * zeta * zeta * zeta); - default: return zero_vector(); - } - } -}; - -// ═══════════════════════════════════════════════════════════════════════════ // Per-topology FEEvaluator delegating specializations -// -// Each partial specialization fixes family=MONOMIAL and elem_type, leaving the -// polynomial Order as a template parameter, then inherits the matching impl. -// ═══════════════════════════════════════════════════════════════════════════ - -// ── 1-D ────────────────────────────────────────────────────────────────────── template struct FEEvaluator : MonomialImpl1D {}; @@ -878,7 +189,8 @@ struct FEEvaluator : MonomialImpl1D {}; template struct FEEvaluator : MonomialImpl1D {}; -// ── 2-D ────────────────────────────────────────────────────────────────────── +template +struct FEEvaluator : MonomialImpl1D {}; template struct FEEvaluator : MonomialImpl2D {}; @@ -898,8 +210,6 @@ struct FEEvaluator : MonomialImpl2D {}; template struct FEEvaluator : MonomialImpl2D {}; -// ── 3-D ────────────────────────────────────────────────────────────────────── - template struct FEEvaluator : MonomialImpl3D {}; @@ -927,6 +237,12 @@ struct FEEvaluator : MonomialImpl3D { template struct FEEvaluator : MonomialImpl3D {}; +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + template struct FEEvaluator : MonomialImpl3D 
{}; @@ -936,6 +252,9 @@ struct FEEvaluator : MonomialImpl3D template struct FEEvaluator : MonomialImpl3D {}; +template +struct FEEvaluator : MonomialImpl3D {}; + } // namespace libMesh::Kokkos #endif // LIBMESH_KOKKOS_FE_MONOMIAL_H diff --git a/include/gpu/kokkos_fe_types.h b/include/gpu/kokkos_fe_types.h index c266e6dc15a..d61d163c20e 100644 --- a/include/gpu/kokkos_fe_types.h +++ b/include/gpu/kokkos_fe_types.h @@ -1,19 +1,15 @@ // Kokkos FE type helpers. // -// Defines the FEShapeKey aggregate and device-callable dispatch functions used -// by both host-side assembly setup and device-side evaluation. -// -// Uses libMesh's own ElemType, FEFamily, and FEElemClass enums directly — -// no wrapper enums are needed. +// Shared FE topology/order support metadata lives in fe_shape_traits.h. +// This header keeps the Kokkos-facing hard-fail wrappers and namespace +// compatibility for existing FE device code. #ifndef LIBMESH_KOKKOS_FE_TYPES_H #define LIBMESH_KOKKOS_FE_TYPES_H -#include "libmesh/enum_elem_type.h" -#include "libmesh/enum_fe_family.h" #include "libmesh/enum_fe_elem_class.h" -#include "libmesh/enum_order.h" -// ElemMappingType (LAGRANGE_MAP, RATIONAL_BERNSTEIN_MAP) is defined in enum_elem_type.h +#include "libmesh/fe_reference_element_traits.h" +#include "libmesh/fe_shape_traits.h" #include "libmesh/libmesh_device.h" #ifndef LIBMESH_KOKKOS_COMPILATION # include "libmesh/libmesh_common.h" @@ -22,8 +18,17 @@ namespace libMesh::Kokkos { -// Bring FEElemClass into this namespace so existing unqualified uses compile. 
using libMesh::FEElemClass; +using libMesh::FEShapeKey; +using libMesh::is_monomial_2d_elem_type; +using libMesh::is_monomial_3d_elem_type; +using libMesh::lagrange_shape_topology_or_invalid; +using libMesh::lagrange_exact_n_dofs_or_zero; +using libMesh::monomial_exact_n_dofs_or_zero; +using libMesh::monomial_evaluator_dim_or_zero; +using libMesh::supports_shape; +using libMesh::supports_grad_shape; +using libMesh::supports_n_dofs; namespace detail { @@ -40,619 +45,49 @@ abort_unsupported(const char * msg) } // namespace detail -LIBMESH_DEVICE_INLINE bool -is_monomial_2d_elem_type(libMesh::ElemType elem_type) -{ - switch (elem_type) - { - case libMesh::C0POLYGON: - case libMesh::TRI3: - case libMesh::TRISHELL3: - case libMesh::TRI6: - case libMesh::TRI7: - case libMesh::QUAD4: - case libMesh::QUADSHELL4: - case libMesh::QUAD8: - case libMesh::QUADSHELL8: - case libMesh::QUAD9: - case libMesh::QUADSHELL9: - return true; - default: - return false; - } -} - -LIBMESH_DEVICE_INLINE bool -is_monomial_3d_elem_type(libMesh::ElemType elem_type, - bool include_pyramid18 = true) -{ - switch (elem_type) - { - case libMesh::TET4: - case libMesh::TET10: - case libMesh::TET14: - case libMesh::HEX8: - case libMesh::HEX20: - case libMesh::HEX27: - case libMesh::PRISM6: - case libMesh::PRISM15: - case libMesh::PRISM18: - case libMesh::PRISM20: - case libMesh::PRISM21: - case libMesh::PYRAMID5: - case libMesh::PYRAMID13: - case libMesh::PYRAMID14: - case libMesh::C0POLYHEDRON: - return true; - case libMesh::PYRAMID18: - return include_pyramid18; - default: - return false; - } -} - -// ── Shape function space key ────────────────────────────────────────────────── -// Uniquely identifies a libMesh FE space, including the exact element topology. -// This must be exact for LAGRANGE spaces, since libMesh distinguishes e.g. -// QUAD8 from QUAD9 and HEX20 from HEX27 at the same polynomial order. -// -// Trivially copyable; fits in a register (enum + enum + enum, no heap). 
- -struct FEShapeKey -{ - libMesh::FEFamily family; - libMesh::ElemType elem_type; - libMesh::Order order; -}; - -// ── Device-callable conversion helpers ─────────────────────────────────────── - -/// Return the Kokkos side topology used for dispatch for any side of parent -/// element type \p parent. -/// This helper is valid only for elements whose side topology is uniform. -/// Mixed-face elements such as prisms and pyramids require side-specific logic. -/// In 1D, libMesh sides are NODEELEM objects; this helper returns EDGE2 as the -/// internal surrogate topology used by the Kokkos map/shape path. LIBMESH_DEVICE_INLINE libMesh::ElemType get_side_topology(libMesh::ElemType parent) { - switch (parent) - { - // 1D: libMesh sides are NodeElem, but Kokkos dispatches them through - // a degenerate EDGE2 surrogate. - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return libMesh::EDGE2; - - // 2D first-order: sides are linear edges - case libMesh::TRI3: - case libMesh::QUAD4: - return libMesh::EDGE2; + const libMesh::ElemType side_topology = libMesh::side_topology_or_invalid(parent); - // 2D second-order: sides are quadratic edges - case libMesh::TRI6: - case libMesh::TRI7: - case libMesh::QUAD8: - case libMesh::QUAD9: - return libMesh::EDGE3; - - // 3D first-order: uniform-side-topology elements only - case libMesh::TET4: - return libMesh::TRI3; - case libMesh::HEX8: - return libMesh::QUAD4; - - // 3D second-order: uniform-side-topology elements only - case libMesh::TET10: - return libMesh::TRI6; - case libMesh::TET14: - return libMesh::TRI7; - case libMesh::HEX20: - return libMesh::QUAD8; - case libMesh::HEX27: - return libMesh::QUAD9; - - case libMesh::PRISM15: - case libMesh::PRISM18: - case libMesh::PYRAMID13: - case libMesh::PYRAMID14: - case libMesh::PRISM6: - case libMesh::PRISM20: - case libMesh::PRISM21: - case libMesh::PYRAMID5: - case libMesh::PYRAMID18: + if (side_topology == libMesh::INVALID_ELEM) + { + if 
(requires_side_specific_topology(parent)) detail::abort_unsupported("get_side_topology(): mixed-face elements require side-specific topology"); - return libMesh::INVALID_ELEM; - - default: + else detail::abort_unsupported("get_side_topology(): unsupported element type"); - return libMesh::INVALID_ELEM; // unreachable after abort + return libMesh::INVALID_ELEM; } -} - -/// Map an ElemType to its base geometric class (order-independent). -/// e.g. QUAD4 / QUAD8 / QUAD9 all return FEElemClass::QUAD. -LIBMESH_DEVICE_INLINE libMesh::FEElemClass -class_from_topology(libMesh::ElemType topo) -{ - switch (topo) - { - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return libMesh::FEElemClass::EDGE; - - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::TRI7: - return libMesh::FEElemClass::TRI; - - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - return libMesh::FEElemClass::QUAD; - - case libMesh::TET4: - case libMesh::TET10: - case libMesh::TET14: - return libMesh::FEElemClass::TET; - case libMesh::HEX8: - case libMesh::HEX20: - case libMesh::HEX27: - return libMesh::FEElemClass::HEX; - - case libMesh::PRISM6: - case libMesh::PRISM15: - case libMesh::PRISM18: - case libMesh::PRISM20: - case libMesh::PRISM21: - return libMesh::FEElemClass::PRISM; - - case libMesh::PYRAMID5: - case libMesh::PYRAMID13: - case libMesh::PYRAMID14: - case libMesh::PYRAMID18: - return libMesh::FEElemClass::PYRAMID; - - default: - detail::abort_unsupported("class_from_topology(): unsupported element type"); - return libMesh::FEElemClass::N_CLASSES; // unreachable after abort - } + return side_topology; } LIBMESH_DEVICE_INLINE libMesh::ElemType -lagrange_shape_topology_or_invalid(FEShapeKey key) -{ - switch (key.order) - { - case libMesh::FIRST: - switch (key.elem_type) - { - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return libMesh::EDGE2; - - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::TRI7: - return 
libMesh::TRI3; - - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - return libMesh::QUAD4; - - case libMesh::TET4: - case libMesh::TET10: - case libMesh::TET14: - return libMesh::TET4; - - case libMesh::HEX8: - case libMesh::HEX20: - case libMesh::HEX27: - return libMesh::HEX8; - - default: - return libMesh::INVALID_ELEM; - } - - case libMesh::SECOND: - switch (key.elem_type) - { - case libMesh::EDGE3: - return libMesh::EDGE3; - - case libMesh::TRI6: - case libMesh::TRI7: - return libMesh::TRI6; - - case libMesh::QUAD8: - return libMesh::QUAD8; - - case libMesh::QUAD9: - return libMesh::QUAD9; - - case libMesh::TET10: - case libMesh::TET14: - return libMesh::TET10; - - case libMesh::HEX20: - return libMesh::HEX20; - - case libMesh::HEX27: - return libMesh::HEX27; - - default: - return libMesh::INVALID_ELEM; - } - - default: - return libMesh::INVALID_ELEM; - } -} - -LIBMESH_DEVICE_INLINE unsigned int -lagrange_exact_n_dofs_or_zero(libMesh::ElemType elem_type, - libMesh::Order order) -{ - switch (order) - { - case libMesh::CONSTANT: - return (elem_type == libMesh::NODEELEM) ? 
1u : 0u; - - case libMesh::FIRST: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return 2; - - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::TRI7: - return 3; - - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - return 4; - - case libMesh::TET4: - case libMesh::TET10: - case libMesh::TET14: - return 4; - - case libMesh::HEX8: - case libMesh::HEX20: - case libMesh::HEX27: - return 8; - - case libMesh::PRISM6: - case libMesh::PRISM15: - case libMesh::PRISM18: - case libMesh::PRISM20: - case libMesh::PRISM21: - return 6; - - case libMesh::PYRAMID5: - case libMesh::PYRAMID13: - case libMesh::PYRAMID14: - case libMesh::PYRAMID18: - return 5; - - default: - return 0; - } - - case libMesh::SECOND: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE3: - return 3; - - case libMesh::TRI6: - case libMesh::TRI7: - return 6; - - case libMesh::QUAD8: - return 8; - - case libMesh::QUAD9: - return 9; - - case libMesh::TET10: - case libMesh::TET14: - return 10; - - case libMesh::HEX20: - return 20; - - case libMesh::HEX27: - return 27; - - case libMesh::PRISM15: - return 15; - - case libMesh::PRISM18: - case libMesh::PRISM20: - case libMesh::PRISM21: - return 18; - - case libMesh::PYRAMID13: - return 13; - - case libMesh::PYRAMID14: - case libMesh::PYRAMID18: - return 14; - - default: - return 0; - } - - case libMesh::THIRD: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE4: - return 4; - - case libMesh::TRI7: - return 7; - - case libMesh::TET14: - return 14; - - case libMesh::PRISM20: - return 20; - - case libMesh::PRISM21: - return 21; - - case libMesh::PYRAMID18: - return 18; - - default: - return 0; - } - - default: - return 0; - } -} - -LIBMESH_DEVICE_INLINE unsigned int -monomial_exact_n_dofs_or_zero(libMesh::ElemType elem_type, - libMesh::Order order) 
+get_side_topology(libMesh::ElemType parent, + unsigned int side) { - if (elem_type == libMesh::INVALID_ELEM) - return 0; - if (order < libMesh::CONSTANT) - return 0; - - switch (order) - { - case libMesh::CONSTANT: - return 1; - - case libMesh::FIRST: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return 2; - - default: - break; - } - - if (is_monomial_2d_elem_type(elem_type)) - return 3; - if (is_monomial_3d_elem_type(elem_type)) - return 4; - return 0; - - case libMesh::SECOND: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return 3; - - default: - break; - } - - if (is_monomial_2d_elem_type(elem_type)) - return 6; - if (is_monomial_3d_elem_type(elem_type)) - return 10; - return 0; - - case libMesh::THIRD: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return 4; + const libMesh::ElemType side_topology = libMesh::side_topology_or_invalid(parent, side); - default: - break; - } + if (side_topology != libMesh::INVALID_ELEM) + return side_topology; - if (is_monomial_2d_elem_type(elem_type)) - return 10; - if (is_monomial_3d_elem_type(elem_type)) - return 20; - return 0; - - case libMesh::FOURTH: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - return 5; - - default: - break; - } - - if (is_monomial_2d_elem_type(elem_type)) - return 15; - if (is_monomial_3d_elem_type(elem_type, false)) - return 35; - return 0; - - case libMesh::FIFTH: - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - return 6; - - default: - break; - } - - if (is_monomial_2d_elem_type(elem_type)) - return 21; - if (is_monomial_3d_elem_type(elem_type, false)) - return 56; - return 0; - - default: - { - const 
unsigned int p = static_cast(order); - - switch (elem_type) - { - case libMesh::NODEELEM: - return 1; - - case libMesh::EDGE2: - case libMesh::EDGE3: - return p + 1; - - default: - break; - } - - if (is_monomial_2d_elem_type(elem_type)) - return (p + 1) * (p + 2) / 2; - if (is_monomial_3d_elem_type(elem_type, false)) - return (p + 1) * (p + 2) * (p + 3) / 6; - return 0; - } - } + return get_side_topology(parent); } -LIBMESH_DEVICE_INLINE unsigned int -monomial_evaluator_dim_or_zero(libMesh::ElemType elem_type) +LIBMESH_DEVICE_INLINE libMesh::FEElemClass +class_from_topology(libMesh::ElemType topo) { - switch (elem_type) - { - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::EDGE4: - return 1; + const libMesh::FEElemClass elem_class = libMesh::class_from_topology_or_invalid(topo); - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::TRI7: - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - return 2; - - case libMesh::TET4: - case libMesh::TET10: - case libMesh::TET14: - case libMesh::HEX8: - case libMesh::HEX20: - case libMesh::HEX27: - case libMesh::PRISM6: - case libMesh::PRISM15: - case libMesh::PRISM18: - case libMesh::PRISM20: - case libMesh::PRISM21: - case libMesh::PYRAMID5: - case libMesh::PYRAMID13: - case libMesh::PYRAMID14: - case libMesh::PYRAMID18: - return 3; - - default: - return 0; - } -} - -/// Return true iff the current Kokkos physics evaluators can evaluate \p key. -/// This boundary is the intersection of: -/// 1. exact libMesh-valid (family, elem_type, order) keys, and -/// 2. currently implemented Kokkos evaluator topologies/orders. 
-LIBMESH_DEVICE_INLINE bool -supports_shape(FEShapeKey key) -{ - switch (key.family) + if (elem_class == libMesh::FEElemClass::N_CLASSES) { - case libMesh::LAGRANGE: - return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && - lagrange_shape_topology_or_invalid(key) != libMesh::INVALID_ELEM; - - case libMesh::MONOMIAL: - return monomial_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && - monomial_evaluator_dim_or_zero(key.elem_type) != 0 && - key.order >= libMesh::CONSTANT && - key.order <= libMesh::FIFTH; - - default: - return false; + detail::abort_unsupported("class_from_topology(): unsupported element type"); + return libMesh::FEElemClass::N_CLASSES; } -} -LIBMESH_DEVICE_INLINE bool -supports_grad_shape(FEShapeKey key) -{ - return supports_shape(key); -} - -LIBMESH_DEVICE_INLINE bool -supports_n_dofs(FEShapeKey key) -{ - return supports_shape(key); + return elem_class; } -/// Return the number of DOFs for a physics FE space described by \p key, -/// restricted to the current Kokkos evaluator support boundary. LIBMESH_DEVICE_INLINE unsigned int n_dofs(FEShapeKey key) { @@ -662,18 +97,7 @@ n_dofs(FEShapeKey key) return 0; } - switch (key.family) - { - case libMesh::LAGRANGE: - return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order); - - case libMesh::MONOMIAL: - return monomial_exact_n_dofs_or_zero(key.elem_type, key.order); - - default: - detail::abort_unsupported("n_dofs(FEShapeKey): unsupported FE family"); - return 0; - } + return libMesh::n_dofs_or_zero(key); } } // namespace libMesh::Kokkos diff --git a/include/gpu/kokkos_quadrature.h b/include/gpu/kokkos_quadrature.h index d8b94c56a5c..e0d24db8aeb 100644 --- a/include/gpu/kokkos_quadrature.h +++ b/include/gpu/kokkos_quadrature.h @@ -1,636 +1,192 @@ -// Kokkos device-compatible Gauss quadrature rules. -// -// All evaluation functions are LIBMESH_DEVICE_INLINE — callable from both -// host and GPU device code. -// -// GaussLegendre1D: 1-D Gauss-Legendre on [-1,1], 1-7 point rules. 
-// GaussQuadrature: Full quadrature dispatcher for all supported topologies. -// - n_points(topo, order): number of quadrature points -// - point(topo, order, qp): reference coordinate of qp-th point -// - weight(topo, order, qp): weight of qp-th point -// -// Values match the libMesh QGauss implementation. +// Kokkos FE access to the shared libMesh Gauss quadrature rule tables. #ifndef LIBMESH_KOKKOS_QUADRATURE_H #define LIBMESH_KOKKOS_QUADRATURE_H -#include "kokkos_scalar_types.h" +#include "kokkos_fe_base.h" #include "libmesh/enum_elem_type.h" -#include +#include "libmesh/quadrature_gauss_rules.h" + #include namespace libMesh::Kokkos { -// --------------------------------------------------------------------------- -// 1-D Gauss-Legendre quadrature on [-1, 1] -// --------------------------------------------------------------------------- - struct GaussLegendre1D { LIBMESH_DEVICE_INLINE static unsigned int n_points(unsigned int alg_order) { - const unsigned int n = (alg_order + 2u) / 2u; - return (n < 1u) ? 1u : (n > 7u ? 
7u : n); + return Quadrature::Gauss::gauss_legendre_rule(alg_order).count; } - LIBMESH_DEVICE_INLINE static Real point(unsigned int n, unsigned int i) + LIBMESH_DEVICE_INLINE static Real point(unsigned int alg_order, unsigned int i) { - switch (n) - { - case 1: return 0.0; - case 2: - switch (i) - { - case 0: return -5.7735026918962576450914878050196e-01; - case 1: return 5.7735026918962576450914878050196e-01; - default: return 0.0; - } - case 3: - switch (i) - { - case 0: return -7.7459666924148337703585307995648e-01; - case 1: return 0.0; - case 2: return 7.7459666924148337703585307995648e-01; - default: return 0.0; - } - case 4: - switch (i) - { - case 0: return -8.6113631159405257522394648889281e-01; - case 1: return -3.3998104358485626480266575910324e-01; - case 2: return 3.3998104358485626480266575910324e-01; - case 3: return 8.6113631159405257522394648889281e-01; - default: return 0.0; - } - case 5: - switch (i) - { - case 0: return -9.0617984593866399279762687829939e-01; - case 1: return -5.3846931010568309103631442070021e-01; - case 2: return 0.0; - case 3: return 5.3846931010568309103631442070021e-01; - case 4: return 9.0617984593866399279762687829939e-01; - default: return 0.0; - } - case 6: - switch (i) - { - case 0: return -9.3246951420315202781230155449399e-01; - case 1: return -6.6120938646626451366139959501991e-01; - case 2: return -2.3861918608319690863050172168071e-01; - case 3: return 2.3861918608319690863050172168071e-01; - case 4: return 6.6120938646626451366139959501991e-01; - case 5: return 9.3246951420315202781230155449399e-01; - default: return 0.0; - } - case 7: - switch (i) - { - case 0: return -9.4910791234275852452618968404785e-01; - case 1: return -7.4153118559939443986386477328079e-01; - case 2: return -4.0584515137739716690660641207696e-01; - case 3: return 0.0; - case 4: return 4.0584515137739716690660641207696e-01; - case 5: return 7.4153118559939443986386477328079e-01; - case 6: return 9.4910791234275852452618968404785e-01; - 
default: return 0.0; - } - default: return 0.0; - } + const auto rule = Quadrature::Gauss::gauss_legendre_rule(alg_order); + return (i < rule.count) ? rule.points[i] : 0.0; } - LIBMESH_DEVICE_INLINE static Real weight(unsigned int n, unsigned int i) + LIBMESH_DEVICE_INLINE static Real weight(unsigned int alg_order, unsigned int i) { - switch (n) - { - case 1: return 2.0; - case 2: return 1.0; - case 3: - switch (i) - { - case 0: case 2: return 5.5555555555555555555555555555556e-01; - case 1: return 8.8888888888888888888888888888889e-01; - default: return 0.0; - } - case 4: - switch (i) - { - case 0: case 3: return 3.4785484513745385737306394922200e-01; - case 1: case 2: return 6.5214515486254614262693605077800e-01; - default: return 0.0; - } - case 5: - switch (i) - { - case 0: case 4: return 2.3692688505618908751426404071992e-01; - case 1: case 3: return 4.7862867049936646804129151483564e-01; - case 2: return 5.6888888888888888888888888888889e-01; - default: return 0.0; - } - case 6: - switch (i) - { - case 0: case 5: return 1.7132449237917034504029614217273e-01; - case 1: case 4: return 3.6076157304813860756983351383772e-01; - case 2: case 3: return 4.6791393457269104738987034398955e-01; - default: return 0.0; - } - case 7: - switch (i) - { - case 0: case 6: return 1.2948496616886969327061143267908e-01; - case 1: case 5: return 2.7970539148927666790146777142378e-01; - case 2: case 4: return 3.8183005050511894495036977548898e-01; - case 3: return 4.1795918367346938775510204081633e-01; - default: return 0.0; - } - default: return 0.0; - } + const auto rule = Quadrature::Gauss::gauss_legendre_rule(alg_order); + return (i < rule.count) ? 
rule.weights[i] : 0.0; } }; -// --------------------------------------------------------------------------- -// GaussQuadrature — device-callable quadrature for all supported topologies -// -// Coordinate conventions (same as libMesh): -// EDGE: xi in [-1,1] -// QUAD: (xi,eta) in [-1,1]^2, tensor product -// HEX: (xi,eta,zeta) in [-1,1]^3, tensor product -// TRI: (x,y) on unit triangle {(0,0),(1,0),(0,1)} -// TET: (x,y,z) on unit tet {(0,0,0),(1,0,0),(0,1,0),(0,0,1)} -// --------------------------------------------------------------------------- - struct GaussQuadrature { - /// Number of quadrature points for a given topology and polynomial order. LIBMESH_DEVICE_INLINE static unsigned int n_points(libMesh::ElemType topo, unsigned int order) { switch (topo) { - case libMesh::EDGE2: case libMesh::EDGE3: + case libMesh::EDGE2: + case libMesh::EDGE3: return GaussLegendre1D::n_points(order); - case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: { const unsigned int n = GaussLegendre1D::n_points(order); return n * n; } - case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: { const unsigned int n = GaussLegendre1D::n_points(order); return n * n * n; } - case libMesh::TRI3: case libMesh::TRI6: - switch (order) - { - case 0: case 1: return 1; - case 2: return 3; - case 3: return 4; - case 4: return 6; - case 5: return 7; - default: return 12; - } + case libMesh::TRI3: + case libMesh::TRI6: + return Quadrature::Gauss::triangle_rule(order).count; - case libMesh::TET4: case libMesh::TET10: - switch (order) - { - case 0: case 1: return 1; - case 2: return 4; - case 3: return 5; - case 4: return 11; - case 5: return 14; - default: return 24; - } + case libMesh::TET4: + case libMesh::TET10: + return Quadrature::Gauss::tetrahedron_rule(order).count; - default: return 0; + default: + return 0; } } - /// Reference coordinate of 
the qp-th quadrature point. LIBMESH_DEVICE_INLINE static RealVector point(libMesh::ElemType topo, unsigned int order, unsigned int qp) { switch (topo) { - case libMesh::EDGE2: case libMesh::EDGE3: - return make_vector(GaussLegendre1D::point(GaussLegendre1D::n_points(order), qp), 0.0, 0.0); + case libMesh::EDGE2: + case libMesh::EDGE3: + return make_vector(GaussLegendre1D::point(order, qp), 0.0, 0.0); - case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: { - const unsigned int n = GaussLegendre1D::n_points(order); + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return zero_vector(); const unsigned int i = qp % n; const unsigned int j = qp / n; - return make_vector(GaussLegendre1D::point(n, i), - GaussLegendre1D::point(n, j), 0.0); + return make_vector(GaussLegendre1D::point(order, i), + GaussLegendre1D::point(order, j), + 0.0); } - case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: { - const unsigned int n = GaussLegendre1D::n_points(order); + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return zero_vector(); const unsigned int i = qp % n; const unsigned int j = (qp / n) % n; const unsigned int k = qp / (n * n); - return make_vector(GaussLegendre1D::point(n, i), - GaussLegendre1D::point(n, j), - GaussLegendre1D::point(n, k)); - } - - case libMesh::TRI3: case libMesh::TRI6: - return tri_point(order, qp); - - case libMesh::TET4: case libMesh::TET10: - return tet_point(order, qp); - - default: return zero_vector(); - } - } - - /// Weight of the qp-th quadrature point. 
- LIBMESH_DEVICE_INLINE static Real - weight(libMesh::ElemType topo, unsigned int order, unsigned int qp) - { - switch (topo) - { - case libMesh::EDGE2: case libMesh::EDGE3: - { - const unsigned int n = GaussLegendre1D::n_points(order); - return GaussLegendre1D::weight(n, qp); + return make_vector(GaussLegendre1D::point(order, i), + GaussLegendre1D::point(order, j), + GaussLegendre1D::point(order, k)); } - case libMesh::QUAD4: case libMesh::QUAD8: case libMesh::QUAD9: + case libMesh::TRI3: + case libMesh::TRI6: { - const unsigned int n = GaussLegendre1D::n_points(order); - return GaussLegendre1D::weight(n, qp % n) * - GaussLegendre1D::weight(n, qp / n); + const auto rule = Quadrature::Gauss::triangle_rule(order); + return (qp < rule.count) ? make_vector(rule.points[qp].x, rule.points[qp].y, 0.0) : zero_vector(); } - case libMesh::HEX8: case libMesh::HEX20: case libMesh::HEX27: + case libMesh::TET4: + case libMesh::TET10: { - const unsigned int n = GaussLegendre1D::n_points(order); - return GaussLegendre1D::weight(n, qp % n) * - GaussLegendre1D::weight(n, (qp / n) % n) * - GaussLegendre1D::weight(n, qp / (n * n)); + const auto rule = Quadrature::Gauss::tetrahedron_rule(order); + return (qp < rule.count) + ? 
make_vector(rule.points[qp].x, rule.points[qp].y, rule.points[qp].z) + : zero_vector(); } - case libMesh::TRI3: case libMesh::TRI6: - return tri_weight(order, qp); - - case libMesh::TET4: case libMesh::TET10: - return tet_weight(order, qp); - - default: return 0.0; - } - } - -private: - // ── Triangle rules ──────────────────────────────────────────────────────── - - LIBMESH_DEVICE_INLINE static RealVector - tri_point(unsigned int order, unsigned int qp) - { - switch (order) - { - case 0: case 1: - return make_vector(1.0 / 3.0, 1.0 / 3.0, 0.0); - - case 2: - switch (qp) - { - case 0: return make_vector(2.0 / 3.0, 1.0 / 6.0, 0.0); - case 1: return make_vector(1.0 / 6.0, 2.0 / 3.0, 0.0); - case 2: return make_vector(1.0 / 6.0, 1.0 / 6.0, 0.0); - default: return zero_vector(); - } - - case 3: - switch (qp) - { - case 0: return make_vector(1.5505102572168219018e-01, 1.7855872826361642312e-01, 0.0); - case 1: return make_vector(6.4494897427831780982e-01, 7.5031110222608118177e-02, 0.0); - case 2: return make_vector(1.5505102572168219018e-01, 6.6639024601470138670e-01, 0.0); - case 3: return make_vector(6.4494897427831780982e-01, 2.8001991549907407200e-01, 0.0); - default: return zero_vector(); - } - - case 4: - { - constexpr Real a1 = 4.4594849091596488632e-01, b1 = 1.0 - 2.0 * a1; - constexpr Real a2 = 9.1576213509770743460e-02, b2 = 1.0 - 2.0 * a2; - switch (qp) - { - case 0: return make_vector(a1, a1, 0.0); - case 1: return make_vector(a1, b1, 0.0); - case 2: return make_vector(b1, a1, 0.0); - case 3: return make_vector(a2, a2, 0.0); - case 4: return make_vector(a2, b2, 0.0); - case 5: return make_vector(b2, a2, 0.0); - default: return zero_vector(); - } - } - - case 5: - { - const Real sq15 = 3.872983346207417; // sqrt(15) - const Real a1 = 2.0 / 7.0 + sq15 / 21.0; - const Real a2 = 2.0 / 7.0 - sq15 / 21.0; - const Real b1 = 1.0 - 2.0 * a1, b2 = 1.0 - 2.0 * a2; - switch (qp) - { - case 0: return make_vector(1.0 / 3.0, 1.0 / 3.0, 0.0); - case 1: return 
make_vector(a1, a1, 0.0); - case 2: return make_vector(a1, b1, 0.0); - case 3: return make_vector(b1, a1, 0.0); - case 4: return make_vector(a2, a2, 0.0); - case 5: return make_vector(a2, b2, 0.0); - case 6: return make_vector(b2, a2, 0.0); - default: return zero_vector(); - } - } - - case 6: - { - constexpr Real a1 = 2.4928674517091042129163855310701908e-01; - constexpr Real a2 = 6.3089014491502228340331602870819157e-02; - constexpr Real a3 = 3.1035245103378440541660773395655215e-01; - constexpr Real b1 = 1.0 - 2.0 * a1; - constexpr Real b2 = 1.0 - 2.0 * a2; - constexpr Real b3 = 6.3650249912139864723014259441204970e-01; - constexpr Real c3 = 1.0 - a3 - b3; - switch (qp) - { - case 0: return make_vector(a1, a1, 0.0); - case 1: return make_vector(a1, b1, 0.0); - case 2: return make_vector(b1, a1, 0.0); - case 3: return make_vector(a2, a2, 0.0); - case 4: return make_vector(a2, b2, 0.0); - case 5: return make_vector(b2, a2, 0.0); - case 6: return make_vector(a3, b3, 0.0); - case 7: return make_vector(b3, a3, 0.0); - case 8: return make_vector(a3, c3, 0.0); - case 9: return make_vector(c3, a3, 0.0); - case 10: return make_vector(b3, c3, 0.0); - case 11: return make_vector(c3, b3, 0.0); - default: return zero_vector(); - } - } - - default: // order >= 7: 12-point Ro3-invariant rule - { - constexpr Real rd[4][2] = { - {6.2382265094402118174e-02, 6.7517867073916085443e-02}, - {5.5225456656926611737e-02, 3.2150249385198182267e-01}, - {3.4324302945097146470e-02, 6.6094919618673565761e-01}, - {5.1584233435359177926e-01, 2.7771616697639178257e-01} - }; - const unsigned int row = qp / 3; - const unsigned int sub = qp % 3; - if (row >= 4) - return zero_vector(); - const Real z1 = rd[row][0], z2 = rd[row][1], z3 = 1.0 - z1 - z2; - switch (sub) - { - case 0: return make_vector(z1, z2, 0.0); - case 1: return make_vector(z3, z1, 0.0); - case 2: return make_vector(z2, z3, 0.0); - default: return zero_vector(); - } - } - } - } - - LIBMESH_DEVICE_INLINE static Real - 
tri_weight(unsigned int order, unsigned int qp) - { - switch (order) - { - case 0: case 1: return 0.5; - case 2: return 1.0 / 6.0; - case 3: return (qp % 2 == 0) ? 1.5902069087198858470e-01 : 9.0979309128011415303e-02; - case 4: return (qp < 3) ? 1.1169079483900573285e-01 : 5.4975871827660933819e-02; - case 5: - { - if (qp == 0) - return 9.0 / 80.0; - const Real sq15 = 3.872983346207417; - return (qp <= 3) ? (31.0 / 480.0 + sq15 / 2400.0) : (31.0 / 480.0 - sq15 / 2400.0); - } - case 6: - { - if (qp <= 2) - return 5.8393137863189683012644805692789721e-02; - if (qp <= 5) - return 2.5422453185103408460468404553434492e-02; - return 4.1425537809186787596776728210221227e-02; - } default: - { - constexpr Real wts[4] = { - 2.6517028157436251429e-02, 4.3881408714446055037e-02, - 2.8775042784981585738e-02, 6.7493187009802774463e-02 - }; - return (qp / 3 < 4) ? wts[qp / 3] : 0.0; - } + return zero_vector(); } } - // ── Tetrahedral rules ───────────────────────────────────────────────────── - - LIBMESH_DEVICE_INLINE static RealVector - tet_point(unsigned int order, unsigned int qp) + LIBMESH_DEVICE_INLINE static Real + weight(libMesh::ElemType topo, unsigned int order, unsigned int qp) { - switch (order) + switch (topo) { - case 0: case 1: - return make_vector(0.25, 0.25, 0.25); + case libMesh::EDGE2: + case libMesh::EDGE3: + return GaussLegendre1D::weight(order, qp); - case 2: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: { - const Real b = 0.25 * (1.0 - 1.0 / 2.2360679774997896964); // 1/sqrt(5) - const Real a = 1.0 - 3.0 * b; - switch (qp) - { - case 0: return make_vector(a, b, b); - case 1: return make_vector(b, a, b); - case 2: return make_vector(b, b, a); - case 3: return make_vector(b, b, b); - default: return zero_vector(); - } + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return 0.0; + return GaussLegendre1D::weight(order, qp % n) * + GaussLegendre1D::weight(order, qp / n); } 
- case 3: - switch (qp) - { - case 0: return make_vector(0.25, 0.25, 0.25); - case 1: return make_vector(0.5, 1.0 / 6.0, 1.0 / 6.0); - case 2: return make_vector(1.0 / 6.0, 0.5, 1.0 / 6.0); - case 3: return make_vector(1.0 / 6.0, 1.0 / 6.0, 0.5); - case 4: return make_vector(1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0); - default: return zero_vector(); - } - - case 4: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: { - constexpr Real a1 = 2.5e-01; - constexpr Real a2 = 7.85714285714285714e-01, b2 = 7.14285714285714285e-02; - constexpr Real a3 = 3.99403576166799219e-01, b3 = 1.00596423833200785e-01; - switch (qp) - { - case 0: return make_vector(a1, a1, a1); - case 1: return make_vector(a2, b2, b2); - case 2: return make_vector(b2, a2, b2); - case 3: return make_vector(b2, b2, a2); - case 4: return make_vector(b2, b2, b2); - case 5: return make_vector(a3, a3, b3); - case 6: return make_vector(a3, b3, b3); - case 7: return make_vector(b3, b3, a3); - case 8: return make_vector(b3, a3, b3); - case 9: return make_vector(b3, a3, a3); - case 10: return make_vector(a3, b3, a3); - default: return zero_vector(); - } + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return 0.0; + return GaussLegendre1D::weight(order, qp % n) * + GaussLegendre1D::weight(order, (qp / n) % n) * + GaussLegendre1D::weight(order, qp / (n * n)); } - case 5: + case libMesh::TRI3: + case libMesh::TRI6: { - constexpr Real af[3] = {3.1088591926330060980e-01, - 9.2735250310891226402e-02, - 4.5503704125649649492e-02}; - if (qp < 8) - { - const unsigned int g = qp / 4; - const unsigned int sub = qp % 4; - const Real ag = af[g], bg = 1.0 - 3.0 * ag; - switch (sub) - { - case 0: return make_vector(ag, ag, ag); - case 1: return make_vector(ag, bg, ag); - case 2: return make_vector(bg, ag, ag); - case 3: return make_vector(ag, ag, bg); - default: return zero_vector(); - } - } - else - { - const Real a2 = af[2], b2 = 0.5 * (1.0 - 2.0 * 
a2); - switch (qp - 8) - { - case 0: return make_vector(b2, b2, a2); - case 1: return make_vector(b2, a2, a2); - case 2: return make_vector(a2, a2, b2); - case 3: return make_vector(a2, b2, a2); - case 4: return make_vector(b2, a2, b2); - case 5: return make_vector(a2, b2, b2); - default: return zero_vector(); - } - } + const auto rule = Quadrature::Gauss::triangle_rule(order); + return (qp < rule.count) ? rule.points[qp].w : 0.0; } - default: // order >= 6: 24-point Keast rule + case libMesh::TET4: + case libMesh::TET10: { - constexpr Real data[4][3] = { - {3.56191386222544953e-01, 2.14602871259151684e-01, 0.0}, - {8.77978124396165982e-01, 4.06739585346113397e-02, 0.0}, - {3.29863295731730594e-02, 3.22337890142275646e-01, 0.0}, - {0.0, 0.0, 0.0} // 12-perm group handled separately - }; - - if (qp < 12) - { - // Three 4-permutation groups - const unsigned int grp = qp / 4; - const unsigned int sub = qp % 4; - const Real a = data[grp][0], b = data[grp][1]; - switch (sub) - { - case 0: return make_vector(a, b, b); - case 1: return make_vector(b, a, b); - case 2: return make_vector(b, b, a); - case 3: return make_vector(b, b, b); - default: return zero_vector(); - } - } - else - { - // 12-permutation group - constexpr Real a4 = 6.36610018750175299e-02; - constexpr Real b4 = 2.69672331458315867e-01; - constexpr Real c4 = 6.03005664791649076e-01; - switch (qp - 12) - { - case 0: return make_vector(a4, a4, b4); - case 1: return make_vector(a4, a4, c4); - case 2: return make_vector(b4, a4, a4); - case 3: return make_vector(c4, a4, a4); - case 4: return make_vector(a4, b4, a4); - case 5: return make_vector(a4, c4, a4); - case 6: return make_vector(a4, b4, c4); - case 7: return make_vector(a4, c4, b4); - case 8: return make_vector(b4, a4, c4); - case 9: return make_vector(b4, c4, a4); - case 10: return make_vector(c4, a4, b4); - case 11: return make_vector(c4, b4, a4); - default: return zero_vector(); - } - } + const auto rule = Quadrature::Gauss::tetrahedron_rule(order); + 
return (qp < rule.count) ? rule.points[qp].w : 0.0; } - } - } - LIBMESH_DEVICE_INLINE static Real - tet_weight(unsigned int order, unsigned int qp) - { - switch (order) - { - case 0: case 1: return 1.0 / 6.0; - case 2: return 1.0 / 24.0; - case 3: return (qp == 0) ? -2.0 / 15.0 : 0.075; - case 4: - { - if (qp == 0) - return -1.31555555555555556e-02; - if (qp <= 4) - return 7.62222222222222222e-03; - return 2.48888888888888889e-02; - } - case 5: - { - constexpr Real wf[3] = {1.8781320953002641800e-02, - 1.2248840519393658257e-02, - 7.0910034628469110730e-03}; - if (qp < 4) - return wf[0]; - if (qp < 8) - return wf[1]; - return wf[2]; - } default: - { - constexpr Real wts[4] = {6.65379170969464506e-03, - 1.67953517588677620e-03, - 9.22619692394239843e-03, - 8.03571428571428248e-03}; - if (qp < 4) - return wts[0]; - if (qp < 8) - return wts[1]; - if (qp < 12) - return wts[2]; - return wts[3]; - } + return 0.0; } } }; -// --------------------------------------------------------------------------- -// fill_quadrature — host-side convenience wrapper -// -// Fills std::vectors using the device-callable GaussQuadrature functions. -// --------------------------------------------------------------------------- - inline void fill_quadrature(libMesh::ElemType topo, unsigned int order, diff --git a/include/gpu/kokkos_scalar_types.h b/include/gpu/kokkos_scalar_types.h deleted file mode 100644 index 7584819413b..00000000000 --- a/include/gpu/kokkos_scalar_types.h +++ /dev/null @@ -1,186 +0,0 @@ -// libMesh Kokkos device-compatible scalar types. -// -// This header provides dimension-aware Kokkos aliases/helpers that mirror -// libMesh host numerics at LIBMESH_DIM=1/2/3. 
- -#ifndef LIBMESH_KOKKOS_SCALAR_TYPES_H -#define LIBMESH_KOKKOS_SCALAR_TYPES_H - -#include "libmesh/libmesh_common.h" -#include "libmesh/libmesh_device.h" -#include "libmesh/type_vector.h" -#include "libmesh/type_tensor.h" - -namespace libMesh::Kokkos -{ - -using Real = libMesh::Real; -using RealVector = libMesh::TypeVector; -using RealTensor = libMesh::TypeTensor; - -template -LIBMESH_DEVICE_INLINE -VectorType load_vector(const ViewType & view, const unsigned int i) -{ - VectorType v; - v.zero(); - - for (unsigned int d = 0; d < LIBMESH_DIM; ++d) - v(d) = view(i, d); - - return v; -} - -template -LIBMESH_DEVICE_INLINE -void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) -{ - for (unsigned int d = 0; d < LIBMESH_DIM; ++d) - view(i, d) = v(d); -} - -template -LIBMESH_DEVICE_INLINE -Real vector_component(const ViewType & view, const unsigned int i, const unsigned int component) -{ - if (component < LIBMESH_DIM) - return view(i, component); - - return Real(0); -} - -template -LIBMESH_DEVICE_INLINE -TensorType load_tensor(const ViewType & view, const unsigned int i) -{ - TensorType T; - T.zero(); - - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - T(row, col) = view(i, row, col); - - return T; -} - -template -LIBMESH_DEVICE_INLINE -void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) -{ - for (unsigned int row = 0; row < LIBMESH_DIM; ++row) - for (unsigned int col = 0; col < LIBMESH_DIM; ++col) - view(i, row, col) = T(row, col); -} - -template -LIBMESH_DEVICE_INLINE -Real tensor_component(const ViewType & view, - const unsigned int i, - const unsigned int row, - const unsigned int col) -{ - if (row < LIBMESH_DIM && col < LIBMESH_DIM) - return view(i, row, col); - - return Real(0); -} - -LIBMESH_DEVICE_INLINE -RealVector zero_vector() -{ - RealVector v; - v.zero(); - return v; -} - -LIBMESH_DEVICE_INLINE -RealVector make_vector(const Real x, 
const Real y = 0, const Real z = 0) -{ - RealVector v = zero_vector(); - - v(0) = x; - -#if LIBMESH_DIM > 1 - v(1) = y; -#else - libmesh_assert_equal_to(y, Real(0)); -#endif - -#if LIBMESH_DIM > 2 - v(2) = z; -#else - libmesh_assert_equal_to(z, Real(0)); -#endif - - return v; -} - -LIBMESH_DEVICE_INLINE -RealTensor zero_tensor() -{ - RealTensor J; - J.zero(); - return J; -} - -LIBMESH_DEVICE_INLINE -RealTensor leading_identity(const unsigned int dim = LIBMESH_DIM) -{ - libmesh_assert_less_equal(dim, LIBMESH_DIM); - - RealTensor I = zero_tensor(); - for (unsigned int i = 0; i < dim; ++i) - I(i, i) = Real(1); - - return I; -} - -LIBMESH_DEVICE_INLINE -Real leading_determinant(const RealTensor & J, const unsigned int dim = LIBMESH_DIM) -{ - libmesh_assert_less_equal(dim, LIBMESH_DIM); - - if (dim == 0) - return Real(1); - - if (dim == 1) - return J(0, 0); - - if (dim == 2) - return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); - - return J.det(); -} - -LIBMESH_DEVICE_INLINE -RealTensor leading_inverse(const RealTensor & J, const unsigned int dim = LIBMESH_DIM) -{ - libmesh_assert_less_equal(dim, LIBMESH_DIM); - - if (dim == 0) - return leading_identity(0); - - if (dim == 1) - { - RealTensor inv = zero_tensor(); - inv(0, 0) = Real(1) / J(0, 0); - return inv; - } - - if (dim == 2) - { - const Real inv_det = Real(1) / leading_determinant(J, dim); - RealTensor inv = zero_tensor(); - inv(0, 0) = J(1, 1) * inv_det; - inv(0, 1) = -J(0, 1) * inv_det; - inv(1, 0) = -J(1, 0) * inv_det; - inv(1, 1) = J(0, 0) * inv_det; - return inv; - } - - return J.inverse(); -} - -} // namespace libMesh::Kokkos - -#endif // LIBMESH_KOKKOS_SCALAR_TYPES_H diff --git a/include/include_HEADERS b/include/include_HEADERS index a17fe5f508e..8c4283f5aea 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -89,8 +89,13 @@ include_HEADERS = \ fe/fe_interface.h \ fe/fe_interface_macros.h \ fe/fe_lagrange_shape_1D.h \ + fe/fe_reference_element_traits.h \ + fe/fe_serendipity_lagrange.h \ + 
fe/fe_simplex_lagrange.h \ + fe/fe_tensor_product_lagrange.h \ fe/fe_macro.h \ fe/fe_map.h \ + fe/fe_shape_traits.h \ fe/fe_transformation_base.h \ fe/fe_type.h \ fe/fe_xyz_map.h \ @@ -350,6 +355,7 @@ include_HEADERS = \ quadrature/quadrature_composite.h \ quadrature/quadrature_conical.h \ quadrature/quadrature_gauss.h \ + quadrature/quadrature_gauss_rules.h \ quadrature/quadrature_gauss_lobatto.h \ quadrature/quadrature_gm.h \ quadrature/quadrature_grid.h \ diff --git a/include/quadrature/quadrature_gauss_rules.h b/include/quadrature/quadrature_gauss_rules.h new file mode 100644 index 00000000000..a1acfcf4adb --- /dev/null +++ b/include/quadrature/quadrature_gauss_rules.h @@ -0,0 +1,373 @@ +#ifndef LIBMESH_QUADRATURE_GAUSS_RULES_H +#define LIBMESH_QUADRATURE_GAUSS_RULES_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" + +namespace libMesh::Quadrature::Gauss +{ + +struct Rule1D +{ + unsigned int count; + const Real * points; + const Real * weights; +}; + +struct PointWeight2D +{ + Real x; + Real y; + Real w; +}; + +struct Rule2D +{ + unsigned int count; + const PointWeight2D * points; +}; + +struct PointWeight3D +{ + Real x; + Real y; + Real z; + Real w; +}; + +struct Rule3D +{ + unsigned int count; + const PointWeight3D * points; +}; + +inline constexpr Real gauss_legendre_points_1[] = {0._R}; +inline constexpr Real gauss_legendre_weights_1[] = {2._R}; + +inline constexpr Real gauss_legendre_points_2[] = { + -5.7735026918962576450914878050196e-01_R, + 5.7735026918962576450914878050196e-01_R +}; +inline constexpr Real gauss_legendre_weights_2[] = {1._R, 1._R}; + +inline constexpr Real gauss_legendre_points_3[] = { + -7.7459666924148337703585307995648e-01_R, + 0._R, + 7.7459666924148337703585307995648e-01_R +}; +inline constexpr Real gauss_legendre_weights_3[] = { + 5.5555555555555555555555555555556e-01_R, + 8.8888888888888888888888888888889e-01_R, + 5.5555555555555555555555555555556e-01_R +}; + +inline constexpr Real 
gauss_legendre_points_4[] = { + -8.6113631159405257522394648889281e-01_R, + -3.3998104358485626480266575910324e-01_R, + 3.3998104358485626480266575910324e-01_R, + 8.6113631159405257522394648889281e-01_R +}; +inline constexpr Real gauss_legendre_weights_4[] = { + 3.4785484513745385737306394922200e-01_R, + 6.5214515486254614262693605077800e-01_R, + 6.5214515486254614262693605077800e-01_R, + 3.4785484513745385737306394922200e-01_R +}; + +inline constexpr Real gauss_legendre_points_5[] = { + -9.0617984593866399279762687829939e-01_R, + -5.3846931010568309103631442070021e-01_R, + 0._R, + 5.3846931010568309103631442070021e-01_R, + 9.0617984593866399279762687829939e-01_R +}; +inline constexpr Real gauss_legendre_weights_5[] = { + 2.3692688505618908751426404071992e-01_R, + 4.7862867049936646804129151483564e-01_R, + 5.6888888888888888888888888888889e-01_R, + 4.7862867049936646804129151483564e-01_R, + 2.3692688505618908751426404071992e-01_R +}; + +inline constexpr Real gauss_legendre_points_6[] = { + -9.3246951420315202781230155449399e-01_R, + -6.6120938646626451366139959501991e-01_R, + -2.3861918608319690863050172168071e-01_R, + 2.3861918608319690863050172168071e-01_R, + 6.6120938646626451366139959501991e-01_R, + 9.3246951420315202781230155449399e-01_R +}; +inline constexpr Real gauss_legendre_weights_6[] = { + 1.7132449237917034504029614217273e-01_R, + 3.6076157304813860756983351383772e-01_R, + 4.6791393457269104738987034398955e-01_R, + 4.6791393457269104738987034398955e-01_R, + 3.6076157304813860756983351383772e-01_R, + 1.7132449237917034504029614217273e-01_R +}; + +inline constexpr Real gauss_legendre_points_7[] = { + -9.4910791234275852452618968404785e-01_R, + -7.4153118559939443986386477328079e-01_R, + -4.0584515137739716690660641207696e-01_R, + 0._R, + 4.0584515137739716690660641207696e-01_R, + 7.4153118559939443986386477328079e-01_R, + 9.4910791234275852452618968404785e-01_R +}; +inline constexpr Real gauss_legendre_weights_7[] = { + 
1.2948496616886969327061143267908e-01_R, + 2.7970539148927666790146777142378e-01_R, + 3.8183005050511894495036977548898e-01_R, + 4.1795918367346938775510204081633e-01_R, + 3.8183005050511894495036977548898e-01_R, + 2.7970539148927666790146777142378e-01_R, + 1.2948496616886969327061143267908e-01_R +}; + +inline constexpr PointWeight2D tri_rule_1[] = { + {Real(1) / 3, Real(1) / 3, Real(1) / 2} +}; + +inline constexpr PointWeight2D tri_rule_2[] = { + {Real(2) / 3, Real(1) / 6, Real(1) / 6}, + {Real(1) / 6, Real(2) / 3, Real(1) / 6}, + {Real(1) / 6, Real(1) / 6, Real(1) / 6} +}; + +inline constexpr PointWeight2D tri_rule_3[] = { + {1.5505102572168219018027159252941e-01_R, 1.7855872826361642311703513337422e-01_R, 1.5902069087198858469718450103758e-01_R}, + {6.4494897427831780981972840747059e-01_R, 7.5031110222608118177475598324603e-02_R, 9.0979309128011415302815498962418e-02_R}, + {1.5505102572168219018027159252941e-01_R, 6.6639024601470138670269327409637e-01_R, 1.5902069087198858469718450103758e-01_R}, + {6.4494897427831780981972840747059e-01_R, 2.8001991549907407200279599420481e-01_R, 9.0979309128011415302815498962418e-02_R} +}; + +inline constexpr Real tri4_a1 = 4.4594849091596488631832925388305199e-01_R; +inline constexpr Real tri4_b1 = 1._R - 2._R * tri4_a1; +inline constexpr Real tri4_a2 = 9.1576213509770743459571463402201508e-02_R; +inline constexpr Real tri4_b2 = 1._R - 2._R * tri4_a2; +inline constexpr PointWeight2D tri_rule_4[] = { + {tri4_a1, tri4_a1, 1.1169079483900573284750350421656140e-01_R}, + {tri4_a1, tri4_b1, 1.1169079483900573284750350421656140e-01_R}, + {tri4_b1, tri4_a1, 1.1169079483900573284750350421656140e-01_R}, + {tri4_a2, tri4_a2, 5.4975871827660933819163162450105264e-02_R}, + {tri4_a2, tri4_b2, 5.4975871827660933819163162450105264e-02_R}, + {tri4_b2, tri4_a2, 5.4975871827660933819163162450105264e-02_R} +}; + +inline constexpr Real tri5_sqrt15 = 3.8729833462074168851792653997824_R; /* sqrt(15) to 32 significant digits, matching the other tables so extended-precision Real builds keep full accuracy */ +inline constexpr Real tri5_a1 = Real(2) / 7 + tri5_sqrt15 / 21; +inline
constexpr Real tri5_b1 = 1._R - 2._R * tri5_a1; +inline constexpr Real tri5_a2 = Real(2) / 7 - tri5_sqrt15 / 21; +inline constexpr Real tri5_b2 = 1._R - 2._R * tri5_a2; +inline constexpr Real tri5_w1 = Real(31) / 480 + tri5_sqrt15 / 2400; +inline constexpr Real tri5_w2 = Real(31) / 480 - tri5_sqrt15 / 2400; +inline constexpr PointWeight2D tri_rule_5[] = { + {Real(1) / 3, Real(1) / 3, Real(9) / 80}, + {tri5_a1, tri5_a1, tri5_w1}, + {tri5_a1, tri5_b1, tri5_w1}, + {tri5_b1, tri5_a1, tri5_w1}, + {tri5_a2, tri5_a2, tri5_w2}, + {tri5_a2, tri5_b2, tri5_w2}, + {tri5_b2, tri5_a2, tri5_w2} +}; + +inline constexpr Real tri6_a1 = 2.4928674517091042129163855310701908e-01_R; +inline constexpr Real tri6_b1 = 1._R - 2._R * tri6_a1; +inline constexpr Real tri6_a2 = 6.3089014491502228340331602870819157e-02_R; +inline constexpr Real tri6_b2 = 1._R - 2._R * tri6_a2; +inline constexpr Real tri6_a3 = 3.1035245103378440541660773395655215e-01_R; +inline constexpr Real tri6_b3 = 6.3650249912139864723014259441204970e-01_R; +inline constexpr Real tri6_c3 = 1._R - tri6_a3 - tri6_b3; +inline constexpr PointWeight2D tri_rule_6[] = { + {tri6_a1, tri6_a1, 5.8393137863189683012644805692789721e-02_R}, + {tri6_a1, tri6_b1, 5.8393137863189683012644805692789721e-02_R}, + {tri6_b1, tri6_a1, 5.8393137863189683012644805692789721e-02_R}, + {tri6_a2, tri6_a2, 2.5422453185103408460468404553434492e-02_R}, + {tri6_a2, tri6_b2, 2.5422453185103408460468404553434492e-02_R}, + {tri6_b2, tri6_a2, 2.5422453185103408460468404553434492e-02_R}, + {tri6_a3, tri6_b3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_b3, tri6_a3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_a3, tri6_c3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_c3, tri6_a3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_b3, tri6_c3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_c3, tri6_b3, 4.1425537809186787596776728210221227e-02_R} +}; + +inline constexpr PointWeight2D tri_rule_7[] = { + 
{6.2382265094402118173683000996350e-02_R, 6.7517867073916085442557131050869e-02_R, 2.6517028157436251428754180460739e-02_R}, + {8.7009986783168172748385986795285e-01_R, 6.2382265094402118173683000996350e-02_R, 2.6517028157436251428754180460739e-02_R}, + {6.7517867073916085442557131050869e-02_R, 8.7009986783168172748385986795285e-01_R, 2.6517028157436251428754180460739e-02_R}, + {5.5225456656926611737479190275645e-02_R, 3.2150249385198182266630784919920e-01_R, 4.3881408714446055036769903139288e-02_R}, + {6.2327204949109156559621296052516e-01_R, 5.5225456656926611737479190275645e-02_R, 4.3881408714446055036769903139288e-02_R}, + {3.2150249385198182266630784919920e-01_R, 6.2327204949109156559621296052516e-01_R, 4.3881408714446055036769903139288e-02_R}, + {3.4324302945097146469630642483938e-02_R, 6.6094919618673565761198031019780e-01_R, 2.8775042784981585738445496900219e-02_R}, + {3.0472650086816719591838904731826e-01_R, 3.4324302945097146469630642483938e-02_R, 2.8775042784981585738445496900219e-02_R}, + {6.6094919618673565761198031019780e-01_R, 3.0472650086816719591838904731826e-01_R, 2.8775042784981585738445496900219e-02_R}, + {5.1584233435359177925746338682643e-01_R, 2.7771616697639178256958187139372e-01_R, 6.7493187009802774462697086166421e-02_R}, + {2.0644149867001643817295474177985e-01_R, 5.1584233435359177925746338682643e-01_R, 6.7493187009802774462697086166421e-02_R}, + {2.7771616697639178256958187139372e-01_R, 2.0644149867001643817295474177985e-01_R, 6.7493187009802774462697086166421e-02_R} +}; + +inline constexpr PointWeight3D tet_rule_1[] = { + {0.25_R, 0.25_R, 0.25_R, Real(1) / 6} +}; + +inline constexpr Real tet2_b = 0.25_R * (1._R - 1._R / 2.2360679774997896964_R); +inline constexpr Real tet2_a = 1._R - 3._R * tet2_b; +inline constexpr PointWeight3D tet_rule_2[] = { + {tet2_a, tet2_b, tet2_b, Real(1) / 24}, + {tet2_b, tet2_a, tet2_b, Real(1) / 24}, + {tet2_b, tet2_b, tet2_a, Real(1) / 24}, + {tet2_b, tet2_b, tet2_b, Real(1) / 24} +}; + +inline constexpr 
PointWeight3D tet_rule_3[] = { + {0.25_R, 0.25_R, 0.25_R, Real(-2) / 15}, + {0.5_R, Real(1) / 6, Real(1) / 6, 0.075_R}, + {Real(1) / 6, 0.5_R, Real(1) / 6, 0.075_R}, + {Real(1) / 6, Real(1) / 6, 0.5_R, 0.075_R}, + {Real(1) / 6, Real(1) / 6, Real(1) / 6, 0.075_R} +}; + +inline constexpr PointWeight3D tet_rule_4[] = { + {2.5e-01_R, 2.5e-01_R, 2.5e-01_R, -1.31555555555555556e-02_R}, + {7.85714285714285714e-01_R, 7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.62222222222222222e-03_R}, + {7.14285714285714285e-02_R, 7.85714285714285714e-01_R, 7.14285714285714285e-02_R, 7.62222222222222222e-03_R}, + {7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.85714285714285714e-01_R, 7.62222222222222222e-03_R}, + {7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.62222222222222222e-03_R}, + {3.99403576166799219e-01_R, 3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 2.48888888888888889e-02_R}, + {3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 1.00596423833200785e-01_R, 2.48888888888888889e-02_R}, + {1.00596423833200785e-01_R, 1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 2.48888888888888889e-02_R}, + {1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 2.48888888888888889e-02_R}, + {1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 3.99403576166799219e-01_R, 2.48888888888888889e-02_R}, + {3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 2.48888888888888889e-02_R} +}; + +inline constexpr PointWeight3D tet_rule_5[] = { + {3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 1.8781320953002641800e-02_R}, + {3.1088591926330060980e-01_R, 6.7342242201009817060e-02_R, 3.1088591926330060980e-01_R, 1.8781320953002641800e-02_R}, + {6.7342242201009817060e-02_R, 3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 1.8781320953002641800e-02_R}, + {3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 
6.7342242201009817060e-02_R, 1.8781320953002641800e-02_R}, + {9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 1.2248840519393658257e-02_R}, + {9.2735250310891226402e-02_R, 7.2179424906732632079e-01_R, 9.2735250310891226402e-02_R, 1.2248840519393658257e-02_R}, + {7.2179424906732632079e-01_R, 9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 1.2248840519393658257e-02_R}, + {9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 7.2179424906732632079e-01_R, 1.2248840519393658257e-02_R}, + {4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 4.5449629587435035051e-01_R, 7.0910034628469110730e-03_R}, + {4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 4.5503704125649649492e-02_R, 7.0910034628469110730e-03_R}, + {4.5503704125649649492e-02_R, 4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 7.0910034628469110730e-03_R}, + {4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 7.0910034628469110730e-03_R}, + {4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 7.0910034628469110730e-03_R}, + {4.5449629587435035051e-01_R, 4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 7.0910034628469110730e-03_R} +}; + +inline constexpr PointWeight3D tet_rule_6[] = { + {3.56191386222544953e-01_R, 2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 6.65379170969464506e-03_R}, + {2.14602871259151684e-01_R, 3.56191386222544953e-01_R, 2.14602871259151684e-01_R, 6.65379170969464506e-03_R}, + {2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 3.56191386222544953e-01_R, 6.65379170969464506e-03_R}, + {2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 6.65379170969464506e-03_R}, + {8.77978124396165982e-01_R, 4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 1.67953517588677620e-03_R}, + {4.06739585346113397e-02_R, 8.77978124396165982e-01_R, 4.06739585346113397e-02_R, 1.67953517588677620e-03_R}, + 
{4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 8.77978124396165982e-01_R, 1.67953517588677620e-03_R}, + {4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 1.67953517588677620e-03_R}, + {3.29863295731730594e-02_R, 3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 9.22619692394239843e-03_R}, + {3.22337890142275646e-01_R, 3.29863295731730594e-02_R, 3.22337890142275646e-01_R, 9.22619692394239843e-03_R}, + {3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 3.29863295731730594e-02_R, 9.22619692394239843e-03_R}, + {3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 9.22619692394239843e-03_R}, + {6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 8.03571428571428248e-03_R}, + {2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 6.03005664791649076e-01_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 2.69672331458315867e-01_R, 8.03571428571428248e-03_R}, + {2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 8.03571428571428248e-03_R}, + {2.69672331458315867e-01_R, 6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 8.03571428571428248e-03_R}, + {6.03005664791649076e-01_R, 2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 
8.03571428571428248e-03_R} +}; + +LIBMESH_DEVICE_INLINE +Rule1D gauss_legendre_rule(const unsigned int order) +{ + switch (order) + { + case 0: + case 1: + return {1u, gauss_legendre_points_1, gauss_legendre_weights_1}; + case 2: + case 3: + return {2u, gauss_legendre_points_2, gauss_legendre_weights_2}; + case 4: + case 5: + return {3u, gauss_legendre_points_3, gauss_legendre_weights_3}; + case 6: + case 7: + return {4u, gauss_legendre_points_4, gauss_legendre_weights_4}; + case 8: + case 9: + return {5u, gauss_legendre_points_5, gauss_legendre_weights_5}; + case 10: + case 11: + return {6u, gauss_legendre_points_6, gauss_legendre_weights_6}; + case 12: + case 13: + return {7u, gauss_legendre_points_7, gauss_legendre_weights_7}; + default: + return {0u, nullptr, nullptr}; + } +} + +LIBMESH_DEVICE_INLINE +Rule2D triangle_rule(const unsigned int order) +{ + switch (order) + { + case 0: + case 1: + return {1u, tri_rule_1}; + case 2: + return {3u, tri_rule_2}; + case 3: + return {4u, tri_rule_3}; + case 4: + return {6u, tri_rule_4}; + case 5: + return {7u, tri_rule_5}; + case 6: + return {12u, tri_rule_6}; + case 7: + return {12u, tri_rule_7}; + default: + return {0u, nullptr}; + } +} + +LIBMESH_DEVICE_INLINE +Rule3D tetrahedron_rule(const unsigned int order, + const bool allow_negative_weights = true) +{ + switch (order) + { + case 0: + case 1: + return {1u, tet_rule_1}; + case 2: + return {4u, tet_rule_2}; + case 3: + return allow_negative_weights ? Rule3D{5u, tet_rule_3} : Rule3D{0u, nullptr}; + case 4: + return allow_negative_weights ? 
Rule3D{11u, tet_rule_4} : Rule3D{0u, nullptr}; + case 5: + return {14u, tet_rule_5}; + case 6: + return {24u, tet_rule_6}; + default: + return {0u, nullptr}; + } +} + +} // namespace libMesh::Quadrature::Gauss + +#endif // LIBMESH_QUADRATURE_GAUSS_RULES_H diff --git a/src/fe/fe_lagrange_shape_2D.C b/src/fe/fe_lagrange_shape_2D.C index 7749e783582..326fc703c99 100644 --- a/src/fe/fe_lagrange_shape_2D.C +++ b/src/fe/fe_lagrange_shape_2D.C @@ -20,6 +20,9 @@ #include "libmesh/fe.h" #include "libmesh/elem.h" #include "libmesh/fe_lagrange_shape_1D.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" #include "libmesh/enum_to_string.h" #include "libmesh/face_c0polygon.h" @@ -346,17 +349,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case QUADSHELL9: { // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 4); - - // 0 1 2 3 - static const unsigned int i0[] = {0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1}; - - return (fe_lagrange_1D_linear_shape(i0[i], xi)* - fe_lagrange_1D_linear_shape(i1[i], eta)); + return libMesh::detail::fe_lagrange_quad4_shape(i, p(0), p(1)); } case TRI3: @@ -364,26 +358,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case TRI6: case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. - zeta1 - zeta2; - libmesh_assert_less (i, 3); - - switch(i) - { - case 0: - return zeta0; - - case 1: - return zeta1; - - case 2: - return zeta2; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_tri3_shape(i, p(0), p(1)); } case C0POLYGON: @@ -434,40 +410,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case QUAD8: case QUADSHELL8: { - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 8); - - switch (i) - { - case 0: - return .25*(1. - xi)*(1. - eta)*(-1. 
- xi - eta); - - case 1: - return .25*(1. + xi)*(1. - eta)*(-1. + xi - eta); - - case 2: - return .25*(1. + xi)*(1. + eta)*(-1. + xi + eta); - - case 3: - return .25*(1. - xi)*(1. + eta)*(-1. - xi + eta); - - case 4: - return .5*(1. - xi*xi)*(1. - eta); - - case 5: - return .5*(1. + xi)*(1. - eta*eta); - - case 6: - return .5*(1. - xi*xi)*(1. + eta); - - case 7: - return .5*(1. - xi)*(1. - eta*eta); - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_quad8_shape(i, p(0), p(1)); } case QUAD4: @@ -477,19 +421,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case QUAD9: case QUADSHELL9: { - - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 9); - - // 0 1 2 3 4 5 6 7 8 - static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - - return (fe_lagrange_1D_quadratic_shape(i0[i], xi)* - fe_lagrange_1D_quadratic_shape(i1[i], eta)); + return libMesh::detail::fe_lagrange_quad9_shape(i, p(0), p(1)); } case TRI3: @@ -499,35 +432,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case TRI6: case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. - zeta1 - zeta2; - libmesh_assert_less (i, 6); - - switch(i) - { - case 0: - return 2.*zeta0*(zeta0-0.5); - - case 1: - return 2.*zeta1*(zeta1-0.5); - - case 2: - return 2.*zeta2*(zeta2-0.5); - - case 3: - return 4.*zeta0*zeta1; - - case 4: - return 4.*zeta1*zeta2; - - case 5: - return 4.*zeta2*zeta0; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_tri6_shape(i, p(0), p(1)); } default: @@ -544,39 +450,8 @@ Real fe_lagrange_2D_shape(const ElemType type, { case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. 
- zeta1 - zeta2; - const Real bubble_27th = zeta0*zeta1*zeta2; - libmesh_assert_less (i, 7); - - switch(i) - { - case 0: - return 2.*zeta0*(zeta0-0.5) + 3.*bubble_27th; - - case 1: - return 2.*zeta1*(zeta1-0.5) + 3.*bubble_27th; - - case 2: - return 2.*zeta2*(zeta2-0.5) + 3.*bubble_27th; - - case 3: - return 4.*zeta0*zeta1 - 12.*bubble_27th; - - case 4: - return 4.*zeta1*zeta2 - 12.*bubble_27th; - - case 5: - return 4.*zeta2*zeta0 - 12.*bubble_27th; - - case 6: - return 27.*bubble_27th; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_tri7_shape(i, p(0), p(1)); } default: @@ -624,31 +499,8 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 4); - - // 0 1 2 3 - static const unsigned int i0[] = {0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1}; - - switch (j) - { - // d()/dxi - case 0: - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape (i1[i], eta)); - - // d()/deta - case 1: - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)); - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad4_shape_deriv(i, j, p(0), p(1)); } case TRI3: @@ -657,56 +509,7 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case TRI7: { libmesh_assert_less (i, 3); - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return dzeta0dxi; - - case 1: - return dzeta1dxi; - - case 2: - return dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - // d()/deta - 
case 1: - { - switch(i) - { - case 0: - return dzeta0deta; - - case 1: - return dzeta1deta; - - case 2: - return dzeta2deta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_tri3_shape_deriv(i, j); } case C0POLYGON: @@ -795,88 +598,8 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case QUAD8: case QUADSHELL8: { - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 8); - - switch (j) - { - // d/dxi - case 0: - switch (i) - { - case 0: - return .25*(1. - eta)*((1. - xi)*(-1.) + - (-1.)*(-1. - xi - eta)); - - case 1: - return .25*(1. - eta)*((1. + xi)*(1.) + - (1.)*(-1. + xi - eta)); - - case 2: - return .25*(1. + eta)*((1. + xi)*(1.) + - (1.)*(-1. + xi + eta)); - - case 3: - return .25*(1. + eta)*((1. - xi)*(-1.) + - (-1.)*(-1. - xi + eta)); - - case 4: - return .5*(-2.*xi)*(1. - eta); - - case 5: - return .5*(1.)*(1. - eta*eta); - - case 6: - return .5*(-2.*xi)*(1. + eta); - - case 7: - return .5*(-1.)*(1. - eta*eta); - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - - // d/deta - case 1: - switch (i) - { - case 0: - return .25*(1. - xi)*((1. - eta)*(-1.) + - (-1.)*(-1. - xi - eta)); - - case 1: - return .25*(1. + xi)*((1. - eta)*(-1.) + - (-1.)*(-1. + xi - eta)); - - case 2: - return .25*(1. + xi)*((1. + eta)*(1.) + - (1.)*(-1. + xi + eta)); - - case 3: - return .25*(1. - xi)*((1. + eta)*(1.) + - (1.)*(-1. - xi + eta)); - - case 4: - return .5*(1. - xi*xi)*(-1.); - - case 5: - return .5*(1. + xi)*(-2.*eta); - - case 6: - return .5*(1. - xi*xi)*(1.); - - case 7: - return .5*(1. 
- xi)*(-2.*eta); - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad8_shape_deriv(i, j, p(0), p(1)); } case QUAD4: @@ -886,31 +609,8 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 9); - - // 0 1 2 3 4 5 6 7 8 - static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - - switch (j) - { - // d()/dxi - case 0: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)); - - // d()/deta - case 1: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)); - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad9_shape_deriv(i, j, p(0), p(1)); } case TRI3: @@ -921,77 +621,7 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case TRI7: { libmesh_assert_less (i, 6); - - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. 
- zeta1 - zeta2; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - - switch(j) - { - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0dxi; - - case 1: - return (4.*zeta1-1.)*dzeta1dxi; - - case 2: - return (4.*zeta2-1.)*dzeta2dxi; - - case 3: - return 4.*zeta1*dzeta0dxi + 4.*zeta0*dzeta1dxi; - - case 4: - return 4.*zeta2*dzeta1dxi + 4.*zeta1*dzeta2dxi; - - case 5: - return 4.*zeta2*dzeta0dxi + 4*zeta0*dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0deta; - - case 1: - return (4.*zeta1-1.)*dzeta1deta; - - case 2: - return (4.*zeta2-1.)*dzeta2deta; - - case 3: - return 4.*zeta1*dzeta0deta + 4.*zeta0*dzeta1deta; - - case 4: - return 4.*zeta2*dzeta1deta + 4.*zeta1*dzeta2deta; - - case 5: - return 4.*zeta2*dzeta0deta + 4*zeta0*dzeta2deta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_tri6_shape_deriv(i, j, p(0), p(1)); } default: @@ -1009,86 +639,7 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case TRI7: { libmesh_assert_less (i, 7); - - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. - zeta1 - zeta2; - // const Real bubble_27th = zeta0*zeta1*zeta2; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dbubbledxi = zeta2 * (1. - 2.*zeta1 - zeta2); - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dbubbledeta= zeta1 * (1. 
- zeta1 - 2.*zeta2); - - switch(j) - { - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0dxi + 3.*dbubbledxi; - - case 1: - return (4.*zeta1-1.)*dzeta1dxi + 3.*dbubbledxi; - - case 2: - return (4.*zeta2-1.)*dzeta2dxi + 3.*dbubbledxi; - - case 3: - return 4.*zeta1*dzeta0dxi + 4.*zeta0*dzeta1dxi - 12.*dbubbledxi; - - case 4: - return 4.*zeta2*dzeta1dxi + 4.*zeta1*dzeta2dxi - 12.*dbubbledxi; - - case 5: - return 4.*zeta2*dzeta0dxi + 4*zeta0*dzeta2dxi - 12.*dbubbledxi; - - case 6: - return 27.*dbubbledxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0deta + 3.*dbubbledeta; - - case 1: - return (4.*zeta1-1.)*dzeta1deta + 3.*dbubbledeta; - - case 2: - return (4.*zeta2-1.)*dzeta2deta + 3.*dbubbledeta; - - case 3: - return 4.*zeta1*dzeta0deta + 4.*zeta0*dzeta1deta - 12.*dbubbledeta; - - case 4: - return 4.*zeta2*dzeta1deta + 4.*zeta1*dzeta2deta - 12.*dbubbledeta; - - case 5: - return 4.*zeta2*dzeta0deta + 4*zeta0*dzeta2deta - 12.*dbubbledeta; - - case 6: - return 27.*dbubbledeta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_tri7_shape_deriv(i, j, p(0), p(1)); } default: @@ -1141,34 +692,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 4); - - // 0 1 2 3 - static const unsigned int i0[] = {0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1}; - - switch (j) - { - // d^2() / dxi^2 - case 0: - return 0.; - - // d^2() / dxi deta - case 1: - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)); - - // d^2() / deta^2 - case 2: - return 0.; - - default: - 
libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad4_shape_second_deriv(i, j, p(0), p(1)); } // All second derivatives for linear triangles are zero. @@ -1200,106 +725,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case QUAD8: case QUADSHELL8: { - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (j, 3); - - switch (j) - { - // d^2() / dxi^2 - case 0: - { - switch (i) - { - case 0: - case 1: - return 0.5*(1.-eta); - - case 2: - case 3: - return 0.5*(1.+eta); - - case 4: - return eta - 1.; - - case 5: - case 7: - return 0.0; - - case 6: - return -1. - eta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / dxi deta - case 1: - { - switch (i) - { - case 0: - return 0.25*( 1. - 2.*xi - 2.*eta); - - case 1: - return 0.25*(-1. - 2.*xi + 2.*eta); - - case 2: - return 0.25*( 1. + 2.*xi + 2.*eta); - - case 3: - return 0.25*(-1. + 2.*xi - 2.*eta); - - case 4: - return xi; - - case 5: - return -eta; - - case 6: - return -xi; - - case 7: - return eta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / deta^2 - case 2: - { - switch (i) - { - case 0: - case 3: - return 0.5*(1.-xi); - - case 1: - case 2: - return 0.5*(1.+xi); - - case 4: - case 6: - return 0.0; - - case 5: - return -1.0 - xi; - - case 7: - return xi - 1.0; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return libMesh::detail::fe_lagrange_quad8_shape_second_deriv(i, j, p(0), p(1)); } // end case QUAD8 case QUAD4: @@ -1309,36 +736,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute QUAD9 second derivatives as tensor product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 9); - - // 0 1 2 3 4 5 6 7 8 - 
static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - - switch (j) - { - // d^2() / dxi^2 - case 0: - return (fe_lagrange_1D_quadratic_shape_second_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)); - - // d^2() / dxi deta - case 1: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)); - - // d^2() / deta^2 - case 2: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_second_deriv(i1[i], 0, eta)); - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return libMesh::detail::fe_lagrange_quad9_shape_second_deriv(i, j, p(0), p(1)); } // end case QUAD9 case TRI3: @@ -1348,105 +747,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case TRI6: case TRI7: { - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - libmesh_assert_less (j, 3); - - switch (j) - { - // d^2() / dxi^2 - case 0: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0dxi; - - case 1: - return 4.*dzeta1dxi*dzeta1dxi; - - case 2: - return 4.*dzeta2dxi*dzeta2dxi; - - case 3: - return 8.*dzeta0dxi*dzeta1dxi; - - case 4: - return 8.*dzeta1dxi*dzeta2dxi; - - case 5: - return 8.*dzeta0dxi*dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / dxi deta - case 1: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0deta; - - case 1: - return 4.*dzeta1dxi*dzeta1deta; - - case 2: - return 4.*dzeta2dxi*dzeta2deta; - - case 3: - return 4.*dzeta1deta*dzeta0dxi + 4.*dzeta0deta*dzeta1dxi; - - case 4: - return 4.*dzeta2deta*dzeta1dxi + 4.*dzeta1deta*dzeta2dxi; - - case 5: - return 4.*dzeta2deta*dzeta0dxi + 4.*dzeta0deta*dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape 
function index i = " << i); - } - } - - // d^2() / deta^2 - case 2: - { - switch (i) - { - case 0: - return 4.*dzeta0deta*dzeta0deta; - - case 1: - return 4.*dzeta1deta*dzeta1deta; - - case 2: - return 4.*dzeta2deta*dzeta2deta; - - case 3: - return 8.*dzeta0deta*dzeta1deta; - - case 4: - return 8.*dzeta1deta*dzeta2deta; - - case 5: - return 8.*dzeta0deta*dzeta2deta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return libMesh::detail::fe_lagrange_tri6_shape_second_deriv(i, j); } // end case TRI6+TRI7 default: @@ -1468,124 +770,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case TRI6: case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - // const Real zeta0 = 1. - zeta1 - zeta2; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - // const Real dbubbledxi = zeta2 * (1. - 2.*zeta1 - zeta2); - const Real d2bubbledxi2 = -2. * zeta2; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - // const Real dbubbledeta= zeta1 * (1. - zeta1 - 2.*zeta2); - const Real d2bubbledeta2 = -2. * zeta1; - - const Real d2bubbledxideta = (1. 
- 2.*zeta1 - 2.*zeta2); - libmesh_assert_less (j, 3); - - switch (j) - { - // d^2() / dxi^2 - case 0: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0dxi + 3.*d2bubbledxi2; - - case 1: - return 4.*dzeta1dxi*dzeta1dxi + 3.*d2bubbledxi2; - - case 2: - return 4.*dzeta2dxi*dzeta2dxi + 3.*d2bubbledxi2; - - case 3: - return 8.*dzeta0dxi*dzeta1dxi - 12.*d2bubbledxi2; - - case 4: - return 8.*dzeta1dxi*dzeta2dxi - 12.*d2bubbledxi2; - - case 5: - return 8.*dzeta0dxi*dzeta2dxi - 12.*d2bubbledxi2; - - case 6: - return 27.*d2bubbledxi2; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / dxi deta - case 1: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0deta + 3.*d2bubbledxideta; - - case 1: - return 4.*dzeta1dxi*dzeta1deta + 3.*d2bubbledxideta; - - case 2: - return 4.*dzeta2dxi*dzeta2deta + 3.*d2bubbledxideta; - - case 3: - return 4.*dzeta1deta*dzeta0dxi + 4.*dzeta0deta*dzeta1dxi - 12.*d2bubbledxideta; - - case 4: - return 4.*dzeta2deta*dzeta1dxi + 4.*dzeta1deta*dzeta2dxi - 12.*d2bubbledxideta; - - case 5: - return 4.*dzeta2deta*dzeta0dxi + 4.*dzeta0deta*dzeta2dxi - 12.*d2bubbledxideta; - - case 6: - return 27.*d2bubbledxideta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / deta^2 - case 2: - { - switch (i) - { - case 0: - return 4.*dzeta0deta*dzeta0deta + 3.*d2bubbledeta2; - - case 1: - return 4.*dzeta1deta*dzeta1deta + 3.*d2bubbledeta2; - - case 2: - return 4.*dzeta2deta*dzeta2deta + 3.*d2bubbledeta2; - - case 3: - return 8.*dzeta0deta*dzeta1deta - 12.*d2bubbledeta2; - - case 4: - return 8.*dzeta1deta*dzeta2deta - 12.*d2bubbledeta2; - - case 5: - return 8.*dzeta0deta*dzeta2deta - 12.*d2bubbledeta2; - - case 6: - return 27.*d2bubbledeta2; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return 
libMesh::detail::fe_lagrange_tri7_shape_second_deriv(i, j, p(0), p(1)); } // end case TRI6+TRI7 default: diff --git a/src/fe/fe_lagrange_shape_3D.C b/src/fe/fe_lagrange_shape_3D.C index f4c5a649822..fb45fbc7ae0 100644 --- a/src/fe/fe_lagrange_shape_3D.C +++ b/src/fe/fe_lagrange_shape_3D.C @@ -20,6 +20,9 @@ #include "libmesh/fe.h" #include "libmesh/elem.h" #include "libmesh/fe_lagrange_shape_1D.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" #include "libmesh/enum_to_string.h" #include "libmesh/cell_c0polyhedron.h" #include "libmesh/tensor_value.h" @@ -105,32 +108,12 @@ void FE<3,LAGRANGE>::all_shapes { libmesh_assert_less_equal (n_sf, 8); - // 0 1 2 3 4 5 6 7 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // one_d_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][2] = { - {fe_lagrange_1D_linear_shape(0, xi), - fe_lagrange_1D_linear_shape(1, xi)}, - {fe_lagrange_1D_linear_shape(0, eta), - fe_lagrange_1D_linear_shape(1, eta)}, - {fe_lagrange_1D_linear_shape(0, zeta), - fe_lagrange_1D_linear_shape(1, zeta)}}; for (unsigned int i : make_range(n_sf)) - v[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; + v[i][qp] = libMesh::detail::fe_lagrange_hex8_shape(i, q_point(0), q_point(1), q_point(2)); } return; } @@ -156,38 +139,12 @@ void FE<3,LAGRANGE>::all_shapes { libmesh_assert_less_equal (n_sf, 27); - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! 
- // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // linear_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][3] = { - {fe_lagrange_1D_quadratic_shape(0, xi), - fe_lagrange_1D_quadratic_shape(1, xi), - fe_lagrange_1D_quadratic_shape(2, xi)}, - {fe_lagrange_1D_quadratic_shape(0, eta), - fe_lagrange_1D_quadratic_shape(1, eta), - fe_lagrange_1D_quadratic_shape(2, eta)}, - {fe_lagrange_1D_quadratic_shape(0, zeta), - fe_lagrange_1D_quadratic_shape(1, zeta), - fe_lagrange_1D_quadratic_shape(2, zeta)}}; for (unsigned int i : make_range(n_sf)) - v[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; + v[i][qp] = libMesh::detail::fe_lagrange_hex27_shape(i, q_point(0), q_point(1), q_point(2)); } return; } @@ -273,49 +230,15 @@ void FE<3,LAGRANGE>::all_shape_derivs { libmesh_assert_equal_to (n_sf, 8); - // 0 1 2 3 4 5 6 7 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // one_d_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][2] = { - {fe_lagrange_1D_linear_shape(0, xi), - fe_lagrange_1D_linear_shape(1, 
xi)}, - {fe_lagrange_1D_linear_shape(0, eta), - fe_lagrange_1D_linear_shape(1, eta)}, - {fe_lagrange_1D_linear_shape(0, zeta), - fe_lagrange_1D_linear_shape(1, zeta)}}; - - // one_d_derivs[dim][i] = dphi_i/dxi(p(dim)) - Real one_d_derivs[3][2] = { - {fe_lagrange_1D_linear_shape_deriv(0, 0, xi), - fe_lagrange_1D_linear_shape_deriv(1, 0, xi)}, - {fe_lagrange_1D_linear_shape_deriv(0, 0, eta), - fe_lagrange_1D_linear_shape_deriv(1, 0, eta)}, - {fe_lagrange_1D_linear_shape_deriv(0, 0, zeta), - fe_lagrange_1D_linear_shape_deriv(1, 0, zeta)}}; - - for (unsigned int i : make_range(n_sf)) - { - (*comps[0])[i][qp] = one_d_derivs[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[1])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_derivs[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[2])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_derivs[2][i2[i]]; - } + for (unsigned int i : make_range(n_sf)) + { + (*comps[0])[i][qp] = libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 0, q_point(0), q_point(1), q_point(2)); + (*comps[1])[i][qp] = libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 1, q_point(0), q_point(1), q_point(2)); + (*comps[2])[i][qp] = libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 2, q_point(0), q_point(1), q_point(2)); + } } return; } @@ -341,58 +264,15 @@ void FE<3,LAGRANGE>::all_shape_derivs { libmesh_assert_less_equal (n_sf, 27); - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! 
- // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // one_d_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][3] = { - {fe_lagrange_1D_quadratic_shape(0, xi), - fe_lagrange_1D_quadratic_shape(1, xi), - fe_lagrange_1D_quadratic_shape(2, xi)}, - {fe_lagrange_1D_quadratic_shape(0, eta), - fe_lagrange_1D_quadratic_shape(1, eta), - fe_lagrange_1D_quadratic_shape(2, eta)}, - {fe_lagrange_1D_quadratic_shape(0, zeta), - fe_lagrange_1D_quadratic_shape(1, zeta), - fe_lagrange_1D_quadratic_shape(2, zeta)}}; - - // one_d_derivs[dim][i] = dphi_i/dxi(p(dim)) - Real one_d_derivs[3][3] = { - {fe_lagrange_1D_quadratic_shape_deriv(0, 0, xi), - fe_lagrange_1D_quadratic_shape_deriv(1, 0, xi), - fe_lagrange_1D_quadratic_shape_deriv(2, 0, xi)}, - {fe_lagrange_1D_quadratic_shape_deriv(0, 0, eta), - fe_lagrange_1D_quadratic_shape_deriv(1, 0, eta), - fe_lagrange_1D_quadratic_shape_deriv(2, 0, eta)}, - {fe_lagrange_1D_quadratic_shape_deriv(0, 0, zeta), - fe_lagrange_1D_quadratic_shape_deriv(1, 0, zeta), - fe_lagrange_1D_quadratic_shape_deriv(2, 0, zeta)}}; - - for (unsigned int i : make_range(n_sf)) - { - (*comps[0])[i][qp] = one_d_derivs[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[1])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_derivs[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[2])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_derivs[2][i2[i]]; - 
} + for (unsigned int i : make_range(n_sf)) + { + (*comps[0])[i][qp] = libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 0, q_point(0), q_point(1), q_point(2)); + (*comps[1])[i][qp] = libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 1, q_point(0), q_point(1), q_point(2)); + (*comps[2])[i][qp] = libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 2, q_point(0), q_point(1), q_point(2)); + } } return; } @@ -691,19 +571,7 @@ Real fe_lagrange_3D_shape(const ElemType type, { libmesh_assert_less (i, 8); - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // 0 1 2 3 4 5 6 7 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - - return (fe_lagrange_1D_linear_shape(i0[i], xi)* - fe_lagrange_1D_linear_shape(i1[i], eta)* - fe_lagrange_1D_linear_shape(i2[i], zeta)); + return libMesh::detail::fe_lagrange_hex8_shape(i, p(0), p(1), p(2)); } // linear tetrahedral shape functions @@ -712,30 +580,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case TET14: { libmesh_assert_less (i, 4); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. 
- zeta1 - zeta2 - zeta3; - - switch(i) - { - case 0: - return zeta0; - - case 1: - return zeta1; - - case 2: - return zeta2; - - case 3: - return zeta3; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet4_shape(i, p(0), p(1), p(2)); } // linear prism shape functions @@ -848,82 +693,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case HEX20: { libmesh_assert_less (i, 20); - - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // these functions are defined for (x,y,z) in [0,1]^3 - // so transform the locations - const Real x = .5*(xi + 1.); - const Real y = .5*(eta + 1.); - const Real z = .5*(zeta + 1.); - - switch (i) - { - case 0: - return (1. - x)*(1. - y)*(1. - z)*(1. - 2.*x - 2.*y - 2.*z); - - case 1: - return x*(1. - y)*(1. - z)*(2.*x - 2.*y - 2.*z - 1.); - - case 2: - return x*y*(1. - z)*(2.*x + 2.*y - 2.*z - 3.); - - case 3: - return (1. - x)*y*(1. - z)*(2.*y - 2.*x - 2.*z - 1.); - - case 4: - return (1. - x)*(1. - y)*z*(2.*z - 2.*x - 2.*y - 1.); - - case 5: - return x*(1. - y)*z*(2.*x - 2.*y + 2.*z - 3.); - - case 6: - return x*y*z*(2.*x + 2.*y + 2.*z - 5.); - - case 7: - return (1. - x)*y*z*(2.*y - 2.*x + 2.*z - 3.); - - case 8: - return 4.*x*(1. - x)*(1. - y)*(1. - z); - - case 9: - return 4.*x*y*(1. - y)*(1. - z); - - case 10: - return 4.*x*(1. - x)*y*(1. - z); - - case 11: - return 4.*(1. - x)*y*(1. - y)*(1. - z); - - case 12: - return 4.*(1. - x)*(1. - y)*z*(1. - z); - - case 13: - return 4.*x*(1. - y)*z*(1. - z); - - case 14: - return 4.*x*y*z*(1. - z); - - case 15: - return 4.*(1. - x)*y*z*(1. - z); - - case 16: - return 4.*x*(1. - x)*(1. - y)*z; - - case 17: - return 4.*x*y*(1. - y)*z; - - case 18: - return 4.*x*(1. - x)*y*z; - - case 19: - return 4.*(1. - x)*y*(1. 
- y)*z; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_hex20_shape(i, p(0), p(1), p(2)); } // triquadratic hexahedral shape functions @@ -935,22 +705,7 @@ Real fe_lagrange_3D_shape(const ElemType type, { libmesh_assert_less (i, 27); - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - - return (fe_lagrange_1D_quadratic_shape(i0[i], xi)* - fe_lagrange_1D_quadratic_shape(i1[i], eta)* - fe_lagrange_1D_quadratic_shape(i2[i], zeta)); + return libMesh::detail::fe_lagrange_hex27_shape(i, p(0), p(1), p(2)); } // quadratic tetrahedral shape functions @@ -964,48 +719,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. 
- zeta1 - zeta2 - zeta3; - - switch(i) - { - case 0: - return zeta0*(2.*zeta0 - 1.); - - case 1: - return zeta1*(2.*zeta1 - 1.); - - case 2: - return zeta2*(2.*zeta2 - 1.); - - case 3: - return zeta3*(2.*zeta3 - 1.); - - case 4: - return 4.*zeta0*zeta1; - - case 5: - return 4.*zeta1*zeta2; - - case 6: - return 4.*zeta2*zeta0; - - case 7: - return 4.*zeta0*zeta3; - - case 8: - return 4.*zeta1*zeta3; - - case 9: - return 4.*zeta2*zeta3; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet10_shape(i, p(0), p(1), p(2)); } // "serendipity" prism @@ -1402,66 +1116,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - - // Bubble functions (not yet scaled) on side nodes - const Real bubble_012 = zeta0*zeta1*zeta2; - const Real bubble_013 = zeta0*zeta1*zeta3; - const Real bubble_123 = zeta1*zeta2*zeta3; - const Real bubble_023 = zeta0*zeta2*zeta3; - - switch(i) - { - case 0: - return zeta0*(2.*zeta0 - 1.) + 3.*(bubble_012+bubble_013+bubble_023); - - case 1: - return zeta1*(2.*zeta1 - 1.) + 3.*(bubble_012+bubble_013+bubble_123); - - case 2: - return zeta2*(2.*zeta2 - 1.) + 3.*(bubble_012+bubble_023+bubble_123); - - case 3: - return zeta3*(2.*zeta3 - 1.) 
+ 3.*(bubble_013+bubble_023+bubble_123); - - case 4: - return 4.*zeta0*zeta1 - 12.*(bubble_012+bubble_013); - - case 5: - return 4.*zeta1*zeta2 - 12.*(bubble_012+bubble_123); - - case 6: - return 4.*zeta2*zeta0 - 12.*(bubble_012+bubble_023); - - case 7: - return 4.*zeta0*zeta3 - 12.*(bubble_013+bubble_023); - - case 8: - return 4.*zeta1*zeta3 - 12.*(bubble_013+bubble_123); - - case 9: - return 4.*zeta2*zeta3 - 12.*(bubble_023+bubble_123); - - case 10: - return 27.*bubble_012; - - case 11: - return 27.*bubble_013; - - case 12: - return 27.*bubble_123; - - case 13: - return 27.*bubble_023; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet14_shape(i, p(0), p(1), p(2)); } default: @@ -1508,35 +1163,7 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, { libmesh_assert_less (i, 8); - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - - switch(j) - { - case 0: - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape (i1[i], eta)* - fe_lagrange_1D_linear_shape (i2[i], zeta)); - - case 1: - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_linear_shape (i2[i], zeta)); - - case 2: - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape (i1[i], eta)* - fe_lagrange_1D_linear_shape_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_hex8_shape_deriv(i, j, p(0), p(1), p(2)); } // linear tetrahedral shape functions @@ -1545,94 +1172,7 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, case TET14: { libmesh_assert_less (i, 4); - - // Area coordinates, pg. 205, Vol. 
I, Carey, Oden, Becker FEM - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dzeta3dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dzeta3deta = 0.; - - const Real dzeta0dzeta = -1.; - const Real dzeta1dzeta = 0.; - const Real dzeta2dzeta = 0.; - const Real dzeta3dzeta = 1.; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return dzeta0dxi; - - case 1: - return dzeta1dxi; - - case 2: - return dzeta2dxi; - - case 3: - return dzeta3dxi; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/deta - case 1: - { - switch(i) - { - case 0: - return dzeta0deta; - - case 1: - return dzeta1deta; - - case 2: - return dzeta2deta; - - case 3: - return dzeta3deta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/dzeta - case 2: - { - switch(i) - { - case 0: - return dzeta0dzeta; - - case 1: - return dzeta1dzeta; - - case 2: - return dzeta2dzeta; - - case 3: - return dzeta3dzeta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - default: - libmesh_error_msg("Invalid shape function derivative j = " << j); - } + return libMesh::detail::fe_lagrange_tet4_shape_deriv(i, j); } // linear prism shape functions @@ -1876,458 +1416,32 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, case HEX20: { libmesh_assert_less (i, 20); + return libMesh::detail::fe_lagrange_hex20_shape_deriv(i, j, p(0), p(1), p(2)); + } - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // these functions are defined for (x,y,z) in [0,1]^3 - // so transform the locations - const Real x = .5*(xi + 1.); - const Real y = .5*(eta + 1.); - const Real z = .5*(zeta + 1.); + // triquadratic hexahedral shape functions + case HEX8: + libmesh_assert_msg(T == L2_LAGRANGE, + "High order on first order elements only supported for L2 families"); + libmesh_fallthrough(); + case HEX27: + { + 
libmesh_assert_less (i, 27); - // and don't forget the chain rule! + return libMesh::detail::fe_lagrange_hex27_shape_deriv(i, j, p(0), p(1), p(2)); + } - switch (j) - { - - // d/dx*dx/dxi - case 0: - switch (i) - { - case 0: - return .5*(1. - y)*(1. - z)*((1. - x)*(-2.) + - (-1.)*(1. - 2.*x - 2.*y - 2.*z)); - - case 1: - return .5*(1. - y)*(1. - z)*(x*(2.) + - (1.)*(2.*x - 2.*y - 2.*z - 1.)); - - case 2: - return .5*y*(1. - z)*(x*(2.) + - (1.)*(2.*x + 2.*y - 2.*z - 3.)); - - case 3: - return .5*y*(1. - z)*((1. - x)*(-2.) + - (-1.)*(2.*y - 2.*x - 2.*z - 1.)); - - case 4: - return .5*(1. - y)*z*((1. - x)*(-2.) + - (-1.)*(2.*z - 2.*x - 2.*y - 1.)); - - case 5: - return .5*(1. - y)*z*(x*(2.) + - (1.)*(2.*x - 2.*y + 2.*z - 3.)); - - case 6: - return .5*y*z*(x*(2.) + - (1.)*(2.*x + 2.*y + 2.*z - 5.)); - - case 7: - return .5*y*z*((1. - x)*(-2.) + - (-1.)*(2.*y - 2.*x + 2.*z - 3.)); - - case 8: - return 2.*(1. - y)*(1. - z)*(1. - 2.*x); - - case 9: - return 2.*y*(1. - y)*(1. - z); - - case 10: - return 2.*y*(1. - z)*(1. - 2.*x); - - case 11: - return 2.*y*(1. - y)*(1. - z)*(-1.); - - case 12: - return 2.*(1. - y)*z*(1. - z)*(-1.); - - case 13: - return 2.*(1. - y)*z*(1. - z); - - case 14: - return 2.*y*z*(1. - z); - - case 15: - return 2.*y*z*(1. - z)*(-1.); - - case 16: - return 2.*(1. - y)*z*(1. - 2.*x); - - case 17: - return 2.*y*(1. - y)*z; - - case 18: - return 2.*y*z*(1. - 2.*x); - - case 19: - return 2.*y*(1. - y)*z*(-1.); - - default: - libmesh_error_msg("Invalid i = " << i); - } - - - // d/dy*dy/deta - case 1: - switch (i) - { - case 0: - return .5*(1. - x)*(1. - z)*((1. - y)*(-2.) + - (-1.)*(1. - 2.*x - 2.*y - 2.*z)); - - case 1: - return .5*x*(1. - z)*((1. - y)*(-2.) + - (-1.)*(2.*x - 2.*y - 2.*z - 1.)); - - case 2: - return .5*x*(1. - z)*(y*(2.) + - (1.)*(2.*x + 2.*y - 2.*z - 3.)); - - case 3: - return .5*(1. - x)*(1. - z)*(y*(2.) + - (1.)*(2.*y - 2.*x - 2.*z - 1.)); - - case 4: - return .5*(1. - x)*z*((1. - y)*(-2.) 
+ - (-1.)*(2.*z - 2.*x - 2.*y - 1.)); - - case 5: - return .5*x*z*((1. - y)*(-2.) + - (-1.)*(2.*x - 2.*y + 2.*z - 3.)); - - case 6: - return .5*x*z*(y*(2.) + - (1.)*(2.*x + 2.*y + 2.*z - 5.)); - - case 7: - return .5*(1. - x)*z*(y*(2.) + - (1.)*(2.*y - 2.*x + 2.*z - 3.)); - - case 8: - return 2.*x*(1. - x)*(1. - z)*(-1.); - - case 9: - return 2.*x*(1. - z)*(1. - 2.*y); - - case 10: - return 2.*x*(1. - x)*(1. - z); - - case 11: - return 2.*(1. - x)*(1. - z)*(1. - 2.*y); - - case 12: - return 2.*(1. - x)*z*(1. - z)*(-1.); - - case 13: - return 2.*x*z*(1. - z)*(-1.); - - case 14: - return 2.*x*z*(1. - z); - - case 15: - return 2.*(1. - x)*z*(1. - z); - - case 16: - return 2.*x*(1. - x)*z*(-1.); - - case 17: - return 2.*x*z*(1. - 2.*y); - - case 18: - return 2.*x*(1. - x)*z; - - case 19: - return 2.*(1. - x)*z*(1. - 2.*y); - - default: - libmesh_error_msg("Invalid i = " << i); - } - - - // d/dz*dz/dzeta - case 2: - switch (i) - { - case 0: - return .5*(1. - x)*(1. - y)*((1. - z)*(-2.) + - (-1.)*(1. - 2.*x - 2.*y - 2.*z)); - - case 1: - return .5*x*(1. - y)*((1. - z)*(-2.) + - (-1.)*(2.*x - 2.*y - 2.*z - 1.)); - - case 2: - return .5*x*y*((1. - z)*(-2.) + - (-1.)*(2.*x + 2.*y - 2.*z - 3.)); - - case 3: - return .5*(1. - x)*y*((1. - z)*(-2.) + - (-1.)*(2.*y - 2.*x - 2.*z - 1.)); - - case 4: - return .5*(1. - x)*(1. - y)*(z*(2.) + - (1.)*(2.*z - 2.*x - 2.*y - 1.)); - - case 5: - return .5*x*(1. - y)*(z*(2.) + - (1.)*(2.*x - 2.*y + 2.*z - 3.)); - - case 6: - return .5*x*y*(z*(2.) + - (1.)*(2.*x + 2.*y + 2.*z - 5.)); - - case 7: - return .5*(1. - x)*y*(z*(2.) + - (1.)*(2.*y - 2.*x + 2.*z - 3.)); - - case 8: - return 2.*x*(1. - x)*(1. - y)*(-1.); - - case 9: - return 2.*x*y*(1. - y)*(-1.); - - case 10: - return 2.*x*(1. - x)*y*(-1.); - - case 11: - return 2.*(1. - x)*y*(1. - y)*(-1.); - - case 12: - return 2.*(1. - x)*(1. - y)*(1. - 2.*z); - - case 13: - return 2.*x*(1. - y)*(1. - 2.*z); - - case 14: - return 2.*x*y*(1. - 2.*z); - - case 15: - return 2.*(1. - x)*y*(1. 
- 2.*z); - - case 16: - return 2.*x*(1. - x)*(1. - y); - - case 17: - return 2.*x*y*(1. - y); - - case 18: - return 2.*x*(1. - x)*y; - - case 19: - return 2.*(1. - x)*y*(1. - y); - - default: - libmesh_error_msg("Invalid i = " << i); - } - - default: - libmesh_error_msg("Invalid shape function derivative j = " << j); - } - } - - // triquadratic hexahedral shape functions - case HEX8: - libmesh_assert_msg(T == L2_LAGRANGE, - "High order on first order elements only supported for L2 families"); - libmesh_fallthrough(); - case HEX27: - { - libmesh_assert_less (i, 27); - - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - - switch(j) - { - case 0: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - case 1: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - case 2: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } - } - - // quadratic tetrahedral shape functions - case TET4: - libmesh_assert_msg(T == L2_LAGRANGE, - "High order on first order elements only supported 
for L2 families"); - libmesh_fallthrough(); - case TET10: - case TET14: - { - libmesh_assert_less (i, 10); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dzeta3dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dzeta3deta = 0.; - - const Real dzeta0dzeta = -1.; - const Real dzeta1dzeta = 0.; - const Real dzeta2dzeta = 0.; - const Real dzeta3dzeta = 1.; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dxi; - - case 1: - return (4.*zeta1 - 1.)*dzeta1dxi; - - case 2: - return (4.*zeta2 - 1.)*dzeta2dxi; - - case 3: - return (4.*zeta3 - 1.)*dzeta3dxi; - - case 4: - return 4.*(zeta0*dzeta1dxi + dzeta0dxi*zeta1); - - case 5: - return 4.*(zeta1*dzeta2dxi + dzeta1dxi*zeta2); - - case 6: - return 4.*(zeta0*dzeta2dxi + dzeta0dxi*zeta2); - - case 7: - return 4.*(zeta0*dzeta3dxi + dzeta0dxi*zeta3); - - case 8: - return 4.*(zeta1*dzeta3dxi + dzeta1dxi*zeta3); - - case 9: - return 4.*(zeta2*dzeta3dxi + dzeta2dxi*zeta3); - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/deta - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0deta; - - case 1: - return (4.*zeta1 - 1.)*dzeta1deta; - - case 2: - return (4.*zeta2 - 1.)*dzeta2deta; - - case 3: - return (4.*zeta3 - 1.)*dzeta3deta; - - case 4: - return 4.*(zeta0*dzeta1deta + dzeta0deta*zeta1); - - case 5: - return 4.*(zeta1*dzeta2deta + dzeta1deta*zeta2); - - case 6: - return 4.*(zeta0*dzeta2deta + dzeta0deta*zeta2); - - case 7: - return 4.*(zeta0*dzeta3deta + dzeta0deta*zeta3); - - case 8: - return 4.*(zeta1*dzeta3deta + dzeta1deta*zeta3); - - case 9: - return 4.*(zeta2*dzeta3deta + dzeta2deta*zeta3); - - default: - 
libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/dzeta - case 2: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dzeta; - - case 1: - return (4.*zeta1 - 1.)*dzeta1dzeta; - - case 2: - return (4.*zeta2 - 1.)*dzeta2dzeta; - - case 3: - return (4.*zeta3 - 1.)*dzeta3dzeta; - - case 4: - return 4.*(zeta0*dzeta1dzeta + dzeta0dzeta*zeta1); - - case 5: - return 4.*(zeta1*dzeta2dzeta + dzeta1dzeta*zeta2); - - case 6: - return 4.*(zeta0*dzeta2dzeta + dzeta0dzeta*zeta2); - - case 7: - return 4.*(zeta0*dzeta3dzeta + dzeta0dzeta*zeta3); - - case 8: - return 4.*(zeta1*dzeta3dzeta + dzeta1dzeta*zeta3); - - case 9: - return 4.*(zeta2*dzeta3dzeta + dzeta2dzeta*zeta3); - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - default: - libmesh_error_msg("Invalid j = " << j); - } - } + // quadratic tetrahedral shape functions + case TET4: + libmesh_assert_msg(T == L2_LAGRANGE, + "High order on first order elements only supported for L2 families"); + libmesh_fallthrough(); + case TET10: + case TET14: + { + libmesh_assert_less (i, 10); + return libMesh::detail::fe_lagrange_tet10_shape_deriv(i, j, p(0), p(1), p(2)); + } // "serendipity" prism @@ -2931,201 +2045,7 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. 
- zeta1 - zeta2 - zeta3; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dzeta3dxi = 0.; - const Real dbubble012dxi = (zeta0-zeta1)*zeta2; - const Real dbubble013dxi = (zeta0-zeta1)*zeta3; - const Real dbubble123dxi = zeta2*zeta3; - const Real dbubble023dxi = -zeta2*zeta3; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dzeta3deta = 0.; - const Real dbubble012deta = (zeta0-zeta2)*zeta1; - const Real dbubble013deta = -zeta1*zeta3; - const Real dbubble123deta = zeta1*zeta3; - const Real dbubble023deta = (zeta0-zeta2)*zeta3; - - const Real dzeta0dzeta = -1.; - const Real dzeta1dzeta = 0.; - const Real dzeta2dzeta = 0.; - const Real dzeta3dzeta = 1.; - const Real dbubble012dzeta = -zeta1*zeta2; - const Real dbubble013dzeta = (zeta0-zeta3)*zeta1; - const Real dbubble123dzeta = zeta1*zeta2; - const Real dbubble023dzeta = (zeta0-zeta3)*zeta2; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dxi + 3.*(dbubble012dxi+dbubble013dxi+dbubble023dxi); - - case 1: - return (4.*zeta1 - 1.)*dzeta1dxi + 3.*(dbubble012dxi+dbubble013dxi+dbubble123dxi); - - case 2: - return (4.*zeta2 - 1.)*dzeta2dxi + 3.*(dbubble012dxi+dbubble023dxi+dbubble123dxi); - - case 3: - return (4.*zeta3 - 1.)*dzeta3dxi + 3.*(dbubble013dxi+dbubble023dxi+dbubble123dxi); - - case 4: - return 4.*(zeta0*dzeta1dxi + dzeta0dxi*zeta1) - 12.*(dbubble012dxi+dbubble013dxi); - - case 5: - return 4.*(zeta1*dzeta2dxi + dzeta1dxi*zeta2) - 12.*(dbubble012dxi+dbubble123dxi); - - case 6: - return 4.*(zeta0*dzeta2dxi + dzeta0dxi*zeta2) - 12.*(dbubble012dxi+dbubble023dxi); - - case 7: - return 4.*(zeta0*dzeta3dxi + dzeta0dxi*zeta3) - 12.*(dbubble013dxi+dbubble023dxi); - - case 8: - return 4.*(zeta1*dzeta3dxi + dzeta1dxi*zeta3) - 12.*(dbubble013dxi+dbubble123dxi); - - case 9: - return 4.*(zeta2*dzeta3dxi + dzeta2dxi*zeta3) - 12.*(dbubble023dxi+dbubble123dxi); - - case 
10: - return 27.*dbubble012dxi; - - case 11: - return 27.*dbubble013dxi; - - case 12: - return 27.*dbubble123dxi; - - case 13: - return 27.*dbubble023dxi; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/deta - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0deta + 3.*(dbubble012deta+dbubble013deta+dbubble023deta);; - - case 1: - return (4.*zeta1 - 1.)*dzeta1deta + 3.*(dbubble012deta+dbubble013deta+dbubble123deta); - - case 2: - return (4.*zeta2 - 1.)*dzeta2deta + 3.*(dbubble012deta+dbubble023deta+dbubble123deta); - - case 3: - return (4.*zeta3 - 1.)*dzeta3deta + 3.*(dbubble013deta+dbubble023deta+dbubble123deta); - - case 4: - return 4.*(zeta0*dzeta1deta + dzeta0deta*zeta1) - 12.*(dbubble012deta+dbubble013deta); - - case 5: - return 4.*(zeta1*dzeta2deta + dzeta1deta*zeta2) - 12.*(dbubble012deta+dbubble123deta); - - case 6: - return 4.*(zeta0*dzeta2deta + dzeta0deta*zeta2) - 12.*(dbubble012deta+dbubble023deta); - - case 7: - return 4.*(zeta0*dzeta3deta + dzeta0deta*zeta3) - 12.*(dbubble013deta+dbubble023deta); - - case 8: - return 4.*(zeta1*dzeta3deta + dzeta1deta*zeta3) - 12.*(dbubble013deta+dbubble123deta); - - case 9: - return 4.*(zeta2*dzeta3deta + dzeta2deta*zeta3) - 12.*(dbubble023deta+dbubble123deta); - - case 10: - return 27.*dbubble012deta; - - case 11: - return 27.*dbubble013deta; - - case 12: - return 27.*dbubble123deta; - - case 13: - return 27.*dbubble023deta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/dzeta - case 2: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dzeta + 3.*(dbubble012dzeta+dbubble013dzeta+dbubble023dzeta); - - case 1: - return (4.*zeta1 - 1.)*dzeta1dzeta + 3.*(dbubble012dzeta+dbubble013dzeta+dbubble123dzeta); - - case 2: - return (4.*zeta2 - 1.)*dzeta2dzeta + 3.*(dbubble012dzeta+dbubble023dzeta+dbubble123dzeta); - - case 3: - return (4.*zeta3 - 1.)*dzeta3dzeta + 3.*(dbubble013dzeta+dbubble023dzeta+dbubble123dzeta); - - case 4: - return 
4.*(zeta0*dzeta1dzeta + dzeta0dzeta*zeta1) - 12.*(dbubble012dzeta+dbubble013dzeta); - - case 5: - return 4.*(zeta1*dzeta2dzeta + dzeta1dzeta*zeta2) - 12.*(dbubble012dzeta+dbubble123dzeta); - - case 6: - return 4.*(zeta0*dzeta2dzeta + dzeta0dzeta*zeta2) - 12.*(dbubble012dzeta+dbubble023dzeta); - - case 7: - return 4.*(zeta0*dzeta3dzeta + dzeta0dzeta*zeta3) - 12.*(dbubble013dzeta+dbubble023dzeta); - - case 8: - return 4.*(zeta1*dzeta3dzeta + dzeta1dzeta*zeta3) - 12.*(dbubble013dzeta+dbubble123dzeta); - - case 9: - return 4.*(zeta2*dzeta3dzeta + dzeta2dzeta*zeta3) - 12.*(dbubble023dzeta+dbubble123dzeta); - - case 10: - return 27.*dbubble012dzeta; - - case 11: - return 27.*dbubble013dzeta; - - case 12: - return 27.*dbubble123dzeta; - - case 13: - return 27.*dbubble023dzeta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_tet14_shape_deriv(i, j, p(0), p(1), p(2)); } case PRISM20: @@ -3412,48 +2332,11 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, // Trilinear shape functions on HEX8s have nonzero mixed second derivatives case HEX8: - case HEX20: - case HEX27: - { - libmesh_assert_less (i, 8); - - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - - switch (j) - { - // All repeated second derivatives are zero on HEX8 - case 0: // d^2()/dxi^2 - case 2: // d^2()/deta^2 - case 5: // d^2()/dzeta^2 - { - return 0.; - } - - case 1: // d^2()/dxideta - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_linear_shape (i2[i], zeta)); - - case 3: // d^2()/dxidzeta - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - 
fe_lagrange_1D_linear_shape (i1[i], eta)* - fe_lagrange_1D_linear_shape_deriv(i2[i], 0, zeta)); - - case 4: // d^2()/detadzeta - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_linear_shape_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } + case HEX20: + case HEX27: + { + libmesh_assert_less (i, 8); + return libMesh::detail::fe_lagrange_hex8_shape_second_deriv(i, j, p(0), p(1), p(2)); } // All second derivatives for piecewise-linear polyhedra are @@ -3480,269 +2363,7 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case HEX20: { libmesh_assert_less (i, 20); - - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // these functions are defined for (x,y,z) in [0,1]^3 - // so transform the locations - const Real x = .5*(xi + 1.); - const Real y = .5*(eta + 1.); - const Real z = .5*(zeta + 1.); - - switch(j) - { - case 0: // d^2()/dxi^2 - { - switch(i) - { - case 0: - case 1: - return (1. - y) * (1. - z); - case 2: - case 3: - return y * (1. - z); - case 4: - case 5: - return (1. - y) * z; - case 6: - case 7: - return y * z; - case 8: - return -2. * (1. - y) * (1. - z); - case 10: - return -2. * y * (1. - z); - case 16: - return -2. * (1. - y) * z; - case 18: - return -2. * y * z; - case 9: - case 11: - case 12: - case 13: - case 14: - case 15: - case 17: - case 19: - return 0; - default: - libmesh_error_msg("Invalid i = " << i); - } - } - case 1: // d^2()/dxideta - { - switch(i) - { - case 0: - return (1.25 - x - y - .5*z) * (1. - z); - case 1: - return (-x + y + .5*z - .25) * (1. - z); - case 2: - return (x + y - .5*z - .75) * (1. - z); - case 3: - return (-y + x + .5*z - .25) * (1. - z); - case 4: - return -.25*z * (4.*x + 4.*y - 2.*z - 3); - case 5: - return -.25*z * (-4.*y + 4.*x + 2.*z - 1.); - case 6: - return .25*z * (-5 + 4.*x + 4.*y + 2.*z); - case 7: - return .25*z * (4.*x - 4.*y - 2.*z + 1.); - case 8: - return (-1. 
+ 2.*x) * (1. - z); - case 9: - return (1. - 2.*y) * (1. - z); - case 10: - return (1. - 2.*x) * (1. - z); - case 11: - return (-1. + 2.*y) * (1. - z); - case 12: - return z * (1. - z); - case 13: - return -z * (1. - z); - case 14: - return z * (1. - z); - case 15: - return -z * (1. - z); - case 16: - return (-1. + 2.*x) * z; - case 17: - return (1. - 2.*y) * z; - case 18: - return (1. - 2.*x) * z; - case 19: - return (-1. + 2.*y) * z; - default: - libmesh_error_msg("Invalid i = " << i); - } - } - case 2: // d^2()/deta^2 - switch(i) - { - case 0: - case 3: - return (1. - x) * (1. - z); - case 1: - case 2: - return x * (1. - z); - case 4: - case 7: - return (1. - x) * z; - case 5: - case 6: - return x * z; - case 9: - return -2. * x * (1. - z); - case 11: - return -2. * (1. - x) * (1. - z); - case 17: - return -2. * x * z; - case 19: - return -2. * (1. - x) * z; - case 8: - case 10: - case 12: - case 13: - case 14: - case 15: - case 16: - case 18: - return 0.; - default: - libmesh_error_msg("Invalid i = " << i); - } - case 3: // d^2()/dxidzeta - switch(i) - { - case 0: - return (1.25 - x - .5*y - z) * (1. - y); - case 1: - return (-x + .5*y + z - .25) * (1. - y); - case 2: - return -.25*y * (2.*y + 4.*x - 4.*z - 1.); - case 3: - return -.25*y * (-2.*y + 4.*x + 4.*z - 3); - case 4: - return (-z + x + .5*y - .25) * (1. - y); - case 5: - return (x - .5*y + z - .75) * (1. - y); - case 6: - return .25*y * (2.*y + 4.*x + 4.*z - 5); - case 7: - return .25*y * (-2.*y + 4.*x - 4.*z + 1.); - case 8: - return (-1. + 2.*x) * (1. - y); - case 9: - return -y * (1. - y); - case 10: - return (-1. + 2.*x) * y; - case 11: - return y * (1. - y); - case 12: - return (-1. + 2.*z) * (1. - y); - case 13: - return (1. - 2.*z) * (1. - y); - case 14: - return (1. - 2.*z) * y; - case 15: - return (-1. + 2.*z) * y; - case 16: - return (1. - 2.*x) * (1. - y); - case 17: - return y * (1. - y); - case 18: - return (1. - 2.*x) * y; - case 19: - return -y * (1. 
- y); - default: - libmesh_error_msg("Invalid i = " << i); - } - case 4: // d^2()/detadzeta - switch(i) - { - case 0: - return (1.25 - .5*x - y - z) * (1. - x); - case 1: - return .25*x * (2.*x - 4.*y - 4.*z + 3.); - case 2: - return -.25*x * (2.*x + 4.*y - 4.*z - 1.); - case 3: - return (-y + .5*x + z - .25) * (1. - x); - case 4: - return (-z + .5*x + y - .25) * (1. - x); - case 5: - return -.25*x * (2.*x - 4.*y + 4.*z - 1.); - case 6: - return .25*x * (2.*x + 4.*y + 4.*z - 5); - case 7: - return (y - .5*x + z - .75) * (1. - x); - case 8: - return x * (1. - x); - case 9: - return (-1. + 2.*y) * x; - case 10: - return -x * (1. - x); - case 11: - return (-1. + 2.*y) * (1. - x); - case 12: - return (-1. + 2.*z) * (1. - x); - case 13: - return (-1. + 2.*z) * x; - case 14: - return (1. - 2.*z) * x; - case 15: - return (1. - 2.*z) * (1. - x); - case 16: - return -x * (1. - x); - case 17: - return (1. - 2.*y) * x; - case 18: - return x * (1. - x); - case 19: - return (1. - 2.*y) * (1. - x); - default: - libmesh_error_msg("Invalid i = " << i); - } - case 5: // d^2()/dzeta^2 - switch(i) - { - case 0: - case 4: - return (1. - x) * (1. - y); - case 1: - case 5: - return x * (1. - y); - case 2: - case 6: - return x * y; - case 3: - case 7: - return (1. - x) * y; - case 12: - return -2. * (1. - x) * (1. - y); - case 13: - return -2. * x * (1. - y); - case 14: - return -2. * x * y; - case 15: - return -2. * (1. 
- x) * y; - case 8: - case 9: - case 10: - case 11: - case 16: - case 17: - case 18: - case 19: - return 0.; - default: - libmesh_error_msg("Invalid i = " << i); - } - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_hex20_shape_second_deriv(i, j, p(0), p(1), p(2)); } // triquadratic hexahedral shape functions @@ -3753,61 +2374,7 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case HEX27: { libmesh_assert_less (i, 27); - - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - - switch(j) - { - // d^2()/dxi^2 - case 0: - return (fe_lagrange_1D_quadratic_shape_second_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - // d^2()/dxideta - case 1: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - // d^2()/deta^2 - case 2: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_second_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - // d^2()/dxidzeta - case 3: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape_deriv(i2[i], 0, zeta)); - - // d^2()/detadzeta - 
case 4: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape_deriv(i2[i], 0, zeta)); - - // d^2()/dzeta^2 - case 5: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape_second_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_hex27_shape_second_deriv(i, j, p(0), p(1), p(2)); } // quadratic tetrahedral shape functions @@ -3818,69 +2385,8 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case TET10: case TET14: { - // The area coordinates are the same as used for the - // shape() and shape_deriv() functions. - // const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - // const Real zeta1 = p(0); - // const Real zeta2 = p(1); - // const Real zeta3 = p(2); - static const Real dzetadxi[4][3] = - { - {-1., -1., -1.}, - {1., 0., 0.}, - {0., 1., 0.}, - {0., 0., 1.} - }; - - // Convert from j -> (j,k) indices for independent variable - // (0=xi, 1=eta, 2=zeta) - static const unsigned short int independent_var_indices[6][2] = - { - {0, 0}, // d^2 phi / dxi^2 - {0, 1}, // d^2 phi / dxi deta - {1, 1}, // d^2 phi / deta^2 - {0, 2}, // d^2 phi / dxi dzeta - {1, 2}, // d^2 phi / deta dzeta - {2, 2} // d^2 phi / dzeta^2 - }; - - // Convert from i -> zeta indices. Each quadratic shape - // function for the Tet10 depends on up to two of the zeta - // area coordinate functions (see the shape() function above). - // This table just tells which two area coords it uses. - static const unsigned short int zeta_indices[10][2] = - { - {0, 0}, - {1, 1}, - {2, 2}, - {3, 3}, - {0, 1}, - {1, 2}, - {2, 0}, - {0, 3}, - {1, 3}, - {2, 3}, - }; - - // Look up the independent variable indices for this value of j. 
- const unsigned int my_j = independent_var_indices[j][0]; - const unsigned int my_k = independent_var_indices[j][1]; - - if (i<4) - { - return 4.*dzetadxi[i][my_j]*dzetadxi[i][my_k]; - } - - else if (i<10) - { - const unsigned short int my_m = zeta_indices[i][0]; - const unsigned short int my_n = zeta_indices[i][1]; - - return 4.*(dzetadxi[my_n][my_j]*dzetadxi[my_m][my_k] + - dzetadxi[my_m][my_j]*dzetadxi[my_n][my_k] ); - } - else - libmesh_error_msg("Invalid shape function index " << i); + libmesh_assert_less (i, 10); + return libMesh::detail::fe_lagrange_tet10_shape_second_deriv(i, j); } @@ -4983,190 +3489,7 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // The area coordinates are the same as used for the - // shape() and shape_deriv() functions. - // const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - // const Real zeta1 = p(0); - // const Real zeta2 = p(1); - // const Real zeta3 = p(2); - static const Real dzetadxi[4][3] = - { - {-1., -1., -1.}, - {1., 0., 0.}, - {0., 1., 0.}, - {0., 0., 1.} - }; - - // Convert from j -> (j,k) indices for independent variable - // (0=xi, 1=eta, 2=zeta) - static const unsigned short int independent_var_indices[6][2] = - { - {0, 0}, // d^2 phi / dxi^2 - {0, 1}, // d^2 phi / dxi deta - {1, 1}, // d^2 phi / deta^2 - {0, 2}, // d^2 phi / dxi dzeta - {1, 2}, // d^2 phi / deta dzeta - {2, 2} // d^2 phi / dzeta^2 - }; - - // Convert from i -> zeta indices. Each quadratic shape - // function for the Tet10 depends on up to two of the zeta - // area coordinate functions (see the shape() function above). - // This table just tells which two area coords it uses. - static const unsigned short int zeta_indices[10][2] = - { - {0, 0}, - {1, 1}, - {2, 2}, - {3, 3}, - {0, 1}, - {1, 2}, - {2, 0}, - {0, 3}, - {1, 3}, - {2, 3}, - }; - - // Look up the independent variable indices for this value of j. 
- const unsigned int my_j = independent_var_indices[j][0]; - const unsigned int my_k = independent_var_indices[j][1]; - - Real returnval = 0; - if (i<4) - returnval = 4.*dzetadxi[i][my_j]*dzetadxi[i][my_k]; - - else if (i<10) - { - const unsigned short int my_m = zeta_indices[i][0]; - const unsigned short int my_n = zeta_indices[i][1]; - - returnval = - 4.*(dzetadxi[my_n][my_j]*dzetadxi[my_m][my_k] + - dzetadxi[my_m][my_j]*dzetadxi[my_n][my_k] ); - } - - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - - // Fill these with whichever derivative we're concerned - // with - Real d2bubble012, d2bubble013, d2bubble023, d2bubble123; - switch (j) - { - // d^2()/dxi^2 - case 0: - { - d2bubble012 = -2.*zeta2; - d2bubble013 = -2.*zeta3; - d2bubble023 = 0.; - d2bubble123 = 0.; - break; - } - - // d^2()/dxideta - case 1: - { - d2bubble012 = (zeta0-zeta1)-zeta2; - d2bubble013 = -zeta3; - d2bubble123 = zeta3; - d2bubble023 = -zeta3; - break; - } - - // d^2()/deta^2 - case 2: - { - d2bubble012 = -2.*zeta1; - d2bubble013 = 0.; - d2bubble123 = 0.; - d2bubble023 = -2.*zeta3; - break; - } - - // d^2()/dxi dzeta - case 3: - { - d2bubble012 = -zeta2; - d2bubble013 = (zeta0-zeta3)-zeta1; - d2bubble123 = zeta2; - d2bubble023 = -zeta2; - break; - } - - // d^2()/deta dzeta - case 4: - { - d2bubble012 = -zeta1; - d2bubble013 = -zeta1; - d2bubble123 = zeta1; - d2bubble023 = (zeta0-zeta3)-zeta2; - break; - } - - // d^2()/dzeta^2 - case 5: - { - d2bubble012 = 0.; - d2bubble013 = -2.*zeta1; - d2bubble123 = 0.; - d2bubble023 = -2.*zeta2; - break; - } - - default: - libmesh_error_msg("Invalid j = " << j); - } - - switch (i) - { - case 0: - return returnval + 3.*(d2bubble012+d2bubble013+d2bubble023); - - case 1: - return returnval + 3.*(d2bubble012+d2bubble013+d2bubble123); - - case 2: - return returnval + 3.*(d2bubble012+d2bubble023+d2bubble123); - - case 3: - return returnval + 
3.*(d2bubble013+d2bubble023+d2bubble123); - - case 4: - return returnval - 12.*(d2bubble012+d2bubble013); - - case 5: - return returnval - 12.*(d2bubble012+d2bubble123); - - case 6: - return returnval - 12.*(d2bubble012+d2bubble023); - - case 7: - return returnval - 12.*(d2bubble013+d2bubble023); - - case 8: - return returnval - 12.*(d2bubble013+d2bubble123); - - case 9: - return returnval - 12.*(d2bubble023+d2bubble123); - - case 10: - return 27.*d2bubble012; - - case 11: - return 27.*d2bubble013; - - case 12: - return 27.*d2bubble123; - - case 13: - return 27.*d2bubble023; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet14_shape_second_deriv(i, j, p(0), p(1), p(2)); } case PRISM20: diff --git a/src/quadrature/quadrature_gauss_1D.C b/src/quadrature/quadrature_gauss_1D.C index 0e72fc7c8a4..3292fff2706 100644 --- a/src/quadrature/quadrature_gauss_1D.C +++ b/src/quadrature/quadrature_gauss_1D.C @@ -21,6 +21,7 @@ // Local includes #include "libmesh/quadrature_gauss.h" +#include "libmesh/quadrature_gauss_rules.h" namespace libMesh { @@ -31,6 +32,22 @@ void QGauss::init_1D() { //---------------------------------------------------------------------- // 1D quadrature rules + const auto shared_rule = Quadrature::Gauss::gauss_legendre_rule(static_cast(get_order())); + + if (shared_rule.count) + { + _points.resize(shared_rule.count); + _weights.resize(shared_rule.count); + + for (unsigned int i = 0; i < shared_rule.count; ++i) + { + _points[i](0) = shared_rule.points[i]; + _weights[i] = shared_rule.weights[i]; + } + + return; + } + switch(get_order()) { case CONSTANT: diff --git a/src/quadrature/quadrature_gauss_2D.C b/src/quadrature/quadrature_gauss_2D.C index 06e30bff52b..8dd2e5dc1c1 100644 --- a/src/quadrature/quadrature_gauss_2D.C +++ b/src/quadrature/quadrature_gauss_2D.C @@ -20,6 +20,7 @@ // Local includes #include "libmesh/quadrature_gauss.h" #include "libmesh/quadrature_conical.h" +#include 
"libmesh/quadrature_gauss_rules.h" #include "libmesh/enum_to_string.h" #include "libmesh/face_c0polygon.h" @@ -76,6 +77,24 @@ void QGauss::init_2D() case TRI6: case TRI7: { + const auto shared_rule = + Quadrature::Gauss::triangle_rule(static_cast(get_order())); + + if (shared_rule.count) + { + _points.resize(shared_rule.count); + _weights.resize(shared_rule.count); + + for (unsigned int i = 0; i < shared_rule.count; ++i) + { + _points[i](0) = shared_rule.points[i].x; + _points[i](1) = shared_rule.points[i].y; + _weights[i] = shared_rule.points[i].w; + } + + return; + } + switch(get_order()) { case CONSTANT: diff --git a/src/quadrature/quadrature_gauss_3D.C b/src/quadrature/quadrature_gauss_3D.C index 8e2f96ed5cc..39f3d6e139c 100644 --- a/src/quadrature/quadrature_gauss_3D.C +++ b/src/quadrature/quadrature_gauss_3D.C @@ -20,6 +20,7 @@ // Local includes #include "libmesh/quadrature_gauss.h" #include "libmesh/quadrature_conical.h" +#include "libmesh/quadrature_gauss_rules.h" #include "libmesh/quadrature_gm.h" #include "libmesh/enum_to_string.h" #include "libmesh/cell_c0polyhedron.h" @@ -56,6 +57,26 @@ void QGauss::init_3D() case TET10: case TET14: { + const auto shared_rule = + Quadrature::Gauss::tetrahedron_rule(static_cast(get_order()), + allow_rules_with_negative_weights); + + if (shared_rule.count) + { + _points.resize(shared_rule.count); + _weights.resize(shared_rule.count); + + for (unsigned int i = 0; i < shared_rule.count; ++i) + { + _points[i](0) = shared_rule.points[i].x; + _points[i](1) = shared_rule.points[i].y; + _points[i](2) = shared_rule.points[i].z; + _weights[i] = shared_rule.points[i].w; + } + + return; + } + switch(get_order()) { // Taken from pg. 222 of "The finite element method," vol. 
1 diff --git a/tests/fe/kokkos_fe_reconstruction_oracle_test.K b/tests/fe/kokkos_fe_reconstruction_oracle_test.K index a00947f0ebf..9f2448d1ad8 100644 --- a/tests/fe/kokkos_fe_reconstruction_oracle_test.K +++ b/tests/fe/kokkos_fe_reconstruction_oracle_test.K @@ -15,10 +15,10 @@ #include "libmesh/libmesh_config.h" +#include "gpu/kokkos_fe_base.h" #include "gpu/kokkos_fe_evaluator.h" #include "gpu/kokkos_fe_map.h" #include "gpu/kokkos_fe_types.h" -#include "gpu/kokkos_scalar_types.h" #include "libmesh/elem.h" #include "libmesh/fe_base.h" @@ -233,7 +233,7 @@ test_reconstruction_case_impl(const reconstruction_case & info) coeff * grad_shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q)); } - const RealTensor invJ = libMesh::Kokkos::leading_inverse(J, dim); + const RealTensor invJ = libMesh::Kokkos::inverse(J, dim); const RealVector grad_phys = invJ * grad_ref_sum; d_u(q) = u; diff --git a/tests/fe/kokkos_fe_shape_oracle_test.K b/tests/fe/kokkos_fe_shape_oracle_test.K index d98de26dce8..db3b3f77949 100644 --- a/tests/fe/kokkos_fe_shape_oracle_test.K +++ b/tests/fe/kokkos_fe_shape_oracle_test.K @@ -14,9 +14,9 @@ // are therefore intentionally not invoked here. 
#include "libmesh/elem.h" +#include "gpu/kokkos_fe_base.h" #include "gpu/kokkos_fe_evaluator.h" #include "gpu/kokkos_fe_types.h" -#include "gpu/kokkos_scalar_types.h" #include "libmesh/fe.h" #include "libmesh/fe_interface.h" diff --git a/tests/fe/kokkos_fe_side_trace_oracle_test.K b/tests/fe/kokkos_fe_side_trace_oracle_test.K index 802545ccf31..0ac88d737a3 100644 --- a/tests/fe/kokkos_fe_side_trace_oracle_test.K +++ b/tests/fe/kokkos_fe_side_trace_oracle_test.K @@ -16,11 +16,11 @@ #include "libmesh/libmesh_config.h" +#include "gpu/kokkos_fe_base.h" #include "gpu/kokkos_fe_evaluator.h" #include "gpu/kokkos_fe_face_map.h" #include "gpu/kokkos_fe_map.h" #include "gpu/kokkos_fe_types.h" -#include "gpu/kokkos_scalar_types.h" #include "libmesh/elem.h" #include "libmesh/fe_base.h" @@ -247,7 +247,7 @@ test_side_trace_case_impl(const side_trace_case & info) const RealTensor J = libMesh::Kokkos::jacobian(geom_nodes, n_geom_nodes_, xi, eta, zeta); const RealVector grad_ref = grad_shape_for_key(i, xi, eta, zeta); - const RealVector grad_phys = libMesh::Kokkos::leading_inverse(J, parent_dim) * grad_ref; + const RealVector grad_phys = libMesh::Kokkos::inverse(J, parent_dim) * grad_ref; const RealVector normal = libMesh::Kokkos::make_vector(d_normal_x(q), d_normal_y(q), d_normal_z(q)); const RealVector tangential_grad = tangential_component(grad_phys, normal); From 2ffc6ca348f1b8974717618823afee75d684a890 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 12 May 2026 16:10:22 -0600 Subject: [PATCH 35/48] Share Kokkos FE shape dispatch with oracle tests --- include/Makefile.am | 1 + include/fe/fe_reference_element_traits.h | 13 + include/gpu/kokkos_fe_map.h | 318 ++++++- include/gpu/kokkos_fe_shape_dispatch.h | 608 ++++++++++++++ tests/fe/kokkos_fe_invariant_test.K | 6 +- tests/fe/kokkos_fe_map_oracle_test.K | 11 +- tests/fe/kokkos_fe_oracle_test_utils.h | 783 +----------------- .../fe/kokkos_fe_reconstruction_oracle_test.K | 7 +- tests/fe/kokkos_fe_side_trace_oracle_test.K | 7 +- 
tests/fe/kokkos_fe_types_oracle_test.K | 49 +- tests/fe/kokkos_quadrature_oracle_test.K | 31 +- 11 files changed, 948 insertions(+), 886 deletions(-) create mode 100644 include/gpu/kokkos_fe_shape_dispatch.h diff --git a/include/Makefile.am b/include/Makefile.am index ee7b5e1b61a..a78f189a29e 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -6,6 +6,7 @@ SUBDIRS = libmesh if LIBMESH_ENABLE_KOKKOS nobase_include_HEADERS = \ gpu/kokkos_fe_types.h \ + gpu/kokkos_fe_shape_dispatch.h \ gpu/kokkos_fe_base.h \ gpu/kokkos_fe_evaluator.h \ gpu/kokkos_fe_lagrange_1d.h \ diff --git a/include/fe/fe_reference_element_traits.h b/include/fe/fe_reference_element_traits.h index e79364923bc..51139f9afd4 100644 --- a/include/fe/fe_reference_element_traits.h +++ b/include/fe/fe_reference_element_traits.h @@ -892,6 +892,19 @@ try_reference_node(ElemType type, } } +LIBMESH_DEVICE_INLINE bool +try_reference_side_node(ElemType parent, + unsigned int side, + unsigned int side_node, + Point & pt) +{ + unsigned int node = libMesh::invalid_uint; + if (!try_local_side_node(parent, side, side_node, node)) + return false; + + return try_reference_node(parent, node, pt); +} + } // namespace libMesh #endif // LIBMESH_FE_REFERENCE_ELEMENT_TRAITS_H diff --git a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h index dd6f10cc642..7ff539bfbcc 100644 --- a/include/gpu/kokkos_fe_map.h +++ b/include/gpu/kokkos_fe_map.h @@ -18,22 +18,187 @@ #define LIBMESH_KOKKOS_FE_MAP_H #include "kokkos_fe_evaluator.h" +#include "kokkos_storage.h" + +#include namespace libMesh::Kokkos { -template +namespace detail +{ + +LIBMESH_DEVICE_INLINE const RealVector & +node_at(const RealVector * nodes, unsigned int i) +{ + return nodes[i]; +} + +template +LIBMESH_DEVICE_INLINE const RealVector & +node_at(const RealVector (&nodes)[N], unsigned int i) +{ + return nodes[i]; +} + +template > && + !std::is_array_v>, + int> = 0> LIBMESH_DEVICE_INLINE RealVector -physical_point(const RealVector * nodes, 
+node_at(const NodeStorage & nodes, unsigned int i) +{ + return load_vector(nodes, i); +} + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point_impl(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(i, xi, eta, zeta) * node_at(nodes, i); + return xyz; +} + +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian_impl(const NodeStorage & nodes, unsigned int n_nodes, Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), + node_at(nodes, k)); + return J; +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian_impl(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(k, xi, eta, zeta); + const RealVector grad = grad_map_shape(k, xi, eta, zeta); + const RealVector node = node_at(nodes, k); + xyz += phi * node; + J += libMesh::outer_product(grad, node); + } +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian_impl(const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), + node_at(face_nodes, k)); + return J; +} + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) { RealVector xyz = zero_vector(); for (unsigned int i = 0; i < n_nodes; ++i) - xyz += map_shape(i, xi, eta, zeta) * nodes[i]; + xyz += map_shape(mapping_type, topo, i, xi, eta, zeta) * node_at(nodes, 
i); return xyz; } +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, topo, k, xi, eta, zeta), + node_at(nodes, k)); + return J; +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(mapping_type, topo, k, xi, eta, zeta); + const RealVector grad = grad_map_shape(mapping_type, topo, k, xi, eta, zeta); + const RealVector node = node_at(nodes, k); + xyz += phi * node; + J += libMesh::outer_product(grad, node); + } +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, face_topo, k, xi, eta, zeta), + node_at(face_nodes, k)); + return J; +} + +} // namespace detail + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealVector +physical_point(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(nodes, n_nodes, xi, eta, zeta); +} 
+ // ========================================================================= // Compile-time dispatch (preferred for GPU — no switch overhead) // @@ -47,10 +212,20 @@ jacobian(const RealVector * nodes, unsigned int n_nodes, Real xi, Real eta, Real zeta) { - RealTensor J = zero_tensor(); - for (unsigned int k = 0; k < n_nodes; ++k) - J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), nodes[k]); - return J; + return detail::jacobian_impl(nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +jacobian(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::jacobian_impl(nodes, n_nodes, xi, eta, zeta); } template @@ -61,15 +236,22 @@ physical_point_and_jacobian(const RealVector * nodes, RealVector & xyz, RealTensor & J) { - xyz = zero_vector(); - J = zero_tensor(); - for (unsigned int k = 0; k < n_nodes; ++k) - { - const Real phi = map_shape(k, xi, eta, zeta); - const RealVector grad = grad_map_shape(k, xi, eta, zeta); - xyz += phi * nodes[k]; - J += libMesh::outer_product(grad, nodes[k]); - } + detail::physical_point_and_jacobian_impl(nodes, n_nodes, xi, eta, zeta, xyz, J); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + detail::physical_point_and_jacobian_impl(nodes, n_nodes, xi, eta, zeta, xyz, J); } template @@ -78,11 +260,20 @@ face_jacobian(const RealVector * face_nodes, unsigned int n_face_nodes, Real xi, Real eta, Real zeta) { - RealTensor J = zero_tensor(); - for (unsigned int k = 0; k < n_face_nodes; ++k) - J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), - face_nodes[k]); - return J; + return detail::face_jacobian_impl(face_nodes, n_face_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE 
RealTensor +face_jacobian(const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::face_jacobian_impl(face_nodes, n_face_nodes, xi, eta, zeta); } // ========================================================================= @@ -97,10 +288,21 @@ physical_point(libMesh::ElemMappingType mapping_type, unsigned int n_nodes, Real xi, Real eta, Real zeta) { - RealVector xyz = zero_vector(); - for (unsigned int i = 0; i < n_nodes; ++i) - xyz += map_shape(mapping_type, topo, i, xi, eta, zeta) * nodes[i]; - return xyz; + return detail::physical_point_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealVector +physical_point(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); } /// Compute Jacobian matrix (runtime topology), with rows d(x)/d(xi_r). @@ -111,11 +313,21 @@ jacobian(libMesh::ElemMappingType mapping_type, unsigned int n_nodes, Real xi, Real eta, Real zeta) { - RealTensor J = zero_tensor(); - for (unsigned int k = 0; k < n_nodes; ++k) - J += libMesh::outer_product(grad_map_shape(mapping_type, topo, k, xi, eta, zeta), - nodes[k]); - return J; + return detail::jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); } /// Compute physical point and Jacobian together (runtime topology). 
@@ -128,15 +340,23 @@ physical_point_and_jacobian(libMesh::ElemMappingType mapping_type, RealVector & xyz, RealTensor & J) { - xyz = zero_vector(); - J = zero_tensor(); - for (unsigned int k = 0; k < n_nodes; ++k) - { - const Real phi = map_shape(mapping_type, topo, k, xi, eta, zeta); - const RealVector grad = grad_map_shape(mapping_type, topo, k, xi, eta, zeta); - xyz += phi * nodes[k]; - J += libMesh::outer_product(grad, nodes[k]); - } + detail::physical_point_and_jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta, xyz, J); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + detail::physical_point_and_jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta, xyz, J); } /// Face Jacobian (runtime topology). @@ -147,11 +367,21 @@ face_jacobian(libMesh::ElemMappingType mapping_type, unsigned int n_face_nodes, Real xi, Real eta, Real zeta) { - RealTensor J = zero_tensor(); - for (unsigned int k = 0; k < n_face_nodes; ++k) - J += libMesh::outer_product(grad_map_shape(mapping_type, face_topo, k, xi, eta, zeta), - face_nodes[k]); - return J; + return detail::face_jacobian_impl(mapping_type, face_topo, face_nodes, n_face_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::face_jacobian_impl(mapping_type, face_topo, face_nodes, n_face_nodes, xi, eta, zeta); } // ========================================================================= diff --git a/include/gpu/kokkos_fe_shape_dispatch.h b/include/gpu/kokkos_fe_shape_dispatch.h new file mode 100644 index 
00000000000..8f4503ed746 --- /dev/null +++ b/include/gpu/kokkos_fe_shape_dispatch.h @@ -0,0 +1,608 @@ +// Shared Kokkos FE shape dispatch helpers. +// +// These helpers capture the supported Kokkos FE evaluator boundary in one +// place so production code and oracle tests can dispatch exact FE keys without +// duplicating the support matrix. + +#ifndef LIBMESH_KOKKOS_FE_SHAPE_DISPATCH_H +#define LIBMESH_KOKKOS_FE_SHAPE_DISPATCH_H + +#include "libmesh/fe_shape_traits.h" +#include "libmesh/kokkos_fe_evaluator.h" + +namespace libMesh::Kokkos +{ + +template +struct lagrange_evaluator_topology +{ + static const libMesh::ElemType value = libMesh::INVALID_ELEM; +}; + +#define LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(exact_topo, exact_order, evaluator_topo) \ + template <> \ + struct lagrange_evaluator_topology \ + { \ + static const libMesh::ElemType value = libMesh::evaluator_topo; \ + } + +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE2, FIRST, EDGE2); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE3, FIRST, EDGE2); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE3, SECOND, EDGE3); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE4, FIRST, EDGE2); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI3, FIRST, TRI3); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI6, FIRST, TRI3); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI6, SECOND, TRI6); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI7, FIRST, TRI3); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI7, SECOND, TRI6); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD4, FIRST, QUAD4); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD8, FIRST, QUAD4); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD8, SECOND, QUAD8); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD9, FIRST, QUAD4); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD9, SECOND, QUAD9); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET4, FIRST, TET4); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET10, FIRST, TET4); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET10, SECOND, TET10); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET14, FIRST, TET4); 
+LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET14, SECOND, TET10); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX8, FIRST, HEX8); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX20, FIRST, HEX8); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX20, SECOND, HEX20); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX27, FIRST, HEX8); +LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX27, SECOND, HEX27); + +#undef LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE + +template +struct monomial_evaluator_dim +{ + static const unsigned int value = 0; +}; + +#define LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(exact_topo, dim_value) \ + template <> \ + struct monomial_evaluator_dim \ + { \ + static const unsigned int value = dim_value; \ + } + +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(EDGE2, 1); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(EDGE3, 1); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(EDGE4, 1); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TRI3, 2); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TRI6, 2); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TRI7, 2); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(QUAD4, 2); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(QUAD8, 2); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(QUAD9, 2); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TET4, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TET10, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TET14, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(HEX8, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(HEX20, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(HEX27, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM6, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM15, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM18, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM20, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM21, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID5, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID13, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID14, 3); +LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID18, 3); + +#undef LIBMESH_KOKKOS_MONOMIAL_DIM_CASE + +template +struct monomial_order_evaluator; + +#define LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(dim_value, exact_order, impl_suffix, impl_order) \ + template <> \ + struct 
monomial_order_evaluator \ + { \ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, \ + libMesh::Real xi, \ + libMesh::Real eta, \ + libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::shape(i, xi, eta, zeta); \ + } \ + \ + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape( \ + unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::grad_shape(i, xi, eta, zeta); \ + } \ + } + +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, CONSTANT, MonomialImpl1D, 0); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, FIRST, MonomialImpl1D, 1); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, SECOND, MonomialImpl1D, 2); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, THIRD, MonomialImpl1D, 3); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, FOURTH, MonomialImpl1D, 4); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, FIFTH, MonomialImpl1D, 5); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, CONSTANT, MonomialImpl2D, 0); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, FIRST, MonomialImpl2D, 1); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, SECOND, MonomialImpl2D, 2); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, THIRD, MonomialImpl2D, 3); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, FOURTH, MonomialImpl2D, 4); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, FIFTH, MonomialImpl2D, 5); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, CONSTANT, MonomialImpl3D, 0); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, FIRST, MonomialImpl3D, 1); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, SECOND, MonomialImpl3D, 2); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, THIRD, MonomialImpl3D, 3); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, FOURTH, MonomialImpl3D, 4); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, FIFTH, MonomialImpl3D, 5); + +#undef LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE + +template +struct exact_shape_evaluator; + +template +struct exact_shape_evaluator +{ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return map_shape::value>( + i, xi, 
eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return grad_map_shape::value>( + i, xi, eta, zeta); + } +}; + +template +struct exact_shape_evaluator +{ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::value, ExactOrder>::shape( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::value, ExactOrder>::grad_shape( + i, xi, eta, zeta); + } +}; + +template +LIBMESH_DEVICE_INLINE libMesh::Real +shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::shape(i, xi, eta, zeta); +} + +template +LIBMESH_DEVICE_INLINE libMesh::Kokkos::RealVector +grad_shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::grad_shape(i, xi, eta, zeta); +} + +template +inline int +dispatch_supported_monomial_order(libMesh::Order order, const Dispatcher & dispatcher) +{ + switch (order) + { + case libMesh::CONSTANT: + return dispatcher.template operator()(); + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + case libMesh::FOURTH: + return dispatcher.template operator()(); + case libMesh::FIFTH: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(libMesh::FEShapeKey{ libMesh::MONOMIAL, ExactTopo, order }); + } +} + +template +inline int +dispatch_supported_lagrange_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (key.family != libMesh::LAGRANGE || 
!libMesh::supports_shape(key)) + return dispatcher.unsupported_key(key); + + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI7: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + 
switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET14: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + case libMesh::TET4: + case libMesh::TET10: + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return true; + + default: + return false; + } +} + +template +inline int +dispatch_supported_lagrange_shape_key_with_map(libMesh::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (key.family != libMesh::LAGRANGE || + !libMesh::supports_shape(key) || + 
!is_supported_lagrange_map_topology(key.elem_type)) + return dispatcher.unsupported_key(key); + + return dispatch_supported_lagrange_shape_key(key, dispatcher); +} + +template +inline int +dispatch_supported_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (!libMesh::supports_shape(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_supported_lagrange_shape_key(key, dispatcher); + + case libMesh::MONOMIAL: + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI7: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM15: + 
return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM18: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM21: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID5: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID13: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID18: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +supports_shape_key_with_lagrange_map(libMesh::FEShapeKey key) +{ + return libMesh::supports_shape(key) && + is_supported_lagrange_map_topology(key.elem_type); +} + +template +inline int +dispatch_supported_shape_key_with_lagrange_map(libMesh::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (!supports_shape_key_with_lagrange_map(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_supported_lagrange_shape_key_with_map(key, dispatcher); + + case libMesh::MONOMIAL: + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_face_map_topology(libMesh::ElemType topo) +{ + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return true; + + default: + return false; + } +} + +template +inline int +dispatch_supported_lagrange_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + switch (topo) + { + case libMesh::EDGE2: + return dispatcher.template operator()(); + case libMesh::EDGE3: + return dispatcher.template operator()(); + case libMesh::TRI3: + return dispatcher.template operator()(); + case libMesh::TRI6: + return dispatcher.template operator()(); + case libMesh::QUAD4: + return dispatcher.template operator()(); + case libMesh::QUAD8: + return dispatcher.template operator()(); + case libMesh::QUAD9: + return dispatcher.template operator()(); + case libMesh::TET4: + return dispatcher.template operator()(); + case libMesh::TET10: + return dispatcher.template operator()(); + case libMesh::HEX8: + return dispatcher.template operator()(); + case libMesh::HEX20: + return dispatcher.template operator()(); + case libMesh::HEX27: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_topology(topo); + } +} + +template +inline int 
+dispatch_supported_lagrange_face_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + if (!is_supported_lagrange_face_map_topology(topo)) + return dispatcher.unsupported_topology(topo); + + return dispatch_supported_lagrange_map_topology(topo, dispatcher); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_SHAPE_DISPATCH_H diff --git a/tests/fe/kokkos_fe_invariant_test.K b/tests/fe/kokkos_fe_invariant_test.K index da089f74737..f0c070e4406 100644 --- a/tests/fe/kokkos_fe_invariant_test.K +++ b/tests/fe/kokkos_fe_invariant_test.K @@ -220,12 +220,12 @@ template static int test_kronecker_delta_impl(const map_elem_info & info) { - auto elem = build_reference_elem(Topo); - std::vector xi_h(info.n_dofs), eta_h(info.n_dofs), zeta_h(info.n_dofs); for (unsigned int j = 0; j < info.n_dofs; ++j) { - const libMesh::Point p = elem->master_point(j); + libMesh::Point p; + libmesh_error_msg_if(!libMesh::try_reference_node(Topo, j, p), + "test_kronecker_delta_impl(): unsupported reference-node lookup"); xi_h[j] = p(0); eta_h[j] = p(1); zeta_h[j] = p(2); diff --git a/tests/fe/kokkos_fe_map_oracle_test.K b/tests/fe/kokkos_fe_map_oracle_test.K index afde3b315ff..c7e0075450f 100644 --- a/tests/fe/kokkos_fe_map_oracle_test.K +++ b/tests/fe/kokkos_fe_map_oracle_test.K @@ -347,9 +347,6 @@ check_face_qp_to_parent_case(const char * parent_name, if (fail) { - std::vector refspace_nodes; - libMesh::FEBase::get_refspace_nodes(parent.type(), refspace_nodes); - std::printf(" face_qp mismatch: parent=%s side_id=%u parent_type=%d side_type=%d\n", parent_name, side_id, @@ -365,12 +362,12 @@ check_face_qp_to_parent_case(const char * parent_name, for (unsigned int k = 0; k < side.n_nodes(); ++k) { - const unsigned int parent_node = parent.local_side_node(side_id, k); - const libMesh::Point parent_refspace = refspace_nodes[parent_node]; - std::printf(" k=%u side_node_id=%llu parent_node=%u parent_refspace=(%.17g, %.17g, %.17g)\n", + libMesh::Point 
parent_refspace; + libmesh_error_msg_if(!libMesh::try_reference_side_node(parent.type(), side_id, k, parent_refspace), + "check_face_qp_to_parent_case(): unsupported parent side-node lookup"); + std::printf(" k=%u side_node_id=%llu parent_refspace=(%.17g, %.17g, %.17g)\n", k, libMesh::cast_int(side.node_id(k)), - parent_node, parent_refspace(0), parent_refspace(1), parent_refspace(2)); diff --git a/tests/fe/kokkos_fe_oracle_test_utils.h b/tests/fe/kokkos_fe_oracle_test_utils.h index 5d6f53e0920..3b8a956749c 100644 --- a/tests/fe/kokkos_fe_oracle_test_utils.h +++ b/tests/fe/kokkos_fe_oracle_test_utils.h @@ -4,6 +4,7 @@ #include "gpu/kokkos_fe_evaluator.h" #include "gpu/kokkos_fe_face_map.h" #include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_shape_dispatch.h" #include "gpu/kokkos_fe_types.h" #include "libmesh/elem.h" @@ -72,7 +73,7 @@ struct element_fixture struct map_helper_context { std::vector ref_values; - Kokkos::View d_coords; + libMesh::Kokkos::default_storage_policy::vector_view d_coords; Kokkos::View d_xi; Kokkos::View d_eta; Kokkos::View d_zeta; @@ -85,8 +86,8 @@ struct map_helper_context struct face_helper_context { std::vector ref_values; - Kokkos::View d_face_coords; - Kokkos::View d_parent_coords; + libMesh::Kokkos::default_storage_policy::vector_view d_face_coords; + libMesh::Kokkos::default_storage_policy::vector_view d_parent_coords; Kokkos::View d_xi; Kokkos::View d_eta; Kokkos::View d_zeta; @@ -100,717 +101,15 @@ struct face_helper_context unsigned int n_face_nodes; }; -template -struct lagrange_evaluator_topology -{ - static const libMesh::ElemType value = libMesh::INVALID_ELEM; -}; - -#define KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(exact_topo, exact_order, evaluator_topo) \ - template <> \ - struct lagrange_evaluator_topology \ - { \ - static const libMesh::ElemType value = libMesh::evaluator_topo; \ - } - -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE2, FIRST, EDGE2); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE3, FIRST, EDGE2); 
-KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE3, SECOND, EDGE3); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(EDGE4, FIRST, EDGE2); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI3, FIRST, TRI3); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI6, FIRST, TRI3); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI6, SECOND, TRI6); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI7, FIRST, TRI3); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TRI7, SECOND, TRI6); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD4, FIRST, QUAD4); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD8, FIRST, QUAD4); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD8, SECOND, QUAD8); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD9, FIRST, QUAD4); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(QUAD9, SECOND, QUAD9); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET4, FIRST, TET4); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET10, FIRST, TET4); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET10, SECOND, TET10); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET14, FIRST, TET4); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(TET14, SECOND, TET10); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX8, FIRST, HEX8); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX20, FIRST, HEX8); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX20, SECOND, HEX20); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX27, FIRST, HEX8); -KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE(HEX27, SECOND, HEX27); - -#undef KOKKOS_TEST_UTILS_LAGRANGE_TOPOLOGY_CASE - -template -struct monomial_evaluator_dim -{ - static const unsigned int value = 0; -}; - -#define KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(exact_topo, dim_value) \ - template <> \ - struct monomial_evaluator_dim \ - { \ - static const unsigned int value = dim_value; \ - } - -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE2, 1); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE3, 1); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(EDGE4, 1); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI3, 2); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI6, 2); 
-KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TRI7, 2); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD4, 2); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD8, 2); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(QUAD9, 2); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET4, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET10, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(TET14, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX8, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX20, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(HEX27, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM6, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM15, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM18, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM20, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PRISM21, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID5, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID13, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID14, 3); -KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE(PYRAMID18, 3); - -#undef KOKKOS_TEST_UTILS_MONOMIAL_DIM_CASE - -template -struct monomial_order_evaluator; - -#define KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(dim_value, exact_order, impl_suffix, impl_order) \ - template <> \ - struct monomial_order_evaluator \ - { \ - LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, \ - libMesh::Real xi, \ - libMesh::Real eta, \ - libMesh::Real zeta) \ - { \ - return libMesh::Kokkos::impl_suffix::shape(i, xi, eta, zeta); \ - } \ - \ - LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, \ - libMesh::Real xi, \ - libMesh::Real eta, \ - libMesh::Real zeta) \ - { \ - return libMesh::Kokkos::impl_suffix::grad_shape(i, xi, eta, zeta); \ - } \ - } - -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, CONSTANT, MonomialImpl1D, 0); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FIRST, MonomialImpl1D, 1); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, SECOND, MonomialImpl1D, 2); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, THIRD, MonomialImpl1D, 3); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FOURTH, MonomialImpl1D, 4); 
-KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(1, FIFTH, MonomialImpl1D, 5); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, CONSTANT, MonomialImpl2D, 0); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FIRST, MonomialImpl2D, 1); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, SECOND, MonomialImpl2D, 2); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, THIRD, MonomialImpl2D, 3); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FOURTH, MonomialImpl2D, 4); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(2, FIFTH, MonomialImpl2D, 5); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, CONSTANT, MonomialImpl3D, 0); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FIRST, MonomialImpl3D, 1); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, SECOND, MonomialImpl3D, 2); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, THIRD, MonomialImpl3D, 3); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FOURTH, MonomialImpl3D, 4); -KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE(3, FIFTH, MonomialImpl3D, 5); - -#undef KOKKOS_TEST_UTILS_MONOMIAL_ORDER_CASE - -template -struct exact_shape_evaluator; - -template -struct exact_shape_evaluator -{ - LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, - libMesh::Real xi, - libMesh::Real eta, - libMesh::Real zeta) - { - return libMesh::Kokkos::map_shape::value>( - i, xi, eta, zeta); - } - - LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, - libMesh::Real xi, - libMesh::Real eta, - libMesh::Real zeta) - { - return libMesh::Kokkos::grad_map_shape::value>( - i, xi, eta, zeta); - } -}; - -template -struct exact_shape_evaluator -{ - LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, - libMesh::Real xi, - libMesh::Real eta, - libMesh::Real zeta) - { - return monomial_order_evaluator::value, ExactOrder>::shape( - i, xi, eta, zeta); - } - - LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, - libMesh::Real xi, - libMesh::Real eta, - libMesh::Real zeta) - { - return monomial_order_evaluator::value, ExactOrder>::grad_shape( - i, xi, eta, zeta); - } -}; - 
-template -LIBMESH_DEVICE_INLINE libMesh::Real -shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) -{ - return exact_shape_evaluator::shape(i, xi, eta, zeta); -} - -template -LIBMESH_DEVICE_INLINE libMesh::Kokkos::RealVector -grad_shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) -{ - return exact_shape_evaluator::grad_shape(i, xi, eta, zeta); -} - -template -inline int -dispatch_supported_monomial_order(libMesh::Order order, const Dispatcher & dispatcher) -{ - switch (order) - { - case libMesh::CONSTANT: - return dispatcher.template operator()(); - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - case libMesh::THIRD: - return dispatcher.template operator()(); - case libMesh::FOURTH: - return dispatcher.template operator()(); - case libMesh::FIFTH: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(libMesh::Kokkos::FEShapeKey{ libMesh::MONOMIAL, ExactTopo, order }); - } -} - -inline bool -is_supported_lagrange_map_topology(libMesh::ElemType topo); - -template -inline int -dispatch_supported_lagrange_shape_key(libMesh::Kokkos::FEShapeKey key, const Dispatcher & dispatcher) -{ - if (key.family != libMesh::LAGRANGE || !libMesh::Kokkos::supports_shape(key)) - return dispatcher.unsupported_key(key); - - switch (key.elem_type) - { - case libMesh::EDGE2: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::EDGE3: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::EDGE4: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return 
dispatcher.unsupported_key(key); - } - case libMesh::TRI3: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TRI6: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TRI7: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::QUAD4: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::QUAD8: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::QUAD9: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TET4: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TET10: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TET14: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::HEX8: - 
switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::HEX20: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::HEX27: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - default: - return dispatcher.unsupported_key(key); - } -} - -template -inline int -dispatch_supported_lagrange_shape_key_with_map(libMesh::Kokkos::FEShapeKey key, - const Dispatcher & dispatcher) -{ - if (key.family != libMesh::LAGRANGE || - !libMesh::Kokkos::supports_shape(key) || - !is_supported_lagrange_map_topology(key.elem_type)) - return dispatcher.unsupported_key(key); - - switch (key.elem_type) - { - case libMesh::EDGE2: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::EDGE3: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TRI3: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TRI6: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::QUAD4: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return 
dispatcher.unsupported_key(key); - } - case libMesh::QUAD8: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::QUAD9: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TET4: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::TET10: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::HEX8: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::HEX20: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - case libMesh::HEX27: - switch (key.order) - { - case libMesh::FIRST: - return dispatcher.template operator()(); - case libMesh::SECOND: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_key(key); - } - default: - return dispatcher.unsupported_key(key); - } -} - -template -inline int -dispatch_supported_shape_key(libMesh::Kokkos::FEShapeKey key, const Dispatcher & dispatcher) -{ - if (!libMesh::Kokkos::supports_shape(key)) - return dispatcher.unsupported_key(key); - - switch (key.family) - { - case libMesh::LAGRANGE: - return dispatch_supported_lagrange_shape_key(key, dispatcher); - - case libMesh::MONOMIAL: - switch 
(key.elem_type) - { - case libMesh::EDGE2: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::EDGE3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::EDGE4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI6: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI7: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD9: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET10: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET14: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX20: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX27: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM6: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM15: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM18: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM20: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM21: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID5: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID13: - return dispatch_supported_monomial_order(key.order, 
dispatcher); - case libMesh::PYRAMID14: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID18: - return dispatch_supported_monomial_order(key.order, dispatcher); - default: - return dispatcher.unsupported_key(key); - } - - default: - return dispatcher.unsupported_key(key); - } -} - -inline bool -is_supported_lagrange_map_topology(libMesh::ElemType topo) -{ - switch (topo) - { - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - case libMesh::TET4: - case libMesh::TET10: - case libMesh::HEX8: - case libMesh::HEX20: - case libMesh::HEX27: - return true; - - default: - return false; - } -} - -inline bool -supports_shape_key_with_lagrange_map(libMesh::Kokkos::FEShapeKey key) -{ - return libMesh::Kokkos::supports_shape(key) && - is_supported_lagrange_map_topology(key.elem_type); -} - -template -inline int -dispatch_supported_shape_key_with_lagrange_map(libMesh::Kokkos::FEShapeKey key, - const Dispatcher & dispatcher) -{ - if (!supports_shape_key_with_lagrange_map(key)) - return dispatcher.unsupported_key(key); - - switch (key.family) - { - case libMesh::LAGRANGE: - return dispatch_supported_lagrange_shape_key_with_map(key, dispatcher); - - case libMesh::MONOMIAL: - switch (key.elem_type) - { - case libMesh::EDGE2: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::EDGE3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI6: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD9: - return dispatch_supported_monomial_order(key.order, dispatcher); - case 
libMesh::TET4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET10: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX20: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX27: - return dispatch_supported_monomial_order(key.order, dispatcher); - default: - return dispatcher.unsupported_key(key); - } - - default: - return dispatcher.unsupported_key(key); - } -} - -inline bool -is_supported_lagrange_face_map_topology(libMesh::ElemType topo) -{ - switch (topo) - { - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - return true; - - default: - return false; - } -} - -template -inline int -dispatch_supported_lagrange_map_topology(libMesh::ElemType topo, - const Dispatcher & dispatcher) -{ - switch (topo) - { - case libMesh::EDGE2: - return dispatcher.template operator()(); - case libMesh::EDGE3: - return dispatcher.template operator()(); - case libMesh::TRI3: - return dispatcher.template operator()(); - case libMesh::TRI6: - return dispatcher.template operator()(); - case libMesh::QUAD4: - return dispatcher.template operator()(); - case libMesh::QUAD8: - return dispatcher.template operator()(); - case libMesh::QUAD9: - return dispatcher.template operator()(); - case libMesh::TET4: - return dispatcher.template operator()(); - case libMesh::TET10: - return dispatcher.template operator()(); - case libMesh::HEX8: - return dispatcher.template operator()(); - case libMesh::HEX20: - return dispatcher.template operator()(); - case libMesh::HEX27: - return dispatcher.template operator()(); - - default: - return dispatcher.unsupported_topology(topo); - } -} - -template -inline int -dispatch_supported_lagrange_face_map_topology(libMesh::ElemType topo, - const Dispatcher & 
dispatcher) -{ - if (!is_supported_lagrange_face_map_topology(topo)) - return dispatcher.unsupported_topology(topo); - - return dispatch_supported_lagrange_map_topology(topo, dispatcher); -} +using libMesh::Kokkos::dispatch_supported_lagrange_face_map_topology; +using libMesh::Kokkos::dispatch_supported_lagrange_map_topology; +using libMesh::Kokkos::dispatch_supported_shape_key; +using libMesh::Kokkos::dispatch_supported_shape_key_with_lagrange_map; +using libMesh::Kokkos::grad_shape_for_key; +using libMesh::Kokkos::is_supported_lagrange_face_map_topology; +using libMesh::Kokkos::is_supported_lagrange_map_topology; +using libMesh::Kokkos::shape_for_key; +using libMesh::Kokkos::supports_shape_key_with_lagrange_map; inline int compare_device_values(const Kokkos::View & d_values, @@ -914,16 +213,20 @@ upload_real(const std::vector & values, const char * label) return d; } -inline Kokkos::View +inline libMesh::Kokkos::default_storage_policy::vector_view upload_point_coordinates(const libMesh::Elem & elem, const char * label) { - Kokkos::View d(std::string(label), 3 * elem.n_nodes()); + auto d = libMesh::Kokkos::make_vector_storage(label, elem.n_nodes()); auto h = Kokkos::create_mirror_view(d); for (unsigned int i = 0; i < elem.n_nodes(); ++i) { - h(3 * i + 0) = elem.point(i)(0); - h(3 * i + 1) = elem.point(i)(1); - h(3 * i + 2) = elem.point(i)(2); + h(i, 0) = elem.point(i)(0); +#if LIBMESH_DIM > 1 + h(i, 1) = elem.point(i)(1); +#endif +#if LIBMESH_DIM > 2 + h(i, 2) = elem.point(i)(2); +#endif } Kokkos::deep_copy(d, h); return d; @@ -946,7 +249,9 @@ build_reference_fixture(libMesh::ElemType elem_type) for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) { - const libMesh::Point master = fixture.elem->master_point(i); + libMesh::Point master; + libmesh_error_msg_if(!libMesh::try_reference_node(elem_type, i, master), + "build_reference_fixture(): unsupported reference-node lookup"); const libMesh::Real xi = master(0); const libMesh::Real eta = master(1); const 
libMesh::Real zeta = master(2); @@ -998,7 +303,9 @@ build_flat_reference_fixture(libMesh::ElemType elem_type) for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) { - const libMesh::Point master = fixture.elem->master_point(i); + libMesh::Point master; + libmesh_error_msg_if(!libMesh::try_reference_node(elem_type, i, master), + "build_flat_reference_fixture(): unsupported reference-node lookup"); const libMesh::Real xi = master(0); const libMesh::Real eta = master(1); const libMesh::Real zeta = master(2); @@ -1128,8 +435,6 @@ evaluate_map_helper_context(const map_helper_context & context, const char * result_label, double tol = 1.0e-13) { - constexpr unsigned int max_nodes = 27; - Kokkos::View d_results(std::string(result_label), context.ref_values.size()); const auto d_coords = context.d_coords; const auto d_xi = context.d_xi; @@ -1142,15 +447,10 @@ evaluate_map_helper_context(const map_helper_context & context, Kokkos::parallel_for( context.nqp, KOKKOS_LAMBDA(int q) { - libMesh::Kokkos::RealVector nodes[max_nodes]; - for (unsigned int i = 0; i < n_nodes_; ++i) - nodes[i] = libMesh::Kokkos::make_vector( - d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); - libMesh::Kokkos::RealVector xyz; libMesh::Kokkos::RealTensor J; libMesh::Kokkos::physical_point_and_jacobian( - nodes, n_nodes_, d_xi(q), d_eta(q), d_zeta(q), xyz, J); + d_coords, n_nodes_, d_xi(q), d_eta(q), d_zeta(q), xyz, J); const libMesh::Real jxw_q = libMesh::Kokkos::volume_jxw(J, dim_, d_w(q)); const unsigned int base = 13 * static_cast(q); @@ -1277,9 +577,6 @@ evaluate_face_helper_context_2d(const face_helper_context & context, const char * result_label, double tol = 1.0e-13) { - constexpr unsigned int max_face_nodes = 9; - constexpr unsigned int max_parent_nodes = 27; - Kokkos::View d_results(std::string(result_label), context.ref_values.size()); const auto d_face_coords = context.d_face_coords; const auto d_parent_coords = context.d_parent_coords; @@ -1296,19 +593,10 @@ 
evaluate_face_helper_context_2d(const face_helper_context & context, Kokkos::parallel_for( context.nqp, KOKKOS_LAMBDA(int q) { - libMesh::Kokkos::RealVector face_nodes[max_face_nodes]; - libMesh::Kokkos::RealVector parent_nodes[max_parent_nodes]; - for (unsigned int i = 0; i < n_face_nodes_; ++i) - face_nodes[i] = libMesh::Kokkos::make_vector( - d_face_coords(3 * i + 0), d_face_coords(3 * i + 1), d_face_coords(3 * i + 2)); - for (unsigned int i = 0; i < n_parent_nodes_; ++i) - parent_nodes[i] = libMesh::Kokkos::make_vector( - d_parent_coords(3 * i + 0), d_parent_coords(3 * i + 1), d_parent_coords(3 * i + 2)); - const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( - face_nodes, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + d_face_coords, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); const libMesh::Kokkos::RealTensor parent_J = libMesh::Kokkos::jacobian( - parent_nodes, n_parent_nodes_, d_parent_xi(q), d_parent_eta(q), d_parent_zeta(q)); + d_parent_coords, n_parent_nodes_, d_parent_xi(q), d_parent_eta(q), d_parent_zeta(q)); const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/2u, d_w(q)); const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::edge_normal_on_parent_surface(J, parent_J); const unsigned int base = 13 * static_cast(q); @@ -1338,8 +626,6 @@ evaluate_face_helper_context_3d(const face_helper_context & context, const char * result_label, double tol = 1.0e-13) { - constexpr unsigned int max_face_nodes = 9; - Kokkos::View d_results(std::string(result_label), context.ref_values.size()); const auto d_face_coords = context.d_face_coords; const auto d_xi = context.d_xi; @@ -1351,13 +637,8 @@ evaluate_face_helper_context_3d(const face_helper_context & context, Kokkos::parallel_for( context.nqp, KOKKOS_LAMBDA(int q) { - libMesh::Kokkos::RealVector face_nodes[max_face_nodes]; - for (unsigned int i = 0; i < n_face_nodes_; ++i) - face_nodes[i] = libMesh::Kokkos::make_vector( - d_face_coords(3 * i + 0), d_face_coords(3 * i + 
1), d_face_coords(3 * i + 2)); - const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( - face_nodes, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + d_face_coords, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/3u, d_w(q)); const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::face_normal(J, /*parent_dim=*/3u); const unsigned int base = 13 * static_cast(q); diff --git a/tests/fe/kokkos_fe_reconstruction_oracle_test.K b/tests/fe/kokkos_fe_reconstruction_oracle_test.K index 9f2448d1ad8..b95311d053a 100644 --- a/tests/fe/kokkos_fe_reconstruction_oracle_test.K +++ b/tests/fe/kokkos_fe_reconstruction_oracle_test.K @@ -215,13 +215,8 @@ test_reconstruction_case_impl(const reconstruction_case & info) Kokkos::parallel_for( nqp, KOKKOS_LAMBDA(int q) { - RealVector geom_nodes[max_geom_nodes]; - for (unsigned int i = 0; i < n_geom_nodes_; ++i) - geom_nodes[i] = libMesh::Kokkos::make_vector( - d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); - const RealTensor J = libMesh::Kokkos::jacobian( - geom_nodes, n_geom_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + d_coords, n_geom_nodes_, d_xi(q), d_eta(q), d_zeta(q)); Real u = 0.0; RealVector grad_ref_sum = libMesh::Kokkos::zero_vector(); diff --git a/tests/fe/kokkos_fe_side_trace_oracle_test.K b/tests/fe/kokkos_fe_side_trace_oracle_test.K index 0ac88d737a3..309eaa49b66 100644 --- a/tests/fe/kokkos_fe_side_trace_oracle_test.K +++ b/tests/fe/kokkos_fe_side_trace_oracle_test.K @@ -236,16 +236,11 @@ test_side_trace_case_impl(const side_trace_case & info) const unsigned int q = static_cast(idx) / n_dofs; const unsigned int i = static_cast(idx) % n_dofs; - RealVector geom_nodes[max_geom_nodes]; - for (unsigned int k = 0; k < n_geom_nodes_; ++k) - geom_nodes[k] = libMesh::Kokkos::make_vector( - d_coords(3 * k + 0), d_coords(3 * k + 1), d_coords(3 * k + 2)); - const Real xi = d_parent_xi(q); const Real eta = d_parent_eta(q); const Real 
zeta = d_parent_zeta(q); const RealTensor J = - libMesh::Kokkos::jacobian(geom_nodes, n_geom_nodes_, xi, eta, zeta); + libMesh::Kokkos::jacobian(d_coords, n_geom_nodes_, xi, eta, zeta); const RealVector grad_ref = grad_shape_for_key(i, xi, eta, zeta); const RealVector grad_phys = libMesh::Kokkos::inverse(J, parent_dim) * grad_ref; const RealVector normal = libMesh::Kokkos::make_vector(d_normal_x(q), d_normal_y(q), d_normal_z(q)); diff --git a/tests/fe/kokkos_fe_types_oracle_test.K b/tests/fe/kokkos_fe_types_oracle_test.K index cc6b3cfd640..7f239971ba2 100644 --- a/tests/fe/kokkos_fe_types_oracle_test.K +++ b/tests/fe/kokkos_fe_types_oracle_test.K @@ -67,58 +67,13 @@ struct support_case static libMesh::ElemType host_side_topology_oracle(libMesh::ElemType parent_type) { - auto elem = build_reference_elem(parent_type); - - if (elem->dim() == 1) - return libMesh::EDGE2; - - auto first_side = elem->build_side_ptr(0); - const libMesh::ElemType side_topo = first_side->type(); - - for (unsigned int s = 1; s < elem->n_sides(); ++s) - { - auto side = elem->build_side_ptr(s); - if (side->type() != side_topo) - return libMesh::INVALID_ELEM; - } - - return side_topo; + return libMesh::side_topology_or_invalid(parent_type); } static libMesh::FEElemClass host_class_from_topology_oracle(libMesh::ElemType topo) { - auto elem = build_reference_elem(topo); - - if (elem->dim() == 1) - return libMesh::FEElemClass::EDGE; - - const libMesh::FEType fe_type(libMesh::FIRST, libMesh::LAGRANGE); - const unsigned int ndofs = libMesh::FEInterface::n_dofs(fe_type, 0, elem.get()); - - switch (elem->dim()) - { - case 2: - switch (ndofs) - { - case 3: return libMesh::FEElemClass::TRI; - case 4: return libMesh::FEElemClass::QUAD; - default: return libMesh::FEElemClass::N_CLASSES; - } - - case 3: - switch (ndofs) - { - case 4: return libMesh::FEElemClass::TET; - case 8: return libMesh::FEElemClass::HEX; - case 6: return libMesh::FEElemClass::PRISM; - case 5: return libMesh::FEElemClass::PYRAMID; - 
default: return libMesh::FEElemClass::N_CLASSES; - } - - default: - return libMesh::FEElemClass::N_CLASSES; - } + return libMesh::class_from_topology_or_invalid(topo); } static unsigned int diff --git a/tests/fe/kokkos_quadrature_oracle_test.K b/tests/fe/kokkos_quadrature_oracle_test.K index 96ebfe71640..c4fad7b4aee 100644 --- a/tests/fe/kokkos_quadrature_oracle_test.K +++ b/tests/fe/kokkos_quadrature_oracle_test.K @@ -251,17 +251,12 @@ test_physical_map_hex8() Kokkos::parallel_for( 1, KOKKOS_LAMBDA(int) { - RealVector nodes[8]; - for (unsigned int i = 0; i < 8; ++i) - nodes[i] = libMesh::Kokkos::make_vector( - d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); - const RealVector xyz_center = - libMesh::Kokkos::physical_point(nodes, 8, 0.0, 0.0, 0.0); + libMesh::Kokkos::physical_point(d_coords, 8, 0.0, 0.0, 0.0); const RealTensor J_center = - libMesh::Kokkos::jacobian(nodes, 8, 0.0, 0.0, 0.0); + libMesh::Kokkos::jacobian(d_coords, 8, 0.0, 0.0, 0.0); const RealVector xyz_corner = libMesh::Kokkos::physical_point( - nodes, 8, -1.0, -1.0, -1.0); + d_coords, 8, -1.0, -1.0, -1.0); d_results(0) = vector_component(xyz_center, 0); d_results(1) = vector_component(xyz_center, 1); @@ -307,15 +302,10 @@ test_physical_map_tri3() Kokkos::parallel_for( 1, KOKKOS_LAMBDA(int) { - RealVector nodes[3]; - for (unsigned int i = 0; i < 3; ++i) - nodes[i] = libMesh::Kokkos::make_vector( - d_coords(3 * i + 0), d_coords(3 * i + 1), d_coords(3 * i + 2)); - const RealVector xyz = libMesh::Kokkos::physical_point( - nodes, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + d_coords, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); const RealTensor J = libMesh::Kokkos::jacobian( - nodes, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + d_coords, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); d_results(0) = vector_component(xyz, 0); d_results(1) = vector_component(xyz, 1); @@ -573,9 +563,6 @@ check_face_qp_to_parent_case(const char * case_name, if (fail) { - std::vector refspace_nodes; - libMesh::FEBase::get_refspace_nodes(parent.type(), 
refspace_nodes); - std::printf(" face_qp mismatch: case=%s parent_type=%d side_type=%d side_id=%u\n", case_name, static_cast(parent.type()), @@ -595,12 +582,12 @@ check_face_qp_to_parent_case(const char * case_name, for (unsigned int k = 0; k < side.n_nodes(); ++k) { - const unsigned int parent_node = parent.local_side_node(side_id, k); - const libMesh::Point parent_refspace = refspace_nodes[parent_node]; - std::printf(" k=%u side_node_id=%llu parent_node=%u parent_refspace=(%.17g, %.17g, %.17g)\n", + libMesh::Point parent_refspace; + libmesh_error_msg_if(!libMesh::try_reference_side_node(parent.type(), side_id, k, parent_refspace), + "check_face_qp_to_parent_case(): unsupported parent side-node lookup"); + std::printf(" k=%u side_node_id=%llu parent_refspace=(%.17g, %.17g, %.17g)\n", k, libMesh::cast_int(side.node_id(k)), - parent_node, parent_refspace(0), parent_refspace(1), parent_refspace(2)); From 5d3059359b9bfd62a726eb01f84c36e168d03e92 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Wed, 13 May 2026 10:52:15 -0600 Subject: [PATCH 36/48] Share FE reference traits across host and Kokkos --- include/fe/fe_reference_element_traits.h | 549 +++++++++++++++++++++-- include/fe/fe_shape_traits.h | 97 +++- include/geom/cell_hex20.h | 16 +- include/geom/cell_hex27.h | 16 +- include/geom/cell_prism15.h | 16 +- include/geom/cell_prism18.h | 16 +- include/geom/cell_prism20.h | 16 +- include/geom/cell_prism21.h | 16 +- include/geom/cell_pyramid13.h | 16 +- include/geom/cell_pyramid14.h | 16 +- include/geom/cell_pyramid18.h | 16 +- include/geom/cell_tet10.h | 16 +- include/geom/cell_tet14.h | 16 +- include/geom/elem.h | 16 +- include/geom/face_quad8.h | 8 +- include/geom/face_quad9.h | 8 +- include/geom/face_tri6.h | 8 +- include/geom/face_tri7.h | 8 +- include/gpu/kokkos_fe_evaluator.h | 159 +++++-- include/gpu/kokkos_fe_lagrange_1d.h | 25 +- include/gpu/kokkos_fe_shape_dispatch.h | 478 ++++++++++---------- include/gpu/kokkos_tensor_ops.h | 22 +- include/libmesh/Makefile.am | 
68 +++ src/fe/fe_abstract.C | 229 +--------- src/geom/cell_hex20.C | 65 ++- src/geom/cell_hex27.C | 65 ++- src/geom/cell_prism15.C | 66 ++- src/geom/cell_prism18.C | 66 ++- src/geom/cell_prism20.C | 66 ++- src/geom/cell_prism21.C | 66 ++- src/geom/cell_pyramid13.C | 65 ++- src/geom/cell_pyramid14.C | 65 ++- src/geom/cell_pyramid18.C | 65 ++- src/geom/cell_tet10.C | 57 +-- src/geom/cell_tet14.C | 57 +-- src/geom/face_quad8.C | 29 +- src/geom/face_quad9.C | 29 +- src/geom/face_tri6.C | 28 +- src/geom/face_tri7.C | 28 +- tests/fe/kokkos_fe_contract_test.K | 39 +- tests/fe/kokkos_fe_map_oracle_test.K | 1 + tests/fe/kokkos_fe_oracle_test_utils.h | 5 +- tests/fe/kokkos_fe_shape_oracle_test.K | 1 + tests/fe/kokkos_fe_types_oracle_test.K | 2 +- 44 files changed, 1517 insertions(+), 1199 deletions(-) diff --git a/include/fe/fe_reference_element_traits.h b/include/fe/fe_reference_element_traits.h index 51139f9afd4..f20d7a87adc 100644 --- a/include/fe/fe_reference_element_traits.h +++ b/include/fe/fe_reference_element_traits.h @@ -5,6 +5,7 @@ #define LIBMESH_FE_REFERENCE_ELEMENT_TRAITS_H #include "libmesh/enum_elem_type.h" +#include "libmesh/libmesh.h" #include "libmesh/libmesh_device.h" #include "libmesh/point.h" @@ -13,6 +14,7 @@ namespace libMesh constexpr unsigned int edge2_side_node_counts[2] = {1, 1}; constexpr unsigned int edge3_side_node_counts[2] = {1, 1}; +constexpr unsigned int edge4_side_node_counts[2] = {1, 1}; constexpr unsigned int tri3_side_node_counts[3] = {2, 2, 2}; constexpr unsigned int tri6_side_node_counts[3] = {3, 3, 3}; @@ -41,6 +43,18 @@ constexpr unsigned int pyramid13_side_node_counts[5] = {6, 6, 6, 6, 8}; constexpr unsigned int pyramid14_side_node_counts[5] = {6, 6, 6, 6, 9}; constexpr unsigned int pyramid18_side_node_counts[5] = {7, 7, 7, 7, 9}; +constexpr unsigned int tet10_edge_node_counts[6] = {3, 3, 3, 3, 3, 3}; +constexpr unsigned int tet14_edge_node_counts[6] = {3, 3, 3, 3, 3, 3}; +constexpr unsigned int hex20_edge_node_counts[12] = {3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int hex27_edge_node_counts[12] = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism15_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism18_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism20_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism21_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int pyramid13_edge_node_counts[8] = {3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int pyramid14_edge_node_counts[8] = {3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int pyramid18_edge_node_counts[8] = {3, 3, 3, 3, 3, 3, 3, 3}; + constexpr unsigned int prism6_side_nodes[5][4] = { {0, 2, 1, 99}, @@ -233,6 +247,152 @@ constexpr unsigned int edge3_side_nodes[2][1] = {1} }; +constexpr unsigned int edge4_side_nodes[2][1] = + { + {0}, + {1} + }; + +constexpr unsigned int tet10_edge_nodes[6][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {0, 2, 6}, + {0, 3, 7}, + {1, 3, 8}, + {2, 3, 9} + }; + +constexpr unsigned int tet14_edge_nodes[6][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {0, 2, 6}, + {0, 3, 7}, + {1, 3, 8}, + {2, 3, 9} + }; + +constexpr unsigned int hex20_edge_nodes[12][3] = + { + {0, 1, 8}, + {1, 2, 9}, + {2, 3, 10}, + {0, 3, 11}, + {0, 4, 12}, + {1, 5, 13}, + {2, 6, 14}, + {3, 7, 15}, + {4, 5, 16}, + {5, 6, 17}, + {6, 7, 18}, + {4, 7, 19} + }; + +constexpr unsigned int hex27_edge_nodes[12][3] = + { + {0, 1, 8}, + {1, 2, 9}, + {2, 3, 10}, + {0, 3, 11}, + {0, 4, 12}, + {1, 5, 13}, + {2, 6, 14}, + {3, 7, 15}, + {4, 5, 16}, + {5, 6, 17}, + {6, 7, 18}, + {4, 7, 19} + }; + +constexpr unsigned int prism15_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int prism18_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 
14} + }; + +constexpr unsigned int prism20_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int prism21_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int pyramid13_edge_nodes[8][3] = + { + {0, 1, 5}, + {1, 2, 6}, + {2, 3, 7}, + {0, 3, 8}, + {0, 4, 9}, + {1, 4, 10}, + {2, 4, 11}, + {3, 4, 12} + }; + +constexpr unsigned int pyramid14_edge_nodes[8][3] = + { + {0, 1, 5}, + {1, 2, 6}, + {2, 3, 7}, + {0, 3, 8}, + {0, 4, 9}, + {1, 4, 10}, + {2, 4, 11}, + {3, 4, 12} + }; + +constexpr unsigned int pyramid18_edge_nodes[8][3] = + { + {0, 1, 5}, + {1, 2, 6}, + {2, 3, 7}, + {0, 3, 8}, + {0, 4, 9}, + {1, 4, 10}, + {2, 4, 11}, + {3, 4, 12} + }; + LIBMESH_DEVICE_INLINE bool requires_side_specific_topology(ElemType parent) { @@ -387,17 +547,23 @@ side_node_count_or_zero(ElemType parent, return side < 2 ? edge2_side_node_counts[side] : 0; case EDGE3: return side < 2 ? edge3_side_node_counts[side] : 0; + case EDGE4: + return side < 2 ? edge4_side_node_counts[side] : 0; case TRI3: + case TRISHELL3: return side < 3 ? tri3_side_node_counts[side] : 0; case TRI6: return side < 3 ? tri6_side_node_counts[side] : 0; case TRI7: return side < 3 ? tri7_side_node_counts[side] : 0; case QUAD4: + case QUADSHELL4: return side < 4 ? quad4_side_node_counts[side] : 0; case QUAD8: + case QUADSHELL8: return side < 4 ? quad8_side_node_counts[side] : 0; case QUAD9: + case QUADSHELL9: return side < 4 ? quad9_side_node_counts[side] : 0; case TET4: return side < 4 ? tet4_side_node_counts[side] : 0; @@ -434,6 +600,39 @@ side_node_count_or_zero(ElemType parent, } } +LIBMESH_DEVICE_INLINE unsigned int +edge_node_count_or_zero(ElemType parent, + unsigned int edge) +{ + switch (parent) + { + case TET10: + return edge < 6 ? 
tet10_edge_node_counts[edge] : 0; + case TET14: + return edge < 6 ? tet14_edge_node_counts[edge] : 0; + case HEX20: + return edge < 12 ? hex20_edge_node_counts[edge] : 0; + case HEX27: + return edge < 12 ? hex27_edge_node_counts[edge] : 0; + case PRISM15: + return edge < 9 ? prism15_edge_node_counts[edge] : 0; + case PRISM18: + return edge < 9 ? prism18_edge_node_counts[edge] : 0; + case PRISM20: + return edge < 9 ? prism20_edge_node_counts[edge] : 0; + case PRISM21: + return edge < 9 ? prism21_edge_node_counts[edge] : 0; + case PYRAMID13: + return edge < 8 ? pyramid13_edge_node_counts[edge] : 0; + case PYRAMID14: + return edge < 8 ? pyramid14_edge_node_counts[edge] : 0; + case PYRAMID18: + return edge < 8 ? pyramid18_edge_node_counts[edge] : 0; + default: + return 0; + } +} + LIBMESH_DEVICE_INLINE bool try_local_side_node(ElemType parent, unsigned int side, @@ -452,7 +651,11 @@ try_local_side_node(ElemType parent, case EDGE3: node = edge3_side_nodes[side][side_node]; return true; + case EDGE4: + node = edge4_side_nodes[side][side_node]; + return true; case TRI3: + case TRISHELL3: node = tri3_side_nodes[side][side_node]; return true; case TRI6: @@ -462,12 +665,15 @@ try_local_side_node(ElemType parent, node = tri7_side_nodes[side][side_node]; return true; case QUAD4: + case QUADSHELL4: node = quad4_side_nodes[side][side_node]; return true; case QUAD8: + case QUADSHELL8: node = quad8_side_nodes[side][side_node]; return true; case QUAD9: + case QUADSHELL9: node = quad9_side_nodes[side][side_node]; return true; case TET4: @@ -520,6 +726,56 @@ try_local_side_node(ElemType parent, } } +LIBMESH_DEVICE_INLINE bool +try_local_edge_node(ElemType parent, + unsigned int edge, + unsigned int edge_node, + unsigned int & node) +{ + const unsigned int count = edge_node_count_or_zero(parent, edge); + if (!count || edge_node >= count) + return false; + + switch (parent) + { + case TET10: + node = tet10_edge_nodes[edge][edge_node]; + return true; + case TET14: + node = 
tet14_edge_nodes[edge][edge_node]; + return true; + case HEX20: + node = hex20_edge_nodes[edge][edge_node]; + return true; + case HEX27: + node = hex27_edge_nodes[edge][edge_node]; + return true; + case PRISM15: + node = prism15_edge_nodes[edge][edge_node]; + return true; + case PRISM18: + node = prism18_edge_nodes[edge][edge_node]; + return true; + case PRISM20: + node = prism20_edge_nodes[edge][edge_node]; + return true; + case PRISM21: + node = prism21_edge_nodes[edge][edge_node]; + return true; + case PYRAMID13: + node = pyramid13_edge_nodes[edge][edge_node]; + return true; + case PYRAMID14: + node = pyramid14_edge_nodes[edge][edge_node]; + return true; + case PYRAMID18: + node = pyramid18_edge_nodes[edge][edge_node]; + return true; + default: + return false; + } +} + LIBMESH_DEVICE_INLINE bool try_reference_node(ElemType type, unsigned int node, @@ -529,6 +785,7 @@ try_reference_node(ElemType type, { case EDGE2: case EDGE3: + case EDGE4: switch (node) { case 0: @@ -543,6 +800,18 @@ try_reference_node(ElemType type, pt = Point(0.0); return true; } + if (type == EDGE4) + { + pt = Point(-1. / 3.); + return true; + } + return false; + case 3: + if (type == EDGE4) + { + pt = Point(1. 
/ 3.); + return true; + } return false; default: return false; @@ -807,82 +1076,236 @@ try_reference_node(ElemType type, return false; } + case PYRAMID5: case PYRAMID13: case PYRAMID14: + case PYRAMID18: switch (node) { - case 9: - pt = Point(-0.5, -0.5, 0.5); + case 0: + pt = Point(-1.0, -1.0, 0.0); return true; - case 10: - pt = Point(0.5, -0.5, 0.5); + case 1: + pt = Point(1.0, -1.0, 0.0); return true; - case 11: - pt = Point(0.5, 0.5, 0.5); + case 2: + pt = Point(1.0, 1.0, 0.0); return true; - case 12: - pt = Point(-0.5, 0.5, 0.5); + case 3: + pt = Point(-1.0, 1.0, 0.0); return true; - default: - return false; - } - - case PYRAMID18: - switch (node) - { - case 9: - pt = Point(-0.5, -0.5, 0.5); + case 4: + pt = Point(0.0, 0.0, 1.0); return true; - case 10: - pt = Point(0.5, -0.5, 0.5); + case 5: + pt = Point(0.0, -1.0, 0.0); return true; - case 11: - pt = Point(0.5, 0.5, 0.5); + case 6: + pt = Point(1.0, 0.0, 0.0); return true; - case 12: - pt = Point(-0.5, 0.5, 0.5); + case 7: + pt = Point(0.0, 1.0, 0.0); return true; - case 14: - pt = Point(-2. / 3., 0.0, 1. / 3.); + case 8: + pt = Point(-1.0, 0.0, 0.0); return true; + case 9: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(-0.5, -0.5, 0.5); + return true; + } + return false; + case 10: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(0.5, -0.5, 0.5); + return true; + } + return false; + case 11: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(0.5, 0.5, 0.5); + return true; + } + return false; + case 12: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(-0.5, 0.5, 0.5); + return true; + } + return false; + case 13: + if (type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + case 14: + if (type == PYRAMID18) + { + pt = Point(-2. / 3., 0.0, 1. / 3.); + return true; + } + return false; case 15: - pt = Point(0.0, 2. 
/ 3., 1. / 3.); - return true; + if (type == PYRAMID18) + { + pt = Point(0.0, 2. / 3., 1. / 3.); + return true; + } + return false; case 16: - pt = Point(2. / 3., 0.0, 1. / 3.); - return true; + if (type == PYRAMID18) + { + pt = Point(2. / 3., 0.0, 1. / 3.); + return true; + } + return false; case 17: - pt = Point(0.0, -2. / 3., 1. / 3.); - return true; + if (type == PYRAMID18) + { + pt = Point(0.0, -2. / 3., 1. / 3.); + return true; + } + return false; default: return false; } + case PRISM6: + case PRISM15: + case PRISM18: case PRISM20: + case PRISM21: switch (node) { - case 18: - pt = Point(1. / 3., 1. / 3., -1.0); + case 0: + pt = Point(0.0, 0.0, -1.0); return true; - case 19: - pt = Point(1. / 3., 1. / 3., 1.0); + case 1: + pt = Point(1.0, 0.0, -1.0); return true; - default: + case 2: + pt = Point(0.0, 1.0, -1.0); + return true; + case 3: + pt = Point(0.0, 0.0, 1.0); + return true; + case 4: + pt = Point(1.0, 0.0, 1.0); + return true; + case 5: + pt = Point(0.0, 1.0, 1.0); + return true; + case 6: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.0, -1.0); + return true; + } + return false; + case 7: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.5, -1.0); + return true; + } + return false; + case 8: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.5, -1.0); + return true; + } + return false; + case 9: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + case 10: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(1.0, 0.0, 0.0); + return true; + } + return false; + case 11: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 1.0, 0.0); + return true; + } + return false; + case 12: + if (type == PRISM15 || 
type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.0, 1.0); + return true; + } + return false; + case 13: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.5, 1.0); + return true; + } + return false; + case 14: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.5, 1.0); + return true; + } + return false; + case 15: + if (type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.0, 0.0); + return true; + } + return false; + case 16: + if (type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.5, 0.0); + return true; + } + return false; + case 17: + if (type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.5, 0.0); + return true; + } return false; - } - - case PRISM21: - switch (node) - { case 18: - pt = Point(1. / 3., 1. / 3., -1.0); - return true; + if (type == PRISM20 || type == PRISM21) + { + pt = Point(1. / 3., 1. / 3., -1.0); + return true; + } + return false; case 19: - pt = Point(1. / 3., 1. / 3., 1.0); - return true; + if (type == PRISM20 || type == PRISM21) + { + pt = Point(1. / 3., 1. / 3., 1.0); + return true; + } + return false; case 20: - pt = Point(1. / 3., 1. / 3., 0.0); - return true; + if (type == PRISM21) + { + pt = Point(1. / 3., 1. 
/ 3., 0.0); + return true; + } + return false; default: return false; } @@ -892,6 +1315,38 @@ try_reference_node(ElemType type, } } +LIBMESH_DEVICE_INLINE bool +try_refspace_node(ElemType type, + unsigned int node, + Point & pt) +{ + switch (type) + { + case NODEELEM: + if (!node) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + + case TRISHELL3: + return try_reference_node(TRI3, node, pt); + + case QUADSHELL4: + return try_reference_node(QUAD4, node, pt); + + case QUADSHELL8: + return try_reference_node(QUAD8, node, pt); + + case QUADSHELL9: + return try_reference_node(QUAD9, node, pt); + + default: + return try_reference_node(type, node, pt); + } +} + LIBMESH_DEVICE_INLINE bool try_reference_side_node(ElemType parent, unsigned int side, diff --git a/include/fe/fe_shape_traits.h b/include/fe/fe_shape_traits.h index b90b561fbc1..bd300f31859 100644 --- a/include/fe/fe_shape_traits.h +++ b/include/fe/fe_shape_traits.h @@ -76,7 +76,7 @@ side_topology_or_invalid(ElemType parent) case EDGE2: case EDGE3: case EDGE4: - return EDGE2; + return NODEELEM; case TRI3: case QUAD4: @@ -181,7 +181,7 @@ topology_dim_or_zero(ElemType topo) return elem_class_dim_or_zero(class_from_topology_or_invalid(topo)); } -LIBMESH_DEVICE_INLINE ElemType +LIBMESH_DEVICE_INLINE constexpr ElemType lagrange_shape_topology_or_invalid(FEShapeKey key) { switch (key.order) @@ -249,6 +249,16 @@ lagrange_shape_topology_or_invalid(FEShapeKey key) return INVALID_ELEM; } + case THIRD: + switch (key.elem_type) + { + case EDGE4: + return EDGE4; + + default: + return INVALID_ELEM; + } + default: return INVALID_ELEM; } @@ -535,7 +545,7 @@ monomial_exact_n_dofs_or_zero(ElemType elem_type, } } -LIBMESH_DEVICE_INLINE unsigned int +LIBMESH_DEVICE_INLINE constexpr unsigned int monomial_evaluator_dim_or_zero(ElemType elem_type) { switch (elem_type) @@ -575,6 +585,87 @@ monomial_evaluator_dim_or_zero(ElemType elem_type) } } +LIBMESH_DEVICE_INLINE bool +supports_shape(FEShapeKey key); + 
+LIBMESH_DEVICE_INLINE bool +supports_lagrange_map_topology(ElemType topo) +{ + switch (topo) + { + case EDGE2: + case EDGE3: + case EDGE4: + case TRI3: + case TRI6: + case QUAD4: + case QUAD8: + case QUAD9: + case TET4: + case TET10: + case HEX8: + case HEX20: + case HEX27: + return true; + + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_lagrange_face_map_topology(ElemType topo) +{ + return supports_lagrange_map_topology(topo); +} + +template +LIBMESH_DEVICE_INLINE auto +dispatch_lagrange_map_topology_or(ElemType topo, + const Op & op, + const Unsupported & unsupported) + -> decltype(op.template operator()()) +{ + switch (topo) + { + case EDGE2: + return op.template operator()(); + case EDGE3: + return op.template operator()(); + case EDGE4: + return op.template operator()(); + case TRI3: + return op.template operator()(); + case TRI6: + return op.template operator()(); + case QUAD4: + return op.template operator()(); + case QUAD8: + return op.template operator()(); + case QUAD9: + return op.template operator()(); + case TET4: + return op.template operator()(); + case TET10: + return op.template operator()(); + case HEX8: + return op.template operator()(); + case HEX20: + return op.template operator()(); + case HEX27: + return op.template operator()(); + default: + return unsupported(topo); + } +} + +LIBMESH_DEVICE_INLINE bool +supports_shape_with_lagrange_map(FEShapeKey key) +{ + return supports_shape(key) && + supports_lagrange_map_topology(key.elem_type); +} + LIBMESH_DEVICE_INLINE bool supports_shape(FEShapeKey key) { diff --git a/include/geom/cell_hex20.h b/include/geom/cell_hex20.h index 70c37c23f70..cbbf7bcb5b7 100644 --- a/include/geom/cell_hex20.h +++ b/include/geom/cell_hex20.h @@ -145,13 +145,13 @@ class Hex20 final : public Hex virtual Order default_order() const override; /** - * \returns \p Hex20::side_nodes_map[side][side_node] after doing some range checking. 
+ * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Hex20::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -216,18 +216,6 @@ class Hex20 final : public Hex static const int nodes_per_side = 8; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Hex20. */ diff --git a/include/geom/cell_hex27.h b/include/geom/cell_hex27.h index 0777540a303..63268b48495 100644 --- a/include/geom/cell_hex27.h +++ b/include/geom/cell_hex27.h @@ -160,13 +160,13 @@ class Hex27 final : public Hex virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Hex27::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Hex27::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. 
*/ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -231,18 +231,6 @@ class Hex27 final : public Hex static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Hex27. */ diff --git a/include/geom/cell_prism15.h b/include/geom/cell_prism15.h index 68374b06f6f..3c8707cdad4 100644 --- a/include/geom/cell_prism15.h +++ b/include/geom/cell_prism15.h @@ -150,13 +150,13 @@ class Prism15 final : public Prism virtual Order default_order() const override; /** - * \returns \p Prism15::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism15::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -221,18 +221,6 @@ class Prism15 final : public Prism static const int nodes_per_side = 8; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. 
- */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Prism15. */ diff --git a/include/geom/cell_prism18.h b/include/geom/cell_prism18.h index 530f6f37970..4d6e53ef9f9 100644 --- a/include/geom/cell_prism18.h +++ b/include/geom/cell_prism18.h @@ -165,13 +165,13 @@ class Prism18 final : public Prism virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Prism18::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism18::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -236,18 +236,6 @@ class Prism18 final : public Prism static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Prism18. */ diff --git a/include/geom/cell_prism20.h b/include/geom/cell_prism20.h index af1bedf2634..d58193f94b6 100644 --- a/include/geom/cell_prism20.h +++ b/include/geom/cell_prism20.h @@ -169,13 +169,13 @@ class Prism20 final : public Prism virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Prism20::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. 
*/ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism20::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -241,18 +241,6 @@ class Prism20 final : public Prism static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/cell_prism21.h b/include/geom/cell_prism21.h index 894f86789fd..1b34fccc470 100644 --- a/include/geom/cell_prism21.h +++ b/include/geom/cell_prism21.h @@ -172,13 +172,13 @@ class Prism21 final : public Prism virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Prism21::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism21::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. 
*/ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -244,18 +244,6 @@ class Prism21 final : public Prism static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/cell_pyramid13.h b/include/geom/cell_pyramid13.h index f0d2819fb2e..971ddba79a9 100644 --- a/include/geom/cell_pyramid13.h +++ b/include/geom/cell_pyramid13.h @@ -149,13 +149,13 @@ class Pyramid13 final : public Pyramid virtual Order default_order() const override; /** - * \returns \p Pyramid13::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Pyramid13::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -209,18 +209,6 @@ class Pyramid13 final : public Pyramid static const int nodes_per_side = 8; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. 
- */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * Specialization for computing the volume of a Pyramid13. */ diff --git a/include/geom/cell_pyramid14.h b/include/geom/cell_pyramid14.h index 87547dc63e4..8c58f2300e6 100644 --- a/include/geom/cell_pyramid14.h +++ b/include/geom/cell_pyramid14.h @@ -167,13 +167,13 @@ class Pyramid14 final : public Pyramid virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Pyramid14::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Pyramid14::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -227,18 +227,6 @@ class Pyramid14 final : public Pyramid static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * Specialization for computing the volume of a Pyramid14. */ diff --git a/include/geom/cell_pyramid18.h b/include/geom/cell_pyramid18.h index 33f5c21e707..eb049a5aac2 100644 --- a/include/geom/cell_pyramid18.h +++ b/include/geom/cell_pyramid18.h @@ -173,13 +173,13 @@ class Pyramid18 final : public Pyramid virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Pyramid18::side_nodes_map[side][side_node] after doing some range checking. 
+ * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Pyramid18::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -234,18 +234,6 @@ class Pyramid18 final : public Pyramid static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/cell_tet10.h b/include/geom/cell_tet10.h index 5f454fe755c..b7c381df29d 100644 --- a/include/geom/cell_tet10.h +++ b/include/geom/cell_tet10.h @@ -146,13 +146,13 @@ class Tet10 final : public Tet virtual Order default_order() const override; /** - * \returns \p Tet10::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Tet10::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. 
*/ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -217,18 +217,6 @@ class Tet10 final : public Tet static const int nodes_per_side = 6; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Tet10. */ diff --git a/include/geom/cell_tet14.h b/include/geom/cell_tet14.h index 43245751eec..800044de525 100644 --- a/include/geom/cell_tet14.h +++ b/include/geom/cell_tet14.h @@ -152,13 +152,13 @@ class Tet14 final : public Tet virtual Order default_order() const override; /** - * \returns \p Tet14::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Tet14::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -222,18 +222,6 @@ class Tet14 final : public Tet static const int nodes_per_side = 7; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. 
- */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/elem.h b/include/geom/elem.h index 5ce4e229aee..add84059882 100644 --- a/include/geom/elem.h +++ b/include/geom/elem.h @@ -2787,9 +2787,10 @@ Elem::simple_build_side_ptr (const unsigned int i) { libmesh_assert_less (i, this->n_sides()); + Subclass & real_me = cast_ref(*this); std::unique_ptr face = std::make_unique(); for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Subclass::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(real_me.local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -2817,8 +2818,9 @@ Elem::simple_build_side_ptr (std::unique_ptr & side, { side->set_interior_parent(this); side->inherit_data_from(*this); + Subclass & real_me = cast_ref(*this); for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Subclass::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(real_me.local_side_node(i, n))); } } @@ -2841,9 +2843,9 @@ Elem::simple_side_ptr (std::unique_ptr & side, else { side->subdomain_id() = this->subdomain_id(); - + Subclass & real_me = cast_ref(*this); for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Mapclass::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(real_me.local_side_node(i, n))); } } @@ -2881,10 +2883,11 @@ Elem::simple_build_edge_ptr (const unsigned int i) { libmesh_assert_less (i, this->n_edges()); + Subclass & real_me = cast_ref(*this); std::unique_ptr edge = std::make_unique(); for (auto n : edge->node_index_range()) - edge->set_node(n, this->node_ptr(Subclass::edge_nodes_map[i][n])); + edge->set_node(n, this->node_ptr(real_me.local_edge_node(i, n))); edge->set_interior_parent(this); edge->inherit_data_from(*this); @@ -2912,8 +2915,9 @@ Elem::simple_build_edge_ptr (std::unique_ptr & 
edge, else { edge->inherit_data_from(*this); + Subclass & real_me = cast_ref(*this); for (auto n : edge->node_index_range()) - edge->set_node(n, this->node_ptr(Subclass::edge_nodes_map[i][n])); + edge->set_node(n, this->node_ptr(real_me.local_edge_node(i, n))); } } diff --git a/include/geom/face_quad8.h b/include/geom/face_quad8.h index 4bcdaa39433..4b510aa8eb7 100644 --- a/include/geom/face_quad8.h +++ b/include/geom/face_quad8.h @@ -141,7 +141,7 @@ class Quad8 : public Quad virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Quad8::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -190,12 +190,6 @@ class Quad8 : public Quad static const int num_nodes = 8; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * An optimized method for approximating the area of a * QUAD8 using quadrature. diff --git a/include/geom/face_quad9.h b/include/geom/face_quad9.h index e065d8bafa5..bc666861761 100644 --- a/include/geom/face_quad9.h +++ b/include/geom/face_quad9.h @@ -149,7 +149,7 @@ class Quad9 : public Quad virtual dof_id_type key () const override; /** - * \returns \p Quad9::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -197,12 +197,6 @@ class Quad9 : public Quad static const int num_nodes = 9; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. 
- */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * An optimized method for approximating the area of a * QUAD9 using quadrature. diff --git a/include/geom/face_tri6.h b/include/geom/face_tri6.h index 6417999e9fd..2573d9b7f70 100644 --- a/include/geom/face_tri6.h +++ b/include/geom/face_tri6.h @@ -151,7 +151,7 @@ class Tri6 : public Tri virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Tri6::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -200,12 +200,6 @@ class Tri6 : public Tri static const int num_nodes = 6; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * An optimized method for approximating the area of a * TRI6 using quadrature. diff --git a/include/geom/face_tri7.h b/include/geom/face_tri7.h index 833bb3ac3b6..0556ccb2f09 100644 --- a/include/geom/face_tri7.h +++ b/include/geom/face_tri7.h @@ -156,7 +156,7 @@ class Tri7 : public Tri virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Tri7::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -204,12 +204,6 @@ class Tri7 : public Tri static const int num_nodes = 7; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. 
- */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * \returns A bounding box (not necessarily the minimal bounding box) * containing the geometric element. diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h index 8b802d9dbe4..6d7e8bf110f 100644 --- a/include/gpu/kokkos_fe_evaluator.h +++ b/include/gpu/kokkos_fe_evaluator.h @@ -27,6 +27,23 @@ namespace libMesh::Kokkos { +LIBMESH_DEVICE_INLINE libMesh::ElemType +lagrange_shape_topology_for_key(FEShapeKey key); + +LIBMESH_DEVICE_INLINE Real +eval_lagrange_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta); + +LIBMESH_DEVICE_INLINE RealVector +eval_lagrange_grad_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta); + namespace detail { @@ -35,24 +52,14 @@ LIBMESH_DEVICE_INLINE auto dispatch_lagrange_topology(libMesh::ElemType topo, const Op & op) -> decltype(op.template operator()()) { - switch (topo) - { - case libMesh::EDGE2: return op.template operator()(); - case libMesh::EDGE3: return op.template operator()(); - case libMesh::TRI3: return op.template operator()(); - case libMesh::TRI6: return op.template operator()(); - case libMesh::QUAD4: return op.template operator()(); - case libMesh::QUAD8: return op.template operator()(); - case libMesh::QUAD9: return op.template operator()(); - case libMesh::TET4: return op.template operator()(); - case libMesh::TET10: return op.template operator()(); - case libMesh::HEX8: return op.template operator()(); - case libMesh::HEX20: return op.template operator()(); - case libMesh::HEX27: return op.template operator()(); - default: + return libMesh::dispatch_lagrange_map_topology_or( + topo, + op, + [&](libMesh::ElemType) -> decltype(op.template operator()()) + { detail::abort_unsupported("dispatch_lagrange_topology(): unsupported evaluator topology"); return op.template operator()(); - } + }); } template @@ -156,6 +163,88 @@ struct MonomialGradShapeOp } 
}; +template +LIBMESH_DEVICE_INLINE auto +dispatch_shape_family(libMesh::FEShapeKey key, + const LagrangeOp & lagrange_op, + const MonomialOp & monomial_op, + const char * unsupported_message) + -> decltype(lagrange_op()) +{ + switch (key.family) + { + case libMesh::LAGRANGE: + return lagrange_op(); + + case libMesh::MONOMIAL: + return monomial_op(); + + default: + detail::abort_unsupported(unsupported_message); + return lagrange_op(); + } +} + +struct KeyedLagrangeShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE Real operator()() const + { + return eval_lagrange_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + } +}; + +struct KeyedLagrangeGradShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + return eval_lagrange_grad_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + } +}; + +struct KeyedMonomialShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE Real operator()() const + { + return detail::dispatch_monomial(key.elem_type, + key.order, + detail::MonomialShapeOp{i, xi, eta, zeta}); + } +}; + +struct KeyedMonomialGradShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + return detail::dispatch_monomial(key.elem_type, + key.order, + detail::MonomialGradShapeOp{i, xi, eta, zeta}); + } +}; + } // namespace detail // ── On-device helpers: element class -> spatial dimension ───────────────────── @@ -305,20 +394,11 @@ shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) return Real(0); } - switch (key.family) - { - case libMesh::LAGRANGE: - return eval_lagrange_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); - - case libMesh::MONOMIAL: - return 
detail::dispatch_monomial(key.elem_type, - key.order, - detail::MonomialShapeOp{i, xi, eta, zeta}); - - default: - detail::abort_unsupported("shape(): unsupported FE family"); - return Real(0); - } + return detail::dispatch_shape_family( + key, + detail::KeyedLagrangeShapeOp{key, i, xi, eta, zeta}, + detail::KeyedMonomialShapeOp{key, i, xi, eta, zeta}, + "shape(): unsupported FE family"); } /// Evaluate the reference-space gradient of the i-th physics shape function. @@ -333,20 +413,11 @@ grad_shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) return zero_vector(); } - switch (key.family) - { - case libMesh::LAGRANGE: - return eval_lagrange_grad_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); - - case libMesh::MONOMIAL: - return detail::dispatch_monomial(key.elem_type, - key.order, - detail::MonomialGradShapeOp{i, xi, eta, zeta}); - - default: - detail::abort_unsupported("grad_shape(): unsupported FE family"); - return zero_vector(); - } + return detail::dispatch_shape_family( + key, + detail::KeyedLagrangeGradShapeOp{key, i, xi, eta, zeta}, + detail::KeyedMonomialGradShapeOp{key, i, xi, eta, zeta}, + "grad_shape(): unsupported FE family"); } } // namespace libMesh::Kokkos diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h index 9a47ffa4b18..b98e2f89acd 100644 --- a/include/gpu/kokkos_fe_lagrange_1d.h +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -1,6 +1,6 @@ // Kokkos FEEvaluator specializations for 1-D Lagrange elements. // -// Covers EDGE2 (linear) and EDGE3 (quadratic). +// Covers EDGE2 (linear), EDGE3 (quadratic), and EDGE4 (cubic). 
// Reference-element coordinate convention (libMesh-compatible): // EDGE2/EDGE3: xi in [-1, 1] // @@ -66,6 +66,29 @@ struct FEEvaluator #endif }; +// ── EDGE4 (cubic edge, 4 nodes) ────────────────────────────────────────────── +// Node ordering matches libMesh: 0->left(-1), 1->right(+1), 2->(-1/3), 3->(+1/3) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return libMesh::fe_lagrange_1D_cubic_shape(i, xi); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return make_vector(libMesh::fe_lagrange_1D_cubic_shape_deriv(i, 0, xi), 0.0, 0.0); + } +#endif +}; + } // namespace libMesh::Kokkos #endif // LIBMESH_KOKKOS_FE_LAGRANGE_1D_H diff --git a/include/gpu/kokkos_fe_shape_dispatch.h b/include/gpu/kokkos_fe_shape_dispatch.h index 8f4503ed746..2989eb6d193 100644 --- a/include/gpu/kokkos_fe_shape_dispatch.h +++ b/include/gpu/kokkos_fe_shape_dispatch.h @@ -13,86 +13,6 @@ namespace libMesh::Kokkos { -template -struct lagrange_evaluator_topology -{ - static const libMesh::ElemType value = libMesh::INVALID_ELEM; -}; - -#define LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(exact_topo, exact_order, evaluator_topo) \ - template <> \ - struct lagrange_evaluator_topology \ - { \ - static const libMesh::ElemType value = libMesh::evaluator_topo; \ - } - -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE2, FIRST, EDGE2); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE3, FIRST, EDGE2); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE3, SECOND, EDGE3); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(EDGE4, FIRST, EDGE2); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI3, FIRST, TRI3); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI6, FIRST, TRI3); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI6, SECOND, TRI6); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI7, FIRST, TRI3); 
-LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TRI7, SECOND, TRI6); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD4, FIRST, QUAD4); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD8, FIRST, QUAD4); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD8, SECOND, QUAD8); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD9, FIRST, QUAD4); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(QUAD9, SECOND, QUAD9); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET4, FIRST, TET4); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET10, FIRST, TET4); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET10, SECOND, TET10); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET14, FIRST, TET4); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(TET14, SECOND, TET10); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX8, FIRST, HEX8); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX20, FIRST, HEX8); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX20, SECOND, HEX20); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX27, FIRST, HEX8); -LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE(HEX27, SECOND, HEX27); - -#undef LIBMESH_KOKKOS_LAGRANGE_TOPOLOGY_CASE - -template -struct monomial_evaluator_dim -{ - static const unsigned int value = 0; -}; - -#define LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(exact_topo, dim_value) \ - template <> \ - struct monomial_evaluator_dim \ - { \ - static const unsigned int value = dim_value; \ - } - -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(EDGE2, 1); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(EDGE3, 1); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(EDGE4, 1); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TRI3, 2); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TRI6, 2); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TRI7, 2); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(QUAD4, 2); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(QUAD8, 2); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(QUAD9, 2); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TET4, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TET10, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(TET14, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(HEX8, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(HEX20, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(HEX27, 3); 
-LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM6, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM15, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM18, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM20, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PRISM21, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID5, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID13, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID14, 3); -LIBMESH_KOKKOS_MONOMIAL_DIM_CASE(PYRAMID18, 3); - -#undef LIBMESH_KOKKOS_MONOMIAL_DIM_CASE - template struct monomial_order_evaluator; @@ -142,12 +62,16 @@ struct exact_shape_evaluator; template struct exact_shape_evaluator { + static constexpr libMesh::FEShapeKey exact_key{ libMesh::LAGRANGE, ExactTopo, ExactOrder }; + static constexpr libMesh::ElemType evaluator_topology = + libMesh::lagrange_shape_topology_or_invalid(exact_key); + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) { - return map_shape::value>( + return map_shape( i, xi, eta, zeta); } @@ -156,7 +80,7 @@ struct exact_shape_evaluator libMesh::Real eta, libMesh::Real zeta) { - return grad_map_shape::value>( + return grad_map_shape( i, xi, eta, zeta); } }; @@ -164,12 +88,15 @@ struct exact_shape_evaluator template struct exact_shape_evaluator { + static constexpr unsigned int evaluator_dim = + libMesh::monomial_evaluator_dim_or_zero(ExactTopo); + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) { - return monomial_order_evaluator::value, ExactOrder>::shape( + return monomial_order_evaluator::shape( i, xi, eta, zeta); } @@ -178,7 +105,7 @@ struct exact_shape_evaluator libMesh::Real eta, libMesh::Real zeta) { - return monomial_order_evaluator::value, ExactOrder>::grad_shape( + return monomial_order_evaluator::grad_shape( i, xi, eta, zeta); } }; @@ -222,11 +149,8 @@ dispatch_supported_monomial_order(libMesh::Order order, const Dispatcher & dispa template inline int 
-dispatch_supported_lagrange_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +dispatch_exact_lagrange_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) { - if (key.family != libMesh::LAGRANGE || !libMesh::supports_shape(key)) - return dispatcher.unsupported_key(key); - switch (key.elem_type) { case libMesh::EDGE2: @@ -252,6 +176,8 @@ dispatch_supported_lagrange_shape_key(libMesh::FEShapeKey key, const Dispatcher { case libMesh::FIRST: return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); default: return dispatcher.unsupported_key(key); } @@ -372,120 +298,270 @@ dispatch_supported_lagrange_shape_key(libMesh::FEShapeKey key, const Dispatcher } } -inline bool -is_supported_lagrange_map_topology(libMesh::ElemType topo) +template +inline int +dispatch_exact_lagrange_shape_key_with_map(libMesh::FEShapeKey key, const Dispatcher & dispatcher) { - switch (topo) + switch (key.elem_type) { case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::TRI3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return 
dispatcher.unsupported_key(key); + } case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::QUAD8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } case libMesh::HEX27: - return true; - + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } default: - return false; + return dispatcher.unsupported_key(key); } } template inline int -dispatch_supported_lagrange_shape_key_with_map(libMesh::FEShapeKey key, - const Dispatcher & dispatcher) +dispatch_exact_monomial_shape_key(libMesh::FEShapeKey key, 
const Dispatcher & dispatcher) { - if (key.family != libMesh::LAGRANGE || - !libMesh::supports_shape(key) || - !is_supported_lagrange_map_topology(key.elem_type)) - return dispatcher.unsupported_key(key); - - return dispatch_supported_lagrange_shape_key(key, dispatcher); + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI7: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM15: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM18: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM20: + return dispatch_supported_monomial_order(key.order, 
dispatcher); + case libMesh::PRISM21: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID5: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID13: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID18: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } } template inline int -dispatch_supported_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +dispatch_exact_monomial_shape_key_with_map(libMesh::FEShapeKey key, const Dispatcher & dispatcher) { - if (!libMesh::supports_shape(key)) - return dispatcher.unsupported_key(key); + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } +} +template +inline int +dispatch_exact_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ switch (key.family) { case libMesh::LAGRANGE: - return dispatch_supported_lagrange_shape_key(key, dispatcher); + return dispatch_exact_lagrange_shape_key(key, dispatcher); case libMesh::MONOMIAL: - switch (key.elem_type) - { - case libMesh::EDGE2: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::EDGE3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::EDGE4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI6: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI7: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD9: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET10: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET14: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX20: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX27: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM6: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM15: - return dispatch_supported_monomial_order(key.order, dispatcher); - case 
libMesh::PRISM18: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM20: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PRISM21: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID5: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID13: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID14: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::PYRAMID18: - return dispatch_supported_monomial_order(key.order, dispatcher); - default: - return dispatcher.unsupported_key(key); - } + return dispatch_exact_monomial_shape_key(key, dispatcher); default: return dispatcher.unsupported_key(key); } } +template +inline int +dispatch_supported_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (!libMesh::supports_shape(key)) + return dispatcher.unsupported_key(key); + + return dispatch_exact_shape_key(key, dispatcher); +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo) +{ + return libMesh::supports_lagrange_map_topology(topo); +} + inline bool supports_shape_key_with_lagrange_map(libMesh::FEShapeKey key) { - return libMesh::supports_shape(key) && - is_supported_lagrange_map_topology(key.elem_type); + return libMesh::supports_shape_with_lagrange_map(key); } template @@ -499,38 +575,10 @@ dispatch_supported_shape_key_with_lagrange_map(libMesh::FEShapeKey key, switch (key.family) { case libMesh::LAGRANGE: - return dispatch_supported_lagrange_shape_key_with_map(key, dispatcher); + return dispatch_exact_lagrange_shape_key_with_map(key, dispatcher); case libMesh::MONOMIAL: - switch (key.elem_type) - { - case libMesh::EDGE2: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::EDGE3: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI3: - return 
dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TRI6: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::QUAD9: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET4: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::TET10: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX8: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX20: - return dispatch_supported_monomial_order(key.order, dispatcher); - case libMesh::HEX27: - return dispatch_supported_monomial_order(key.order, dispatcher); - default: - return dispatcher.unsupported_key(key); - } + return dispatch_exact_monomial_shape_key_with_map(key, dispatcher); default: return dispatcher.unsupported_key(key); @@ -540,20 +588,7 @@ dispatch_supported_shape_key_with_lagrange_map(libMesh::FEShapeKey key, inline bool is_supported_lagrange_face_map_topology(libMesh::ElemType topo) { - switch (topo) - { - case libMesh::EDGE2: - case libMesh::EDGE3: - case libMesh::TRI3: - case libMesh::TRI6: - case libMesh::QUAD4: - case libMesh::QUAD8: - case libMesh::QUAD9: - return true; - - default: - return false; - } + return libMesh::supports_lagrange_face_map_topology(topo); } template @@ -561,35 +596,10 @@ inline int dispatch_supported_lagrange_map_topology(libMesh::ElemType topo, const Dispatcher & dispatcher) { - switch (topo) - { - case libMesh::EDGE2: - return dispatcher.template operator()(); - case libMesh::EDGE3: - return dispatcher.template operator()(); - case libMesh::TRI3: - return dispatcher.template operator()(); - case libMesh::TRI6: - return dispatcher.template operator()(); - case libMesh::QUAD4: - return dispatcher.template operator()(); - case 
libMesh::QUAD8: - return dispatcher.template operator()(); - case libMesh::QUAD9: - return dispatcher.template operator()(); - case libMesh::TET4: - return dispatcher.template operator()(); - case libMesh::TET10: - return dispatcher.template operator()(); - case libMesh::HEX8: - return dispatcher.template operator()(); - case libMesh::HEX20: - return dispatcher.template operator()(); - case libMesh::HEX27: - return dispatcher.template operator()(); - default: - return dispatcher.unsupported_topology(topo); - } + return libMesh::dispatch_lagrange_map_topology_or( + topo, + dispatcher, + [&](libMesh::ElemType unsupported) { return dispatcher.unsupported_topology(unsupported); }); } template diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h index 71926b92b1f..9b62289c36c 100644 --- a/include/gpu/kokkos_tensor_ops.h +++ b/include/gpu/kokkos_tensor_ops.h @@ -514,20 +514,18 @@ auto det(const TensorLike & T_in) return T_in.det(); } -template -LIBMESH_DEVICE_INLINE -auto inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) - -> std::enable_if_t, ResultTensor> -{ - return detail::inverse(T_in, dim); -} - -template +template LIBMESH_DEVICE_INLINE auto inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) - -> std::enable_if_t, tensor_semantic_type_t> -{ - return inverse>(T_in, dim); + -> std::enable_if_t, + std::conditional_t::value, + tensor_semantic_type_t, + ResultTensor>> +{ + using output_type = std::conditional_t::value, + tensor_semantic_type_t, + ResultTensor>; + return detail::inverse(T_in, dim); } template diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index f0bba052ded..a560c36c9a3 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -82,6 +82,11 @@ BUILT_SOURCES = \ fe_lagrange_shape_1D.h \ fe_macro.h \ fe_map.h \ + fe_reference_element_traits.h \ + fe_serendipity_lagrange.h \ + fe_shape_traits.h \ + fe_simplex_lagrange.h \ + 
fe_tensor_product_lagrange.h \ fe_transformation_base.h \ fe_type.h \ fe_xyz_map.h \ @@ -174,7 +179,18 @@ BUILT_SOURCES = \ overlap_coupling.h \ point_neighbor_coupling.h \ sibling_coupling.h \ + kokkos_fe_base.h \ + kokkos_fe_evaluator.h \ + kokkos_fe_face_map.h \ + kokkos_fe_lagrange_1d.h \ + kokkos_fe_lagrange_2d.h \ + kokkos_fe_lagrange_3d.h \ + kokkos_fe_map.h \ + kokkos_fe_monomial.h \ + kokkos_fe_shape_dispatch.h \ + kokkos_fe_types.h \ kokkos_linalg_base.h \ + kokkos_quadrature.h \ kokkos_storage.h \ kokkos_storage_policy.h \ kokkos_tensor_ops.h \ @@ -347,6 +363,7 @@ BUILT_SOURCES = \ quadrature_conical.h \ quadrature_gauss.h \ quadrature_gauss_lobatto.h \ + quadrature_gauss_rules.h \ quadrature_gm.h \ quadrature_grid.h \ quadrature_jacobi.h \ @@ -853,6 +870,21 @@ fe_macro.h: $(top_srcdir)/include/fe/fe_macro.h fe_map.h: $(top_srcdir)/include/fe/fe_map.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +fe_reference_element_traits.h: $(top_srcdir)/include/fe/fe_reference_element_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_serendipity_lagrange.h: $(top_srcdir)/include/fe/fe_serendipity_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_shape_traits.h: $(top_srcdir)/include/fe/fe_shape_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_simplex_lagrange.h: $(top_srcdir)/include/fe/fe_simplex_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_tensor_product_lagrange.h: $(top_srcdir)/include/fe/fe_tensor_product_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + fe_transformation_base.h: $(top_srcdir)/include/fe/fe_transformation_base.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1129,9 +1161,42 @@ point_neighbor_coupling.h: $(top_srcdir)/include/ghosting/point_neighbor_couplin sibling_coupling.h: $(top_srcdir)/include/ghosting/sibling_coupling.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_fe_base.h: $(top_srcdir)/include/gpu/kokkos_fe_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_evaluator.h: 
$(top_srcdir)/include/gpu/kokkos_fe_evaluator.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_face_map.h: $(top_srcdir)/include/gpu/kokkos_fe_face_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_1d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_1d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_2d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_2d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_3d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_3d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_map.h: $(top_srcdir)/include/gpu/kokkos_fe_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_monomial.h: $(top_srcdir)/include/gpu/kokkos_fe_monomial.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_shape_dispatch.h: $(top_srcdir)/include/gpu/kokkos_fe_shape_dispatch.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_types.h: $(top_srcdir)/include/gpu/kokkos_fe_types.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + kokkos_linalg_base.h: $(top_srcdir)/include/gpu/kokkos_linalg_base.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_quadrature.h: $(top_srcdir)/include/gpu/kokkos_quadrature.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + kokkos_storage.h: $(top_srcdir)/include/gpu/kokkos_storage.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1648,6 +1713,9 @@ quadrature_gauss.h: $(top_srcdir)/include/quadrature/quadrature_gauss.h quadrature_gauss_lobatto.h: $(top_srcdir)/include/quadrature/quadrature_gauss_lobatto.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +quadrature_gauss_rules.h: $(top_srcdir)/include/quadrature/quadrature_gauss_rules.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + quadrature_gm.h: $(top_srcdir)/include/quadrature/quadrature_gm.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/src/fe/fe_abstract.C b/src/fe/fe_abstract.C index 20d435ae44a..2d39d86f39f 100644 --- a/src/fe/fe_abstract.C +++ b/src/fe/fe_abstract.C @@ -26,6 +26,7 @@ #include "libmesh/dof_map.h" 
#include "libmesh/elem.h" #include "libmesh/fe_interface.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/numeric_vector.h" #include "libmesh/periodic_boundaries.h" #include "libmesh/periodic_boundary.h" @@ -405,230 +406,10 @@ void FEAbstract::get_refspace_nodes(const ElemType itemType, std::vector Utility::enum_to_string(itemType)); nodes.resize(n_nodes); - switch(itemType) - { - case NODEELEM: - { - nodes[0] = Point (0.,0.,0.); - return; - } - case EDGE3: - { - nodes[2] = Point (0.,0.,0.); - libmesh_fallthrough(); - } - case EDGE2: - { - nodes[0] = Point (-1.,0.,0.); - nodes[1] = Point (1.,0.,0.); - return; - } - case EDGE4: // not nested with EDGE3 - { - nodes[0] = Point (-1.,0.,0.); - nodes[1] = Point (1.,0.,0.); - nodes[2] = Point (-1./3.,0.,0.); - nodes[3] - Point (1./3.,0.,0.); - return; - } - case TRI7: - { - nodes[6] = Point (1./3.,1./3.,0.); - libmesh_fallthrough(); - } - case TRI6: - { - nodes[3] = Point (.5,0.,0.); - nodes[4] = Point (.5,.5,0.); - nodes[5] = Point (0.,.5,0.); - libmesh_fallthrough(); - } - case TRI3: - case TRISHELL3: - { - nodes[0] = Point (0.,0.,0.); - nodes[1] = Point (1.,0.,0.); - nodes[2] = Point (0.,1.,0.); - return; - } - case QUAD9: - case QUADSHELL9: - { - nodes[8] = Point (0.,0.,0.); - libmesh_fallthrough(); - } - case QUAD8: - case QUADSHELL8: - { - nodes[4] = Point (0.,-1.,0.); - nodes[5] = Point (1.,0.,0.); - nodes[6] = Point (0.,1.,0.); - nodes[7] = Point (-1.,0.,0.); - libmesh_fallthrough(); - } - case QUAD4: - case QUADSHELL4: - { - nodes[0] = Point (-1.,-1.,0.); - nodes[1] = Point (1.,-1.,0.); - nodes[2] = Point (1.,1.,0.); - nodes[3] = Point (-1.,1.,0.); - return; - } - case TET14: - { - nodes[10] = Point (1/Real(3),1/Real(3),0.); - nodes[11] = Point (1/Real(3),0.,1/Real(3)); - nodes[12] = Point (1/Real(3),1/Real(3),1/Real(3)); - nodes[13] = Point (0.,1/Real(3),1/Real(3)); - libmesh_fallthrough(); - } - case TET10: - { - nodes[4] = Point (.5,0.,0.); - nodes[5] = Point (.5,.5,0.); - nodes[6] = 
Point (0.,.5,0.); - nodes[7] = Point (0.,0.,.5); - nodes[8] = Point (.5,0.,.5); - nodes[9] = Point (0.,.5,.5); - libmesh_fallthrough(); - } - case TET4: - { - nodes[0] = Point (0.,0.,0.); - nodes[1] = Point (1.,0.,0.); - nodes[2] = Point (0.,1.,0.); - nodes[3] = Point (0.,0.,1.); - return; - } - case HEX27: - { - nodes[20] = Point (0.,0.,-1.); - nodes[21] = Point (0.,-1.,0.); - nodes[22] = Point (1.,0.,0.); - nodes[23] = Point (0.,1.,0.); - nodes[24] = Point (-1.,0.,0.); - nodes[25] = Point (0.,0.,1.); - nodes[26] = Point (0.,0.,0.); - libmesh_fallthrough(); - } - case HEX20: - { - nodes[8] = Point (0.,-1.,-1.); - nodes[9] = Point (1.,0.,-1.); - nodes[10] = Point (0.,1.,-1.); - nodes[11] = Point (-1.,0.,-1.); - nodes[12] = Point (-1.,-1.,0.); - nodes[13] = Point (1.,-1.,0.); - nodes[14] = Point (1.,1.,0.); - nodes[15] = Point (-1.,1.,0.); - nodes[16] = Point (0.,-1.,1.); - nodes[17] = Point (1.,0.,1.); - nodes[18] = Point (0.,1.,1.); - nodes[19] = Point (-1.,0.,1.); - libmesh_fallthrough(); - } - case HEX8: - { - nodes[0] = Point (-1.,-1.,-1.); - nodes[1] = Point (1.,-1.,-1.); - nodes[2] = Point (1.,1.,-1.); - nodes[3] = Point (-1.,1.,-1.); - nodes[4] = Point (-1.,-1.,1.); - nodes[5] = Point (1.,-1.,1.); - nodes[6] = Point (1.,1.,1.); - nodes[7] = Point (-1.,1.,1.); - return; - } - case PRISM21: - { - nodes[20] = Point (1/Real(3),1/Real(3),0); - libmesh_fallthrough(); - } - case PRISM20: - { - nodes[18] = Point (1/Real(3),1/Real(3),-1); - nodes[19] = Point (1/Real(3),1/Real(3),1); - libmesh_fallthrough(); - } - case PRISM18: - { - nodes[15] = Point (.5,0.,0.); - nodes[16] = Point (.5,.5,0.); - nodes[17] = Point (0.,.5,0.); - libmesh_fallthrough(); - } - case PRISM15: - { - nodes[6] = Point (.5,0.,-1.); - nodes[7] = Point (.5,.5,-1.); - nodes[8] = Point (0.,.5,-1.); - nodes[9] = Point (0.,0.,0.); - nodes[10] = Point (1.,0.,0.); - nodes[11] = Point (0.,1.,0.); - nodes[12] = Point (.5,0.,1.); - nodes[13] = Point (.5,.5,1.); - nodes[14] = Point (0.,.5,1.); - 
libmesh_fallthrough(); - } - case PRISM6: - { - nodes[0] = Point (0.,0.,-1.); - nodes[1] = Point (1.,0.,-1.); - nodes[2] = Point (0.,1.,-1.); - nodes[3] = Point (0.,0.,1.); - nodes[4] = Point (1.,0.,1.); - nodes[5] = Point (0.,1.,1.); - return; - } - case PYRAMID18: - { - // triangle centers - nodes[14] = Point (-2/Real(3),0.,1/Real(3)); - nodes[15] = Point (0.,2/Real(3),1/Real(3)); - nodes[16] = Point (2/Real(3),0.,1/Real(3)); - nodes[17] = Point (0.,-2/Real(3),1/Real(3)); - - libmesh_fallthrough(); - } - case PYRAMID14: - { - // base center - nodes[13] = Point (0.,0.,0.); - - libmesh_fallthrough(); - } - case PYRAMID13: - { - // base midedge - nodes[5] = Point (0.,-1.,0.); - nodes[6] = Point (1.,0.,0.); - nodes[7] = Point (0.,1.,0.); - nodes[8] = Point (-1,0.,0.); - - // lateral midedge - nodes[9] = Point (-.5,-.5,.5); - nodes[10] = Point (.5,-.5,.5); - nodes[11] = Point (.5,.5,.5); - nodes[12] = Point (-.5,.5,.5); - - libmesh_fallthrough(); - } - case PYRAMID5: - { - // base corners - nodes[0] = Point (-1.,-1.,0.); - nodes[1] = Point (1.,-1.,0.); - nodes[2] = Point (1.,1.,0.); - nodes[3] = Point (-1.,1.,0.); - // apex - nodes[4] = Point (0.,0.,1.); - return; - } - - default: - libmesh_error_msg("ERROR: Unknown element type " << Utility::enum_to_string(itemType)); - } + for (unsigned int i = 0; i != n_nodes; ++i) + if (!try_refspace_node(itemType, i, nodes[i])) + libmesh_error_msg("ERROR: Unknown reference-space node " << i << " for element type " << + Utility::enum_to_string(itemType)); } diff --git a/src/geom/cell_hex20.C b/src/geom/cell_hex20.C index d695ce4e15a..a1f54f00fe0 100644 --- a/src/geom/cell_hex20.C +++ b/src/geom/cell_hex20.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_hex20.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad8.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -34,32 +35,6 @@ const int Hex20::num_nodes; const int 
Hex20::nodes_per_side; const int Hex20::nodes_per_edge; -const unsigned int Hex20::side_nodes_map[Hex20::num_sides][Hex20::nodes_per_side] = - { - {0, 3, 2, 1, 11, 10, 9, 8}, // Side 0 - {0, 1, 5, 4, 8, 13, 16, 12}, // Side 1 - {1, 2, 6, 5, 9, 14, 17, 13}, // Side 2 - {2, 3, 7, 6, 10, 15, 18, 14}, // Side 3 - {3, 0, 4, 7, 11, 12, 19, 15}, // Side 4 - {4, 5, 6, 7, 16, 17, 18, 19} // Side 5 - }; - -const unsigned int Hex20::edge_nodes_map[Hex20::num_edges][Hex20::nodes_per_edge] = - { - {0, 1, 8}, // Edge 0 - {1, 2, 9}, // Edge 1 - {2, 3, 10}, // Edge 2 - {0, 3, 11}, // Edge 3 - {0, 4, 12}, // Edge 4 - {1, 5, 13}, // Edge 5 - {2, 6, 14}, // Edge 6 - {3, 7, 15}, // Edge 7 - {4, 5, 16}, // Edge 8 - {5, 6, 17}, // Edge 9 - {6, 7, 18}, // Edge 10 - {4, 7, 19} // Edge 11 - }; - // ------------------------------------------------------------ // Hex20 class member functions @@ -86,32 +61,44 @@ bool Hex20::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Hex20::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Hex20::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = 
this->local_edge_node(e, i); + return nodes; } bool Hex20::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -182,7 +169,10 @@ unsigned int Hex20::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Hex20::nodes_per_side); - return Hex20::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Hex20::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -193,7 +183,10 @@ unsigned int Hex20::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Hex20::nodes_per_edge); - return Hex20::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Hex20::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/cell_hex27.C b/src/geom/cell_hex27.C index c432ddfb7fc..395590ec269 100644 --- a/src/geom/cell_hex27.C +++ b/src/geom/cell_hex27.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_hex27.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -34,32 +35,6 @@ const int Hex27::num_nodes; const int Hex27::nodes_per_side; const int Hex27::nodes_per_edge; -const unsigned int Hex27::side_nodes_map[Hex27::num_sides][Hex27::nodes_per_side] = - { - {0, 3, 2, 1, 11, 10, 9, 8, 20}, // Side 0 - {0, 1, 5, 4, 8, 13, 16, 12, 
21}, // Side 1 - {1, 2, 6, 5, 9, 14, 17, 13, 22}, // Side 2 - {2, 3, 7, 6, 10, 15, 18, 14, 23}, // Side 3 - {3, 0, 4, 7, 11, 12, 19, 15, 24}, // Side 4 - {4, 5, 6, 7, 16, 17, 18, 19, 25} // Side 5 - }; - -const unsigned int Hex27::edge_nodes_map[Hex27::num_edges][Hex27::nodes_per_edge] = - { - {0, 1, 8}, // Edge 0 - {1, 2, 9}, // Edge 1 - {2, 3, 10}, // Edge 2 - {0, 3, 11}, // Edge 3 - {0, 4, 12}, // Edge 4 - {1, 5, 13}, // Edge 5 - {2, 6, 14}, // Edge 6 - {3, 7, 15}, // Edge 7 - {4, 5, 16}, // Edge 8 - {5, 6, 17}, // Edge 9 - {6, 7, 18}, // Edge 10 - {4, 7, 19} // Edge 11 - }; - // ------------------------------------------------------------ // Hex27 class member functions @@ -92,32 +67,44 @@ bool Hex27::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Hex27::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Hex27::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Hex27::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return 
std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -225,7 +212,10 @@ unsigned int Hex27::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Hex27::nodes_per_side); - return Hex27::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Hex27::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -236,7 +226,10 @@ unsigned int Hex27::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Hex27::nodes_per_edge); - return Hex27::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Hex27::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/cell_prism15.C b/src/geom/cell_prism15.C index 51f0adb30de..a03dd164a4f 100644 --- a/src/geom/cell_prism15.C +++ b/src/geom/cell_prism15.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism15.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad8.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" @@ -35,28 +36,6 @@ const int Prism15::num_nodes; const int Prism15::nodes_per_side; const int Prism15::nodes_per_edge; -const unsigned int Prism15::side_nodes_map[Prism15::num_sides][Prism15::nodes_per_side] = - { - {0, 2, 1, 8, 7, 6, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11}, // Side 3 - {3, 4, 5, 12, 13, 14, 99, 99} // Side 4 - }; - -const unsigned 
int Prism15::edge_nodes_map[Prism15::num_edges][Prism15::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism15 class member functions @@ -83,33 +62,44 @@ bool Prism15::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism15::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism15::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism15::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + 
return true; + return false; } @@ -162,7 +152,10 @@ unsigned int Prism15::local_side_node(unsigned int side, // Some sides have 6 nodes. libmesh_assert(!(side==0 || side==4) || side_node < 6); - return Prism15::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism15::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -173,7 +166,10 @@ unsigned int Prism15::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism15::nodes_per_edge); - return Prism15::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism15::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -205,7 +201,7 @@ std::unique_ptr Prism15::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism15::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -252,7 +248,7 @@ void Prism15::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism15::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_prism18.C b/src/geom/cell_prism18.C index 56bce347090..d8651c7fe4a 100644 --- a/src/geom/cell_prism18.C +++ b/src/geom/cell_prism18.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism18.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" @@ -36,28 +37,6 @@ const int Prism18::num_nodes; const int Prism18::nodes_per_side; const int 
Prism18::nodes_per_edge; -const unsigned int Prism18::side_nodes_map[Prism18::num_sides][Prism18::nodes_per_side] = - { - {0, 2, 1, 8, 7, 6, 99, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9, 15}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10, 16}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11, 17}, // Side 3 - {3, 4, 5, 12, 13, 14, 99, 99, 99} // Side 4 - }; - -const unsigned int Prism18::edge_nodes_map[Prism18::num_edges][Prism18::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism18 class member functions @@ -88,33 +67,44 @@ bool Prism18::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism18::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 
0 : 3; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism18::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism18::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -201,7 +191,10 @@ unsigned int Prism18::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 6); - return Prism18::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism18::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -212,7 +205,10 @@ unsigned int Prism18::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism18::nodes_per_edge); - return Prism18::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism18::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -244,7 +240,7 @@ std::unique_ptr Prism18::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism18::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -292,7 +288,7 @@ void Prism18::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism18::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_prism20.C b/src/geom/cell_prism20.C index c1cab408568..d1f1866dfe2 100644 --- a/src/geom/cell_prism20.C +++ b/src/geom/cell_prism20.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism20.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" @@ -36,28 +37,6 @@ const int Prism20::num_nodes; const int Prism20::nodes_per_side; const int Prism20::nodes_per_edge; -const unsigned int Prism20::side_nodes_map[Prism20::num_sides][Prism20::nodes_per_side] = - { - {0, 2, 1, 8, 7, 
6, 18, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9, 15}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10, 16}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11, 17}, // Side 3 - {3, 4, 5, 12, 13, 14, 19, 99, 99} // Side 4 - }; - -const unsigned int Prism20::edge_nodes_map[Prism20::num_edges][Prism20::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism20 class member functions @@ -88,33 +67,44 @@ bool Prism20::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism20::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism20::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism20::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -211,7 +201,10 @@ unsigned int Prism20::local_side_node(unsigned int side, // Some sides have 7 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 7); - return Prism20::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism20::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -222,7 +215,10 @@ unsigned int Prism20::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism20::nodes_per_edge); - return Prism20::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism20::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -254,7 +250,7 @@ std::unique_ptr Prism20::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism20::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -302,7 +298,7 @@ void Prism20::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism20::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_prism21.C b/src/geom/cell_prism21.C index 2e6a5777849..ad733084217 100644 --- a/src/geom/cell_prism21.C +++ b/src/geom/cell_prism21.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism21.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" @@ -51,28 +52,6 @@ const int Prism21::num_nodes; const int Prism21::nodes_per_side; const int Prism21::nodes_per_edge; -const unsigned int Prism21::side_nodes_map[Prism21::num_sides][Prism21::nodes_per_side] = - { - {0, 2, 1, 8, 7, 
6, 18, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9, 15}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10, 16}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11, 17}, // Side 3 - {3, 4, 5, 12, 13, 14, 19, 99, 99} // Side 4 - }; - -const unsigned int Prism21::edge_nodes_map[Prism21::num_edges][Prism21::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism21 class member functions @@ -105,33 +84,44 @@ bool Prism21::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism21::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism21::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism21::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -231,7 +221,10 @@ unsigned int Prism21::local_side_node(unsigned int side, // Some sides have 7 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 7); - return Prism21::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism21::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -242,7 +235,10 @@ unsigned int Prism21::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism21::nodes_per_edge); - return Prism21::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism21::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -274,7 +270,7 @@ std::unique_ptr Prism21::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism21::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -322,7 +318,7 @@ void Prism21::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism21::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_pyramid13.C b/src/geom/cell_pyramid13.C index faf84e00f3d..8a69a61590d 100644 --- a/src/geom/cell_pyramid13.C +++ b/src/geom/cell_pyramid13.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_pyramid13.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/face_quad8.h" #include "libmesh/enum_io_package.h" @@ -36,27 +37,6 @@ const int Pyramid13::num_nodes; const int Pyramid13::nodes_per_side; const int Pyramid13::nodes_per_edge; -const unsigned int Pyramid13::side_nodes_map[Pyramid13::num_sides][Pyramid13::nodes_per_side] = - 
{ - {0, 1, 4, 5, 10, 9, 99, 99}, // Side 0 (front) - {1, 2, 4, 6, 11, 10, 99, 99}, // Side 1 (right) - {2, 3, 4, 7, 12, 11, 99, 99}, // Side 2 (back) - {3, 0, 4, 8, 9, 12, 99, 99}, // Side 3 (left) - {0, 3, 2, 1, 8, 7, 6, 5} // Side 4 (base) - }; - -const unsigned int Pyramid13::edge_nodes_map[Pyramid13::num_edges][Pyramid13::nodes_per_edge] = - { - {0, 1, 5}, // Edge 0 - {1, 2, 6}, // Edge 1 - {2, 3, 7}, // Edge 2 - {0, 3, 8}, // Edge 3 - {0, 4, 9}, // Edge 4 - {1, 4, 10}, // Edge 5 - {2, 4, 11}, // Edge 6 - {3, 4, 12} // Edge 7 - }; - // ------------------------------------------------------------ // Pyramid13 class member functions @@ -89,33 +69,44 @@ bool Pyramid13::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Pyramid13::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s == 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Pyramid13::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Pyramid13::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -147,7 +138,10 @@ unsigned int Pyramid13::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(side == 4 || side_node < 6); - return Pyramid13::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Pyramid13::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -158,7 +152,10 @@ unsigned int Pyramid13::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Pyramid13::nodes_per_edge); - return Pyramid13::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Pyramid13::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -190,7 +187,7 @@ std::unique_ptr Pyramid13::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Pyramid13::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -236,7 +233,7 @@ void Pyramid13::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Pyramid13::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_pyramid14.C b/src/geom/cell_pyramid14.C index bcdd7e0f9e6..69d3e164eab 100644 --- a/src/geom/cell_pyramid14.C +++ b/src/geom/cell_pyramid14.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_pyramid14.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" @@ -36,27 +37,6 @@ const int Pyramid14::num_nodes; const int Pyramid14::nodes_per_side; const int Pyramid14::nodes_per_edge; -const unsigned int 
Pyramid14::side_nodes_map[Pyramid14::num_sides][Pyramid14::nodes_per_side] = - { - {0, 1, 4, 5, 10, 9, 99, 99, 99}, // Side 0 (front) - {1, 2, 4, 6, 11, 10, 99, 99, 99}, // Side 1 (right) - {2, 3, 4, 7, 12, 11, 99, 99, 99}, // Side 2 (back) - {3, 0, 4, 8, 9, 12, 99, 99, 99}, // Side 3 (left) - {0, 3, 2, 1, 8, 7, 6, 5, 13} // Side 4 (base) - }; - -const unsigned int Pyramid14::edge_nodes_map[Pyramid14::num_edges][Pyramid14::nodes_per_edge] = - { - {0, 1, 5}, // Edge 0 - {1, 2, 6}, // Edge 1 - {2, 3, 7}, // Edge 2 - {0, 3, 8}, // Edge 3 - {0, 4, 9}, // Edge 4 - {1, 4, 10}, // Edge 5 - {2, 4, 11}, // Edge 6 - {3, 4, 12} // Edge 7 - }; - // ------------------------------------------------------------ // Pyramid14 class member functions @@ -93,33 +73,44 @@ bool Pyramid14::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Pyramid14::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s == 4) ? 
0 : 3; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Pyramid14::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Pyramid14::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } bool Pyramid14::has_affine_map() const @@ -171,7 +162,10 @@ unsigned int Pyramid14::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(side == 4 || side_node < 6); - return Pyramid14::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Pyramid14::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -182,7 +176,10 @@ unsigned int Pyramid14::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Pyramid14::nodes_per_edge); - return Pyramid14::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Pyramid14::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -214,7 +211,7 @@ std::unique_ptr Pyramid14::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Pyramid14::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -260,7 +257,7 @@ void Pyramid14::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Pyramid14::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_pyramid18.C b/src/geom/cell_pyramid18.C index 12f7ad69f5f..f4c2c5e6d6a 100644 --- a/src/geom/cell_pyramid18.C +++ b/src/geom/cell_pyramid18.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_pyramid18.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri7.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" @@ -36,27 +37,6 @@ const int Pyramid18::num_nodes; const int Pyramid18::nodes_per_side; const int Pyramid18::nodes_per_edge; -const unsigned int 
Pyramid18::side_nodes_map[Pyramid18::num_sides][Pyramid18::nodes_per_side] = - { - {0, 1, 4, 5, 10, 9, 14, 99, 99}, // Side 0 (front) - {1, 2, 4, 6, 11, 10, 15, 99, 99}, // Side 1 (right) - {2, 3, 4, 7, 12, 11, 16, 99, 99}, // Side 2 (back) - {3, 0, 4, 8, 9, 12, 17, 99, 99}, // Side 3 (left) - {0, 3, 2, 1, 8, 7, 6, 5, 13} // Side 4 (base) - }; - -const unsigned int Pyramid18::edge_nodes_map[Pyramid18::num_edges][Pyramid18::nodes_per_edge] = - { - {0, 1, 5}, // Edge 0 - {1, 2, 6}, // Edge 1 - {2, 3, 7}, // Edge 2 - {0, 3, 8}, // Edge 3 - {0, 4, 9}, // Edge 4 - {1, 4, 10}, // Edge 5 - {2, 4, 11}, // Edge 6 - {3, 4, 12} // Edge 7 - }; - // ------------------------------------------------------------ // Pyramid18 class member functions @@ -93,33 +73,44 @@ bool Pyramid18::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Pyramid18::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s == 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Pyramid18::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Pyramid18::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -173,7 +164,10 @@ unsigned int Pyramid18::local_side_node(unsigned int side, // Some sides have 7 nodes. 
libmesh_assert(side == 4 || side_node < 7); - return Pyramid18::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Pyramid18::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -184,7 +178,10 @@ unsigned int Pyramid18::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Pyramid18::nodes_per_edge); - return Pyramid18::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Pyramid18::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -216,7 +213,7 @@ std::unique_ptr Pyramid18::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Pyramid18::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -262,7 +259,7 @@ void Pyramid18::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Pyramid18::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_tet10.C b/src/geom/cell_tet10.C index f876afdcccd..edab2defba8 100644 --- a/src/geom/cell_tet10.C +++ b/src/geom/cell_tet10.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_tet10.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -34,24 +35,6 @@ const int Tet10::num_nodes; const int Tet10::nodes_per_side; const int Tet10::nodes_per_edge; -const unsigned int Tet10::side_nodes_map[Tet10::num_sides][Tet10::nodes_per_side] = - { - {0, 2, 1, 6, 5, 4}, // Side 0 - 
{0, 1, 3, 4, 8, 7}, // Side 1 - {1, 2, 3, 5, 9, 8}, // Side 2 - {2, 0, 3, 6, 7, 9} // Side 3 - }; - -const unsigned int Tet10::edge_nodes_map[Tet10::num_edges][Tet10::nodes_per_edge] = - { - {0, 1, 4}, // Edge 0 - {1, 2, 5}, // Edge 1 - {0, 2, 6}, // Edge 2 - {0, 3, 7}, // Edge 3 - {1, 3, 8}, // Edge 4 - {2, 3, 9} // Edge 5 - }; - // ------------------------------------------------------------ // Tet10 class member functions @@ -78,32 +61,44 @@ bool Tet10::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tet10::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Tet10::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Tet10::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return 
false; } @@ -185,7 +180,10 @@ unsigned int Tet10::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tet10::nodes_per_side); - return Tet10::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tet10::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -196,7 +194,10 @@ unsigned int Tet10::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Tet10::nodes_per_edge); - return Tet10::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Tet10::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/cell_tet14.C b/src/geom/cell_tet14.C index b214ee1c36b..7962ae5e51b 100644 --- a/src/geom/cell_tet14.C +++ b/src/geom/cell_tet14.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_tet14.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -42,24 +43,6 @@ const int Tet14::num_nodes; const int Tet14::nodes_per_side; const int Tet14::nodes_per_edge; -const unsigned int Tet14::side_nodes_map[Tet14::num_sides][Tet14::nodes_per_side] = - { - {0, 2, 1, 6, 5, 4, 10}, // Side 0 - {0, 1, 3, 4, 8, 7, 11}, // Side 1 - {1, 2, 3, 5, 9, 8, 12}, // Side 2 - {2, 0, 3, 6, 7, 9, 13} // Side 3 - }; - -const unsigned int Tet14::edge_nodes_map[Tet14::num_edges][Tet14::nodes_per_edge] = - { - {0, 1, 4}, // Edge 0 - {1, 2, 5}, // Edge 1 - {0, 2, 6}, // Edge 2 - {0, 3, 7}, // Edge 3 - {1, 3, 8}, // Edge 4 - {2, 3, 9} // Edge 5 - }; - // ------------------------------------------------------------ // Tet14 class member functions @@ -88,32 +71,44 @@ bool 
Tet14::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tet14::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Tet14::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Tet14::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -210,7 +205,10 @@ unsigned int Tet14::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tet14::nodes_per_side); - return Tet14::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tet14::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -221,7 +219,10 
@@ unsigned int Tet14::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Tet14::nodes_per_edge); - return Tet14::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Tet14::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/face_quad8.C b/src/geom/face_quad8.C index 720f77dd4d1..e710f4b1ae4 100644 --- a/src/geom/face_quad8.C +++ b/src/geom/face_quad8.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad8.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -32,15 +33,6 @@ namespace libMesh const int Quad8::num_nodes; const int Quad8::nodes_per_side; -const unsigned int Quad8::side_nodes_map[Quad8::num_sides][Quad8::nodes_per_side] = - { - {0, 1, 4}, // Side 0 - {1, 2, 5}, // Side 1 - {2, 3, 6}, // Side 2 - {3, 0, 7} // Side 3 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Quad8::_embedding_matrix[Quad8::num_children][Quad8::num_nodes][Quad8::num_nodes] = @@ -128,16 +120,22 @@ bool Quad8::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Quad8::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } 
std::vector @@ -212,7 +210,10 @@ unsigned int Quad8::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Quad8::nodes_per_side); - return Quad8::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Quad8::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/geom/face_quad9.C b/src/geom/face_quad9.C index 7182b023a43..60f56aca69f 100644 --- a/src/geom/face_quad9.C +++ b/src/geom/face_quad9.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -32,15 +33,6 @@ namespace libMesh const int Quad9::num_nodes; const int Quad9::nodes_per_side; -const unsigned int Quad9::side_nodes_map[Quad9::num_sides][Quad9::nodes_per_side] = - { - {0, 1, 4}, // Side 0 - {1, 2, 5}, // Side 1 - {2, 3, 6}, // Side 2 - {3, 0, 7} // Side 3 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Quad9::_embedding_matrix[Quad9::num_children][Quad9::num_nodes][Quad9::num_nodes] = @@ -136,16 +128,22 @@ bool Quad9::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Quad9::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, 
i); + return nodes; } std::vector @@ -230,7 +228,10 @@ unsigned int Quad9::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Quad9::nodes_per_side); - return Quad9::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Quad9::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/geom/face_tri6.C b/src/geom/face_tri6.C index fb24d0fe701..f844ebfe635 100644 --- a/src/geom/face_tri6.C +++ b/src/geom/face_tri6.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -32,14 +33,6 @@ namespace libMesh const int Tri6::num_nodes; const int Tri6::nodes_per_side; -const unsigned int Tri6::side_nodes_map[Tri6::num_sides][Tri6::nodes_per_side] = - { - {0, 1, 3}, // Side 0 - {1, 2, 4}, // Side 1 - {2, 0, 5} // Side 2 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Tri6::_embedding_matrix[Tri6::num_children][Tri6::num_nodes][Tri6::num_nodes] = @@ -119,16 +112,22 @@ bool Tri6::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tri6::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return 
nodes; } std::vector @@ -199,7 +198,10 @@ unsigned int Tri6::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tri6::nodes_per_side); - return Tri6::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tri6::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/geom/face_tri7.C b/src/geom/face_tri7.C index e30c72ced52..e007678d0fc 100644 --- a/src/geom/face_tri7.C +++ b/src/geom/face_tri7.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -38,14 +39,6 @@ namespace libMesh const int Tri7::num_nodes; const int Tri7::nodes_per_side; -const unsigned int Tri7::side_nodes_map[Tri7::num_sides][Tri7::nodes_per_side] = - { - {0, 1, 3}, // Side 0 - {1, 2, 4}, // Side 1 - {2, 0, 5} // Side 2 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Tri7::_embedding_matrix[Tri7::num_children][Tri7::num_nodes][Tri7::num_nodes] = @@ -143,16 +136,22 @@ bool Tri7::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tri7::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector 
@@ -236,7 +235,10 @@ unsigned int Tri7::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tri7::nodes_per_side); - return Tri7::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tri7::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/tests/fe/kokkos_fe_contract_test.K b/tests/fe/kokkos_fe_contract_test.K index 826693a2e93..4fd2d0243fd 100644 --- a/tests/fe/kokkos_fe_contract_test.K +++ b/tests/fe/kokkos_fe_contract_test.K @@ -32,6 +32,7 @@ namespace struct contract_case { const char * name; + bool expect_abort; }; struct element_fixture @@ -294,30 +295,32 @@ main(int argc, char ** argv) } const contract_case cases[] = { - { "get_side_topology_prism6" }, - { "get_side_topology_pyramid5" }, - { "shape_lagrange_edge4_third" }, - { "grad_shape_lagrange_prism6_first" }, - { "shape_monomial_hex27_sixth" }, - { "grad_shape_monomial_tri7_sixth" }, - { "ndofs_lagrange_prism6_first" }, - { "map_shape_rational" }, - { "grad_map_shape_rational" }, - { "face_normal_parent_dim2" }, - { "face_jacobian_prism20_tri7" }, - { "face_jacobian_prism21_tri7" }, - { "face_jacobian_pyramid18_tri7" }, - { "map_face_qp_to_parent_prism20_tri7" }, - { "map_face_qp_to_parent_prism21_tri7" }, - { "map_face_qp_to_parent_pyramid18_tri7" } + { "get_side_topology_prism6", true }, + { "get_side_topology_pyramid5", true }, + { "shape_lagrange_edge4_third", false }, + { "grad_shape_lagrange_prism6_first", true }, + { "shape_monomial_hex27_sixth", true }, + { "grad_shape_monomial_tri7_sixth", true }, + { "ndofs_lagrange_prism6_first", true }, + { "map_shape_rational", true }, + { "grad_map_shape_rational", true }, + { "face_normal_parent_dim2", true }, + { "face_jacobian_prism20_tri7", true }, + { "face_jacobian_prism21_tri7", true }, + { "face_jacobian_pyramid18_tri7", true }, + { 
"map_face_qp_to_parent_prism20_tri7", true }, + { "map_face_qp_to_parent_prism21_tri7", true }, + { "map_face_qp_to_parent_pyramid18_tri7", true } }; int total_fail = 0; for (const auto & info : cases) { - const bool passed = expect_child_abort(argv[0], info.name); + const bool passed = info.expect_abort ? expect_child_abort(argv[0], info.name) + : expect_child_success(argv[0], info.name); const int fail = passed ? 0 : 1; - std::printf("[contract_abort] [%s] %s (%d failures)\n", + std::printf("[%s] [%s] %s (%d failures)\n", + info.expect_abort ? "contract_abort" : "contract_success", info.name, passed ? "PASS" : "FAIL", fail); diff --git a/tests/fe/kokkos_fe_map_oracle_test.K b/tests/fe/kokkos_fe_map_oracle_test.K index c7e0075450f..4658289e82d 100644 --- a/tests/fe/kokkos_fe_map_oracle_test.K +++ b/tests/fe/kokkos_fe_map_oracle_test.K @@ -433,6 +433,7 @@ main(int argc, char ** argv) const map_helper_case cases[] = { { libMesh::EDGE2, "EDGE2" }, { libMesh::EDGE3, "EDGE3" }, + { libMesh::EDGE4, "EDGE4" }, { libMesh::TRI3, "TRI3" }, { libMesh::TRI6, "TRI6" }, { libMesh::QUAD4, "QUAD4" }, diff --git a/tests/fe/kokkos_fe_oracle_test_utils.h b/tests/fe/kokkos_fe_oracle_test_utils.h index 3b8a956749c..9e2e0915afa 100644 --- a/tests/fe/kokkos_fe_oracle_test_utils.h +++ b/tests/fe/kokkos_fe_oracle_test_utils.h @@ -6,6 +6,7 @@ #include "gpu/kokkos_fe_map.h" #include "gpu/kokkos_fe_shape_dispatch.h" #include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_storage_policy.h" #include "libmesh/elem.h" #include "libmesh/fe_base.h" @@ -249,9 +250,7 @@ build_reference_fixture(libMesh::ElemType elem_type) for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) { - libMesh::Point master; - libmesh_error_msg_if(!libMesh::try_reference_node(elem_type, i, master), - "build_reference_fixture(): unsupported reference-node lookup"); + const libMesh::Point master = fixture.elem->master_point(i); const libMesh::Real xi = master(0); const libMesh::Real eta = master(1); const libMesh::Real 
zeta = master(2); diff --git a/tests/fe/kokkos_fe_shape_oracle_test.K b/tests/fe/kokkos_fe_shape_oracle_test.K index db3b3f77949..8d664723221 100644 --- a/tests/fe/kokkos_fe_shape_oracle_test.K +++ b/tests/fe/kokkos_fe_shape_oracle_test.K @@ -112,6 +112,7 @@ static const physics_shape_info lagrange_physics_cases[] = { { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "EDGE3/FIRST" }, { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "EDGE3/SECOND" }, { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST }, 1, 2, "EDGE4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, 1, 4, "EDGE4/THIRD" }, { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "TRI3/FIRST" }, { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "TRI6/FIRST" }, diff --git a/tests/fe/kokkos_fe_types_oracle_test.K b/tests/fe/kokkos_fe_types_oracle_test.K index 7f239971ba2..9dfe558e499 100644 --- a/tests/fe/kokkos_fe_types_oracle_test.K +++ b/tests/fe/kokkos_fe_types_oracle_test.K @@ -358,7 +358,7 @@ test_support_contract() { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, true }, { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::CONSTANT }, false }, { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::FIRST }, false }, - { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, true }, { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::THIRD }, false }, { { libMesh::LAGRANGE, libMesh::TET14, libMesh::THIRD }, false }, { { libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST }, false }, From befa74fb124decdd066e9074a60173974b8290fd Mon Sep 17 00:00:00 2001 From: rochi00 Date: Thu, 14 May 2026 14:34:28 -0600 Subject: [PATCH 37/48] Add native Kokkos Hilbert system path --- Makefile.am | 22 + Makefile.in | 406 ++++- configure | 17 +- contrib/Makefile.in | 1 + contrib/capnproto/Makefile.in | 1 + contrib/eigen/gitshim/Makefile.in | 1 + contrib/exodusii/5.22b/exodus/Makefile.in | 
1 + contrib/exodusii/5.22b/nemesis/Makefile.in | 1 + contrib/exodusii/Lib/Makefile.in | 1 + contrib/exodusii/v8.11/exodus/Makefile.in | 1 + contrib/exodusii/v8.11/nemesis/Makefile.in | 1 + contrib/fparser/Makefile.in | 1 + contrib/fparser/extrasrc/Makefile.in | 1 + contrib/fparser/fparser.hh | 1 - contrib/fparser/fparser_ad.cc | 13 + contrib/fparser/fparser_ad.hh | 4 +- contrib/gmv/Makefile.in | 1 + contrib/gzstream/Makefile.in | 1 + contrib/laspack/Makefile.in | 1 + contrib/libHilbert/Makefile.in | 1 + contrib/metis/Makefile.in | 1 + contrib/nanoflann/Makefile.in | 1 + contrib/nemesis/Lib/Makefile.in | 1 + contrib/netgen/Makefile.in | 3 +- contrib/parmetis/Makefile.in | 1 + contrib/poly2tri/modified/Makefile.in | 1 + contrib/qhull/2012.1/Makefile.in | 1 + contrib/sfcurves/Makefile.in | 1 + contrib/tecplot/binary/Makefile.in | 1 + contrib/tecplot/tecio/Makefile.in | 1 + contrib/tetgen/Makefile.in | 1 + contrib/triangle/Makefile.in | 1 + doc/Makefile.in | 1 + doc/html/Makefile.in | 1 + examples/Makefile.in | 1 + .../adaptivity/adaptivity_ex1/Makefile.in | 1 + .../adaptivity/adaptivity_ex2/Makefile.in | 1 + .../adaptivity/adaptivity_ex3/Makefile.in | 1 + .../adaptivity/adaptivity_ex4/Makefile.in | 1 + .../adaptivity/adaptivity_ex5/Makefile.in | 1 + examples/adjoints/adjoints_ex1/Makefile.in | 1 + examples/adjoints/adjoints_ex2/Makefile.in | 1 + examples/adjoints/adjoints_ex3/Makefile.in | 1 + examples/adjoints/adjoints_ex4/Makefile.in | 1 + examples/adjoints/adjoints_ex5/Makefile.in | 1 + examples/adjoints/adjoints_ex6/Makefile.in | 1 + examples/adjoints/adjoints_ex7/Makefile.in | 1 + .../eigenproblems_ex1/Makefile.in | 1 + .../eigenproblems_ex2/Makefile.in | 1 + .../eigenproblems_ex3/Makefile.in | 1 + .../eigenproblems_ex4/Makefile.in | 1 + .../fem_system/fem_system_ex1/Makefile.in | 1 + .../fem_system/fem_system_ex2/Makefile.in | 1 + .../fem_system/fem_system_ex3/Makefile.in | 1 + .../fem_system/fem_system_ex4/Makefile.in | 1 + 
.../fem_system/fem_system_ex5/Makefile.in | 1 + .../introduction/introduction_ex1/Makefile.in | 1 + .../introduction/introduction_ex2/Makefile.in | 1 + .../introduction/introduction_ex3/Makefile.in | 1 + .../introduction/introduction_ex4/Makefile.in | 1 + .../introduction/introduction_ex5/Makefile.in | 1 + .../miscellaneous_ex1/Makefile.in | 1 + .../miscellaneous_ex10/Makefile.in | 1 + .../miscellaneous_ex11/Makefile.in | 1 + .../miscellaneous_ex12/Makefile.in | 1 + .../miscellaneous_ex13/Makefile.in | 1 + .../miscellaneous_ex14/Makefile.in | 1 + .../miscellaneous_ex15/Makefile.in | 1 + .../miscellaneous_ex16/Makefile.in | 1 + .../miscellaneous_ex17/Makefile.in | 1 + .../miscellaneous_ex2/Makefile.in | 1 + .../miscellaneous_ex3/Makefile.in | 1 + .../miscellaneous_ex4/Makefile.in | 1 + .../miscellaneous_ex5/Makefile.in | 1 + .../miscellaneous_ex6/Makefile.in | 1 + .../miscellaneous_ex7/Makefile.in | 1 + .../miscellaneous_ex8/Makefile.in | 1 + .../miscellaneous_ex9/Makefile.in | 1 + .../optimization/optimization_ex1/Makefile.in | 1 + .../optimization/optimization_ex2/Makefile.in | 1 + .../reduced_basis_ex1/Makefile.in | 1 + .../reduced_basis_ex2/Makefile.in | 1 + .../reduced_basis_ex3/Makefile.in | 1 + .../reduced_basis_ex4/Makefile.in | 1 + .../reduced_basis_ex5/Makefile.in | 1 + .../reduced_basis_ex6/Makefile.in | 1 + .../reduced_basis_ex7/Makefile.in | 1 + .../solution_transfer_ex1/Makefile.in | 1 + .../subdomains/subdomains_ex1/Makefile.in | 1 + .../subdomains/subdomains_ex2/Makefile.in | 1 + .../subdomains/subdomains_ex3/Makefile.in | 1 + .../systems_of_equations_ex1/Makefile.in | 1 + .../systems_of_equations_ex2/Makefile.in | 1 + .../systems_of_equations_ex3/Makefile.in | 1 + .../systems_of_equations_ex4/Makefile.in | 1 + .../systems_of_equations_ex5/Makefile.in | 1 + .../systems_of_equations_ex6/Makefile.in | 1 + .../systems_of_equations_ex7/Makefile.in | 1 + .../systems_of_equations_ex8/Makefile.in | 1 + .../systems_of_equations_ex9/Makefile.in | 1 + 
examples/transient/transient_ex1/Makefile.in | 1 + examples/transient/transient_ex2/Makefile.in | 1 + examples/transient/transient_ex3/Makefile.in | 1 + examples/vector_fe/vector_fe_ex1/Makefile.in | 1 + examples/vector_fe/vector_fe_ex10/Makefile.in | 1 + examples/vector_fe/vector_fe_ex2/Makefile.in | 1 + examples/vector_fe/vector_fe_ex3/Makefile.in | 1 + examples/vector_fe/vector_fe_ex4/Makefile.in | 1 + examples/vector_fe/vector_fe_ex5/Makefile.in | 1 + examples/vector_fe/vector_fe_ex6/Makefile.in | 1 + examples/vector_fe/vector_fe_ex7/Makefile.in | 1 + examples/vector_fe/vector_fe_ex8/Makefile.in | 1 + examples/vector_fe/vector_fe_ex9/Makefile.in | 1 + include/Makefile.am | 4 +- include/Makefile.in | 86 +- include/base/dof_map.h | 56 + include/gpu/kokkos_hilbert_assembly.h | 284 ++++ include/gpu/kokkos_hilbert_system.h | 685 ++++++++ include/gpu/kokkos_parsed_function.h | 832 ++++++++++ include/include_HEADERS | 1 + include/libmesh/Makefile.am | 8 + include/libmesh/Makefile.in | 146 +- include/mesh/mesh_base.h | 62 +- include/numerics/parsed_fem_function.h | 69 +- include/numerics/parsed_function.h | 40 +- include/numerics/parsed_function_program.h | 189 +++ include/numerics/petsc_vector.h | 91 + include/systems/hilbert_assembly.h | 352 ++++ include/systems/hilbert_assembly_kernel.h | 239 +++ src/apps/L2system.C | 1477 ++++++++++++++++- src/apps/L2system.h | 117 +- src/apps/calculator.C | 50 +- src/apps/hilbert_kokkos_benchmark.C | 438 +++++ src/base/dof_map.C | 174 ++ src/libmesh_SOURCES | 1 + src/mesh/checkpoint_io.C | 2 +- src/mesh/exodusII_io.C | 2 +- src/mesh/exodusII_io_helper.C | 2 +- src/mesh/gmsh_io.C | 6 +- src/mesh/mesh_base.C | 137 ++ src/mesh/nemesis_io.C | 2 +- src/mesh/nemesis_io_helper.C | 4 +- src/mesh/tetgen_io.C | 2 +- src/mesh/ucd_io.C | 2 +- src/mesh/unv_io.C | 4 +- src/mesh/xdr_io.C | 2 +- src/numerics/parsed_function_program.C | 155 ++ src/numerics/petsc_vector.C | 2 - tests/Makefile.am | 1 + tests/Makefile.in | 490 +++++- tests/driver.C | 
22 + tests/systems/hilbert_system_kokkos_test.C | 252 +++ 152 files changed, 6788 insertions(+), 272 deletions(-) create mode 100644 include/gpu/kokkos_hilbert_assembly.h create mode 100644 include/gpu/kokkos_hilbert_system.h create mode 100644 include/gpu/kokkos_parsed_function.h create mode 100644 include/numerics/parsed_function_program.h create mode 100644 include/systems/hilbert_assembly.h create mode 100644 include/systems/hilbert_assembly_kernel.h create mode 100644 src/apps/hilbert_kokkos_benchmark.C create mode 100644 src/numerics/parsed_function_program.C create mode 100644 tests/systems/hilbert_system_kokkos_test.C diff --git a/Makefile.am b/Makefile.am index dea7f3fd3aa..6bd18a0eaaa 100644 --- a/Makefile.am +++ b/Makefile.am @@ -468,6 +468,28 @@ calculator_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) calculator_dbg_CXXFLAGS = $(CXXFLAGS_DBG) calculator_dbg_LDADD = libmesh_dbg.la +# hilbert_kokkos_benchmark +opt_programs += hilbert_kokkos_benchmark-opt +hilbert_kokkos_benchmark_opt_SOURCES = src/apps/hilbert_kokkos_benchmark.C +hilbert_kokkos_benchmark_opt_SOURCES += src/apps/L2system.C src/apps/L2system.h +hilbert_kokkos_benchmark_opt_CPPFLAGS = $(CPPFLAGS_OPT) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_opt_CXXFLAGS = $(CXXFLAGS_OPT) +hilbert_kokkos_benchmark_opt_LDADD = libmesh_opt.la + +devel_programs += hilbert_kokkos_benchmark-devel +hilbert_kokkos_benchmark_devel_SOURCES = src/apps/hilbert_kokkos_benchmark.C +hilbert_kokkos_benchmark_devel_SOURCES += src/apps/L2system.C src/apps/L2system.h +hilbert_kokkos_benchmark_devel_CPPFLAGS = $(CPPFLAGS_DEVEL) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_devel_CXXFLAGS = $(CXXFLAGS_DEVEL) +hilbert_kokkos_benchmark_devel_LDADD = libmesh_devel.la + +dbg_programs += hilbert_kokkos_benchmark-dbg +hilbert_kokkos_benchmark_dbg_SOURCES = src/apps/hilbert_kokkos_benchmark.C +hilbert_kokkos_benchmark_dbg_SOURCES += src/apps/L2system.C src/apps/L2system.h +hilbert_kokkos_benchmark_dbg_CPPFLAGS = $(CPPFLAGS_DBG) 
$(AM_CPPFLAGS) +hilbert_kokkos_benchmark_dbg_CXXFLAGS = $(CXXFLAGS_DBG) +hilbert_kokkos_benchmark_dbg_LDADD = libmesh_dbg.la + # compare opt_programs += compare-opt compare_opt_SOURCES = src/apps/compare.C diff --git a/Makefile.in b/Makefile.in index 186be47b152..a6ab5a0776e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -202,21 +202,22 @@ CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = fparser_parse-opt$(EXEEXT) getpot_parse-opt$(EXEEXT) \ amr-opt$(EXEEXT) matrixconvert-opt$(EXEEXT) \ matrixsolve-opt$(EXEEXT) meshtool-opt$(EXEEXT) \ - calculator-opt$(EXEEXT) compare-opt$(EXEEXT) \ - meshbcid-opt$(EXEEXT) meshid-opt$(EXEEXT) meshavg-opt$(EXEEXT) \ - meshdiff-opt$(EXEEXT) meshnorm-opt$(EXEEXT) \ - projection-opt$(EXEEXT) output_libmesh_version-opt$(EXEEXT) \ - meshplot-opt$(EXEEXT) solution_components-opt$(EXEEXT) \ - splitter-opt$(EXEEXT) embedding-opt$(EXEEXT) + calculator-opt$(EXEEXT) hilbert_kokkos_benchmark-opt$(EXEEXT) \ + compare-opt$(EXEEXT) meshbcid-opt$(EXEEXT) meshid-opt$(EXEEXT) \ + meshavg-opt$(EXEEXT) meshdiff-opt$(EXEEXT) \ + meshnorm-opt$(EXEEXT) projection-opt$(EXEEXT) \ + output_libmesh_version-opt$(EXEEXT) meshplot-opt$(EXEEXT) \ + solution_components-opt$(EXEEXT) splitter-opt$(EXEEXT) \ + embedding-opt$(EXEEXT) @LIBMESH_OPT_MODE_TRUE@am__EXEEXT_2 = $(am__EXEEXT_1) am__EXEEXT_3 = fparser_parse-devel$(EXEEXT) \ getpot_parse-devel$(EXEEXT) amr-devel$(EXEEXT) \ matrixconvert-devel$(EXEEXT) matrixsolve-devel$(EXEEXT) \ meshtool-devel$(EXEEXT) calculator-devel$(EXEEXT) \ - compare-devel$(EXEEXT) meshbcid-devel$(EXEEXT) \ - meshid-devel$(EXEEXT) meshavg-devel$(EXEEXT) \ - meshdiff-devel$(EXEEXT) meshnorm-devel$(EXEEXT) \ - projection-devel$(EXEEXT) \ + hilbert_kokkos_benchmark-devel$(EXEEXT) compare-devel$(EXEEXT) \ + meshbcid-devel$(EXEEXT) meshid-devel$(EXEEXT) \ + meshavg-devel$(EXEEXT) meshdiff-devel$(EXEEXT) \ + meshnorm-devel$(EXEEXT) projection-devel$(EXEEXT) \ output_libmesh_version-devel$(EXEEXT) meshplot-devel$(EXEEXT) \ 
solution_components-devel$(EXEEXT) splitter-devel$(EXEEXT) \ embedding-devel$(EXEEXT) @@ -224,12 +225,13 @@ am__EXEEXT_3 = fparser_parse-devel$(EXEEXT) \ am__EXEEXT_5 = fparser_parse-dbg$(EXEEXT) getpot_parse-dbg$(EXEEXT) \ amr-dbg$(EXEEXT) matrixconvert-dbg$(EXEEXT) \ matrixsolve-dbg$(EXEEXT) meshtool-dbg$(EXEEXT) \ - calculator-dbg$(EXEEXT) compare-dbg$(EXEEXT) \ - meshbcid-dbg$(EXEEXT) meshid-dbg$(EXEEXT) meshavg-dbg$(EXEEXT) \ - meshdiff-dbg$(EXEEXT) meshnorm-dbg$(EXEEXT) \ - projection-dbg$(EXEEXT) output_libmesh_version-dbg$(EXEEXT) \ - meshplot-dbg$(EXEEXT) solution_components-dbg$(EXEEXT) \ - splitter-dbg$(EXEEXT) embedding-dbg$(EXEEXT) + calculator-dbg$(EXEEXT) hilbert_kokkos_benchmark-dbg$(EXEEXT) \ + compare-dbg$(EXEEXT) meshbcid-dbg$(EXEEXT) meshid-dbg$(EXEEXT) \ + meshavg-dbg$(EXEEXT) meshdiff-dbg$(EXEEXT) \ + meshnorm-dbg$(EXEEXT) projection-dbg$(EXEEXT) \ + output_libmesh_version-dbg$(EXEEXT) meshplot-dbg$(EXEEXT) \ + solution_components-dbg$(EXEEXT) splitter-dbg$(EXEEXT) \ + embedding-dbg$(EXEEXT) @LIBMESH_DBG_MODE_TRUE@am__EXEEXT_6 = $(am__EXEEXT_5) am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \ "$(DESTDIR)$(bindir)" "$(DESTDIR)$(contribbindir)" \ @@ -423,8 +425,9 @@ am__libmesh_dbg_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -866,6 +869,7 @@ am__objects_1 = src/base/libmesh_dbg_la-dirichlet_boundary.lo \ src/numerics/libmesh_dbg_la-laspack_vector.lo \ src/numerics/libmesh_dbg_la-lumped_mass_matrix.lo \ 
src/numerics/libmesh_dbg_la-numeric_vector.lo \ + src/numerics/libmesh_dbg_la-parsed_function_program.lo \ src/numerics/libmesh_dbg_la-petsc_matrix.lo \ src/numerics/libmesh_dbg_la-petsc_matrix_base.lo \ src/numerics/libmesh_dbg_la-petsc_matrix_shell_matrix.lo \ @@ -1238,8 +1242,9 @@ am__libmesh_devel_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -1680,6 +1685,7 @@ am__objects_2 = src/base/libmesh_devel_la-dirichlet_boundary.lo \ src/numerics/libmesh_devel_la-laspack_vector.lo \ src/numerics/libmesh_devel_la-lumped_mass_matrix.lo \ src/numerics/libmesh_devel_la-numeric_vector.lo \ + src/numerics/libmesh_devel_la-parsed_function_program.lo \ src/numerics/libmesh_devel_la-petsc_matrix.lo \ src/numerics/libmesh_devel_la-petsc_matrix_base.lo \ src/numerics/libmesh_devel_la-petsc_matrix_shell_matrix.lo \ @@ -2049,8 +2055,9 @@ am__libmesh_oprof_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -2491,6 +2498,7 @@ am__objects_3 = 
src/base/libmesh_oprof_la-dirichlet_boundary.lo \ src/numerics/libmesh_oprof_la-laspack_vector.lo \ src/numerics/libmesh_oprof_la-lumped_mass_matrix.lo \ src/numerics/libmesh_oprof_la-numeric_vector.lo \ + src/numerics/libmesh_oprof_la-parsed_function_program.lo \ src/numerics/libmesh_oprof_la-petsc_matrix.lo \ src/numerics/libmesh_oprof_la-petsc_matrix_base.lo \ src/numerics/libmesh_oprof_la-petsc_matrix_shell_matrix.lo \ @@ -2860,8 +2868,9 @@ am__libmesh_opt_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -3302,6 +3311,7 @@ am__objects_4 = src/base/libmesh_opt_la-dirichlet_boundary.lo \ src/numerics/libmesh_opt_la-laspack_vector.lo \ src/numerics/libmesh_opt_la-lumped_mass_matrix.lo \ src/numerics/libmesh_opt_la-numeric_vector.lo \ + src/numerics/libmesh_opt_la-parsed_function_program.lo \ src/numerics/libmesh_opt_la-petsc_matrix.lo \ src/numerics/libmesh_opt_la-petsc_matrix_base.lo \ src/numerics/libmesh_opt_la-petsc_matrix_shell_matrix.lo \ @@ -3670,8 +3680,9 @@ am__libmesh_prof_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ 
src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -4112,6 +4123,7 @@ am__objects_5 = src/base/libmesh_prof_la-dirichlet_boundary.lo \ src/numerics/libmesh_prof_la-laspack_vector.lo \ src/numerics/libmesh_prof_la-lumped_mass_matrix.lo \ src/numerics/libmesh_prof_la-numeric_vector.lo \ + src/numerics/libmesh_prof_la-parsed_function_program.lo \ src/numerics/libmesh_prof_la-petsc_matrix.lo \ src/numerics/libmesh_prof_la-petsc_matrix_base.lo \ src/numerics/libmesh_prof_la-petsc_matrix_shell_matrix.lo \ @@ -4456,6 +4468,33 @@ getpot_parse_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ $(getpot_parse_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ +am_hilbert_kokkos_benchmark_dbg_OBJECTS = src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.$(OBJEXT) \ + src/apps/hilbert_kokkos_benchmark_dbg-L2system.$(OBJEXT) +hilbert_kokkos_benchmark_dbg_OBJECTS = \ + $(am_hilbert_kokkos_benchmark_dbg_OBJECTS) +hilbert_kokkos_benchmark_dbg_DEPENDENCIES = libmesh_dbg.la +hilbert_kokkos_benchmark_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_hilbert_kokkos_benchmark_devel_OBJECTS = src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.$(OBJEXT) \ + src/apps/hilbert_kokkos_benchmark_devel-L2system.$(OBJEXT) +hilbert_kokkos_benchmark_devel_OBJECTS = \ + $(am_hilbert_kokkos_benchmark_devel_OBJECTS) +hilbert_kokkos_benchmark_devel_DEPENDENCIES = libmesh_devel.la +hilbert_kokkos_benchmark_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_hilbert_kokkos_benchmark_opt_OBJECTS = src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.$(OBJEXT) \ + 
src/apps/hilbert_kokkos_benchmark_opt-L2system.$(OBJEXT) +hilbert_kokkos_benchmark_opt_OBJECTS = \ + $(am_hilbert_kokkos_benchmark_opt_OBJECTS) +hilbert_kokkos_benchmark_opt_DEPENDENCIES = libmesh_opt.la +hilbert_kokkos_benchmark_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ am_matrixconvert_dbg_OBJECTS = \ src/apps/matrixconvert_dbg-matrixconvert.$(OBJEXT) matrixconvert_dbg_OBJECTS = $(am_matrixconvert_dbg_OBJECTS) @@ -4771,6 +4810,12 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po \ src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po \ src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po \ src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po \ src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po \ src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po \ @@ -6107,6 +6152,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo \ @@ -6145,6 +6191,7 @@ am__depfiles_remade = 
src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo \ @@ -6183,6 +6230,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo \ @@ -6221,6 +6269,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo \ @@ -6259,6 +6308,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo \ 
src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo \ @@ -7238,21 +7288,24 @@ SOURCES = $(libmesh_dbg_la_SOURCES) $(libmesh_devel_la_SOURCES) \ $(embedding_opt_SOURCES) $(fparser_parse_dbg_SOURCES) \ $(fparser_parse_devel_SOURCES) $(fparser_parse_opt_SOURCES) \ $(getpot_parse_dbg_SOURCES) $(getpot_parse_devel_SOURCES) \ - $(getpot_parse_opt_SOURCES) $(matrixconvert_dbg_SOURCES) \ - $(matrixconvert_devel_SOURCES) $(matrixconvert_opt_SOURCES) \ - $(matrixsolve_dbg_SOURCES) $(matrixsolve_devel_SOURCES) \ - $(matrixsolve_opt_SOURCES) $(meshavg_dbg_SOURCES) \ - $(meshavg_devel_SOURCES) $(meshavg_opt_SOURCES) \ - $(meshbcid_dbg_SOURCES) $(meshbcid_devel_SOURCES) \ - $(meshbcid_opt_SOURCES) $(meshdiff_dbg_SOURCES) \ - $(meshdiff_devel_SOURCES) $(meshdiff_opt_SOURCES) \ - $(meshid_dbg_SOURCES) $(meshid_devel_SOURCES) \ - $(meshid_opt_SOURCES) $(meshnorm_dbg_SOURCES) \ - $(meshnorm_devel_SOURCES) $(meshnorm_opt_SOURCES) \ - $(meshplot_dbg_SOURCES) $(meshplot_devel_SOURCES) \ - $(meshplot_opt_SOURCES) $(meshtool_dbg_SOURCES) \ - $(meshtool_devel_SOURCES) $(meshtool_opt_SOURCES) \ - $(output_libmesh_version_dbg_SOURCES) \ + $(getpot_parse_opt_SOURCES) \ + $(hilbert_kokkos_benchmark_dbg_SOURCES) \ + $(hilbert_kokkos_benchmark_devel_SOURCES) \ + $(hilbert_kokkos_benchmark_opt_SOURCES) \ + $(matrixconvert_dbg_SOURCES) $(matrixconvert_devel_SOURCES) \ + $(matrixconvert_opt_SOURCES) $(matrixsolve_dbg_SOURCES) \ + $(matrixsolve_devel_SOURCES) $(matrixsolve_opt_SOURCES) \ + $(meshavg_dbg_SOURCES) $(meshavg_devel_SOURCES) \ + $(meshavg_opt_SOURCES) $(meshbcid_dbg_SOURCES) \ + $(meshbcid_devel_SOURCES) $(meshbcid_opt_SOURCES) \ + $(meshdiff_dbg_SOURCES) $(meshdiff_devel_SOURCES) \ + $(meshdiff_opt_SOURCES) $(meshid_dbg_SOURCES) \ + $(meshid_devel_SOURCES) $(meshid_opt_SOURCES) \ + $(meshnorm_dbg_SOURCES) $(meshnorm_devel_SOURCES) \ + $(meshnorm_opt_SOURCES) $(meshplot_dbg_SOURCES) \ + $(meshplot_devel_SOURCES) $(meshplot_opt_SOURCES) \ + $(meshtool_dbg_SOURCES) 
$(meshtool_devel_SOURCES) \ + $(meshtool_opt_SOURCES) $(output_libmesh_version_dbg_SOURCES) \ $(output_libmesh_version_devel_SOURCES) \ $(output_libmesh_version_opt_SOURCES) \ $(projection_dbg_SOURCES) $(projection_devel_SOURCES) \ @@ -7273,21 +7326,24 @@ DIST_SOURCES = $(am__libmesh_dbg_la_SOURCES_DIST) \ $(embedding_opt_SOURCES) $(fparser_parse_dbg_SOURCES) \ $(fparser_parse_devel_SOURCES) $(fparser_parse_opt_SOURCES) \ $(getpot_parse_dbg_SOURCES) $(getpot_parse_devel_SOURCES) \ - $(getpot_parse_opt_SOURCES) $(matrixconvert_dbg_SOURCES) \ - $(matrixconvert_devel_SOURCES) $(matrixconvert_opt_SOURCES) \ - $(matrixsolve_dbg_SOURCES) $(matrixsolve_devel_SOURCES) \ - $(matrixsolve_opt_SOURCES) $(meshavg_dbg_SOURCES) \ - $(meshavg_devel_SOURCES) $(meshavg_opt_SOURCES) \ - $(meshbcid_dbg_SOURCES) $(meshbcid_devel_SOURCES) \ - $(meshbcid_opt_SOURCES) $(meshdiff_dbg_SOURCES) \ - $(meshdiff_devel_SOURCES) $(meshdiff_opt_SOURCES) \ - $(meshid_dbg_SOURCES) $(meshid_devel_SOURCES) \ - $(meshid_opt_SOURCES) $(meshnorm_dbg_SOURCES) \ - $(meshnorm_devel_SOURCES) $(meshnorm_opt_SOURCES) \ - $(meshplot_dbg_SOURCES) $(meshplot_devel_SOURCES) \ - $(meshplot_opt_SOURCES) $(meshtool_dbg_SOURCES) \ - $(meshtool_devel_SOURCES) $(meshtool_opt_SOURCES) \ - $(output_libmesh_version_dbg_SOURCES) \ + $(getpot_parse_opt_SOURCES) \ + $(hilbert_kokkos_benchmark_dbg_SOURCES) \ + $(hilbert_kokkos_benchmark_devel_SOURCES) \ + $(hilbert_kokkos_benchmark_opt_SOURCES) \ + $(matrixconvert_dbg_SOURCES) $(matrixconvert_devel_SOURCES) \ + $(matrixconvert_opt_SOURCES) $(matrixsolve_dbg_SOURCES) \ + $(matrixsolve_devel_SOURCES) $(matrixsolve_opt_SOURCES) \ + $(meshavg_dbg_SOURCES) $(meshavg_devel_SOURCES) \ + $(meshavg_opt_SOURCES) $(meshbcid_dbg_SOURCES) \ + $(meshbcid_devel_SOURCES) $(meshbcid_opt_SOURCES) \ + $(meshdiff_dbg_SOURCES) $(meshdiff_devel_SOURCES) \ + $(meshdiff_opt_SOURCES) $(meshid_dbg_SOURCES) \ + $(meshid_devel_SOURCES) $(meshid_opt_SOURCES) \ + $(meshnorm_dbg_SOURCES) 
$(meshnorm_devel_SOURCES) \ + $(meshnorm_opt_SOURCES) $(meshplot_dbg_SOURCES) \ + $(meshplot_devel_SOURCES) $(meshplot_opt_SOURCES) \ + $(meshtool_dbg_SOURCES) $(meshtool_devel_SOURCES) \ + $(meshtool_opt_SOURCES) $(output_libmesh_version_dbg_SOURCES) \ $(output_libmesh_version_devel_SOURCES) \ $(output_libmesh_version_opt_SOURCES) \ $(projection_dbg_SOURCES) $(projection_devel_SOURCES) \ @@ -7529,6 +7585,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -8143,6 +8200,7 @@ libmesh_SOURCES = \ src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ src/numerics/petsc_matrix.C \ src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ @@ -8398,6 +8456,8 @@ CLEANFILES = $(am__append_21) # calculator +# hilbert_kokkos_benchmark + # compare # meshbcid @@ -8423,22 +8483,23 @@ CLEANFILES = $(am__append_21) # embedding opt_programs = fparser_parse-opt getpot_parse-opt amr-opt \ matrixconvert-opt matrixsolve-opt meshtool-opt calculator-opt \ - compare-opt meshbcid-opt meshid-opt meshavg-opt meshdiff-opt \ - meshnorm-opt projection-opt output_libmesh_version-opt \ - meshplot-opt solution_components-opt splitter-opt \ - embedding-opt + hilbert_kokkos_benchmark-opt compare-opt meshbcid-opt \ + meshid-opt meshavg-opt meshdiff-opt meshnorm-opt \ + projection-opt output_libmesh_version-opt meshplot-opt \ + solution_components-opt splitter-opt embedding-opt devel_programs = fparser_parse-devel getpot_parse-devel amr-devel \ matrixconvert-devel matrixsolve-devel meshtool-devel \ - calculator-devel compare-devel meshbcid-devel meshid-devel \ - meshavg-devel meshdiff-devel meshnorm-devel projection-devel \ - output_libmesh_version-devel meshplot-devel \ - solution_components-devel 
splitter-devel embedding-devel + calculator-devel hilbert_kokkos_benchmark-devel compare-devel \ + meshbcid-devel meshid-devel meshavg-devel meshdiff-devel \ + meshnorm-devel projection-devel output_libmesh_version-devel \ + meshplot-devel solution_components-devel splitter-devel \ + embedding-devel dbg_programs = fparser_parse-dbg getpot_parse-dbg amr-dbg \ matrixconvert-dbg matrixsolve-dbg meshtool-dbg calculator-dbg \ - compare-dbg meshbcid-dbg meshid-dbg meshavg-dbg meshdiff-dbg \ - meshnorm-dbg projection-dbg output_libmesh_version-dbg \ - meshplot-dbg solution_components-dbg splitter-dbg \ - embedding-dbg + hilbert_kokkos_benchmark-dbg compare-dbg meshbcid-dbg \ + meshid-dbg meshavg-dbg meshdiff-dbg meshnorm-dbg \ + projection-dbg output_libmesh_version-dbg meshplot-dbg \ + solution_components-dbg splitter-dbg embedding-dbg prof_programs = # empty, append below oprof_programs = # empty, append below fparser_parse_opt_SOURCES = src/apps/fparser_parse.C @@ -8528,6 +8589,24 @@ calculator_dbg_SOURCES = src/apps/calculator.C src/apps/L2system.C \ calculator_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) calculator_dbg_CXXFLAGS = $(CXXFLAGS_DBG) calculator_dbg_LDADD = libmesh_dbg.la +hilbert_kokkos_benchmark_opt_SOURCES = \ + src/apps/hilbert_kokkos_benchmark.C src/apps/L2system.C \ + src/apps/L2system.h +hilbert_kokkos_benchmark_opt_CPPFLAGS = $(CPPFLAGS_OPT) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_opt_CXXFLAGS = $(CXXFLAGS_OPT) +hilbert_kokkos_benchmark_opt_LDADD = libmesh_opt.la +hilbert_kokkos_benchmark_devel_SOURCES = \ + src/apps/hilbert_kokkos_benchmark.C src/apps/L2system.C \ + src/apps/L2system.h +hilbert_kokkos_benchmark_devel_CPPFLAGS = $(CPPFLAGS_DEVEL) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_devel_CXXFLAGS = $(CXXFLAGS_DEVEL) +hilbert_kokkos_benchmark_devel_LDADD = libmesh_devel.la +hilbert_kokkos_benchmark_dbg_SOURCES = \ + src/apps/hilbert_kokkos_benchmark.C src/apps/L2system.C \ + src/apps/L2system.h +hilbert_kokkos_benchmark_dbg_CPPFLAGS = 
$(CPPFLAGS_DBG) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_dbg_CXXFLAGS = $(CXXFLAGS_DBG) +hilbert_kokkos_benchmark_dbg_LDADD = libmesh_dbg.la compare_opt_SOURCES = src/apps/compare.C compare_opt_CPPFLAGS = $(CPPFLAGS_OPT) $(AM_CPPFLAGS) compare_opt_CXXFLAGS = $(CXXFLAGS_OPT) @@ -9440,6 +9519,9 @@ src/numerics/libmesh_dbg_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_dbg_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_dbg_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_dbg_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -10665,6 +10747,9 @@ src/numerics/libmesh_devel_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_devel_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_devel_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_devel_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -11839,6 +11924,9 @@ src/numerics/libmesh_oprof_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_oprof_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_oprof_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_oprof_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -13013,6 +13101,9 @@ src/numerics/libmesh_opt_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_opt_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_opt_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) 
src/numerics/libmesh_opt_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -14184,6 +14275,9 @@ src/numerics/libmesh_prof_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_prof_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_prof_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_prof_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -14891,6 +14985,30 @@ src/apps/getpot_parse_opt-getpot_parse.$(OBJEXT): \ getpot_parse-opt$(EXEEXT): $(getpot_parse_opt_OBJECTS) $(getpot_parse_opt_DEPENDENCIES) $(EXTRA_getpot_parse_opt_DEPENDENCIES) @rm -f getpot_parse-opt$(EXEEXT) $(AM_V_CXXLD)$(getpot_parse_opt_LINK) $(getpot_parse_opt_OBJECTS) $(getpot_parse_opt_LDADD) $(LIBS) +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) +src/apps/hilbert_kokkos_benchmark_dbg-L2system.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) + +hilbert_kokkos_benchmark-dbg$(EXEEXT): $(hilbert_kokkos_benchmark_dbg_OBJECTS) $(hilbert_kokkos_benchmark_dbg_DEPENDENCIES) $(EXTRA_hilbert_kokkos_benchmark_dbg_DEPENDENCIES) + @rm -f hilbert_kokkos_benchmark-dbg$(EXEEXT) + $(AM_V_CXXLD)$(hilbert_kokkos_benchmark_dbg_LINK) $(hilbert_kokkos_benchmark_dbg_OBJECTS) $(hilbert_kokkos_benchmark_dbg_LDADD) $(LIBS) +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) +src/apps/hilbert_kokkos_benchmark_devel-L2system.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) + +hilbert_kokkos_benchmark-devel$(EXEEXT): $(hilbert_kokkos_benchmark_devel_OBJECTS) $(hilbert_kokkos_benchmark_devel_DEPENDENCIES) $(EXTRA_hilbert_kokkos_benchmark_devel_DEPENDENCIES) + @rm -f 
hilbert_kokkos_benchmark-devel$(EXEEXT) + $(AM_V_CXXLD)$(hilbert_kokkos_benchmark_devel_LINK) $(hilbert_kokkos_benchmark_devel_OBJECTS) $(hilbert_kokkos_benchmark_devel_LDADD) $(LIBS) +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) +src/apps/hilbert_kokkos_benchmark_opt-L2system.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) + +hilbert_kokkos_benchmark-opt$(EXEEXT): $(hilbert_kokkos_benchmark_opt_OBJECTS) $(hilbert_kokkos_benchmark_opt_DEPENDENCIES) $(EXTRA_hilbert_kokkos_benchmark_opt_DEPENDENCIES) + @rm -f hilbert_kokkos_benchmark-opt$(EXEEXT) + $(AM_V_CXXLD)$(hilbert_kokkos_benchmark_opt_LINK) $(hilbert_kokkos_benchmark_opt_OBJECTS) $(hilbert_kokkos_benchmark_opt_LDADD) $(LIBS) src/apps/matrixconvert_dbg-matrixconvert.$(OBJEXT): \ src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) @@ -15291,6 +15409,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po@am__quote@ # am--include-marker @@ -16627,6 +16751,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16665,6 +16790,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16703,6 +16829,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16741,6 +16868,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16779,6 +16907,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -19652,6 +19781,13 @@ src/numerics/libmesh_dbg_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_dbg_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' 
|| echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_dbg_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_dbg_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_dbg_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_dbg_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_dbg_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_dbg_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_dbg_la-petsc_matrix.lo -MD -MP -MF 
src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_dbg_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo @@ -22984,6 +23120,13 @@ src/numerics/libmesh_devel_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_devel_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_devel_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_devel_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_devel_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_devel_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_devel_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_devel_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_devel_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_devel_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo @@ -26316,6 +26459,13 @@ src/numerics/libmesh_oprof_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_oprof_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_oprof_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ 
$(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_oprof_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_oprof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_oprof_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_oprof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_oprof_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_oprof_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_oprof_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo 
'$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo @@ -29648,6 +29798,13 @@ src/numerics/libmesh_opt_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_opt_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_opt_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_opt_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_opt_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_opt_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_opt_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_opt_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_opt_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_opt_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo @@ -32980,6 +33137,13 @@ src/numerics/libmesh_prof_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_prof_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_prof_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -MT 
src/numerics/libmesh_prof_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_prof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_prof_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_prof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_prof_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_prof_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_prof_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo @@ -34695,6 +34859,90 @@ src/apps/getpot_parse_opt-getpot_parse.obj: 
src/apps/getpot_parse.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(getpot_parse_opt_CPPFLAGS) $(CPPFLAGS) $(getpot_parse_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/getpot_parse_opt-getpot_parse.obj `if test -f 'src/apps/getpot_parse.C'; then $(CYGPATH_W) 'src/apps/getpot_parse.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/getpot_parse.C'; fi` +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C + 
+src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` + +src/apps/hilbert_kokkos_benchmark_dbg-L2system.o: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-L2system.o -MD -MP -MF 
src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_dbg-L2system.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C + +src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` + +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C + +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` + +src/apps/hilbert_kokkos_benchmark_devel-L2system.o: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-L2system.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.o `test -f 'src/apps/L2system.C' || 
echo '$(srcdir)/'`src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_devel-L2system.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C + +src/apps/hilbert_kokkos_benchmark_devel-L2system.obj: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-L2system.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_devel-L2system.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) 
$(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` + +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C + +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) 
$(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` + +src/apps/hilbert_kokkos_benchmark_opt-L2system.o: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-L2system.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_opt-L2system.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C + +src/apps/hilbert_kokkos_benchmark_opt-L2system.obj: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-L2system.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_opt-L2system.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.obj `if test -f 'src/apps/L2system.C'; 
then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` + src/apps/matrixconvert_dbg-matrixconvert.o: src/apps/matrixconvert.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(matrixconvert_dbg_CPPFLAGS) $(CPPFLAGS) $(matrixconvert_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/matrixconvert_dbg-matrixconvert.o -MD -MP -MF src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Tpo -c -o src/apps/matrixconvert_dbg-matrixconvert.o `test -f 'src/apps/matrixconvert.C' || echo '$(srcdir)/'`src/apps/matrixconvert.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Tpo src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po @@ -35746,6 +35994,12 @@ distclean: distclean-recursive -rm -f src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po @@ -37082,6 +37336,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo -rm -f 
src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo @@ -37120,6 +37375,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo @@ -37158,6 +37414,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo @@ -37196,6 +37453,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo @@ -37234,6 +37492,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo -rm -f 
src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo @@ -38237,6 +38496,12 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po @@ -39573,6 +39838,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo @@ -39611,6 +39877,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo -rm -f 
src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo @@ -39649,6 +39916,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo @@ -39687,6 +39955,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo @@ -39725,6 +39994,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo -rm -f 
src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo diff --git a/configure b/configure index 481367f38f0..c0250de457a 100755 --- a/configure +++ b/configure @@ -64288,6 +64288,7 @@ ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + if test "x$enablempi" = "xyes" then : @@ -64308,7 +64309,7 @@ int main(int argc, char ** argv) } _ACEOF -if ac_fn_c_try_link "$LINENO" +if ac_fn_cxx_try_link "$LINENO" then : kokkos_config_works=yes else case e in #( @@ -64332,7 +64333,7 @@ int main(int argc, char ** argv) } _ACEOF -if ac_fn_c_try_link "$LINENO" +if ac_fn_cxx_try_link "$LINENO" then : kokkos_config_works=yes else case e in #( @@ -64344,17 +64345,18 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \ ;; esac fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + CXX="$libmesh_save_CXX" CPPFLAGS="$libmesh_save_CPPFLAGS" CXXFLAGS="$libmesh_save_CXXFLAGS" LDFLAGS="$libmesh_save_LDFLAGS" LIBS="$libmesh_save_LIBS" - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "x$kokkos_config_works" = "xyes" then : @@ -69063,3 +69065,4 @@ printf "%s\n" "--------- Done Configuring libMesh ----------" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ---------------------------------------------" >&5 printf "%s\n" "---------------------------------------------" >&6; } # rm -f -r conftest* config.cache include/libmesh/libmesh_config.h.tmp + diff --git a/contrib/Makefile.in b/contrib/Makefile.in index 668594d8c04..8f435734690 100644 --- a/contrib/Makefile.in 
+++ b/contrib/Makefile.in @@ -616,6 +616,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/capnproto/Makefile.in b/contrib/capnproto/Makefile.in index 56c7844f40d..baef088d0b1 100644 --- a/contrib/capnproto/Makefile.in +++ b/contrib/capnproto/Makefile.in @@ -465,6 +465,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/eigen/gitshim/Makefile.in b/contrib/eigen/gitshim/Makefile.in index bab4d953ac5..9fb8ce682e8 100644 --- a/contrib/eigen/gitshim/Makefile.in +++ b/contrib/eigen/gitshim/Makefile.in @@ -349,6 +349,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/exodusii/5.22b/exodus/Makefile.in b/contrib/exodusii/5.22b/exodus/Makefile.in index 66a5e6a0357..9d470e1a03f 100644 --- a/contrib/exodusii/5.22b/exodus/Makefile.in +++ b/contrib/exodusii/5.22b/exodus/Makefile.in @@ -3332,6 +3332,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/exodusii/5.22b/nemesis/Makefile.in b/contrib/exodusii/5.22b/nemesis/Makefile.in index 17636fa0144..40204256e72 100644 --- a/contrib/exodusii/5.22b/nemesis/Makefile.in +++ b/contrib/exodusii/5.22b/nemesis/Makefile.in @@ -411,6 +411,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = 
@KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/exodusii/Lib/Makefile.in b/contrib/exodusii/Lib/Makefile.in index 59c360d8928..b4f9a349981 100644 --- a/contrib/exodusii/Lib/Makefile.in +++ b/contrib/exodusii/Lib/Makefile.in @@ -1967,6 +1967,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/exodusii/v8.11/exodus/Makefile.in b/contrib/exodusii/v8.11/exodus/Makefile.in index 6c21419de79..2d72bdbc847 100644 --- a/contrib/exodusii/v8.11/exodus/Makefile.in +++ b/contrib/exodusii/v8.11/exodus/Makefile.in @@ -4260,6 +4260,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/exodusii/v8.11/nemesis/Makefile.in b/contrib/exodusii/v8.11/nemesis/Makefile.in index 715ed90b4fe..a6197c64b1f 100644 --- a/contrib/exodusii/v8.11/nemesis/Makefile.in +++ b/contrib/exodusii/v8.11/nemesis/Makefile.in @@ -421,6 +421,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/fparser/Makefile.in b/contrib/fparser/Makefile.in index dd6e31b0e76..325e1c48d59 100644 --- a/contrib/fparser/Makefile.in +++ b/contrib/fparser/Makefile.in @@ -879,6 +879,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ 
+KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/fparser/extrasrc/Makefile.in b/contrib/fparser/extrasrc/Makefile.in index 3eafacaf7a0..9bd3bff32c8 100644 --- a/contrib/fparser/extrasrc/Makefile.in +++ b/contrib/fparser/extrasrc/Makefile.in @@ -351,6 +351,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/fparser/fparser.hh b/contrib/fparser/fparser.hh index 85c70c5759a..cb0d792a402 100644 --- a/contrib/fparser/fparser.hh +++ b/contrib/fparser/fparser.hh @@ -91,7 +91,6 @@ class FunctionParserBase void Optimize(); - int ParseAndDeduceVariables(const std::string& function, int* amountOfVariablesFound = 0, bool useDegrees = false); diff --git a/contrib/fparser/fparser_ad.cc b/contrib/fparser/fparser_ad.cc index 2720e0cc5ac..c3f80c17d08 100644 --- a/contrib/fparser/fparser_ad.cc +++ b/contrib/fparser/fparser_ad.cc @@ -187,6 +187,19 @@ bool FunctionParserADBase::isZero() this->mData->mByteCode[0] == cImmed && this->mData->mImmed[0] == Value_t(0)); } +template +bool FunctionParserADBase::isEmpty() +{ + return this->mData->mByteCode.empty(); +} + +template +const typename FunctionParserBase::Data * +FunctionParserADBase::parser_data() const +{ + return const_cast *>(this)->getParserData(); +} + template void FunctionParserADBase::setZero() { diff --git a/contrib/fparser/fparser_ad.hh b/contrib/fparser/fparser_ad.hh index 133cb2b8046..f06dae17455 100644 --- a/contrib/fparser/fparser_ad.hh +++ b/contrib/fparser/fparser_ad.hh @@ -47,7 +47,9 @@ public: /** * check if the function's byte code is empty. 
*/ - bool isEmpty() { return this->mData->mByteCode.empty(); } + bool isEmpty(); + + const typename FunctionParserBase::Data * parser_data() const; /** * set the bytecode of this function to return constant zero. diff --git a/contrib/gmv/Makefile.in b/contrib/gmv/Makefile.in index 249f658088a..51d91b1684d 100644 --- a/contrib/gmv/Makefile.in +++ b/contrib/gmv/Makefile.in @@ -406,6 +406,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/gzstream/Makefile.in b/contrib/gzstream/Makefile.in index d7694ab76c8..d5c845abf3d 100644 --- a/contrib/gzstream/Makefile.in +++ b/contrib/gzstream/Makefile.in @@ -458,6 +458,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/laspack/Makefile.in b/contrib/laspack/Makefile.in index 125376d4347..47a8abd00be 100644 --- a/contrib/laspack/Makefile.in +++ b/contrib/laspack/Makefile.in @@ -516,6 +516,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/libHilbert/Makefile.in b/contrib/libHilbert/Makefile.in index 35d049071ec..2c594e8ad9b 100644 --- a/contrib/libHilbert/Makefile.in +++ b/contrib/libHilbert/Makefile.in @@ -489,6 +489,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git 
a/contrib/metis/Makefile.in b/contrib/metis/Makefile.in index 5e8047805b9..642793791bf 100644 --- a/contrib/metis/Makefile.in +++ b/contrib/metis/Makefile.in @@ -1033,6 +1033,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/nanoflann/Makefile.in b/contrib/nanoflann/Makefile.in index 94694c9b03c..0ebeeff7406 100644 --- a/contrib/nanoflann/Makefile.in +++ b/contrib/nanoflann/Makefile.in @@ -455,6 +455,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/nemesis/Lib/Makefile.in b/contrib/nemesis/Lib/Makefile.in index 722bf4b86f0..122bd09254d 100644 --- a/contrib/nemesis/Lib/Makefile.in +++ b/contrib/nemesis/Lib/Makefile.in @@ -801,6 +801,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/netgen/Makefile.in b/contrib/netgen/Makefile.in index 931be784598..5044c6fd1a9 100644 --- a/contrib/netgen/Makefile.in +++ b/contrib/netgen/Makefile.in @@ -353,6 +353,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -942,7 +943,7 @@ vtkversion = @vtkversion@ @LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib.h \ @LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib_occ.cpp \ @LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/nglib_occ.h \ 
-@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/parallelfunc.cpp +@LIBMESH_ENABLE_NETGEN_TRUE@ netgen/nglib/parallelfunc.cpp @LIBMESH_ENABLE_NETGEN_TRUE@netgenincludedir = $(includedir)/netgen @LIBMESH_ENABLE_NETGEN_TRUE@nglibincludedir = $(includedir)/netgen/nglib diff --git a/contrib/parmetis/Makefile.in b/contrib/parmetis/Makefile.in index cde4b12aab9..c7935e17638 100644 --- a/contrib/parmetis/Makefile.in +++ b/contrib/parmetis/Makefile.in @@ -867,6 +867,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/poly2tri/modified/Makefile.in b/contrib/poly2tri/modified/Makefile.in index 514fd6a38a6..8ea3f7cd8cf 100644 --- a/contrib/poly2tri/modified/Makefile.in +++ b/contrib/poly2tri/modified/Makefile.in @@ -553,6 +553,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/qhull/2012.1/Makefile.in b/contrib/qhull/2012.1/Makefile.in index 03f2cbcb819..e84b600ef25 100644 --- a/contrib/qhull/2012.1/Makefile.in +++ b/contrib/qhull/2012.1/Makefile.in @@ -1176,6 +1176,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/sfcurves/Makefile.in b/contrib/sfcurves/Makefile.in index 18be38e4660..ed6554d0ba3 100644 --- a/contrib/sfcurves/Makefile.in +++ b/contrib/sfcurves/Makefile.in @@ -426,6 +426,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = 
@KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/tecplot/binary/Makefile.in b/contrib/tecplot/binary/Makefile.in index 9a13af38818..ec79f2ea10a 100644 --- a/contrib/tecplot/binary/Makefile.in +++ b/contrib/tecplot/binary/Makefile.in @@ -396,6 +396,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/tecplot/tecio/Makefile.in b/contrib/tecplot/tecio/Makefile.in index f937b27b6db..1edfb7397dd 100644 --- a/contrib/tecplot/tecio/Makefile.in +++ b/contrib/tecplot/tecio/Makefile.in @@ -635,6 +635,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/tetgen/Makefile.in b/contrib/tetgen/Makefile.in index 725d7a30d36..fdecf33a03f 100644 --- a/contrib/tetgen/Makefile.in +++ b/contrib/tetgen/Makefile.in @@ -441,6 +441,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/contrib/triangle/Makefile.in b/contrib/triangle/Makefile.in index f87cb42296f..1e25b331714 100644 --- a/contrib/triangle/Makefile.in +++ b/contrib/triangle/Makefile.in @@ -436,6 +436,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/doc/Makefile.in b/doc/Makefile.in index 812994568fe..472cd35389b 100644 --- 
a/doc/Makefile.in +++ b/doc/Makefile.in @@ -359,6 +359,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index d3e9103668c..302703289ee 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -319,6 +319,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/Makefile.in b/examples/Makefile.in index 27a8db4d760..a662cf88a28 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -387,6 +387,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adaptivity/adaptivity_ex1/Makefile.in b/examples/adaptivity/adaptivity_ex1/Makefile.in index 3dc4c77a9dd..cc396d29f42 100644 --- a/examples/adaptivity/adaptivity_ex1/Makefile.in +++ b/examples/adaptivity/adaptivity_ex1/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adaptivity/adaptivity_ex2/Makefile.in b/examples/adaptivity/adaptivity_ex2/Makefile.in index 04c82fb9137..9c39660be70 100644 --- a/examples/adaptivity/adaptivity_ex2/Makefile.in +++ b/examples/adaptivity/adaptivity_ex2/Makefile.in @@ -496,6 +496,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = 
@KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adaptivity/adaptivity_ex3/Makefile.in b/examples/adaptivity/adaptivity_ex3/Makefile.in index 7e31802fa14..5f7d55a3477 100644 --- a/examples/adaptivity/adaptivity_ex3/Makefile.in +++ b/examples/adaptivity/adaptivity_ex3/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adaptivity/adaptivity_ex4/Makefile.in b/examples/adaptivity/adaptivity_ex4/Makefile.in index e155bc31268..f013de36926 100644 --- a/examples/adaptivity/adaptivity_ex4/Makefile.in +++ b/examples/adaptivity/adaptivity_ex4/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adaptivity/adaptivity_ex5/Makefile.in b/examples/adaptivity/adaptivity_ex5/Makefile.in index 50cfa24eefd..7deb22866e0 100644 --- a/examples/adaptivity/adaptivity_ex5/Makefile.in +++ b/examples/adaptivity/adaptivity_ex5/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex1/Makefile.in b/examples/adjoints/adjoints_ex1/Makefile.in index f658f98c231..9559de61b70 100644 --- a/examples/adjoints/adjoints_ex1/Makefile.in +++ b/examples/adjoints/adjoints_ex1/Makefile.in @@ -571,6 +571,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ 
KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex2/Makefile.in b/examples/adjoints/adjoints_ex2/Makefile.in index 1583e06ba69..1ed48513c88 100644 --- a/examples/adjoints/adjoints_ex2/Makefile.in +++ b/examples/adjoints/adjoints_ex2/Makefile.in @@ -539,6 +539,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex3/Makefile.in b/examples/adjoints/adjoints_ex3/Makefile.in index 8e744ca96ee..32dac179bc2 100644 --- a/examples/adjoints/adjoints_ex3/Makefile.in +++ b/examples/adjoints/adjoints_ex3/Makefile.in @@ -574,6 +574,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex4/Makefile.in b/examples/adjoints/adjoints_ex4/Makefile.in index 8ff3dceb0f3..2b6f750b92c 100644 --- a/examples/adjoints/adjoints_ex4/Makefile.in +++ b/examples/adjoints/adjoints_ex4/Makefile.in @@ -574,6 +574,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex5/Makefile.in b/examples/adjoints/adjoints_ex5/Makefile.in index f4bdc7d11ae..077e3ba31f9 100644 --- a/examples/adjoints/adjoints_ex5/Makefile.in +++ b/examples/adjoints/adjoints_ex5/Makefile.in @@ -574,6 +574,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ 
KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex6/Makefile.in b/examples/adjoints/adjoints_ex6/Makefile.in index 43ada1d6034..e8a09524ff0 100644 --- a/examples/adjoints/adjoints_ex6/Makefile.in +++ b/examples/adjoints/adjoints_ex6/Makefile.in @@ -539,6 +539,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/adjoints/adjoints_ex7/Makefile.in b/examples/adjoints/adjoints_ex7/Makefile.in index c807bd13682..e5a98c02318 100644 --- a/examples/adjoints/adjoints_ex7/Makefile.in +++ b/examples/adjoints/adjoints_ex7/Makefile.in @@ -589,6 +589,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/eigenproblems/eigenproblems_ex1/Makefile.in b/examples/eigenproblems/eigenproblems_ex1/Makefile.in index b2aeeba0c70..d6696c9657b 100644 --- a/examples/eigenproblems/eigenproblems_ex1/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex1/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/eigenproblems/eigenproblems_ex2/Makefile.in b/examples/eigenproblems/eigenproblems_ex2/Makefile.in index e4584dfbd8f..b2a5fb48010 100644 --- a/examples/eigenproblems/eigenproblems_ex2/Makefile.in +++ 
b/examples/eigenproblems/eigenproblems_ex2/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/eigenproblems/eigenproblems_ex3/Makefile.in b/examples/eigenproblems/eigenproblems_ex3/Makefile.in index 1bb7ca59f97..33072d3d79b 100644 --- a/examples/eigenproblems/eigenproblems_ex3/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex3/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/eigenproblems/eigenproblems_ex4/Makefile.in b/examples/eigenproblems/eigenproblems_ex4/Makefile.in index becef7ee553..585db452640 100644 --- a/examples/eigenproblems/eigenproblems_ex4/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex4/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/fem_system/fem_system_ex1/Makefile.in b/examples/fem_system/fem_system_ex1/Makefile.in index ab49a448096..25e7f610ded 100644 --- a/examples/fem_system/fem_system_ex1/Makefile.in +++ b/examples/fem_system/fem_system_ex1/Makefile.in @@ -511,6 +511,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/fem_system/fem_system_ex2/Makefile.in 
b/examples/fem_system/fem_system_ex2/Makefile.in index 5990a9ddcc7..8b30230a3b1 100644 --- a/examples/fem_system/fem_system_ex2/Makefile.in +++ b/examples/fem_system/fem_system_ex2/Makefile.in @@ -526,6 +526,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/fem_system/fem_system_ex3/Makefile.in b/examples/fem_system/fem_system_ex3/Makefile.in index 614048a0e48..2d141f77288 100644 --- a/examples/fem_system/fem_system_ex3/Makefile.in +++ b/examples/fem_system/fem_system_ex3/Makefile.in @@ -511,6 +511,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/fem_system/fem_system_ex4/Makefile.in b/examples/fem_system/fem_system_ex4/Makefile.in index 1a31f0ddab4..522bb49f8e9 100644 --- a/examples/fem_system/fem_system_ex4/Makefile.in +++ b/examples/fem_system/fem_system_ex4/Makefile.in @@ -511,6 +511,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/fem_system/fem_system_ex5/Makefile.in b/examples/fem_system/fem_system_ex5/Makefile.in index 2526a04fe16..01fb482aa91 100644 --- a/examples/fem_system/fem_system_ex5/Makefile.in +++ b/examples/fem_system/fem_system_ex5/Makefile.in @@ -526,6 +526,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = 
@LD@ diff --git a/examples/introduction/introduction_ex1/Makefile.in b/examples/introduction/introduction_ex1/Makefile.in index 13087163eff..e8308c46d86 100644 --- a/examples/introduction/introduction_ex1/Makefile.in +++ b/examples/introduction/introduction_ex1/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/introduction/introduction_ex2/Makefile.in b/examples/introduction/introduction_ex2/Makefile.in index 5e11b336ef9..36d5f45eee7 100644 --- a/examples/introduction/introduction_ex2/Makefile.in +++ b/examples/introduction/introduction_ex2/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/introduction/introduction_ex3/Makefile.in b/examples/introduction/introduction_ex3/Makefile.in index bb24117c806..f9d6287a98e 100644 --- a/examples/introduction/introduction_ex3/Makefile.in +++ b/examples/introduction/introduction_ex3/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/introduction/introduction_ex4/Makefile.in b/examples/introduction/introduction_ex4/Makefile.in index efdd5b9b127..2d2474a658f 100644 --- a/examples/introduction/introduction_ex4/Makefile.in +++ b/examples/introduction/introduction_ex4/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = 
@KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/introduction/introduction_ex5/Makefile.in b/examples/introduction/introduction_ex5/Makefile.in index b08a51822e9..437ce7b05f2 100644 --- a/examples/introduction/introduction_ex5/Makefile.in +++ b/examples/introduction/introduction_ex5/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex1/Makefile.in b/examples/miscellaneous/miscellaneous_ex1/Makefile.in index fa1d6604c10..9bcaf44480c 100644 --- a/examples/miscellaneous/miscellaneous_ex1/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex1/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex10/Makefile.in b/examples/miscellaneous/miscellaneous_ex10/Makefile.in index 4ceaf8ccf94..bf721ec793f 100644 --- a/examples/miscellaneous/miscellaneous_ex10/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex10/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex11/Makefile.in b/examples/miscellaneous/miscellaneous_ex11/Makefile.in index e36b21636b2..8db1b8690a6 100644 --- a/examples/miscellaneous/miscellaneous_ex11/Makefile.in +++ 
b/examples/miscellaneous/miscellaneous_ex11/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex12/Makefile.in b/examples/miscellaneous/miscellaneous_ex12/Makefile.in index 2c4c4fc193d..2d6c2fbd728 100644 --- a/examples/miscellaneous/miscellaneous_ex12/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex12/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex13/Makefile.in b/examples/miscellaneous/miscellaneous_ex13/Makefile.in index 4f0080367ed..b76863f34c0 100644 --- a/examples/miscellaneous/miscellaneous_ex13/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex13/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex14/Makefile.in b/examples/miscellaneous/miscellaneous_ex14/Makefile.in index 768948775af..e4cef4ae163 100644 --- a/examples/miscellaneous/miscellaneous_ex14/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex14/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git 
a/examples/miscellaneous/miscellaneous_ex15/Makefile.in b/examples/miscellaneous/miscellaneous_ex15/Makefile.in index 438762128b7..643317916e6 100644 --- a/examples/miscellaneous/miscellaneous_ex15/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex15/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex16/Makefile.in b/examples/miscellaneous/miscellaneous_ex16/Makefile.in index acaecda9d82..ecd2b7973a7 100644 --- a/examples/miscellaneous/miscellaneous_ex16/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex16/Makefile.in @@ -493,6 +493,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex17/Makefile.in b/examples/miscellaneous/miscellaneous_ex17/Makefile.in index 8ab2dc797f8..6d61cb4ed36 100644 --- a/examples/miscellaneous/miscellaneous_ex17/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex17/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex2/Makefile.in b/examples/miscellaneous/miscellaneous_ex2/Makefile.in index 5e2d6279049..8e59e6ad478 100644 --- a/examples/miscellaneous/miscellaneous_ex2/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex2/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = 
@KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex3/Makefile.in b/examples/miscellaneous/miscellaneous_ex3/Makefile.in index d58bfb75a85..5146d27ca1f 100644 --- a/examples/miscellaneous/miscellaneous_ex3/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex3/Makefile.in @@ -482,6 +482,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex4/Makefile.in b/examples/miscellaneous/miscellaneous_ex4/Makefile.in index f35a439ba5f..2b48571cd36 100644 --- a/examples/miscellaneous/miscellaneous_ex4/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex4/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex5/Makefile.in b/examples/miscellaneous/miscellaneous_ex5/Makefile.in index dc7dbeeef7b..707ea4dc6e1 100644 --- a/examples/miscellaneous/miscellaneous_ex5/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex5/Makefile.in @@ -491,6 +491,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex6/Makefile.in b/examples/miscellaneous/miscellaneous_ex6/Makefile.in index a985950b56d..aa8bc685743 100644 --- 
a/examples/miscellaneous/miscellaneous_ex6/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex6/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex7/Makefile.in b/examples/miscellaneous/miscellaneous_ex7/Makefile.in index e3515b9f785..9ac39bc4084 100644 --- a/examples/miscellaneous/miscellaneous_ex7/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex7/Makefile.in @@ -520,6 +520,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex8/Makefile.in b/examples/miscellaneous/miscellaneous_ex8/Makefile.in index d990641f148..939249e5570 100644 --- a/examples/miscellaneous/miscellaneous_ex8/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex8/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/miscellaneous/miscellaneous_ex9/Makefile.in b/examples/miscellaneous/miscellaneous_ex9/Makefile.in index 3860b7d8388..66bcac9673a 100644 --- a/examples/miscellaneous/miscellaneous_ex9/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex9/Makefile.in @@ -517,6 +517,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ 
diff --git a/examples/optimization/optimization_ex1/Makefile.in b/examples/optimization/optimization_ex1/Makefile.in index fb3baf00e2d..50737c7c35f 100644 --- a/examples/optimization/optimization_ex1/Makefile.in +++ b/examples/optimization/optimization_ex1/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/optimization/optimization_ex2/Makefile.in b/examples/optimization/optimization_ex2/Makefile.in index c7816e72654..16f38051df1 100644 --- a/examples/optimization/optimization_ex2/Makefile.in +++ b/examples/optimization/optimization_ex2/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex1/Makefile.in b/examples/reduced_basis/reduced_basis_ex1/Makefile.in index fa1d195a31a..b33bf22f98c 100644 --- a/examples/reduced_basis/reduced_basis_ex1/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex1/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex2/Makefile.in b/examples/reduced_basis/reduced_basis_ex2/Makefile.in index 2c34d349aeb..fb18b6c3a8b 100644 --- a/examples/reduced_basis/reduced_basis_ex2/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex2/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ 
KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex3/Makefile.in b/examples/reduced_basis/reduced_basis_ex3/Makefile.in index 41d6871eef3..66e9889d4e6 100644 --- a/examples/reduced_basis/reduced_basis_ex3/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex3/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex4/Makefile.in b/examples/reduced_basis/reduced_basis_ex4/Makefile.in index a7bd31cb843..4018e00c9b7 100644 --- a/examples/reduced_basis/reduced_basis_ex4/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex4/Makefile.in @@ -506,6 +506,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex5/Makefile.in b/examples/reduced_basis/reduced_basis_ex5/Makefile.in index 82b5b2fbc05..f5c5f996a49 100644 --- a/examples/reduced_basis/reduced_basis_ex5/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex5/Makefile.in @@ -516,6 +516,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex6/Makefile.in b/examples/reduced_basis/reduced_basis_ex6/Makefile.in index dbc24072e30..704b9196cf4 100644 --- a/examples/reduced_basis/reduced_basis_ex6/Makefile.in +++ 
b/examples/reduced_basis/reduced_basis_ex6/Makefile.in @@ -506,6 +506,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/reduced_basis/reduced_basis_ex7/Makefile.in b/examples/reduced_basis/reduced_basis_ex7/Makefile.in index a8446610838..88b4bb8693a 100644 --- a/examples/reduced_basis/reduced_basis_ex7/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex7/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/solution_transfer/solution_transfer_ex1/Makefile.in b/examples/solution_transfer/solution_transfer_ex1/Makefile.in index 5a0d14c6136..0cf9f53ad59 100644 --- a/examples/solution_transfer/solution_transfer_ex1/Makefile.in +++ b/examples/solution_transfer/solution_transfer_ex1/Makefile.in @@ -477,6 +477,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/subdomains/subdomains_ex1/Makefile.in b/examples/subdomains/subdomains_ex1/Makefile.in index 52deb3d0ade..799e2edc23d 100644 --- a/examples/subdomains/subdomains_ex1/Makefile.in +++ b/examples/subdomains/subdomains_ex1/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git 
a/examples/subdomains/subdomains_ex2/Makefile.in b/examples/subdomains/subdomains_ex2/Makefile.in index b32dcd0e3c9..e81cd89393f 100644 --- a/examples/subdomains/subdomains_ex2/Makefile.in +++ b/examples/subdomains/subdomains_ex2/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/subdomains/subdomains_ex3/Makefile.in b/examples/subdomains/subdomains_ex3/Makefile.in index a3d5c4a101d..303ef37b9d2 100644 --- a/examples/subdomains/subdomains_ex3/Makefile.in +++ b/examples/subdomains/subdomains_ex3/Makefile.in @@ -486,6 +486,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in index c0fc74e97bf..bff0732e60d 100644 --- a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in @@ -478,6 +478,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in index a89c3ec3026..5e5a2e62e27 100644 --- a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in @@ -483,6 +483,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ 
KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in index f0afcad3be3..776822ee34e 100644 --- a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in @@ -478,6 +478,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in index dcdbbe495d2..4626e3bedea 100644 --- a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in @@ -478,6 +478,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in index ff6e7676c11..22fe2d48bf0 100644 --- a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in @@ -478,6 +478,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = 
@LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in index da837f22e98..b6b15092aba 100644 --- a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in @@ -478,6 +478,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in index 4ef11376ea4..9c036b14331 100644 --- a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in @@ -484,6 +484,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in index e5c7279beb3..50acb9ef0a2 100644 --- a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in @@ -522,6 +522,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in index 495f1b401f7..b6f6ee6544e 100644 --- 
a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in @@ -484,6 +484,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/transient/transient_ex1/Makefile.in b/examples/transient/transient_ex1/Makefile.in index cb1f7123cd7..0e39e5979cf 100644 --- a/examples/transient/transient_ex1/Makefile.in +++ b/examples/transient/transient_ex1/Makefile.in @@ -492,6 +492,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/transient/transient_ex2/Makefile.in b/examples/transient/transient_ex2/Makefile.in index 3a5648a9715..19c83b89de5 100644 --- a/examples/transient/transient_ex2/Makefile.in +++ b/examples/transient/transient_ex2/Makefile.in @@ -478,6 +478,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/transient/transient_ex3/Makefile.in b/examples/transient/transient_ex3/Makefile.in index 418ce7dfed8..f79116ac7dc 100644 --- a/examples/transient/transient_ex3/Makefile.in +++ b/examples/transient/transient_ex3/Makefile.in @@ -526,6 +526,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex1/Makefile.in 
b/examples/vector_fe/vector_fe_ex1/Makefile.in index 66462dd6f04..25a34ed4414 100644 --- a/examples/vector_fe/vector_fe_ex1/Makefile.in +++ b/examples/vector_fe/vector_fe_ex1/Makefile.in @@ -490,6 +490,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex10/Makefile.in b/examples/vector_fe/vector_fe_ex10/Makefile.in index 97e297f7568..6242a9689e2 100644 --- a/examples/vector_fe/vector_fe_ex10/Makefile.in +++ b/examples/vector_fe/vector_fe_ex10/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex2/Makefile.in b/examples/vector_fe/vector_fe_ex2/Makefile.in index ad09c3743fa..bcc7bda8e92 100644 --- a/examples/vector_fe/vector_fe_ex2/Makefile.in +++ b/examples/vector_fe/vector_fe_ex2/Makefile.in @@ -516,6 +516,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex3/Makefile.in b/examples/vector_fe/vector_fe_ex3/Makefile.in index 5994d90a2bf..76252e51ad4 100644 --- a/examples/vector_fe/vector_fe_ex3/Makefile.in +++ b/examples/vector_fe/vector_fe_ex3/Makefile.in @@ -516,6 +516,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git 
a/examples/vector_fe/vector_fe_ex4/Makefile.in b/examples/vector_fe/vector_fe_ex4/Makefile.in index 91be49794d4..c88ea1a27c9 100644 --- a/examples/vector_fe/vector_fe_ex4/Makefile.in +++ b/examples/vector_fe/vector_fe_ex4/Makefile.in @@ -516,6 +516,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex5/Makefile.in b/examples/vector_fe/vector_fe_ex5/Makefile.in index 62b45033645..4464f32ec13 100644 --- a/examples/vector_fe/vector_fe_ex5/Makefile.in +++ b/examples/vector_fe/vector_fe_ex5/Makefile.in @@ -503,6 +503,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex6/Makefile.in b/examples/vector_fe/vector_fe_ex6/Makefile.in index d092712abe2..89576c57f18 100644 --- a/examples/vector_fe/vector_fe_ex6/Makefile.in +++ b/examples/vector_fe/vector_fe_ex6/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex7/Makefile.in b/examples/vector_fe/vector_fe_ex7/Makefile.in index 65dce847a17..d45faaa9f85 100644 --- a/examples/vector_fe/vector_fe_ex7/Makefile.in +++ b/examples/vector_fe/vector_fe_ex7/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = 
@LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex8/Makefile.in b/examples/vector_fe/vector_fe_ex8/Makefile.in index d5c542a648e..2ea4632110f 100644 --- a/examples/vector_fe/vector_fe_ex8/Makefile.in +++ b/examples/vector_fe/vector_fe_ex8/Makefile.in @@ -501,6 +501,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/examples/vector_fe/vector_fe_ex9/Makefile.in b/examples/vector_fe/vector_fe_ex9/Makefile.in index 0e362c7a3f8..3f031e8bf29 100644 --- a/examples/vector_fe/vector_fe_ex9/Makefile.in +++ b/examples/vector_fe/vector_fe_ex9/Makefile.in @@ -511,6 +511,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ diff --git a/include/Makefile.am b/include/Makefile.am index a78f189a29e..d9b00ef9d32 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -15,7 +15,9 @@ nobase_include_HEADERS = \ gpu/kokkos_fe_monomial.h \ gpu/kokkos_fe_face_map.h \ gpu/kokkos_fe_map.h \ - gpu/kokkos_quadrature.h + gpu/kokkos_quadrature.h \ + gpu/kokkos_hilbert_system.h \ + gpu/kokkos_parsed_function.h endif # special handholding for prefix_config.m4 generated files diff --git a/include/Makefile.in b/include/Makefile.in index a68272a0d25..6e0b5936a18 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -157,7 +157,8 @@ am__aclocal_m4_deps = \ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \ - $(noinst_HEADERS) $(am__DIST_COMMON) + $(am__nobase_include_HEADERS_DIST) $(noinst_HEADERS) \ + $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = 
libmesh_config.h.tmp CONFIG_CLEAN_FILES = @@ -215,8 +216,16 @@ am__uninstall_files_from_dir = { \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && echo $$files | $(am__xargs_n) 40 $(am__rm_f); }; \ } -am__installdirs = "$(DESTDIR)$(includedir)" -HEADERS = $(include_HEADERS) $(noinst_HEADERS) +am__installdirs = "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" +am__nobase_include_HEADERS_DIST = gpu/kokkos_fe_types.h \ + gpu/kokkos_fe_shape_dispatch.h gpu/kokkos_fe_base.h \ + gpu/kokkos_fe_evaluator.h gpu/kokkos_fe_lagrange_1d.h \ + gpu/kokkos_fe_lagrange_2d.h gpu/kokkos_fe_lagrange_3d.h \ + gpu/kokkos_fe_monomial.h gpu/kokkos_fe_face_map.h \ + gpu/kokkos_fe_map.h gpu/kokkos_quadrature.h \ + gpu/kokkos_hilbert_system.h gpu/kokkos_parsed_function.h +HEADERS = $(include_HEADERS) $(nobase_include_HEADERS) \ + $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ @@ -392,6 +401,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -606,6 +616,25 @@ vtkmajor = @vtkmajor@ vtkversion = @vtkversion@ SUBDIRS = libmesh +# GPU (Kokkos) FE math headers — installed preserving the gpu/ subdirectory so +# downstream code can use #include "libmesh/gpu/kokkos_fe_types.h" etc. +# nobase_ is used instead of the standard flat install to keep the namespace. 
+@LIBMESH_ENABLE_KOKKOS_TRUE@nobase_include_HEADERS = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_types.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_shape_dispatch.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_base.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_evaluator.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_lagrange_1d.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_lagrange_2d.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_lagrange_3d.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_monomial.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_face_map.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_map.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_quadrature.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_hilbert_system.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_parsed_function.h + + # special handholding for prefix_config.m4 generated files # so that 'make clean ; make' works as does 'make distcheck' # libmesh_config.h is made by ./configure, so it should get @@ -665,6 +694,7 @@ include_HEADERS = \ enums/enum_elem_quality.h \ enums/enum_elem_type.h \ enums/enum_error_estimator_type.h \ + enums/enum_fe_elem_class.h \ enums/enum_fe_family.h \ enums/enum_inf_map_type.h \ enums/enum_io_package.h \ @@ -703,8 +733,13 @@ include_HEADERS = \ fe/fe_interface.h \ fe/fe_interface_macros.h \ fe/fe_lagrange_shape_1D.h \ + fe/fe_reference_element_traits.h \ + fe/fe_serendipity_lagrange.h \ + fe/fe_simplex_lagrange.h \ + fe/fe_tensor_product_lagrange.h \ fe/fe_macro.h \ fe/fe_map.h \ + fe/fe_shape_traits.h \ fe/fe_transformation_base.h \ fe/fe_type.h \ fe/fe_xyz_map.h \ @@ -891,6 +926,7 @@ include_HEADERS = \ numerics/parsed_fem_function.h \ numerics/parsed_fem_function_parameter.h \ numerics/parsed_function.h \ + numerics/parsed_function_program.h \ numerics/parsed_function_parameter.h \ numerics/petsc_macro.h \ numerics/petsc_matrix.h \ @@ -964,6 +1000,7 @@ include_HEADERS = \ quadrature/quadrature_composite.h \ quadrature/quadrature_conical.h \ 
quadrature/quadrature_gauss.h \ + quadrature/quadrature_gauss_rules.h \ quadrature/quadrature_gauss_lobatto.h \ quadrature/quadrature_gm.h \ quadrature/quadrature_grid.h \ @@ -1229,6 +1266,30 @@ uninstall-includeHEADERS: @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) +install-nobase_includeHEADERS: $(nobase_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(includedir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(includedir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. 
@@ -1395,7 +1456,7 @@ check: check-recursive all-am: Makefile $(HEADERS) libmesh_config.h.tmp installdirs: installdirs-recursive installdirs-am: - for dir in "$(DESTDIR)$(includedir)"; do \ + for dir in "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive @@ -1449,7 +1510,7 @@ info: info-recursive info-am: -install-data-am: install-includeHEADERS +install-data-am: install-includeHEADERS install-nobase_includeHEADERS install-dvi: install-dvi-recursive @@ -1493,7 +1554,7 @@ ps: ps-recursive ps-am: -uninstall-am: uninstall-includeHEADERS +uninstall-am: uninstall-includeHEADERS uninstall-nobase_includeHEADERS .MAKE: $(am__recursive_targets) all install-am install-strip @@ -1505,12 +1566,13 @@ uninstall-am: uninstall-includeHEADERS install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am \ install-includeHEADERS install-info install-info-am \ - install-man install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installcheck-local installdirs installdirs-am maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-generic \ - mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ - uninstall-am uninstall-includeHEADERS + install-man install-nobase_includeHEADERS install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installcheck-local installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-includeHEADERS uninstall-nobase_includeHEADERS .PRECIOUS: Makefile diff --git a/include/base/dof_map.h b/include/base/dof_map.h index 566d3fcba6c..6f33ff529e1 100644 --- a/include/base/dof_map.h +++ b/include/base/dof_map.h @@ -40,6 +40,10 @@ #include "libmesh/mesh_subdivision_support.h" #include 
"libmesh/dof_map_base.h" +#ifdef LIBMESH_HAVE_KOKKOS +#include "libmesh/kokkos_storage_policy.h" +#endif + // TIMPI includes #include "timpi/parallel_implementation.h" #include "timpi/parallel_sync.h" @@ -50,6 +54,7 @@ #include #include #include +#include #include #include @@ -1813,6 +1818,50 @@ class DofMap : public DofMapBase, */ void reinit_static_condensation(); +#ifdef LIBMESH_HAVE_KOKKOS + struct KokkosDofIndexCache + { + using elem_id_view = ::Kokkos::View; + using elem_dof_id_view = ::Kokkos::View; + using elem_dof_count_view = ::Kokkos::View; + using elem_subdomain_view = ::Kokkos::View; + + elem_id_view element_ids; + elem_dof_id_view element_dof_indices; + elem_dof_count_view element_n_dofs; + elem_subdomain_view element_subdomains; + std::vector host_element_ids; + std::vector host_element_dof_indices; + std::vector host_element_n_dofs; + std::vector host_element_subdomains; + unsigned int max_dofs = 0; + }; + + struct KokkosLocalIndexCache + { + using elem_local_index_view = ::Kokkos::View; + + elem_local_index_view element_local_indices; + unsigned int max_dofs = 0; + }; + + const KokkosDofIndexCache * + get_kokkos_dof_index_cache(const unsigned int vn = libMesh::invalid_uint) const; + + const KokkosLocalIndexCache * + get_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn = libMesh::invalid_uint) const; + + const KokkosLocalIndexCache * + require_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn = libMesh::invalid_uint) const; + + void prepare_kokkos_dof_index_caches() const; + void prepare_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn = libMesh::invalid_uint) const; + void clear_kokkos_caches() const; +#endif + private: /** @@ -2139,6 +2188,13 @@ class DofMap : public DofMapBase, */ MeshBase & _mesh; +#ifdef LIBMESH_HAVE_KOKKOS + mutable std::map> _kokkos_dof_index_caches; + mutable std::map *>, + std::unique_ptr> _kokkos_local_index_caches; 
+#endif + /** * Additional matrices handled by this object. These pointers do \e * not handle the memory, instead, \p System, who diff --git a/include/gpu/kokkos_hilbert_assembly.h b/include/gpu/kokkos_hilbert_assembly.h new file mode 100644 index 00000000000..c28f67c9e43 --- /dev/null +++ b/include/gpu/kokkos_hilbert_assembly.h @@ -0,0 +1,284 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_KOKKOS_HILBERT_ASSEMBLY_H +#define LIBMESH_KOKKOS_HILBERT_ASSEMBLY_H + +#include "../systems/hilbert_assembly_kernel.h" + +#include "kokkos_fe_evaluator.h" +#include "kokkos_fe_map.h" +#include "kokkos_quadrature.h" + +#include +#include + +namespace libMesh::Kokkos::detail +{ + +LIBMESH_DEVICE_INLINE bool +supports_hilbert_local_assembly(libMesh::FEShapeKey key, + libMesh::ElemMappingType mapping_type, + const unsigned int quadrature_order) +{ + return mapping_type == libMesh::LAGRANGE_MAP && + libMesh::supports_shape_with_lagrange_map(key) && + libMesh::supports_grad_shape(key) && + libMesh::Kokkos::GaussQuadrature::n_points(key.elem_type, quadrature_order) > 0; +} + +template +class HilbertFEAccess +{ +public: + using node_storage_type = std::decay_t; + 
class QpData + { + public: + LIBMESH_DEVICE_INLINE + QpData(const HilbertFEAccess & fe, + const unsigned int qp, + const bool need_gradients) + : _fe(fe), + _qp(qp), + _qp_ref(GaussQuadrature::point(fe._key.elem_type, fe._quadrature_order, qp)), + _JxW(0.), + _need_gradients(need_gradients) + { + RealVector xyz = zero_vector(); + RealTensor J = zero_tensor(); + + physical_point_and_jacobian(fe._mapping_type, + fe._key.elem_type, + fe._nodes, + fe._n_nodes, + _qp_ref(0), + _qp_ref(1), + _qp_ref(2), + xyz, + J); + + _xyz = Point(xyz(0), xyz(1), xyz(2)); + _JxW = + volume_jxw(J, + fe._dim, + GaussQuadrature::weight(fe._key.elem_type, fe._quadrature_order, qp)); + + if (_need_gradients) + _Jinv = libMesh::Kokkos::inverse(J, fe._dim); + } + + LIBMESH_DEVICE_INLINE + Real JxW() const + { + return _JxW; + } + + LIBMESH_DEVICE_INLINE + Real phi(const unsigned int i) const + { + return shape(_fe._key, i, _qp_ref(0), _qp_ref(1), _qp_ref(2)); + } + + LIBMESH_DEVICE_INLINE + Gradient dphi(const unsigned int i) const + { + libmesh_assert(_need_gradients); + return _Jinv * grad_shape(_fe._key, i, _qp_ref(0), _qp_ref(1), _qp_ref(2)); + } + + LIBMESH_DEVICE_INLINE + const Point & xyz() const + { + return _xyz; + } + + LIBMESH_DEVICE_INLINE + const RealVector & reference_point() const + { + return _qp_ref; + } + + LIBMESH_DEVICE_INLINE + const RealTensor & inverse_jacobian() const + { + libmesh_assert(_need_gradients); + return _Jinv; + } + + LIBMESH_DEVICE_INLINE + unsigned int qp_index() const + { + return _qp; + } + + LIBMESH_DEVICE_INLINE + unsigned int elem_index() const + { + return _fe._elem_index; + } + + private: + const HilbertFEAccess & _fe; + unsigned int _qp; + RealVector _qp_ref; + Point _xyz; + Real _JxW; + RealTensor _Jinv; + bool _need_gradients; + }; + + LIBMESH_DEVICE_INLINE + HilbertFEAccess(libMesh::FEShapeKey key, + libMesh::ElemMappingType mapping_type, + const NodeStorage & nodes, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const 
unsigned int elem_index = 0) + : _key(key), + _mapping_type(mapping_type), + _nodes(nodes), + _n_nodes(n_nodes), + _quadrature_order(quadrature_order), + _dim(dim_from_topology(key.elem_type)), + _elem_index(elem_index) + { + } + + LIBMESH_DEVICE_INLINE + unsigned int n_qpoints() const + { + return GaussQuadrature::n_points(_key.elem_type, _quadrature_order); + } + + LIBMESH_DEVICE_INLINE + unsigned int n_dofs() const + { + return libMesh::Kokkos::n_dofs(_key); + } + + LIBMESH_DEVICE_INLINE + QpData qp_data(const unsigned int qp, + const bool need_gradients) const + { + return QpData(*this, qp, need_gradients); + } + +private: + libMesh::FEShapeKey _key; + libMesh::ElemMappingType _mapping_type; + node_storage_type _nodes; + unsigned int _n_nodes; + unsigned int _quadrature_order; + unsigned int _dim; + unsigned int _elem_index; +}; + +template +using HilbertSolutionAccess = libMesh::detail::HilbertSolutionAccess; + +template +LIBMESH_DEVICE_INLINE auto +make_hilbert_solution_access(const FEAccess & fe, + CoeffStorage && coeff, + const Number solution_derivative) +{ + return libMesh::detail::make_hilbert_solution_access( + fe, + std::forward(coeff), + solution_derivative); +} + +template +using AnalyticHilbertGoalAccess = + libMesh::detail::HilbertAnalyticGoalAccess; + +template +LIBMESH_DEVICE_INLINE auto +make_hilbert_analytic_goal_access(GoalFunction && goal_func, + GoalGradient && goal_grad) +{ + return libMesh::detail::make_hilbert_analytic_goal_access( + std::forward(goal_func), + std::forward(goal_grad)); +} + +template +class LocalHilbertAccumulator +{ +public: + LIBMESH_DEVICE_INLINE + explicit LocalHilbertAccumulator(const unsigned int n_dofs) + : _n_dofs(n_dofs) + { + zero(); + } + + LIBMESH_DEVICE_INLINE + void zero() + { + for (unsigned int i = 0; i != MaxDofs; ++i) + { + _F[i] = 0.; + for (unsigned int j = 0; j != MaxDofs; ++j) + _K[i][j] = 0.; + } + } + + LIBMESH_DEVICE_INLINE + void add_residual(const unsigned int i, + const Number value) + { + 
_F[i] += value; + } + + LIBMESH_DEVICE_INLINE + void add_jacobian(const unsigned int i, + const unsigned int j, + const Number value) + { + _K[i][j] += value; + } + + LIBMESH_DEVICE_INLINE + Number residual(const unsigned int i) const + { + return _F[i]; + } + + LIBMESH_DEVICE_INLINE + Number jacobian(const unsigned int i, + const unsigned int j) const + { + return _K[i][j]; + } + + LIBMESH_DEVICE_INLINE + unsigned int n_dofs() const + { + return _n_dofs; + } + +private: + Number _F[MaxDofs]; + Number _K[MaxDofs][MaxDofs]; + unsigned int _n_dofs; +}; + +} // namespace libMesh::Kokkos::detail + +#endif // LIBMESH_KOKKOS_HILBERT_ASSEMBLY_H diff --git a/include/gpu/kokkos_hilbert_system.h b/include/gpu/kokkos_hilbert_system.h new file mode 100644 index 00000000000..0796d34ee03 --- /dev/null +++ b/include/gpu/kokkos_hilbert_system.h @@ -0,0 +1,685 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_KOKKOS_HILBERT_SYSTEM_H +#define LIBMESH_KOKKOS_HILBERT_SYSTEM_H + +#include "libmesh/libmesh_common.h" + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "kokkos_hilbert_assembly.h" +#include "kokkos_parsed_function.h" + +namespace libMesh::Kokkos::detail +{ + +template +LIBMESH_DEVICE_INLINE decltype(auto) +storage_at(const Storage & storage, + const unsigned int i) +{ + return storage(i); +} + +template +LIBMESH_DEVICE_INLINE const T & +storage_at(const T * storage, + const unsigned int i) +{ + return storage[i]; +} + +template +struct StaticArrayAccess +{ + using value_type = T; + T values[N] = {}; + unsigned int size = 0; + + LIBMESH_DEVICE_INLINE + const T & operator()(const unsigned int i) const + { + return values[i]; + } +}; + +template +class ElementNodeAccess +{ +public: + LIBMESH_DEVICE_INLINE + ElementNodeAccess(NodeCoordinateView node_coordinates, + ElemNodeIdView element_node_ids, + const unsigned int elem_index) + : _node_coordinates(node_coordinates), + _element_node_ids(element_node_ids), + _elem_index(elem_index) + { + } + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int node, + const unsigned int component) const + { + return _node_coordinates(_element_node_ids(_elem_index, node), component); + } + +private: + NodeCoordinateView _node_coordinates; + ElemNodeIdView _element_node_ids; + unsigned int _elem_index; +}; + +template +LIBMESH_DEVICE_INLINE auto +make_element_node_access(NodeCoordinateView node_coordinates, + ElemNodeIdView element_node_ids, + const unsigned int elem_index) +{ + return ElementNodeAccess(node_coordinates, + element_node_ids, + elem_index); +} + +template +class GatheredCoeffAccess +{ +public: + LIBMESH_DEVICE_INLINE + GatheredCoeffAccess(GlobalCoeffView global_coeffs, + 
LocalIndexView local_indices, + const unsigned int elem_index) + : _global_coeffs(global_coeffs), + _local_indices(local_indices), + _elem_index(elem_index) + { + } + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int i) const + { + return storage_at(_global_coeffs, _local_indices(_elem_index, i)); + } + +private: + GlobalCoeffView _global_coeffs; + LocalIndexView _local_indices; + unsigned int _elem_index; +}; + +template +LIBMESH_DEVICE_INLINE auto +make_gathered_coeff_access(GlobalCoeffView global_coeffs, + LocalIndexView local_indices, + const unsigned int elem_index) +{ + return GatheredCoeffAccess(global_coeffs, + local_indices, + elem_index); +} + +template +struct DenseElementOutputSink +{ + ResidualView residual; + JacobianView jacobian; + unsigned int n_dofs = 0; + bool request_jacobian = false; + + template + LIBMESH_DEVICE_INLINE + void write(const Accumulator & accum) const + { + for (unsigned int i = 0; i != n_dofs; ++i) + { + residual(i) = accum.residual(i); + if (request_jacobian) + for (unsigned int j = 0; j != n_dofs; ++j) + jacobian(i, j) = accum.jacobian(i, j); + } + } +}; + +template +struct FlatDeviceValueSink +{ + ResidualView residual; + JacobianView jacobian; + unsigned int n_dofs = 0; + + template + LIBMESH_DEVICE_INLINE + void write(const Accumulator & accum) const + { + for (unsigned int i = 0; i != n_dofs; ++i) + { + residual(i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + jacobian(i * n_dofs + j) = accum.jacobian(i, j); + } + } +}; + +struct ZeroCoeffAccess +{ + LIBMESH_DEVICE_INLINE + Number operator()(const unsigned int) const + { + return Number(0); + } +}; + +template +class GatheredParsedFEMGoalAccess +{ +public: + LIBMESH_DEVICE_INLINE + GatheredParsedFEMGoalAccess(FieldKeyStorage field_keys, + FieldDofStorage field_dofs, + GlobalCoeffView global_coeffs, + const LocalIndexView * field_local_indices, + GoalFunction goal) + : _field_keys(field_keys), + _field_dofs(field_dofs), + 
_global_coeffs(global_coeffs), + _goal(goal) + { + for (unsigned int i = 0; i != MaxFieldVariables; ++i) + _field_local_indices[i] = field_local_indices[i]; + } + + template + LIBMESH_DEVICE_INLINE + Number value(const QpData & qp_data, const Point & xyz) const + { + Number vars[LIBMESH_DIM + 1 + MaxFieldVariables] = {}; + fill_variables(qp_data, xyz, vars); + return _goal.value(vars); + } + + template + LIBMESH_DEVICE_INLINE + Gradient gradient(const QpData & qp_data, const Point & xyz) const + { + Number vars[LIBMESH_DIM + 1 + MaxFieldVariables] = {}; + Gradient field_gradients[MaxFieldVariables]; + fill_variables(qp_data, xyz, vars); + + for (unsigned int field = 0; field != _goal.n_field_variables(); ++field) + field_gradients[field] = sample_field_gradient(qp_data, field); + + return _goal.gradient(vars, field_gradients); + } + +private: + template + LIBMESH_DEVICE_INLINE + void fill_variables(const QpData & qp_data, + const Point & xyz, + Number * vars) const + { + vars[0] = xyz(0); +#if LIBMESH_DIM > 1 + vars[1] = xyz(1); +#endif +#if LIBMESH_DIM > 2 + vars[2] = xyz(2); +#endif + vars[LIBMESH_DIM] = _goal.time(); + + for (unsigned int field = 0; field != _goal.n_field_variables(); ++field) + vars[LIBMESH_DIM + 1 + field] = sample_field_value(qp_data, field); + } + + template + LIBMESH_DEVICE_INLINE + Number sample_field_value(const QpData & qp_data, + const unsigned int field) const + { + const auto & qp_ref = qp_data.reference_point(); + const auto field_key = _field_keys(field); + const unsigned int n_dofs = _field_dofs(field); + const unsigned int elem_index = qp_data.elem_index(); + + Number value = 0.; + for (unsigned int i = 0; i != n_dofs; ++i) + value += storage_at(_global_coeffs, _field_local_indices[field](elem_index, i)) * + shape(field_key, i, qp_ref(0), qp_ref(1), qp_ref(2)); + + return value; + } + + template + LIBMESH_DEVICE_INLINE + Gradient sample_field_gradient(const QpData & qp_data, + const unsigned int field) const + { + Gradient grad; + 
grad.zero(); + + const auto & qp_ref = qp_data.reference_point(); + const auto & Jinv = qp_data.inverse_jacobian(); + const auto field_key = _field_keys(field); + const unsigned int n_dofs = _field_dofs(field); + const unsigned int elem_index = qp_data.elem_index(); + + for (unsigned int i = 0; i != n_dofs; ++i) + grad.add_scaled(Jinv * grad_shape(field_key, i, qp_ref(0), qp_ref(1), qp_ref(2)), + storage_at(_global_coeffs, _field_local_indices[field](elem_index, i))); + + return grad; + } + + FieldKeyStorage _field_keys; + FieldDofStorage _field_dofs; + GlobalCoeffView _global_coeffs; + LocalIndexView _field_local_indices[MaxFieldVariables] = {}; + GoalFunction _goal; +}; + +template +bool +run_hilbert_system_assembly(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const unsigned int elem_index, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const CoeffStorage & coeff, + const Number solution_derivative, + GoalAccess goal_access, + const bool request_jacobian, + const Sink & sink, + const char * const kernel_name) +{ + if (sink.n_dofs > MaxDofs) + return false; + + if (!supports_hilbert_local_assembly(key, mapping_type, quadrature_order)) + return false; + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe(key, + mapping_type, + elem_nodes, + n_nodes, + quadrature_order, + elem_index); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, 1), + KOKKOS_LAMBDA(const int) { + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, coeff, solution_derivative); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(sink.n_dofs); + libMesh::detail::assemble_hilbert_element(fe, + solution, + goal_access, + request_jacobian, + hilbert_order, 
+ accum); + sink.write(accum); + }); + ::Kokkos::fence(); + + return true; +} + +template +void +run_hilbert_system_value_batch(const libMesh::FEFamily family, + const libMesh::Order base_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemTypeStorage & element_types, + const ElemMappingTypeStorage & element_mapping_types, + const ElemNodeCountStorage & element_n_nodes, + const ElemPLevelStorage & element_p_levels, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const QuadratureOrderStorage & quadrature_orders, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const unsigned int hilbert_order, + GoalAccess goal_access, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + const auto key = + libMesh::FEShapeKey{family, + element_types(elem_index), + static_cast(base_order + + cast_int(element_p_levels(elem_index)))}; + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, + element_mapping_types(elem_index), + elem_nodes, + element_n_nodes(elem_index), + quadrature_orders(record_index), + elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = 
rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); + ::Kokkos::fence(); +} + +template +void +run_hilbert_system_bucket_value_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const unsigned int hilbert_order, + GoalAccess goal_access, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + 
rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); + ::Kokkos::fence(); +} + +template +void +run_hilbert_system_fem_value_batch(const libMesh::FEFamily family, + const libMesh::Order base_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemTypeStorage & element_types, + const ElemMappingTypeStorage & element_mapping_types, + const ElemNodeCountStorage & element_n_nodes, + const ElemPLevelStorage & element_p_levels, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const QuadratureOrderStorage & quadrature_orders, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const FieldKeyRecordStorage & field_keys, + const FieldDofRecordStorage & field_dofs, + const FieldLocalIndexStorage & field_local_indices, + const GlobalCoeffStorage & global_coeffs, + GoalFunction goal_function, + const unsigned int hilbert_order, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + const auto key = + libMesh::FEShapeKey{family, + element_types(elem_index), + static_cast(base_order + + cast_int(element_p_levels(elem_index)))}; + + StaticArrayAccess record_field_keys; + StaticArrayAccess record_field_dofs; + record_field_keys.size = goal_function.n_field_variables(); + record_field_dofs.size = goal_function.n_field_variables(); + for (unsigned int field = 0; field != goal_function.n_field_variables(); ++field) + { + 
record_field_keys.values[field] = field_keys(field, record_index); + record_field_dofs.values[field] = field_dofs(field, record_index); + } + + const auto goal_access = + GatheredParsedFEMGoalAccess(record_field_keys, + record_field_dofs, + global_coeffs, + field_local_indices.values, + goal_function); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, + element_mapping_types(elem_index), + elem_nodes, + element_n_nodes(elem_index), + quadrature_orders(record_index), + elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); + ::Kokkos::fence(); +} + +template +void +run_hilbert_system_fem_bucket_value_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + FieldKeyStorage field_keys, + FieldDofStorage field_dofs, + const FieldLocalIndexStorage & field_local_indices, + const GlobalCoeffStorage & global_coeffs, + GoalFunction goal_function, + const unsigned int hilbert_order, + ResidualView rhs_values, + JacobianView mat_values, + const char * 
const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto goal_access = + GatheredParsedFEMGoalAccess(field_keys, + field_dofs, + global_coeffs, + field_local_indices.values, + goal_function); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); + ::Kokkos::fence(); +} + +} // namespace libMesh::Kokkos::detail + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_HILBERT_SYSTEM_H diff --git a/include/gpu/kokkos_parsed_function.h b/include/gpu/kokkos_parsed_function.h new file mode 100644 index 00000000000..1942ea0fca0 --- /dev/null +++ b/include/gpu/kokkos_parsed_function.h @@ -0,0 +1,832 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_KOKKOS_PARSED_FUNCTION_H +#define LIBMESH_KOKKOS_PARSED_FUNCTION_H + +#include "libmesh/libmesh_common.h" + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "libmesh/libmesh_device.h" +#include "libmesh/parsed_function_program.h" +#include "libmesh/point.h" +#include "libmesh/vector_value.h" + +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include + +namespace libMesh::Kokkos +{ +namespace detail +{ + +template +struct DeviceParsedFunctionProgram +{ + ::Kokkos::View bytecode; + ::Kokkos::View immediates; + unsigned int stack_size = 0; + unsigned int n_variables = 0; + Scalar epsilon = 0; + + bool empty() const { return bytecode.extent(0) == 0; } +}; + +template +inline void +validate_program_stack(const DeviceParsedFunctionProgram & program, + const char * program_name, + const unsigned int max_stack) +{ + libmesh_error_msg_if(program.stack_size > max_stack, + "KokkosParsedFunction requires a larger MaxStack bound for " << + program_name << " bytecode"); +} + +template +inline void +validate_coordinate_program_variables(const DeviceParsedFunctionProgram & program, + const char * program_name) +{ + libmesh_error_msg_if(program.n_variables > LIBMESH_DIM + 1, + 
"KokkosParsedFunction currently supports only x/y/z/t variables in " << + program_name << " bytecode"); +} + +template +inline ::Kokkos::View +upload_scalar_buffer(const std::vector & values, + const std::string & label) +{ + ::Kokkos::View d(label, values.size()); + auto h = ::Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + + ::Kokkos::deep_copy(d, h); + return d; +} + +template +inline DeviceParsedFunctionProgram +make_device_program(const libMesh::ParsedFunctionProgram & program, + const std::string & label) +{ + DeviceParsedFunctionProgram d_program; + d_program.bytecode = upload_scalar_buffer(program.bytecode, label + "_bytecode"); + d_program.immediates = upload_scalar_buffer(program.immediates, label + "_immediates"); + d_program.stack_size = program.stack_size; + d_program.n_variables = program.n_variables; + d_program.epsilon = program.epsilon; + return d_program; +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_abs(const Scalar x) +{ + using std::abs; + return abs(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_floor(const Scalar x) +{ + using std::floor; + return floor(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_ceil(const Scalar x) +{ + using std::ceil; + return ceil(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_log(const Scalar x) +{ + using std::log; + return log(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_log10(const Scalar x) +{ + using std::log10; + return log10(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_log2(const Scalar x) +{ + using std::log2; + return log2(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_sin(const Scalar x) +{ + using std::sin; + return sin(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_cos(const Scalar x) +{ + using std::cos; + return cos(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_tan(const Scalar x) +{ + using std::tan; + return tan(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_sinh(const Scalar x) +{ + 
using std::sinh; + return sinh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_cosh(const Scalar x) +{ + using std::cosh; + return cosh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_tanh(const Scalar x) +{ + using std::tanh; + return tanh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_exp(const Scalar x) +{ + using std::exp; + return exp(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_exp2(const Scalar x) +{ + using std::exp2; + return exp2(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_sqrt(const Scalar x) +{ + using std::sqrt; + return sqrt(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_pow(const Scalar x, + const Scalar y) +{ + using std::pow; + return pow(x, y); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_hypot(const Scalar x, + const Scalar y) +{ + using std::hypot; + return hypot(x, y); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_cbrt(const Scalar x) +{ + using std::cbrt; + return cbrt(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_asin(const Scalar x) +{ + using std::asin; + return asin(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_acos(const Scalar x) +{ + using std::acos; + return acos(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_atan(const Scalar x) +{ + using std::atan; + return atan(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_atan2(const Scalar y, + const Scalar x) +{ + using std::atan2; + return atan2(y, x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_asinh(const Scalar x) +{ + using std::asinh; + return asinh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_acosh(const Scalar x) +{ + using std::acosh; + return acosh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_atanh(const Scalar x) +{ + using std::atanh; + return atanh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_mod(const Scalar x, + const Scalar y) +{ + using std::fmod; + return fmod(x, y); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_trunc(const Scalar x) +{ + return x < Scalar(0) ? 
pf_ceil(x) : pf_floor(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_int(const Scalar x) +{ + return x < Scalar(0) ? pf_ceil(x - Scalar(0.5)) : pf_floor(x + Scalar(0.5)); +} + +template +LIBMESH_DEVICE_INLINE bool +pf_equal(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return pf_abs(x - y) <= epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_nequal(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return pf_abs(x - y) > epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_less(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return x < y - epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_less_or_eq(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return x <= y + epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_truth(const Scalar x) +{ + return pf_abs(x) >= Scalar(0.5); +} + +template +LIBMESH_DEVICE_INLINE bool +pf_abs_truth(const Scalar x) +{ + return x >= Scalar(0.5); +} + +template +LIBMESH_DEVICE_INLINE Scalar +eval_parsed_function_program(const DeviceParsedFunctionProgram & program, + const Scalar * vars) +{ + if (program.empty()) + return 0; + + Scalar stack[MaxStack]; + unsigned int dp = 0; + int sp = -1; + + for (unsigned int ip = 0; ip < program.bytecode.extent(0); ++ip) + { + const unsigned int opcode = program.bytecode(ip); + + if (libMesh::parsed_function_is_var_opcode(opcode)) + { + stack[++sp] = vars[opcode - libMesh::parsed_function_var_begin()]; + continue; + } + + switch (static_cast(opcode)) + { + case libMesh::ParsedFunctionOpcode::cAbs: stack[sp] = pf_abs(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAcos: stack[sp] = pf_acos(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAcosh: stack[sp] = pf_acosh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAsin: stack[sp] = pf_asin(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAsinh: stack[sp] = pf_asinh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAtan: 
stack[sp] = pf_atan(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAtan2: stack[sp - 1] = pf_atan2(stack[sp - 1], stack[sp]); --sp; break; + case libMesh::ParsedFunctionOpcode::cAtanh: stack[sp] = pf_atanh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCbrt: stack[sp] = pf_cbrt(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCeil: stack[sp] = pf_ceil(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCos: stack[sp] = pf_cos(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCosh: stack[sp] = pf_cosh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCot: stack[sp] = Scalar(1) / pf_tan(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCsc: stack[sp] = Scalar(1) / pf_sin(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cExp: stack[sp] = pf_exp(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cExp2: stack[sp] = pf_exp2(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cFloor: stack[sp] = pf_floor(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cHypot: stack[sp - 1] = pf_hypot(stack[sp - 1], stack[sp]); --sp; break; + + case libMesh::ParsedFunctionOpcode::cIf: + if (pf_truth(stack[sp--])) + ip += 2; + else + { + const unsigned int jump_ip = program.bytecode(ip + 1); + const unsigned int jump_dp = program.bytecode(ip + 2); + ip = jump_ip; + dp = jump_dp; + } + break; + + case libMesh::ParsedFunctionOpcode::cInt: stack[sp] = pf_int(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cLog: stack[sp] = pf_log(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cLog10: stack[sp] = pf_log10(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cLog2: stack[sp] = pf_log2(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cMax: stack[sp - 1] = stack[sp - 1] > stack[sp] ? stack[sp - 1] : stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cMin: stack[sp - 1] = stack[sp - 1] < stack[sp] ? 
stack[sp - 1] : stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cPow: stack[sp - 1] = pf_pow(stack[sp - 1], stack[sp]); --sp; break; + case libMesh::ParsedFunctionOpcode::cSec: stack[sp] = Scalar(1) / pf_cos(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cSin: stack[sp] = pf_sin(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cSinh: stack[sp] = pf_sinh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cSqrt: stack[sp] = pf_sqrt(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cTan: stack[sp] = pf_tan(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cTanh: stack[sp] = pf_tanh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cTrunc: stack[sp] = pf_trunc(stack[sp]); break; + + case libMesh::ParsedFunctionOpcode::cImmed: stack[++sp] = program.immediates(dp++); break; + case libMesh::ParsedFunctionOpcode::cJump: + dp = program.bytecode(ip + 2); /* read the data-pointer operand while ip still addresses this cJump, as the cIf/cAbsIf cases do */ + ip = program.bytecode(ip + 1); + break; + + case libMesh::ParsedFunctionOpcode::cNeg: stack[sp] = -stack[sp]; break; + case libMesh::ParsedFunctionOpcode::cAdd: stack[sp - 1] += stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cSub: stack[sp - 1] -= stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cMul: stack[sp - 1] *= stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cDiv: stack[sp - 1] /= stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cMod: stack[sp - 1] = pf_mod(stack[sp - 1], stack[sp]); --sp; break; + case libMesh::ParsedFunctionOpcode::cEqual: stack[sp - 1] = Scalar(pf_equal(stack[sp - 1], stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cNEqual: stack[sp - 1] = Scalar(pf_nequal(stack[sp - 1], stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cLess: stack[sp - 1] = Scalar(pf_less(stack[sp - 1], stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cLessOrEq: stack[sp - 1] = Scalar(pf_less_or_eq(stack[sp - 1], 
stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cGreater: stack[sp - 1] = Scalar(pf_less(stack[sp], stack[sp - 1], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cGreaterOrEq: stack[sp - 1] = Scalar(pf_less_or_eq(stack[sp], stack[sp - 1], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cNot: stack[sp] = Scalar(!pf_truth(stack[sp])); break; + case libMesh::ParsedFunctionOpcode::cAnd: stack[sp - 1] = Scalar(pf_truth(stack[sp - 1]) && pf_truth(stack[sp])); --sp; break; + case libMesh::ParsedFunctionOpcode::cOr: stack[sp - 1] = Scalar(pf_truth(stack[sp - 1]) || pf_truth(stack[sp])); --sp; break; + case libMesh::ParsedFunctionOpcode::cNotNot: stack[sp] = Scalar(pf_truth(stack[sp])); break; + + case libMesh::ParsedFunctionOpcode::cDeg: stack[sp] = stack[sp] * Scalar(180.) / libMesh::pi; break; + case libMesh::ParsedFunctionOpcode::cRad: stack[sp] = stack[sp] * libMesh::pi / Scalar(180.); break; + + case libMesh::ParsedFunctionOpcode::cPopNMov: + { + const unsigned int target = program.bytecode(++ip); + const unsigned int source = program.bytecode(++ip); + stack[target] = stack[source]; + sp = static_cast(target); + break; + } + + case libMesh::ParsedFunctionOpcode::cLog2by: + stack[sp - 1] = pf_log2(stack[sp - 1]) * stack[sp]; + --sp; + break; + + case libMesh::ParsedFunctionOpcode::cNop: + break; + + case libMesh::ParsedFunctionOpcode::cSinCos: + stack[sp + 1] = pf_cos(stack[sp]); + stack[sp] = pf_sin(stack[sp]); + ++sp; + break; + + case libMesh::ParsedFunctionOpcode::cSinhCosh: + stack[sp + 1] = pf_cosh(stack[sp]); + stack[sp] = pf_sinh(stack[sp]); + ++sp; + break; + + case libMesh::ParsedFunctionOpcode::cAbsNot: stack[sp] = Scalar(!pf_abs_truth(stack[sp])); break; + case libMesh::ParsedFunctionOpcode::cAbsNotNot: stack[sp] = Scalar(pf_abs_truth(stack[sp])); break; + case libMesh::ParsedFunctionOpcode::cAbsAnd: stack[sp - 1] = Scalar(pf_abs_truth(stack[sp - 1]) && pf_abs_truth(stack[sp])); 
--sp; break; + case libMesh::ParsedFunctionOpcode::cAbsOr: stack[sp - 1] = Scalar(pf_abs_truth(stack[sp - 1]) || pf_abs_truth(stack[sp])); --sp; break; + + case libMesh::ParsedFunctionOpcode::cAbsIf: + if (pf_abs_truth(stack[sp--])) + ip += 2; + else + { + const unsigned int jump_ip = program.bytecode(ip + 1); + const unsigned int jump_dp = program.bytecode(ip + 2); + ip = jump_ip; + dp = jump_dp; + } + break; + + case libMesh::ParsedFunctionOpcode::cDup: stack[sp + 1] = stack[sp]; ++sp; break; + + case libMesh::ParsedFunctionOpcode::cFetch: + { + const unsigned int stack_offset = program.bytecode(++ip); + stack[sp + 1] = stack[stack_offset]; + ++sp; + break; + } + + case libMesh::ParsedFunctionOpcode::cInv: stack[sp] = Scalar(1) / stack[sp]; break; + case libMesh::ParsedFunctionOpcode::cSqr: stack[sp] = stack[sp] * stack[sp]; break; + case libMesh::ParsedFunctionOpcode::cRDiv: stack[sp - 1] = stack[sp] / stack[sp - 1]; --sp; break; + case libMesh::ParsedFunctionOpcode::cRSub: stack[sp - 1] = stack[sp] - stack[sp - 1]; --sp; break; + case libMesh::ParsedFunctionOpcode::cRSqrt: stack[sp] = Scalar(1) / pf_sqrt(stack[sp]); break; + + default: + return Scalar(0); + } + } + + return stack[sp]; +} + +template +LIBMESH_DEVICE_INLINE Scalar +eval_coordinate_parsed_function_program(const DeviceParsedFunctionProgram & program, + const Point & p, + const Real time) +{ + Scalar vars[LIBMESH_DIM + 1]; + vars[0] = p(0); +#if LIBMESH_DIM > 1 + vars[1] = p(1); +#endif +#if LIBMESH_DIM > 2 + vars[2] = p(2); +#endif + vars[LIBMESH_DIM] = time; + return eval_parsed_function_program(program, vars); +} + +} // namespace detail + +template +class KokkosParsedFunction; + +template +class KokkosParsedScalarProgram +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedScalarProgram() = default; + + explicit KokkosParsedScalarProgram(const libMesh::ParsedFunctionProgram & program, + const std::string & label) + : _program(detail::make_device_program(program, label)) + { + 
detail::validate_program_stack(_program, label.c_str(), MaxStack); + } + + LIBMESH_DEVICE_INLINE + unsigned int n_variables() const + { + return _program.n_variables; + } + + template + LIBMESH_DEVICE_INLINE + Scalar operator()(const VariableStorage & vars) const + { + return detail::eval_parsed_function_program(_program, vars); + } + +private: + detail::DeviceParsedFunctionProgram _program; +}; + +template +class KokkosParsedGradient +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedGradient() = default; + + LIBMESH_DEVICE_INLINE + explicit KokkosParsedGradient(const KokkosParsedFunction & func) + : _func(func) + { + } + + LIBMESH_DEVICE_INLINE + Gradient operator()(const Point & p) const; + +private: + KokkosParsedFunction _func; +}; + +template +class KokkosParsedFunction +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedFunction() = default; + + explicit KokkosParsedFunction(const libMesh::ParsedFunctionProgramBundle & program_bundle, + const Real time = 0.) + : _value(detail::make_device_program(program_bundle.value, "parsed_function_value")), + _dx(detail::make_device_program(program_bundle.dx, "parsed_function_dx")), +#if LIBMESH_DIM > 1 + _dy(detail::make_device_program(program_bundle.dy, "parsed_function_dy")), +#endif +#if LIBMESH_DIM > 2 + _dz(detail::make_device_program(program_bundle.dz, "parsed_function_dz")), +#endif + _dt(detail::make_device_program(program_bundle.dt, "parsed_function_dt")), + _time(time) + { + detail::validate_program_stack(_value, "value", MaxStack); + detail::validate_coordinate_program_variables(_value, "value"); + detail::validate_program_stack(_dx, "dx", MaxStack); + detail::validate_coordinate_program_variables(_dx, "dx"); +#if LIBMESH_DIM > 1 + detail::validate_program_stack(_dy, "dy", MaxStack); + detail::validate_coordinate_program_variables(_dy, "dy"); +#endif +#if LIBMESH_DIM > 2 + detail::validate_program_stack(_dz, "dz", MaxStack); + detail::validate_coordinate_program_variables(_dz, "dz"); +#endif + 
detail::validate_program_stack(_dt, "dt", MaxStack); + detail::validate_coordinate_program_variables(_dt, "dt"); + } + + KokkosParsedFunction + with_time(const Real time) const + { + auto copy = *this; + copy._time = time; + return copy; + } + + LIBMESH_DEVICE_INLINE + Scalar operator()(const Point & p) const + { + return detail::eval_coordinate_parsed_function_program(_value, p, _time); + } + + LIBMESH_DEVICE_INLINE + Scalar time_derivative(const Point & p) const + { + return detail::eval_coordinate_parsed_function_program(_dt, p, _time); + } + + LIBMESH_DEVICE_INLINE + Gradient gradient(const Point & p) const + { + Gradient g; + g(0) = detail::eval_coordinate_parsed_function_program(_dx, p, _time); +#if LIBMESH_DIM > 1 + g(1) = detail::eval_coordinate_parsed_function_program(_dy, p, _time); +#endif +#if LIBMESH_DIM > 2 + g(2) = detail::eval_coordinate_parsed_function_program(_dz, p, _time); +#endif + return g; + } + + LIBMESH_DEVICE_INLINE + KokkosParsedGradient gradient_function() const + { + return KokkosParsedGradient(*this); + } + +private: + detail::DeviceParsedFunctionProgram _value; + detail::DeviceParsedFunctionProgram _dx; +#if LIBMESH_DIM > 1 + detail::DeviceParsedFunctionProgram _dy; +#endif +#if LIBMESH_DIM > 2 + detail::DeviceParsedFunctionProgram _dz; +#endif + detail::DeviceParsedFunctionProgram _dt; + Real _time = 0.; + + friend class KokkosParsedGradient; +}; + +template +LIBMESH_DEVICE_INLINE +Gradient +KokkosParsedGradient::operator()(const Point & p) const +{ + return _func.gradient(p); +} + +template +class KokkosParsedFEMFunction +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedFEMFunction() = default; + + explicit KokkosParsedFEMFunction(const libMesh::ParsedFEMFunctionProgramBundle & program_bundle, + const Real time = 0.) 
+ : _value(program_bundle.value, "parsed_fem_function_value"), + _dx(program_bundle.dx, "parsed_fem_function_dx"), +#if LIBMESH_DIM > 1 + _dy(program_bundle.dy, "parsed_fem_function_dy"), +#endif +#if LIBMESH_DIM > 2 + _dz(program_bundle.dz, "parsed_fem_function_dz"), +#endif + _dt(program_bundle.dt, "parsed_fem_function_dt"), + _n_field_variables(cast_int(program_bundle.value_variable_numbers.size())), + _time(time) + { + libmesh_error_msg_if(!program_bundle.supports_kokkos_value_goal(), + "KokkosParsedFEMFunction currently supports only value-based ParsedFEMFunction expressions"); + libmesh_error_msg_if(_n_field_variables > MaxFieldVariables, + "KokkosParsedFEMFunction exceeds MaxFieldVariables"); + + for (unsigned int i = 0; i != _n_field_variables; ++i) + { + _field_variable_numbers[i] = program_bundle.value_variable_numbers[i]; + _field_value_derivatives[i] = + KokkosParsedScalarProgram( + program_bundle.value_variable_derivatives[i], + "parsed_fem_function_dvalue_" + std::to_string(i)); + } + } + + LIBMESH_DEVICE_INLINE + KokkosParsedFEMFunction + with_time(const Real time) const + { + auto copy = *this; + copy._time = time; + return copy; + } + + LIBMESH_DEVICE_INLINE + unsigned int n_field_variables() const + { + return _n_field_variables; + } + + LIBMESH_DEVICE_INLINE + Real time() const + { + return _time; + } + + LIBMESH_DEVICE_INLINE + unsigned int field_variable_number(const unsigned int i) const + { + return _field_variable_numbers[i]; + } + + template + LIBMESH_DEVICE_INLINE + Scalar value(const VariableStorage & vars) const + { + return _value(vars); + } + + template + LIBMESH_DEVICE_INLINE + Gradient gradient(const VariableStorage & vars, + const Gradient * field_gradients) const + { + Gradient g; + g(0) = _dx(vars); +#if LIBMESH_DIM > 1 + g(1) = _dy(vars); +#endif +#if LIBMESH_DIM > 2 + g(2) = _dz(vars); +#endif + + for (unsigned int i = 0; i != _n_field_variables; ++i) + g.add_scaled(field_gradients[i], _field_value_derivatives[i](vars)); + + 
return g; + } + +private: + KokkosParsedScalarProgram _value; + KokkosParsedScalarProgram _dx; +#if LIBMESH_DIM > 1 + KokkosParsedScalarProgram _dy; +#endif +#if LIBMESH_DIM > 2 + KokkosParsedScalarProgram _dz; +#endif + KokkosParsedScalarProgram _dt; + KokkosParsedScalarProgram _field_value_derivatives[MaxFieldVariables]; + unsigned int _field_variable_numbers[MaxFieldVariables] = {}; + unsigned int _n_field_variables = 0; + Real _time = 0.; +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_PARSED_FUNCTION_H diff --git a/include/include_HEADERS b/include/include_HEADERS index 8c4283f5aea..3ab4efe3bc3 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -282,6 +282,7 @@ include_HEADERS = \ numerics/parsed_fem_function.h \ numerics/parsed_fem_function_parameter.h \ numerics/parsed_function.h \ + numerics/parsed_function_program.h \ numerics/parsed_function_parameter.h \ numerics/petsc_macro.h \ numerics/petsc_matrix.h \ diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index a560c36c9a3..e1c2d95aa03 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -187,6 +187,7 @@ BUILT_SOURCES = \ kokkos_fe_lagrange_3d.h \ kokkos_fe_map.h \ kokkos_fe_monomial.h \ + kokkos_parsed_function.h \ kokkos_fe_shape_dispatch.h \ kokkos_fe_types.h \ kokkos_linalg_base.h \ @@ -286,6 +287,7 @@ BUILT_SOURCES = \ parsed_fem_function.h \ parsed_fem_function_parameter.h \ parsed_function.h \ + parsed_function_program.h \ parsed_function_parameter.h \ petsc_macro.h \ petsc_matrix.h \ @@ -1182,6 +1184,9 @@ kokkos_fe_lagrange_3d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_3d.h kokkos_fe_map.h: $(top_srcdir)/include/gpu/kokkos_fe_map.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_parsed_function.h: $(top_srcdir)/include/gpu/kokkos_parsed_function.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + kokkos_fe_monomial.h: $(top_srcdir)/include/gpu/kokkos_fe_monomial.h $(AM_V_GEN)rm -f $@ 
&& $(LN_S) -f $< $@ @@ -1482,6 +1487,9 @@ parsed_fem_function_parameter.h: $(top_srcdir)/include/numerics/parsed_fem_funct parsed_function.h: $(top_srcdir)/include/numerics/parsed_function.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +parsed_function_program.h: $(top_srcdir)/include/numerics/parsed_function_program.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + parsed_function_parameter.h: $(top_srcdir)/include/numerics/parsed_function_parameter.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/include/libmesh/Makefile.in b/include/libmesh/Makefile.in index 823e23c2643..9f0c5dc37e7 100644 --- a/include/libmesh/Makefile.in +++ b/include/libmesh/Makefile.in @@ -321,6 +321,7 @@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -564,7 +565,10 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ weighted_patch_recovery_error_estimator.h fe.h fe_abstract.h \ fe_base.h fe_compute_data.h fe_interface.h \ fe_interface_macros.h fe_lagrange_shape_1D.h fe_macro.h \ - fe_map.h fe_transformation_base.h fe_type.h fe_xyz_map.h \ + fe_map.h fe_reference_element_traits.h \ + fe_serendipity_lagrange.h fe_shape_traits.h \ + fe_simplex_lagrange.h fe_tensor_product_lagrange.h \ + fe_transformation_base.h fe_type.h fe_xyz_map.h \ h1_fe_transformation.h hcurl_fe_transformation.h \ hdiv_fe_transformation.h inf_fe.h inf_fe_instantiate_1D.h \ inf_fe_instantiate_2D.h inf_fe_instantiate_3D.h inf_fe_macro.h \ @@ -589,25 +593,30 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ remote_elem.h sphere.h stored_range.h surface.h \ default_coupling.h ghost_point_neighbors.h ghosting_functor.h \ non_manifold_coupling.h overlap_coupling.h \ - point_neighbor_coupling.h sibling_coupling.h \ - kokkos_linalg_base.h kokkos_storage.h kokkos_storage_policy.h \ - kokkos_tensor_ops.h 
kokkos_vector_ops.h abaqus_io.h \ - boundary_info.h boundary_mesh.h checkpoint_io.h \ - distributed_mesh.h dyna_io.h ensight_io.h exodusII_io.h \ - exodusII_io_helper.h exodus_header_info.h fro_io.h gmsh_io.h \ - gmv_io.h gnuplot_io.h inf_elem_builder.h matlab_io.h \ - medit_io.h mesh.h mesh_base.h mesh_communication.h \ - mesh_function.h mesh_generation.h mesh_input.h \ - mesh_modification.h mesh_netgen_interface.h mesh_output.h \ - mesh_refinement.h mesh_serializer.h mesh_smoother.h \ - mesh_smoother_laplace.h mesh_smoother_vsmoother.h \ - mesh_subdivision_support.h mesh_tet_interface.h \ - mesh_tetgen_interface.h mesh_tetgen_wrapper.h mesh_tools.h \ - mesh_triangle_holes.h mesh_triangle_interface.h \ - mesh_triangle_wrapper.h namebased_io.h nemesis_io.h \ - nemesis_io_helper.h off_io.h parallel_mesh.h patch.h \ - poly2tri_triangulator.h postscript_io.h replicated_mesh.h \ - serial_mesh.h sides_to_elem_map.h simplex_refiner.h stl_io.h \ + point_neighbor_coupling.h sibling_coupling.h kokkos_fe_base.h \ + kokkos_fe_evaluator.h kokkos_fe_face_map.h \ + kokkos_fe_lagrange_1d.h kokkos_fe_lagrange_2d.h \ + kokkos_fe_lagrange_3d.h kokkos_fe_map.h kokkos_fe_monomial.h \ + kokkos_parsed_function.h kokkos_fe_shape_dispatch.h \ + kokkos_fe_types.h kokkos_linalg_base.h kokkos_quadrature.h \ + kokkos_storage.h kokkos_storage_policy.h kokkos_tensor_ops.h \ + kokkos_vector_ops.h abaqus_io.h boundary_info.h \ + boundary_mesh.h checkpoint_io.h distributed_mesh.h dyna_io.h \ + ensight_io.h exodusII_io.h exodusII_io_helper.h \ + exodus_header_info.h fro_io.h gmsh_io.h gmv_io.h gnuplot_io.h \ + inf_elem_builder.h matlab_io.h medit_io.h mesh.h mesh_base.h \ + mesh_communication.h mesh_function.h mesh_generation.h \ + mesh_input.h mesh_modification.h mesh_netgen_interface.h \ + mesh_output.h mesh_refinement.h mesh_serializer.h \ + mesh_smoother.h mesh_smoother_laplace.h \ + mesh_smoother_vsmoother.h mesh_subdivision_support.h \ + mesh_tet_interface.h mesh_tetgen_interface.h \ + 
mesh_tetgen_wrapper.h mesh_tools.h mesh_triangle_holes.h \ + mesh_triangle_interface.h mesh_triangle_wrapper.h \ + namebased_io.h nemesis_io.h nemesis_io_helper.h off_io.h \ + parallel_mesh.h patch.h poly2tri_triangulator.h \ + postscript_io.h replicated_mesh.h serial_mesh.h \ + sides_to_elem_map.h simplex_refiner.h stl_io.h \ sync_refinement_flags.h tecplot_io.h tetgen_io.h \ triangulator_interface.h ucd_io.h unstructured_mesh.h unv_io.h \ vtk_io.h xdr_io.h analytic_function.h composite_fem_function.h \ @@ -621,13 +630,14 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ function_base.h laspack_matrix.h laspack_vector.h \ lumped_mass_matrix.h numeric_vector.h parsed_fem_function.h \ parsed_fem_function_parameter.h parsed_function.h \ - parsed_function_parameter.h petsc_macro.h petsc_matrix.h \ - petsc_matrix_base.h petsc_matrix_shell_matrix.h \ - petsc_mffd_matrix.h petsc_preconditioner.h \ - petsc_shell_matrix.h petsc_solver_exception.h petsc_vector.h \ - preconditioner.h raw_accessor.h refinement_selector.h \ - shell_matrix.h sparse_matrix.h sparse_shell_matrix.h \ - static_condensation.h static_condensation_dof_map.h \ + parsed_function_program.h parsed_function_parameter.h \ + petsc_macro.h petsc_matrix.h petsc_matrix_base.h \ + petsc_matrix_shell_matrix.h petsc_mffd_matrix.h \ + petsc_preconditioner.h petsc_shell_matrix.h \ + petsc_solver_exception.h petsc_vector.h preconditioner.h \ + raw_accessor.h refinement_selector.h shell_matrix.h \ + sparse_matrix.h sparse_shell_matrix.h static_condensation.h \ + static_condensation_dof_map.h \ static_condensation_preconditioner.h sum_shell_matrix.h \ tensor_shell_matrix.h tensor_tools.h tensor_value.h \ trilinos_epetra_matrix.h trilinos_epetra_vector.h \ @@ -648,18 +658,19 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ sfc_partitioner.h subdomain_partitioner.h diff_physics.h \ diff_qoi.h fem_physics.h quadrature.h quadrature_clough.h \ quadrature_composite.h 
quadrature_conical.h quadrature_gauss.h \ - quadrature_gauss_lobatto.h quadrature_gm.h quadrature_grid.h \ - quadrature_jacobi.h quadrature_monomial.h quadrature_nodal.h \ - quadrature_simpson.h quadrature_trap.h rb_assembly_expansion.h \ - rb_construction.h rb_construction_base.h \ - rb_data_deserialization.h rb_data_serialization.h \ - rb_eim_assembly.h rb_eim_construction.h rb_eim_evaluation.h \ - rb_eim_theta.h rb_evaluation.h rb_parameters.h \ - rb_parametrized.h rb_parametrized_function.h \ - rb_scm_construction.h rb_scm_evaluation.h \ - rb_temporal_discretization.h rb_theta.h rb_theta_expansion.h \ - transient_rb_assembly_expansion.h transient_rb_construction.h \ - transient_rb_evaluation.h transient_rb_theta_expansion.h \ + quadrature_gauss_lobatto.h quadrature_gauss_rules.h \ + quadrature_gm.h quadrature_grid.h quadrature_jacobi.h \ + quadrature_monomial.h quadrature_nodal.h quadrature_simpson.h \ + quadrature_trap.h rb_assembly_expansion.h rb_construction.h \ + rb_construction_base.h rb_data_deserialization.h \ + rb_data_serialization.h rb_eim_assembly.h \ + rb_eim_construction.h rb_eim_evaluation.h rb_eim_theta.h \ + rb_evaluation.h rb_parameters.h rb_parametrized.h \ + rb_parametrized_function.h rb_scm_construction.h \ + rb_scm_evaluation.h rb_temporal_discretization.h rb_theta.h \ + rb_theta_expansion.h transient_rb_assembly_expansion.h \ + transient_rb_construction.h transient_rb_evaluation.h \ + transient_rb_theta_expansion.h \ boundary_volume_solution_transfer.h direct_solution_transfer.h \ dtk_adapter.h dtk_evaluator.h dtk_solution_transfer.h \ meshfree_interpolation.h meshfree_interpolation_function.h \ @@ -1192,6 +1203,21 @@ fe_macro.h: $(top_srcdir)/include/fe/fe_macro.h fe_map.h: $(top_srcdir)/include/fe/fe_map.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +fe_reference_element_traits.h: $(top_srcdir)/include/fe/fe_reference_element_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_serendipity_lagrange.h: 
$(top_srcdir)/include/fe/fe_serendipity_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_shape_traits.h: $(top_srcdir)/include/fe/fe_shape_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_simplex_lagrange.h: $(top_srcdir)/include/fe/fe_simplex_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_tensor_product_lagrange.h: $(top_srcdir)/include/fe/fe_tensor_product_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + fe_transformation_base.h: $(top_srcdir)/include/fe/fe_transformation_base.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1468,9 +1494,45 @@ point_neighbor_coupling.h: $(top_srcdir)/include/ghosting/point_neighbor_couplin sibling_coupling.h: $(top_srcdir)/include/ghosting/sibling_coupling.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_fe_base.h: $(top_srcdir)/include/gpu/kokkos_fe_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_evaluator.h: $(top_srcdir)/include/gpu/kokkos_fe_evaluator.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_face_map.h: $(top_srcdir)/include/gpu/kokkos_fe_face_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_1d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_1d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_2d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_2d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_3d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_3d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_map.h: $(top_srcdir)/include/gpu/kokkos_fe_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_parsed_function.h: $(top_srcdir)/include/gpu/kokkos_parsed_function.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_monomial.h: $(top_srcdir)/include/gpu/kokkos_fe_monomial.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_shape_dispatch.h: $(top_srcdir)/include/gpu/kokkos_fe_shape_dispatch.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_types.h: 
$(top_srcdir)/include/gpu/kokkos_fe_types.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + kokkos_linalg_base.h: $(top_srcdir)/include/gpu/kokkos_linalg_base.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_quadrature.h: $(top_srcdir)/include/gpu/kokkos_quadrature.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + kokkos_storage.h: $(top_srcdir)/include/gpu/kokkos_storage.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1756,6 +1818,9 @@ parsed_fem_function_parameter.h: $(top_srcdir)/include/numerics/parsed_fem_funct parsed_function.h: $(top_srcdir)/include/numerics/parsed_function.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +parsed_function_program.h: $(top_srcdir)/include/numerics/parsed_function_program.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + parsed_function_parameter.h: $(top_srcdir)/include/numerics/parsed_function_parameter.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1987,6 +2052,9 @@ quadrature_gauss.h: $(top_srcdir)/include/quadrature/quadrature_gauss.h quadrature_gauss_lobatto.h: $(top_srcdir)/include/quadrature/quadrature_gauss_lobatto.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +quadrature_gauss_rules.h: $(top_srcdir)/include/quadrature/quadrature_gauss_rules.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + quadrature_gm.h: $(top_srcdir)/include/quadrature/quadrature_gm.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/include/mesh/mesh_base.h b/include/mesh/mesh_base.h index 59d727a4282..de6c6fd6baf 100644 --- a/include/mesh/mesh_base.h +++ b/include/mesh/mesh_base.h @@ -31,10 +31,16 @@ #include "libmesh/parallel_object.h" #include "libmesh/simple_range.h" +#ifdef LIBMESH_HAVE_KOKKOS +#include "libmesh/kokkos_storage_policy.h" +#endif + // C++ Includes #include #include #include +#include +#include #include "libmesh/vector_value.h" @@ -188,6 +194,48 @@ class MeshBase : public ParallelObject */ virtual void clear (); +#ifdef LIBMESH_HAVE_KOKKOS + struct KokkosGeometryCache + { + using node_id_view = ::Kokkos::View; + using elem_id_view = ::Kokkos::View; + 
using node_coord_view = ::Kokkos::View; + using elem_node_id_view = ::Kokkos::View; + using elem_type_view = ::Kokkos::View; + using elem_mapping_type_view = ::Kokkos::View; + using elem_n_nodes_view = ::Kokkos::View; + using elem_p_level_view = ::Kokkos::View; + using elem_subdomain_view = ::Kokkos::View; + + node_id_view node_ids; + elem_id_view element_ids; + node_coord_view node_coordinates; + elem_node_id_view element_node_ids; + elem_type_view element_types; + elem_mapping_type_view element_mapping_types; + elem_n_nodes_view element_n_nodes; + elem_p_level_view element_p_levels; + elem_subdomain_view element_subdomains; + std::vector host_node_ids; + std::vector host_element_ids; + std::unordered_map node_lookup; + std::unordered_map element_lookup; + unsigned int max_nodes = 0; + }; + + const KokkosGeometryCache & get_kokkos_geometry_cache() const; + unsigned int get_kokkos_elem_index(const Elem & elem) const; + void prepare_kokkos_geometry_cache() const; +#else + void prepare_kokkos_geometry_cache() const {} +#endif + void clear_kokkos_geometry_cache() const + { +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif + } + /** * Deletes all the element data that is currently stored. * @@ -240,7 +288,10 @@ class MeshBase : public ParallelObject * generally more efficient to mark finer-grained settings instead. */ void unset_is_prepared() - { _preparation = false; } + { + _preparation = false; + this->clear_kokkos_geometry_cache(); + } /** * Tells this we have done some operation creating unpartitioned @@ -250,7 +301,10 @@ class MeshBase : public ParallelObject * them too or call this method. */ void unset_is_partitioned() - { _preparation.is_partitioned = false; } + { + _preparation.is_partitioned = false; + this->clear_kokkos_geometry_cache(); + } /** * Tells this we have done some operation (e.g. 
adding objects to a @@ -2145,6 +2199,10 @@ class MeshBase : public ParallelObject */ mutable std::unique_ptr _point_locator; +#ifdef LIBMESH_HAVE_KOKKOS + mutable std::unique_ptr _kokkos_geometry_cache; +#endif + /** * Do we count lower dimensional elements in point locator refinement? * This is relevant in tree-based point locators, for example. diff --git a/include/numerics/parsed_fem_function.h b/include/numerics/parsed_fem_function.h index 9f5646b1bd6..8e0f3ce4778 100644 --- a/include/numerics/parsed_fem_function.h +++ b/include/numerics/parsed_fem_function.h @@ -25,12 +25,13 @@ // Local Includes #include "libmesh/fem_function_base.h" #include "libmesh/int_range.h" +#include "libmesh/parsed_function_program.h" #include "libmesh/point.h" #include "libmesh/system.h" #ifdef LIBMESH_HAVE_FPARSER // FParser includes -#include "libmesh/fparser.hh" +#include "libmesh/fparser_ad.hh" #endif // C++ includes @@ -106,6 +107,10 @@ class ParsedFEMFunction : public FEMFunctionBase const std::string & expression() { return _expression; } +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + libMesh::ParsedFEMFunctionProgramBundle build_program_bundle() const; +#endif + /** * \returns The value of an inline variable. 
* @@ -440,6 +445,68 @@ ParsedFEMFunction::clone () const (_sys, _expression, &_additional_vars, &_initial_vals); } +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +template +inline +libMesh::ParsedFEMFunctionProgramBundle +ParsedFEMFunction::build_program_bundle() const +{ + libmesh_error_msg_if(_subexpressions.size() != 1, + "Kokkos ParsedFEMFunction export currently supports scalar expressions only"); + + libMesh::ParsedFEMFunctionProgramBundle bundle; + bundle.uses_field_gradients = _n_requested_grad_components > 0; + bundle.uses_field_hessians = _n_requested_hess_components > 0; + bundle.uses_normals = _requested_normals; + bundle.uses_additional_variables = !_additional_vars.empty(); + + auto fp = std::make_unique>(); + fp->AddConstant("NaN", std::numeric_limits::quiet_NaN()); + fp->AddConstant("pi", std::acos(Real(-1))); + fp->AddConstant("e", std::exp(Real(1))); + libmesh_error_msg_if + (fp->Parse(_subexpressions.front(), variables) != -1, // -1 for success + "ERROR: FunctionParser is unable to parse expression for Kokkos export: " + << _subexpressions.front() << '\n' << fp->ErrorMsg()); + + fp->SetADFlags(FunctionParserADBase::ADSilenceErrors | + FunctionParserADBase::ADAutoOptimize); + fp->Optimize(); + bundle.value = build_parsed_function_program(*fp); + + auto dx_fp = std::make_unique>(*fp); + dx_fp->AutoDiff("x"); + bundle.dx = build_parsed_function_program(*dx_fp); +#if LIBMESH_DIM > 1 + auto dy_fp = std::make_unique>(*fp); + dy_fp->AutoDiff("y"); + bundle.dy = build_parsed_function_program(*dy_fp); +#endif +#if LIBMESH_DIM > 2 + auto dz_fp = std::make_unique>(*fp); + dz_fp->AutoDiff("z"); + bundle.dz = build_parsed_function_program(*dz_fp); +#endif + auto dt_fp = std::make_unique>(*fp); + dt_fp->AutoDiff("t"); + bundle.dt = build_parsed_function_program(*dt_fp); + + for (unsigned int v = 0; v != _n_vars; ++v) + { + if (!_need_var[v]) + continue; + + const std::string & varname = 
_sys.variable_name(v); + auto dvar_fp = std::make_unique>(*fp); + dvar_fp->AutoDiff(varname); + bundle.value_variable_numbers.push_back(v); + bundle.value_variable_derivatives.push_back(build_parsed_function_program(*dvar_fp)); + } + + return bundle; +} +#endif + template inline Output diff --git a/include/numerics/parsed_function.h b/include/numerics/parsed_function.h index cc9f34f61d4..ef0206955f3 100644 --- a/include/numerics/parsed_function.h +++ b/include/numerics/parsed_function.h @@ -26,6 +26,7 @@ // Local includes #include "libmesh/dense_vector.h" #include "libmesh/int_range.h" +#include "libmesh/parsed_function_program.h" #include "libmesh/vector_value.h" #include "libmesh/point.h" @@ -88,7 +89,7 @@ class ParsedFunction : public FunctionBase /** * Query if the automatic derivative generation was successful. */ - virtual bool has_derivatives() { return _valid_derivatives; } + virtual bool has_derivatives() const { return _valid_derivatives; } virtual Output dot(const Point & p, const Real time = 0); @@ -113,6 +114,12 @@ class ParsedFunction : public FunctionBase virtual std::unique_ptr> clone() const override; + ParsedFunctionProgram + build_program(unsigned int component = 0) const; + + ParsedFunctionProgramBundle + build_program_bundle(unsigned int component = 0) const; + /** * \returns The value of an inline variable. 
* @@ -379,6 +386,37 @@ ParsedFunction::clone() const &_initial_vals); } +template +inline +ParsedFunctionProgram +ParsedFunction::build_program(const unsigned int component) const +{ + libmesh_assert_less(component, parsers.size()); + return libMesh::build_parsed_function_program(*parsers[component]); +} + +template +inline +ParsedFunctionProgramBundle +ParsedFunction::build_program_bundle(const unsigned int component) const +{ + libmesh_assert_less(component, parsers.size()); + libmesh_error_msg_if(!this->has_derivatives(), + "Cannot build a parsed-function program bundle without valid derivative programs"); + + ParsedFunctionProgramBundle bundle; + bundle.value = this->build_program(component); + bundle.dx = libMesh::build_parsed_function_program(*dx_parsers[component]); +#if LIBMESH_DIM > 1 + bundle.dy = libMesh::build_parsed_function_program(*dy_parsers[component]); +#endif +#if LIBMESH_DIM > 2 + bundle.dz = libMesh::build_parsed_function_program(*dz_parsers[component]); +#endif + bundle.dt = libMesh::build_parsed_function_program(*dt_parsers[component]); + return bundle; +} + template inline Output diff --git a/include/numerics/parsed_function_program.h b/include/numerics/parsed_function_program.h new file mode 100644 index 00000000000..78ddbc5e9c2 --- /dev/null +++ b/include/numerics/parsed_function_program.h @@ -0,0 +1,189 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_PARSED_FUNCTION_PROGRAM_H +#define LIBMESH_PARSED_FUNCTION_PROGRAM_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" + +#include + +template +class FunctionParserBase; +template +class FunctionParserADBase; + +namespace libMesh +{ + +enum class ParsedFunctionOpcode : unsigned int +{ + cAbs, + cAcos, + cAcosh, + cArg, + cAsin, + cAsinh, + cAtan, + cAtan2, + cAtanh, + cCbrt, + cCeil, + cConj, + cCos, + cCosh, + cCot, + cCsc, + cExp, + cExp2, + cFloor, + cHypot, + cIf, + cImag, + cInt, + cLog, + cLog10, + cLog2, + cMax, + cMin, + cPolar, + cPow, + cReal, + cSec, + cSin, + cSinh, + cSqrt, + cTan, + cTanh, + cTrunc, + cImmed, + cJump, + cNeg, + cAdd, + cSub, + cMul, + cDiv, + cMod, + cEqual, + cNEqual, + cLess, + cLessOrEq, + cGreater, + cGreaterOrEq, + cNot, + cAnd, + cOr, + cNotNot, + cDeg, + cRad, + cFCall, + cPCall, + cPopNMov, + cLog2by, + cNop, + cSinCos, + cSinhCosh, + cAbsAnd, + cAbsOr, + cAbsNot, + cAbsNotNot, + cAbsIf, + cDup, + cFetch, + cInv, + cSqr, + cRDiv, + cRSub, + cRSqrt, + VarBegin +}; + +LIBMESH_DEVICE_INLINE constexpr unsigned int +parsed_function_var_begin() +{ + return static_cast(ParsedFunctionOpcode::VarBegin); +} + +LIBMESH_DEVICE_INLINE constexpr bool +parsed_function_is_var_opcode(const unsigned int opcode) +{ + return opcode >= parsed_function_var_begin(); +} + +template +struct ParsedFunctionProgram +{ + std::vector bytecode; + std::vector immediates; + unsigned int stack_size = 0; + unsigned int n_variables = 0; + Scalar epsilon = 0; + + bool empty() const { return bytecode.empty(); } +}; + +template +struct ParsedFunctionProgramBundle +{ + ParsedFunctionProgram value; + ParsedFunctionProgram dx; +#if LIBMESH_DIM > 1 + ParsedFunctionProgram dy; +#endif +#if LIBMESH_DIM > 2 + 
ParsedFunctionProgram dz; +#endif + ParsedFunctionProgram dt; +}; + +template +struct ParsedFEMFunctionProgramBundle +{ + ParsedFunctionProgram value; + ParsedFunctionProgram dx; +#if LIBMESH_DIM > 1 + ParsedFunctionProgram dy; +#endif +#if LIBMESH_DIM > 2 + ParsedFunctionProgram dz; +#endif + ParsedFunctionProgram dt; + std::vector value_variable_numbers; + std::vector> value_variable_derivatives; + bool uses_field_gradients = false; + bool uses_field_hessians = false; + bool uses_normals = false; + bool uses_additional_variables = false; + + bool supports_kokkos_value_goal() const + { + return !uses_field_gradients && + !uses_field_hessians && + !uses_normals && + !uses_additional_variables && + value_variable_numbers.size() == value_variable_derivatives.size(); + } +}; + +template +ParsedFunctionProgram +build_parsed_function_program(const FunctionParserADBase & parser); + +} // namespace libMesh + +#endif // LIBMESH_PARSED_FUNCTION_PROGRAM_H diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index 4fd5631acc5..b3e9937d56f 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -44,6 +44,10 @@ # undef I // Avoid complex.h contamination #endif +#ifdef LIBMESH_HAVE_KOKKOS +#include +#endif + // C++ includes #include #include @@ -227,6 +231,57 @@ class PetscVector final : public NumericVector */ const PetscScalar * get_array_read() const; + /** + * Query PETSc for the memory type backing this vector. + * + * \note If the raw array is currently borrowed via get_array() or + * get_array_read(), this method is not valid. 
+ */ + PetscMemType get_mem_type() const; + + bool supports_kokkos_access() const; + +#ifdef LIBMESH_HAVE_KOKKOS + using kokkos_read_view = + ::Kokkos::View>; + + class KokkosReadViewGuard + { + public: + explicit KokkosReadViewGuard(PetscVector & vector) + : _vector(vector), + _data(reinterpret_cast(vector.get_array_read())), + _view(_data, vector.local_size()) + { + } + + KokkosReadViewGuard(const KokkosReadViewGuard &) = delete; + KokkosReadViewGuard & operator=(const KokkosReadViewGuard &) = delete; + + ~KokkosReadViewGuard() + { + _vector.restore_array(); + } + + const kokkos_read_view & view() const + { + return _view; + } + + private: + PetscVector & _vector; + const T * _data; + kokkos_read_view _view; + }; + + KokkosReadViewGuard make_kokkos_read_view_guard() + { + return KokkosReadViewGuard(*this); + } +#endif + /** * Restore the data array. * @@ -1196,6 +1251,42 @@ const PetscScalar * PetscVector::get_array_read() const return _read_only_values; } +template +inline +PetscMemType PetscVector::get_mem_type() const +{ + libmesh_error_msg_if(_values_manually_retrieved, + "Cannot query PetscVector memory type while a raw array is borrowed"); + +#ifdef LIBMESH_HAVE_CXX11_THREAD + const bool array_is_present = _array_is_present.load(std::memory_order_acquire); +#else + const bool array_is_present = _array_is_present; +#endif + + if (array_is_present) + _restore_array(); + + PetscScalar * dummyarray = nullptr; + PetscMemType mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall(VecGetArrayAndMemType(_vec, &dummyarray, &mem_type)); + LibmeshPetscCall(VecRestoreArrayAndMemType(_vec, &dummyarray)); + return mem_type; +} + +template +inline +bool PetscVector::supports_kokkos_access() const +{ +#ifdef LIBMESH_HAVE_KOKKOS + return !PetscMemTypeHost(this->get_mem_type()) || + ::Kokkos::SpaceAccessibility::accessible; +#else + return false; +#endif +} + template inline void PetscVector::restore_array() diff --git a/include/systems/hilbert_assembly.h 
b/include/systems/hilbert_assembly.h new file mode 100644 index 00000000000..21c0882fba7 --- /dev/null +++ b/include/systems/hilbert_assembly.h @@ -0,0 +1,352 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_HILBERT_ASSEMBLY_H +#define LIBMESH_HILBERT_ASSEMBLY_H + +#include "hilbert_assembly_kernel.h" + +#include "libmesh/fdm_gradient.h" +#include "libmesh/fe_abstract.h" +#include "libmesh/fem_context.h" +#include "libmesh/fem_function_base.h" +#include "libmesh/function_base.h" +#include "libmesh/libmesh_common.h" +#include "libmesh/quadrature.h" +#include "libmesh/tensor_tools.h" + +namespace libMesh +{ +namespace detail +{ + +class HostHilbertFEAccess +{ +public: + class QpData + { + public: + LIBMESH_DEVICE_INLINE + QpData(const HostHilbertFEAccess & fe, + const unsigned int qp) + : _fe(fe), + _qp(qp), + _Jinv(fe.build_inverse_jacobian(qp)) + { + } + + LIBMESH_DEVICE_INLINE + Real JxW() const + { + return _fe.JxW(_qp); + } + + LIBMESH_DEVICE_INLINE + Real phi(const unsigned int i) const + { + return _fe.phi(i, _qp); + } + + LIBMESH_DEVICE_INLINE + const RealGradient & dphi(const unsigned int i) const + { + return _fe.dphi(i, _qp); + } + + 
LIBMESH_DEVICE_INLINE + const Point & xyz() const + { + return _fe.xyz(_qp); + } + + LIBMESH_DEVICE_INLINE + unsigned int qp_index() const + { + return _qp; + } + + LIBMESH_DEVICE_INLINE + const Point & reference_point() const + { + return _fe.reference_point(_qp); + } + + LIBMESH_DEVICE_INLINE + const RealTensor & inverse_jacobian() const + { + return _Jinv; + } + + LIBMESH_DEVICE_INLINE + unsigned int elem_index() const + { + return _fe.elem_index(); + } + + private: + const HostHilbertFEAccess & _fe; + unsigned int _qp; + RealTensor _Jinv; + }; + + HostHilbertFEAccess(FEMContext & c, + const unsigned int var, + const unsigned int hilbert_order, + const unsigned int elem_index = libMesh::invalid_uint) + : _n_dofs(c.n_dof_indices(var)), + _elem_index(elem_index), + _JxW(c.get_element_fe(var)->get_JxW()), + _phi(c.get_element_fe(var)->get_phi()), + _xyz(c.get_element_fe(var)->get_xyz()), + _reference_points(c.get_element_qrule().get_points()), + _fe_map(c.get_element_fe(var)->get_fe_map()), + _dphi(hilbert_order > 0 ? 
&c.get_element_fe(var)->get_dphi() : nullptr) + { + } + + unsigned int n_qpoints() const + { + return cast_int(_JxW.size()); + } + + unsigned int n_dofs() const + { + return _n_dofs; + } + + Real JxW(const unsigned int qp) const + { + return _JxW[qp]; + } + + Real phi(const unsigned int i, const unsigned int qp) const + { + return _phi[i][qp]; + } + + const RealGradient & dphi(const unsigned int i, const unsigned int qp) const + { + libmesh_assert(_dphi); + return (*_dphi)[i][qp]; + } + + const Point & xyz(const unsigned int qp) const + { + return _xyz[qp]; + } + + const Point & reference_point(const unsigned int qp) const + { + return _reference_points[qp]; + } + + unsigned int elem_index() const + { + return _elem_index; + } + + RealTensor build_inverse_jacobian(const unsigned int qp) const + { + RealTensor Jinv; + Jinv(0, 0) = _fe_map.get_dxidx()[qp]; +#if LIBMESH_DIM > 1 + Jinv(0, 1) = _fe_map.get_dxidy()[qp]; + Jinv(1, 0) = _fe_map.get_detadx()[qp]; + Jinv(1, 1) = _fe_map.get_detady()[qp]; +#endif +#if LIBMESH_DIM > 2 + Jinv(0, 2) = _fe_map.get_dxidz()[qp]; + Jinv(1, 2) = _fe_map.get_detadz()[qp]; + Jinv(2, 0) = _fe_map.get_dzetadx()[qp]; + Jinv(2, 1) = _fe_map.get_dzetady()[qp]; + Jinv(2, 2) = _fe_map.get_dzetadz()[qp]; +#endif + return Jinv; + } + + LIBMESH_DEVICE_INLINE + QpData qp_data(const unsigned int qp, + const bool) const + { + return QpData(*this, qp); + } + +private: + const unsigned int _n_dofs; + const unsigned int _elem_index; + const std::vector & _JxW; + const std::vector> & _phi; + const std::vector & _xyz; + const std::vector & _reference_points; + const FEMap & _fe_map; + const std::vector> * _dphi; +}; + +class HostHilbertGoalAccess +{ +public: + HostHilbertGoalAccess(FEMFunctionBase & goal_func, + FDMGradient * goal_grad, + FEMContext & input_context) + : _goal_func(goal_func), + _goal_grad(goal_grad), + _input_context(input_context) + { + } + + template + Number value(const QpData &, const Point & p) + { + return 
_goal_func(_input_context, p); + } + + template + Gradient gradient(const QpData &, const Point & p) + { + libmesh_assert(_goal_grad); + return (*_goal_grad)(_input_context, p); + } + +private: + FEMFunctionBase & _goal_func; + FDMGradient * const _goal_grad; + FEMContext & _input_context; +}; + +template +class FunctionFDMGradient : public FunctionBase +{ +public: + typedef typename TensorTools::DecrementRank::type ValType; + + FunctionFDMGradient(FunctionBase & value_func, + const Real eps) + : _val_func(value_func.clone()), + _eps(eps) + { + } + + virtual std::unique_ptr> clone() const override + { + return std::make_unique>(*_val_func, _eps); + } + + virtual GradType operator()(const Point & p, + const Real time = 0.) override + { + GradType g; + + auto & val = *_val_func; + const Real one_over_dim = Real(0.5) / _eps; + + g(0) = (val(p + Point(_eps), time) - + val(p + Point(-_eps), time)) * one_over_dim; +#if LIBMESH_DIM > 1 + g(1) = (val(p + Point(0, _eps), time) - + val(p + Point(0, -_eps), time)) * one_over_dim; +#endif +#if LIBMESH_DIM > 2 + g(2) = (val(p + Point(0, 0, _eps), time) - + val(p + Point(0, 0, -_eps), time)) * one_over_dim; +#endif + + return g; + } + + virtual void operator()(const Point & p, + const Real time, + DenseVector & output) override + { + const unsigned int sz = cast_int(output.size()); + DenseVector v(sz); + + auto & val = *_val_func; + + val(p + Point(_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + output(i)(0) = v(i); + + val(p + Point(-_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + { + output(i)(0) -= v(i); + output(i)(0) /= (2 * _eps); + } + +#if LIBMESH_DIM > 1 + val(p + Point(0, _eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + output(i)(1) = v(i); + + val(p + Point(0, -_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + { + output(i)(1) -= v(i); + output(i)(1) /= (2 * _eps); + } +#endif +#if LIBMESH_DIM > 2 + val(p + Point(0, 0, _eps), time, v); + for (unsigned int i = 0; i != sz; 
++i) + output(i)(2) = v(i); + + val(p + Point(0, 0, -_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + { + output(i)(2) -= v(i); + output(i)(2) /= (2 * _eps); + } +#endif + } + +private: + std::unique_ptr> _val_func; + Real _eps; +}; + +class HostHilbertAccumulator +{ +public: + HostHilbertAccumulator(DenseSubVector & F, + DenseSubMatrix & K) + : _F(F), + _K(K) + { + } + + void add_residual(const unsigned int i, + const Number value) + { + _F(i) += value; + } + + void add_jacobian(const unsigned int i, + const unsigned int j, + const Number value) + { + _K(i, j) += value; + } + +private: + DenseSubVector & _F; + DenseSubMatrix & _K; +}; + +} // namespace detail +} // namespace libMesh + +#endif // LIBMESH_HILBERT_ASSEMBLY_H diff --git a/include/systems/hilbert_assembly_kernel.h b/include/systems/hilbert_assembly_kernel.h new file mode 100644 index 00000000000..9b8c57a48e5 --- /dev/null +++ b/include/systems/hilbert_assembly_kernel.h @@ -0,0 +1,239 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_HILBERT_ASSEMBLY_KERNEL_H +#define LIBMESH_HILBERT_ASSEMBLY_KERNEL_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/point.h" +#include "libmesh/function_base.h" +#include "libmesh/vector_value.h" + +#include +#include + +namespace libMesh +{ +namespace detail +{ + +template +using hilbert_storage_t = + std::conditional_t, T, std::decay_t>; + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE decltype(auto) +coeff_at(const CoeffStorage & coeff, const unsigned int i) +{ + return coeff(i); +} + +template +LIBMESH_DEVICE_INLINE const Scalar & +coeff_at(const Scalar * coeff, const unsigned int i) +{ + return coeff[i]; +} + +template +LIBMESH_DEVICE_INLINE const Scalar & +coeff_at(const Scalar (&coeff)[N], const unsigned int i) +{ + libmesh_ignore(N); + return coeff[i]; +} + +template +LIBMESH_DEVICE_INLINE +Number interpolate_hilbert_value(const QpData & qp_data, + const CoeffStorage & coeff, + const unsigned int n_dofs) +{ + Number u = 0.; + + for (unsigned int i = 0; i != n_dofs; ++i) + u += coeff_at(coeff, i) * qp_data.phi(i); + + return u; +} + +template +LIBMESH_DEVICE_INLINE +Gradient interpolate_hilbert_gradient(const QpData & qp_data, + const CoeffStorage & coeff, + const unsigned int n_dofs) +{ + Gradient grad_u; + grad_u.zero(); + + for (unsigned int i = 0; i != n_dofs; ++i) + grad_u.add_scaled(qp_data.dphi(i), coeff_at(coeff, i)); + + return grad_u; +} + +template +class HilbertSolutionAccess +{ +public: + LIBMESH_DEVICE_INLINE + HilbertSolutionAccess(const FEAccess & fe, + CoeffStorage coeff, + const Number solution_derivative) + : _fe(fe), + _coeff(coeff), + _solution_derivative(solution_derivative) + { + } + + template + LIBMESH_DEVICE_INLINE + Number 
value(const QpData & qp_data) const + { + return interpolate_hilbert_value(qp_data, _coeff, _fe.n_dofs()); + } + + template + LIBMESH_DEVICE_INLINE + Gradient gradient(const QpData & qp_data) const + { + return interpolate_hilbert_gradient(qp_data, _coeff, _fe.n_dofs()); + } + + LIBMESH_DEVICE_INLINE + Number solution_derivative() const + { + return _solution_derivative; + } + +private: + const FEAccess & _fe; + CoeffStorage _coeff; + Number _solution_derivative; +}; + +template +LIBMESH_DEVICE_INLINE auto +make_hilbert_solution_access(const FEAccess & fe, + CoeffStorage && coeff, + const Number solution_derivative) +{ + return HilbertSolutionAccess>( + fe, + std::forward(coeff), + solution_derivative); +} + +template +class HilbertAnalyticGoalAccess +{ +public: + LIBMESH_DEVICE_INLINE + HilbertAnalyticGoalAccess(GoalFunction goal_func, + GoalGradient goal_grad) + : _goal_func(goal_func), + _goal_grad(goal_grad) + { + } + + template + LIBMESH_DEVICE_INLINE + Number value(const QpData &, const Point & p) const + { + return _goal_func(p); + } + + template + LIBMESH_DEVICE_INLINE + Gradient gradient(const QpData &, const Point & p) const + { + return _goal_grad(p); + } + +private: + GoalFunction _goal_func; + GoalGradient _goal_grad; +}; + +template +LIBMESH_DEVICE_INLINE auto +make_hilbert_analytic_goal_access(GoalFunction && goal_func, + GoalGradient && goal_grad) +{ + return HilbertAnalyticGoalAccess, + hilbert_storage_t>( + std::forward(goal_func), + std::forward(goal_grad)); +} + +template +LIBMESH_DEVICE_INLINE void +assemble_hilbert_element(const FEAccess & fe, + const SolutionAccess & solution, + GoalAccess & goal, + const bool request_jacobian, + const unsigned int hilbert_order, + Accumulator & accum) +{ + const unsigned int n_qpoints = fe.n_qpoints(); + const unsigned int n_u_dofs = fe.n_dofs(); + + for (unsigned int qp = 0; qp != n_qpoints; qp++) + { + const auto qp_data = fe.qp_data(qp, hilbert_order > 0); + const Point & xyz = qp_data.xyz(); + const 
Number err_u = solution.value(qp_data) - goal.value(qp_data, xyz); + + for (unsigned int i = 0; i != n_u_dofs; i++) + accum.add_residual(i, qp_data.JxW() * (err_u * qp_data.phi(i))); + + if (hilbert_order > 0) + { + const Gradient err_grad_u = + solution.gradient(qp_data) - goal.gradient(qp_data, xyz); + + for (unsigned int i = 0; i != n_u_dofs; i++) + accum.add_residual(i, qp_data.JxW() * (err_grad_u * qp_data.dphi(i))); + } + + if (request_jacobian) + { + const Number JxWxD = qp_data.JxW() * solution.solution_derivative(); + + for (unsigned int i = 0; i != n_u_dofs; i++) + for (unsigned int j = 0; j != n_u_dofs; ++j) + accum.add_jacobian(i, j, JxWxD * (qp_data.phi(i) * qp_data.phi(j))); + + if (hilbert_order > 0) + for (unsigned int i = 0; i != n_u_dofs; i++) + for (unsigned int j = 0; j != n_u_dofs; ++j) + accum.add_jacobian(i, j, JxWxD * (qp_data.dphi(i) * qp_data.dphi(j))); + } + } +} + +} // namespace detail +} // namespace libMesh + +#endif // LIBMESH_HILBERT_ASSEMBLY_KERNEL_H diff --git a/src/apps/L2system.C b/src/apps/L2system.C index 5d1bae58f71..107bd9347f1 100644 --- a/src/apps/L2system.C +++ b/src/apps/L2system.C @@ -17,20 +17,1346 @@ #include "L2system.h" +#include "libmesh/dof_map.h" #include "libmesh/elem.h" #include "libmesh/fe_base.h" #include "libmesh/fe_interface.h" #include "libmesh/fem_context.h" #include "libmesh/getpot.h" +#include "libmesh/linear_solver.h" #include "libmesh/mesh.h" +#include "libmesh/numeric_vector.h" #include "libmesh/quadrature.h" #include "libmesh/string_to_enum.h" #include "libmesh/utility.h" +#include +#include +#include + +#ifdef LIBMESH_HAVE_PETSC +#include "libmesh/petsc_matrix_base.h" +#include "libmesh/petsc_macro.h" +#include "libmesh/petsc_vector.h" +#endif + +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +#include "../../include/gpu/kokkos_hilbert_system.h" +#include "libmesh/fe_shape_traits.h" + +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ +#endif + using 
namespace libMesh; +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +constexpr unsigned int kokkos_hilbert_max_dofs = 27; +constexpr unsigned int kokkos_parsed_fem_max_fields = 16; + +template +using KokkosScalarView = ::Kokkos::View; + +using KokkosDenseJacobianView = ::Kokkos::View; +using KokkosFlatJacobianView = ::Kokkos::View; +using KokkosUnsignedIntView = ::Kokkos::View; +using KokkosSizeView = ::Kokkos::View; +using KokkosFieldKeyRecordView = ::Kokkos::View; +using KokkosFieldDofRecordView = ::Kokkos::View; +using KokkosFieldKeyStorage = + libMesh::Kokkos::detail::StaticArrayAccess; +using KokkosFieldDofStorage = + libMesh::Kokkos::detail::StaticArrayAccess; +using KokkosLocalIndexView = DofMap::KokkosLocalIndexCache::elem_local_index_view; + +struct HilbertElementAssemblyRecord +{ + dof_id_type elem_id = DofObject::invalid_id; + std::size_t rhs_offset = 0; + std::size_t mat_offset = 0; + unsigned int elem_index = libMesh::invalid_uint; + unsigned int quadrature_order = 0; + unsigned int n_dofs = 0; +}; + +struct KokkosHilbertAssemblyBucket +{ + std::size_t begin = 0; + std::size_t end = 0; + FEShapeKey key; + ElemType elem_type = INVALID_ELEM; + ElemMappingType mapping_type = LAGRANGE_MAP; + unsigned int n_nodes = 0; + unsigned int elem_p_level = 0; + unsigned int quadrature_order = 0; +}; + +FEShapeKey +make_hilbert_shape_key(const Elem & elem, + const FEType & fe_type) +{ + return {fe_type.family, + elem.type(), + static_cast(fe_type.order.get_order() + cast_int(elem.p_level()))}; +} + +FEShapeKey +make_hilbert_shape_key(const ElemType elem_type, + const unsigned int elem_p_level, + const FEType & fe_type) +{ + return {fe_type.family, + elem_type, + static_cast(fe_type.order.get_order() + cast_int(elem_p_level))}; +} + +void +accumulate_hilbert_dense_outputs(const KokkosScalarView & d_F, + const KokkosDenseJacobianView & d_K, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ + auto h_F = 
::Kokkos::create_mirror_view(d_F); + ::Kokkos::deep_copy(h_F, d_F); + + for (unsigned int i = 0; i != F.size(); ++i) + F(i) += h_F(i); + + if (!request_jacobian) + return; + + auto h_K = ::Kokkos::create_mirror_view(d_K); + ::Kokkos::deep_copy(h_K, d_K); + + for (unsigned int i = 0; i != F.size(); ++i) + for (unsigned int j = 0; j != F.size(); ++j) + K(i, j) += h_K(i, j); +} + +void +build_hilbert_element_records(HilbertSystem & sys, + std::vector & records, + std::size_t & total_rhs_entries, + std::size_t & total_mat_entries) +{ + DofMap & dof_map = sys.get_dof_map(); + const auto * dof_index_cache = dof_map.get_kokkos_dof_index_cache(0); + libmesh_assert(dof_index_cache); + total_rhs_entries = 0; + total_mat_entries = 0; + + for (auto elem_index : index_range(dof_index_cache->host_element_ids)) + { + if (!sys.subdomains_list().empty() && + !sys.subdomains_list().count(dof_index_cache->host_element_subdomains[elem_index])) + continue; + + const unsigned int n_dofs = dof_index_cache->host_element_n_dofs[elem_index]; + if (!n_dofs) + continue; + + HilbertElementAssemblyRecord record; + record.elem_id = dof_index_cache->host_element_ids[elem_index]; + record.rhs_offset = total_rhs_entries; + record.mat_offset = total_mat_entries; + record.elem_index = cast_int(elem_index); + record.n_dofs = n_dofs; + total_rhs_entries += n_dofs; + total_mat_entries += n_dofs * n_dofs; + records.push_back(std::move(record)); + } +} + +bool +build_hilbert_record_quadrature_orders(HilbertSystem & sys, + std::vector & records) +{ + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + const FEType fe_type = sys.variable_type(0); + const int quadrature_order = + cast_int(fe_type.default_quadrature_order()) + sys.extra_quadrature_order; + + for (auto & record : records) + { + libmesh_error_msg_if(quadrature_order < 0, + "Negative quadrature order is not supported for Kokkos Hilbert assembly"); + record.quadrature_order = cast_int(quadrature_order); + + const 
FEShapeKey shape_key = + make_hilbert_shape_key(geometry_cache.element_types(record.elem_index), + geometry_cache.element_p_levels(record.elem_index), + fe_type); + if (!libMesh::Kokkos::detail::supports_hilbert_local_assembly( + shape_key, + geometry_cache.element_mapping_types(record.elem_index), + record.quadrature_order) || + record.n_dofs > kokkos_hilbert_max_dofs) + return false; + } + + return true; +} + +template +const CachedGoal * +ensure_kokkos_goal_cache(std::unique_ptr & cache, + const HostGoalPtr & host_goal, + BuildCache && build_cache) +{ + if (cache) + return cache.get(); + + if (!host_goal) + return nullptr; + + const auto * parsed_goal = dynamic_cast(host_goal.get()); + if (!parsed_goal) + return nullptr; + + cache = build_cache(*parsed_goal); + return cache.get(); +} + +void +prewarm_kokkos_hilbert_entities(HilbertSystem & sys, + const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal) +{ + if (sys.current_local_solution) + sys.get_dof_map().prepare_kokkos_local_index_cache(*sys.current_local_solution, 0); + + if (!fem_goal || !sys.input_system || !sys.input_system->current_local_solution) + return; + + for (unsigned int field = 0; field != fem_goal->n_field_variables(); ++field) + sys.input_system->get_dof_map().prepare_kokkos_local_index_cache( + *sys.input_system->current_local_solution, + fem_goal->field_variable_number(field)); +} + +#if defined(LIBMESH_HAVE_PETSC) +void +build_hilbert_coo_indices(const DofMap::KokkosDofIndexCache & dof_index_cache, + const std::vector & records, + std::vector & rhs_rows, + std::vector & mat_rows, + std::vector & mat_cols) +{ + for (const auto & record : records) + { + const unsigned int n_dofs = record.n_dofs; + + for (unsigned int i = 0; i != n_dofs; ++i) + rhs_rows[record.rhs_offset + i] = + cast_int( + dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs + i]); + + for (unsigned int i = 0; i != n_dofs; ++i) + for (unsigned int j = 0; j != n_dofs; ++j) + { + const 
std::size_t offset = record.mat_offset + i * n_dofs + j; + mat_rows[offset] = cast_int( + dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs + + i]); + mat_cols[offset] = cast_int( + dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs + + j]); + } + } +} + +class HostExactParsedFEMGoalAccess +{ +public: + HostExactParsedFEMGoalAccess(const libMesh::Kokkos::KokkosParsedFEMFunction & goal, + FEMContext & input_context) + : _goal(goal), + _input_context(input_context) + { + } + + template + Number value(const QpData & qp_data, const Point & xyz) const + { + Number vars[LIBMESH_DIM + 1 + kokkos_parsed_fem_max_fields] = {}; + fill_variables(qp_data, xyz, vars); + return _goal.value(vars); + } + + template + Gradient gradient(const QpData & qp_data, const Point & xyz) const + { + Number vars[LIBMESH_DIM + 1 + kokkos_parsed_fem_max_fields] = {}; + Gradient field_gradients[kokkos_parsed_fem_max_fields]; + fill_variables(qp_data, xyz, vars); + + for (unsigned int field = 0; field != _goal.n_field_variables(); ++field) + field_gradients[field] = + _input_context.interior_gradient(_goal.field_variable_number(field), qp_data.qp_index()); + + return _goal.gradient(vars, field_gradients); + } + +private: + template + void fill_variables(const QpData & qp_data, + const Point & xyz, + Number * vars) const + { + vars[0] = xyz(0); +#if LIBMESH_DIM > 1 + vars[1] = xyz(1); +#endif +#if LIBMESH_DIM > 2 + vars[2] = xyz(2); +#endif + vars[LIBMESH_DIM] = _goal.time(); + + for (unsigned int field = 0; field != _goal.n_field_variables(); ++field) + vars[LIBMESH_DIM + 1 + field] = + _input_context.interior_value(_goal.field_variable_number(field), qp_data.qp_index()); + } + + const libMesh::Kokkos::KokkosParsedFEMFunction & _goal; + FEMContext & _input_context; +}; + +struct KokkosElementAssemblyState +{ + const MeshBase::KokkosGeometryCache * geometry_cache = nullptr; + const DofMap::KokkosLocalIndexCache * 
solution_local_indices = nullptr; + PetscVector * solution_vector = nullptr; + unsigned int elem_index = libMesh::invalid_uint; + ElemType elem_type = INVALID_ELEM; + ElemMappingType mapping_type = LAGRANGE_MAP; + unsigned int elem_n_nodes = 0; + unsigned int elem_p_level = 0; +}; + +struct KokkosFEMGoalData +{ + KokkosFieldKeyStorage field_keys; + KokkosFieldDofStorage field_dofs; + std::array field_local_indices; + PetscVector * input_vector = nullptr; +}; + +struct KokkosHilbertBatchData +{ + KokkosUnsignedIntView elem_indices; + KokkosUnsignedIntView elem_n_dofs; + KokkosUnsignedIntView quadrature_orders; + KokkosSizeView rhs_offsets; + KokkosSizeView mat_offsets; +}; + +struct KokkosFEMGoalBatchData +{ + std::vector bucket_field_keys; + std::vector bucket_field_dofs; + libMesh::Kokkos::detail::StaticArrayAccess + field_local_indices; + PetscVector * input_vector = nullptr; +}; + +struct KokkosPetscAssemblyPlan +{ + std::vector records; + std::vector buckets; + std::vector rhs_rows; + std::vector mat_rows; + std::vector mat_cols; + KokkosHilbertBatchData batch_data; + ::Kokkos::View rhs_values; + ::Kokkos::View mat_values; + KokkosFEMGoalBatchData fem_goal_batch_data; + const void * geometry_cache_id = nullptr; + const void * dof_index_cache_id = nullptr; + FEType fe_type; + unsigned int hilbert_order = 0; + int extra_quadrature_order = 0; + std::set subdomains; + const void * matrix_target = nullptr; + const void * rhs_target = nullptr; + const void * fem_goal_target = nullptr; + const void * input_vector_target = nullptr; +}; + +namespace +{ + +bool +build_kokkos_element_state(HilbertSystem & sys, + const Elem & elem, + KokkosElementAssemblyState & state) +{ + auto * solution_vector = dynamic_cast *>(sys.current_local_solution.get()); + if (!solution_vector || !solution_vector->supports_kokkos_access()) + return false; + + const DofMap & dof_map = sys.get_dof_map(); + state.geometry_cache = &sys.get_mesh().get_kokkos_geometry_cache(); + state.elem_index = 
sys.get_mesh().get_kokkos_elem_index(elem); + if (state.elem_index == libMesh::invalid_uint) + return false; + + state.elem_type = state.geometry_cache->element_types(state.elem_index); + state.mapping_type = state.geometry_cache->element_mapping_types(state.elem_index); + state.elem_n_nodes = state.geometry_cache->element_n_nodes(state.elem_index); + state.elem_p_level = state.geometry_cache->element_p_levels(state.elem_index); + + state.solution_local_indices = + dof_map.require_kokkos_local_index_cache(*sys.current_local_solution, 0); + if (!state.solution_local_indices) + return false; + + state.solution_vector = solution_vector; + return true; +} + +bool +build_kokkos_fem_goal_data(const ElemType elem_type, + const ElemMappingType mapping_type, + const unsigned int elem_p_level, + const unsigned int hilbert_order, + System & input_system, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + KokkosFEMGoalData & goal_data) +{ + const unsigned int n_fields = goal_function.n_field_variables(); + if (n_fields > kokkos_parsed_fem_max_fields) + return false; + + auto * input_vector = dynamic_cast *>(input_system.current_local_solution.get()); + if (!input_vector || !input_vector->supports_kokkos_access()) + return false; + + goal_data.field_keys.size = n_fields; + goal_data.field_dofs.size = n_fields; + goal_data.input_vector = input_vector; + + for (unsigned int field = 0; field != n_fields; ++field) + { + const unsigned int var_num = goal_function.field_variable_number(field); + const FEType field_type = input_system.variable_type(var_num); + const FEShapeKey field_key = make_hilbert_shape_key(elem_type, elem_p_level, field_type); + + if (mapping_type != LAGRANGE_MAP || + !supports_shape_with_lagrange_map(field_key) || + (hilbert_order > 0 && !supports_grad_shape(field_key))) + return false; + + const unsigned int field_n_dofs = + FEInterface::n_dofs(libMesh::Kokkos::dim_from_topology(elem_type), field_type, elem_type); + if (field_n_dofs > 
kokkos_hilbert_max_dofs) + return false; + + const auto * local_index_cache = + input_system.get_dof_map().require_kokkos_local_index_cache( + *input_system.current_local_solution, var_num); + if (!local_index_cache) + return false; + + goal_data.field_keys.values[field] = field_key; + goal_data.field_dofs.values[field] = field_n_dofs; + goal_data.field_local_indices[field] = local_index_cache->element_local_indices; + } + + return true; +} + +auto +make_hilbert_bucket_sort_key(const MeshBase::KokkosGeometryCache & geometry_cache, + const FEType & fe_type, + const HilbertElementAssemblyRecord & record) +{ + const unsigned int elem_index = record.elem_index; + return std::make_tuple(cast_int(geometry_cache.element_types(elem_index)), + cast_int(geometry_cache.element_mapping_types(elem_index)), + cast_int(fe_type.order.get_order() + + cast_int(geometry_cache.element_p_levels(elem_index))), + record.quadrature_order, + geometry_cache.element_n_nodes(elem_index)); +} + +void +sort_hilbert_element_records(const MeshBase::KokkosGeometryCache & geometry_cache, + const FEType & fe_type, + std::vector & records) +{ + std::sort(records.begin(), + records.end(), + [&geometry_cache, &fe_type](const auto & lhs, const auto & rhs) + { + return make_hilbert_bucket_sort_key(geometry_cache, fe_type, lhs) < + make_hilbert_bucket_sort_key(geometry_cache, fe_type, rhs); + }); +} + +void +build_hilbert_assembly_buckets(const MeshBase::KokkosGeometryCache & geometry_cache, + const FEType & fe_type, + const std::vector & records, + std::vector & buckets) +{ + buckets.clear(); + if (records.empty()) + return; + + auto fill_bucket = [&geometry_cache, &fe_type, &records](KokkosHilbertAssemblyBucket & bucket, + const std::size_t begin, + const std::size_t end) + { + bucket.begin = begin; + bucket.end = end; + const auto & record = records[begin]; + const unsigned int elem_index = record.elem_index; + bucket.key = make_hilbert_shape_key(geometry_cache.element_types(elem_index), + 
geometry_cache.element_p_levels(elem_index), + fe_type); + bucket.elem_type = geometry_cache.element_types(elem_index); + bucket.mapping_type = geometry_cache.element_mapping_types(elem_index); + bucket.n_nodes = geometry_cache.element_n_nodes(elem_index); + bucket.elem_p_level = geometry_cache.element_p_levels(elem_index); + bucket.quadrature_order = record.quadrature_order; + }; + + std::size_t bucket_begin = 0; + auto current_key = make_hilbert_bucket_sort_key(geometry_cache, fe_type, records.front()); + for (std::size_t i = 1; i != records.size(); ++i) + { + const auto next_key = make_hilbert_bucket_sort_key(geometry_cache, fe_type, records[i]); + if (next_key == current_key) + continue; + + buckets.emplace_back(); + fill_bucket(buckets.back(), bucket_begin, i); + bucket_begin = i; + current_key = next_key; + } + + buckets.emplace_back(); + fill_bucket(buckets.back(), bucket_begin, records.size()); +} + +void +build_hilbert_batch_data(const std::vector & records, + KokkosHilbertBatchData & batch_data) +{ + batch_data.elem_indices = KokkosUnsignedIntView("hilbert_elem_indices", records.size()); + batch_data.elem_n_dofs = KokkosUnsignedIntView("hilbert_elem_n_dofs", records.size()); + batch_data.quadrature_orders = + KokkosUnsignedIntView("hilbert_quadrature_orders", records.size()); + batch_data.rhs_offsets = KokkosSizeView("hilbert_rhs_offsets", records.size()); + batch_data.mat_offsets = KokkosSizeView("hilbert_mat_offsets", records.size()); + + auto h_elem_indices = ::Kokkos::create_mirror_view(batch_data.elem_indices); + auto h_elem_n_dofs = ::Kokkos::create_mirror_view(batch_data.elem_n_dofs); + auto h_quadrature_orders = ::Kokkos::create_mirror_view(batch_data.quadrature_orders); + auto h_rhs_offsets = ::Kokkos::create_mirror_view(batch_data.rhs_offsets); + auto h_mat_offsets = ::Kokkos::create_mirror_view(batch_data.mat_offsets); + + for (auto record_index : index_range(records)) + { + const auto & record = records[record_index]; + 
h_elem_indices(record_index) = record.elem_index; + h_elem_n_dofs(record_index) = record.n_dofs; + h_quadrature_orders(record_index) = record.quadrature_order; + h_rhs_offsets(record_index) = record.rhs_offset; + h_mat_offsets(record_index) = record.mat_offset; + } + + ::Kokkos::deep_copy(batch_data.elem_indices, h_elem_indices); + ::Kokkos::deep_copy(batch_data.elem_n_dofs, h_elem_n_dofs); + ::Kokkos::deep_copy(batch_data.quadrature_orders, h_quadrature_orders); + ::Kokkos::deep_copy(batch_data.rhs_offsets, h_rhs_offsets); + ::Kokkos::deep_copy(batch_data.mat_offsets, h_mat_offsets); +} + +bool +kokkos_petsc_plan_matches(const HilbertSystem & sys, + const MeshBase::KokkosGeometryCache & geometry_cache, + const DofMap::KokkosDofIndexCache & dof_index_cache, + const KokkosPetscAssemblyPlan & plan) +{ + return plan.geometry_cache_id == &geometry_cache && + plan.dof_index_cache_id == &dof_index_cache && + plan.fe_type == sys.variable_type(0) && + plan.hilbert_order == sys.hilbert_order() && + plan.extra_quadrature_order == sys.extra_quadrature_order && + plan.subdomains == sys.subdomains_list(); +} + +bool +build_kokkos_petsc_assembly_plan(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + const auto * dof_index_cache = sys.get_dof_map().get_kokkos_dof_index_cache(0); + if (!dof_index_cache) + return false; + + std::size_t total_rhs_entries = 0; + std::size_t total_mat_entries = 0; + std::vector records; + build_hilbert_element_records(sys, records, total_rhs_entries, total_mat_entries); + if (!build_hilbert_record_quadrature_orders(sys, records)) + return false; + sort_hilbert_element_records(geometry_cache, sys.variable_type(0), records); + + std::vector rhs_rows(total_rhs_entries); + std::vector mat_rows(total_mat_entries); + std::vector mat_cols(total_mat_entries); + build_hilbert_coo_indices(*dof_index_cache, records, rhs_rows, mat_rows, mat_cols); + + KokkosHilbertBatchData 
batch_data; + build_hilbert_batch_data(records, batch_data); + std::vector buckets; + build_hilbert_assembly_buckets(geometry_cache, sys.variable_type(0), records, buckets); + + plan.records = std::move(records); + plan.buckets = std::move(buckets); + plan.rhs_rows = std::move(rhs_rows); + plan.mat_rows = std::move(mat_rows); + plan.mat_cols = std::move(mat_cols); + plan.batch_data = std::move(batch_data); + plan.rhs_values = ::Kokkos::View("hilbert_rhs_values", plan.rhs_rows.size()); + plan.mat_values = ::Kokkos::View("hilbert_mat_values", plan.mat_rows.size()); + plan.geometry_cache_id = &geometry_cache; + plan.dof_index_cache_id = dof_index_cache; + plan.fe_type = sys.variable_type(0); + plan.hilbert_order = sys.hilbert_order(); + plan.extra_quadrature_order = sys.extra_quadrature_order; + plan.subdomains = sys.subdomains_list(); + plan.matrix_target = nullptr; + plan.rhs_target = nullptr; + plan.fem_goal_target = nullptr; + plan.input_vector_target = nullptr; + return true; +} + +bool +build_kokkos_fem_goal_batch_data(HilbertSystem & sys, + const KokkosPetscAssemblyPlan & plan, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + KokkosFEMGoalBatchData & batch_data) +{ + libmesh_assert(sys.input_system); + System & input_system = *sys.input_system; + const unsigned int n_fields = goal_function.n_field_variables(); + if (n_fields > kokkos_parsed_fem_max_fields) + return false; + + auto * input_vector = dynamic_cast *>(input_system.current_local_solution.get()); + if (!input_vector || !input_vector->supports_kokkos_access()) + return false; + + batch_data.input_vector = input_vector; + batch_data.field_local_indices.size = n_fields; + batch_data.bucket_field_keys.assign(plan.buckets.size(), KokkosFieldKeyStorage{}); + batch_data.bucket_field_dofs.assign(plan.buckets.size(), KokkosFieldDofStorage{}); + + for (unsigned int field = 0; field != n_fields; ++field) + { + const unsigned int var_num = goal_function.field_variable_number(field); + const 
FEType field_type = input_system.variable_type(var_num); + const auto * local_index_cache = + input_system.get_dof_map().require_kokkos_local_index_cache( + *input_system.current_local_solution, var_num); + if (!local_index_cache) + return false; + + batch_data.field_local_indices.values[field] = local_index_cache->element_local_indices; + + for (auto bucket_index : index_range(plan.buckets)) + { + const auto & bucket = plan.buckets[bucket_index]; + const FEShapeKey field_key = + make_hilbert_shape_key(bucket.elem_type, bucket.elem_p_level, field_type); + + if (bucket.mapping_type != LAGRANGE_MAP || + !supports_shape_with_lagrange_map(field_key) || + (sys.hilbert_order() > 0 && !supports_grad_shape(field_key))) + return false; + + const unsigned int field_n_dofs = + FEInterface::n_dofs(libMesh::Kokkos::dim_from_topology(bucket.elem_type), + field_type, + bucket.elem_type); + if (field_n_dofs > kokkos_hilbert_max_dofs) + return false; + + batch_data.bucket_field_keys[bucket_index].size = n_fields; + batch_data.bucket_field_dofs[bucket_index].size = n_fields; + batch_data.bucket_field_keys[bucket_index].values[field] = field_key; + batch_data.bucket_field_dofs[bucket_index].values[field] = field_n_dofs; + } + } + + return true; +} + +bool +ensure_kokkos_fem_goal_batch_data(HilbertSystem & sys, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + KokkosPetscAssemblyPlan & plan) +{ + libmesh_assert(sys.input_system); + const void * input_vector_target = sys.input_system->current_local_solution.get(); + if (plan.fem_goal_target == &goal_function && plan.input_vector_target == input_vector_target) + return true; + + KokkosFEMGoalBatchData batch_data; + if (!build_kokkos_fem_goal_batch_data(sys, plan, goal_function, batch_data)) + return false; + + plan.fem_goal_batch_data = std::move(batch_data); + plan.fem_goal_target = &goal_function; + plan.input_vector_target = input_vector_target; + return true; +} + +bool +assemble_kokkos_hilbert_element(const 
FEShapeKey key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const Number solution_derivative, + const libMesh::Kokkos::KokkosParsedFunction & goal_function, + const KokkosElementAssemblyState & state, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ + const unsigned int n_dofs = F.size(); + KokkosScalarView d_F("hilbert_residual", n_dofs); + KokkosDenseJacobianView d_K("hilbert_jacobian", n_dofs, n_dofs); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(goal_function, + goal_function.gradient_function()); + const libMesh::Kokkos::detail::DenseElementOutputSink sink{ + d_F, d_K, n_dofs, request_jacobian}; + + auto coeff_guard = state.solution_vector->make_kokkos_read_view_guard(); + const auto coeff = + libMesh::Kokkos::detail::make_gathered_coeff_access( + coeff_guard.view(), state.solution_local_indices->element_local_indices, state.elem_index); + + const bool success = + libMesh::Kokkos::detail::run_hilbert_system_assembly( + key, + state.mapping_type, + state.geometry_cache->node_coordinates, + state.geometry_cache->element_node_ids, + state.elem_index, + state.elem_n_nodes, + quadrature_order, + hilbert_order, + coeff, + solution_derivative, + goal_access, + request_jacobian, + sink, + "hilbert_local_assembly"); + + if (!success) + return false; + + accumulate_hilbert_dense_outputs(d_F, d_K, request_jacobian, F, K); + return true; +} + +bool +assemble_kokkos_hilbert_fem_goal_element(const FEShapeKey output_key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const Number solution_derivative, + System & input_system, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + const KokkosElementAssemblyState & state, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ + KokkosFEMGoalData goal_data; + if (!build_kokkos_fem_goal_data(state.elem_type, + state.mapping_type, + state.elem_p_level, + 
hilbert_order, + input_system, + goal_function, + goal_data)) + return false; + + const unsigned int n_dofs = F.size(); + KokkosScalarView d_F("hilbert_residual", n_dofs); + KokkosDenseJacobianView d_K("hilbert_jacobian", n_dofs, n_dofs); + const libMesh::Kokkos::detail::DenseElementOutputSink sink{ + d_F, d_K, n_dofs, request_jacobian}; + + const auto assemble_with_input_coeffs = [&](const auto & coeff_values, + const auto & input_coeff_values) + { + const auto coeff = + libMesh::Kokkos::detail::make_gathered_coeff_access(coeff_values, + state.solution_local_indices + ->element_local_indices, + state.elem_index); + + const auto goal_access = + libMesh::Kokkos::detail::GatheredParsedFEMGoalAccess, + KokkosLocalIndexView, + libMesh::Kokkos::KokkosParsedFEMFunction, + kokkos_parsed_fem_max_fields>( + goal_data.field_keys, + goal_data.field_dofs, + input_coeff_values, + goal_data.field_local_indices.data(), + goal_function); + + return libMesh::Kokkos::detail::run_hilbert_system_assembly( + output_key, + state.mapping_type, + state.geometry_cache->node_coordinates, + state.geometry_cache->element_node_ids, + state.elem_index, + state.elem_n_nodes, + quadrature_order, + hilbert_order, + coeff, + solution_derivative, + goal_access, + request_jacobian, + sink, + "hilbert_local_fem_goal_assembly"); + }; + + auto coeff_guard = state.solution_vector->make_kokkos_read_view_guard(); + const bool success = + (goal_data.input_vector == state.solution_vector) + ? 
assemble_with_input_coeffs(coeff_guard.view(), coeff_guard.view()) + : [&]() + { + auto input_guard = goal_data.input_vector->make_kokkos_read_view_guard(); + return assemble_with_input_coeffs(coeff_guard.view(), input_guard.view()); + }(); + + if (!success) + return false; + + accumulate_hilbert_dense_outputs(d_F, d_K, request_jacobian, F, K); + return true; +} + +#if defined(LIBMESH_HAVE_PETSC) +bool +assemble_host_exact_parsed_fem_goal_element(HilbertSystem & sys, + FEMContext & c, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function) +{ + if (!sys.input_system) + return false; + + FEMContext * goal_context_ptr = sys.get_input_context(c); + if (!goal_context_ptr) + return false; + + FEMContext & goal_context = *goal_context_ptr; + goal_context.pre_fe_reinit(*sys.input_system, &c.get_elem()); + goal_context.elem_fe_reinit(); + + detail::HostHilbertFEAccess fe(c, 0, sys.hilbert_order()); + detail::HostHilbertAccumulator accum(F, K); + auto solution = + detail::make_hilbert_solution_access(fe, + c.get_elem_solution(0), + c.get_elem_solution_derivative()); + HostExactParsedFEMGoalAccess goal_access(goal_function, goal_context); + detail::assemble_hilbert_element(fe, + solution, + goal_access, + request_jacobian, + sys.hilbert_order(), + accum); + return true; +} +#endif + +template +bool +assemble_kokkos_hilbert_element_device_values(const FEShapeKey key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const unsigned int elem_index, + const MeshBase::KokkosGeometryCache & geometry_cache, + const libMesh::Kokkos::KokkosParsedFunction & goal_function, + ResidualView d_rhs_values, + JacobianView d_mat_values) +{ + const unsigned int n_dofs = cast_int(d_rhs_values.extent(0)); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(goal_function, + goal_function.gradient_function()); + const 
libMesh::Kokkos::detail::FlatDeviceValueSink sink{ + d_rhs_values, d_mat_values, n_dofs}; + + return libMesh::Kokkos::detail::run_hilbert_system_assembly( + key, + geometry_cache.element_mapping_types(elem_index), + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + elem_index, + geometry_cache.element_n_nodes(elem_index), + quadrature_order, + hilbert_order, + libMesh::Kokkos::detail::ZeroCoeffAccess{}, + Number(1.), + goal_access, + true, + sink, + "hilbert_device_values"); +} + +template +bool +assemble_kokkos_hilbert_fem_goal_device_values(const FEShapeKey output_key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const unsigned int elem_index, + const MeshBase::KokkosGeometryCache & geometry_cache, + System & input_system, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + ResidualView d_rhs_values, + JacobianView d_mat_values) +{ + KokkosFEMGoalData goal_data; + if (!build_kokkos_fem_goal_data(geometry_cache.element_types(elem_index), + geometry_cache.element_mapping_types(elem_index), + geometry_cache.element_p_levels(elem_index), + hilbert_order, + input_system, + goal_function, + goal_data)) + return false; + + const unsigned int n_dofs = cast_int(d_rhs_values.extent(0)); + const libMesh::Kokkos::detail::FlatDeviceValueSink sink{ + d_rhs_values, d_mat_values, n_dofs}; + + auto input_guard = goal_data.input_vector->make_kokkos_read_view_guard(); + const auto goal_access = + libMesh::Kokkos::detail::GatheredParsedFEMGoalAccess, + kokkos_parsed_fem_max_fields>( + goal_data.field_keys, + goal_data.field_dofs, + input_guard.view(), + goal_data.field_local_indices.data(), + goal_function); + + return libMesh::Kokkos::detail::run_hilbert_system_assembly( + output_key, + geometry_cache.element_mapping_types(elem_index), + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + elem_index, + geometry_cache.element_n_nodes(elem_index), + quadrature_order, + hilbert_order, + 
libMesh::Kokkos::detail::ZeroCoeffAccess{}, + Number(1.), + goal_access, + true, + sink, + "hilbert_device_fem_goal_values"); +} + +template +bool +assemble_kokkos_hilbert_record_values(HilbertSystem & sys, + const HilbertElementAssemblyRecord & record, + const unsigned int quadrature_order, + const libMesh::Kokkos::KokkosParsedFunction * analytic_goal, + const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal, + ResidualView rhs_slice, + JacobianView mat_slice) +{ + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + const FEShapeKey shape_key = + make_hilbert_shape_key(geometry_cache.element_types(record.elem_index), + geometry_cache.element_p_levels(record.elem_index), + sys.variable_type(0)); + + return analytic_goal ? + assemble_kokkos_hilbert_element_device_values(shape_key, + quadrature_order, + sys.hilbert_order(), + record.elem_index, + geometry_cache, + analytic_goal->with_time(sys.time), + rhs_slice, + mat_slice) : + assemble_kokkos_hilbert_fem_goal_device_values(shape_key, + quadrature_order, + sys.hilbert_order(), + record.elem_index, + geometry_cache, + *sys.input_system, + fem_goal->with_time(sys.time), + rhs_slice, + mat_slice); +} + +bool +assemble_kokkos_petsc_global_system(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + const libMesh::Kokkos::KokkosParsedFunction * analytic_goal, + const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal, + PetscMatrixBase & system_matrix, + PetscVector & system_rhs) +{ + if (sys.has_static_condensation() || sys.get_dof_map().n_constrained_dofs()) + return false; + + if (!analytic_goal && !fem_goal) + return false; + + if (analytic_goal) + { + const auto timed_goal = analytic_goal->with_time(sys.time); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(timed_goal, + timed_goal.gradient_function()); + + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + for (const auto & bucket : plan.buckets) + 
libMesh::Kokkos::detail::run_hilbert_system_bucket_value_batch( + bucket.key, + bucket.mapping_type, + bucket.n_nodes, + bucket.quadrature_order, + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + sys.hilbert_order(), + goal_access, + plan.rhs_values, + plan.mat_values, + "hilbert_value_bucket_batch"); + } + else + { + const auto timed_goal = fem_goal->with_time(sys.time); + if (!ensure_kokkos_fem_goal_batch_data(sys, *fem_goal, plan)) + return false; + + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + auto input_guard = plan.fem_goal_batch_data.input_vector->make_kokkos_read_view_guard(); + for (auto bucket_index : index_range(plan.buckets)) + { + const auto & bucket = plan.buckets[bucket_index]; + libMesh::Kokkos::detail::run_hilbert_system_fem_bucket_value_batch< + kokkos_hilbert_max_dofs, + kokkos_parsed_fem_max_fields>( + bucket.key, + bucket.mapping_type, + bucket.n_nodes, + bucket.quadrature_order, + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + plan.fem_goal_batch_data.bucket_field_keys[bucket_index], + plan.fem_goal_batch_data.bucket_field_dofs[bucket_index], + plan.fem_goal_batch_data.field_local_indices, 
+ input_guard.view(), + timed_goal, + sys.hilbert_order(), + plan.rhs_values, + plan.mat_values, + "hilbert_fem_value_bucket_batch"); + } + } + + if (plan.matrix_target != &system_matrix) + { + LibmeshPetscCall2(sys.comm(), + MatSetPreallocationCOO(system_matrix.mat(), + static_cast(plan.mat_rows.size()), + plan.mat_rows.empty() ? nullptr : plan.mat_rows.data(), + plan.mat_cols.empty() ? nullptr : plan.mat_cols.data())); + plan.matrix_target = &system_matrix; + } + if (plan.rhs_target != &system_rhs) + { + LibmeshPetscCall2(sys.comm(), + VecSetPreallocationCOO(system_rhs.vec(), + static_cast(plan.rhs_rows.size()), + plan.rhs_rows.empty() ? nullptr : plan.rhs_rows.data())); + plan.rhs_target = &system_rhs; + } + LibmeshPetscCall2(sys.comm(), + MatSetValuesCOO(system_matrix.mat(), + reinterpret_cast(plan.mat_values.data()), + INSERT_VALUES)); + LibmeshPetscCall2(sys.comm(), + VecSetValuesCOO(system_rhs.vec(), + reinterpret_cast(plan.rhs_values.data()), + INSERT_VALUES)); + + return true; +} +#endif + +} // anonymous namespace +#endif + HilbertSystem::~HilbertSystem () = default; +HilbertSystem::HilbertSystem(libMesh::EquationSystems & es, + const std::string & name, + const unsigned int number) + : libMesh::FEMSystem(es, name, number), + input_system(nullptr), + _fe_family("LAGRANGE"), + _fe_order(1), + _hilbert_order(0), + _use_kokkos_backend(false), + _fdm_eps(libMesh::TOLERANCE), + _subdomains_list() +{ +} + +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +const libMesh::Kokkos::KokkosParsedFunction * +HilbertSystem::ensure_kokkos_goal_func() +{ + return ensure_kokkos_goal_cache>( + _kokkos_goal_func, + _analytic_goal_func, + [](const auto & parsed_goal) -> std::unique_ptr> + { +#ifdef LIBMESH_HAVE_FPARSER + return std::make_unique>( + parsed_goal.build_program_bundle()); +#else + libmesh_ignore(parsed_goal); + return nullptr; +#endif + }); +} + +const libMesh::Kokkos::KokkosParsedFEMFunction * 
+HilbertSystem::ensure_kokkos_fem_goal_func() +{ + return ensure_kokkos_goal_cache>( + _kokkos_fem_goal_func, + _goal_func, + [](const auto & parsed_goal) + -> std::unique_ptr> + { +#ifdef LIBMESH_HAVE_FPARSER + const auto program_bundle = parsed_goal.build_program_bundle(); + if (!program_bundle.supports_kokkos_value_goal() || + program_bundle.value_variable_numbers.size() > kokkos_parsed_fem_max_fields) + return nullptr; + + return std::make_unique>( + program_bundle); +#else + libmesh_ignore(parsed_goal); + return nullptr; +#endif + }); +} + +void +HilbertSystem::reset_kokkos_goal_cache() +{ + _kokkos_goal_func.reset(); + _kokkos_fem_goal_func.reset(); +#if defined(LIBMESH_HAVE_PETSC) + _kokkos_petsc_plan.reset(); +#endif +} + +KokkosPetscAssemblyPlan * +HilbertSystem::ensure_kokkos_petsc_plan(bool * rebuilt) +{ + const auto & geometry_cache = this->get_mesh().get_kokkos_geometry_cache(); + const auto * dof_index_cache = this->get_dof_map().get_kokkos_dof_index_cache(0); + if (!dof_index_cache) + return nullptr; + + if (_kokkos_petsc_plan && + kokkos_petsc_plan_matches(*this, geometry_cache, *dof_index_cache, *_kokkos_petsc_plan)) + { + if (rebuilt) + *rebuilt = false; + return _kokkos_petsc_plan.get(); + } + + auto plan = std::make_unique(); + if (!build_kokkos_petsc_assembly_plan(*this, *plan)) + return nullptr; + + _kokkos_petsc_plan = std::move(plan); + if (rebuilt) + *rebuilt = true; + return _kokkos_petsc_plan.get(); +} + +bool +HilbertSystem::try_kokkos_element_assembly(FEMContext & c, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ +#if defined(LIBMESH_HAVE_PETSC) + const Elem & elem = c.get_elem(); + const unsigned int quadrature_order = + cast_int(c.get_element_qrule().get_order()); + KokkosElementAssemblyState state; + if (!build_kokkos_element_state(*this, elem, state)) + return false; + const FEShapeKey shape_key = + make_hilbert_shape_key(state.elem_type, state.elem_p_level, this->variable_type(0)); + + if (const auto 
* kokkos_goal = this->ensure_kokkos_goal_func(); + kokkos_goal && + assemble_kokkos_hilbert_element(shape_key, + quadrature_order, + _hilbert_order, + c.get_elem_solution_derivative(), + kokkos_goal->with_time(this->time), + state, + request_jacobian, + F, + K)) + return true; + + if (!input_system) + return false; + + if (const auto * kokkos_goal = this->ensure_kokkos_fem_goal_func(); + kokkos_goal && + assemble_kokkos_hilbert_fem_goal_element(shape_key, + quadrature_order, + _hilbert_order, + c.get_elem_solution_derivative(), + *input_system, + kokkos_goal->with_time(this->time), + state, + request_jacobian, + F, + K)) + return true; + + return false; +#else + libmesh_ignore(c, request_jacobian, F, K); + return false; +#endif +} + +#if defined(LIBMESH_HAVE_PETSC) +bool +HilbertSystem::try_kokkos_petsc_solve() +{ + using clock = std::chrono::steady_clock; + auto * petsc_matrix = dynamic_cast *>(this->matrix); + auto * petsc_rhs = dynamic_cast *>(this->rhs); + auto * petsc_solution = dynamic_cast *>(this->solution.get()); + + const auto * analytic_goal = this->ensure_kokkos_goal_func(); + const auto * fem_goal = this->ensure_kokkos_fem_goal_func(); + + if (!petsc_matrix || !petsc_rhs || !petsc_solution || !(analytic_goal || fem_goal)) + return false; + + prewarm_kokkos_hilbert_entities(*this, fem_goal); + this->_last_kokkos_timing = {}; + const auto total_start = clock::now(); + petsc_matrix->zero(); + petsc_rhs->zero(); + petsc_solution->zero(); + + bool rebuilt_plan = false; + const auto plan_start = clock::now(); + auto * plan = this->ensure_kokkos_petsc_plan(&rebuilt_plan); + if (!plan) + return false; + const auto plan_stop = clock::now(); + this->_last_kokkos_timing.plan_seconds = + rebuilt_plan ? 
+ std::chrono::duration_cast>(plan_stop - plan_start).count() : + 0.; + + const auto assembly_start = clock::now(); + if (!assemble_kokkos_petsc_global_system(*this, + *plan, + analytic_goal, + fem_goal, + *petsc_matrix, + *petsc_rhs)) + return false; + const auto assembly_stop = clock::now(); + this->_last_kokkos_timing.assembly_seconds = + std::chrono::duration_cast>(assembly_stop - assembly_start).count(); + + petsc_matrix->close(); + petsc_rhs->close(); + petsc_solution->close(); + + LinearSolver * solver = this->get_linear_solver(); + if (this->prefix_with_name()) + solver->init(this->prefix().c_str()); + else + solver->init(); + + const auto [maxlinearits, linear_tol] = this->get_linear_solve_parameters(); + const auto solve_start = clock::now(); + solver->solve(*this->matrix, + *this->solution, + *this->rhs, + linear_tol, + maxlinearits); + const auto solve_stop = clock::now(); + this->_last_kokkos_timing.solve_seconds = + std::chrono::duration_cast>(solve_stop - solve_start).count(); + + this->update(); + this->mesh_position_set(); + const auto total_stop = clock::now(); + this->_last_kokkos_timing.total_seconds = + std::chrono::duration_cast>(total_stop - total_start).count(); + return true; +} +#endif +#endif + void HilbertSystem::init_data () { this->add_variable ("u", static_cast(_fe_order), @@ -76,10 +1402,27 @@ void HilbertSystem::init_context(DiffContext & context) if (input_system && !input_context) { input_context = std::make_unique(*input_system); + } + + libmesh_assert(_goal_func || _analytic_goal_func); + + if (_goal_func) + _goal_func->init_context(input_system ? 
*input_context : c); - libmesh_assert(_goal_func.get()); - _goal_func->init_context(*input_context); +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (input_system && + this->_hilbert_order > 0 && + dynamic_cast *>(_goal_func.get()) && + this->ensure_kokkos_fem_goal_func()) + { + for (const auto & dim : elem_dims) + for (unsigned int var = 0; var != input_system->n_vars(); ++var) + { + input_context->get_element_fe(var, my_fe, dim); + my_fe->get_dphi(); + } } +#endif FEMSystem::init_context(context); } @@ -96,74 +1439,96 @@ bool HilbertSystem::element_time_derivative (bool request_jacobian, !_subdomains_list.count(elem.subdomain_id())) return request_jacobian; - // First we get some references to cell-specific data that - // will be used to assemble the linear system. - - // Element Jacobian * quadrature weights for interior integration - const std::vector & JxW = c.get_element_fe(0)->get_JxW(); - - const std::vector> & phi = c.get_element_fe(0)->get_phi(); - - const std::vector & xyz = c.get_element_fe(0)->get_xyz(); - - // The number of local degrees of freedom in each variable - const unsigned int n_u_dofs = c.n_dof_indices(0); - // The subvectors and submatrices we need to fill: DenseSubMatrix & K = c.get_elem_jacobian(0, 0); DenseSubVector & F = c.get_elem_residual(0); - unsigned int n_qpoints = c.get_element_qrule().n_points(); - - FEMContext & input_c = *libmesh_map_find(input_contexts, &c); - if (input_system) +#ifdef LIBMESH_HAVE_KOKKOS + if (_use_kokkos_backend) { - input_c.pre_fe_reinit(*input_system, &elem); - input_c.elem_fe_reinit(); +#if !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (this->try_kokkos_element_assembly(c, request_jacobian, F, K)) + return request_jacobian; +#else + if (_analytic_goal_func && + dynamic_cast *>(_analytic_goal_func.get())) + libmesh_error_msg("HilbertSystem Kokkos backend does not support ParsedFunction goals " + "when libMesh is built with complex Number."); +#endif } +#endif - for (unsigned 
int qp=0; qp != n_qpoints; qp++) - { - const Number u = c.interior_value(0, qp); - const Number ufunc = (*_goal_func)(input_c, xyz[qp]); - const Number err_u = u - ufunc; + detail::HostHilbertFEAccess fe(c, 0, _hilbert_order); + const auto assemble_with_goal = [&](auto & goal) + { + auto solution = + detail::make_hilbert_solution_access(fe, + c.get_elem_solution(0), + c.get_elem_solution_derivative()); + detail::HostHilbertAccumulator accum(F, K); + detail::assemble_hilbert_element(fe, + solution, + goal, + request_jacobian, + _hilbert_order, + accum); + }; - for (unsigned int i=0; i != n_u_dofs; i++) - F(i) += JxW[qp] * (err_u * phi[i][qp]); - - if (_hilbert_order > 0) - { - const std::vector> & dphi = - c.get_element_fe(0)->get_dphi(); +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (const auto * kokkos_goal = this->ensure_kokkos_goal_func()) + { + const auto parsed_goal = kokkos_goal->with_time(this->time); + auto goal = detail::make_hilbert_analytic_goal_access(parsed_goal, + parsed_goal.gradient_function()); + assemble_with_goal(goal); + return request_jacobian; + } - const Gradient grad_u = c.interior_gradient(0, qp); - Gradient ufuncgrad = (*_goal_grad)(input_c, xyz[qp]); - const Gradient err_grad_u = grad_u - ufuncgrad; +#if defined(LIBMESH_HAVE_PETSC) + if (input_system) + if (const auto * kokkos_fem_goal = this->ensure_kokkos_fem_goal_func(); + kokkos_fem_goal && + assemble_host_exact_parsed_fem_goal_element(*this, + c, + request_jacobian, + F, + K, + kokkos_fem_goal->with_time(this->time))) + return request_jacobian; +#endif +#endif - for (unsigned int i=0; i != n_u_dofs; i++) - F(i) += JxW[qp] * (err_grad_u * dphi[i][qp]); - } + if (_analytic_goal_func) + { + auto goal = detail::make_hilbert_analytic_goal_access(*_analytic_goal_func, + *_analytic_goal_grad); + assemble_with_goal(goal); + } + else + { + FEMContext & goal_context = + input_system ? 
*libmesh_map_find(input_contexts, &c) : c; - if (request_jacobian) + if (input_system) { - const Number JxWxD = JxW[qp] * - context.get_elem_solution_derivative(); + goal_context.pre_fe_reinit(*input_system, &elem); + goal_context.elem_fe_reinit(); + } - for (unsigned int i=0; i != n_u_dofs; i++) - for (unsigned int j=0; j != n_u_dofs; ++j) - K(i,j) += JxWxD * (phi[i][qp] * phi[j][qp]); + detail::HostHilbertGoalAccess goal(*_goal_func, _goal_grad.get(), goal_context); + assemble_with_goal(goal); + } - if (_hilbert_order > 0) - { - const std::vector> & dphi = - c.get_element_fe(0)->get_dphi(); + return request_jacobian; +} - for (unsigned int i=0; i != n_u_dofs; i++) - for (unsigned int j=0; j != n_u_dofs; ++j) - K(i,j) += JxWxD * (dphi[i][qp] * dphi[j][qp]); - } - } - } // end of the quadrature point qp-loop +void HilbertSystem::solve() +{ + _last_kokkos_timing = {}; +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (_use_kokkos_backend && this->try_kokkos_petsc_solve()) + return; +#endif - return request_jacobian; + FEMSystem::solve(); } diff --git a/src/apps/L2system.h b/src/apps/L2system.h index c3d197c521a..8532ee811e7 100644 --- a/src/apps/L2system.h +++ b/src/apps/L2system.h @@ -16,10 +16,19 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // libMesh includes +#include "../../include/systems/hilbert_assembly.h" +#include "../../include/numerics/parsed_fem_function.h" +#include "../../include/numerics/parsed_function.h" + +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +#include "../../include/gpu/kokkos_parsed_function.h" +#endif + #include "libmesh/enum_fe_family.h" #include "libmesh/fdm_gradient.h" #include "libmesh/fem_function_base.h" #include "libmesh/fem_system.h" +#include "libmesh/function_base.h" #include "libmesh/libmesh_common.h" // C++ includes @@ -29,19 +38,25 @@ // FEMSystem, TimeSolver and NewtonSolver will handle most tasks, // 
but we must specify element residuals +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +struct KokkosPetscAssemblyPlan; +#endif + class HilbertSystem : public libMesh::FEMSystem { public: + struct KokkosTimingInfo + { + libMesh::Real plan_seconds = 0.; + libMesh::Real assembly_seconds = 0.; + libMesh::Real solve_seconds = 0.; + libMesh::Real total_seconds = 0.; + }; + // Constructor HilbertSystem(libMesh::EquationSystems & es, const std::string & name, - const unsigned int number) - : libMesh::FEMSystem(es, name, number), - input_system(nullptr), - _fe_family("LAGRANGE"), - _fe_order(1), - _hilbert_order(0), - _subdomains_list() {} + const unsigned int number); // Default destructor ~HilbertSystem(); @@ -49,18 +64,37 @@ class HilbertSystem : public libMesh::FEMSystem std::string & fe_family() { return _fe_family; } unsigned int & fe_order() { return _fe_order; } std::set & subdomains_list() { return _subdomains_list; } + const std::set & subdomains_list() const { return _subdomains_list; } unsigned int & hilbert_order() { return _hilbert_order; } + unsigned int hilbert_order() const { return _hilbert_order; } + void use_kokkos_backend(bool use) { _use_kokkos_backend = use; } + bool use_kokkos_backend() const { return _use_kokkos_backend; } + const KokkosTimingInfo & last_kokkos_timing() const { return _last_kokkos_timing; } + virtual void solve () override; void set_fdm_eps(libMesh::Real eps) { _fdm_eps = eps; - if (_goal_func.get()) - _goal_grad = std::make_unique>(*_goal_func, _fdm_eps); + rebuild_goal_gradient(); + rebuild_analytic_goal_gradient(); } void set_goal_func(libMesh::FEMFunctionBase & goal) { _goal_func = goal.clone(); - _goal_grad = std::make_unique>(*_goal_func, _fdm_eps); + _analytic_goal_func.reset(); + _analytic_goal_grad.reset(); + reset_kokkos_goal_cache(); + rebuild_goal_gradient(); + } + + void set_goal_func(libMesh::FunctionBase & goal) + { + _analytic_goal_func = goal.clone(); + 
_analytic_goal_func->init(); + _goal_func.reset(); + _goal_grad.reset(); + reset_kokkos_goal_cache(); + rebuild_analytic_goal_gradient(); } // We want to be able to project functions based on *other* systems' @@ -70,22 +104,29 @@ class HilbertSystem : public libMesh::FEMSystem // case) for that system. libMesh::System * input_system; + libMesh::FEMContext * get_input_context(libMesh::FEMContext & c) + { + const auto it = input_contexts.find(&c); + return (it == input_contexts.end()) ? nullptr : it->second.get(); + } + protected: std::unique_ptr > _goal_func; + std::unique_ptr> _analytic_goal_func; std::map> input_contexts; // System initialization - virtual void init_data (); + virtual void init_data () override; // Context initialization - virtual void init_context (libMesh::DiffContext & context); + virtual void init_context (libMesh::DiffContext & context) override; // Element residual and jacobian calculations // Time dependent parts virtual bool element_time_derivative (bool request_jacobian, - libMesh::DiffContext & context); + libMesh::DiffContext & context) override; // The FE type to use std::string _fe_family; @@ -94,9 +135,19 @@ class HilbertSystem : public libMesh::FEMSystem // The Hilbert order our subclass will project with unsigned int _hilbert_order; + bool _use_kokkos_backend; + // The function we will call to finite difference our goal // function std::unique_ptr> _goal_grad; + std::unique_ptr> _analytic_goal_grad; +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + std::unique_ptr> _kokkos_goal_func; + std::unique_ptr> _kokkos_fem_goal_func; +#if defined(LIBMESH_HAVE_PETSC) + std::unique_ptr _kokkos_petsc_plan; +#endif +#endif // The perturbation we will use when finite differencing our goal // function @@ -104,4 +155,44 @@ class HilbertSystem : public libMesh::FEMSystem // Which subdomains to integrate on (all subdomains, if empty()) std::set _subdomains_list; + KokkosTimingInfo _last_kokkos_timing; + +private: +#if 
defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + const libMesh::Kokkos::KokkosParsedFunction * ensure_kokkos_goal_func(); + const libMesh::Kokkos::KokkosParsedFEMFunction * ensure_kokkos_fem_goal_func(); + bool try_kokkos_element_assembly(libMesh::FEMContext & c, + bool request_jacobian, + libMesh::DenseSubVector & F, + libMesh::DenseSubMatrix & K); +#if defined(LIBMESH_HAVE_PETSC) + bool try_kokkos_petsc_solve(); + KokkosPetscAssemblyPlan * ensure_kokkos_petsc_plan(bool * rebuilt = nullptr); +#endif + + void reset_kokkos_goal_cache(); +#else + void reset_kokkos_goal_cache() {} +#endif + + void rebuild_goal_gradient() + { + if (_goal_func) + _goal_grad = std::make_unique>(*_goal_func, _fdm_eps); + else + _goal_grad.reset(); + } + + void rebuild_analytic_goal_gradient() + { + if (_analytic_goal_func) + { + _analytic_goal_grad = + std::make_unique>(*_analytic_goal_func, + _fdm_eps); + _analytic_goal_grad->init(); + } + else + _analytic_goal_grad.reset(); + } }; diff --git a/src/apps/calculator.C b/src/apps/calculator.C index 639910a81fd..827d37ce0e5 100644 --- a/src/apps/calculator.C +++ b/src/apps/calculator.C @@ -53,6 +53,11 @@ #include #include +#ifdef LIBMESH_HAVE_KOKKOS +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ +#endif using namespace libMesh; @@ -81,6 +86,7 @@ void usage_error(const char * progname) << " Hilbert order [default: off]\n" << " --jump_slits calculate jumps across slits [default: off]\n" << " --integral only calculate func integral, not projection\n" + << " --kokkos use Kokkos local element assembly when supported\n" << std::endl; exit(1); @@ -159,11 +165,44 @@ private: Number _integral; }; +#ifdef LIBMESH_HAVE_KOKKOS +struct KokkosScope +{ + KokkosScope(int & argc, + char ** & argv, + const bool enable) + : _enabled(enable) + { + if (_enabled) + ::Kokkos::initialize(argc, argv); + } + + ~KokkosScope() + { + if (_enabled) + ::Kokkos::finalize(); + } + + bool _enabled; +}; +#endif + int main(int 
argc, char ** argv) { LibMeshInit init(argc, argv); + const bool use_kokkos = libMesh::on_command_line("--kokkos"); + +#ifndef LIBMESH_HAVE_KOKKOS + libmesh_error_msg_if(use_kokkos, + "--kokkos was requested, but this libMesh build does not have Kokkos enabled"); +#endif + +#ifdef LIBMESH_HAVE_KOKKOS + KokkosScope kokkos_scope(argc, argv, use_kokkos); +#endif + // In case the mesh file doesn't let us auto-infer dimension, we let // the user specify it on the command line const unsigned char requested_dim = @@ -228,6 +267,7 @@ int main(int argc, char ** argv) const unsigned int order = libMesh::command_line_next("--order", 1u); std::unique_ptr> goal_function; + std::unique_ptr> analytic_goal_function; if (solnname != "") { @@ -270,8 +310,10 @@ int main(int argc, char ** argv) old_es.print_info(); + analytic_goal_function = + std::make_unique>(calcfunc); goal_function = - std::make_unique>(ParsedFunction(calcfunc)); + std::make_unique>(*analytic_goal_function); } libMesh::out << "Calculating with system " << current_sys_name << std::endl; @@ -310,8 +352,12 @@ int main(int argc, char ** argv) new_sys.fe_family() = family; new_sys.fe_order() = order; + new_sys.use_kokkos_backend(use_kokkos); - new_sys.set_goal_func(*goal_function); + if (analytic_goal_function) + new_sys.set_goal_func(*analytic_goal_function); + else + new_sys.set_goal_func(*goal_function); const Real fdm_eps = libMesh::command_line_next("--fdm_eps", Real(TOLERANCE)); diff --git a/src/apps/hilbert_kokkos_benchmark.C b/src/apps/hilbert_kokkos_benchmark.C new file mode 100644 index 00000000000..75d93d7cc9f --- /dev/null +++ b/src/apps/hilbert_kokkos_benchmark.C @@ -0,0 +1,438 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#include "L2system.h" + +#include "libmesh/libmesh_common.h" +#include "libmesh/diff_solver.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_preconditioner_type.h" +#include "libmesh/enum_solver_type.h" +#include "libmesh/equation_systems.h" +#include "libmesh/explicit_system.h" +#include "libmesh/libmesh.h" +#include "libmesh/linear_solver.h" +#include "libmesh/mesh_generation.h" +#include "libmesh/mesh.h" +#include "libmesh/numeric_vector.h" +#include "libmesh/parsed_fem_function.h" +#include "libmesh/parsed_function.h" +#include "libmesh/steady_solver.h" +#include "libmesh/string_to_enum.h" + +#include +#include +#include +#include + +#ifdef LIBMESH_HAVE_KOKKOS +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ +#endif + +using namespace libMesh; + +namespace +{ + +enum class GoalKind +{ + analytic, + fem +}; + +struct BenchmarkOptions +{ + unsigned int nx = 256; + unsigned int ny = 256; + unsigned int repeats = 1; + unsigned int hilbert_order = 0; + unsigned int fe_order = 1; + unsigned int max_linear_iterations = 5000; + std::string fe_family = "LAGRANGE"; + std::string calc_expression = "sin(pi*x) + 0.25*y"; + std::string input_expression = 
"sin(pi*x) + 0.5*y"; + ElemType elem_type = QUAD4; + Real linear_tolerance = 1.e-10; + Real fdm_eps = 1.e-7; + SolverType solver_type = CG; + PreconditionerType preconditioner_type = JACOBI_PRECOND; + GoalKind goal_kind = GoalKind::analytic; +}; + +struct BenchmarkResult +{ + std::vector solution; + Real average_solve_seconds = 0.; + Real average_plan_seconds = 0.; + Real average_assembly_seconds = 0.; + Real average_solver_seconds = 0.; + Real average_total_seconds = 0.; + dof_id_type n_dofs = 0; +}; + +#ifdef LIBMESH_HAVE_KOKKOS +struct KokkosScope +{ + KokkosScope(int & argc, char ** & argv) + { + ::Kokkos::initialize(argc, argv); + } + + ~KokkosScope() + { + ::Kokkos::finalize(); + } +}; +#endif + +void usage_error(const char * progname) +{ + libMesh::out + << "Options: " << progname << '\n' + << " --nx n number of elements in x [default: 256]\n" + << " --ny n number of elements in y [default: 256]\n" + << " --elem-type type element type [default: QUAD4]\n" + << " --goal kind analytic|fem [default: analytic]\n" + << " --calc expr projection goal expression\n" + << " default analytic: sin(pi*x) + 0.25*y\n" + << " default fem: u*u + x - 0.25*y\n" + << " --input-func expr input field for fem goal [default: sin(pi*x) + 0.5*y]\n" + << " --family fam output FE family [default: LAGRANGE]\n" + << " --order p output FE order [default: 1]\n" + << " --hilbert p Hilbert order [default: 0]\n" + << " --fdm_eps eps fallback finite-difference eps [default: 1e-7]\n" + << " --linear_tol tol linear solve tolerance [default: 1e-10]\n" + << " --max_its n linear max iterations [default: 5000]\n" + << " --solver type solver type [default: CG]\n" + << " --pc type preconditioner type [default: JACOBI_PRECOND]\n" + << " --repeats n number of fresh runs to average [default: 1]\n" + << std::endl; + + std::exit(1); +} + +GoalKind parse_goal_kind(const std::string & goal_string) +{ + if (goal_string == "analytic") + return GoalKind::analytic; + if (goal_string == "fem") + return 
GoalKind::fem; + + libmesh_error_msg("Unsupported --goal value '" << goal_string << "'. Use analytic or fem."); +} + +void configure_hilbert_system(HilbertSystem & sys, + const BenchmarkOptions & options, + const bool use_kokkos) +{ + sys.hilbert_order() = options.hilbert_order; + sys.fe_family() = options.fe_family; + sys.fe_order() = options.fe_order; + sys.use_kokkos_backend(use_kokkos); + sys.set_fdm_eps(options.fdm_eps); + sys.time_solver = std::make_unique(sys); +} + +void configure_linear_solver(HilbertSystem & sys, + const BenchmarkOptions & options) +{ + DiffSolver & solver = *sys.time_solver->diff_solver(); + solver.quiet = true; + solver.verbose = false; + solver.relative_step_tolerance = 1.e-12; + + sys.parameters.set("linear solver maximum iterations") = + options.max_linear_iterations; + sys.parameters.set("linear solver tolerance") = + options.linear_tolerance; + + auto * linear_solver = sys.get_linear_solver(); + linear_solver->set_solver_type(options.solver_type); + linear_solver->set_preconditioner_type(options.preconditioner_type); +} + +BenchmarkResult solve_projection_once(const Parallel::Communicator & comm, + const BenchmarkOptions & options, + const bool use_kokkos) +{ + Mesh mesh(comm, 2); + MeshTools::Generation::build_square(mesh, + options.nx, + options.ny, + 0., + 1., + 0., + 1., + options.elem_type); + + EquationSystems es(mesh); + ExplicitSystem * input_system = nullptr; + if (options.goal_kind == GoalKind::fem) + { + input_system = &es.add_system("input"); + input_system->add_variable("u", FIRST, LAGRANGE); + } + + HilbertSystem & projection = es.add_system("projection"); + configure_hilbert_system(projection, options, use_kokkos); + projection.input_system = input_system; + es.init(); + + if (options.goal_kind == GoalKind::analytic) + { + ParsedFunction goal(options.calc_expression); + projection.set_goal_func(goal); + } + else + { + libmesh_assert(input_system); + ParsedFunction input_function(options.input_expression); + 
input_system->project_solution(&input_function); + + ParsedFEMFunction goal(*input_system, options.calc_expression); + projection.set_goal_func(goal); + } + + configure_linear_solver(projection, options); + + const auto start = std::chrono::steady_clock::now(); + projection.solve(); + const auto stop = std::chrono::steady_clock::now(); + + BenchmarkResult result; + result.average_solve_seconds = + std::chrono::duration_cast>(stop - start).count(); + result.average_total_seconds = result.average_solve_seconds; + if (use_kokkos) + { + const auto & timing = projection.last_kokkos_timing(); + result.average_plan_seconds = timing.plan_seconds; + result.average_assembly_seconds = timing.assembly_seconds; + result.average_solver_seconds = timing.solve_seconds; + result.average_total_seconds = timing.total_seconds; + } + result.n_dofs = projection.n_dofs(); + projection.solution->localize(result.solution); + return result; +} + +BenchmarkResult solve_projection(const Parallel::Communicator & comm, + const BenchmarkOptions & options, + const bool use_kokkos) +{ + BenchmarkResult result; + + for (unsigned int repeat = 0; repeat != options.repeats; ++repeat) + { + auto single = solve_projection_once(comm, options, use_kokkos); + result.average_solve_seconds += single.average_solve_seconds; + result.average_plan_seconds += single.average_plan_seconds; + result.average_assembly_seconds += single.average_assembly_seconds; + result.average_solver_seconds += single.average_solver_seconds; + result.average_total_seconds += single.average_total_seconds; + result.n_dofs = single.n_dofs; + result.solution = std::move(single.solution); + } + + result.average_solve_seconds /= options.repeats; + result.average_plan_seconds /= options.repeats; + result.average_assembly_seconds /= options.repeats; + result.average_solver_seconds /= options.repeats; + result.average_total_seconds /= options.repeats; + return result; +} + +void +print_kokkos_phase_diagnostics(const BenchmarkResult & result) 
+{ + if (result.average_total_seconds <= 0.) + return; + + const Real plan_fraction = result.average_plan_seconds / result.average_total_seconds; + const Real assembly_fraction = result.average_assembly_seconds / result.average_total_seconds; + const Real solver_fraction = result.average_solver_seconds / result.average_total_seconds; + const Real accounted_fraction = plan_fraction + assembly_fraction + solver_fraction; + const Real other_fraction = std::max(0., 1. - accounted_fraction); + + const char * dominant_name = "plan"; + Real dominant_fraction = plan_fraction; + if (assembly_fraction > dominant_fraction) + { + dominant_name = "assembly"; + dominant_fraction = assembly_fraction; + } + if (solver_fraction > dominant_fraction) + { + dominant_name = "solver"; + dominant_fraction = solver_fraction; + } + if (other_fraction > dominant_fraction) + { + dominant_name = "other"; + dominant_fraction = other_fraction; + } + + libMesh::out << "Kokkos phase fractions:" + << " plan=" << 100. * plan_fraction << '%' + << " assembly=" << 100. * assembly_fraction << '%' + << " solver=" << 100. * solver_fraction << '%' + << " other=" << 100. * other_fraction << '%' + << std::endl; + libMesh::out << "Dominant Kokkos phase: " + << dominant_name + << " (" << 100. 
* dominant_fraction << "% of total)" + << std::endl; +} + +void compute_difference_metrics(const std::vector & host_solution, + const std::vector & kokkos_solution, + Real & max_abs_host, + Real & max_abs_diff) +{ + libmesh_assert_equal_to(host_solution.size(), kokkos_solution.size()); + + max_abs_host = 0.; + max_abs_diff = 0.; + + for (const auto i : index_range(host_solution)) + { + max_abs_host = std::max(max_abs_host, std::abs(libmesh_real(host_solution[i]))); + max_abs_diff = std::max(max_abs_diff, + std::abs(libmesh_real(host_solution[i] - kokkos_solution[i]))); + } +} + +BenchmarkOptions parse_options() +{ + BenchmarkOptions options; + + if (libMesh::on_command_line("--help")) + usage_error("hilbert_kokkos_benchmark"); + + options.nx = libMesh::command_line_next("--nx", options.nx); + options.ny = libMesh::command_line_next("--ny", options.ny); + options.repeats = libMesh::command_line_next("--repeats", options.repeats); + options.hilbert_order = libMesh::command_line_next("--hilbert", options.hilbert_order); + options.fe_order = libMesh::command_line_next("--order", options.fe_order); + options.fe_family = libMesh::command_line_next("--family", options.fe_family); + options.input_expression = libMesh::command_line_next("--input-func", options.input_expression); + options.elem_type = + Utility::string_to_enum(libMesh::command_line_next("--elem-type", + std::string("QUAD4"))); + options.fdm_eps = libMesh::command_line_next("--fdm_eps", options.fdm_eps); + options.linear_tolerance = libMesh::command_line_next("--linear_tol", options.linear_tolerance); + options.max_linear_iterations = + libMesh::command_line_next("--max_its", options.max_linear_iterations); + options.solver_type = + Utility::string_to_enum(libMesh::command_line_next("--solver", + std::string("CG"))); + options.preconditioner_type = + Utility::string_to_enum( + libMesh::command_line_next("--pc", std::string("JACOBI_PRECOND"))); + options.goal_kind = + 
parse_goal_kind(libMesh::command_line_next("--goal", std::string("analytic"))); + + const std::string default_calc = + options.goal_kind == GoalKind::analytic ? + std::string("sin(pi*x) + 0.25*y") : + std::string("u*u + x - 0.25*y"); + options.calc_expression = libMesh::command_line_next("--calc", default_calc); + + libmesh_error_msg_if(options.nx == 0 || options.ny == 0, + "--nx and --ny must both be positive"); + libmesh_error_msg_if(options.repeats == 0, + "--repeats must be positive"); + + return options; +} + +} // namespace + +int main(int argc, char ** argv) +{ +#ifdef LIBMESH_HAVE_KOKKOS + KokkosScope kokkos_scope(argc, argv); +#endif + LibMeshInit init(argc, argv); + +#ifndef LIBMESH_HAVE_KOKKOS + libmesh_error_msg("hilbert_kokkos_benchmark requires a libMesh build with Kokkos enabled"); +#endif +#ifndef LIBMESH_HAVE_PETSC + libmesh_error_msg("hilbert_kokkos_benchmark requires a libMesh build with PETSc enabled"); +#endif +#ifndef LIBMESH_HAVE_FPARSER + libmesh_error_msg("hilbert_kokkos_benchmark requires a libMesh build with FPARSER enabled"); +#endif +#ifdef LIBMESH_USE_COMPLEX_NUMBERS + libmesh_error_msg("hilbert_kokkos_benchmark does not support complex Number builds"); +#endif + + const auto options = parse_options(); + + libMesh::out << std::setprecision(std::numeric_limits::max_digits10); + libMesh::out << "Running Hilbert benchmark with" + << " nx=" << options.nx + << " ny=" << options.ny + << " elem_type=" << Utility::enum_to_string(options.elem_type) + << " goal=" << (options.goal_kind == GoalKind::analytic ? 
"analytic" : "fem") + << " family=" << options.fe_family + << " order=" << options.fe_order + << " hilbert=" << options.hilbert_order + << " solver=" << Utility::enum_to_string(options.solver_type) + << " pc=" << Utility::enum_to_string(options.preconditioner_type) + << " repeats=" << options.repeats + << std::endl; + + libMesh::out << "Starting host projection" << std::endl; + const auto host_result = solve_projection(init.comm(), options, false); + + libMesh::out << "Starting Kokkos projection" << std::endl; + const auto kokkos_result = solve_projection(init.comm(), options, true); + + libmesh_assert_equal_to(host_result.n_dofs, kokkos_result.n_dofs); + libmesh_assert_equal_to(host_result.solution.size(), kokkos_result.solution.size()); + + Real max_abs_host = 0.; + Real max_abs_diff = 0.; + compute_difference_metrics(host_result.solution, + kokkos_result.solution, + max_abs_host, + max_abs_diff); + + libMesh::out << "Degrees of freedom: " << host_result.n_dofs << std::endl; + libMesh::out << "Host solve time: " << host_result.average_solve_seconds << " s" << std::endl; + libMesh::out << "Kokkos solve time: " << kokkos_result.average_solve_seconds << " s" << std::endl; + libMesh::out << "Kokkos plan time: " << kokkos_result.average_plan_seconds << " s" << std::endl; + libMesh::out << "Kokkos assembly: " << kokkos_result.average_assembly_seconds << " s" << std::endl; + libMesh::out << "Kokkos solver: " << kokkos_result.average_solver_seconds << " s" << std::endl; + libMesh::out << "Kokkos total time: " << kokkos_result.average_total_seconds << " s" << std::endl; + print_kokkos_phase_diagnostics(kokkos_result); + + if (kokkos_result.average_solve_seconds > 0.) 
+ libMesh::out << "Host/Kokkos ratio: " + << host_result.average_solve_seconds / kokkos_result.average_solve_seconds + << std::endl; + + libMesh::out << "Max |host|: " << max_abs_host << std::endl; + libMesh::out << "Max |host-kokkos|: " << max_abs_diff << std::endl; + + return 0; +} diff --git a/src/base/dof_map.C b/src/base/dof_map.C index d0443d27be4..c259ba33f47 100644 --- a/src/base/dof_map.C +++ b/src/base/dof_map.C @@ -43,6 +43,14 @@ #include "libmesh/system.h" #include "libmesh/parallel_fe_type.h" +#ifdef LIBMESH_HAVE_KOKKOS +#include "libmesh/kokkos_storage_policy.h" +#endif + +#ifdef LIBMESH_HAVE_PETSC +#include "libmesh/petsc_vector.h" +#endif + // TIMPI includes #include "timpi/parallel_implementation.h" #include "timpi/parallel_sync.h" @@ -211,6 +219,156 @@ DofMap::~DofMap() _mesh.remove_ghosting_functor(*_default_evaluating); } +#ifdef LIBMESH_HAVE_KOKKOS +const DofMap::KokkosDofIndexCache * +DofMap::get_kokkos_dof_index_cache(const unsigned int vn) const +{ + if (auto it = _kokkos_dof_index_caches.find(vn); it != _kokkos_dof_index_caches.end()) + return it->second.get(); + + const auto & geometry_cache = _mesh.get_kokkos_geometry_cache(); + auto cache = std::make_unique(); + std::vector dof_indices; + cache->host_element_ids = geometry_cache.host_element_ids; + + for (const auto elem_id : cache->host_element_ids) + { + const Elem * elem = _mesh.query_elem_ptr(elem_id); + libmesh_assert(elem); + this->dof_indices(elem, dof_indices, vn); + cache->max_dofs = std::max(cache->max_dofs, cast_int(dof_indices.size())); + } + + cache->element_ids = + KokkosDofIndexCache::elem_id_view("dof_map_kokkos_element_ids", + cache->host_element_ids.size()); + cache->element_dof_indices = + KokkosDofIndexCache::elem_dof_id_view("dof_map_kokkos_dof_indices", + cache->host_element_ids.size(), + cache->max_dofs); + cache->element_n_dofs = + KokkosDofIndexCache::elem_dof_count_view("dof_map_kokkos_n_dofs", + cache->host_element_ids.size()); + cache->element_subdomains = + 
KokkosDofIndexCache::elem_subdomain_view("dof_map_kokkos_elem_subdomains", + cache->host_element_ids.size()); + + auto h_element_ids = ::Kokkos::create_mirror_view(cache->element_ids); + auto h_dof_indices = ::Kokkos::create_mirror_view(cache->element_dof_indices); + auto h_n_dofs = ::Kokkos::create_mirror_view(cache->element_n_dofs); + auto h_subdomains = ::Kokkos::create_mirror_view(cache->element_subdomains); + cache->host_element_dof_indices.resize(cache->host_element_ids.size() * cache->max_dofs, + DofObject::invalid_id); + cache->host_element_n_dofs.resize(cache->host_element_ids.size(), 0); + cache->host_element_subdomains.resize(cache->host_element_ids.size(), + static_cast(Elem::invalid_subdomain_id)); + + for (auto elem_index : index_range(cache->host_element_ids)) + { + const dof_id_type elem_id = cache->host_element_ids[elem_index]; + const Elem * elem = _mesh.query_elem_ptr(elem_id); + libmesh_assert(elem); + h_element_ids(cast_int(elem_index)) = elem_id; + h_subdomains(cast_int(elem_index)) = elem->subdomain_id(); + cache->host_element_subdomains[elem_index] = elem->subdomain_id(); + this->dof_indices(elem, dof_indices, vn); + h_n_dofs(cast_int(elem_index)) = cast_int(dof_indices.size()); + cache->host_element_n_dofs[elem_index] = cast_int(dof_indices.size()); + for (auto i : index_range(dof_indices)) + { + h_dof_indices(cast_int(elem_index), cast_int(i)) = + dof_indices[i]; + cache->host_element_dof_indices[elem_index * cache->max_dofs + i] = dof_indices[i]; + } + } + + ::Kokkos::deep_copy(cache->element_ids, h_element_ids); + ::Kokkos::deep_copy(cache->element_dof_indices, h_dof_indices); + ::Kokkos::deep_copy(cache->element_n_dofs, h_n_dofs); + ::Kokkos::deep_copy(cache->element_subdomains, h_subdomains); + + auto [it, inserted] = _kokkos_dof_index_caches.emplace(vn, std::move(cache)); + libmesh_ignore(inserted); + return it->second.get(); +} + +const DofMap::KokkosLocalIndexCache * +DofMap::get_kokkos_local_index_cache(const NumericVector & 
local_vector, + const unsigned int vn) const +{ +#ifdef LIBMESH_HAVE_PETSC + const auto key = std::make_pair(vn, &local_vector); + if (auto it = _kokkos_local_index_caches.find(key); it != _kokkos_local_index_caches.end()) + return it->second.get(); + + const auto * petsc_vector = dynamic_cast *>(&local_vector); + if (!petsc_vector) + return nullptr; + + const auto * dof_index_cache = this->get_kokkos_dof_index_cache(vn); + if (!dof_index_cache) + return nullptr; + + const auto & geometry_cache = _mesh.get_kokkos_geometry_cache(); + auto cache = std::make_unique(); + cache->max_dofs = dof_index_cache->max_dofs; + + cache->element_local_indices = + KokkosLocalIndexCache::elem_local_index_view("dof_map_kokkos_local_indices", + geometry_cache.host_element_ids.size(), + cache->max_dofs); + auto h_local_indices = ::Kokkos::create_mirror_view(cache->element_local_indices); + + for (auto elem_index : index_range(geometry_cache.host_element_ids)) + { + const unsigned int n_dofs = dof_index_cache->host_element_n_dofs[elem_index]; + for (unsigned int i = 0; i != n_dofs; ++i) + h_local_indices(cast_int(elem_index), + i) = + cast_int( + petsc_vector->map_global_to_local_index( + dof_index_cache->host_element_dof_indices[elem_index * cache->max_dofs + i])); + } + + ::Kokkos::deep_copy(cache->element_local_indices, h_local_indices); + auto [it, inserted] = _kokkos_local_index_caches.emplace(key, std::move(cache)); + libmesh_ignore(inserted); + return it->second.get(); +#else + libmesh_ignore(local_vector, vn); + return nullptr; +#endif +} + +const DofMap::KokkosLocalIndexCache * +DofMap::require_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn) const +{ + this->prepare_kokkos_local_index_cache(local_vector, vn); + return this->get_kokkos_local_index_cache(local_vector, vn); +} + +void +DofMap::prepare_kokkos_dof_index_caches() const +{ + for (auto vn : make_range(this->n_variables())) + libmesh_ignore(this->get_kokkos_dof_index_cache(vn)); +} 
+ +void +DofMap::prepare_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn) const +{ + libmesh_ignore(this->get_kokkos_local_index_cache(local_vector, vn)); +} + +void DofMap::clear_kokkos_caches() const +{ + _kokkos_dof_index_caches.clear(); + _kokkos_local_index_caches.clear(); +} +#endif + #ifdef LIBMESH_ENABLE_PERIODIC @@ -472,6 +630,10 @@ void DofMap::reinit { libmesh_assert (mesh.is_prepared()); +#ifdef LIBMESH_HAVE_KOKKOS + this->clear_kokkos_caches(); +#endif + LOG_SCOPE("reinit()", "DofMap"); // This is the common case and we want to optimize for it @@ -861,6 +1023,10 @@ void DofMap::invalidate_dofs(MeshBase & mesh) const void DofMap::clear() { +#ifdef LIBMESH_HAVE_KOKKOS + this->clear_kokkos_caches(); +#endif + DofMapBase::clear(); // we don't want to clear @@ -939,6 +1105,10 @@ void DofMap::clear() std::size_t DofMap::distribute_dofs (MeshBase & mesh) { +#ifdef LIBMESH_HAVE_KOKKOS + this->clear_kokkos_caches(); +#endif + // This function must be run on all processors at once parallel_object_only(); @@ -1104,6 +1274,10 @@ std::size_t DofMap::distribute_dofs (MeshBase & mesh) // dependencies to the send_list too. 
// this->sort_send_list (); +#ifdef LIBMESH_HAVE_KOKKOS + this->prepare_kokkos_dof_index_caches(); +#endif + return n_dofs; } diff --git a/src/libmesh_SOURCES b/src/libmesh_SOURCES index d73987f9168..7fbf2e69689 100644 --- a/src/libmesh_SOURCES +++ b/src/libmesh_SOURCES @@ -274,6 +274,7 @@ libmesh_SOURCES = \ src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ src/numerics/petsc_matrix.C \ src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ diff --git a/src/mesh/checkpoint_io.C b/src/mesh/checkpoint_io.C index 7e7ef2b2df3..d25dac6ea54 100644 --- a/src/mesh/checkpoint_io.C +++ b/src/mesh/checkpoint_io.C @@ -1194,7 +1194,7 @@ void CheckpointIO::read_connectivity (Xdr & io) cast_int (elem_data[2] % mesh.n_processors()); const subdomain_id_type subdomain_id = - restrict_int(elem_data[3]); + cast_int(elem_data[3]); // Old broken files used processsor_id_type(-1)... // But we *know* our first element will be level 0 diff --git a/src/mesh/exodusII_io.C b/src/mesh/exodusII_io.C index 4bf7bcf164e..2c83d16f0cc 100644 --- a/src/mesh/exodusII_io.C +++ b/src/mesh/exodusII_io.C @@ -441,7 +441,7 @@ void ExodusII_IO::read (const std::string & fname) // Read the information for block i exio_helper->read_elem_in_block (i); const subdomain_id_type subdomain_id = - restrict_int(exio_helper->get_block_id(i)); + cast_int(exio_helper->get_block_id(i)); max_subdomain_id = std::max(max_subdomain_id, subdomain_id); // populate the map of names diff --git a/src/mesh/exodusII_io_helper.C b/src/mesh/exodusII_io_helper.C index 13f74055cf0..365c030630e 100644 --- a/src/mesh/exodusII_io_helper.C +++ b/src/mesh/exodusII_io_helper.C @@ -3435,7 +3435,7 @@ void ExodusII_IO_Helper::initialize_element_variables(std::vector n std::set current_set; if (vars_active_subdomains[var_num].empty()) for (auto block_id : block_ids) - current_set.insert(restrict_int(block_id)); + 
current_set.insert(cast_int(block_id)); else current_set = vars_active_subdomains[var_num]; diff --git a/src/mesh/gmsh_io.C b/src/mesh/gmsh_io.C index 9684b4ed00f..11a0d08c49a 100644 --- a/src/mesh/gmsh_io.C +++ b/src/mesh/gmsh_io.C @@ -274,12 +274,12 @@ void GmshIO::read_mesh(std::istream & in) // conditions. if (s.find("lower_dimensional_block") != std::string::npos) { - lower_dimensional_blocks.insert(restrict_int(phys_id)); + lower_dimensional_blocks.insert(cast_int(phys_id)); // The user has explicitly told us that this // block is a subdomain, so set that association // in the Mesh. - mesh.subdomain_name(restrict_int(phys_id)) = phys_name; + mesh.subdomain_name(cast_int(phys_id)) = phys_name; } } } @@ -795,7 +795,7 @@ void GmshIO::read_mesh(std::istream & in) // If the physical's dimension matches the largest // dimension we've seen, it's a subdomain name. if (phys_dim == max_elem_dimension_seen) - mesh.subdomain_name(restrict_int(phys_id)) = phys_name; + mesh.subdomain_name(cast_int(phys_id)) = phys_name; // If it's zero-dimensional then it's a nodeset else if (phys_dim == 0) diff --git a/src/mesh/mesh_base.C b/src/mesh/mesh_base.C index 7dc7a6f31cf..e1ece45bfb7 100644 --- a/src/mesh/mesh_base.C +++ b/src/mesh/mesh_base.C @@ -36,6 +36,7 @@ #include "libmesh/point_locator_base.h" #include "libmesh/sparse_matrix.h" #include "libmesh/threads.h" +#include "libmesh/utility.h" #include "libmesh/enum_elem_type.h" #include "libmesh/enum_point_locator_type.h" #include "libmesh/enum_to_string.h" @@ -192,6 +193,9 @@ MeshBase& MeshBase::operator= (MeshBase && other_mesh) _default_mapping_data = other_mesh.default_mapping_data(); _preparation = other_mesh._preparation; _point_locator = std::move(other_mesh._point_locator); +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif _count_lower_dim_elems_in_point_locator = other_mesh.get_count_lower_dim_elems_in_point_locator(); #ifdef LIBMESH_ENABLE_UNIQUE_ID _next_unique_id = other_mesh.next_unique_id(); @@ 
-603,6 +607,9 @@ void MeshBase::set_spatial_dimension(unsigned char d) // libMesh will only *increase* the spatial dimension, however, // never decrease it. _spatial_dimension = d; +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif } @@ -998,6 +1005,10 @@ void MeshBase::complete_preparation() MeshTools::libmesh_assert_valid_unique_ids(*this); #endif #endif + +#ifdef LIBMESH_HAVE_KOKKOS + this->prepare_kokkos_geometry_cache(); +#endif } void @@ -1036,8 +1047,127 @@ void MeshBase::clear () // Clear our point locator. this->clear_point_locator(); +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif +} + +#ifdef LIBMESH_HAVE_KOKKOS +const MeshBase::KokkosGeometryCache & +MeshBase::get_kokkos_geometry_cache() const +{ + if (_kokkos_geometry_cache) + return *_kokkos_geometry_cache; + + auto cache = std::make_unique(); + cache->host_element_ids.reserve(this->n_active_local_elem()); + + for (const auto & elem : this->active_local_element_ptr_range()) + { + cache->element_lookup.emplace(elem->id(), cast_int(cache->host_element_ids.size())); + cache->host_element_ids.push_back(elem->id()); + cache->max_nodes = std::max(cache->max_nodes, elem->n_nodes()); + + for (unsigned int n = 0; n != elem->n_nodes(); ++n) + { + const dof_id_type node_id = elem->node_id(n); + if (!cache->node_lookup.count(node_id)) + { + cache->node_lookup.emplace(node_id, cast_int(cache->host_node_ids.size())); + cache->host_node_ids.push_back(node_id); + } + } + } + + cache->node_ids = + KokkosGeometryCache::node_id_view("mesh_kokkos_node_ids", cache->host_node_ids.size()); + cache->element_ids = + KokkosGeometryCache::elem_id_view("mesh_kokkos_element_ids", cache->host_element_ids.size()); + cache->node_coordinates = + KokkosGeometryCache::node_coord_view("mesh_kokkos_node_coordinates", + cache->host_node_ids.size(), + LIBMESH_DIM); + cache->element_node_ids = + KokkosGeometryCache::elem_node_id_view("mesh_kokkos_element_node_ids", + cache->host_element_ids.size(), + 
cache->max_nodes); + cache->element_types = + KokkosGeometryCache::elem_type_view("mesh_kokkos_element_types", cache->host_element_ids.size()); + cache->element_mapping_types = + KokkosGeometryCache::elem_mapping_type_view("mesh_kokkos_element_mapping_types", + cache->host_element_ids.size()); + cache->element_n_nodes = + KokkosGeometryCache::elem_n_nodes_view("mesh_kokkos_element_n_nodes", cache->host_element_ids.size()); + cache->element_p_levels = + KokkosGeometryCache::elem_p_level_view("mesh_kokkos_element_p_levels", cache->host_element_ids.size()); + cache->element_subdomains = + KokkosGeometryCache::elem_subdomain_view("mesh_kokkos_element_subdomains", + cache->host_element_ids.size()); + + auto h_node_ids = ::Kokkos::create_mirror_view(cache->node_ids); + auto h_element_ids = ::Kokkos::create_mirror_view(cache->element_ids); + auto h_node_coordinates = ::Kokkos::create_mirror_view(cache->node_coordinates); + auto h_element_node_ids = ::Kokkos::create_mirror_view(cache->element_node_ids); + auto h_element_types = ::Kokkos::create_mirror_view(cache->element_types); + auto h_element_mapping_types = ::Kokkos::create_mirror_view(cache->element_mapping_types); + auto h_element_n_nodes = ::Kokkos::create_mirror_view(cache->element_n_nodes); + auto h_element_p_levels = ::Kokkos::create_mirror_view(cache->element_p_levels); + auto h_element_subdomains = ::Kokkos::create_mirror_view(cache->element_subdomains); + + for (auto node_index : index_range(cache->host_node_ids)) + { + const dof_id_type node_id = cache->host_node_ids[node_index]; + const Node & node = *this->query_node_ptr(node_id); + h_node_ids(cast_int(node_index)) = node_id; + for (unsigned int component = 0; component != LIBMESH_DIM; ++component) + h_node_coordinates(cast_int(node_index), component) = node(component); + } + + for (auto elem_index : index_range(cache->host_element_ids)) + { + const dof_id_type elem_id = cache->host_element_ids[elem_index]; + const Elem & elem = 
*this->query_elem_ptr(elem_id); + h_element_ids(cast_int(elem_index)) = elem_id; + h_element_types(cast_int(elem_index)) = elem.type(); + h_element_mapping_types(cast_int(elem_index)) = elem.mapping_type(); + h_element_n_nodes(cast_int(elem_index)) = elem.n_nodes(); + h_element_p_levels(cast_int(elem_index)) = elem.p_level(); + h_element_subdomains(cast_int(elem_index)) = elem.subdomain_id(); + for (unsigned int n = 0; n != elem.n_nodes(); ++n) + h_element_node_ids(cast_int(elem_index), n) = + libmesh_map_find(cache->node_lookup, elem.node_id(n)); + } + + ::Kokkos::deep_copy(cache->node_ids, h_node_ids); + ::Kokkos::deep_copy(cache->element_ids, h_element_ids); + ::Kokkos::deep_copy(cache->node_coordinates, h_node_coordinates); + ::Kokkos::deep_copy(cache->element_node_ids, h_element_node_ids); + ::Kokkos::deep_copy(cache->element_types, h_element_types); + ::Kokkos::deep_copy(cache->element_mapping_types, h_element_mapping_types); + ::Kokkos::deep_copy(cache->element_n_nodes, h_element_n_nodes); + ::Kokkos::deep_copy(cache->element_p_levels, h_element_p_levels); + ::Kokkos::deep_copy(cache->element_subdomains, h_element_subdomains); + _kokkos_geometry_cache = std::move(cache); + return *_kokkos_geometry_cache; } +void +MeshBase::prepare_kokkos_geometry_cache() const +{ + libmesh_ignore(this->get_kokkos_geometry_cache()); +} + +unsigned int +MeshBase::get_kokkos_elem_index(const Elem & elem) const +{ + const auto & cache = this->get_kokkos_geometry_cache(); + if (auto it = cache.element_lookup.find(elem.id()); it != cache.element_lookup.end()) + return it->second; + + return libMesh::invalid_uint; +} +#endif + bool MeshBase::is_prepared() const { @@ -1741,6 +1871,9 @@ void MeshBase::partition (const unsigned int n_parts) } _preparation.is_partitioned = true; +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif } void MeshBase::all_second_order (const bool full_ordered) @@ -2290,6 +2423,10 @@ MeshBase::post_dofobject_moves(MeshBase && other_mesh) if 
(other_mesh.partitioner()) _partitioner = std::move(other_mesh.partitioner()); + +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif } diff --git a/src/mesh/nemesis_io.C b/src/mesh/nemesis_io.C index 1c83c38728f..f05892c357c 100644 --- a/src/mesh/nemesis_io.C +++ b/src/mesh/nemesis_io.C @@ -857,7 +857,7 @@ void Nemesis_IO::read (const std::string & base_filename) // Set subdomain ID based on the block ID. subdomain_id_type subdomain_id = - restrict_int(nemhelper->block_ids[i]); + cast_int(nemhelper->block_ids[i]); // Create a type string (this uses the null-terminated string ctor). const std::string type_str ( nemhelper->elem_type.data() ); diff --git a/src/mesh/nemesis_io_helper.C b/src/mesh/nemesis_io_helper.C index c116232393e..46400016eef 100644 --- a/src/mesh/nemesis_io_helper.C +++ b/src/mesh/nemesis_io_helper.C @@ -2285,7 +2285,7 @@ void Nemesis_IO_Helper::write_elements(const MeshBase & mesh, bool /*use_discont // empty string if there is no name associated with the current // block. names_table.push_back_entry - (mesh.subdomain_name(restrict_int(this->global_elem_blk_ids[i]))); + (mesh.subdomain_name(cast_int(this->global_elem_blk_ids[i]))); // Search for the current global block ID in the map if (const auto it = this->block_id_to_elem_connectivity.find( this->global_elem_blk_ids[i] ); @@ -2661,7 +2661,7 @@ Nemesis_IO_Helper::write_element_values(const MeshBase & mesh, for (const int sbd_id_int : global_elem_blk_ids) { const subdomain_id_type sbd_id = - restrict_int(sbd_id_int); + cast_int(sbd_id_int); auto it = subdomain_map.find(sbd_id); const std::vector empty_vec; const std::vector & elem_ids = diff --git a/src/mesh/tetgen_io.C b/src/mesh/tetgen_io.C index 9010d01a330..b61169d75ab 100644 --- a/src/mesh/tetgen_io.C +++ b/src/mesh/tetgen_io.C @@ -257,7 +257,7 @@ void TetGenIO::element_in (std::istream & ele_stream) // Make sure that the id we read can be successfully cast to // an integral value of type subdomain_id_type. 
- elem->subdomain_id() = restrict_int(region); + elem->subdomain_id() = cast_int(region); } } } diff --git a/src/mesh/ucd_io.C b/src/mesh/ucd_io.C index b0b6feb745b..e6f6cd565c8 100644 --- a/src/mesh/ucd_io.C +++ b/src/mesh/ucd_io.C @@ -221,7 +221,7 @@ void UCDIO::read_implementation (std::istream & in) elems_of_dimension[elem->dim()] = true; // Set the element's subdomain ID based on the material_id. - elem->subdomain_id() = restrict_int(material_id); + elem->subdomain_id() = cast_int(material_id); // Add the element to the mesh elem->set_id(i); diff --git a/src/mesh/unv_io.C b/src/mesh/unv_io.C index 4f4344502a0..0d333b2dafd 100644 --- a/src/mesh/unv_io.C +++ b/src/mesh/unv_io.C @@ -519,7 +519,7 @@ void UNVIO::groups_in (std::istream & in_file) // Set the current group number as the lower-dimensional element's subdomain ID. // We will use this later to set a boundary ID. group_elem->subdomain_id() = - restrict_int(group_number); + cast_int(group_number); // Store the lower-dimensional element in the provide_bcs container. provide_bcs.emplace(group_elem->key(), group_elem); @@ -530,7 +530,7 @@ void UNVIO::groups_in (std::istream & in_file) { is_subdomain_group = true; group_elem->subdomain_id() = - restrict_int(group_number); + cast_int(group_number); } else diff --git a/src/mesh/xdr_io.C b/src/mesh/xdr_io.C index 097d7c2f46b..862313e8444 100644 --- a/src/mesh/xdr_io.C +++ b/src/mesh/xdr_io.C @@ -1897,7 +1897,7 @@ XdrIO::read_serialized_connectivity (Xdr & io, cast_int(*it++); const subdomain_id_type subdomain_id = - restrict_int(*it++); + cast_int(*it++); tmp = *it++; #ifdef LIBMESH_ENABLE_AMR diff --git a/src/numerics/parsed_function_program.C b/src/numerics/parsed_function_program.C new file mode 100644 index 00000000000..a93ca327038 --- /dev/null +++ b/src/numerics/parsed_function_program.C @@ -0,0 +1,155 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#include "libmesh/parsed_function_program.h" + +#include "fparser_ad.hh" +#include "extrasrc/fptypes.hh" + +namespace +{ + +template +void +validate_kokkos_program_opcode(const unsigned int opcode) +{ + using libMesh::ParsedFunctionOpcode; + + if (libMesh::parsed_function_is_var_opcode(opcode)) + return; + + switch (static_cast(opcode)) + { + case ParsedFunctionOpcode::cAbs: + case ParsedFunctionOpcode::cAcos: + case ParsedFunctionOpcode::cAcosh: + case ParsedFunctionOpcode::cAsin: + case ParsedFunctionOpcode::cAsinh: + case ParsedFunctionOpcode::cAtan: + case ParsedFunctionOpcode::cAtan2: + case ParsedFunctionOpcode::cAtanh: + case ParsedFunctionOpcode::cCbrt: + case ParsedFunctionOpcode::cCeil: + case ParsedFunctionOpcode::cCos: + case ParsedFunctionOpcode::cCosh: + case ParsedFunctionOpcode::cCot: + case ParsedFunctionOpcode::cCsc: + case ParsedFunctionOpcode::cExp: + case ParsedFunctionOpcode::cExp2: + case ParsedFunctionOpcode::cFloor: + case ParsedFunctionOpcode::cHypot: + case ParsedFunctionOpcode::cIf: + case ParsedFunctionOpcode::cInt: + case ParsedFunctionOpcode::cLog: + case ParsedFunctionOpcode::cLog10: + case ParsedFunctionOpcode::cLog2: + case ParsedFunctionOpcode::cMax: + case ParsedFunctionOpcode::cMin: + 
case ParsedFunctionOpcode::cPow: + case ParsedFunctionOpcode::cSec: + case ParsedFunctionOpcode::cSin: + case ParsedFunctionOpcode::cSinh: + case ParsedFunctionOpcode::cSqrt: + case ParsedFunctionOpcode::cTan: + case ParsedFunctionOpcode::cTanh: + case ParsedFunctionOpcode::cTrunc: + case ParsedFunctionOpcode::cImmed: + case ParsedFunctionOpcode::cJump: + case ParsedFunctionOpcode::cNeg: + case ParsedFunctionOpcode::cAdd: + case ParsedFunctionOpcode::cSub: + case ParsedFunctionOpcode::cMul: + case ParsedFunctionOpcode::cDiv: + case ParsedFunctionOpcode::cMod: + case ParsedFunctionOpcode::cEqual: + case ParsedFunctionOpcode::cNEqual: + case ParsedFunctionOpcode::cLess: + case ParsedFunctionOpcode::cLessOrEq: + case ParsedFunctionOpcode::cGreater: + case ParsedFunctionOpcode::cGreaterOrEq: + case ParsedFunctionOpcode::cNot: + case ParsedFunctionOpcode::cAnd: + case ParsedFunctionOpcode::cOr: + case ParsedFunctionOpcode::cNotNot: + case ParsedFunctionOpcode::cDeg: + case ParsedFunctionOpcode::cRad: + case ParsedFunctionOpcode::cPopNMov: + case ParsedFunctionOpcode::cLog2by: + case ParsedFunctionOpcode::cNop: + case ParsedFunctionOpcode::cSinCos: + case ParsedFunctionOpcode::cSinhCosh: + case ParsedFunctionOpcode::cAbsAnd: + case ParsedFunctionOpcode::cAbsOr: + case ParsedFunctionOpcode::cAbsNot: + case ParsedFunctionOpcode::cAbsNotNot: + case ParsedFunctionOpcode::cAbsIf: + case ParsedFunctionOpcode::cDup: + case ParsedFunctionOpcode::cFetch: + case ParsedFunctionOpcode::cInv: + case ParsedFunctionOpcode::cSqr: + case ParsedFunctionOpcode::cRDiv: + case ParsedFunctionOpcode::cRSub: + case ParsedFunctionOpcode::cRSqrt: + return; + + case ParsedFunctionOpcode::cArg: + case ParsedFunctionOpcode::cConj: + case ParsedFunctionOpcode::cImag: + case ParsedFunctionOpcode::cPolar: + case ParsedFunctionOpcode::cReal: + libmesh_error_msg("Kokkos parsed-function export does not support complex-valued fparser opcodes"); + + case ParsedFunctionOpcode::cFCall: + case 
ParsedFunctionOpcode::cPCall: + libmesh_error_msg("Kokkos parsed-function export does not support user-defined or nested parser calls"); + + case ParsedFunctionOpcode::VarBegin: + return; + } + + libmesh_error_msg("Kokkos parsed-function export encountered an unknown opcode " << opcode); +} + +} // anonymous namespace + +namespace libMesh +{ + +template +ParsedFunctionProgram +build_parsed_function_program(const FunctionParserADBase & parser) +{ + ParsedFunctionProgram program; + const auto * data = parser.parser_data(); + libmesh_assert(data); + + program.bytecode.assign(data->mByteCode.begin(), data->mByteCode.end()); + program.immediates.assign(data->mImmed.begin(), data->mImmed.end()); + program.stack_size = data->mStackSize; + program.n_variables = data->mVariablesAmount; + program.epsilon = FunctionParserBase::epsilon(); + + for (const auto opcode : program.bytecode) + validate_kokkos_program_opcode(opcode); + + return program; +} + +template ParsedFunctionProgram +build_parsed_function_program(const FunctionParserADBase & parser); + +} // namespace libMesh diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 129f3abfeab..c9d3161f5d0 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -1178,8 +1178,6 @@ void PetscVector::create_subvector(NumericVector & subvector, petsc_subvector->_is_closed = true; } - - template void PetscVector::_get_array(bool read_only) const { diff --git a/tests/Makefile.am b/tests/Makefile.am index d752bcfd487..d0b5f15ae55 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -142,6 +142,7 @@ unit_tests_sources = \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ systems/periodic_bc_test.C \ systems/disjoint_neighbor_test.C \ systems/systems_test.C \ diff --git a/tests/Makefile.in b/tests/Makefile.in index 96c87de5252..b45feaec91d 100644 --- a/tests/Makefile.in +++ 
b/tests/Makefile.in @@ -99,8 +99,30 @@ check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ $(am__EXEEXT_7) TESTS = $(am__EXEEXT_1) $(am__append_11) @LIBMESH_ENABLE_KOKKOS_TRUE@am__append_2 = -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) -@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_3 = kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit -@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_4 = kokkos_vector_ops_oracle_unit kokkos_tensor_ops_oracle_unit +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_3 = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_types_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_shape_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_invariant_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_contract_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_permuted_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_reconstruction_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_side_trace_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_quadrature_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_vector_ops_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_4 = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_types_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_shape_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_invariant_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_contract_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_permuted_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_reconstruction_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_side_trace_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_quadrature_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_vector_ops_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. 
Some cppunit versions work fine for us, @@ -188,7 +210,16 @@ mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/include/libmesh_config.h.tmp CONFIG_CLEAN_FILES = run_unit_tests.sh CONFIG_CLEAN_VPATH_FILES = -@LIBMESH_ENABLE_KOKKOS_TRUE@am__EXEEXT_1 = kokkos_vector_ops_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@am__EXEEXT_1 = kokkos_fe_types_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_shape_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_map_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_invariant_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_contract_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_permuted_map_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_reconstruction_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_side_trace_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_quadrature_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_vector_ops_oracle_unit$(EXEEXT) \ @LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit$(EXEEXT) @ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) @ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-dbg$(EXEEXT) @@ -196,13 +227,94 @@ CONFIG_CLEAN_VPATH_FILES = @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-prof$(EXEEXT) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_6 = unit_tests-oprof$(EXEEXT) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_7 = unit_tests-opt$(EXEEXT) +am__kokkos_fe_contract_unit_SOURCES_DIST = \ + fe/kokkos_fe_contract_test.K +am__dirstamp = $(am__leading_dot)dirstamp +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_contract_unit_OBJECTS = fe/kokkos_fe_contract_test.$(OBJEXT) +kokkos_fe_contract_unit_OBJECTS = \ + 
$(am_kokkos_fe_contract_unit_OBJECTS) +am__DEPENDENCIES_1 = +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_invariant_unit_SOURCES_DIST = \ + fe/kokkos_fe_invariant_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_invariant_unit_OBJECTS = fe/kokkos_fe_invariant_test.$(OBJEXT) +kokkos_fe_invariant_unit_OBJECTS = \ + $(am_kokkos_fe_invariant_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_map_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_map_oracle_unit_OBJECTS = fe/kokkos_fe_map_oracle_test.$(OBJEXT) +kokkos_fe_map_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_map_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_permuted_map_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_permuted_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_permuted_map_oracle_unit_OBJECTS = fe/kokkos_fe_permuted_map_oracle_test.$(OBJEXT) +kokkos_fe_permuted_map_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_permuted_map_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_reconstruction_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_reconstruction_oracle_test.K 
+@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_reconstruction_oracle_unit_OBJECTS = fe/kokkos_fe_reconstruction_oracle_test.$(OBJEXT) +kokkos_fe_reconstruction_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_reconstruction_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_shape_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_shape_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_shape_oracle_unit_OBJECTS = fe/kokkos_fe_shape_oracle_test.$(OBJEXT) +kokkos_fe_shape_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_shape_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_side_trace_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_side_trace_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_side_trace_oracle_unit_OBJECTS = fe/kokkos_fe_side_trace_oracle_test.$(OBJEXT) +kokkos_fe_side_trace_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_side_trace_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_types_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_types_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_types_oracle_unit_OBJECTS = fe/kokkos_fe_types_oracle_test.$(OBJEXT) +kokkos_fe_types_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_types_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ 
+@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_quadrature_oracle_unit_SOURCES_DIST = \ + fe/kokkos_quadrature_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_quadrature_oracle_unit_OBJECTS = fe/kokkos_quadrature_oracle_test.$(OBJEXT) +kokkos_quadrature_oracle_unit_OBJECTS = \ + $(am_kokkos_quadrature_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST = \ numerics/kokkos_tensor_ops_oracle_test.K -am__dirstamp = $(am__leading_dot)dirstamp @LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_tensor_ops_oracle_unit_OBJECTS = numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT) kokkos_tensor_ops_oracle_unit_OBJECTS = \ $(am_kokkos_tensor_ops_oracle_unit_OBJECTS) -am__DEPENDENCIES_1 = @LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_DEPENDENCIES = \ @LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ @LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ @@ -281,15 +393,18 @@ am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C 
fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_1 = fparser/unit_tests_dbg-autodiff.$(OBJEXT) am__objects_2 = unit_tests_dbg-driver.$(OBJEXT) \ base/unit_tests_dbg-dof_map_test.$(OBJEXT) \ @@ -402,6 +517,7 @@ am__objects_2 = unit_tests_dbg-driver.$(OBJEXT) \ solvers/unit_tests_dbg-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_dbg-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_dbg-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_dbg-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_dbg-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_dbg-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_dbg-systems_test.$(OBJEXT) \ @@ -487,15 +603,18 @@ am__unit_tests_devel_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + 
systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_3 = fparser/unit_tests_devel-autodiff.$(OBJEXT) am__objects_4 = unit_tests_devel-driver.$(OBJEXT) \ base/unit_tests_devel-dof_map_test.$(OBJEXT) \ @@ -608,6 +727,7 @@ am__objects_4 = unit_tests_devel-driver.$(OBJEXT) \ solvers/unit_tests_devel-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_devel-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_devel-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_devel-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_devel-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_devel-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_devel-systems_test.$(OBJEXT) \ @@ -689,15 +809,18 @@ am__unit_tests_oprof_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C 
utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_5 = fparser/unit_tests_oprof-autodiff.$(OBJEXT) am__objects_6 = unit_tests_oprof-driver.$(OBJEXT) \ base/unit_tests_oprof-dof_map_test.$(OBJEXT) \ @@ -810,6 +933,7 @@ am__objects_6 = unit_tests_oprof-driver.$(OBJEXT) \ solvers/unit_tests_oprof-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_oprof-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_oprof-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_oprof-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_oprof-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_oprof-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_oprof-systems_test.$(OBJEXT) \ @@ -891,15 +1015,18 @@ am__unit_tests_opt_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_7 = fparser/unit_tests_opt-autodiff.$(OBJEXT) 
am__objects_8 = unit_tests_opt-driver.$(OBJEXT) \ base/unit_tests_opt-dof_map_test.$(OBJEXT) \ @@ -1012,6 +1139,7 @@ am__objects_8 = unit_tests_opt-driver.$(OBJEXT) \ solvers/unit_tests_opt-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_opt-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_opt-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_opt-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_opt-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_opt-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_opt-systems_test.$(OBJEXT) \ @@ -1093,15 +1221,18 @@ am__unit_tests_prof_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_9 = fparser/unit_tests_prof-autodiff.$(OBJEXT) am__objects_10 = unit_tests_prof-driver.$(OBJEXT) \ base/unit_tests_prof-dof_map_test.$(OBJEXT) \ @@ -1214,6 +1345,7 @@ am__objects_10 = unit_tests_prof-driver.$(OBJEXT) \ 
solvers/unit_tests_prof-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_prof-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_prof-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_prof-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_prof-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_prof-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_prof-systems_test.$(OBJEXT) \ @@ -1798,26 +1930,31 @@ am__depfiles_remade = ./$(DEPDIR)/unit_tests_dbg-driver.Po \ systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po \ systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_devel-systems_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po \ systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_opt-systems_test.Po \ 
systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_prof-systems_test.Po \ utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po \ @@ -1887,12 +2024,30 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(kokkos_tensor_ops_oracle_unit_SOURCES) \ +SOURCES = $(kokkos_fe_contract_unit_SOURCES) \ + $(kokkos_fe_invariant_unit_SOURCES) \ + $(kokkos_fe_map_oracle_unit_SOURCES) \ + $(kokkos_fe_permuted_map_oracle_unit_SOURCES) \ + $(kokkos_fe_reconstruction_oracle_unit_SOURCES) \ + $(kokkos_fe_shape_oracle_unit_SOURCES) \ + $(kokkos_fe_side_trace_oracle_unit_SOURCES) \ + $(kokkos_fe_types_oracle_unit_SOURCES) \ + $(kokkos_quadrature_oracle_unit_SOURCES) \ + $(kokkos_tensor_ops_oracle_unit_SOURCES) \ $(kokkos_vector_ops_oracle_unit_SOURCES) \ $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ $(unit_tests_oprof_SOURCES) $(unit_tests_opt_SOURCES) \ $(unit_tests_prof_SOURCES) -DIST_SOURCES = $(am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST) \ +DIST_SOURCES = $(am__kokkos_fe_contract_unit_SOURCES_DIST) \ + $(am__kokkos_fe_invariant_unit_SOURCES_DIST) \ + $(am__kokkos_fe_map_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_permuted_map_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_reconstruction_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_shape_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_side_trace_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_types_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_quadrature_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST) \ $(am__kokkos_vector_ops_oracle_unit_SOURCES_DIST) \ $(am__unit_tests_dbg_SOURCES_DIST) \ $(am__unit_tests_devel_SOURCES_DIST) 
\ @@ -2103,11 +2258,11 @@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ -KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ KOKKOS_CXX = @KOKKOS_CXX@ KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2394,15 +2549,18 @@ unit_tests_sources = driver.C libmesh_cppunit.h stream_redirector.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C $(am__append_1) + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C $(am__append_1) data = matrices/geom_1_extraction_op.h5 \ matrices/geom_1_extraction_op.m \ matrices/geom_1_extraction_op.m.gz \ @@ -2490,7 +2648,52 @@ data = matrices/geom_1_extraction_op.h5 \ unit_tests_data = $(data) # Why isn't this working automatically? 
-EXTRA_DIST = $(data) +EXTRA_DIST = $(data) fe/kokkos_fe_oracle_test_utils.h +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_SOURCES = fe/kokkos_fe_types_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_SOURCES = fe/kokkos_fe_shape_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_SOURCES = fe/kokkos_fe_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_SOURCES = fe/kokkos_fe_invariant_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) 
+@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_SOURCES = fe/kokkos_fe_contract_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_SOURCES = fe/kokkos_fe_permuted_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_SOURCES = fe/kokkos_fe_reconstruction_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) 
+@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_SOURCES = fe/kokkos_fe_side_trace_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_SOURCES = fe/kokkos_quadrature_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) @LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K @LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) @LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) @@ -2533,6 +2736,42 @@ EXTRA_DIST = $(data) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_DATA = $(data) # Custom link rules so the Kokkos compiler drives the final link step. 
+kokkos_fe_types_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_types_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_shape_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_shape_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_invariant_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_invariant_unit_LDFLAGS) -o $@ + +kokkos_fe_contract_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_contract_unit_LDFLAGS) -o $@ + +kokkos_fe_permuted_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_permuted_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_reconstruction_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_reconstruction_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_side_trace_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_side_trace_oracle_unit_LDFLAGS) -o $@ + +kokkos_quadrature_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_quadrature_oracle_unit_LDFLAGS) -o $@ + kokkos_vector_ops_oracle_unit_LINK = \ $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ @@ -2619,6 +2858,66 @@ run_unit_tests.sh: $(top_builddir)/config.status $(srcdir)/run_unit_tests.sh.in clean-checkPROGRAMS: $(am__rm_f) $(check_PROGRAMS) test -z "$(EXEEXT)" || $(am__rm_f) $(check_PROGRAMS:$(EXEEXT)=) +fe/$(am__dirstamp): + @$(MKDIR_P) fe + @: >>fe/$(am__dirstamp) +fe/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) fe/$(DEPDIR) + @: >>fe/$(DEPDIR)/$(am__dirstamp) +fe/kokkos_fe_contract_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + 
+kokkos_fe_contract_unit$(EXEEXT): $(kokkos_fe_contract_unit_OBJECTS) $(kokkos_fe_contract_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_contract_unit_DEPENDENCIES) + @rm -f kokkos_fe_contract_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_contract_unit_LINK) $(kokkos_fe_contract_unit_OBJECTS) $(kokkos_fe_contract_unit_LDADD) $(LIBS) +fe/kokkos_fe_invariant_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_invariant_unit$(EXEEXT): $(kokkos_fe_invariant_unit_OBJECTS) $(kokkos_fe_invariant_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_invariant_unit_DEPENDENCIES) + @rm -f kokkos_fe_invariant_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_invariant_unit_LINK) $(kokkos_fe_invariant_unit_OBJECTS) $(kokkos_fe_invariant_unit_LDADD) $(LIBS) +fe/kokkos_fe_map_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_map_oracle_unit$(EXEEXT): $(kokkos_fe_map_oracle_unit_OBJECTS) $(kokkos_fe_map_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_map_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_map_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_map_oracle_unit_LINK) $(kokkos_fe_map_oracle_unit_OBJECTS) $(kokkos_fe_map_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_permuted_map_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_permuted_map_oracle_unit$(EXEEXT): $(kokkos_fe_permuted_map_oracle_unit_OBJECTS) $(kokkos_fe_permuted_map_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_permuted_map_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_permuted_map_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_permuted_map_oracle_unit_LINK) $(kokkos_fe_permuted_map_oracle_unit_OBJECTS) $(kokkos_fe_permuted_map_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_reconstruction_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_reconstruction_oracle_unit$(EXEEXT): $(kokkos_fe_reconstruction_oracle_unit_OBJECTS) $(kokkos_fe_reconstruction_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_reconstruction_oracle_unit_DEPENDENCIES) + 
@rm -f kokkos_fe_reconstruction_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_reconstruction_oracle_unit_LINK) $(kokkos_fe_reconstruction_oracle_unit_OBJECTS) $(kokkos_fe_reconstruction_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_shape_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_shape_oracle_unit$(EXEEXT): $(kokkos_fe_shape_oracle_unit_OBJECTS) $(kokkos_fe_shape_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_shape_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_shape_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_shape_oracle_unit_LINK) $(kokkos_fe_shape_oracle_unit_OBJECTS) $(kokkos_fe_shape_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_side_trace_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_side_trace_oracle_unit$(EXEEXT): $(kokkos_fe_side_trace_oracle_unit_OBJECTS) $(kokkos_fe_side_trace_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_side_trace_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_side_trace_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_side_trace_oracle_unit_LINK) $(kokkos_fe_side_trace_oracle_unit_OBJECTS) $(kokkos_fe_side_trace_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_types_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_types_oracle_unit$(EXEEXT): $(kokkos_fe_types_oracle_unit_OBJECTS) $(kokkos_fe_types_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_types_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_types_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_types_oracle_unit_LINK) $(kokkos_fe_types_oracle_unit_OBJECTS) $(kokkos_fe_types_oracle_unit_LDADD) $(LIBS) +fe/kokkos_quadrature_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_quadrature_oracle_unit$(EXEEXT): $(kokkos_quadrature_oracle_unit_OBJECTS) $(kokkos_quadrature_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_quadrature_oracle_unit_DEPENDENCIES) + @rm -f kokkos_quadrature_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_quadrature_oracle_unit_LINK) 
$(kokkos_quadrature_oracle_unit_OBJECTS) $(kokkos_quadrature_oracle_unit_LDADD) $(LIBS) numerics/$(am__dirstamp): @$(MKDIR_P) numerics @: >>numerics/$(am__dirstamp) @@ -2657,12 +2956,6 @@ base/unit_tests_dbg-nonmanifold_coupling_test.$(OBJEXT): \ base/$(am__dirstamp) base/$(DEPDIR)/$(am__dirstamp) base/unit_tests_dbg-multi_evaluable_pred_test.$(OBJEXT): \ base/$(am__dirstamp) base/$(DEPDIR)/$(am__dirstamp) -fe/$(am__dirstamp): - @$(MKDIR_P) fe - @: >>fe/$(am__dirstamp) -fe/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) fe/$(DEPDIR) - @: >>fe/$(DEPDIR)/$(am__dirstamp) fe/unit_tests_dbg-fe_bernstein_test.$(OBJEXT): fe/$(am__dirstamp) \ fe/$(DEPDIR)/$(am__dirstamp) fe/unit_tests_dbg-fe_clough_test.$(OBJEXT): fe/$(am__dirstamp) \ @@ -2919,6 +3212,8 @@ systems/unit_tests_dbg-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_dbg-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_dbg-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_dbg-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_dbg-disjoint_neighbor_test.$(OBJEXT): \ @@ -3183,6 +3478,8 @@ systems/unit_tests_devel-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_devel-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_devel-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_devel-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_devel-disjoint_neighbor_test.$(OBJEXT): \ @@ -3435,6 +3732,8 @@ systems/unit_tests_oprof-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) 
systems/unit_tests_oprof-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_oprof-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_oprof-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_oprof-disjoint_neighbor_test.$(OBJEXT): \ @@ -3687,6 +3986,8 @@ systems/unit_tests_opt-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_opt-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_opt-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_opt-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_opt-disjoint_neighbor_test.$(OBJEXT): \ @@ -3939,6 +4240,8 @@ systems/unit_tests_prof-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_prof-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_prof-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_prof-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_prof-disjoint_neighbor_test.$(OBJEXT): \ @@ -4535,26 +4838,31 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po@am__quote@ # 
am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po@am__quote@ # am--include-marker @@ -6172,6 +6480,20 @@ systems/unit_tests_dbg-equation_systems_test.obj: systems/equation_systems_test. 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_dbg-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_dbg-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_dbg-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_dbg-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_dbg-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_dbg-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_dbg-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_dbg-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_dbg-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Tpo -c -o systems/unit_tests_dbg-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po @@ -7866,6 +8188,20 @@ 
systems/unit_tests_devel-equation_systems_test.obj: systems/equation_systems_tes @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_devel-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_devel-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_devel-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_devel-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_devel-hilbert_system_kokkos_test.obj: 
systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_devel-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_devel-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_devel-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_devel-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Tpo -c -o systems/unit_tests_devel-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po @@ -9560,6 +9896,20 @@ systems/unit_tests_oprof-equation_systems_test.obj: systems/equation_systems_tes @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_oprof-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_oprof-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_oprof-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_oprof-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.o `test -f 
'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_oprof-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_oprof-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_oprof-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_oprof-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_oprof-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Tpo -c -o 
systems/unit_tests_oprof-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po @@ -11254,6 +11604,20 @@ systems/unit_tests_opt-equation_systems_test.obj: systems/equation_systems_test. @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_opt-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_opt-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_opt-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_opt-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) 
$(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_opt-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_opt-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_opt-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_opt-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_opt-periodic_bc_test.o -MD -MP -MF 
systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Tpo -c -o systems/unit_tests_opt-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po @@ -12948,6 +13312,20 @@ systems/unit_tests_prof-equation_systems_test.obj: systems/equation_systems_test @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_prof-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_prof-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_prof-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_prof-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_prof-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_prof-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_prof-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_prof-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) 
$(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_prof-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Tpo -c -o systems/unit_tests_prof-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po @@ -14004,26 +14382,31 @@ distclean: distclean-am -rm -f systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po -rm -f 
systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-systems_test.Po -rm -f utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po @@ -14656,26 +15039,31 @@ maintainer-clean: maintainer-clean-am -rm -f systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po -rm -f 
systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-systems_test.Po -rm -f utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po diff --git a/tests/driver.C b/tests/driver.C index 3ad324a6e20..220f288152e 100644 --- a/tests/driver.C +++ b/tests/driver.C @@ -13,6 +13,25 @@ #include "libmesh_cppunit.h" #include "test_comm.h" +#ifdef LIBMESH_HAVE_KOKKOS +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +struct KokkosScope +{ + KokkosScope(int & argc, char ** & argv) + { + Kokkos::initialize(argc, argv); + } + + ~KokkosScope() + { + Kokkos::finalize(); + } +}; +#endif + #ifdef LIBMESH_HAVE_CXX11_REGEX // C++ includes @@ -107,6 +126,9 @@ int add_matching_tests_to_runner(CppUnit::Test * test, int main(int argc, char ** argv) { +#ifdef LIBMESH_HAVE_KOKKOS + KokkosScope kokkos_scope(argc, argv); +#endif // Initialize the library. This is necessary because the library // may depend on a number of other libraries (i.e. MPI and Petsc) // that require initialization before use. 
diff --git a/tests/systems/hilbert_system_kokkos_test.C b/tests/systems/hilbert_system_kokkos_test.C new file mode 100644 index 00000000000..46e29513e63 --- /dev/null +++ b/tests/systems/hilbert_system_kokkos_test.C @@ -0,0 +1,252 @@ +#include "test_comm.h" +#include "libmesh_cppunit.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../src/apps/L2system.C" + +using namespace libMesh; + +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && \ + defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +namespace +{ + +constexpr Real projection_tolerance = 5.e-10; + +struct TimedProjectionResult +{ + std::vector solution; + Real elapsed_seconds = 0.; +}; + +void +configure_hilbert_system(HilbertSystem & sys, const bool use_kokkos) +{ + sys.hilbert_order() = 1; + sys.fe_family() = "LAGRANGE"; + sys.fe_order() = 1; + sys.use_kokkos_backend(use_kokkos); + sys.time_solver = std::make_unique(sys); +} + +void +configure_linear_solver(HilbertSystem & sys) +{ + DiffSolver & solver = *sys.time_solver->diff_solver(); + solver.quiet = true; + solver.verbose = false; + solver.relative_step_tolerance = 1.e-12; + + sys.parameters.set("linear solver maximum iterations") = 500; + sys.parameters.set("linear solver tolerance") = 1.e-14; + + auto * linear_solver = sys.get_linear_solver(); + linear_solver->set_solver_type(CG); + linear_solver->set_preconditioner_type(IDENTITY_PRECOND); +} + +std::vector +localize_solution(const System & sys) +{ + std::vector values; + sys.solution->localize(values); + return values; +} + +void +assert_solutions_close(const std::vector & host_solution, + const std::vector & kokkos_solution) +{ + CPPUNIT_ASSERT_EQUAL(host_solution.size(), kokkos_solution.size()); + + Real max_abs_host = 0; + Real max_abs_diff = 0; + + for (const auto i : index_range(host_solution)) + { + max_abs_host = std::max(max_abs_host, 
std::abs(libmesh_real(host_solution[i]))); + max_abs_diff = std::max(max_abs_diff, + std::abs(libmesh_real(host_solution[i] - kokkos_solution[i]))); + } + + const Real scaled_tol = projection_tolerance * std::max(1., max_abs_host); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0., max_abs_diff, scaled_tol); +} + +template +TimedProjectionResult +time_projection_solve(SolveFunctor && solve) +{ + const auto start = std::chrono::steady_clock::now(); + auto solution = solve(); + const auto stop = std::chrono::steady_clock::now(); + + TimedProjectionResult result; + result.solution = std::move(solution); + result.elapsed_seconds = + std::chrono::duration_cast>(stop - start).count(); + return result; +} + +void +report_projection_timing(const std::string & label, + const TimedProjectionResult & host_result, + const TimedProjectionResult & kokkos_result) +{ + libMesh::out << label + << " host_time=" << host_result.elapsed_seconds << " s" + << " kokkos_time=" << kokkos_result.elapsed_seconds << " s"; + + if (kokkos_result.elapsed_seconds > 0.) 
+ libMesh::out << " host_over_kokkos=" + << host_result.elapsed_seconds / kokkos_result.elapsed_seconds; + + libMesh::out << std::endl; +} + +std::vector +solve_analytic_projection_impl(const bool use_kokkos) +{ + ReplicatedMesh mesh(*TestCommWorld); + MeshTools::Generation::build_square(mesh, + 3, + 2, + 0., + 1., + 0., + 1., + QUAD4); + + EquationSystems es(mesh); + HilbertSystem & sys = es.add_system("projection"); + configure_hilbert_system(sys, use_kokkos); + es.init(); + + ParsedFunction goal("sin(pi*x) + 0.25*y"); + sys.set_goal_func(goal); + sys.set_fdm_eps(1.e-7); + configure_linear_solver(sys); + sys.solve(); + + return localize_solution(sys); +} + +TimedProjectionResult +solve_analytic_projection(const bool use_kokkos) +{ + return time_projection_solve([&]() { return solve_analytic_projection_impl(use_kokkos); }); +} + +std::vector +solve_parsed_fem_projection_impl(const bool use_kokkos) +{ + ReplicatedMesh mesh(*TestCommWorld); + MeshTools::Generation::build_square(mesh, + 3, + 2, + 0., + 1., + 0., + 1., + QUAD4); + + EquationSystems es(mesh); + ExplicitSystem & input = es.add_system("input"); + input.add_variable("u", FIRST, LAGRANGE); + + HilbertSystem & sys = es.add_system("projection"); + configure_hilbert_system(sys, use_kokkos); + sys.input_system = &input; + es.init(); + + ParsedFunction input_projection("sin(pi*x) + 0.5*y"); + input.project_solution(&input_projection); + + ParsedFEMFunction goal(input, "u*u + x - 0.25*y"); + sys.set_goal_func(goal); + sys.set_fdm_eps(1.e-7); + configure_linear_solver(sys); + sys.solve(); + + return localize_solution(sys); +} + +TimedProjectionResult +solve_parsed_fem_projection(const bool use_kokkos) +{ + return time_projection_solve([&]() { return solve_parsed_fem_projection_impl(use_kokkos); }); +} + +void +report_single_projection_timing(const std::string & label, + const TimedProjectionResult & result) +{ + libMesh::out << label << " time=" << result.elapsed_seconds << " s" << std::endl; +} + +} // namespace 
+#endif + +class HilbertSystemKokkosTest : public CppUnit::TestCase +{ +public: + LIBMESH_CPPUNIT_TEST_SUITE(HilbertSystemKokkosTest); + CPPUNIT_TEST(testAnalyticParsedFunctionEquivalence); + CPPUNIT_TEST(testParsedFEMFunctionEquivalence); + CPPUNIT_TEST_SUITE_END(); + + void testAnalyticParsedFunctionEquivalence() + { + LOG_UNIT_TEST; + +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && \ + defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + libMesh::out << "Starting analytic host solve" << std::endl; + const auto host_solution = solve_analytic_projection(false); + report_single_projection_timing("Hilbert analytic host projection", host_solution); + libMesh::out << "Starting analytic kokkos solve" << std::endl; + const auto kokkos_solution = solve_analytic_projection(true); + report_single_projection_timing("Hilbert analytic kokkos projection", kokkos_solution); + report_projection_timing("Hilbert analytic projection", + host_solution, + kokkos_solution); + assert_solutions_close(host_solution.solution, kokkos_solution.solution); +#endif + } + + void testParsedFEMFunctionEquivalence() + { + LOG_UNIT_TEST; + +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && \ + defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + libMesh::out << "Starting parsed FEM host solve" << std::endl; + const auto host_solution = solve_parsed_fem_projection(false); + report_single_projection_timing("Hilbert parsed FEM host projection", host_solution); + libMesh::out << "Starting parsed FEM kokkos solve" << std::endl; + const auto kokkos_solution = solve_parsed_fem_projection(true); + report_single_projection_timing("Hilbert parsed FEM kokkos projection", kokkos_solution); + report_projection_timing("Hilbert parsed FEM projection", + host_solution, + kokkos_solution); + assert_solutions_close(host_solution.solution, kokkos_solution.solution); +#endif + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(HilbertSystemKokkosTest); 
From 11d632e18d01643274d5eb2170a75c34e84315f3 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Sun, 17 May 2026 12:40:52 -0600 Subject: [PATCH 38/48] Refine Kokkos Hilbert assembly storage path --- include/gpu/kokkos_hilbert_system.h | 147 ++++++++++++++++ include/numerics/petsc_vector.h | 38 +++++ src/apps/L2system.C | 254 +++++++++++++++++++++++++--- src/apps/L2system.h | 14 +- src/apps/hilbert_kokkos_benchmark.C | 21 +++ 5 files changed, 451 insertions(+), 23 deletions(-) diff --git a/include/gpu/kokkos_hilbert_system.h b/include/gpu/kokkos_hilbert_system.h index 0796d34ee03..1ebf37adb18 100644 --- a/include/gpu/kokkos_hilbert_system.h +++ b/include/gpu/kokkos_hilbert_system.h @@ -431,6 +431,69 @@ run_hilbert_system_value_batch(const libMesh::FEFamily family, ::Kokkos::fence(); } +template +void +run_hilbert_system_bucket_scatter_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const SlotStorage & rhs_slots, + const SlotStorage & mat_slots, + const unsigned int hilbert_order, + GoalAccess goal_access, + GlobalResidualView rhs_values, + GlobalJacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const 
libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + ::Kokkos::atomic_add(&rhs_values(rhs_slots(rhs_offset + i)), -accum.residual(i)); + for (unsigned int j = 0; j != n_dofs; ++j) + ::Kokkos::atomic_add(&mat_values(mat_slots(mat_offset + i * n_dofs + j)), + accum.jacobian(i, j)); + } + }); + ::Kokkos::fence(); +} + template +void +run_hilbert_system_fem_bucket_scatter_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const SlotStorage & rhs_slots, + const SlotStorage & mat_slots, + FieldKeyStorage field_keys, + FieldDofStorage field_dofs, + const FieldLocalIndexStorage & field_local_indices, + const GlobalCoeffStorage & global_coeffs, + GoalFunction goal_function, + const unsigned int hilbert_order, + GlobalResidualView rhs_values, + GlobalJacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + 
const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto goal_access = + GatheredParsedFEMGoalAccess(field_keys, + field_dofs, + global_coeffs, + field_local_indices.values, + goal_function); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + ::Kokkos::atomic_add(&rhs_values(rhs_slots(rhs_offset + i)), -accum.residual(i)); + for (unsigned int j = 0; j != n_dofs; ++j) + ::Kokkos::atomic_add(&mat_values(mat_slots(mat_offset + i * n_dofs + j)), + accum.jacobian(i, j)); + } + }); + ::Kokkos::fence(); +} + template ::Kokkos::View>; + using kokkos_write_view = + ::Kokkos::View>; class KokkosReadViewGuard { @@ -280,6 +284,40 @@ class PetscVector final : public NumericVector { return KokkosReadViewGuard(*this); } + + class KokkosWriteViewGuard + { + public: + explicit KokkosWriteViewGuard(PetscVector & vector) + : _vector(vector), + _data(reinterpret_cast(vector.get_array())), + _view(_data, vector.local_size()) + { + } + + KokkosWriteViewGuard(const KokkosWriteViewGuard &) = delete; + KokkosWriteViewGuard & operator=(const KokkosWriteViewGuard &) = delete; + + ~KokkosWriteViewGuard() + { + _vector.restore_array(); + } + + const kokkos_write_view & view() const + { + return _view; + } + + private: + PetscVector & _vector; + T * _data; + kokkos_write_view _view; + }; + + KokkosWriteViewGuard make_kokkos_write_view_guard() + { + return 
KokkosWriteViewGuard(*this); + } #endif /** diff --git a/src/apps/L2system.C b/src/apps/L2system.C index 107bd9347f1..d5dd1f0af2b 100644 --- a/src/apps/L2system.C +++ b/src/apps/L2system.C @@ -30,6 +30,7 @@ #include "libmesh/string_to_enum.h" #include "libmesh/utility.h" +#include #include #include #include @@ -365,12 +366,16 @@ struct KokkosPetscAssemblyPlan { std::vector records; std::vector buckets; + std::size_t total_rhs_entries = 0; + std::size_t total_mat_entries = 0; std::vector rhs_rows; std::vector mat_rows; std::vector mat_cols; KokkosHilbertBatchData batch_data; ::Kokkos::View rhs_values; ::Kokkos::View mat_values; + KokkosSizeView rhs_local_slots; + KokkosSizeView mat_value_slots; KokkosFEMGoalBatchData fem_goal_batch_data; const void * geometry_cache_id = nullptr; const void * dof_index_cache_id = nullptr; @@ -380,10 +385,113 @@ struct KokkosPetscAssemblyPlan std::set subdomains; const void * matrix_target = nullptr; const void * rhs_target = nullptr; + const void * direct_matrix_target = nullptr; + const void * direct_rhs_target = nullptr; const void * fem_goal_target = nullptr; const void * input_vector_target = nullptr; + bool direct_storage_active = false; + bool coo_storage_ready = false; }; +void +ensure_kokkos_petsc_coo_storage(const DofMap::KokkosDofIndexCache & dof_index_cache, + KokkosPetscAssemblyPlan & plan) +{ + if (!plan.coo_storage_ready) + { + plan.rhs_rows.resize(plan.total_rhs_entries); + plan.mat_rows.resize(plan.total_mat_entries); + plan.mat_cols.resize(plan.total_mat_entries); + build_hilbert_coo_indices( + dof_index_cache, plan.records, plan.rhs_rows, plan.mat_rows, plan.mat_cols); + plan.rhs_values = ::Kokkos::View("hilbert_rhs_values", plan.total_rhs_entries); + plan.mat_values = ::Kokkos::View("hilbert_mat_values", plan.total_mat_entries); + plan.coo_storage_ready = true; + } +} + +bool +ensure_kokkos_petsc_direct_storage(HilbertSystem & sys, + const DofMap::KokkosDofIndexCache & dof_index_cache, + KokkosPetscAssemblyPlan & 
plan, + PetscMatrixBase & system_matrix, + PetscVector & system_rhs) +{ + if (plan.direct_matrix_target == &system_matrix && plan.direct_rhs_target == &system_rhs) + return plan.direct_storage_active; + + plan.direct_matrix_target = &system_matrix; + plan.direct_rhs_target = &system_rhs; + plan.direct_storage_active = false; + + if (sys.comm().size() != 1 || PetscMemTypeDevice(system_rhs.get_mem_type())) + return false; + + const char * mat_type = nullptr; + LibmeshPetscCall2(sys.comm(), MatGetType(system_matrix.mat(), &mat_type)); + PetscBool is_seq_aij = PETSC_FALSE; + PetscBool is_seq_aijkokkos = PETSC_FALSE; + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJ, &is_seq_aij)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJKOKKOS, &is_seq_aijkokkos)); + if (!is_seq_aij && !is_seq_aijkokkos) + return false; + + const PetscInt * row_offsets = nullptr; + const PetscInt * col_indices = nullptr; + PetscScalar * values = nullptr; + PetscMemType mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall2(sys.comm(), + MatSeqAIJGetCSRAndMemType( + system_matrix.mat(), &row_offsets, &col_indices, &values, &mem_type)); + if (!PetscMemTypeHost(mem_type)) + return false; + + std::vector rhs_local_slots(plan.total_rhs_entries); + std::vector mat_value_slots(plan.total_mat_entries); + + for (const auto & record : plan.records) + { + const auto elem_dofs = + &dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs]; + + for (unsigned int i = 0; i != record.n_dofs; ++i) + rhs_local_slots[record.rhs_offset + i] = + system_rhs.map_global_to_local_index(elem_dofs[i]); + + for (unsigned int i = 0; i != record.n_dofs; ++i) + { + const PetscInt row = cast_int(elem_dofs[i]); + const PetscInt row_begin = row_offsets[row]; + const PetscInt row_end = row_offsets[row + 1]; + for (unsigned int j = 0; j != record.n_dofs; ++j) + { + const PetscInt col = cast_int(elem_dofs[j]); + const auto slot_it = + std::find(col_indices + row_begin, 
col_indices + row_end, col); + if (slot_it == col_indices + row_end) + return false; + mat_value_slots[record.mat_offset + i * record.n_dofs + j] = + cast_int(std::distance(col_indices, slot_it)); + } + } + } + + plan.rhs_local_slots = KokkosSizeView("hilbert_rhs_local_slots", rhs_local_slots.size()); + plan.mat_value_slots = KokkosSizeView("hilbert_mat_value_slots", mat_value_slots.size()); + + auto h_rhs_local_slots = ::Kokkos::create_mirror_view(plan.rhs_local_slots); + auto h_mat_value_slots = ::Kokkos::create_mirror_view(plan.mat_value_slots); + for (auto i : index_range(rhs_local_slots)) + h_rhs_local_slots(i) = rhs_local_slots[i]; + for (auto i : index_range(mat_value_slots)) + h_mat_value_slots(i) = mat_value_slots[i]; + + ::Kokkos::deep_copy(plan.rhs_local_slots, h_rhs_local_slots); + ::Kokkos::deep_copy(plan.mat_value_slots, h_mat_value_slots); + plan.direct_storage_active = true; + return true; +} + namespace { @@ -606,11 +714,6 @@ build_kokkos_petsc_assembly_plan(HilbertSystem & sys, return false; sort_hilbert_element_records(geometry_cache, sys.variable_type(0), records); - std::vector rhs_rows(total_rhs_entries); - std::vector mat_rows(total_mat_entries); - std::vector mat_cols(total_mat_entries); - build_hilbert_coo_indices(*dof_index_cache, records, rhs_rows, mat_rows, mat_cols); - KokkosHilbertBatchData batch_data; build_hilbert_batch_data(records, batch_data); std::vector buckets; @@ -618,12 +721,16 @@ build_kokkos_petsc_assembly_plan(HilbertSystem & sys, plan.records = std::move(records); plan.buckets = std::move(buckets); - plan.rhs_rows = std::move(rhs_rows); - plan.mat_rows = std::move(mat_rows); - plan.mat_cols = std::move(mat_cols); + plan.total_rhs_entries = total_rhs_entries; + plan.total_mat_entries = total_mat_entries; + plan.rhs_rows.clear(); + plan.mat_rows.clear(); + plan.mat_cols.clear(); plan.batch_data = std::move(batch_data); - plan.rhs_values = ::Kokkos::View("hilbert_rhs_values", plan.rhs_rows.size()); - plan.mat_values = 
::Kokkos::View("hilbert_mat_values", plan.mat_rows.size()); + plan.rhs_values = ::Kokkos::View(); + plan.mat_values = ::Kokkos::View(); + plan.rhs_local_slots = KokkosSizeView(); + plan.mat_value_slots = KokkosSizeView(); plan.geometry_cache_id = &geometry_cache; plan.dof_index_cache_id = dof_index_cache; plan.fe_type = sys.variable_type(0); @@ -632,8 +739,12 @@ build_kokkos_petsc_assembly_plan(HilbertSystem & sys, plan.subdomains = sys.subdomains_list(); plan.matrix_target = nullptr; plan.rhs_target = nullptr; + plan.direct_matrix_target = nullptr; + plan.direct_rhs_target = nullptr; plan.fem_goal_target = nullptr; plan.input_vector_target = nullptr; + plan.direct_storage_active = false; + plan.coo_storage_ready = false; return true; } @@ -1022,7 +1133,8 @@ assemble_kokkos_petsc_global_system(HilbertSystem & sys, const libMesh::Kokkos::KokkosParsedFunction * analytic_goal, const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal, PetscMatrixBase & system_matrix, - PetscVector & system_rhs) + PetscVector & system_rhs, + HilbertSystem::KokkosAssemblyPath & assembly_path) { if (sys.has_static_condensation() || sys.get_dof_map().n_constrained_dofs()) return false; @@ -1030,6 +1142,111 @@ assemble_kokkos_petsc_global_system(HilbertSystem & sys, if (!analytic_goal && !fem_goal) return false; + const auto * dof_index_cache = sys.get_dof_map().get_kokkos_dof_index_cache(0); + libmesh_assert(dof_index_cache); + + if (ensure_kokkos_petsc_direct_storage( + sys, *dof_index_cache, plan, system_matrix, system_rhs)) + { + assembly_path = HilbertSystem::KokkosAssemblyPath::petsc_direct_storage; + auto rhs_guard = system_rhs.make_kokkos_write_view_guard(); + const PetscInt * row_offsets = nullptr; + const PetscInt * col_indices = nullptr; + PetscScalar * values = nullptr; + PetscMemType mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall2(sys.comm(), + MatSeqAIJGetCSRAndMemType( + system_matrix.mat(), &row_offsets, &col_indices, &values, &mem_type)); + libmesh_ignore(col_indices); 
+ libmesh_assert(PetscMemTypeHost(mem_type)); + using KokkosMatrixWriteView = + ::Kokkos::View>; + const auto n_local_rows = cast_int(system_matrix.local_m()); + KokkosMatrixWriteView matrix_values(reinterpret_cast(values), + cast_int(row_offsets[n_local_rows])); + + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + if (analytic_goal) + { + const auto timed_goal = analytic_goal->with_time(sys.time); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access( + timed_goal, timed_goal.gradient_function()); + + for (const auto & bucket : plan.buckets) + libMesh::Kokkos::detail::run_hilbert_system_bucket_scatter_batch< + kokkos_hilbert_max_dofs>( + bucket.key, + bucket.mapping_type, + bucket.n_nodes, + bucket.quadrature_order, + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + plan.rhs_local_slots, + plan.mat_value_slots, + sys.hilbert_order(), + goal_access, + rhs_guard.view(), + matrix_values, + "hilbert_direct_scatter_bucket_batch"); + } + else + { + const auto timed_goal = fem_goal->with_time(sys.time); + if (!ensure_kokkos_fem_goal_batch_data(sys, *fem_goal, plan)) + return false; + + auto input_guard = plan.fem_goal_batch_data.input_vector->make_kokkos_read_view_guard(); + for (auto bucket_index : index_range(plan.buckets)) + { + const auto & bucket = plan.buckets[bucket_index]; + libMesh::Kokkos::detail::run_hilbert_system_fem_bucket_scatter_batch< + kokkos_hilbert_max_dofs, + kokkos_parsed_fem_max_fields>( + bucket.key, + bucket.mapping_type, + bucket.n_nodes, + bucket.quadrature_order, + 
geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + plan.rhs_local_slots, + plan.mat_value_slots, + plan.fem_goal_batch_data.bucket_field_keys[bucket_index], + plan.fem_goal_batch_data.bucket_field_dofs[bucket_index], + plan.fem_goal_batch_data.field_local_indices, + input_guard.view(), + timed_goal, + sys.hilbert_order(), + rhs_guard.view(), + matrix_values, + "hilbert_direct_scatter_fem_bucket_batch"); + } + } + + return true; + } + + assembly_path = HilbertSystem::KokkosAssemblyPath::petsc_coo_fallback; + ensure_kokkos_petsc_coo_storage(*dof_index_cache, plan); + if (analytic_goal) { const auto timed_goal = analytic_goal->with_time(sys.time); @@ -1192,16 +1409,6 @@ HilbertSystem::ensure_kokkos_fem_goal_func() }); } -void -HilbertSystem::reset_kokkos_goal_cache() -{ - _kokkos_goal_func.reset(); - _kokkos_fem_goal_func.reset(); -#if defined(LIBMESH_HAVE_PETSC) - _kokkos_petsc_plan.reset(); -#endif -} - KokkosPetscAssemblyPlan * HilbertSystem::ensure_kokkos_petsc_plan(bool * rebuilt) { @@ -1315,16 +1522,19 @@ HilbertSystem::try_kokkos_petsc_solve() 0.; const auto assembly_start = clock::now(); + auto assembly_path = HilbertSystem::KokkosAssemblyPath::none; if (!assemble_kokkos_petsc_global_system(*this, *plan, analytic_goal, fem_goal, *petsc_matrix, - *petsc_rhs)) + *petsc_rhs, + assembly_path)) return false; const auto assembly_stop = clock::now(); this->_last_kokkos_timing.assembly_seconds = std::chrono::duration_cast>(assembly_stop - assembly_start).count(); + this->_last_kokkos_timing.assembly_path = assembly_path; petsc_matrix->close(); 
petsc_rhs->close(); diff --git a/src/apps/L2system.h b/src/apps/L2system.h index 8532ee811e7..3d6a2c7aa51 100644 --- a/src/apps/L2system.h +++ b/src/apps/L2system.h @@ -45,12 +45,20 @@ struct KokkosPetscAssemblyPlan; class HilbertSystem : public libMesh::FEMSystem { public: + enum class KokkosAssemblyPath + { + none, + petsc_direct_storage, + petsc_coo_fallback + }; + struct KokkosTimingInfo { libMesh::Real plan_seconds = 0.; libMesh::Real assembly_seconds = 0.; libMesh::Real solve_seconds = 0.; libMesh::Real total_seconds = 0.; + KokkosAssemblyPath assembly_path = KokkosAssemblyPath::none; }; // Constructor @@ -170,7 +178,11 @@ class HilbertSystem : public libMesh::FEMSystem KokkosPetscAssemblyPlan * ensure_kokkos_petsc_plan(bool * rebuilt = nullptr); #endif - void reset_kokkos_goal_cache(); + void reset_kokkos_goal_cache() + { + _kokkos_goal_func.reset(); + _kokkos_fem_goal_func.reset(); + } #else void reset_kokkos_goal_cache() {} #endif diff --git a/src/apps/hilbert_kokkos_benchmark.C b/src/apps/hilbert_kokkos_benchmark.C index 75d93d7cc9f..4e437389ce5 100644 --- a/src/apps/hilbert_kokkos_benchmark.C +++ b/src/apps/hilbert_kokkos_benchmark.C @@ -85,6 +85,7 @@ struct BenchmarkResult Real average_solver_seconds = 0.; Real average_total_seconds = 0.; dof_id_type n_dofs = 0; + HilbertSystem::KokkosAssemblyPath assembly_path = HilbertSystem::KokkosAssemblyPath::none; }; #ifdef LIBMESH_HAVE_KOKKOS @@ -168,6 +169,22 @@ void configure_linear_solver(HilbertSystem & sys, linear_solver->set_preconditioner_type(options.preconditioner_type); } +const char * +kokkos_assembly_path_name(const HilbertSystem::KokkosAssemblyPath path) +{ + switch (path) + { + case HilbertSystem::KokkosAssemblyPath::none: + return "none"; + case HilbertSystem::KokkosAssemblyPath::petsc_direct_storage: + return "direct PETSc storage"; + case HilbertSystem::KokkosAssemblyPath::petsc_coo_fallback: + return "PETSc COO fallback"; + } + + return "unknown"; +} + BenchmarkResult solve_projection_once(const 
Parallel::Communicator & comm, const BenchmarkOptions & options, const bool use_kokkos) @@ -227,6 +244,7 @@ BenchmarkResult solve_projection_once(const Parallel::Communicator & comm, result.average_assembly_seconds = timing.assembly_seconds; result.average_solver_seconds = timing.solve_seconds; result.average_total_seconds = timing.total_seconds; + result.assembly_path = timing.assembly_path; } result.n_dofs = projection.n_dofs(); projection.solution->localize(result.solution); @@ -248,6 +266,7 @@ BenchmarkResult solve_projection(const Parallel::Communicator & comm, result.average_solver_seconds += single.average_solver_seconds; result.average_total_seconds += single.average_total_seconds; result.n_dofs = single.n_dofs; + result.assembly_path = single.assembly_path; result.solution = std::move(single.solution); } @@ -420,6 +439,8 @@ int main(int argc, char ** argv) libMesh::out << "Degrees of freedom: " << host_result.n_dofs << std::endl; libMesh::out << "Host solve time: " << host_result.average_solve_seconds << " s" << std::endl; libMesh::out << "Kokkos solve time: " << kokkos_result.average_solve_seconds << " s" << std::endl; + libMesh::out << "Kokkos assembly path: " + << kokkos_assembly_path_name(kokkos_result.assembly_path) << std::endl; libMesh::out << "Kokkos plan time: " << kokkos_result.average_plan_seconds << " s" << std::endl; libMesh::out << "Kokkos assembly: " << kokkos_result.average_assembly_seconds << " s" << std::endl; libMesh::out << "Kokkos solver: " << kokkos_result.average_solver_seconds << " s" << std::endl; From ffc51d019985313cec6f7e733fab9eb340ce6835 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 08:12:52 -0600 Subject: [PATCH 39/48] Refine Kokkos Hilbert direct PETSc storage --- configure | 3 +- include/Makefile.in | 17 +- include/gpu/kokkos_hilbert_system.h | 86 +- include/libmesh/Makefile.am | 4 + include/libmesh/Makefile.in | 25 +- include/libmesh_config.h.in | 3 + include/numerics/petsc_matrix_base.h | 21 + 
include/numerics/petsc_vector.h | 82 +- m4/poly2tri.m4 | 2 +- src/apps/L2system.C | 1331 +++++++++++++++----- src/apps/L2system.h | 6 +- src/apps/hilbert_kokkos_benchmark.C | 2 - src/numerics/petsc_matrix_base.C | 1 - tests/Makefile.in | 2 +- tests/systems/hilbert_system_kokkos_test.C | 1 + 15 files changed, 1231 insertions(+), 355 deletions(-) diff --git a/configure b/configure index c0250de457a..c3eed3aee79 100755 --- a/configure +++ b/configure @@ -58955,7 +58955,7 @@ fi if test "x$enablepoly2tri" = "xyes" then : - POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified" + POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified -I\$(top_srcdir)/contrib/poly2tri/poly2tri" printf "%s\n" "#define HAVE_POLY2TRI 1" >>confdefs.h @@ -64367,7 +64367,6 @@ else case e in #( esac fi - printf "%s\n" "#define HAVE_KOKKOS 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: <<< Configuring library with Kokkos support >>>" >&5 diff --git a/include/Makefile.in b/include/Makefile.in index 6e0b5936a18..81e0e777867 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -217,13 +217,14 @@ am__uninstall_files_from_dir = { \ $(am__cd) "$$dir" && echo $$files | $(am__xargs_n) 40 $(am__rm_f); }; \ } am__installdirs = "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" -am__nobase_include_HEADERS_DIST = gpu/kokkos_fe_types.h \ - gpu/kokkos_fe_shape_dispatch.h gpu/kokkos_fe_base.h \ - gpu/kokkos_fe_evaluator.h gpu/kokkos_fe_lagrange_1d.h \ - gpu/kokkos_fe_lagrange_2d.h gpu/kokkos_fe_lagrange_3d.h \ - gpu/kokkos_fe_monomial.h gpu/kokkos_fe_face_map.h \ - gpu/kokkos_fe_map.h gpu/kokkos_quadrature.h \ - gpu/kokkos_hilbert_system.h gpu/kokkos_parsed_function.h +am__nobase_include_HEADERS_DIST = gpu/kokkos_scalar_types.h \ + gpu/kokkos_fe_types.h gpu/kokkos_fe_shape_dispatch.h \ + gpu/kokkos_fe_base.h gpu/kokkos_fe_evaluator.h \ + gpu/kokkos_fe_lagrange_1d.h gpu/kokkos_fe_lagrange_2d.h \ + gpu/kokkos_fe_lagrange_3d.h gpu/kokkos_fe_monomial.h \ + 
gpu/kokkos_fe_face_map.h gpu/kokkos_fe_map.h \ + gpu/kokkos_quadrature.h gpu/kokkos_hilbert_system.h \ + gpu/kokkos_parsed_function.h HEADERS = $(include_HEADERS) $(nobase_include_HEADERS) \ $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ @@ -620,6 +621,7 @@ SUBDIRS = libmesh # downstream code can use #include "libmesh/gpu/kokkos_fe_types.h" etc. # nobase_ is used instead of the standard flat install to keep the namespace. @LIBMESH_ENABLE_KOKKOS_TRUE@nobase_include_HEADERS = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_scalar_types.h \ @LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_types.h \ @LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_shape_dispatch.h \ @LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_base.h \ @@ -979,7 +981,6 @@ include_HEADERS = \ parallel/threads_allocators.h \ parallel/threads_none.h \ parallel/threads_pthread.h \ - parallel/threads_spin_mutex_forward.h \ parallel/threads_tbb.h \ partitioning/centroid_partitioner.h \ partitioning/hilbert_sfc_partitioner.h \ diff --git a/include/gpu/kokkos_hilbert_system.h b/include/gpu/kokkos_hilbert_system.h index 1ebf37adb18..61f6c37fd16 100644 --- a/include/gpu/kokkos_hilbert_system.h +++ b/include/gpu/kokkos_hilbert_system.h @@ -175,6 +175,59 @@ struct FlatDeviceValueSink } }; +template +struct DirectScatterAccess +{ + View values; + + LIBMESH_DEVICE_INLINE + void add(const std::size_t slot, + const Number value) const + { + ::Kokkos::atomic_add(&values(slot), value); + } +}; + +template +struct SplitScatterAccess +{ + LocalView local_values; + RemoteView remote_values; + std::size_t local_size = 0; + + LIBMESH_DEVICE_INLINE + void add(const std::size_t slot, + const Number value) const + { + if (slot < local_size) + ::Kokkos::atomic_add(&local_values(slot), value); + else + ::Kokkos::atomic_add(&remote_values(slot - local_size), value); + } +}; + +template +struct SplitMatrixScatterAccess +{ + DiagView diag_values; + OffdiagView offdiag_values; + RemoteView remote_values; + 
std::size_t diag_size = 0; + std::size_t offdiag_base = 0; + + LIBMESH_DEVICE_INLINE + void add(const std::size_t slot, + const Number value) const + { + if (slot < diag_size) + ::Kokkos::atomic_add(&diag_values(slot), value); + else if (slot < offdiag_base) + ::Kokkos::atomic_add(&offdiag_values(slot - diag_size), value); + else + ::Kokkos::atomic_add(&remote_values(slot - offdiag_base), value); + } +}; + struct ZeroCoeffAccess { LIBMESH_DEVICE_INLINE @@ -349,7 +402,6 @@ run_hilbert_system_assembly(const libMesh::FEShapeKey key, accum); sink.write(accum); }); - ::Kokkos::fence(); return true; } @@ -428,7 +480,6 @@ run_hilbert_system_value_batch(const libMesh::FEFamily family, mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); } }); - ::Kokkos::fence(); } template + typename ResidualScatterAccess, + typename JacobianScatterAccess> void run_hilbert_system_bucket_scatter_batch(const libMesh::FEShapeKey key, const libMesh::ElemMappingType mapping_type, @@ -456,8 +507,8 @@ run_hilbert_system_bucket_scatter_batch(const libMesh::FEShapeKey key, const SlotStorage & mat_slots, const unsigned int hilbert_order, GoalAccess goal_access, - GlobalResidualView rhs_values, - GlobalJacobianView mat_values, + ResidualScatterAccess rhs_scatter, + JacobianScatterAccess mat_scatter, const char * const kernel_name) { const auto n_records = elem_indices.extent(0); @@ -485,13 +536,11 @@ run_hilbert_system_bucket_scatter_batch(const libMesh::FEShapeKey key, const auto mat_offset = mat_offsets(record_index); for (unsigned int i = 0; i != n_dofs; ++i) { - ::Kokkos::atomic_add(&rhs_values(rhs_slots(rhs_offset + i)), -accum.residual(i)); + rhs_scatter.add(rhs_slots(rhs_offset + i), -accum.residual(i)); for (unsigned int j = 0; j != n_dofs; ++j) - ::Kokkos::atomic_add(&mat_values(mat_slots(mat_offset + i * n_dofs + j)), - accum.jacobian(i, j)); + mat_scatter.add(mat_slots(mat_offset + i * n_dofs + j), accum.jacobian(i, j)); } }); - ::Kokkos::fence(); } template + typename 
ResidualScatterAccess, + typename JacobianScatterAccess> void run_hilbert_system_fem_bucket_scatter_batch(const libMesh::FEShapeKey key, const libMesh::ElemMappingType mapping_type, @@ -695,8 +742,8 @@ run_hilbert_system_fem_bucket_scatter_batch(const libMesh::FEShapeKey key, const GlobalCoeffStorage & global_coeffs, GoalFunction goal_function, const unsigned int hilbert_order, - GlobalResidualView rhs_values, - GlobalJacobianView mat_values, + ResidualScatterAccess rhs_scatter, + JacobianScatterAccess mat_scatter, const char * const kernel_name) { const auto n_records = elem_indices.extent(0); @@ -736,13 +783,11 @@ run_hilbert_system_fem_bucket_scatter_batch(const libMesh::FEShapeKey key, const auto mat_offset = mat_offsets(record_index); for (unsigned int i = 0; i != n_dofs; ++i) { - ::Kokkos::atomic_add(&rhs_values(rhs_slots(rhs_offset + i)), -accum.residual(i)); + rhs_scatter.add(rhs_slots(rhs_offset + i), -accum.residual(i)); for (unsigned int j = 0; j != n_dofs; ++j) - ::Kokkos::atomic_add(&mat_values(mat_slots(mat_offset + i * n_dofs + j)), - accum.jacobian(i, j)); + mat_scatter.add(mat_slots(mat_offset + i * n_dofs + j), accum.jacobian(i, j)); } }); - ::Kokkos::fence(); } template = 2021) */ +#undef HAVE_ONETBB + /* Define if OpenMP is enabled */ #undef HAVE_OPENMP diff --git a/include/numerics/petsc_matrix_base.h b/include/numerics/petsc_matrix_base.h index 9099d94a3df..7768b07d791 100644 --- a/include/numerics/petsc_matrix_base.h +++ b/include/numerics/petsc_matrix_base.h @@ -131,6 +131,27 @@ class PetscMatrixBase : public SparseMatrix */ void set_destroy_mat_on_exit(bool destroy = true); + /** + * Replace the underlying PETSc Mat with a prebuilt object. + * + * This is intended for advanced users that need to construct a Mat + * outside of the normal PetscMatrix initialization path while still + * using the libMesh wrapper as the owning interface. 
+ */ + void reset_mat(Mat m, bool destroy_on_exit = true) + { + if (_mat == m && _destroy_mat_on_exit == destroy_on_exit) + return; + + this->clear(); + _mat = m; + _destroy_mat_on_exit = destroy_on_exit; + this->_is_initialized = (_mat != nullptr); + + if (_mat) + this->set_context(); + } + /** * Swaps the internal data pointers of two PetscMatrices, no actual * values are swapped. diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index c7d953ab0b9..f3c363e6015 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -255,10 +255,31 @@ class PetscVector final : public NumericVector { public: explicit KokkosReadViewGuard(PetscVector & vector) - : _vector(vector), - _data(reinterpret_cast(vector.get_array_read())), - _view(_data, vector.local_size()) + : _vector(vector) { + _borrowed_vec = vector._vec; + if (vector.is_effectively_ghosted()) + { + LibmeshPetscCallA(_vector.comm().get(), VecGhostGetLocalForm(vector._vec, &_borrowed_vec)); + PetscInt my_local_size = 0; + LibmeshPetscCallA(_vector.comm().get(), VecGetLocalSize(_borrowed_vec, &my_local_size)); + _local_size = static_cast(my_local_size); + } + else + _local_size = vector.local_size(); + + const PetscScalar * data = nullptr; + LibmeshPetscCallA(_vector.comm().get(), + VecGetArrayReadAndMemType(_borrowed_vec, &data, &_mem_type)); + _data = reinterpret_cast(data); + const bool host_inaccessible = + PetscMemTypeHost(_mem_type) && + !::Kokkos::SpaceAccessibility::accessible; + libmesh_error_msg_if(host_inaccessible, + "PetscVector Kokkos read access requires host-accessible execution " + "space for host PETSc memory."); + _view = kokkos_read_view(_data, _local_size); } KokkosReadViewGuard(const KokkosReadViewGuard &) = delete; @@ -266,7 +287,14 @@ class PetscVector final : public NumericVector ~KokkosReadViewGuard() { - _vector.restore_array(); + const PetscScalar * data = reinterpret_cast(_data); + const auto restore_ierr = 
VecRestoreArrayReadAndMemType(_borrowed_vec, &data); + libmesh_ignore(restore_ierr); + if (_vector.is_effectively_ghosted()) + { + const auto ghost_ierr = VecGhostRestoreLocalForm(_vector._vec, &_borrowed_vec); + libmesh_ignore(ghost_ierr); + } } const kokkos_read_view & view() const @@ -276,7 +304,10 @@ class PetscVector final : public NumericVector private: PetscVector & _vector; - const T * _data; + Vec _borrowed_vec = nullptr; + const T * _data = nullptr; + PetscMemType _mem_type = PETSC_MEMTYPE_HOST; + numeric_index_type _local_size = 0; kokkos_read_view _view; }; @@ -289,10 +320,31 @@ class PetscVector final : public NumericVector { public: explicit KokkosWriteViewGuard(PetscVector & vector) - : _vector(vector), - _data(reinterpret_cast(vector.get_array())), - _view(_data, vector.local_size()) + : _vector(vector) { + _borrowed_vec = vector._vec; + if (vector.is_effectively_ghosted()) + { + LibmeshPetscCallA(_vector.comm().get(), VecGhostGetLocalForm(vector._vec, &_borrowed_vec)); + PetscInt my_local_size = 0; + LibmeshPetscCallA(_vector.comm().get(), VecGetLocalSize(_borrowed_vec, &my_local_size)); + _local_size = static_cast(my_local_size); + } + else + _local_size = vector.local_size(); + + PetscScalar * data = nullptr; + LibmeshPetscCallA(_vector.comm().get(), + VecGetArrayWriteAndMemType(_borrowed_vec, &data, &_mem_type)); + _data = reinterpret_cast(data); + const bool host_inaccessible = + PetscMemTypeHost(_mem_type) && + !::Kokkos::SpaceAccessibility::accessible; + libmesh_error_msg_if(host_inaccessible, + "PetscVector Kokkos write access requires host-accessible execution " + "space for host PETSc memory."); + _view = kokkos_write_view(_data, _local_size); } KokkosWriteViewGuard(const KokkosWriteViewGuard &) = delete; @@ -300,7 +352,14 @@ class PetscVector final : public NumericVector ~KokkosWriteViewGuard() { - _vector.restore_array(); + PetscScalar * data = reinterpret_cast(_data); + const auto restore_ierr = 
VecRestoreArrayWriteAndMemType(_borrowed_vec, &data); + libmesh_ignore(restore_ierr); + if (_vector.is_effectively_ghosted()) + { + const auto ghost_ierr = VecGhostRestoreLocalForm(_vector._vec, &_borrowed_vec); + libmesh_ignore(ghost_ierr); + } } const kokkos_write_view & view() const @@ -310,7 +369,10 @@ class PetscVector final : public NumericVector private: PetscVector & _vector; - T * _data; + Vec _borrowed_vec = nullptr; + T * _data = nullptr; + PetscMemType _mem_type = PETSC_MEMTYPE_HOST; + numeric_index_type _local_size = 0; kokkos_write_view _view; }; diff --git a/m4/poly2tri.m4 b/m4/poly2tri.m4 index 9c57750b511..9792bf44e74 100644 --- a/m4/poly2tri.m4 +++ b/m4/poly2tri.m4 @@ -19,7 +19,7 @@ AC_DEFUN([CONFIGURE_POLY2TRI], [ dnl The poly2tri API is distributed with libmesh, so we don't have to guess dnl where it might be installed... - POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified" + POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified -I\$(top_srcdir)/contrib/poly2tri/poly2tri" AC_DEFINE(HAVE_POLY2TRI, 1, [Flag indicating whether the library will be compiled with poly2tri support]) AC_MSG_RESULT(<<< Configuring library with poly2tri support >>>) ]) diff --git a/src/apps/L2system.C b/src/apps/L2system.C index d5dd1f0af2b..e930828d2d7 100644 --- a/src/apps/L2system.C +++ b/src/apps/L2system.C @@ -26,6 +26,7 @@ #include "libmesh/linear_solver.h" #include "libmesh/mesh.h" #include "libmesh/numeric_vector.h" +#include "libmesh/parallel_sync.h" #include "libmesh/quadrature.h" #include "libmesh/string_to_enum.h" #include "libmesh/utility.h" @@ -33,12 +34,17 @@ #include #include #include +#include +#include +#include #include +#include #ifdef LIBMESH_HAVE_PETSC #include "libmesh/petsc_matrix_base.h" #include "libmesh/petsc_macro.h" #include "libmesh/petsc_vector.h" +#include #endif #if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) @@ -62,6 +68,7 @@ using KokkosScalarView = ::Kokkos::View; using 
KokkosDenseJacobianView = ::Kokkos::View; using KokkosFlatJacobianView = ::Kokkos::View; using KokkosUnsignedIntView = ::Kokkos::View; +using KokkosPetscIntView = ::Kokkos::View; using KokkosSizeView = ::Kokkos::View; using KokkosFieldKeyRecordView = ::Kokkos::View; using KokkosFieldDofRecordView = ::Kokkos::View; @@ -236,37 +243,6 @@ prewarm_kokkos_hilbert_entities(HilbertSystem & sys, fem_goal->field_variable_number(field)); } -#if defined(LIBMESH_HAVE_PETSC) -void -build_hilbert_coo_indices(const DofMap::KokkosDofIndexCache & dof_index_cache, - const std::vector & records, - std::vector & rhs_rows, - std::vector & mat_rows, - std::vector & mat_cols) -{ - for (const auto & record : records) - { - const unsigned int n_dofs = record.n_dofs; - - for (unsigned int i = 0; i != n_dofs; ++i) - rhs_rows[record.rhs_offset + i] = - cast_int( - dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs + i]); - - for (unsigned int i = 0; i != n_dofs; ++i) - for (unsigned int j = 0; j != n_dofs; ++j) - { - const std::size_t offset = record.mat_offset + i * n_dofs + j; - mat_rows[offset] = cast_int( - dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs + - i]); - mat_cols[offset] = cast_int( - dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs + - j]); - } - } -} - class HostExactParsedFEMGoalAccess { public: @@ -362,20 +338,62 @@ struct KokkosFEMGoalBatchData PetscVector * input_vector = nullptr; }; +enum class KokkosDirectMatrixLayout +{ + none, + seq_aij, + mpi_aij +}; + +struct PetscIntPairHash +{ + std::size_t operator()(const std::pair & values) const noexcept + { + const auto first_hash = std::hash{}(values.first); + const auto second_hash = std::hash{}(values.second); + return first_hash ^ (second_hash + 0x9e3779b97f4a7c15ULL + (first_hash << 6) + (first_hash >> 2)); + } +}; + struct KokkosPetscAssemblyPlan { std::vector records; std::vector buckets; std::size_t 
total_rhs_entries = 0; std::size_t total_mat_entries = 0; - std::vector rhs_rows; - std::vector mat_rows; - std::vector mat_cols; KokkosHilbertBatchData batch_data; - ::Kokkos::View rhs_values; - ::Kokkos::View mat_values; + std::vector local_row_offsets; + std::vector local_column_indices; + std::vector diag_row_offsets; + std::vector diag_column_indices; + std::vector offdiag_row_offsets; + std::vector offdiag_column_indices; + std::vector offdiag_global_columns; + KokkosPetscIntView local_row_offsets_view; + KokkosPetscIntView local_column_indices_view; + KokkosPetscIntView diag_row_offsets_view; + KokkosPetscIntView diag_column_indices_view; + KokkosPetscIntView offdiag_row_offsets_view; + KokkosPetscIntView offdiag_column_indices_view; + KokkosPetscIntView offdiag_global_columns_view; KokkosSizeView rhs_local_slots; KokkosSizeView mat_value_slots; + ::Kokkos::View rhs_remote_values; + ::Kokkos::View mat_remote_values; + ::Kokkos::View rhs_remote_root_values; + ::Kokkos::View mat_remote_root_values; + std::vector rhs_remote_rows; + std::vector mat_remote_rows; + std::vector mat_remote_cols; + std::vector mat_remote_root_indices; + KokkosPetscIntView rhs_remote_rows_view; + KokkosPetscIntView mat_remote_rows_view; + KokkosPetscIntView mat_remote_cols_view; + KokkosPetscIntView mat_remote_root_indices_view; + std::vector rhs_remote_owners; + std::vector mat_remote_owners; + PetscSF rhs_remote_sf = nullptr; + PetscSF mat_remote_sf = nullptr; KokkosFEMGoalBatchData fem_goal_batch_data; const void * geometry_cache_id = nullptr; const void * dof_index_cache_id = nullptr; @@ -383,71 +401,492 @@ struct KokkosPetscAssemblyPlan unsigned int hilbert_order = 0; int extra_quadrature_order = 0; std::set subdomains; - const void * matrix_target = nullptr; - const void * rhs_target = nullptr; + const void * graph_matrix_target = nullptr; const void * direct_matrix_target = nullptr; const void * direct_rhs_target = nullptr; const void * fem_goal_target = nullptr; const void * 
input_vector_target = nullptr; + PetscInt row_start = 0; + PetscInt row_stop = 0; + PetscInt col_start = 0; + PetscInt col_stop = 0; + std::size_t rhs_local_size = 0; + std::size_t mat_diag_size = 0; + std::size_t mat_offdiag_size = 0; + KokkosDirectMatrixLayout direct_matrix_layout = KokkosDirectMatrixLayout::none; bool direct_storage_active = false; - bool coo_storage_ready = false; + + ~KokkosPetscAssemblyPlan() + { + if (rhs_remote_sf) + { + PetscErrorCode ierr = PetscSFDestroy(&rhs_remote_sf); + libmesh_ignore(ierr); + } + if (mat_remote_sf) + { + PetscErrorCode ierr = PetscSFDestroy(&mat_remote_sf); + libmesh_ignore(ierr); + } + } }; +using RemoteRhsContribution = std::pair; +using RemoteMatContribution = std::tuple; + +constexpr PetscMemType +kokkos_default_petsc_mem_type() +{ + return PETSC_MEMTYPE_KOKKOS; +} + void -ensure_kokkos_petsc_coo_storage(const DofMap::KokkosDofIndexCache & dof_index_cache, - KokkosPetscAssemblyPlan & plan) +clear_kokkos_petsc_remote_sf(KokkosPetscAssemblyPlan & plan) { - if (!plan.coo_storage_ready) + if (plan.rhs_remote_sf) + { + const auto ierr = PetscSFDestroy(&plan.rhs_remote_sf); + libmesh_ignore(ierr); + plan.rhs_remote_sf = nullptr; + } + if (plan.mat_remote_sf) { - plan.rhs_rows.resize(plan.total_rhs_entries); - plan.mat_rows.resize(plan.total_mat_entries); - plan.mat_cols.resize(plan.total_mat_entries); - build_hilbert_coo_indices( - dof_index_cache, plan.records, plan.rhs_rows, plan.mat_rows, plan.mat_cols); - plan.rhs_values = ::Kokkos::View("hilbert_rhs_values", plan.total_rhs_entries); - plan.mat_values = ::Kokkos::View("hilbert_mat_values", plan.total_mat_entries); - plan.coo_storage_ready = true; + const auto ierr = PetscSFDestroy(&plan.mat_remote_sf); + libmesh_ignore(ierr); + plan.mat_remote_sf = nullptr; } + plan.rhs_remote_root_values = {}; + plan.mat_remote_root_values = {}; + plan.mat_remote_root_indices.clear(); + plan.rhs_remote_rows_view = {}; + plan.mat_remote_rows_view = {}; + plan.mat_remote_cols_view = 
{}; + plan.mat_remote_root_indices_view = {}; +} + +void +sync_kokkos_petsc_int_view(const std::vector & host_values, + KokkosPetscIntView & device_view, + const std::string & name) +{ + device_view = KokkosPetscIntView(name, host_values.size()); + auto host_view = ::Kokkos::create_mirror_view(device_view); + for (auto i : index_range(host_values)) + host_view(i) = host_values[i]; + ::Kokkos::deep_copy(device_view, host_view); +} + +void +sync_kokkos_petsc_owned_graph_views(KokkosPetscAssemblyPlan & plan) +{ + sync_kokkos_petsc_int_view(plan.local_row_offsets, + plan.local_row_offsets_view, + "hilbert_local_row_offsets"); + sync_kokkos_petsc_int_view(plan.local_column_indices, + plan.local_column_indices_view, + "hilbert_local_column_indices"); + sync_kokkos_petsc_int_view(plan.diag_row_offsets, + plan.diag_row_offsets_view, + "hilbert_diag_row_offsets"); + sync_kokkos_petsc_int_view(plan.diag_column_indices, + plan.diag_column_indices_view, + "hilbert_diag_column_indices"); + sync_kokkos_petsc_int_view(plan.offdiag_row_offsets, + plan.offdiag_row_offsets_view, + "hilbert_offdiag_row_offsets"); + sync_kokkos_petsc_int_view(plan.offdiag_column_indices, + plan.offdiag_column_indices_view, + "hilbert_offdiag_column_indices"); + sync_kokkos_petsc_int_view(plan.offdiag_global_columns, + plan.offdiag_global_columns_view, + "hilbert_offdiag_global_columns"); +} + +void +sync_kokkos_petsc_remote_slot_views(KokkosPetscAssemblyPlan & plan) +{ + sync_kokkos_petsc_int_view(plan.rhs_remote_rows, + plan.rhs_remote_rows_view, + "hilbert_rhs_remote_rows"); + sync_kokkos_petsc_int_view(plan.mat_remote_rows, + plan.mat_remote_rows_view, + "hilbert_mat_remote_rows"); + sync_kokkos_petsc_int_view(plan.mat_remote_cols, + plan.mat_remote_cols_view, + "hilbert_mat_remote_cols"); + sync_kokkos_petsc_int_view(plan.mat_remote_root_indices, + plan.mat_remote_root_indices_view, + "hilbert_mat_remote_root_indices"); } bool -ensure_kokkos_petsc_direct_storage(HilbertSystem & sys, - const 
DofMap::KokkosDofIndexCache & dof_index_cache, +build_kokkos_petsc_owned_csr_graph(HilbertSystem & sys, KokkosPetscAssemblyPlan & plan, - PetscMatrixBase & system_matrix, - PetscVector & system_rhs) + const KokkosDirectMatrixLayout layout, + const PetscInt row_start, + const PetscInt row_stop, + const PetscInt col_start, + const PetscInt col_stop) { - if (plan.direct_matrix_target == &system_matrix && plan.direct_rhs_target == &system_rhs) - return plan.direct_storage_active; + const auto * sp = sys.get_dof_map().get_sparsity_pattern(); + if (!sp) + return false; - plan.direct_matrix_target = &system_matrix; - plan.direct_rhs_target = &system_rhs; - plan.direct_storage_active = false; + const auto & graph = sp->get_sparsity_pattern(); + plan.local_row_offsets.assign(graph.size() + 1, 0); + plan.local_column_indices.clear(); + plan.diag_row_offsets.clear(); + plan.diag_column_indices.clear(); + plan.offdiag_row_offsets.clear(); + plan.offdiag_column_indices.clear(); + plan.offdiag_global_columns.clear(); + plan.row_start = row_start; + plan.row_stop = row_stop; + plan.col_start = col_start; + plan.col_stop = col_stop; + + std::vector> row_columns(graph.size()); + std::vector> row_offdiag_globals; + + if (layout == KokkosDirectMatrixLayout::mpi_aij) + row_offdiag_globals.resize(graph.size()); + + std::vector all_offdiag_globals; + for (auto local_row : index_range(graph)) + { + auto & cols = row_columns[local_row]; + cols.reserve(graph[local_row].size()); + for (const auto dof : graph[local_row]) + cols.push_back(cast_int(dof)); + + std::sort(cols.begin(), cols.end()); + cols.erase(std::unique(cols.begin(), cols.end()), cols.end()); + plan.local_row_offsets[local_row + 1] = + plan.local_row_offsets[local_row] + cast_int(cols.size()); + plan.local_column_indices.insert(plan.local_column_indices.end(), cols.begin(), cols.end()); + + if (layout != KokkosDirectMatrixLayout::mpi_aij) + continue; - if (sys.comm().size() != 1 || 
PetscMemTypeDevice(system_rhs.get_mem_type())) - return false; + auto & offdiag_globals = row_offdiag_globals[local_row]; + for (const auto col : cols) + if (col < col_start || col >= col_stop) + { + offdiag_globals.push_back(col); + all_offdiag_globals.push_back(col); + } + } - const char * mat_type = nullptr; - LibmeshPetscCall2(sys.comm(), MatGetType(system_matrix.mat(), &mat_type)); - PetscBool is_seq_aij = PETSC_FALSE; - PetscBool is_seq_aijkokkos = PETSC_FALSE; - LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJ, &is_seq_aij)); - LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJKOKKOS, &is_seq_aijkokkos)); - if (!is_seq_aij && !is_seq_aijkokkos) - return false; + if (layout != KokkosDirectMatrixLayout::mpi_aij) + return true; + + std::sort(all_offdiag_globals.begin(), all_offdiag_globals.end()); + all_offdiag_globals.erase(std::unique(all_offdiag_globals.begin(), all_offdiag_globals.end()), + all_offdiag_globals.end()); + plan.offdiag_global_columns = std::move(all_offdiag_globals); + + std::unordered_map offdiag_column_map; + offdiag_column_map.reserve(plan.offdiag_global_columns.size()); + for (auto i : index_range(plan.offdiag_global_columns)) + offdiag_column_map.emplace(plan.offdiag_global_columns[i], cast_int(i)); + + plan.diag_row_offsets.assign(graph.size() + 1, 0); + plan.offdiag_row_offsets.assign(graph.size() + 1, 0); + + for (auto local_row : index_range(graph)) + { + const auto & cols = row_columns[local_row]; + const auto & offdiag_globals = row_offdiag_globals[local_row]; + for (const auto col : cols) + if (col >= col_start && col < col_stop) + plan.diag_column_indices.push_back(col - col_start); + + for (const auto col : offdiag_globals) + plan.offdiag_column_indices.push_back(offdiag_column_map.at(col)); + + plan.diag_row_offsets[local_row + 1] = + cast_int(plan.diag_column_indices.size()); + plan.offdiag_row_offsets[local_row + 1] = + cast_int(plan.offdiag_column_indices.size()); + } - const PetscInt * row_offsets = 
nullptr; - const PetscInt * col_indices = nullptr; - PetscScalar * values = nullptr; - PetscMemType mem_type = PETSC_MEMTYPE_HOST; + sync_kokkos_petsc_owned_graph_views(plan); + return true; +} + +void +finalize_kokkos_direct_slots(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + const std::vector & rhs_local_slots, + const std::vector & mat_value_slots) +{ + plan.rhs_local_slots = KokkosSizeView("hilbert_rhs_local_slots", rhs_local_slots.size()); + plan.mat_value_slots = KokkosSizeView("hilbert_mat_value_slots", mat_value_slots.size()); + + auto h_rhs_local_slots = ::Kokkos::create_mirror_view(plan.rhs_local_slots); + auto h_mat_value_slots = ::Kokkos::create_mirror_view(plan.mat_value_slots); + for (auto i : index_range(rhs_local_slots)) + h_rhs_local_slots(i) = rhs_local_slots[i]; + for (auto i : index_range(mat_value_slots)) + h_mat_value_slots(i) = mat_value_slots[i]; + + ::Kokkos::deep_copy(plan.rhs_local_slots, h_rhs_local_slots); + ::Kokkos::deep_copy(plan.mat_value_slots, h_mat_value_slots); + + plan.rhs_remote_owners.resize(plan.rhs_remote_rows.size()); + for (auto i : index_range(plan.rhs_remote_rows)) + plan.rhs_remote_owners[i] = + sys.get_dof_map().dof_owner(cast_int(plan.rhs_remote_rows[i])); + + plan.mat_remote_owners.resize(plan.mat_remote_rows.size()); + for (auto i : index_range(plan.mat_remote_rows)) + plan.mat_remote_owners[i] = + sys.get_dof_map().dof_owner(cast_int(plan.mat_remote_rows[i])); + + if (!plan.rhs_remote_rows.empty()) + plan.rhs_remote_values = + ::Kokkos::View("hilbert_rhs_remote_values", plan.rhs_remote_rows.size()); + + if (!plan.mat_remote_rows.empty()) + plan.mat_remote_values = + ::Kokkos::View("hilbert_mat_remote_values", plan.mat_remote_rows.size()); +} + +std::size_t +lookup_kokkos_owned_matrix_slot(const KokkosPetscAssemblyPlan & plan, + const PetscInt row, + const PetscInt col) +{ + const PetscInt local_row = row - plan.row_start; + libmesh_error_msg_if(local_row < 0 || row >= plan.row_stop, + "HilbertSystem 
Kokkos remote matrix slot lookup received a nonlocal row."); + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + { + const PetscInt row_begin = plan.local_row_offsets[local_row]; + const PetscInt row_end = plan.local_row_offsets[local_row + 1]; + const auto slot_it = std::lower_bound(plan.local_column_indices.begin() + row_begin, + plan.local_column_indices.begin() + row_end, + col); + libmesh_error_msg_if(slot_it == plan.local_column_indices.begin() + row_end || *slot_it != col, + "HilbertSystem Kokkos owned CSR graph is missing a sequential " + "remote matrix coupling."); + return cast_int(std::distance(plan.local_column_indices.begin(), slot_it)); + } + + if (col >= plan.col_start && col < plan.col_stop) + { + const PetscInt local_col = col - plan.col_start; + const PetscInt row_begin = plan.diag_row_offsets[local_row]; + const PetscInt row_end = plan.diag_row_offsets[local_row + 1]; + const auto slot_it = std::lower_bound(plan.diag_column_indices.begin() + row_begin, + plan.diag_column_indices.begin() + row_end, + local_col); + libmesh_error_msg_if(slot_it == plan.diag_column_indices.begin() + row_end || *slot_it != local_col, + "HilbertSystem Kokkos owned CSR graph is missing a diagonal MPI " + "remote matrix coupling."); + return cast_int(std::distance(plan.diag_column_indices.begin(), slot_it)); + } + + const auto offdiag_col_it = + std::lower_bound(plan.offdiag_global_columns.begin(), plan.offdiag_global_columns.end(), col); + libmesh_error_msg_if(offdiag_col_it == plan.offdiag_global_columns.end() || *offdiag_col_it != col, + "HilbertSystem Kokkos owned CSR graph is missing an off-diagonal MPI " + "remote matrix column."); + const PetscInt offdiag_local_col = + cast_int(std::distance(plan.offdiag_global_columns.begin(), offdiag_col_it)); + const PetscInt row_begin = plan.offdiag_row_offsets[local_row]; + const PetscInt row_end = plan.offdiag_row_offsets[local_row + 1]; + const auto slot_it = 
std::lower_bound(plan.offdiag_column_indices.begin() + row_begin, + plan.offdiag_column_indices.begin() + row_end, + offdiag_local_col); + libmesh_error_msg_if(slot_it == plan.offdiag_column_indices.begin() + row_end || + *slot_it != offdiag_local_col, + "HilbertSystem Kokkos owned CSR graph is missing an off-diagonal MPI " + "remote matrix coupling."); + return plan.mat_diag_size + + cast_int(std::distance(plan.offdiag_column_indices.begin(), slot_it)); +} + +bool +build_kokkos_petsc_remote_rhs_sf(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + if (plan.direct_matrix_layout != KokkosDirectMatrixLayout::mpi_aij || + plan.rhs_remote_rows.empty()) + return true; + + std::vector remote_nodes(plan.rhs_remote_rows.size()); + for (auto i : index_range(plan.rhs_remote_rows)) + { + const processor_id_type owner = plan.rhs_remote_owners[i]; + remote_nodes[i].rank = cast_int(owner); + remote_nodes[i].index = + cast_int(plan.rhs_remote_rows[i] - sys.get_dof_map().first_dof(owner)); + } + + LibmeshPetscCall2(sys.comm(), PetscSFCreate(sys.comm().get(), &plan.rhs_remote_sf)); + LibmeshPetscCall2(sys.comm(), + PetscSFSetGraph(plan.rhs_remote_sf, + cast_int(plan.rhs_local_size), + cast_int(plan.rhs_remote_rows.size()), + nullptr, + PETSC_COPY_VALUES, + remote_nodes.data(), + PETSC_COPY_VALUES)); + LibmeshPetscCall2(sys.comm(), PetscSFSetUp(plan.rhs_remote_sf)); + plan.rhs_remote_root_values = + ::Kokkos::View("hilbert_rhs_remote_root_values", plan.rhs_local_size); + return true; +} + +bool +build_kokkos_petsc_remote_mat_sf(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + if (plan.direct_matrix_layout != KokkosDirectMatrixLayout::mpi_aij || + plan.mat_remote_rows.empty()) + return true; + + const PetscInt local_row_count = plan.row_stop - plan.row_start; + std::vector setup_nodes(plan.mat_remote_rows.size()); + for (auto i : index_range(plan.mat_remote_rows)) + { + const processor_id_type owner = plan.mat_remote_owners[i]; + setup_nodes[i].rank = 
cast_int(owner); + setup_nodes[i].index = + cast_int(plan.mat_remote_rows[i] - sys.get_dof_map().first_dof(owner)); + } + + PetscSF setup_sf = nullptr; + LibmeshPetscCall2(sys.comm(), PetscSFCreate(sys.comm().get(), &setup_sf)); + LibmeshPetscCall2(sys.comm(), + PetscSFSetGraph(setup_sf, + local_row_count, + cast_int(plan.mat_remote_rows.size()), + nullptr, + PETSC_COPY_VALUES, + setup_nodes.data(), + PETSC_COPY_VALUES)); + LibmeshPetscCall2(sys.comm(), PetscSFSetUp(setup_sf)); + + const PetscInt * degrees = nullptr; + LibmeshPetscCall2(sys.comm(), PetscSFComputeDegreeBegin(setup_sf, °rees)); + LibmeshPetscCall2(sys.comm(), PetscSFComputeDegreeEnd(setup_sf, °rees)); + + std::vector gathered_offsets(local_row_count + 1, 0); + for (PetscInt local_row = 0; local_row != local_row_count; ++local_row) + gathered_offsets[local_row + 1] = gathered_offsets[local_row] + degrees[local_row]; + + std::vector gathered_cols(gathered_offsets.back(), -1); + if (!plan.mat_remote_cols.empty()) + { + LibmeshPetscCall2(sys.comm(), + PetscSFGatherBegin(setup_sf, + MPIU_INT, + plan.mat_remote_cols.data(), + gathered_cols.data())); + LibmeshPetscCall2(sys.comm(), + PetscSFGatherEnd(setup_sf, + MPIU_INT, + plan.mat_remote_cols.data(), + gathered_cols.data())); + } + + std::vector reply_slots(gathered_cols.size(), -1); + for (PetscInt local_row = 0; local_row != local_row_count; ++local_row) + { + const PetscInt global_row = plan.row_start + local_row; + for (PetscInt k = gathered_offsets[local_row]; k != gathered_offsets[local_row + 1]; ++k) + reply_slots[k] = + cast_int(lookup_kokkos_owned_matrix_slot(plan, global_row, gathered_cols[k])); + } + + std::vector remote_root_indices(plan.mat_remote_rows.size(), -1); + if (!remote_root_indices.empty()) + { + LibmeshPetscCall2(sys.comm(), + PetscSFScatterBegin(setup_sf, + MPIU_INT, + reply_slots.data(), + remote_root_indices.data())); + LibmeshPetscCall2(sys.comm(), + PetscSFScatterEnd(setup_sf, + MPIU_INT, + reply_slots.data(), + 
remote_root_indices.data())); + } + LibmeshPetscCall2(sys.comm(), PetscSFDestroy(&setup_sf)); + + std::vector remote_nodes(plan.mat_remote_rows.size()); + for (auto i : index_range(plan.mat_remote_rows)) + { + libmesh_error_msg_if(remote_root_indices[i] < 0, + "HilbertSystem Kokkos remote matrix slot setup did not receive an " + "owner slot index."); + remote_nodes[i].rank = cast_int(plan.mat_remote_owners[i]); + remote_nodes[i].index = remote_root_indices[i]; + } + + LibmeshPetscCall2(sys.comm(), PetscSFCreate(sys.comm().get(), &plan.mat_remote_sf)); LibmeshPetscCall2(sys.comm(), - MatSeqAIJGetCSRAndMemType( - system_matrix.mat(), &row_offsets, &col_indices, &values, &mem_type)); - if (!PetscMemTypeHost(mem_type)) + PetscSFSetGraph(plan.mat_remote_sf, + cast_int(plan.mat_diag_size + plan.mat_offdiag_size), + cast_int(plan.mat_remote_rows.size()), + nullptr, + PETSC_COPY_VALUES, + remote_nodes.data(), + PETSC_COPY_VALUES)); + LibmeshPetscCall2(sys.comm(), PetscSFSetUp(plan.mat_remote_sf)); + plan.mat_remote_root_indices = std::move(remote_root_indices); + plan.mat_remote_root_values = + ::Kokkos::View("hilbert_mat_remote_root_values", + plan.mat_diag_size + plan.mat_offdiag_size); + sync_kokkos_petsc_remote_slot_views(plan); + return true; +} + +bool +build_kokkos_petsc_remote_sf(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + clear_kokkos_petsc_remote_sf(plan); + if (!build_kokkos_petsc_remote_rhs_sf(sys, plan)) + return false; + if (!build_kokkos_petsc_remote_mat_sf(sys, plan)) return false; + sync_kokkos_petsc_remote_slot_views(plan); + return true; +} - std::vector rhs_local_slots(plan.total_rhs_entries); - std::vector mat_value_slots(plan.total_mat_entries); +bool +bind_kokkos_direct_slots_from_plan_graph(HilbertSystem & sys, + const DofMap::KokkosDofIndexCache & dof_index_cache, + KokkosPetscAssemblyPlan & plan, + const numeric_index_type rhs_first_local, + const numeric_index_type rhs_last_local, + std::vector & rhs_slots, + std::vector & 
mat_slots) +{ + plan.rhs_remote_rows.clear(); + plan.mat_remote_rows.clear(); + plan.mat_remote_cols.clear(); + plan.rhs_remote_owners.clear(); + plan.mat_remote_owners.clear(); + plan.rhs_remote_values = {}; + plan.mat_remote_values = {}; + clear_kokkos_petsc_remote_sf(plan); + + std::unordered_map rhs_remote_slot_map; + std::unordered_map, std::size_t, PetscIntPairHash> mat_remote_slot_map; + std::unordered_map offdiag_column_map; + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij) + { + offdiag_column_map.reserve(plan.offdiag_global_columns.size()); + for (auto i : index_range(plan.offdiag_global_columns)) + offdiag_column_map.emplace(plan.offdiag_global_columns[i], cast_int(i)); + } for (const auto & record : plan.records) { @@ -455,39 +894,247 @@ ensure_kokkos_petsc_direct_storage(HilbertSystem & sys, &dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs]; for (unsigned int i = 0; i != record.n_dofs; ++i) - rhs_local_slots[record.rhs_offset + i] = - system_rhs.map_global_to_local_index(elem_dofs[i]); + { + const auto row = elem_dofs[i]; + if (row >= rhs_first_local && row < rhs_last_local) + rhs_slots[record.rhs_offset + i] = row - rhs_first_local; + else + { + const PetscInt petsc_row = cast_int(row); + const auto [it, inserted] = + rhs_remote_slot_map.emplace(petsc_row, plan.rhs_remote_rows.size()); + if (inserted) + plan.rhs_remote_rows.push_back(petsc_row); + rhs_slots[record.rhs_offset + i] = (rhs_last_local - rhs_first_local) + it->second; + } + } for (unsigned int i = 0; i != record.n_dofs; ++i) { const PetscInt row = cast_int(elem_dofs[i]); - const PetscInt row_begin = row_offsets[row]; - const PetscInt row_end = row_offsets[row + 1]; + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij && + (row < plan.row_start || row >= plan.row_stop)) + { + for (unsigned int j = 0; j != record.n_dofs; ++j) + { + const PetscInt col = cast_int(elem_dofs[j]); + const auto key = std::make_pair(row, 
col); + const auto [it, inserted] = + mat_remote_slot_map.emplace(key, plan.mat_remote_rows.size()); + if (inserted) + { + plan.mat_remote_rows.push_back(row); + plan.mat_remote_cols.push_back(col); + } + mat_slots[record.mat_offset + i * record.n_dofs + j] = + plan.mat_diag_size + plan.mat_offdiag_size + it->second; + } + continue; + } + + const PetscInt local_row = row - plan.row_start; + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + { + const PetscInt row_begin = plan.local_row_offsets[local_row]; + const PetscInt row_end = plan.local_row_offsets[local_row + 1]; + for (unsigned int j = 0; j != record.n_dofs; ++j) + { + const PetscInt col = cast_int(elem_dofs[j]); + const auto slot_it = std::lower_bound(plan.local_column_indices.begin() + row_begin, + plan.local_column_indices.begin() + row_end, + col); + libmesh_error_msg_if(slot_it == plan.local_column_indices.begin() + row_end || + *slot_it != col, + "HilbertSystem Kokkos owned CSR graph is missing a " + "sequential matrix coupling."); + mat_slots[record.mat_offset + i * record.n_dofs + j] = + cast_int(std::distance(plan.local_column_indices.begin(), slot_it)); + } + continue; + } + + const PetscInt diag_row_begin = plan.diag_row_offsets[local_row]; + const PetscInt diag_row_end = plan.diag_row_offsets[local_row + 1]; + const PetscInt offdiag_row_begin = plan.offdiag_row_offsets[local_row]; + const PetscInt offdiag_row_end = plan.offdiag_row_offsets[local_row + 1]; for (unsigned int j = 0; j != record.n_dofs; ++j) { const PetscInt col = cast_int(elem_dofs[j]); - const auto slot_it = - std::find(col_indices + row_begin, col_indices + row_end, col); - if (slot_it == col_indices + row_end) - return false; - mat_value_slots[record.mat_offset + i * record.n_dofs + j] = - cast_int(std::distance(col_indices, slot_it)); + if (col >= plan.col_start && col < plan.col_stop) + { + const PetscInt local_col = col - plan.col_start; + const auto slot_it = + 
std::lower_bound(plan.diag_column_indices.begin() + diag_row_begin, + plan.diag_column_indices.begin() + diag_row_end, + local_col); + libmesh_error_msg_if(slot_it == plan.diag_column_indices.begin() + diag_row_end || + *slot_it != local_col, + "HilbertSystem Kokkos owned CSR graph is missing a " + "diagonal MPI matrix coupling."); + mat_slots[record.mat_offset + i * record.n_dofs + j] = + cast_int(std::distance(plan.diag_column_indices.begin(), slot_it)); + } + else + { + const auto offdiag_col_it = offdiag_column_map.find(col); + libmesh_error_msg_if(offdiag_col_it == offdiag_column_map.end(), + "HilbertSystem Kokkos owned CSR graph is missing an " + "off-diagonal MPI matrix column."); + const PetscInt offdiag_local_col = offdiag_col_it->second; + const auto slot_it = + std::lower_bound(plan.offdiag_column_indices.begin() + offdiag_row_begin, + plan.offdiag_column_indices.begin() + offdiag_row_end, + offdiag_local_col); + libmesh_error_msg_if(slot_it == plan.offdiag_column_indices.begin() + offdiag_row_end || + *slot_it != offdiag_local_col, + "HilbertSystem Kokkos owned CSR graph is missing an " + "off-diagonal MPI matrix coupling."); + mat_slots[record.mat_offset + i * record.n_dofs + j] = + plan.mat_diag_size + + cast_int(std::distance(plan.offdiag_column_indices.begin(), slot_it)); + } } } } - plan.rhs_local_slots = KokkosSizeView("hilbert_rhs_local_slots", rhs_local_slots.size()); - plan.mat_value_slots = KokkosSizeView("hilbert_mat_value_slots", mat_value_slots.size()); + return true; +} - auto h_rhs_local_slots = ::Kokkos::create_mirror_view(plan.rhs_local_slots); - auto h_mat_value_slots = ::Kokkos::create_mirror_view(plan.mat_value_slots); - for (auto i : index_range(rhs_local_slots)) - h_rhs_local_slots(i) = rhs_local_slots[i]; - for (auto i : index_range(mat_value_slots)) - h_mat_value_slots(i) = mat_value_slots[i]; +bool +ensure_kokkos_petsc_owned_matrix(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + PetscMatrixBase & system_matrix) +{ + if 
(plan.graph_matrix_target == &system_matrix) + return true; - ::Kokkos::deep_copy(plan.rhs_local_slots, h_rhs_local_slots); - ::Kokkos::deep_copy(plan.mat_value_slots, h_mat_value_slots); + const char * mat_type = nullptr; + const char * options_prefix = nullptr; + LibmeshPetscCall2(sys.comm(), MatGetType(system_matrix.mat(), &mat_type)); + LibmeshPetscCall2(sys.comm(), MatGetOptionsPrefix(system_matrix.mat(), &options_prefix)); + + Mat new_mat = nullptr; + LibmeshPetscCall2(sys.comm(), MatCreate(sys.comm().get(), &new_mat)); + LibmeshPetscCall2(sys.comm(), + MatSetSizes(new_mat, + cast_int(system_matrix.local_m()), + cast_int(system_matrix.local_n()), + cast_int(system_matrix.m()), + cast_int(system_matrix.n()))); + LibmeshPetscCall2(sys.comm(), MatSetBlockSize(new_mat, 1)); + if (options_prefix) + LibmeshPetscCall2(sys.comm(), MatSetOptionsPrefix(new_mat, options_prefix)); + LibmeshPetscCall2(sys.comm(), MatSetType(new_mat, mat_type)); + LibmeshPetscCall2(sys.comm(), MatSetFromOptions(new_mat)); + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + LibmeshPetscCall2(sys.comm(), + MatSeqAIJSetPreallocationCSR(new_mat, + plan.local_row_offsets.data(), + plan.local_column_indices.data(), + nullptr)); + else if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij) + LibmeshPetscCall2(sys.comm(), + MatMPIAIJSetPreallocationCSR(new_mat, + plan.local_row_offsets.data(), + plan.local_column_indices.data(), + nullptr)); + else + { + LibmeshPetscCall2(sys.comm(), MatDestroy(&new_mat)); + return false; + } + + LibmeshPetscCall2(sys.comm(), MatSetOption(new_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); + LibmeshPetscCall2(sys.comm(), MatAssemblyBegin(new_mat, MAT_FINAL_ASSEMBLY)); + LibmeshPetscCall2(sys.comm(), MatAssemblyEnd(new_mat, MAT_FINAL_ASSEMBLY)); + system_matrix.reset_mat(new_mat, true); + plan.graph_matrix_target = &system_matrix; + return true; +} + +bool +ensure_kokkos_petsc_direct_storage(HilbertSystem & sys, + const 
DofMap::KokkosDofIndexCache & dof_index_cache, + KokkosPetscAssemblyPlan & plan, + PetscMatrixBase & system_matrix, + PetscVector & system_rhs) +{ + if (plan.direct_matrix_target == &system_matrix && + plan.direct_rhs_target == &system_rhs && + plan.direct_storage_active) + return true; + + plan.direct_matrix_target = &system_matrix; + plan.direct_rhs_target = &system_rhs; + plan.direct_storage_active = false; + plan.direct_matrix_layout = KokkosDirectMatrixLayout::none; + plan.rhs_local_size = 0; + plan.mat_diag_size = 0; + plan.mat_offdiag_size = 0; + plan.rhs_remote_rows.clear(); + plan.mat_remote_rows.clear(); + plan.mat_remote_cols.clear(); + plan.rhs_remote_owners.clear(); + plan.mat_remote_owners.clear(); + plan.rhs_remote_values = {}; + plan.mat_remote_values = {}; + + const char * mat_type = nullptr; + LibmeshPetscCall2(sys.comm(), MatGetType(system_matrix.mat(), &mat_type)); + PetscBool is_seq_aij = PETSC_FALSE; + PetscBool is_seq_aijkokkos = PETSC_FALSE; + PetscBool is_mpi_aij = PETSC_FALSE; + PetscBool is_mpi_aijkokkos = PETSC_FALSE; + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJ, &is_seq_aij)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJKOKKOS, &is_seq_aijkokkos)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATMPIAIJ, &is_mpi_aij)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATMPIAIJKOKKOS, &is_mpi_aijkokkos)); + if (!is_seq_aij && !is_seq_aijkokkos && !is_mpi_aij && !is_mpi_aijkokkos) + return false; + plan.direct_matrix_layout = + (is_seq_aij || is_seq_aijkokkos) ? 
KokkosDirectMatrixLayout::seq_aij : + KokkosDirectMatrixLayout::mpi_aij; + + PetscInt rhs_first_local_petsc = 0; + PetscInt rhs_last_local_petsc = 0; + LibmeshPetscCall2(sys.comm(), + VecGetOwnershipRange(system_rhs.vec(), &rhs_first_local_petsc, &rhs_last_local_petsc)); + const numeric_index_type rhs_first_local = cast_int(rhs_first_local_petsc); + const numeric_index_type rhs_last_local = cast_int(rhs_last_local_petsc); + PetscInt row_start = 0; + PetscInt row_stop = 0; + PetscInt col_start = 0; + PetscInt col_stop = 0; + LibmeshPetscCall2(sys.comm(), MatGetOwnershipRange(system_matrix.mat(), &row_start, &row_stop)); + LibmeshPetscCall2(sys.comm(), + MatGetOwnershipRangeColumn(system_matrix.mat(), &col_start, &col_stop)); + if (!build_kokkos_petsc_owned_csr_graph( + sys, plan, plan.direct_matrix_layout, row_start, row_stop, col_start, col_stop)) + return false; + + std::vector rhs_local_slots(plan.total_rhs_entries); + std::vector mat_value_slots(plan.total_mat_entries); + plan.rhs_local_size = cast_int(rhs_last_local - rhs_first_local); + plan.mat_diag_size = + plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij ? + cast_int(plan.local_row_offsets.empty() ? 0 : plan.local_row_offsets.back()) : + cast_int(plan.diag_row_offsets.empty() ? 0 : plan.diag_row_offsets.back()); + plan.mat_offdiag_size = + plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij ? + 0 : + cast_int(plan.offdiag_row_offsets.empty() ? 
0 : plan.offdiag_row_offsets.back()); + + if (!bind_kokkos_direct_slots_from_plan_graph( + sys, dof_index_cache, plan, rhs_first_local, rhs_last_local, rhs_local_slots, mat_value_slots)) + return false; + + if (!ensure_kokkos_petsc_owned_matrix(sys, plan, system_matrix)) + return false; + + finalize_kokkos_direct_slots(sys, plan, rhs_local_slots, mat_value_slots); + if (!build_kokkos_petsc_remote_sf(sys, plan)) + return false; plan.direct_storage_active = true; return true; } @@ -723,12 +1370,7 @@ build_kokkos_petsc_assembly_plan(HilbertSystem & sys, plan.buckets = std::move(buckets); plan.total_rhs_entries = total_rhs_entries; plan.total_mat_entries = total_mat_entries; - plan.rhs_rows.clear(); - plan.mat_rows.clear(); - plan.mat_cols.clear(); plan.batch_data = std::move(batch_data); - plan.rhs_values = ::Kokkos::View(); - plan.mat_values = ::Kokkos::View(); plan.rhs_local_slots = KokkosSizeView(); plan.mat_value_slots = KokkosSizeView(); plan.geometry_cache_id = &geometry_cache; @@ -737,14 +1379,12 @@ build_kokkos_petsc_assembly_plan(HilbertSystem & sys, plan.hilbert_order = sys.hilbert_order(); plan.extra_quadrature_order = sys.extra_quadrature_order; plan.subdomains = sys.subdomains_list(); - plan.matrix_target = nullptr; - plan.rhs_target = nullptr; + plan.graph_matrix_target = nullptr; plan.direct_matrix_target = nullptr; plan.direct_rhs_target = nullptr; plan.fem_goal_target = nullptr; plan.input_vector_target = nullptr; plan.direct_storage_active = false; - plan.coo_storage_ready = false; return true; } @@ -1136,48 +1776,209 @@ assemble_kokkos_petsc_global_system(HilbertSystem & sys, PetscVector & system_rhs, HilbertSystem::KokkosAssemblyPath & assembly_path) { - if (sys.has_static_condensation() || sys.get_dof_map().n_constrained_dofs()) - return false; - - if (!analytic_goal && !fem_goal) - return false; + libmesh_error_msg_if(sys.has_static_condensation(), + "HilbertSystem Kokkos direct PETSc storage does not support static " + "condensation."); + 
libmesh_error_msg_if(sys.get_dof_map().n_constrained_dofs(), + "HilbertSystem Kokkos direct PETSc storage does not yet support " + "constrained dofs."); + libmesh_error_msg_if(!analytic_goal && !fem_goal, + "HilbertSystem Kokkos direct PETSc storage requires a parsed analytic " + "or parsed FEM goal."); const auto * dof_index_cache = sys.get_dof_map().get_kokkos_dof_index_cache(0); libmesh_assert(dof_index_cache); - if (ensure_kokkos_petsc_direct_storage( + if (!ensure_kokkos_petsc_direct_storage( sys, *dof_index_cache, plan, system_matrix, system_rhs)) + libmesh_error_msg("Failed to build the HilbertSystem Kokkos direct PETSc storage path. " + "The COO fallback path has been removed; this case must be supported " + "directly or fail."); + + assembly_path = HilbertSystem::KokkosAssemblyPath::petsc_direct_storage; + using KokkosVectorWriteView = + typename PetscVector::kokkos_write_view; + using KokkosMatrixWriteView = + ::Kokkos::View>; + const auto make_matrix_write_view = + [](PetscScalar * values, const PetscMemType mem_type, const std::size_t size, const char * name) { - assembly_path = HilbertSystem::KokkosAssemblyPath::petsc_direct_storage; - auto rhs_guard = system_rhs.make_kokkos_write_view_guard(); - const PetscInt * row_offsets = nullptr; - const PetscInt * col_indices = nullptr; - PetscScalar * values = nullptr; - PetscMemType mem_type = PETSC_MEMTYPE_HOST; - LibmeshPetscCall2(sys.comm(), - MatSeqAIJGetCSRAndMemType( - system_matrix.mat(), &row_offsets, &col_indices, &values, &mem_type)); - libmesh_ignore(col_indices); - libmesh_assert(PetscMemTypeHost(mem_type)); - using KokkosMatrixWriteView = - ::Kokkos::View>; - const auto n_local_rows = cast_int(system_matrix.local_m()); - KokkosMatrixWriteView matrix_values(reinterpret_cast(values), - cast_int(row_offsets[n_local_rows])); - - const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); - if (analytic_goal) - { - const auto timed_goal = analytic_goal->with_time(sys.time); - const auto 
goal_access = - libMesh::Kokkos::detail::make_hilbert_analytic_goal_access( - timed_goal, timed_goal.gradient_function()); - - for (const auto & bucket : plan.buckets) - libMesh::Kokkos::detail::run_hilbert_system_bucket_scatter_batch< - kokkos_hilbert_max_dofs>( + const bool host_inaccessible = + PetscMemTypeHost(mem_type) && + !::Kokkos::SpaceAccessibility::accessible; + if (host_inaccessible) + libmesh_error_msg(std::string("HilbertSystem Kokkos direct PETSc storage requires ") + + name + " to be accessible from the active Kokkos execution space."); + return KokkosMatrixWriteView(reinterpret_cast(values), size); + }; + PetscInt rhs_first_local_petsc = 0; + PetscInt rhs_last_local_petsc = 0; + LibmeshPetscCall2(sys.comm(), + VecGetOwnershipRange(system_rhs.vec(), &rhs_first_local_petsc, &rhs_last_local_petsc)); + + { + auto rhs_guard = system_rhs.make_kokkos_write_view_guard(); + KokkosVectorWriteView local_rhs_values = rhs_guard.view(); + KokkosVectorWriteView remote_rhs_values; + KokkosMatrixWriteView diag_matrix_values; + KokkosMatrixWriteView offdiag_matrix_values; + KokkosMatrixWriteView remote_matrix_values; + PetscScalar * diag_values_ptr = nullptr; + PetscScalar * offdiag_values_ptr = nullptr; + PetscInt row_start = 0; + PetscInt row_stop = 0; + PetscInt col_start = 0; + PetscInt col_stop = 0; + const PetscInt * diag_row_offsets = nullptr; + const PetscInt * diag_col_indices = nullptr; + const PetscInt * offdiag_row_offsets = nullptr; + const PetscInt * offdiag_col_indices = nullptr; + const PetscInt * offdiag_global_columns = nullptr; + PetscInt offdiag_n_cols = 0; + std::unordered_map offdiag_column_map; + + if (plan.rhs_remote_values.extent(0)) + { + ::Kokkos::deep_copy(plan.rhs_remote_values, Number(0)); + remote_rhs_values = KokkosVectorWriteView(plan.rhs_remote_values.data(), + plan.rhs_remote_values.extent(0)); + } + + if (plan.mat_remote_values.extent(0)) + { + ::Kokkos::deep_copy(plan.mat_remote_values, Number(0)); + remote_matrix_values = 
KokkosMatrixWriteView(plan.mat_remote_values.data(), + plan.mat_remote_values.extent(0)); + } + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + { + const PetscInt * row_offsets = nullptr; + const PetscInt * col_indices = nullptr; + PetscScalar * values = nullptr; + PetscMemType mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall2(sys.comm(), + MatSeqAIJGetCSRAndMemType( + system_matrix.mat(), &row_offsets, &col_indices, &values, &mem_type)); + libmesh_ignore(col_indices); + diag_values_ptr = values; + diag_matrix_values = make_matrix_write_view(values, mem_type, plan.mat_diag_size, "PETSc matrix values"); + } + else if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij) + { + Mat diagonal = nullptr; + Mat offdiagonal = nullptr; + LibmeshPetscCall2(sys.comm(), + MatMPIAIJGetSeqAIJ(system_matrix.mat(), + &diagonal, + &offdiagonal, + &offdiag_global_columns)); + libmesh_ignore(offdiag_global_columns); + + PetscMemType mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall2(sys.comm(), + MatSeqAIJGetCSRAndMemType( + diagonal, &diag_row_offsets, &diag_col_indices, &diag_values_ptr, &mem_type)); + libmesh_ignore(diag_col_indices); + diag_matrix_values = make_matrix_write_view(diag_values_ptr, + mem_type, + plan.mat_diag_size, + "PETSc diagonal matrix values"); + + mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall2(sys.comm(), + MatSeqAIJGetCSRAndMemType( + offdiagonal, + &offdiag_row_offsets, + &offdiag_col_indices, + &offdiag_values_ptr, + &mem_type)); + libmesh_ignore(offdiag_col_indices); + offdiag_matrix_values = make_matrix_write_view(offdiag_values_ptr, + mem_type, + plan.mat_offdiag_size, + "PETSc off-diagonal matrix values"); + + LibmeshPetscCall2(sys.comm(), MatGetOwnershipRange(system_matrix.mat(), &row_start, &row_stop)); + LibmeshPetscCall2(sys.comm(), + MatGetOwnershipRangeColumn(system_matrix.mat(), &col_start, &col_stop)); + PetscInt offdiag_n_rows = 0; + LibmeshPetscCall2(sys.comm(), MatGetSize(offdiagonal, &offdiag_n_rows, 
&offdiag_n_cols)); + libmesh_ignore(offdiag_n_rows); + if (offdiag_global_columns && offdiag_n_cols) + { + offdiag_column_map.reserve(cast_int(offdiag_n_cols)); + for (PetscInt i = 0; i != offdiag_n_cols; ++i) + offdiag_column_map.emplace(offdiag_global_columns[i], i); + } + } + else + libmesh_error_msg("HilbertSystem Kokkos direct PETSc storage was built without a valid " + "PETSc AIJ storage layout."); + + const auto rhs_scatter = + libMesh::Kokkos::detail::SplitScatterAccess{ + local_rhs_values, remote_rhs_values, plan.rhs_local_size}; + const auto mat_scatter = + libMesh::Kokkos::detail::SplitMatrixScatterAccess{ + diag_matrix_values, + offdiag_matrix_values, + remote_matrix_values, + plan.mat_diag_size, + plan.mat_diag_size + plan.mat_offdiag_size}; + + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + if (analytic_goal) + { + const auto timed_goal = analytic_goal->with_time(sys.time); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access( + timed_goal, timed_goal.gradient_function()); + + for (const auto & bucket : plan.buckets) + libMesh::Kokkos::detail::run_hilbert_system_bucket_scatter_batch< + kokkos_hilbert_max_dofs>( + bucket.key, + bucket.mapping_type, + bucket.n_nodes, + bucket.quadrature_order, + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + plan.rhs_local_slots, + plan.mat_value_slots, + sys.hilbert_order(), + goal_access, + rhs_scatter, + mat_scatter, + "hilbert_direct_scatter_bucket_batch"); + } + else + { + const auto timed_goal = fem_goal->with_time(sys.time); + if 
(!ensure_kokkos_fem_goal_batch_data(sys, *fem_goal, plan)) + libmesh_error_msg("HilbertSystem Kokkos direct PETSc storage could not build parsed " + "FEM goal batch data."); + + auto input_guard = plan.fem_goal_batch_data.input_vector->make_kokkos_read_view_guard(); + for (auto bucket_index : index_range(plan.buckets)) + { + const auto & bucket = plan.buckets[bucket_index]; + libMesh::Kokkos::detail::run_hilbert_system_fem_bucket_scatter_batch< + kokkos_hilbert_max_dofs, + kokkos_parsed_fem_max_fields>( bucket.key, bucket.mapping_type, bucket.n_nodes, @@ -1194,159 +1995,82 @@ assemble_kokkos_petsc_global_system(HilbertSystem & sys, ::Kokkos::make_pair(bucket.begin, bucket.end)), plan.rhs_local_slots, plan.mat_value_slots, + plan.fem_goal_batch_data.bucket_field_keys[bucket_index], + plan.fem_goal_batch_data.bucket_field_dofs[bucket_index], + plan.fem_goal_batch_data.field_local_indices, + input_guard.view(), + timed_goal, sys.hilbert_order(), - goal_access, - rhs_guard.view(), - matrix_values, - "hilbert_direct_scatter_bucket_batch"); - } - else - { - const auto timed_goal = fem_goal->with_time(sys.time); - if (!ensure_kokkos_fem_goal_batch_data(sys, *fem_goal, plan)) - return false; - - auto input_guard = plan.fem_goal_batch_data.input_vector->make_kokkos_read_view_guard(); - for (auto bucket_index : index_range(plan.buckets)) - { - const auto & bucket = plan.buckets[bucket_index]; - libMesh::Kokkos::detail::run_hilbert_system_fem_bucket_scatter_batch< - kokkos_hilbert_max_dofs, - kokkos_parsed_fem_max_fields>( - bucket.key, - bucket.mapping_type, - bucket.n_nodes, - bucket.quadrature_order, - geometry_cache.node_coordinates, - geometry_cache.element_node_ids, - ::Kokkos::subview(plan.batch_data.elem_indices, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.elem_n_dofs, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.rhs_offsets, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - 
::Kokkos::subview(plan.batch_data.mat_offsets, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - plan.rhs_local_slots, - plan.mat_value_slots, - plan.fem_goal_batch_data.bucket_field_keys[bucket_index], - plan.fem_goal_batch_data.bucket_field_dofs[bucket_index], - plan.fem_goal_batch_data.field_local_indices, - input_guard.view(), - timed_goal, - sys.hilbert_order(), - rhs_guard.view(), - matrix_values, - "hilbert_direct_scatter_fem_bucket_batch"); - } - } - - return true; - } + rhs_scatter, + mat_scatter, + "hilbert_direct_scatter_fem_bucket_batch"); + } + } - assembly_path = HilbertSystem::KokkosAssemblyPath::petsc_coo_fallback; - ensure_kokkos_petsc_coo_storage(*dof_index_cache, plan); + ::Kokkos::fence(); - if (analytic_goal) - { - const auto timed_goal = analytic_goal->with_time(sys.time); - const auto goal_access = - libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(timed_goal, - timed_goal.gradient_function()); - - const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); - for (const auto & bucket : plan.buckets) - libMesh::Kokkos::detail::run_hilbert_system_bucket_value_batch( - bucket.key, - bucket.mapping_type, - bucket.n_nodes, - bucket.quadrature_order, - geometry_cache.node_coordinates, - geometry_cache.element_node_ids, - ::Kokkos::subview(plan.batch_data.elem_indices, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.elem_n_dofs, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.rhs_offsets, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.mat_offsets, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - sys.hilbert_order(), - goal_access, - plan.rhs_values, - plan.mat_values, - "hilbert_value_bucket_batch"); - } - else - { - const auto timed_goal = fem_goal->with_time(sys.time); - if (!ensure_kokkos_fem_goal_batch_data(sys, *fem_goal, plan)) - return false; + if (plan.direct_matrix_layout == 
KokkosDirectMatrixLayout::mpi_aij) + { + if (plan.rhs_remote_sf && plan.rhs_remote_values.extent(0)) + { + ::Kokkos::deep_copy(plan.rhs_remote_root_values, Number(0)); + LibmeshPetscCall2(sys.comm(), + PetscSFReduceWithMemTypeBegin(plan.rhs_remote_sf, + MPIU_SCALAR, + kokkos_default_petsc_mem_type(), + plan.rhs_remote_values.data(), + kokkos_default_petsc_mem_type(), + plan.rhs_remote_root_values.data(), + MPIU_SUM)); + LibmeshPetscCall2(sys.comm(), + PetscSFReduceEnd(plan.rhs_remote_sf, + MPIU_SCALAR, + plan.rhs_remote_values.data(), + plan.rhs_remote_root_values.data(), + MPIU_SUM)); + ::Kokkos::parallel_for("hilbert_apply_rhs_remote_reduce", + ::Kokkos::RangePolicy<>(0, plan.rhs_remote_root_values.extent(0)), + KOKKOS_LAMBDA(const std::size_t i) + { + local_rhs_values(i) += plan.rhs_remote_root_values(i); + }); + } - const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); - auto input_guard = plan.fem_goal_batch_data.input_vector->make_kokkos_read_view_guard(); - for (auto bucket_index : index_range(plan.buckets)) - { - const auto & bucket = plan.buckets[bucket_index]; - libMesh::Kokkos::detail::run_hilbert_system_fem_bucket_value_batch< - kokkos_hilbert_max_dofs, - kokkos_parsed_fem_max_fields>( - bucket.key, - bucket.mapping_type, - bucket.n_nodes, - bucket.quadrature_order, - geometry_cache.node_coordinates, - geometry_cache.element_node_ids, - ::Kokkos::subview(plan.batch_data.elem_indices, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.elem_n_dofs, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.rhs_offsets, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - ::Kokkos::subview(plan.batch_data.mat_offsets, - ::Kokkos::make_pair(bucket.begin, bucket.end)), - plan.fem_goal_batch_data.bucket_field_keys[bucket_index], - plan.fem_goal_batch_data.bucket_field_dofs[bucket_index], - plan.fem_goal_batch_data.field_local_indices, - input_guard.view(), - timed_goal, - 
sys.hilbert_order(), - plan.rhs_values, - plan.mat_values, - "hilbert_fem_value_bucket_batch"); - } - } + if (plan.mat_remote_sf && plan.mat_remote_values.extent(0)) + { + ::Kokkos::deep_copy(plan.mat_remote_root_values, Number(0)); + LibmeshPetscCall2(sys.comm(), + PetscSFReduceWithMemTypeBegin(plan.mat_remote_sf, + MPIU_SCALAR, + kokkos_default_petsc_mem_type(), + plan.mat_remote_values.data(), + kokkos_default_petsc_mem_type(), + plan.mat_remote_root_values.data(), + MPIU_SUM)); + LibmeshPetscCall2(sys.comm(), + PetscSFReduceEnd(plan.mat_remote_sf, + MPIU_SCALAR, + plan.mat_remote_values.data(), + plan.mat_remote_root_values.data(), + MPIU_SUM)); + ::Kokkos::parallel_for("hilbert_apply_mat_remote_reduce", + ::Kokkos::RangePolicy<>(0, plan.mat_remote_root_values.extent(0)), + KOKKOS_LAMBDA(const std::size_t i) + { + if (i < plan.mat_diag_size) + diag_matrix_values(i) += plan.mat_remote_root_values(i); + else + offdiag_matrix_values(i - plan.mat_diag_size) += + plan.mat_remote_root_values(i); + }); + } - if (plan.matrix_target != &system_matrix) - { - LibmeshPetscCall2(sys.comm(), - MatSetPreallocationCOO(system_matrix.mat(), - static_cast(plan.mat_rows.size()), - plan.mat_rows.empty() ? nullptr : plan.mat_rows.data(), - plan.mat_cols.empty() ? nullptr : plan.mat_cols.data())); - plan.matrix_target = &system_matrix; - } - if (plan.rhs_target != &system_rhs) - { - LibmeshPetscCall2(sys.comm(), - VecSetPreallocationCOO(system_rhs.vec(), - static_cast(plan.rhs_rows.size()), - plan.rhs_rows.empty() ? 
nullptr : plan.rhs_rows.data())); - plan.rhs_target = &system_rhs; - } - LibmeshPetscCall2(sys.comm(), - MatSetValuesCOO(system_matrix.mat(), - reinterpret_cast(plan.mat_values.data()), - INSERT_VALUES)); - LibmeshPetscCall2(sys.comm(), - VecSetValuesCOO(system_rhs.vec(), - reinterpret_cast(plan.rhs_values.data()), - INSERT_VALUES)); + ::Kokkos::fence(); + } + } return true; } -#endif - } // anonymous namespace #endif @@ -1361,6 +2085,7 @@ HilbertSystem::HilbertSystem(libMesh::EquationSystems & es, _fe_order(1), _hilbert_order(0), _use_kokkos_backend(false), + _use_exact_parsed_fem_host_path(false), _fdm_eps(libMesh::TOLERANCE), _subdomains_list() { @@ -1500,8 +2225,12 @@ HilbertSystem::try_kokkos_petsc_solve() const auto * analytic_goal = this->ensure_kokkos_goal_func(); const auto * fem_goal = this->ensure_kokkos_fem_goal_func(); - if (!petsc_matrix || !petsc_rhs || !petsc_solution || !(analytic_goal || fem_goal)) - return false; + libmesh_error_msg_if(!petsc_matrix || !petsc_rhs || !petsc_solution, + "HilbertSystem Kokkos direct PETSc storage requires PETSc-backed matrix, " + "RHS, and solution objects."); + libmesh_error_msg_if(!(analytic_goal || fem_goal), + "HilbertSystem Kokkos direct PETSc storage requires a parsed analytic " + "goal or a parsed FEM goal with Kokkos support."); prewarm_kokkos_hilbert_entities(*this, fem_goal); this->_last_kokkos_timing = {}; @@ -1513,8 +2242,9 @@ HilbertSystem::try_kokkos_petsc_solve() bool rebuilt_plan = false; const auto plan_start = clock::now(); auto * plan = this->ensure_kokkos_petsc_plan(&rebuilt_plan); - if (!plan) - return false; + libmesh_error_msg_if(!plan, + "HilbertSystem Kokkos direct PETSc storage could not build a supported " + "assembly plan for the current FE/mapping/quadrature configuration."); const auto plan_stop = clock::now(); this->_last_kokkos_timing.plan_seconds = rebuilt_plan ? 
@@ -1530,7 +2260,7 @@ HilbertSystem::try_kokkos_petsc_solve() *petsc_matrix, *petsc_rhs, assembly_path)) - return false; + libmesh_error_msg("HilbertSystem Kokkos direct PETSc storage assembly failed."); const auto assembly_stop = clock::now(); this->_last_kokkos_timing.assembly_seconds = std::chrono::duration_cast>(assembly_stop - assembly_start).count(); @@ -1569,6 +2299,7 @@ HilbertSystem::try_kokkos_petsc_solve() void HilbertSystem::init_data () { + this->get_dof_map().full_sparsity_pattern_needed(); this->add_variable ("u", static_cast(_fe_order), Utility::string_to_enum(_fe_family)); @@ -1695,7 +2426,7 @@ bool HilbertSystem::element_time_derivative (bool request_jacobian, } #if defined(LIBMESH_HAVE_PETSC) - if (input_system) + if (_use_exact_parsed_fem_host_path && input_system) if (const auto * kokkos_fem_goal = this->ensure_kokkos_fem_goal_func(); kokkos_fem_goal && assemble_host_exact_parsed_fem_goal_element(*this, @@ -1736,8 +2467,18 @@ void HilbertSystem::solve() { _last_kokkos_timing = {}; #if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) - if (_use_kokkos_backend && this->try_kokkos_petsc_solve()) - return; + if (_use_kokkos_backend) + { + if (this->try_kokkos_petsc_solve()) + return; + + libmesh_error_msg("HilbertSystem Kokkos backend did not complete the direct PETSc " + "storage solve path."); + } +#else + libmesh_error_msg_if(_use_kokkos_backend, + "HilbertSystem Kokkos backend requires a libMesh build with Kokkos, " + "PETSc, and real Number support."); #endif FEMSystem::solve(); diff --git a/src/apps/L2system.h b/src/apps/L2system.h index 3d6a2c7aa51..4e5f86b2b44 100644 --- a/src/apps/L2system.h +++ b/src/apps/L2system.h @@ -48,8 +48,7 @@ class HilbertSystem : public libMesh::FEMSystem enum class KokkosAssemblyPath { none, - petsc_direct_storage, - petsc_coo_fallback + petsc_direct_storage }; struct KokkosTimingInfo @@ -78,6 +77,8 @@ class HilbertSystem : public libMesh::FEMSystem unsigned 
int hilbert_order() const { return _hilbert_order; } void use_kokkos_backend(bool use) { _use_kokkos_backend = use; } bool use_kokkos_backend() const { return _use_kokkos_backend; } + void use_exact_parsed_fem_host_path(bool use) { _use_exact_parsed_fem_host_path = use; } + bool use_exact_parsed_fem_host_path() const { return _use_exact_parsed_fem_host_path; } const KokkosTimingInfo & last_kokkos_timing() const { return _last_kokkos_timing; } virtual void solve () override; void set_fdm_eps(libMesh::Real eps) { @@ -144,6 +145,7 @@ class HilbertSystem : public libMesh::FEMSystem unsigned int _hilbert_order; bool _use_kokkos_backend; + bool _use_exact_parsed_fem_host_path; // The function we will call to finite difference our goal // function diff --git a/src/apps/hilbert_kokkos_benchmark.C b/src/apps/hilbert_kokkos_benchmark.C index 4e437389ce5..037e37aad87 100644 --- a/src/apps/hilbert_kokkos_benchmark.C +++ b/src/apps/hilbert_kokkos_benchmark.C @@ -178,8 +178,6 @@ kokkos_assembly_path_name(const HilbertSystem::KokkosAssemblyPath path) return "none"; case HilbertSystem::KokkosAssemblyPath::petsc_direct_storage: return "direct PETSc storage"; - case HilbertSystem::KokkosAssemblyPath::petsc_coo_fallback: - return "PETSc COO fallback"; } return "unknown"; diff --git a/src/numerics/petsc_matrix_base.C b/src/numerics/petsc_matrix_base.C index 33d439622ec..4666e7a3258 100644 --- a/src/numerics/petsc_matrix_base.C +++ b/src/numerics/petsc_matrix_base.C @@ -93,7 +93,6 @@ void PetscMatrixBase::set_destroy_mat_on_exit(bool destroy) this->_destroy_mat_on_exit = destroy; } - template void PetscMatrixBase::swap(PetscMatrixBase & m_in) { diff --git a/tests/Makefile.in b/tests/Makefile.in index b45feaec91d..dbbf441b633 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -15183,7 +15183,7 @@ $(top_builddir)/libmesh_oprof.la: FORCE # any wrapper-provided defines remain visible. 
.K.o: $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(KOKKOS_MPI_CPPFLAGS) $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ -c $< -o $@ diff --git a/tests/systems/hilbert_system_kokkos_test.C b/tests/systems/hilbert_system_kokkos_test.C index 46e29513e63..e3d321b449f 100644 --- a/tests/systems/hilbert_system_kokkos_test.C +++ b/tests/systems/hilbert_system_kokkos_test.C @@ -41,6 +41,7 @@ configure_hilbert_system(HilbertSystem & sys, const bool use_kokkos) sys.fe_family() = "LAGRANGE"; sys.fe_order() = 1; sys.use_kokkos_backend(use_kokkos); + sys.use_exact_parsed_fem_host_path(true); sys.time_solver = std::make_unique(sys); } From 55102afeefd52d993f26717f686bb2fedda73978 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 15:28:30 -0600 Subject: [PATCH 40/48] Fix libMesh Kokkos autotools CUDA wiring --- Makefile.am | 234 ++++++++++++++++++++++++++++++++ m4/libmesh_optional_packages.m4 | 63 ++++++--- tests/Makefile.am | 79 +++++++++++ 3 files changed, 360 insertions(+), 16 deletions(-) diff --git a/Makefile.am b/Makefile.am index 6bd18a0eaaa..ba2cafc8bbe 100644 --- a/Makefile.am +++ b/Makefile.am @@ -44,6 +44,28 @@ AM_CPPFLAGS = -DLIBMESH_IS_COMPILING_ITSELF \ $(libmesh_optional_INCLUDES) \ -I$(top_builddir)/include # required for libmesh_version.h +.SUFFIXES: +.SUFFIXES: .C .K .lo .o .obj + +if LIBMESH_ENABLE_KOKKOS +# Compile .K translation units with the dedicated Kokkos compiler. +# If KOKKOS_CXX is not the MPI wrapper, configure populates +# $(KOKKOS_MPI_CPPFLAGS) from the wrapper's compile flags so mpi.h and +# any wrapper-provided defines remain visible. 
+.K.o: + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ + +.K.lo: + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ +endif + LIBS = $(libmesh_optional_LIBS) $(libmesh_precision_LIBS) # additional files which must be included in 'make dist' @@ -490,6 +512,218 @@ hilbert_kokkos_benchmark_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) hilbert_kokkos_benchmark_dbg_CXXFLAGS = $(CXXFLAGS_DBG) hilbert_kokkos_benchmark_dbg_LDADD = libmesh_dbg.la +if LIBMESH_ENABLE_KOKKOS +calculator_opt_LDADD += $(KOKKOS_LIBS) +calculator_devel_LDADD += $(KOKKOS_LIBS) +calculator_dbg_LDADD += $(KOKKOS_LIBS) +hilbert_kokkos_benchmark_opt_LDADD += $(KOKKOS_LIBS) +hilbert_kokkos_benchmark_devel_LDADD += $(KOKKOS_LIBS) +hilbert_kokkos_benchmark_dbg_LDADD += $(KOKKOS_LIBS) + +libmesh_dbg_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +libmesh_devel_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +libmesh_oprof_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +libmesh_opt_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +libmesh_prof_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ + +calculator_opt_LINK = 
$(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +calculator_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +calculator_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +hilbert_kokkos_benchmark_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +hilbert_kokkos_benchmark_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +hilbert_kokkos_benchmark_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ + +src/base/libmesh_dbg_la-dof_map.lo: src/base/dof_map.C + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/base/libmesh_devel_la-dof_map.lo: src/base/dof_map.C + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/base/libmesh_oprof_la-dof_map.lo: src/base/dof_map.C + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) 
$(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/base/libmesh_opt_la-dof_map.lo: src/base/dof_map.C + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/base/libmesh_prof_la-dof_map.lo: src/base/dof_map.C + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_opt-calculator.o: src/apps/calculator.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_opt-calculator.obj: src/apps/calculator.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_devel-calculator.o: src/apps/calculator.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_devel-calculator.obj: src/apps/calculator.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_dbg-calculator.o: src/apps/calculator.C + 
$(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_dbg-calculator.obj: src/apps/calculator.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_opt-L2system.o: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_opt-L2system.obj: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_devel-L2system.o: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_devel-L2system.obj: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_dbg-L2system.o: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/calculator_dbg-L2system.obj: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) \ + $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_opt-L2system.o: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_opt-L2system.obj: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj: 
src/apps/hilbert_kokkos_benchmark.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_devel-L2system.o: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_devel-L2system.obj: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +src/apps/hilbert_kokkos_benchmark_dbg-L2system.o: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + 
+src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj: src/apps/L2system.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< +endif + # compare opt_programs += compare-opt compare_opt_SOURCES = src/apps/compare.C diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index 9bada9d647f..d3ccebd419c 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -875,6 +875,13 @@ AC_ARG_WITH([kokkos-backend], [cuda|hip|sycl|openmp|serial (default: auto-detect from KokkosCore_config.h)]), [KOKKOS_BACKEND="$withval"], [KOKKOS_BACKEND="auto"]) +dnl Allow callers to provide the full Kokkos toolchain directly. +AC_ARG_VAR([KOKKOS_CXX], [Compiler for compiling Kokkos translation units]) +AC_ARG_VAR([KOKKOS_CPPFLAGS], [Preprocessor flags for compiling Kokkos translation units]) +AC_ARG_VAR([KOKKOS_CXXFLAGS], [C++ flags for compiling Kokkos translation units]) +AC_ARG_VAR([KOKKOS_LDFLAGS], [Linker flags for linking Kokkos translation units]) +AC_ARG_VAR([KOKKOS_LIBS], [Libraries for linking Kokkos translation units]) + dnl Allow the caller (e.g. MOOSE's configure_libmesh.sh) to pre-set the dnl Kokkos compiler and flags via environment variables. If KOKKOS_CXX is dnl already set, we skip auto-detection entirely — the caller knows best. @@ -883,17 +890,31 @@ dnl compilation rules and don't leak into the main CPPFLAGS/CXXFLAGS. 
AS_IF([test "x$KOKKOS_DIR" != "xno"], [ - AC_CHECK_FILE([$KOKKOS_DIR/include/Kokkos_Core.hpp], + libmesh_kokkos_include_dirs="-I$KOKKOS_DIR/include" + AS_IF([test -n "$PETSC_ARCH" && test -d "$KOKKOS_DIR/$PETSC_ARCH/include"], + [libmesh_kokkos_include_dirs="$libmesh_kokkos_include_dirs -I$KOKKOS_DIR/$PETSC_ARCH/include"]) + + libmesh_kokkos_lib_dirs= + AS_IF([test -d "$KOKKOS_DIR/lib"], + [libmesh_kokkos_lib_dirs="-L$KOKKOS_DIR/lib"]) + AS_IF([test -n "$PETSC_ARCH" && test -d "$KOKKOS_DIR/$PETSC_ARCH/lib"], + [libmesh_kokkos_lib_dirs="$libmesh_kokkos_lib_dirs -L$KOKKOS_DIR/$PETSC_ARCH/lib"]) + + KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h" + AS_IF([! test -r "$KOKKOS_CFG" && test -n "$PETSC_ARCH" && + test -r "$KOKKOS_DIR/$PETSC_ARCH/include/KokkosCore_config.h"], + [KOKKOS_CFG="$KOKKOS_DIR/$PETSC_ARCH/include/KokkosCore_config.h"]) + + AS_IF([test -r "$KOKKOS_DIR/include/Kokkos_Core.hpp" || + (test -n "$PETSC_ARCH" && + test -r "$KOKKOS_DIR/$PETSC_ARCH/include/Kokkos_Core.hpp")], [ enablekokkos=yes - libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES -I$KOKKOS_DIR/include" - libmesh_optional_LIBS="$libmesh_optional_LIBS -L$KOKKOS_DIR/lib -lkokkoscore" + libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES $libmesh_kokkos_include_dirs" dnl Only auto-detect if KOKKOS_CXX was not pre-set by the caller AS_IF([test "x$KOKKOS_CXX" = "x"], [ - KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h" - dnl Auto-detect backend AS_IF([test "x$KOKKOS_BACKEND" = "xauto"], [ @@ -922,12 +943,21 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], case "$KOKKOS_BACKEND" in cuda) + AC_PATH_PROG([NVCC_WRAPPER],[nvcc_wrapper],[no],[$PATH]) AC_PATH_PROG([NVCC],[nvcc],[no],[$PATH]) - AS_IF([test "x$NVCC" = "xno"], - [AC_MSG_ERROR([nvcc not found but Kokkos CUDA backend requested])]) - KOKKOS_CXX="$NVCC" - KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --disable-warnings -x cu -ccbin $CXX" - KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler -L$KOKKOS_DIR/lib" + 
AS_IF([test "x$NVCC_WRAPPER" != "xno"], + [ + KOKKOS_CXX="$NVCC_WRAPPER" + KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --expt-relaxed-constexpr --disable-warnings -x cu" + KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs" + ], + [ + AS_IF([test "x$NVCC" = "xno"], + [AC_MSG_ERROR([neither nvcc_wrapper nor nvcc was found but Kokkos CUDA backend was requested])]) + KOKKOS_CXX="$NVCC" + KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --expt-relaxed-constexpr --disable-warnings -x cu -ccbin $CXX" + KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler $libmesh_kokkos_lib_dirs" + ]) AS_IF([test "x$have_kokkos_openmp" = "xyes"], [ KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp" @@ -939,7 +969,7 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], AS_IF([test "x$HIPCC" = "xno"], [AC_MSG_ERROR([hipcc not found but Kokkos HIP backend requested])]) KOKKOS_CXX="$HIPCC" - KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs" ;; sycl) AC_PATH_PROG([ICPX],[icpx],[no],[$PATH]) @@ -947,26 +977,27 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], [AC_MSG_ERROR([icpx not found but Kokkos SYCL backend requested])]) KOKKOS_CXX="$ICPX" KOKKOS_CXXFLAGS="-fsycl" - KOKKOS_LDFLAGS="-fsycl -L$KOKKOS_DIR/lib" + KOKKOS_LDFLAGS="-fsycl $libmesh_kokkos_lib_dirs" ;; openmp) KOKKOS_CXX="${CXX}" KOKKOS_CXXFLAGS="-fopenmp -x c++" - KOKKOS_LDFLAGS="-fopenmp -L$KOKKOS_DIR/lib" + KOKKOS_LDFLAGS="-fopenmp $libmesh_kokkos_lib_dirs" ;; serial|*) KOKKOS_CXX="${CXX}" KOKKOS_CXXFLAGS="-x c++" - KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs" ;; esac ], [AC_MSG_RESULT([Using caller-provided KOKKOS_CXX=$KOKKOS_CXX])]) dnl Set defaults for any variables not provided by caller or auto-detect - KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION -I$KOKKOS_DIR/include}" - KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" + KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION $libmesh_kokkos_include_dirs}" + 
KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:-$libmesh_kokkos_lib_dirs}" KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}" + libmesh_optional_LIBS="$libmesh_optional_LIBS $KOKKOS_LDFLAGS $KOKKOS_LIBS" dnl If KOKKOS_CXX differs from the main compiler, it may not be the MPI dnl wrapper and thus may need the wrapper's compile flags explicitly in diff --git a/tests/Makefile.am b/tests/Makefile.am index 27353c88985..e58b8a03544 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -410,6 +410,85 @@ if LIBMESH_OPT_MODE endif endif +if LIBMESH_ENABLE_KOKKOS +unit_tests_dbg_LDADD += $(KOKKOS_LIBS) +unit_tests_devel_LDADD += $(KOKKOS_LIBS) +unit_tests_prof_LDADD += $(KOKKOS_LIBS) +unit_tests_oprof_LDADD += $(KOKKOS_LIBS) +unit_tests_opt_LDADD += $(KOKKOS_LIBS) + +unit_tests_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +unit_tests_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +unit_tests_prof_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +unit_tests_oprof_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ +unit_tests_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ + +unit_tests_dbg-driver.o: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_dbg-driver.obj: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + 
$(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_devel-driver.o: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_devel-driver.obj: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_prof-driver.o: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_prof-driver.obj: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_oprof-driver.o: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_oprof-driver.obj: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_opt-driver.o: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) 
$(KOKKOS_CXXFLAGS) \ + -c -o $@ $< + +unit_tests_opt-driver.obj: driver.C + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c -o $@ $< +endif + # Recursive automake builds subdirectories before parent directories. # But here we need the subdirectory to be able to link to # already-built parent directory libraries. From a8b02f96f1d32f519d5f9dfefdce45371508be2a Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 16:30:54 -0600 Subject: [PATCH 41/48] Use Kokkos compiler for Kokkos-enabled builds --- Makefile.am | 238 ++++------------------------------------------ tests/Makefile.am | 214 ++++++++++------------------------------- 2 files changed, 65 insertions(+), 387 deletions(-) diff --git a/Makefile.am b/Makefile.am index ba2cafc8bbe..62e29756c6a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -48,21 +48,27 @@ AM_CPPFLAGS = -DLIBMESH_IS_COMPILING_ITSELF \ .SUFFIXES: .C .K .lo .o .obj if LIBMESH_ENABLE_KOKKOS -# Compile .K translation units with the dedicated Kokkos compiler. -# If KOKKOS_CXX is not the MPI wrapper, configure populates -# $(KOKKOS_MPI_CPPFLAGS) from the wrapper's compile flags so mpi.h and -# any wrapper-provided defines remain visible. +# In a Kokkos-enabled build, drive the whole C++ tree with the Kokkos +# compiler wrapper and flags instead of hand-routing individual files. +# This keeps ordinary .C translation units and explicit .K device tests +# on the same toolchain model. 
+CXX = $(KOKKOS_CXX) +CXXLD = $(KOKKOS_CXX) +CPPFLAGS += $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) +CXXFLAGS += $(KOKKOS_CXXFLAGS) +LDFLAGS += $(KOKKOS_LDFLAGS) + .K.o: - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ -c $< -o $@ .K.lo: - $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ + $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(CXX) \ $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(KOKKOS_MPI_CPPFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ -c $< -o $@ endif @@ -512,218 +518,6 @@ hilbert_kokkos_benchmark_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) hilbert_kokkos_benchmark_dbg_CXXFLAGS = $(CXXFLAGS_DBG) hilbert_kokkos_benchmark_dbg_LDADD = libmesh_dbg.la -if LIBMESH_ENABLE_KOKKOS -calculator_opt_LDADD += $(KOKKOS_LIBS) -calculator_devel_LDADD += $(KOKKOS_LIBS) -calculator_dbg_LDADD += $(KOKKOS_LIBS) -hilbert_kokkos_benchmark_opt_LDADD += $(KOKKOS_LIBS) -hilbert_kokkos_benchmark_devel_LDADD += $(KOKKOS_LIBS) -hilbert_kokkos_benchmark_dbg_LDADD += $(KOKKOS_LIBS) - -libmesh_dbg_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -libmesh_devel_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -libmesh_oprof_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -libmesh_opt_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - 
$(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -libmesh_prof_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ - -calculator_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -calculator_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -calculator_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -hilbert_kokkos_benchmark_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -hilbert_kokkos_benchmark_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -hilbert_kokkos_benchmark_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ - -src/base/libmesh_dbg_la-dof_map.lo: src/base/dof_map.C - $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ - $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/base/libmesh_devel_la-dof_map.lo: src/base/dof_map.C - $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ - $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - 
$(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/base/libmesh_oprof_la-dof_map.lo: src/base/dof_map.C - $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ - $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/base/libmesh_opt_la-dof_map.lo: src/base/dof_map.C - $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ - $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/base/libmesh_prof_la-dof_map.lo: src/base/dof_map.C - $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=compile $(KOKKOS_CXX) \ - $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_opt-calculator.o: src/apps/calculator.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_opt-calculator.obj: src/apps/calculator.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_devel-calculator.o: src/apps/calculator.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_devel-calculator.obj: src/apps/calculator.C - 
$(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_dbg-calculator.o: src/apps/calculator.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_dbg-calculator.obj: src/apps/calculator.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_opt-L2system.o: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_opt-L2system.obj: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_devel-L2system.o: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_devel-L2system.obj: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_dbg-L2system.o: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/calculator_dbg-L2system.obj: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(calculator_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(calculator_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_opt-L2system.o: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_opt-L2system.obj: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - 
$(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_devel-L2system.o: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_devel-L2system.obj: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_dbg-L2system.o: src/apps/L2system.C - $(KOKKOS_CXX) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj: src/apps/L2system.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) \ - $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< -endif - # compare opt_programs += compare-opt compare_opt_SOURCES = src/apps/compare.C diff --git a/tests/Makefile.am b/tests/Makefile.am index e58b8a03544..bbf1db2a365 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -9,6 +9,17 @@ AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) LIBS = $(libmesh_optional_LIBS) $(CPPUNIT_LIBS) KOKKOS_TEST_CPPFLAGS = +if LIBMESH_ENABLE_KOKKOS +# Match the top-level build model: once Kokkos is enabled, build the +# C++ tree with the Kokkos compiler wrapper instead of routing selected +# objects by hand. 
+CXX = $(KOKKOS_CXX) +CXXLD = $(KOKKOS_CXX) +CPPFLAGS += $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) +CXXFLAGS += $(KOKKOS_CXXFLAGS) +LDFLAGS += $(KOKKOS_LDFLAGS) +endif + # We might have turned on -Werror and/or paranoid warnings CXXFLAGS_DBG += $(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) CXXFLAGS_DEVEL += $(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) @@ -254,7 +265,7 @@ check_PROGRAMS = # empty, append below TESTS = if LIBMESH_ENABLE_KOKKOS - KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) + KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include check_PROGRAMS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ @@ -269,51 +280,51 @@ if LIBMESH_ENABLE_KOKKOS kokkos_fe_types_oracle_unit_SOURCES = fe/kokkos_fe_types_oracle_test.K kokkos_fe_types_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_shape_oracle_unit_SOURCES = fe/kokkos_fe_shape_oracle_test.K kokkos_fe_shape_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_map_oracle_unit_SOURCES = fe/kokkos_fe_map_oracle_test.K 
kokkos_fe_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_invariant_unit_SOURCES = fe/kokkos_fe_invariant_test.K kokkos_fe_invariant_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_contract_unit_SOURCES = fe/kokkos_fe_contract_test.K kokkos_fe_contract_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_permuted_map_oracle_unit_SOURCES = fe/kokkos_fe_permuted_map_oracle_test.K kokkos_fe_permuted_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_permuted_map_oracle_unit_LDADD = 
$(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_reconstruction_oracle_unit_SOURCES = fe/kokkos_fe_reconstruction_oracle_test.K kokkos_fe_reconstruction_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_fe_side_trace_oracle_unit_SOURCES = fe/kokkos_fe_side_trace_oracle_test.K kokkos_fe_side_trace_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la check_PROGRAMS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ kokkos_tensor_ops_oracle_unit @@ -322,21 +333,21 @@ if LIBMESH_ENABLE_KOKKOS kokkos_quadrature_oracle_unit_SOURCES = fe/kokkos_quadrature_oracle_test.K kokkos_quadrature_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_quadrature_oracle_unit_LDFLAGS = 
$(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_quadrature_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) - kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) - kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) - kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) + kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la endif # our GLIBC debugging preprocessor flags seem to potentially conflict @@ -410,85 +421,6 @@ if LIBMESH_OPT_MODE endif endif -if LIBMESH_ENABLE_KOKKOS -unit_tests_dbg_LDADD += $(KOKKOS_LIBS) -unit_tests_devel_LDADD += $(KOKKOS_LIBS) -unit_tests_prof_LDADD += $(KOKKOS_LIBS) -unit_tests_oprof_LDADD += $(KOKKOS_LIBS) -unit_tests_opt_LDADD += $(KOKKOS_LIBS) - -unit_tests_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - 
$(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -unit_tests_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -unit_tests_prof_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -unit_tests_oprof_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ -unit_tests_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) $(KOKKOS_LDFLAGS) -o $@ - -unit_tests_dbg-driver.o: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_dbg-driver.obj: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_devel-driver.o: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_devel-driver.obj: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_prof-driver.o: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) 
$(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_prof-driver.obj: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_oprof-driver.o: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_oprof-driver.obj: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_opt-driver.o: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< - -unit_tests_opt-driver.obj: driver.C - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ - -c -o $@ $< -endif - # Recursive automake builds subdirectories before parent directories. # But here we need the subdirectory to be able to link to # already-built parent directory libraries. @@ -530,61 +462,13 @@ if LIBMESH_ENABLE_CPPUNIT TESTS += run_unit_tests.sh endif -# Compile .K translation units with the Kokkos device compiler. -# If KOKKOS_CXX is not the MPI wrapper, configure populates -# $(KOKKOS_MPI_CPPFLAGS) from the wrapper's compile flags so mpi.h and -# any wrapper-provided defines remain visible. 
+# Compile .K translation units with the same project-wide compiler model. .K.o: - $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(KOKKOS_MPI_CPPFLAGS) $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ - $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ -c $< -o $@ -# Custom link rules so the Kokkos compiler drives the final link step. -kokkos_fe_types_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_types_oracle_unit_LDFLAGS) -o $@ - -kokkos_fe_shape_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_shape_oracle_unit_LDFLAGS) -o $@ - -kokkos_fe_map_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_map_oracle_unit_LDFLAGS) -o $@ - -kokkos_fe_invariant_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_invariant_unit_LDFLAGS) -o $@ - -kokkos_fe_contract_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_contract_unit_LDFLAGS) -o $@ - -kokkos_fe_permuted_map_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_permuted_map_oracle_unit_LDFLAGS) -o $@ - -kokkos_fe_reconstruction_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_reconstruction_oracle_unit_LDFLAGS) -o $@ - -kokkos_fe_side_trace_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_fe_side_trace_oracle_unit_LDFLAGS) -o $@ - -kokkos_quadrature_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_quadrature_oracle_unit_LDFLAGS) -o $@ - -kokkos_vector_ops_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) 
$(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ - -kokkos_tensor_ops_oracle_unit_LINK = \ - $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ - $(LDFLAGS) $(kokkos_tensor_ops_oracle_unit_LDFLAGS) -o $@ - CLEANFILES = cube_mesh.xda \ slit_mesh.xda \ slit_solution.xda \ From e50326042bbe5f41207dd9c4cae0c617b62e03f3 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 16:40:12 -0600 Subject: [PATCH 42/48] Move Kokkos compiler selection into configure --- Makefile.am | 16 ++++------------ m4/libmesh_optional_packages.m4 | 17 +++++++++++++++++ tests/Makefile.am | 14 +++----------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/Makefile.am b/Makefile.am index 62e29756c6a..6009b3bc9f0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,21 +43,13 @@ AM_CPPFLAGS = -DLIBMESH_IS_COMPILING_ITSELF \ $(libmesh_contrib_INCLUDES) \ $(libmesh_optional_INCLUDES) \ -I$(top_builddir)/include # required for libmesh_version.h +AM_CPPFLAGS += $(LIBMESH_KOKKOS_BUILD_CPPFLAGS) +AM_CXXFLAGS += $(LIBMESH_KOKKOS_BUILD_CXXFLAGS) +AM_LDFLAGS += $(LIBMESH_KOKKOS_BUILD_LDFLAGS) -.SUFFIXES: -.SUFFIXES: .C .K .lo .o .obj +SUFFIXES = .C .K .lo .o .obj if LIBMESH_ENABLE_KOKKOS -# In a Kokkos-enabled build, drive the whole C++ tree with the Kokkos -# compiler wrapper and flags instead of hand-routing individual files. -# This keeps ordinary .C translation units and explicit .K device tests -# on the same toolchain model. 
-CXX = $(KOKKOS_CXX) -CXXLD = $(KOKKOS_CXX) -CPPFLAGS += $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) -CXXFLAGS += $(KOKKOS_CXXFLAGS) -LDFLAGS += $(KOKKOS_LDFLAGS) - .K.o: $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) \ diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index d3ccebd419c..2978d3200d6 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -882,6 +882,10 @@ AC_ARG_VAR([KOKKOS_CXXFLAGS], [C++ flags for compiling Kokkos translation units] AC_ARG_VAR([KOKKOS_LDFLAGS], [Linker flags for linking Kokkos translation units]) AC_ARG_VAR([KOKKOS_LIBS], [Libraries for linking Kokkos translation units]) +LIBMESH_KOKKOS_BUILD_CPPFLAGS="" +LIBMESH_KOKKOS_BUILD_CXXFLAGS="" +LIBMESH_KOKKOS_BUILD_LDFLAGS="" + dnl Allow the caller (e.g. MOOSE's configure_libmesh.sh) to pre-set the dnl Kokkos compiler and flags via environment variables. If KOKKOS_CXX is dnl already set, we skip auto-detection entirely — the caller knows best. @@ -1080,6 +1084,16 @@ int main(int argc, char ** argv) AS_IF([test "x$kokkos_config_works" = "xyes"], [AC_MSG_RESULT([yes])], [AC_MSG_ERROR([configured Kokkos compiler/flags failed to compile and link a minimal test program])]) + + dnl Use the validated Kokkos compiler as the project-wide C++ compiler + dnl for Kokkos-enabled builds. Automake-generated rules compile .C + dnl translation units with $(CXX), so the switch has to happen at + dnl configure time rather than in Makefile.am. 
+ CXX="$KOKKOS_CXX" + LIBMESH_KOKKOS_BUILD_CPPFLAGS="$KOKKOS_MPI_CPPFLAGS $KOKKOS_CPPFLAGS" + LIBMESH_KOKKOS_BUILD_CXXFLAGS="$KOKKOS_CXXFLAGS" + LIBMESH_KOKKOS_BUILD_LDFLAGS="$KOKKOS_LDFLAGS" + AC_DEFINE([HAVE_KOKKOS], [1], [Define if Kokkos support is enabled in libMesh]) AC_MSG_RESULT(<<< Configuring library with Kokkos support >>>) @@ -1097,6 +1111,9 @@ AC_SUBST([KOKKOS_CXXFLAGS]) AC_SUBST([KOKKOS_LDFLAGS]) AC_SUBST([KOKKOS_LIBS]) AC_SUBST([KOKKOS_MPI_CPPFLAGS]) +AC_SUBST([LIBMESH_KOKKOS_BUILD_CPPFLAGS]) +AC_SUBST([LIBMESH_KOKKOS_BUILD_CXXFLAGS]) +AC_SUBST([LIBMESH_KOKKOS_BUILD_LDFLAGS]) AM_CONDITIONAL(LIBMESH_ENABLE_KOKKOS, test x$enablekokkos = xyes) # ------------------------------------------------------------- diff --git a/tests/Makefile.am b/tests/Makefile.am index bbf1db2a365..740c6440af5 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -6,20 +6,12 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) -I$(top_builddir)/include \ $(libmesh_contrib_INCLUDES) $(CPPUNIT_CFLAGS) \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) +AM_CPPFLAGS += $(LIBMESH_KOKKOS_BUILD_CPPFLAGS) +AM_CXXFLAGS += $(LIBMESH_KOKKOS_BUILD_CXXFLAGS) +AM_LDFLAGS += $(LIBMESH_KOKKOS_BUILD_LDFLAGS) LIBS = $(libmesh_optional_LIBS) $(CPPUNIT_LIBS) KOKKOS_TEST_CPPFLAGS = -if LIBMESH_ENABLE_KOKKOS -# Match the top-level build model: once Kokkos is enabled, build the -# C++ tree with the Kokkos compiler wrapper instead of routing selected -# objects by hand. 
-CXX = $(KOKKOS_CXX) -CXXLD = $(KOKKOS_CXX) -CPPFLAGS += $(KOKKOS_MPI_CPPFLAGS) $(KOKKOS_CPPFLAGS) -CXXFLAGS += $(KOKKOS_CXXFLAGS) -LDFLAGS += $(KOKKOS_LDFLAGS) -endif - # We might have turned on -Werror and/or paranoid warnings CXXFLAGS_DBG += $(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) CXXFLAGS_DEVEL += $(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) From f5959eabcaa35dd36244e998af0417fa7549de1a Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 16:44:54 -0600 Subject: [PATCH 43/48] Initialize AM_CXXFLAGS for Kokkos build flags --- Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.am b/Makefile.am index 6009b3bc9f0..14454fea543 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,7 @@ SUBDIRS = include #src AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 -I m4/autoconf-submodule +AM_CXXFLAGS = $(libmesh_CXXFLAGS) AM_CFLAGS = $(libmesh_CFLAGS) AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) From 24b252580ce9573ee72fead1d1459faf6ca158a3 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 17:02:32 -0600 Subject: [PATCH 44/48] Require nvcc_wrapper for CUDA Kokkos builds --- m4/libmesh_optional_packages.m4 | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index 2978d3200d6..d2fe78ed0b3 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -948,7 +948,6 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], case "$KOKKOS_BACKEND" in cuda) AC_PATH_PROG([NVCC_WRAPPER],[nvcc_wrapper],[no],[$PATH]) - AC_PATH_PROG([NVCC],[nvcc],[no],[$PATH]) AS_IF([test "x$NVCC_WRAPPER" != "xno"], [ KOKKOS_CXX="$NVCC_WRAPPER" @@ -956,11 +955,7 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs" ], [ - AS_IF([test "x$NVCC" = "xno"], - [AC_MSG_ERROR([neither nvcc_wrapper nor nvcc was found but Kokkos CUDA backend was requested])]) - KOKKOS_CXX="$NVCC" - 
KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --expt-relaxed-constexpr --disable-warnings -x cu -ccbin $CXX" - KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler $libmesh_kokkos_lib_dirs" + AC_MSG_ERROR([nvcc_wrapper was not found but Kokkos CUDA backend was requested. libMesh's project-wide Kokkos CUDA build requires nvcc_wrapper (or an explicitly provided CUDA-capable KOKKOS_CXX) rather than raw nvcc.]) ]) AS_IF([test "x$have_kokkos_openmp" = "xyes"], [ From 0083a70164364173fbe2e03f133563a02eaa8828 Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 18:33:26 -0600 Subject: [PATCH 45/48] Preserve MPI link flags for Kokkos wrapper builds --- m4/libmesh_optional_packages.m4 | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index d2fe78ed0b3..be376e37075 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -999,10 +999,12 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], libmesh_optional_LIBS="$libmesh_optional_LIBS $KOKKOS_LDFLAGS $KOKKOS_LIBS" dnl If KOKKOS_CXX differs from the main compiler, it may not be the MPI - dnl wrapper and thus may need the wrapper's compile flags explicitly in - dnl order to find mpi.h. Query the primary CXX wrapper for compile-time - dnl flags and fall back to MPI_INCLUDES when probing is unavailable. + dnl wrapper and thus may need the wrapper's compile and link flags + dnl explicitly in order to find mpi.h and resolve MPI symbols. Query + dnl the primary CXX wrapper first and fall back to the configure-time + dnl MPI variables when probing is unavailable. 
KOKKOS_MPI_CPPFLAGS="" + KOKKOS_MPI_LIBS="" AS_IF([test "x$enablempi" = "xyes" && test "x$KOKKOS_CXX" != "x$CXX"], [ AC_MSG_CHECKING([for MPI compile flags usable with KOKKOS_CXX]) @@ -1016,8 +1018,22 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"], [AC_MSG_RESULT([not found])], [AC_MSG_RESULT([$KOKKOS_MPI_CPPFLAGS])]) + + AC_MSG_CHECKING([for MPI link flags usable with KOKKOS_CXX]) + KOKKOS_MPI_LIBS=`$CXX -showme:link 2>/dev/null` + AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"], + [KOKKOS_MPI_LIBS=`$CXX -link_info 2>/dev/null`]) + AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"], + [KOKKOS_MPI_LIBS=`$CXX -show 2>/dev/null | sed 's/^[^ ]* //'`]) + AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"], + [KOKKOS_MPI_LIBS="$MPI_LDFLAGS $MPI_LIBS"]) + AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"], + [AC_MSG_RESULT([not found])], + [AC_MSG_RESULT([$KOKKOS_MPI_LIBS])]) ]) + libmesh_optional_LIBS="$libmesh_optional_LIBS $KOKKOS_MPI_LIBS" + dnl Fail configure early if the chosen Kokkos compiler/flags/libs cannot dnl actually compile and link a minimal Kokkos program. 
AC_MSG_CHECKING([whether the Kokkos compiler configuration works]) @@ -1031,7 +1047,7 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], CPPFLAGS="$CPPFLAGS $KOKKOS_CPPFLAGS $KOKKOS_MPI_CPPFLAGS" CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS" LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS" - LIBS="$LIBS $KOKKOS_LIBS" + LIBS="$LIBS $KOKKOS_LIBS $KOKKOS_MPI_LIBS" AC_LANG_PUSH([C++]) AS_IF([test "x$enablempi" = "xyes"], From 4203587fda73f7e24c630054793632394e1fb2cb Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 18:58:27 -0600 Subject: [PATCH 46/48] Teach nvcc_wrapper the MPI host compiler --- m4/libmesh_optional_packages.m4 | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4 index be376e37075..36774c4950b 100644 --- a/m4/libmesh_optional_packages.m4 +++ b/m4/libmesh_optional_packages.m4 @@ -950,8 +950,29 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"], AC_PATH_PROG([NVCC_WRAPPER],[nvcc_wrapper],[no],[$PATH]) AS_IF([test "x$NVCC_WRAPPER" != "xno"], [ + libmesh_kokkos_host_cxx="" + AS_IF([test "x$enablempi" = "xyes"], + [ + libmesh_kokkos_host_cxx=`$CXX --showme:command 2>/dev/null` + AS_IF([test "x$libmesh_kokkos_host_cxx" = "x"], + [libmesh_kokkos_host_cxx=`$CXX -show 2>/dev/null | sed 's/ .*//'`]) + ], + [libmesh_kokkos_host_cxx="$CXX"]) + + AC_MSG_CHECKING([for host compiler usable with nvcc_wrapper]) + AS_IF([test "x$libmesh_kokkos_host_cxx" = "x"], + [ + AC_MSG_RESULT([not found]) + AC_MSG_ERROR([Could not determine a host compiler to pass to nvcc_wrapper. 
Set KOKKOS_CXXFLAGS with a suitable -ccbin value or provide NVCC_WRAPPER_DEFAULT_COMPILER in the environment.]) + ], + [AC_MSG_RESULT([$libmesh_kokkos_host_cxx])]) + KOKKOS_CXX="$NVCC_WRAPPER" - KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --expt-relaxed-constexpr --disable-warnings -x cu" + dnl nvcc_wrapper already mediates between nvcc and the host + dnl compiler; passing raw nvcc forwarding flags through the + dnl wrapper can leak them to g++ and fail. Keep only the + dnl CUDA flags the wrapper recognizes here. + KOKKOS_CXXFLAGS="--extended-lambda --expt-relaxed-constexpr --disable-warnings -x cu -ccbin $libmesh_kokkos_host_cxx" KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs" ], [ From 46b89c0697adceab433ef737b2edeefd666d192c Mon Sep 17 00:00:00 2001 From: rochi00 Date: Tue, 19 May 2026 19:20:28 -0600 Subject: [PATCH 47/48] Shim nvcc_wrapper for automake deps --- build-aux/libmesh_nvcc_wrapper | 44 +++++++++++++++++++++++++++++++++ m4/libmesh_optional_packages.m4 | 5 +++- 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100755 build-aux/libmesh_nvcc_wrapper diff --git a/build-aux/libmesh_nvcc_wrapper b/build-aux/libmesh_nvcc_wrapper new file mode 100755 index 00000000000..32798574ddd --- /dev/null +++ b/build-aux/libmesh_nvcc_wrapper @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +# Wrapper around Kokkos' nvcc_wrapper to smooth over Automake dependency +# tracking flags that nvcc_wrapper does not fully understand. +# +# In particular: +# - Automake commonly passes -MP, which is meaningful to GCC dependency +# generation but ends up leaking to the host compiler without -M/-MM through +# nvcc_wrapper's split dependency path. +# - -MQ is the quoted-target form of -MT; nvcc_wrapper only understands -MT. 
+
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 REAL_NVCC_WRAPPER [ARGS...]" >&2
+  exit 2
+fi
+
+real_nvcc_wrapper=$1
+shift
+
+forwarded_args=()
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -MP)
+      shift
+      ;;
+    -MQ)
+      if [[ $# -lt 2 ]]; then
+        echo "$0: -MQ requires an argument" >&2
+        exit 2
+      fi
+      forwarded_args+=(-MT "$2")
+      shift 2
+      ;;
+    *)
+      forwarded_args+=("$1")
+      shift
+      ;;
+  esac
+done
+# Guarded expansion: bash < 4.4 reports an empty array as unbound under `set -u`.
+exec "$real_nvcc_wrapper" ${forwarded_args[@]+"${forwarded_args[@]}"}
diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4
index 36774c4950b..7878f8d9e37 100644
--- a/m4/libmesh_optional_packages.m4
+++ b/m4/libmesh_optional_packages.m4
@@ -967,7 +967,10 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"],
                           ],
                           [AC_MSG_RESULT([$libmesh_kokkos_host_cxx])])
 
-                    KOKKOS_CXX="$NVCC_WRAPPER"
+                    dnl Route through a tiny libMesh-owned shim so Automake's
+                    dnl dependency-tracking flags do not trip nvcc_wrapper up
+                    dnl on ordinary host-only .C files.
+                    KOKKOS_CXX="$SHELL $srcdir/build-aux/libmesh_nvcc_wrapper $NVCC_WRAPPER"
                     dnl nvcc_wrapper already mediates between nvcc and the host
                     dnl compiler; passing raw nvcc forwarding flags through the
                     dnl wrapper can leak them to g++ and fail.
Keep only the

From 439215412d740f82dfbe46cd7bcd97ebf116ab1d Mon Sep 17 00:00:00 2001
From: rochi00
Date: Tue, 19 May 2026 19:39:44 -0600
Subject: [PATCH 48/48] Use absolute path for nvcc_wrapper shim

Resolve the shim location with cd/pwd so the path stays valid when
$srcdir is already absolute (configure invoked by absolute path), and
launch the shim with bash: it uses bash-only syntax, while $SHELL is
the shell running configure and is not guaranteed to be bash.

---
 m4/libmesh_optional_packages.m4 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4
index 7878f8d9e37..a6e6c7703f5 100644
--- a/m4/libmesh_optional_packages.m4
+++ b/m4/libmesh_optional_packages.m4
@@ -967,10 +967,15 @@ AS_IF([test "x$KOKKOS_DIR" != "xno"],
                           ],
                           [AC_MSG_RESULT([$libmesh_kokkos_host_cxx])])
 
+                    dnl Resolve the source directory with cd and pwd because
+                    dnl $srcdir may already be absolute; prefixing $PWD would
+                    dnl then name a path that does not exist.
+                    libmesh_kokkos_wrapper_shim=`cd "$srcdir" && pwd`/build-aux/libmesh_nvcc_wrapper
                     dnl Route through a tiny libMesh-owned shim so Automake's
                     dnl dependency-tracking flags do not trip nvcc_wrapper up
                     dnl on ordinary host-only .C files.
-                    KOKKOS_CXX="$SHELL $srcdir/build-aux/libmesh_nvcc_wrapper $NVCC_WRAPPER"
+                    dnl The shim is a bash script; $SHELL may be a plain POSIX sh.
+                    KOKKOS_CXX="bash $libmesh_kokkos_wrapper_shim $NVCC_WRAPPER"
                     dnl nvcc_wrapper already mediates between nvcc and the host
                     dnl compiler; passing raw nvcc forwarding flags through the
                     dnl wrapper can leak them to g++ and fail. Keep only the