diff --git a/Makefile.am b/Makefile.am index dea7f3fd3aa..14454fea543 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,7 @@ SUBDIRS = include #src AUTOMAKE_OPTIONS = subdir-objects ACLOCAL_AMFLAGS = -I m4 -I m4/autoconf-submodule +AM_CXXFLAGS = $(libmesh_CXXFLAGS) AM_CFLAGS = $(libmesh_CFLAGS) AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) @@ -43,6 +44,26 @@ AM_CPPFLAGS = -DLIBMESH_IS_COMPILING_ITSELF \ $(libmesh_contrib_INCLUDES) \ $(libmesh_optional_INCLUDES) \ -I$(top_builddir)/include # required for libmesh_version.h +AM_CPPFLAGS += $(LIBMESH_KOKKOS_BUILD_CPPFLAGS) +AM_CXXFLAGS += $(LIBMESH_KOKKOS_BUILD_CXXFLAGS) +AM_LDFLAGS += $(LIBMESH_KOKKOS_BUILD_LDFLAGS) + +SUFFIXES = .C .K .lo .o .obj + +if LIBMESH_ENABLE_KOKKOS +.K.o: + $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ + -c $< -o $@ + +.K.lo: + $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ + -c $< -o $@ +endif LIBS = $(libmesh_optional_LIBS) $(libmesh_precision_LIBS) @@ -468,6 +489,28 @@ calculator_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) calculator_dbg_CXXFLAGS = $(CXXFLAGS_DBG) calculator_dbg_LDADD = libmesh_dbg.la +# hilbert_kokkos_benchmark +opt_programs += hilbert_kokkos_benchmark-opt +hilbert_kokkos_benchmark_opt_SOURCES = src/apps/hilbert_kokkos_benchmark.C +hilbert_kokkos_benchmark_opt_SOURCES += src/apps/L2system.C src/apps/L2system.h +hilbert_kokkos_benchmark_opt_CPPFLAGS = $(CPPFLAGS_OPT) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_opt_CXXFLAGS = $(CXXFLAGS_OPT) +hilbert_kokkos_benchmark_opt_LDADD = libmesh_opt.la + +devel_programs += hilbert_kokkos_benchmark-devel +hilbert_kokkos_benchmark_devel_SOURCES = src/apps/hilbert_kokkos_benchmark.C +hilbert_kokkos_benchmark_devel_SOURCES += src/apps/L2system.C src/apps/L2system.h +hilbert_kokkos_benchmark_devel_CPPFLAGS = $(CPPFLAGS_DEVEL) $(AM_CPPFLAGS) 
+hilbert_kokkos_benchmark_devel_CXXFLAGS = $(CXXFLAGS_DEVEL) +hilbert_kokkos_benchmark_devel_LDADD = libmesh_devel.la + +dbg_programs += hilbert_kokkos_benchmark-dbg +hilbert_kokkos_benchmark_dbg_SOURCES = src/apps/hilbert_kokkos_benchmark.C +hilbert_kokkos_benchmark_dbg_SOURCES += src/apps/L2system.C src/apps/L2system.h +hilbert_kokkos_benchmark_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_dbg_CXXFLAGS = $(CXXFLAGS_DBG) +hilbert_kokkos_benchmark_dbg_LDADD = libmesh_dbg.la + # compare opt_programs += compare-opt compare_opt_SOURCES = src/apps/compare.C diff --git a/Makefile.in b/Makefile.in index cec62a434fa..a6ab5a0776e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -202,21 +202,22 @@ CONFIG_CLEAN_VPATH_FILES = am__EXEEXT_1 = fparser_parse-opt$(EXEEXT) getpot_parse-opt$(EXEEXT) \ amr-opt$(EXEEXT) matrixconvert-opt$(EXEEXT) \ matrixsolve-opt$(EXEEXT) meshtool-opt$(EXEEXT) \ - calculator-opt$(EXEEXT) compare-opt$(EXEEXT) \ - meshbcid-opt$(EXEEXT) meshid-opt$(EXEEXT) meshavg-opt$(EXEEXT) \ - meshdiff-opt$(EXEEXT) meshnorm-opt$(EXEEXT) \ - projection-opt$(EXEEXT) output_libmesh_version-opt$(EXEEXT) \ - meshplot-opt$(EXEEXT) solution_components-opt$(EXEEXT) \ - splitter-opt$(EXEEXT) embedding-opt$(EXEEXT) + calculator-opt$(EXEEXT) hilbert_kokkos_benchmark-opt$(EXEEXT) \ + compare-opt$(EXEEXT) meshbcid-opt$(EXEEXT) meshid-opt$(EXEEXT) \ + meshavg-opt$(EXEEXT) meshdiff-opt$(EXEEXT) \ + meshnorm-opt$(EXEEXT) projection-opt$(EXEEXT) \ + output_libmesh_version-opt$(EXEEXT) meshplot-opt$(EXEEXT) \ + solution_components-opt$(EXEEXT) splitter-opt$(EXEEXT) \ + embedding-opt$(EXEEXT) @LIBMESH_OPT_MODE_TRUE@am__EXEEXT_2 = $(am__EXEEXT_1) am__EXEEXT_3 = fparser_parse-devel$(EXEEXT) \ getpot_parse-devel$(EXEEXT) amr-devel$(EXEEXT) \ matrixconvert-devel$(EXEEXT) matrixsolve-devel$(EXEEXT) \ meshtool-devel$(EXEEXT) calculator-devel$(EXEEXT) \ - compare-devel$(EXEEXT) meshbcid-devel$(EXEEXT) \ - meshid-devel$(EXEEXT) meshavg-devel$(EXEEXT) \ - 
meshdiff-devel$(EXEEXT) meshnorm-devel$(EXEEXT) \ - projection-devel$(EXEEXT) \ + hilbert_kokkos_benchmark-devel$(EXEEXT) compare-devel$(EXEEXT) \ + meshbcid-devel$(EXEEXT) meshid-devel$(EXEEXT) \ + meshavg-devel$(EXEEXT) meshdiff-devel$(EXEEXT) \ + meshnorm-devel$(EXEEXT) projection-devel$(EXEEXT) \ output_libmesh_version-devel$(EXEEXT) meshplot-devel$(EXEEXT) \ solution_components-devel$(EXEEXT) splitter-devel$(EXEEXT) \ embedding-devel$(EXEEXT) @@ -224,12 +225,13 @@ am__EXEEXT_3 = fparser_parse-devel$(EXEEXT) \ am__EXEEXT_5 = fparser_parse-dbg$(EXEEXT) getpot_parse-dbg$(EXEEXT) \ amr-dbg$(EXEEXT) matrixconvert-dbg$(EXEEXT) \ matrixsolve-dbg$(EXEEXT) meshtool-dbg$(EXEEXT) \ - calculator-dbg$(EXEEXT) compare-dbg$(EXEEXT) \ - meshbcid-dbg$(EXEEXT) meshid-dbg$(EXEEXT) meshavg-dbg$(EXEEXT) \ - meshdiff-dbg$(EXEEXT) meshnorm-dbg$(EXEEXT) \ - projection-dbg$(EXEEXT) output_libmesh_version-dbg$(EXEEXT) \ - meshplot-dbg$(EXEEXT) solution_components-dbg$(EXEEXT) \ - splitter-dbg$(EXEEXT) embedding-dbg$(EXEEXT) + calculator-dbg$(EXEEXT) hilbert_kokkos_benchmark-dbg$(EXEEXT) \ + compare-dbg$(EXEEXT) meshbcid-dbg$(EXEEXT) meshid-dbg$(EXEEXT) \ + meshavg-dbg$(EXEEXT) meshdiff-dbg$(EXEEXT) \ + meshnorm-dbg$(EXEEXT) projection-dbg$(EXEEXT) \ + output_libmesh_version-dbg$(EXEEXT) meshplot-dbg$(EXEEXT) \ + solution_components-dbg$(EXEEXT) splitter-dbg$(EXEEXT) \ + embedding-dbg$(EXEEXT) @LIBMESH_DBG_MODE_TRUE@am__EXEEXT_6 = $(am__EXEEXT_5) am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(libdir)" \ "$(DESTDIR)$(bindir)" "$(DESTDIR)$(contribbindir)" \ @@ -423,8 +425,9 @@ am__libmesh_dbg_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C 
src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -866,6 +869,7 @@ am__objects_1 = src/base/libmesh_dbg_la-dirichlet_boundary.lo \ src/numerics/libmesh_dbg_la-laspack_vector.lo \ src/numerics/libmesh_dbg_la-lumped_mass_matrix.lo \ src/numerics/libmesh_dbg_la-numeric_vector.lo \ + src/numerics/libmesh_dbg_la-parsed_function_program.lo \ src/numerics/libmesh_dbg_la-petsc_matrix.lo \ src/numerics/libmesh_dbg_la-petsc_matrix_base.lo \ src/numerics/libmesh_dbg_la-petsc_matrix_shell_matrix.lo \ @@ -1238,8 +1242,9 @@ am__libmesh_devel_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -1680,6 +1685,7 @@ am__objects_2 = src/base/libmesh_devel_la-dirichlet_boundary.lo \ src/numerics/libmesh_devel_la-laspack_vector.lo \ src/numerics/libmesh_devel_la-lumped_mass_matrix.lo \ src/numerics/libmesh_devel_la-numeric_vector.lo \ + src/numerics/libmesh_devel_la-parsed_function_program.lo \ src/numerics/libmesh_devel_la-petsc_matrix.lo \ src/numerics/libmesh_devel_la-petsc_matrix_base.lo \ src/numerics/libmesh_devel_la-petsc_matrix_shell_matrix.lo \ @@ -2049,8 +2055,9 @@ am__libmesh_oprof_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - 
src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -2491,6 +2498,7 @@ am__objects_3 = src/base/libmesh_oprof_la-dirichlet_boundary.lo \ src/numerics/libmesh_oprof_la-laspack_vector.lo \ src/numerics/libmesh_oprof_la-lumped_mass_matrix.lo \ src/numerics/libmesh_oprof_la-numeric_vector.lo \ + src/numerics/libmesh_oprof_la-parsed_function_program.lo \ src/numerics/libmesh_oprof_la-petsc_matrix.lo \ src/numerics/libmesh_oprof_la-petsc_matrix_base.lo \ src/numerics/libmesh_oprof_la-petsc_matrix_shell_matrix.lo \ @@ -2860,8 +2868,9 @@ am__libmesh_opt_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -3302,6 +3311,7 @@ am__objects_4 = src/base/libmesh_opt_la-dirichlet_boundary.lo \ src/numerics/libmesh_opt_la-laspack_vector.lo \ src/numerics/libmesh_opt_la-lumped_mass_matrix.lo \ src/numerics/libmesh_opt_la-numeric_vector.lo \ + src/numerics/libmesh_opt_la-parsed_function_program.lo \ src/numerics/libmesh_opt_la-petsc_matrix.lo \ src/numerics/libmesh_opt_la-petsc_matrix_base.lo \ src/numerics/libmesh_opt_la-petsc_matrix_shell_matrix.lo \ @@ -3670,8 +3680,9 @@ am__libmesh_prof_la_SOURCES_DIST = src/base/dirichlet_boundary.C \ src/numerics/eigen_sparse_vector.C \ 
src/numerics/laspack_matrix.C src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ - src/numerics/numeric_vector.C src/numerics/petsc_matrix.C \ - src/numerics/petsc_matrix_base.C \ + src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ + src/numerics/petsc_matrix.C src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ src/numerics/petsc_preconditioner.C \ src/numerics/petsc_shell_matrix.C src/numerics/petsc_vector.C \ @@ -4112,6 +4123,7 @@ am__objects_5 = src/base/libmesh_prof_la-dirichlet_boundary.lo \ src/numerics/libmesh_prof_la-laspack_vector.lo \ src/numerics/libmesh_prof_la-lumped_mass_matrix.lo \ src/numerics/libmesh_prof_la-numeric_vector.lo \ + src/numerics/libmesh_prof_la-parsed_function_program.lo \ src/numerics/libmesh_prof_la-petsc_matrix.lo \ src/numerics/libmesh_prof_la-petsc_matrix_base.lo \ src/numerics/libmesh_prof_la-petsc_matrix_shell_matrix.lo \ @@ -4456,6 +4468,33 @@ getpot_parse_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ $(getpot_parse_opt_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ +am_hilbert_kokkos_benchmark_dbg_OBJECTS = src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.$(OBJEXT) \ + src/apps/hilbert_kokkos_benchmark_dbg-L2system.$(OBJEXT) +hilbert_kokkos_benchmark_dbg_OBJECTS = \ + $(am_hilbert_kokkos_benchmark_dbg_OBJECTS) +hilbert_kokkos_benchmark_dbg_DEPENDENCIES = libmesh_dbg.la +hilbert_kokkos_benchmark_dbg_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_hilbert_kokkos_benchmark_devel_OBJECTS = src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.$(OBJEXT) \ + src/apps/hilbert_kokkos_benchmark_devel-L2system.$(OBJEXT) +hilbert_kokkos_benchmark_devel_OBJECTS = \ + $(am_hilbert_kokkos_benchmark_devel_OBJECTS) 
+hilbert_kokkos_benchmark_devel_DEPENDENCIES = libmesh_devel.la +hilbert_kokkos_benchmark_devel_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am_hilbert_kokkos_benchmark_opt_OBJECTS = src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.$(OBJEXT) \ + src/apps/hilbert_kokkos_benchmark_opt-L2system.$(OBJEXT) +hilbert_kokkos_benchmark_opt_OBJECTS = \ + $(am_hilbert_kokkos_benchmark_opt_OBJECTS) +hilbert_kokkos_benchmark_opt_DEPENDENCIES = libmesh_opt.la +hilbert_kokkos_benchmark_opt_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ am_matrixconvert_dbg_OBJECTS = \ src/apps/matrixconvert_dbg-matrixconvert.$(OBJEXT) matrixconvert_dbg_OBJECTS = $(am_matrixconvert_dbg_OBJECTS) @@ -4771,6 +4810,12 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po \ src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po \ src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po \ + src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po \ src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po \ src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po \ src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po \ @@ -6107,6 +6152,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo \ 
src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo \ @@ -6145,6 +6191,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo \ @@ -6183,6 +6230,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo \ @@ -6221,6 +6269,7 @@ am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo \ @@ -6259,6 +6308,7 @@ 
am__depfiles_remade = src/apps/$(DEPDIR)/amr_dbg-amr.Po \ src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo \ + src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo \ src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo \ @@ -7238,21 +7288,24 @@ SOURCES = $(libmesh_dbg_la_SOURCES) $(libmesh_devel_la_SOURCES) \ $(embedding_opt_SOURCES) $(fparser_parse_dbg_SOURCES) \ $(fparser_parse_devel_SOURCES) $(fparser_parse_opt_SOURCES) \ $(getpot_parse_dbg_SOURCES) $(getpot_parse_devel_SOURCES) \ - $(getpot_parse_opt_SOURCES) $(matrixconvert_dbg_SOURCES) \ - $(matrixconvert_devel_SOURCES) $(matrixconvert_opt_SOURCES) \ - $(matrixsolve_dbg_SOURCES) $(matrixsolve_devel_SOURCES) \ - $(matrixsolve_opt_SOURCES) $(meshavg_dbg_SOURCES) \ - $(meshavg_devel_SOURCES) $(meshavg_opt_SOURCES) \ - $(meshbcid_dbg_SOURCES) $(meshbcid_devel_SOURCES) \ - $(meshbcid_opt_SOURCES) $(meshdiff_dbg_SOURCES) \ - $(meshdiff_devel_SOURCES) $(meshdiff_opt_SOURCES) \ - $(meshid_dbg_SOURCES) $(meshid_devel_SOURCES) \ - $(meshid_opt_SOURCES) $(meshnorm_dbg_SOURCES) \ - $(meshnorm_devel_SOURCES) $(meshnorm_opt_SOURCES) \ - $(meshplot_dbg_SOURCES) $(meshplot_devel_SOURCES) \ - $(meshplot_opt_SOURCES) $(meshtool_dbg_SOURCES) \ - $(meshtool_devel_SOURCES) $(meshtool_opt_SOURCES) \ - $(output_libmesh_version_dbg_SOURCES) \ + $(getpot_parse_opt_SOURCES) \ + $(hilbert_kokkos_benchmark_dbg_SOURCES) \ + $(hilbert_kokkos_benchmark_devel_SOURCES) \ + $(hilbert_kokkos_benchmark_opt_SOURCES) \ + $(matrixconvert_dbg_SOURCES) $(matrixconvert_devel_SOURCES) \ + $(matrixconvert_opt_SOURCES) $(matrixsolve_dbg_SOURCES) \ + $(matrixsolve_devel_SOURCES) $(matrixsolve_opt_SOURCES) \ + $(meshavg_dbg_SOURCES) $(meshavg_devel_SOURCES) \ + 
$(meshavg_opt_SOURCES) $(meshbcid_dbg_SOURCES) \ + $(meshbcid_devel_SOURCES) $(meshbcid_opt_SOURCES) \ + $(meshdiff_dbg_SOURCES) $(meshdiff_devel_SOURCES) \ + $(meshdiff_opt_SOURCES) $(meshid_dbg_SOURCES) \ + $(meshid_devel_SOURCES) $(meshid_opt_SOURCES) \ + $(meshnorm_dbg_SOURCES) $(meshnorm_devel_SOURCES) \ + $(meshnorm_opt_SOURCES) $(meshplot_dbg_SOURCES) \ + $(meshplot_devel_SOURCES) $(meshplot_opt_SOURCES) \ + $(meshtool_dbg_SOURCES) $(meshtool_devel_SOURCES) \ + $(meshtool_opt_SOURCES) $(output_libmesh_version_dbg_SOURCES) \ $(output_libmesh_version_devel_SOURCES) \ $(output_libmesh_version_opt_SOURCES) \ $(projection_dbg_SOURCES) $(projection_devel_SOURCES) \ @@ -7273,21 +7326,24 @@ DIST_SOURCES = $(am__libmesh_dbg_la_SOURCES_DIST) \ $(embedding_opt_SOURCES) $(fparser_parse_dbg_SOURCES) \ $(fparser_parse_devel_SOURCES) $(fparser_parse_opt_SOURCES) \ $(getpot_parse_dbg_SOURCES) $(getpot_parse_devel_SOURCES) \ - $(getpot_parse_opt_SOURCES) $(matrixconvert_dbg_SOURCES) \ - $(matrixconvert_devel_SOURCES) $(matrixconvert_opt_SOURCES) \ - $(matrixsolve_dbg_SOURCES) $(matrixsolve_devel_SOURCES) \ - $(matrixsolve_opt_SOURCES) $(meshavg_dbg_SOURCES) \ - $(meshavg_devel_SOURCES) $(meshavg_opt_SOURCES) \ - $(meshbcid_dbg_SOURCES) $(meshbcid_devel_SOURCES) \ - $(meshbcid_opt_SOURCES) $(meshdiff_dbg_SOURCES) \ - $(meshdiff_devel_SOURCES) $(meshdiff_opt_SOURCES) \ - $(meshid_dbg_SOURCES) $(meshid_devel_SOURCES) \ - $(meshid_opt_SOURCES) $(meshnorm_dbg_SOURCES) \ - $(meshnorm_devel_SOURCES) $(meshnorm_opt_SOURCES) \ - $(meshplot_dbg_SOURCES) $(meshplot_devel_SOURCES) \ - $(meshplot_opt_SOURCES) $(meshtool_dbg_SOURCES) \ - $(meshtool_devel_SOURCES) $(meshtool_opt_SOURCES) \ - $(output_libmesh_version_dbg_SOURCES) \ + $(getpot_parse_opt_SOURCES) \ + $(hilbert_kokkos_benchmark_dbg_SOURCES) \ + $(hilbert_kokkos_benchmark_devel_SOURCES) \ + $(hilbert_kokkos_benchmark_opt_SOURCES) \ + $(matrixconvert_dbg_SOURCES) $(matrixconvert_devel_SOURCES) \ + $(matrixconvert_opt_SOURCES) 
$(matrixsolve_dbg_SOURCES) \ + $(matrixsolve_devel_SOURCES) $(matrixsolve_opt_SOURCES) \ + $(meshavg_dbg_SOURCES) $(meshavg_devel_SOURCES) \ + $(meshavg_opt_SOURCES) $(meshbcid_dbg_SOURCES) \ + $(meshbcid_devel_SOURCES) $(meshbcid_opt_SOURCES) \ + $(meshdiff_dbg_SOURCES) $(meshdiff_devel_SOURCES) \ + $(meshdiff_opt_SOURCES) $(meshid_dbg_SOURCES) \ + $(meshid_devel_SOURCES) $(meshid_opt_SOURCES) \ + $(meshnorm_dbg_SOURCES) $(meshnorm_devel_SOURCES) \ + $(meshnorm_opt_SOURCES) $(meshplot_dbg_SOURCES) \ + $(meshplot_devel_SOURCES) $(meshplot_opt_SOURCES) \ + $(meshtool_dbg_SOURCES) $(meshtool_devel_SOURCES) \ + $(meshtool_opt_SOURCES) $(output_libmesh_version_dbg_SOURCES) \ $(output_libmesh_version_devel_SOURCES) \ $(output_libmesh_version_opt_SOURCES) \ $(projection_dbg_SOURCES) $(projection_devel_SOURCES) \ @@ -7517,11 +7573,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -7569,6 +7633,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -8135,6 +8200,7 @@ libmesh_SOURCES = \ src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ src/numerics/petsc_matrix.C \ src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ @@ -8390,6 +8456,8 @@ CLEANFILES = 
$(am__append_21) # calculator +# hilbert_kokkos_benchmark + # compare # meshbcid @@ -8415,22 +8483,23 @@ CLEANFILES = $(am__append_21) # embedding opt_programs = fparser_parse-opt getpot_parse-opt amr-opt \ matrixconvert-opt matrixsolve-opt meshtool-opt calculator-opt \ - compare-opt meshbcid-opt meshid-opt meshavg-opt meshdiff-opt \ - meshnorm-opt projection-opt output_libmesh_version-opt \ - meshplot-opt solution_components-opt splitter-opt \ - embedding-opt + hilbert_kokkos_benchmark-opt compare-opt meshbcid-opt \ + meshid-opt meshavg-opt meshdiff-opt meshnorm-opt \ + projection-opt output_libmesh_version-opt meshplot-opt \ + solution_components-opt splitter-opt embedding-opt devel_programs = fparser_parse-devel getpot_parse-devel amr-devel \ matrixconvert-devel matrixsolve-devel meshtool-devel \ - calculator-devel compare-devel meshbcid-devel meshid-devel \ - meshavg-devel meshdiff-devel meshnorm-devel projection-devel \ - output_libmesh_version-devel meshplot-devel \ - solution_components-devel splitter-devel embedding-devel + calculator-devel hilbert_kokkos_benchmark-devel compare-devel \ + meshbcid-devel meshid-devel meshavg-devel meshdiff-devel \ + meshnorm-devel projection-devel output_libmesh_version-devel \ + meshplot-devel solution_components-devel splitter-devel \ + embedding-devel dbg_programs = fparser_parse-dbg getpot_parse-dbg amr-dbg \ matrixconvert-dbg matrixsolve-dbg meshtool-dbg calculator-dbg \ - compare-dbg meshbcid-dbg meshid-dbg meshavg-dbg meshdiff-dbg \ - meshnorm-dbg projection-dbg output_libmesh_version-dbg \ - meshplot-dbg solution_components-dbg splitter-dbg \ - embedding-dbg + hilbert_kokkos_benchmark-dbg compare-dbg meshbcid-dbg \ + meshid-dbg meshavg-dbg meshdiff-dbg meshnorm-dbg \ + projection-dbg output_libmesh_version-dbg meshplot-dbg \ + solution_components-dbg splitter-dbg embedding-dbg prof_programs = # empty, append below oprof_programs = # empty, append below fparser_parse_opt_SOURCES = src/apps/fparser_parse.C @@ -8520,6 
+8589,24 @@ calculator_dbg_SOURCES = src/apps/calculator.C src/apps/L2system.C \ calculator_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) calculator_dbg_CXXFLAGS = $(CXXFLAGS_DBG) calculator_dbg_LDADD = libmesh_dbg.la +hilbert_kokkos_benchmark_opt_SOURCES = \ + src/apps/hilbert_kokkos_benchmark.C src/apps/L2system.C \ + src/apps/L2system.h +hilbert_kokkos_benchmark_opt_CPPFLAGS = $(CPPFLAGS_OPT) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_opt_CXXFLAGS = $(CXXFLAGS_OPT) +hilbert_kokkos_benchmark_opt_LDADD = libmesh_opt.la +hilbert_kokkos_benchmark_devel_SOURCES = \ + src/apps/hilbert_kokkos_benchmark.C src/apps/L2system.C \ + src/apps/L2system.h +hilbert_kokkos_benchmark_devel_CPPFLAGS = $(CPPFLAGS_DEVEL) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_devel_CXXFLAGS = $(CXXFLAGS_DEVEL) +hilbert_kokkos_benchmark_devel_LDADD = libmesh_devel.la +hilbert_kokkos_benchmark_dbg_SOURCES = \ + src/apps/hilbert_kokkos_benchmark.C src/apps/L2system.C \ + src/apps/L2system.h +hilbert_kokkos_benchmark_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) +hilbert_kokkos_benchmark_dbg_CXXFLAGS = $(CXXFLAGS_DBG) +hilbert_kokkos_benchmark_dbg_LDADD = libmesh_dbg.la compare_opt_SOURCES = src/apps/compare.C compare_opt_CPPFLAGS = $(CPPFLAGS_OPT) $(AM_CPPFLAGS) compare_opt_CXXFLAGS = $(CXXFLAGS_OPT) @@ -9432,6 +9519,9 @@ src/numerics/libmesh_dbg_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_dbg_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_dbg_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_dbg_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -10657,6 +10747,9 @@ src/numerics/libmesh_devel_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_devel_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_devel_la-parsed_function_program.lo: \ + 
src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_devel_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -11831,6 +11924,9 @@ src/numerics/libmesh_oprof_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_oprof_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_oprof_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_oprof_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -13005,6 +13101,9 @@ src/numerics/libmesh_opt_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_opt_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_opt_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_opt_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -14176,6 +14275,9 @@ src/numerics/libmesh_prof_la-lumped_mass_matrix.lo: \ src/numerics/libmesh_prof_la-numeric_vector.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) +src/numerics/libmesh_prof_la-parsed_function_program.lo: \ + src/numerics/$(am__dirstamp) \ + src/numerics/$(DEPDIR)/$(am__dirstamp) src/numerics/libmesh_prof_la-petsc_matrix.lo: \ src/numerics/$(am__dirstamp) \ src/numerics/$(DEPDIR)/$(am__dirstamp) @@ -14883,6 +14985,30 @@ src/apps/getpot_parse_opt-getpot_parse.$(OBJEXT): \ getpot_parse-opt$(EXEEXT): $(getpot_parse_opt_OBJECTS) $(getpot_parse_opt_DEPENDENCIES) $(EXTRA_getpot_parse_opt_DEPENDENCIES) @rm -f getpot_parse-opt$(EXEEXT) $(AM_V_CXXLD)$(getpot_parse_opt_LINK) $(getpot_parse_opt_OBJECTS) $(getpot_parse_opt_LDADD) $(LIBS) +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.$(OBJEXT): \ + src/apps/$(am__dirstamp) 
src/apps/$(DEPDIR)/$(am__dirstamp) +src/apps/hilbert_kokkos_benchmark_dbg-L2system.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) + +hilbert_kokkos_benchmark-dbg$(EXEEXT): $(hilbert_kokkos_benchmark_dbg_OBJECTS) $(hilbert_kokkos_benchmark_dbg_DEPENDENCIES) $(EXTRA_hilbert_kokkos_benchmark_dbg_DEPENDENCIES) + @rm -f hilbert_kokkos_benchmark-dbg$(EXEEXT) + $(AM_V_CXXLD)$(hilbert_kokkos_benchmark_dbg_LINK) $(hilbert_kokkos_benchmark_dbg_OBJECTS) $(hilbert_kokkos_benchmark_dbg_LDADD) $(LIBS) +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) +src/apps/hilbert_kokkos_benchmark_devel-L2system.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) + +hilbert_kokkos_benchmark-devel$(EXEEXT): $(hilbert_kokkos_benchmark_devel_OBJECTS) $(hilbert_kokkos_benchmark_devel_DEPENDENCIES) $(EXTRA_hilbert_kokkos_benchmark_devel_DEPENDENCIES) + @rm -f hilbert_kokkos_benchmark-devel$(EXEEXT) + $(AM_V_CXXLD)$(hilbert_kokkos_benchmark_devel_LINK) $(hilbert_kokkos_benchmark_devel_OBJECTS) $(hilbert_kokkos_benchmark_devel_LDADD) $(LIBS) +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) +src/apps/hilbert_kokkos_benchmark_opt-L2system.$(OBJEXT): \ + src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) + +hilbert_kokkos_benchmark-opt$(EXEEXT): $(hilbert_kokkos_benchmark_opt_OBJECTS) $(hilbert_kokkos_benchmark_opt_DEPENDENCIES) $(EXTRA_hilbert_kokkos_benchmark_opt_DEPENDENCIES) + @rm -f hilbert_kokkos_benchmark-opt$(EXEEXT) + $(AM_V_CXXLD)$(hilbert_kokkos_benchmark_opt_LINK) $(hilbert_kokkos_benchmark_opt_OBJECTS) $(hilbert_kokkos_benchmark_opt_LDADD) $(LIBS) src/apps/matrixconvert_dbg-matrixconvert.$(OBJEXT): \ src/apps/$(am__dirstamp) src/apps/$(DEPDIR)/$(am__dirstamp) @@ -15283,6 +15409,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ 
@am__quote@src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po@am__quote@ # am--include-marker @@ -16619,6 +16751,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16657,6 +16790,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16695,6 +16829,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16733,6 +16868,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -16771,6 +16907,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo@am__quote@ # am--include-marker @@ -19644,6 +19781,13 @@ src/numerics/libmesh_dbg_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_dbg_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_dbg_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_dbg_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_dbg_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_dbg_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_dbg_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_dbg_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_dbg_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_dbg_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_dbg_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_dbg_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo @@ -22976,6 +23120,13 @@ src/numerics/libmesh_devel_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_devel_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C 
+src/numerics/libmesh_devel_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_devel_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_devel_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_devel_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_devel_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_devel_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_devel_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_devel_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_devel_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Tpo -c -o 
src/numerics/libmesh_devel_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo @@ -26308,6 +26459,13 @@ src/numerics/libmesh_oprof_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_oprof_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_oprof_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_oprof_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_oprof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_oprof_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_oprof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_oprof_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_oprof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_oprof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_oprof_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_oprof_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo @@ -29640,6 +29798,13 @@ src/numerics/libmesh_opt_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_opt_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_opt_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_opt_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_opt_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_opt_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_opt_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_opt_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_opt_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_opt_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_opt_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_opt_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Tpo 
src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo @@ -32972,6 +33137,13 @@ src/numerics/libmesh_prof_la-numeric_vector.lo: src/numerics/numeric_vector.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_prof_la-numeric_vector.lo `test -f 'src/numerics/numeric_vector.C' || echo '$(srcdir)/'`src/numerics/numeric_vector.C +src/numerics/libmesh_prof_la-parsed_function_program.lo: src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_prof_la-parsed_function_program.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Tpo -c -o src/numerics/libmesh_prof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Tpo src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/numerics/parsed_function_program.C' object='src/numerics/libmesh_prof_la-parsed_function_program.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) 
$(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -c -o src/numerics/libmesh_prof_la-parsed_function_program.lo `test -f 'src/numerics/parsed_function_program.C' || echo '$(srcdir)/'`src/numerics/parsed_function_program.C + src/numerics/libmesh_prof_la-petsc_matrix.lo: src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmesh_prof_la_CPPFLAGS) $(CPPFLAGS) $(libmesh_prof_la_CXXFLAGS) $(CXXFLAGS) -MT src/numerics/libmesh_prof_la-petsc_matrix.lo -MD -MP -MF src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Tpo -c -o src/numerics/libmesh_prof_la-petsc_matrix.lo `test -f 'src/numerics/petsc_matrix.C' || echo '$(srcdir)/'`src/numerics/petsc_matrix.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Tpo src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo @@ -34687,6 +34859,90 @@ src/apps/getpot_parse_opt-getpot_parse.obj: src/apps/getpot_parse.C @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(getpot_parse_opt_CPPFLAGS) $(CPPFLAGS) $(getpot_parse_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/getpot_parse_opt-getpot_parse.obj `if test -f 'src/apps/getpot_parse.C'; then $(CYGPATH_W) 'src/apps/getpot_parse.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/getpot_parse.C'; fi` +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo -c -o 
src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C + +src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` + +src/apps/hilbert_kokkos_benchmark_dbg-L2system.o: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-L2system.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_dbg-L2system.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C + 
+src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_dbg_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_dbg_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_dbg-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` + +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo 
'$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C + +src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' 
object='src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` + +src/apps/hilbert_kokkos_benchmark_devel-L2system.o: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-L2system.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_devel-L2system.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C + +src/apps/hilbert_kokkos_benchmark_devel-L2system.obj: 
src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_devel-L2system.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_devel-L2system.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_devel_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_devel_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_devel-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` + +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C 
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.o `test -f 'src/apps/hilbert_kokkos_benchmark.C' || echo '$(srcdir)/'`src/apps/hilbert_kokkos_benchmark.C + +src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj: src/apps/hilbert_kokkos_benchmark.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/hilbert_kokkos_benchmark.C' object='src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.obj `if test -f 'src/apps/hilbert_kokkos_benchmark.C'; then $(CYGPATH_W) 'src/apps/hilbert_kokkos_benchmark.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/hilbert_kokkos_benchmark.C'; fi` + +src/apps/hilbert_kokkos_benchmark_opt-L2system.o: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-L2system.o -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_opt-L2system.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.o `test -f 'src/apps/L2system.C' || echo '$(srcdir)/'`src/apps/L2system.C + +src/apps/hilbert_kokkos_benchmark_opt-L2system.obj: src/apps/L2system.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) 
$(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -MT src/apps/hilbert_kokkos_benchmark_opt-L2system.obj -MD -MP -MF src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Tpo src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/apps/L2system.C' object='src/apps/hilbert_kokkos_benchmark_opt-L2system.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(hilbert_kokkos_benchmark_opt_CPPFLAGS) $(CPPFLAGS) $(hilbert_kokkos_benchmark_opt_CXXFLAGS) $(CXXFLAGS) -c -o src/apps/hilbert_kokkos_benchmark_opt-L2system.obj `if test -f 'src/apps/L2system.C'; then $(CYGPATH_W) 'src/apps/L2system.C'; else $(CYGPATH_W) '$(srcdir)/src/apps/L2system.C'; fi` + src/apps/matrixconvert_dbg-matrixconvert.o: src/apps/matrixconvert.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(matrixconvert_dbg_CPPFLAGS) $(CPPFLAGS) $(matrixconvert_dbg_CXXFLAGS) $(CXXFLAGS) -MT src/apps/matrixconvert_dbg-matrixconvert.o -MD -MP -MF src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Tpo -c -o src/apps/matrixconvert_dbg-matrixconvert.o `test -f 'src/apps/matrixconvert.C' || echo '$(srcdir)/'`src/apps/matrixconvert.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Tpo src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po @@ -35738,6 +35994,12 @@ distclean: distclean-recursive -rm -f src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po -rm -f 
src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po @@ -37074,6 +37336,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo @@ -37112,6 +37375,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo @@ -37150,6 +37414,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo -rm -f 
src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo @@ -37188,6 +37453,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo @@ -37226,6 +37492,7 @@ distclean: distclean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo @@ -38229,6 +38496,12 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/apps/$(DEPDIR)/getpot_parse_dbg-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_devel-getpot_parse.Po -rm -f src/apps/$(DEPDIR)/getpot_parse_opt-getpot_parse.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_dbg-hilbert_kokkos_benchmark.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_devel-hilbert_kokkos_benchmark.Po + -rm -f 
src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-L2system.Po + -rm -f src/apps/$(DEPDIR)/hilbert_kokkos_benchmark_opt-hilbert_kokkos_benchmark.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_dbg-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_devel-matrixconvert.Po -rm -f src/apps/$(DEPDIR)/matrixconvert_opt-matrixconvert.Po @@ -39565,6 +39838,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_dbg_la-petsc_matrix_shell_matrix.Plo @@ -39603,6 +39877,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_devel_la-petsc_matrix_shell_matrix.Plo @@ -39641,6 +39916,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_oprof_la-petsc_matrix_shell_matrix.Plo @@ 
-39679,6 +39955,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_opt_la-petsc_matrix_shell_matrix.Plo @@ -39717,6 +39994,7 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-laspack_vector.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-lumped_mass_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-numeric_vector.Plo + -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-parsed_function_program.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_base.Plo -rm -f src/numerics/$(DEPDIR)/libmesh_prof_la-petsc_matrix_shell_matrix.Plo
diff --git a/build-aux/libmesh_nvcc_wrapper b/build-aux/libmesh_nvcc_wrapper
new file mode 100755
index 00000000000..32798574ddd
--- /dev/null
+++ b/build-aux/libmesh_nvcc_wrapper
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Wrapper around Kokkos' nvcc_wrapper to smooth over Automake dependency
+# tracking flags that nvcc_wrapper does not fully understand.
+#
+# In particular:
+# - Automake commonly passes -MP, which is meaningful to GCC dependency
+#   generation but ends up leaking to the host compiler without -M/-MM through
+#   nvcc_wrapper's split dependency path.
+# - -MQ is the quoted-target form of -MT; nvcc_wrapper only understands -MT.
+
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 REAL_NVCC_WRAPPER [ARGS...]" >&2
+  exit 2
+fi
+
+# The first argument names the real nvcc_wrapper; the rest are forwarded to it.
+real_nvcc_wrapper=$1
+shift
+
+forwarded_args=()
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -MP)
+      shift
+      ;;
+    -MQ)
+      if [[ $# -lt 2 ]]; then
+        echo "$0: -MQ requires an argument" >&2
+        exit 2
+      fi
+      forwarded_args+=(-MT "$2")
+      shift 2
+      ;;
+    *)
+      forwarded_args+=("$1")
+      shift
+      ;;
+  esac
+done
+
+# Under `set -u`, bash releases before 4.4 treat "${arr[@]}" on an empty array
+# as an unbound variable. The ${arr[@]+...} form expands to nothing in that
+# case, so a call whose only arguments were dropped above still reaches exec.
+exec "$real_nvcc_wrapper" ${forwarded_args[@]+"${forwarded_args[@]}"}
diff --git a/configure b/configure index 33ff470d60a..c3eed3aee79 100755 --- a/configure +++ b/configure @@ -672,6 +672,17 @@ libmesh_contrib_LDFLAGS libmesh_contrib_INCLUDES libmesh_optional_LIBS libmesh_optional_INCLUDES +LIBMESH_ENABLE_KOKKOS_FALSE +LIBMESH_ENABLE_KOKKOS_TRUE +KOKKOS_MPI_CPPFLAGS +KOKKOS_LIBS +KOKKOS_LDFLAGS +KOKKOS_CXXFLAGS +KOKKOS_CPPFLAGS +KOKKOS_CXX +ICPX +HIPCC +NVCC LIBMESH_ENABLE_METAPHYSICL_FALSE LIBMESH_ENABLE_METAPHYSICL_TRUE METAPHYSICL_INCLUDE @@ -1347,6 +1358,8 @@ enable_metaphysicl with_metaphysicl with_metaphysicl_include enable_metaphysicl_required +with_kokkos +with_kokkos_backend ' ac_precious_vars='build_alias host_alias @@ -2273,6 +2286,10 @@ Optional Packages: internal: build from contrib --with-metaphysicl-include= + --with-kokkos=DIR Enable Kokkos support using the installation at DIR + --with-kokkos-backend=BACKEND + cuda|hip|sycl|openmp|serial (default: auto-detect + from KokkosCore_config.h) Some influential environment variables: PETSC_DIR path to PETSc installation @@ -58938,7 +58955,7 @@ fi if test "x$enablepoly2tri" = "xyes" then : - POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified" + POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified -I\$(top_srcdir)/contrib/poly2tri/poly2tri" printf "%s\n" "#define HAVE_POLY2TRI 1" >>confdefs.h @@ -63913,6 +63930,481 @@ fi +# ------------------------------------------------------------- +# Kokkos -- optional, enables the native Kokkos FE math path +#
------------------------------------------------------------- + +# Check whether --with-kokkos was given. +if test ${with_kokkos+y} +then : + withval=$with_kokkos; KOKKOS_DIR="$withval" +else case e in #( + e) KOKKOS_DIR="no" ;; +esac +fi + + + +# Check whether --with-kokkos-backend was given. +if test ${with_kokkos_backend+y} +then : + withval=$with_kokkos_backend; KOKKOS_BACKEND="$withval" +else case e in #( + e) KOKKOS_BACKEND="auto" ;; +esac +fi + + + +if test "x$KOKKOS_DIR" != "xno" +then : + + as_ac_File=`printf "%s\n" "ac_cv_file_$KOKKOS_DIR/include/Kokkos_Core.hpp" | sed "$as_sed_sh"` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $KOKKOS_DIR/include/Kokkos_Core.hpp" >&5 +printf %s "checking for $KOKKOS_DIR/include/Kokkos_Core.hpp... " >&6; } +if eval test \${$as_ac_File+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) test "$cross_compiling" = yes && + as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 +if test -r "$KOKKOS_DIR/include/Kokkos_Core.hpp"; then + eval "$as_ac_File=yes" +else + eval "$as_ac_File=no" +fi ;; +esac +fi +eval ac_res=\$$as_ac_File + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +printf "%s\n" "$ac_res" >&6; } +if eval test \"x\$"$as_ac_File"\" = x"yes" +then : + + enablekokkos=yes + libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES -I$KOKKOS_DIR/include" + libmesh_optional_LIBS="$libmesh_optional_LIBS -L$KOKKOS_DIR/lib -lkokkoscore"
+
+    if test "x$KOKKOS_CXX" = "x"
+then :
+
+      KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h" # greps below accept only real '#define' lines: CMake leaves '/* #undef X */' for disabled options
+
+      if test "x$KOKKOS_BACKEND" = "xauto"
+then :
+
+        if test -r "$KOKKOS_CFG"
+then :
+
+          if grep -Eq '^ *# *define +KOKKOS_ENABLE_CUDA( |$)' "$KOKKOS_CFG"
+then :
+  KOKKOS_BACKEND=cuda
+else case e in #(
+  e) if grep -Eq '^ *# *define +KOKKOS_ENABLE_HIP( |$)' "$KOKKOS_CFG"
+then :
+  KOKKOS_BACKEND=hip
+else case e in #(
+  e) if grep -Eq '^ *# *define +KOKKOS_ENABLE_SYCL( |$)' "$KOKKOS_CFG"
+then :
+  KOKKOS_BACKEND=sycl
+else case e in #(
+  e) if grep -Eq '^ *# *define +KOKKOS_ENABLE_OPENMP( |$)' "$KOKKOS_CFG"
+then :
+  KOKKOS_BACKEND=openmp
+else case e in #(
+  e) KOKKOS_BACKEND=serial ;;
+esac
+fi ;;
+esac
+fi ;;
+esac
+fi ;;
+esac
+fi
+
+else case e in #(
+  e) KOKKOS_BACKEND=serial ;;
+esac
+fi
+
+fi
+
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Kokkos backend: $KOKKOS_BACKEND" >&5
+printf "%s\n" "Kokkos backend: $KOKKOS_BACKEND" >&6; }
+
+      have_kokkos_openmp=no
+      if test -r "$KOKKOS_CFG"
+then :
+  if grep -Eq '^ *# *define +KOKKOS_ENABLE_OPENMP( |$)' "$KOKKOS_CFG"
+then :
+  have_kokkos_openmp=yes
+fi
+fi
+
+ case "$KOKKOS_BACKEND" in + cuda) + # Extract the first word of "nvcc", so it can be a program name with args. +set dummy nvcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_NVCC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $NVCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_NVCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="no" + ;; +esac ;; +esac +fi +NVCC=$ac_cv_path_NVCC +if test -n "$NVCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5 +printf "%s\n" "$NVCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$NVCC" = "xno" +then : + as_fn_error $?
"nvcc not found but Kokkos CUDA backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$NVCC" + KOKKOS_CXXFLAGS="--forward-unknown-to-host-compiler --extended-lambda --disable-warnings -x cu -ccbin $CXX" + KOKKOS_LDFLAGS="--forward-unknown-to-host-compiler -L$KOKKOS_DIR/lib" + if test "x$have_kokkos_openmp" = "xyes" +then : + + KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp" + KOKKOS_LDFLAGS="$KOKKOS_LDFLAGS -fopenmp" + +fi + ;; + hip) + # Extract the first word of "hipcc", so it can be a program name with args. +set dummy hipcc; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_HIPCC+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $HIPCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_HIPCC="$HIPCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_HIPCC="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_HIPCC" && ac_cv_path_HIPCC="no" + ;; +esac ;; +esac +fi +HIPCC=$ac_cv_path_HIPCC +if test -n "$HIPCC"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $HIPCC" >&5 +printf "%s\n" "$HIPCC" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$HIPCC" = "xno" +then : + as_fn_error $? "hipcc not found but Kokkos HIP backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$HIPCC" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + sycl) + # Extract the first word of "icpx", so it can be a program name with args. 
+set dummy icpx; ac_word=$2 +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_path_ICPX+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) case $ICPX in + [\\/]* | ?:[\\/]*) + ac_cv_path_ICPX="$ICPX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_path_ICPX="$as_dir$ac_word$ac_exec_ext" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_ICPX" && ac_cv_path_ICPX="no" + ;; +esac ;; +esac +fi +ICPX=$ac_cv_path_ICPX +if test -n "$ICPX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ICPX" >&5 +printf "%s\n" "$ICPX" >&6; } +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + + + if test "x$ICPX" = "xno" +then : + as_fn_error $?
"icpx not found but Kokkos SYCL backend requested" "$LINENO" 5 +fi + KOKKOS_CXX="$ICPX" + KOKKOS_CXXFLAGS="-fsycl" + KOKKOS_LDFLAGS="-fsycl -L$KOKKOS_DIR/lib" + ;; + openmp) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-fopenmp -x c++" + KOKKOS_LDFLAGS="-fopenmp -L$KOKKOS_DIR/lib" + ;; + serial|*) + KOKKOS_CXX="${CXX}" + KOKKOS_CXXFLAGS="-x c++" + KOKKOS_LDFLAGS="-L$KOKKOS_DIR/lib" + ;; + esac + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Using caller-provided KOKKOS_CXX=$KOKKOS_CXX" >&5 +printf "%s\n" "Using caller-provided KOKKOS_CXX=$KOKKOS_CXX" >&6; } ;; +esac +fi + + KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION -I$KOKKOS_DIR/include}" + KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:--L$KOKKOS_DIR/lib}" + KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}"
+
+    KOKKOS_MPI_CPPFLAGS=""
+    if test "x$enablempi" = "xyes" && test "x$KOKKOS_CXX" != "x$CXX"
+then :
+
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MPI compile flags usable with KOKKOS_CXX" >&5
+printf %s "checking for MPI compile flags usable with KOKKOS_CXX... " >&6; }
+      KOKKOS_MPI_CPPFLAGS=`$CXX -showme:compile 2>/dev/null`
+      if test "x$KOKKOS_MPI_CPPFLAGS" = "x"
+then :
+  KOKKOS_MPI_CPPFLAGS=`$CXX -compile_info 2>/dev/null`
+fi
+      if test "x$KOKKOS_MPI_CPPFLAGS" = "x"
+then :
+  KOKKOS_MPI_CPPFLAGS=`$CXX -show 2>/dev/null | sed 's/^[^ ]* //'` # drop the first word of the -show output (presumably the underlying compiler name); the m4 source must spell these brackets as quadrigraphs
+fi
+      if test "x$KOKKOS_MPI_CPPFLAGS" = "x"
+then :
+  KOKKOS_MPI_CPPFLAGS="$MPI_INCLUDES"
+fi
+      if test "x$KOKKOS_MPI_CPPFLAGS" = "x"
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: not found" >&5
+printf "%s\n" "not found" >&6; }
+else case e in #(
+  e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $KOKKOS_MPI_CPPFLAGS" >&5
+printf "%s\n" "$KOKKOS_MPI_CPPFLAGS" >&6; } ;;
+esac
+fi
+
+fi
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the Kokkos compiler configuration works" >&5 +printf %s "checking whether the Kokkos compiler configuration works...
" >&6; } + libmesh_save_CXX="$CXX" + libmesh_save_CPPFLAGS="$CPPFLAGS" + libmesh_save_CXXFLAGS="$CXXFLAGS" + libmesh_save_LDFLAGS="$LDFLAGS" + libmesh_save_LIBS="$LIBS" + + CXX="$KOKKOS_CXX" + CPPFLAGS="$CPPFLAGS $KOKKOS_CPPFLAGS $KOKKOS_MPI_CPPFLAGS" + CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS" + LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS" + LIBS="$LIBS $KOKKOS_LIBS" + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + + if test "x$enablempi" = "xyes" +then : + + LDFLAGS="$LDFLAGS $MPI_LDFLAGS" + LIBS="$LIBS $MPI_LIBS"
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <mpi.h>           /* MPI_Init, MPI_Finalize */
+#include <Kokkos_Core.hpp> /* Kokkos::initialize, Kokkos::finalize */
+int main(int argc, char ** argv)
+{
+  MPI_Init(&argc, &argv);
+  Kokkos::initialize(argc, argv);
+  Kokkos::finalize();
+  MPI_Finalize();
+  return 0;
+}
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO" +then : + kokkos_config_works=yes +else case e in #( + e) kokkos_config_works=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + +else case e in #( + e)
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#include <Kokkos_Core.hpp> /* Kokkos::initialize, Kokkos::finalize */
+int main(int argc, char ** argv)
+{
+  Kokkos::initialize(argc, argv);
+  Kokkos::finalize();
+  return 0;
+}
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO" +then : + kokkos_config_works=yes +else case e in #( + e) kokkos_config_works=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + ;; +esac +fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + CXX="$libmesh_save_CXX" + CPPFLAGS="$libmesh_save_CPPFLAGS" + CXXFLAGS="$libmesh_save_CXXFLAGS" + LDFLAGS="$libmesh_save_LDFLAGS" + LIBS="$libmesh_save_LIBS" + + if test "x$kokkos_config_works" = "xyes" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else case e in #( + e) as_fn_error $? "configured Kokkos compiler/flags failed to compile and link a minimal test program" "$LINENO" 5 ;; +esac +fi + +printf "%s\n" "#define HAVE_KOKKOS 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: <<< Configuring library with Kokkos support >>>" >&5 +printf "%s\n" "<<< Configuring library with Kokkos support >>>" >&6; } + +else case e in #( + e) + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support" >&5 +printf "%s\n" "$as_me: WARNING: Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support" >&2;} + enablekokkos=no + ;; +esac +fi + + +else case e in #( + e) enablekokkos=no ;; +esac +fi + + + + + + + + if test x$enablekokkos = xyes; then + LIBMESH_ENABLE_KOKKOS_TRUE= + LIBMESH_ENABLE_KOKKOS_FALSE='#' +else + LIBMESH_ENABLE_KOKKOS_TRUE='#' + LIBMESH_ENABLE_KOKKOS_FALSE= +fi + +# ------------------------------------------------------------- + + + if test "$enableoptional" != no then : @@ -65182,6 +65674,10 @@ if test -z
"${LIBMESH_ENABLE_METAPHYSICL_TRUE}" && test -z "${LIBMESH_ENABLE_MET as_fn_error $? "conditional \"LIBMESH_ENABLE_METAPHYSICL\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${LIBMESH_ENABLE_KOKKOS_TRUE}" && test -z "${LIBMESH_ENABLE_KOKKOS_FALSE}"; then + as_fn_error $? "conditional \"LIBMESH_ENABLE_KOKKOS\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${GIT_CHECKOUT_TRUE}" && test -z "${GIT_CHECKOUT_FALSE}"; then as_fn_error $? "conditional \"GIT_CHECKOUT\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/contrib/Makefile.in b/contrib/Makefile.in index 6c8d8649cdc..8f435734690 100644 --- a/contrib/Makefile.in +++ b/contrib/Makefile.in @@ -604,11 +604,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -656,6 +664,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/bin/libmesh-config.in b/contrib/bin/libmesh-config.in index b935b5b334b..e75de90ed95 100644 --- a/contrib/bin/libmesh-config.in +++ b/contrib/bin/libmesh-config.in @@ -106,7 +106,7 @@ while [ "x$1" != "x" ]; do ;; "--cppflags") - return_val="${CPPFLAGS} $return_val" + return_val="${CPPFLAGS} 
@KOKKOS_CPPFLAGS@ $return_val" ;; "--cxxflags") diff --git a/contrib/bin/test_installed_examples.sh b/contrib/bin/test_installed_examples.sh index 8be94eccde2..c255be44304 100755 --- a/contrib/bin/test_installed_examples.sh +++ b/contrib/bin/test_installed_examples.sh @@ -29,9 +29,13 @@ fi echo "Testing examples in $examples_install_path" +if test "$PKG_CONFIG" = ""; then + PKG_CONFIG=pkg-config +fi + if test "$PKG_CONFIG" != "no"; then - installed_CXXFLAGS=$(pkg-config libmesh --cflags) - installed_LIBS=$(pkg-config libmesh --libs) + installed_CXXFLAGS=$($PKG_CONFIG libmesh --cflags) + installed_LIBS=$($PKG_CONFIG libmesh --libs) elif test -x $LIBMESH_CONFIG_PATH/libmesh-config; then installed_CXXFLAGS=$($LIBMESH_CONFIG_PATH/libmesh-config --cppflags --cxxflags --include) diff --git a/contrib/bin/test_installed_headers.sh b/contrib/bin/test_installed_headers.sh index 2154bc19c20..645ba266a49 100755 --- a/contrib/bin/test_installed_headers.sh +++ b/contrib/bin/test_installed_headers.sh @@ -49,12 +49,16 @@ fi # Variable is set but not used # testing_installed_tree="no" +if test "$PKG_CONFIG" = ""; then + PKG_CONFIG=pkg-config +fi + if test "$test_CXXFLAGS" = ""; then # testing_installed_tree="yes" if test "$PKG_CONFIG" != "no"; then - test_CXXFLAGS=$(pkg-config libmesh --cflags) + test_CXXFLAGS=$($PKG_CONFIG libmesh --cflags) elif test -x $LIBMESH_CONFIG_PATH/libmesh-config; then test_CXXFLAGS=$($LIBMESH_CONFIG_PATH/libmesh-config --cppflags --cxxflags --include) diff --git a/contrib/capnproto/Makefile.in b/contrib/capnproto/Makefile.in index 43bfdcb44d5..baef088d0b1 100644 --- a/contrib/capnproto/Makefile.in +++ b/contrib/capnproto/Makefile.in @@ -453,11 +453,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -505,6 +513,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/eigen/gitshim/Makefile.in b/contrib/eigen/gitshim/Makefile.in index cd83617aabb..9fb8ce682e8 100644 --- a/contrib/eigen/gitshim/Makefile.in +++ b/contrib/eigen/gitshim/Makefile.in @@ -337,11 +337,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -389,6 +397,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/5.22b/exodus/Makefile.in b/contrib/exodusii/5.22b/exodus/Makefile.in index d6f576d1a90..9d470e1a03f 100644 --- a/contrib/exodusii/5.22b/exodus/Makefile.in +++ b/contrib/exodusii/5.22b/exodus/Makefile.in @@ -3320,11 +3320,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = 
@INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -3372,6 +3380,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/5.22b/nemesis/Makefile.in b/contrib/exodusii/5.22b/nemesis/Makefile.in index 8538cdc184f..40204256e72 100644 --- a/contrib/exodusii/5.22b/nemesis/Makefile.in +++ b/contrib/exodusii/5.22b/nemesis/Makefile.in @@ -399,11 +399,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -451,6 +459,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/Lib/Makefile.in b/contrib/exodusii/Lib/Makefile.in index 4c6ef829ed5..b4f9a349981 100644 --- a/contrib/exodusii/Lib/Makefile.in +++ b/contrib/exodusii/Lib/Makefile.in @@ -1955,11 +1955,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = 
@HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2007,6 +2015,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/v8.11/exodus/Makefile.in b/contrib/exodusii/v8.11/exodus/Makefile.in index 826c62c23e0..2d72bdbc847 100644 --- a/contrib/exodusii/v8.11/exodus/Makefile.in +++ b/contrib/exodusii/v8.11/exodus/Makefile.in @@ -4248,11 +4248,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -4300,6 +4308,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/exodusii/v8.11/nemesis/Makefile.in b/contrib/exodusii/v8.11/nemesis/Makefile.in index 
d1909c9f1d5..a6197c64b1f 100644 --- a/contrib/exodusii/v8.11/nemesis/Makefile.in +++ b/contrib/exodusii/v8.11/nemesis/Makefile.in @@ -409,11 +409,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -461,6 +469,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/fparser/Makefile.in b/contrib/fparser/Makefile.in index a9a20542d31..325e1c48d59 100644 --- a/contrib/fparser/Makefile.in +++ b/contrib/fparser/Makefile.in @@ -867,11 +867,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -919,6 +927,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = 
@OBJEXT@ diff --git a/contrib/fparser/extrasrc/Makefile.in b/contrib/fparser/extrasrc/Makefile.in index f257d59051c..9bd3bff32c8 100644 --- a/contrib/fparser/extrasrc/Makefile.in +++ b/contrib/fparser/extrasrc/Makefile.in @@ -339,11 +339,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -391,6 +399,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/fparser/fparser.hh b/contrib/fparser/fparser.hh index 85c70c5759a..cb0d792a402 100644 --- a/contrib/fparser/fparser.hh +++ b/contrib/fparser/fparser.hh @@ -91,7 +91,6 @@ class FunctionParserBase void Optimize(); - int ParseAndDeduceVariables(const std::string& function, int* amountOfVariablesFound = 0, bool useDegrees = false); diff --git a/contrib/fparser/fparser_ad.cc b/contrib/fparser/fparser_ad.cc index 2720e0cc5ac..c3f80c17d08 100644 --- a/contrib/fparser/fparser_ad.cc +++ b/contrib/fparser/fparser_ad.cc @@ -187,6 +187,19 @@ bool FunctionParserADBase::isZero() this->mData->mByteCode[0] == cImmed && this->mData->mImmed[0] == Value_t(0)); } +template +bool FunctionParserADBase::isEmpty() +{ + return this->mData->mByteCode.empty(); +} + +template +const typename FunctionParserBase::Data * +FunctionParserADBase::parser_data() const +{ + return const_cast *>(this)->getParserData(); +} + template 
void FunctionParserADBase::setZero() { diff --git a/contrib/fparser/fparser_ad.hh b/contrib/fparser/fparser_ad.hh index 133cb2b8046..f06dae17455 100644 --- a/contrib/fparser/fparser_ad.hh +++ b/contrib/fparser/fparser_ad.hh @@ -47,7 +47,9 @@ public: /** * check if the function's byte code is empty. */ - bool isEmpty() { return this->mData->mByteCode.empty(); } + bool isEmpty(); + + const typename FunctionParserBase::Data * parser_data() const; /** * set the bytecode of this function to return constant zero. diff --git a/contrib/gmv/Makefile.in b/contrib/gmv/Makefile.in index 1043f694cf0..51d91b1684d 100644 --- a/contrib/gmv/Makefile.in +++ b/contrib/gmv/Makefile.in @@ -394,11 +394,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -446,6 +454,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/gzstream/Makefile.in b/contrib/gzstream/Makefile.in index 26d9c6a99cd..d5c845abf3d 100644 --- a/contrib/gzstream/Makefile.in +++ b/contrib/gzstream/Makefile.in @@ -446,11 +446,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ 
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -498,6 +506,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/laspack/Makefile.in b/contrib/laspack/Makefile.in index a921519ee01..47a8abd00be 100644 --- a/contrib/laspack/Makefile.in +++ b/contrib/laspack/Makefile.in @@ -504,11 +504,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/libHilbert/Makefile.in b/contrib/libHilbert/Makefile.in index 92b7a8a5c6e..2c594e8ad9b 100644 --- a/contrib/libHilbert/Makefile.in +++ b/contrib/libHilbert/Makefile.in @@ -477,11 +477,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ 
INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -529,6 +537,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/metis/Makefile.in b/contrib/metis/Makefile.in index 2167e22fd72..642793791bf 100644 --- a/contrib/metis/Makefile.in +++ b/contrib/metis/Makefile.in @@ -1021,11 +1021,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -1073,6 +1081,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/nanoflann/Makefile.in b/contrib/nanoflann/Makefile.in index cb6fb5b1e25..0ebeeff7406 100644 --- a/contrib/nanoflann/Makefile.in +++ b/contrib/nanoflann/Makefile.in @@ -443,11 +443,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = 
@ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -495,6 +503,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/nemesis/Lib/Makefile.in b/contrib/nemesis/Lib/Makefile.in index 30e196af739..122bd09254d 100644 --- a/contrib/nemesis/Lib/Makefile.in +++ b/contrib/nemesis/Lib/Makefile.in @@ -789,11 +789,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -841,6 +849,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/netgen/Makefile.in b/contrib/netgen/Makefile.in index f7db0d91967..5044c6fd1a9 100644 --- a/contrib/netgen/Makefile.in +++ b/contrib/netgen/Makefile.in @@ -341,11 +341,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = 
@HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -393,6 +401,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/parmetis/Makefile.in b/contrib/parmetis/Makefile.in index 70875e6ff39..c7935e17638 100644 --- a/contrib/parmetis/Makefile.in +++ b/contrib/parmetis/Makefile.in @@ -855,11 +855,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -907,6 +915,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/poly2tri/modified/Makefile.in b/contrib/poly2tri/modified/Makefile.in index abb0c8cffa6..8ea3f7cd8cf 100644 --- a/contrib/poly2tri/modified/Makefile.in +++ 
b/contrib/poly2tri/modified/Makefile.in @@ -541,11 +541,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -593,6 +601,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/qhull/2012.1/Makefile.in b/contrib/qhull/2012.1/Makefile.in index d9910cea212..e84b600ef25 100644 --- a/contrib/qhull/2012.1/Makefile.in +++ b/contrib/qhull/2012.1/Makefile.in @@ -1164,11 +1164,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -1216,6 +1224,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/sfcurves/Makefile.in 
b/contrib/sfcurves/Makefile.in index 453e333f449..ed6554d0ba3 100644 --- a/contrib/sfcurves/Makefile.in +++ b/contrib/sfcurves/Makefile.in @@ -414,11 +414,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -466,6 +474,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tecplot/binary/Makefile.in b/contrib/tecplot/binary/Makefile.in index 9c918125c59..ec79f2ea10a 100644 --- a/contrib/tecplot/binary/Makefile.in +++ b/contrib/tecplot/binary/Makefile.in @@ -384,11 +384,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -436,6 +444,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ 
OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tecplot/tecio/Makefile.in b/contrib/tecplot/tecio/Makefile.in index 0ca8d9faaf2..1edfb7397dd 100644 --- a/contrib/tecplot/tecio/Makefile.in +++ b/contrib/tecplot/tecio/Makefile.in @@ -623,11 +623,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -675,6 +683,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/tetgen/Makefile.in b/contrib/tetgen/Makefile.in index b3035e72b2b..fdecf33a03f 100644 --- a/contrib/tetgen/Makefile.in +++ b/contrib/tetgen/Makefile.in @@ -429,11 +429,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -481,6 +489,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT 
= @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/triangle/Makefile.in b/contrib/triangle/Makefile.in index a63cb31454c..1e25b331714 100644 --- a/contrib/triangle/Makefile.in +++ b/contrib/triangle/Makefile.in @@ -424,11 +424,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -476,6 +484,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/contrib/utils/libmesh-dbg.pc.in b/contrib/utils/libmesh-dbg.pc.in index ab532d664c1..8e366deaf74 100644 --- a/contrib/utils/libmesh-dbg.pc.in +++ b/contrib/utils/libmesh-dbg.pc.in @@ -12,5 +12,7 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_dbg -ltimpi_dbg \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_DBG@ \ + ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-devel.pc.in b/contrib/utils/libmesh-devel.pc.in index 63b3d6c8097..62f74ccad3a 100644 --- a/contrib/utils/libmesh-devel.pc.in +++ b/contrib/utils/libmesh-devel.pc.in @@ -12,5 +12,7 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_devel -ltimpi_devel \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_DEVEL@ \ + ${cxxflags_extra} \ 
-I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-oprof.pc.in b/contrib/utils/libmesh-oprof.pc.in index 1184f433eb5..896f849bbd0 100644 --- a/contrib/utils/libmesh-oprof.pc.in +++ b/contrib/utils/libmesh-oprof.pc.in @@ -12,5 +12,7 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_oprof -ltimpi_oprof \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_OPROF@ \ + ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-opt.pc.in b/contrib/utils/libmesh-opt.pc.in index 10deb13ec50..6beb7b6a87f 100644 --- a/contrib/utils/libmesh-opt.pc.in +++ b/contrib/utils/libmesh-opt.pc.in @@ -12,5 +12,7 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_opt -ltimpi_opt \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_OPT@ \ + ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/contrib/utils/libmesh-prof.pc.in b/contrib/utils/libmesh-prof.pc.in index 213601d795a..8f24579e6f6 100644 --- a/contrib/utils/libmesh-prof.pc.in +++ b/contrib/utils/libmesh-prof.pc.in @@ -12,5 +12,7 @@ Libs: -Wl,-rpath,${libdir} -L${libdir} -lmesh_prof -ltimpi_prof \ @libmesh_installed_LIBS@ @libmesh_optional_LIBS@ Libs.private: Cflags: @CPPFLAGS_PROF@ \ + ${cxxflags_extra} \ -I${includedir} \ + @KOKKOS_CPPFLAGS@ \ @libmesh_optional_INCLUDES@ diff --git a/doc/Makefile.in b/doc/Makefile.in index 42ba76ef15b..472cd35389b 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -347,11 +347,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = 
@KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -399,6 +407,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/doc/html/Makefile.in b/doc/html/Makefile.in index ea0e18aacb4..302703289ee 100644 --- a/doc/html/Makefile.in +++ b/doc/html/Makefile.in @@ -307,11 +307,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -359,6 +367,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/Makefile.in b/examples/Makefile.in index d16d580d3a2..a662cf88a28 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -375,11 +375,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = 
@KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -427,6 +435,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex1/Makefile.in b/examples/adaptivity/adaptivity_ex1/Makefile.in index 156523e9faa..cc396d29f42 100644 --- a/examples/adaptivity/adaptivity_ex1/Makefile.in +++ b/examples/adaptivity/adaptivity_ex1/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex2/Makefile.in b/examples/adaptivity/adaptivity_ex2/Makefile.in index be5308ddd74..9c39660be70 100644 --- a/examples/adaptivity/adaptivity_ex2/Makefile.in +++ b/examples/adaptivity/adaptivity_ex2/Makefile.in @@ -484,11 +484,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ 
INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -536,6 +544,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex3/Makefile.in b/examples/adaptivity/adaptivity_ex3/Makefile.in index 0a64440aa41..5f7d55a3477 100644 --- a/examples/adaptivity/adaptivity_ex3/Makefile.in +++ b/examples/adaptivity/adaptivity_ex3/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex4/Makefile.in b/examples/adaptivity/adaptivity_ex4/Makefile.in index b25aca31264..f013de36926 100644 --- a/examples/adaptivity/adaptivity_ex4/Makefile.in +++ b/examples/adaptivity/adaptivity_ex4/Makefile.in @@ 
-471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adaptivity/adaptivity_ex5/Makefile.in b/examples/adaptivity/adaptivity_ex5/Makefile.in index b419dc98312..7deb22866e0 100644 --- a/examples/adaptivity/adaptivity_ex5/Makefile.in +++ b/examples/adaptivity/adaptivity_ex5/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex1/Makefile.in 
b/examples/adjoints/adjoints_ex1/Makefile.in index a174eb3b2fe..9559de61b70 100644 --- a/examples/adjoints/adjoints_ex1/Makefile.in +++ b/examples/adjoints/adjoints_ex1/Makefile.in @@ -559,11 +559,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -611,6 +619,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex2/Makefile.in b/examples/adjoints/adjoints_ex2/Makefile.in index d0ac7f06aeb..1ed48513c88 100644 --- a/examples/adjoints/adjoints_ex2/Makefile.in +++ b/examples/adjoints/adjoints_ex2/Makefile.in @@ -527,11 +527,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -579,6 +587,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ 
NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex3/Makefile.in b/examples/adjoints/adjoints_ex3/Makefile.in index af52a508f53..32dac179bc2 100644 --- a/examples/adjoints/adjoints_ex3/Makefile.in +++ b/examples/adjoints/adjoints_ex3/Makefile.in @@ -562,11 +562,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +622,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex4/Makefile.in b/examples/adjoints/adjoints_ex4/Makefile.in index 6721b984eb7..2b6f750b92c 100644 --- a/examples/adjoints/adjoints_ex4/Makefile.in +++ b/examples/adjoints/adjoints_ex4/Makefile.in @@ -562,11 +562,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE 
= @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +622,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex5/Makefile.in b/examples/adjoints/adjoints_ex5/Makefile.in index 5dcc545ab49..077e3ba31f9 100644 --- a/examples/adjoints/adjoints_ex5/Makefile.in +++ b/examples/adjoints/adjoints_ex5/Makefile.in @@ -562,11 +562,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -614,6 +622,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex6/Makefile.in b/examples/adjoints/adjoints_ex6/Makefile.in index f8568140952..e8a09524ff0 100644 --- a/examples/adjoints/adjoints_ex6/Makefile.in +++ b/examples/adjoints/adjoints_ex6/Makefile.in @@ -527,11 +527,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = 
@KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -579,6 +587,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/adjoints/adjoints_ex7/Makefile.in b/examples/adjoints/adjoints_ex7/Makefile.in index 793d69f1149..e5a98c02318 100644 --- a/examples/adjoints/adjoints_ex7/Makefile.in +++ b/examples/adjoints/adjoints_ex7/Makefile.in @@ -577,11 +577,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -629,6 +637,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex1/Makefile.in b/examples/eigenproblems/eigenproblems_ex1/Makefile.in index a23b7ec4684..d6696c9657b 100644 --- a/examples/eigenproblems/eigenproblems_ex1/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex1/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = 
@INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex2/Makefile.in b/examples/eigenproblems/eigenproblems_ex2/Makefile.in index 5ef2c3bc3ec..b2a5fb48010 100644 --- a/examples/eigenproblems/eigenproblems_ex2/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex2/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex3/Makefile.in b/examples/eigenproblems/eigenproblems_ex3/Makefile.in index 4a060dc7358..33072d3d79b 100644 --- 
a/examples/eigenproblems/eigenproblems_ex3/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex3/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/eigenproblems/eigenproblems_ex4/Makefile.in b/examples/eigenproblems/eigenproblems_ex4/Makefile.in index eb92f3d91f8..585db452640 100644 --- a/examples/eigenproblems/eigenproblems_ex4/Makefile.in +++ b/examples/eigenproblems/eigenproblems_ex4/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = 
@NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex1/Makefile.in b/examples/fem_system/fem_system_ex1/Makefile.in index 47f9d7e206f..25e7f610ded 100644 --- a/examples/fem_system/fem_system_ex1/Makefile.in +++ b/examples/fem_system/fem_system_ex1/Makefile.in @@ -499,11 +499,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +559,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex2/Makefile.in b/examples/fem_system/fem_system_ex2/Makefile.in index d1357da5ca8..8b30230a3b1 100644 --- a/examples/fem_system/fem_system_ex2/Makefile.in +++ b/examples/fem_system/fem_system_ex2/Makefile.in @@ -514,11 +514,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ 
LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +574,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex3/Makefile.in b/examples/fem_system/fem_system_ex3/Makefile.in index 6783edf17bd..2d141f77288 100644 --- a/examples/fem_system/fem_system_ex3/Makefile.in +++ b/examples/fem_system/fem_system_ex3/Makefile.in @@ -499,11 +499,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +559,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex4/Makefile.in b/examples/fem_system/fem_system_ex4/Makefile.in index 8e6296abb4e..522bb49f8e9 100644 --- a/examples/fem_system/fem_system_ex4/Makefile.in +++ b/examples/fem_system/fem_system_ex4/Makefile.in @@ -499,11 +499,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
+KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +559,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/fem_system/fem_system_ex5/Makefile.in b/examples/fem_system/fem_system_ex5/Makefile.in index 00224c8d1c2..01fb482aa91 100644 --- a/examples/fem_system/fem_system_ex5/Makefile.in +++ b/examples/fem_system/fem_system_ex5/Makefile.in @@ -514,11 +514,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +574,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex1/Makefile.in b/examples/introduction/introduction_ex1/Makefile.in index 59920135dfb..e8308c46d86 100644 --- a/examples/introduction/introduction_ex1/Makefile.in +++ b/examples/introduction/introduction_ex1/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX 
= @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex2/Makefile.in b/examples/introduction/introduction_ex2/Makefile.in index 4168b619aa4..36d5f45eee7 100644 --- a/examples/introduction/introduction_ex2/Makefile.in +++ b/examples/introduction/introduction_ex2/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex3/Makefile.in b/examples/introduction/introduction_ex3/Makefile.in index 
859921ed0fa..f9d6287a98e 100644 --- a/examples/introduction/introduction_ex3/Makefile.in +++ b/examples/introduction/introduction_ex3/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex4/Makefile.in b/examples/introduction/introduction_ex4/Makefile.in index 3572fea8ef5..2d2474a658f 100644 --- a/examples/introduction/introduction_ex4/Makefile.in +++ b/examples/introduction/introduction_ex4/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ 
NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/introduction/introduction_ex5/Makefile.in b/examples/introduction/introduction_ex5/Makefile.in index 3ae3f4020fe..437ce7b05f2 100644 --- a/examples/introduction/introduction_ex5/Makefile.in +++ b/examples/introduction/introduction_ex5/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex1/Makefile.in b/examples/miscellaneous/miscellaneous_ex1/Makefile.in index d0de4bad619..9bcaf44480c 100644 --- a/examples/miscellaneous/miscellaneous_ex1/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex1/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = 
@KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex10/Makefile.in b/examples/miscellaneous/miscellaneous_ex10/Makefile.in index 1afb2f88961..bf721ec793f 100644 --- a/examples/miscellaneous/miscellaneous_ex10/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex10/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex11/Makefile.in b/examples/miscellaneous/miscellaneous_ex11/Makefile.in index de80da2aab8..8db1b8690a6 100644 --- a/examples/miscellaneous/miscellaneous_ex11/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex11/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ 
INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex12/Makefile.in b/examples/miscellaneous/miscellaneous_ex12/Makefile.in index 99775b9b0b9..2d6c2fbd728 100644 --- a/examples/miscellaneous/miscellaneous_ex12/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex12/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex13/Makefile.in b/examples/miscellaneous/miscellaneous_ex13/Makefile.in index d6f6a103170..b76863f34c0 100644 --- a/examples/miscellaneous/miscellaneous_ex13/Makefile.in +++ 
b/examples/miscellaneous/miscellaneous_ex13/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex14/Makefile.in b/examples/miscellaneous/miscellaneous_ex14/Makefile.in index feb36eea1f3..e4cef4ae163 100644 --- a/examples/miscellaneous/miscellaneous_ex14/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex14/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ 
OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex15/Makefile.in b/examples/miscellaneous/miscellaneous_ex15/Makefile.in index 504236ef024..643317916e6 100644 --- a/examples/miscellaneous/miscellaneous_ex15/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex15/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex16/Makefile.in b/examples/miscellaneous/miscellaneous_ex16/Makefile.in index 816dc5777b4..ecd2b7973a7 100644 --- a/examples/miscellaneous/miscellaneous_ex16/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex16/Makefile.in @@ -481,11 +481,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE 
= @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -533,6 +541,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex17/Makefile.in b/examples/miscellaneous/miscellaneous_ex17/Makefile.in index d7264bce057..6d61cb4ed36 100644 --- a/examples/miscellaneous/miscellaneous_ex17/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex17/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex2/Makefile.in b/examples/miscellaneous/miscellaneous_ex2/Makefile.in index bc5b0d64089..8e59e6ad478 100644 --- a/examples/miscellaneous/miscellaneous_ex2/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex2/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM 
= @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex3/Makefile.in b/examples/miscellaneous/miscellaneous_ex3/Makefile.in index 2f5e956a6ae..5146d27ca1f 100644 --- a/examples/miscellaneous/miscellaneous_ex3/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex3/Makefile.in @@ -470,11 +470,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -522,6 +530,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex4/Makefile.in b/examples/miscellaneous/miscellaneous_ex4/Makefile.in index d9d6f954cbc..2b48571cd36 100644 --- a/examples/miscellaneous/miscellaneous_ex4/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex4/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ 
HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex5/Makefile.in b/examples/miscellaneous/miscellaneous_ex5/Makefile.in index 719d522a31a..707ea4dc6e1 100644 --- a/examples/miscellaneous/miscellaneous_ex5/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex5/Makefile.in @@ -479,11 +479,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -531,6 +539,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex6/Makefile.in 
b/examples/miscellaneous/miscellaneous_ex6/Makefile.in index a5a756000c3..aa8bc685743 100644 --- a/examples/miscellaneous/miscellaneous_ex6/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex6/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex7/Makefile.in b/examples/miscellaneous/miscellaneous_ex7/Makefile.in index d0115c3cac9..9ac39bc4084 100644 --- a/examples/miscellaneous/miscellaneous_ex7/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex7/Makefile.in @@ -508,11 +508,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -560,6 +568,7 @@ NODEPRECATEDFLAG = 
@NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex8/Makefile.in b/examples/miscellaneous/miscellaneous_ex8/Makefile.in index 270aa93b899..939249e5570 100644 --- a/examples/miscellaneous/miscellaneous_ex8/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex8/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/miscellaneous/miscellaneous_ex9/Makefile.in b/examples/miscellaneous/miscellaneous_ex9/Makefile.in index 46f5b5339d5..66bcac9673a 100644 --- a/examples/miscellaneous/miscellaneous_ex9/Makefile.in +++ b/examples/miscellaneous/miscellaneous_ex9/Makefile.in @@ -505,11 +505,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ 
+KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -557,6 +565,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/optimization/optimization_ex1/Makefile.in b/examples/optimization/optimization_ex1/Makefile.in index cf5012ab431..50737c7c35f 100644 --- a/examples/optimization/optimization_ex1/Makefile.in +++ b/examples/optimization/optimization_ex1/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/optimization/optimization_ex2/Makefile.in b/examples/optimization/optimization_ex2/Makefile.in index f3065b739c7..16f38051df1 100644 --- a/examples/optimization/optimization_ex2/Makefile.in +++ b/examples/optimization/optimization_ex2/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ 
INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex1/Makefile.in b/examples/reduced_basis/reduced_basis_ex1/Makefile.in index 0879171980e..b33bf22f98c 100644 --- a/examples/reduced_basis/reduced_basis_ex1/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex1/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex2/Makefile.in b/examples/reduced_basis/reduced_basis_ex2/Makefile.in index 986069c3c25..fb18b6c3a8b 100644 --- 
a/examples/reduced_basis/reduced_basis_ex2/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex2/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex3/Makefile.in b/examples/reduced_basis/reduced_basis_ex3/Makefile.in index f68264db04e..66e9889d4e6 100644 --- a/examples/reduced_basis/reduced_basis_ex3/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex3/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = 
@NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex4/Makefile.in b/examples/reduced_basis/reduced_basis_ex4/Makefile.in index 2c3343c83e8..4018e00c9b7 100644 --- a/examples/reduced_basis/reduced_basis_ex4/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex4/Makefile.in @@ -494,11 +494,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -546,6 +554,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex5/Makefile.in b/examples/reduced_basis/reduced_basis_ex5/Makefile.in index 238b469fe39..f5c5f996a49 100644 --- a/examples/reduced_basis/reduced_basis_ex5/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex5/Makefile.in @@ -504,11 +504,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ 
+KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex6/Makefile.in b/examples/reduced_basis/reduced_basis_ex6/Makefile.in index 50076f35515..704b9196cf4 100644 --- a/examples/reduced_basis/reduced_basis_ex6/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex6/Makefile.in @@ -494,11 +494,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -546,6 +554,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/reduced_basis/reduced_basis_ex7/Makefile.in b/examples/reduced_basis/reduced_basis_ex7/Makefile.in index 3817bee46df..88b4bb8693a 100644 --- a/examples/reduced_basis/reduced_basis_ex7/Makefile.in +++ b/examples/reduced_basis/reduced_basis_ex7/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = 
@INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/solution_transfer/solution_transfer_ex1/Makefile.in b/examples/solution_transfer/solution_transfer_ex1/Makefile.in index cd7613f44f7..0cf9f53ad59 100644 --- a/examples/solution_transfer/solution_transfer_ex1/Makefile.in +++ b/examples/solution_transfer/solution_transfer_ex1/Makefile.in @@ -465,11 +465,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -517,6 +525,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex1/Makefile.in b/examples/subdomains/subdomains_ex1/Makefile.in index f6238ba3205..799e2edc23d 100644 --- a/examples/subdomains/subdomains_ex1/Makefile.in +++ 
b/examples/subdomains/subdomains_ex1/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/subdomains/subdomains_ex2/Makefile.in b/examples/subdomains/subdomains_ex2/Makefile.in index 5c20167218c..e81cd89393f 100644 --- a/examples/subdomains/subdomains_ex2/Makefile.in +++ b/examples/subdomains/subdomains_ex2/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ 
diff --git a/examples/subdomains/subdomains_ex3/Makefile.in b/examples/subdomains/subdomains_ex3/Makefile.in index 3a83f502b61..303ef37b9d2 100644 --- a/examples/subdomains/subdomains_ex3/Makefile.in +++ b/examples/subdomains/subdomains_ex3/Makefile.in @@ -474,11 +474,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -526,6 +534,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in index 3add79bf28a..bff0732e60d 100644 --- a/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex1/Makefile.in @@ -466,11 +466,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = 
@LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +526,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in index cd44f4a5603..5e5a2e62e27 100644 --- a/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex2/Makefile.in @@ -471,11 +471,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -523,6 +531,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in index f5c4ff69de1..776822ee34e 100644 --- a/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex3/Makefile.in @@ -466,11 +466,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = 
@INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +526,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in index 0fbdb133bf1..4626e3bedea 100644 --- a/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex4/Makefile.in @@ -466,11 +466,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +526,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in index 
d1f25e7dc8b..22fe2d48bf0 100644 --- a/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex5/Makefile.in @@ -466,11 +466,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +526,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in index 5712834b887..b6b15092aba 100644 --- a/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex6/Makefile.in @@ -466,11 +466,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +526,7 @@ 
NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in index 15445020704..9c036b14331 100644 --- a/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex7/Makefile.in @@ -472,11 +472,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -524,6 +532,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in index abcd909b40d..50acb9ef0a2 100644 --- a/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex8/Makefile.in @@ -510,11 +510,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = 
@INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -562,6 +570,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in index a497c06f581..b6f6ee6544e 100644 --- a/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in +++ b/examples/systems_of_equations/systems_of_equations_ex9/Makefile.in @@ -472,11 +472,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -524,6 +532,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex1/Makefile.in b/examples/transient/transient_ex1/Makefile.in index 288208b4c6c..0e39e5979cf 100644 --- a/examples/transient/transient_ex1/Makefile.in +++ 
b/examples/transient/transient_ex1/Makefile.in @@ -480,11 +480,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -532,6 +540,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/transient/transient_ex2/Makefile.in b/examples/transient/transient_ex2/Makefile.in index bae2b2fcc21..19c83b89de5 100644 --- a/examples/transient/transient_ex2/Makefile.in +++ b/examples/transient/transient_ex2/Makefile.in @@ -466,11 +466,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -518,6 +526,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git 
a/examples/transient/transient_ex3/Makefile.in b/examples/transient/transient_ex3/Makefile.in index 031ab6ae7a6..f79116ac7dc 100644 --- a/examples/transient/transient_ex3/Makefile.in +++ b/examples/transient/transient_ex3/Makefile.in @@ -514,11 +514,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -566,6 +574,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex1/Makefile.in b/examples/vector_fe/vector_fe_ex1/Makefile.in index c474401c7ad..25a34ed4414 100644 --- a/examples/vector_fe/vector_fe_ex1/Makefile.in +++ b/examples/vector_fe/vector_fe_ex1/Makefile.in @@ -478,11 +478,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -530,6 +538,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ 
NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex10/Makefile.in b/examples/vector_fe/vector_fe_ex10/Makefile.in index c2a3aa0dd15..6242a9689e2 100644 --- a/examples/vector_fe/vector_fe_ex10/Makefile.in +++ b/examples/vector_fe/vector_fe_ex10/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex2/Makefile.in b/examples/vector_fe/vector_fe_ex2/Makefile.in index 1aca800f63c..bcc7bda8e92 100644 --- a/examples/vector_fe/vector_fe_ex2/Makefile.in +++ b/examples/vector_fe/vector_fe_ex2/Makefile.in @@ -504,11 +504,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = 
@KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex3/Makefile.in b/examples/vector_fe/vector_fe_ex3/Makefile.in index b033d436f7d..76252e51ad4 100644 --- a/examples/vector_fe/vector_fe_ex3/Makefile.in +++ b/examples/vector_fe/vector_fe_ex3/Makefile.in @@ -504,11 +504,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex4/Makefile.in b/examples/vector_fe/vector_fe_ex4/Makefile.in index b2320fc3c58..c88ea1a27c9 100644 --- a/examples/vector_fe/vector_fe_ex4/Makefile.in +++ b/examples/vector_fe/vector_fe_ex4/Makefile.in @@ -504,11 +504,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ 
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -556,6 +564,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex5/Makefile.in b/examples/vector_fe/vector_fe_ex5/Makefile.in index 21a71638e6e..4464f32ec13 100644 --- a/examples/vector_fe/vector_fe_ex5/Makefile.in +++ b/examples/vector_fe/vector_fe_ex5/Makefile.in @@ -491,11 +491,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -543,6 +551,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex6/Makefile.in b/examples/vector_fe/vector_fe_ex6/Makefile.in index cb550f280e4..89576c57f18 100644 --- a/examples/vector_fe/vector_fe_ex6/Makefile.in +++ b/examples/vector_fe/vector_fe_ex6/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = 
@HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex7/Makefile.in b/examples/vector_fe/vector_fe_ex7/Makefile.in index 6ae32888b36..d45faaa9f85 100644 --- a/examples/vector_fe/vector_fe_ex7/Makefile.in +++ b/examples/vector_fe/vector_fe_ex7/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex8/Makefile.in b/examples/vector_fe/vector_fe_ex8/Makefile.in index fe881610825..2ea4632110f 100644 --- 
a/examples/vector_fe/vector_fe_ex8/Makefile.in +++ b/examples/vector_fe/vector_fe_ex8/Makefile.in @@ -489,11 +489,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -541,6 +549,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/examples/vector_fe/vector_fe_ex9/Makefile.in b/examples/vector_fe/vector_fe_ex9/Makefile.in index 34e5092c976..3f031e8bf29 100644 --- a/examples/vector_fe/vector_fe_ex9/Makefile.in +++ b/examples/vector_fe/vector_fe_ex9/Makefile.in @@ -499,11 +499,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -551,6 +559,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ 
OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ diff --git a/include/Makefile.am b/include/Makefile.am index a8ace90467c..c44a385fa96 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,5 +1,26 @@ SUBDIRS = libmesh +# GPU (Kokkos) FE math headers — installed preserving the gpu/ subdirectory so +# downstream code can use #include "libmesh/gpu/kokkos_fe_types.h" etc. +# nobase_ is used instead of the standard flat install to keep the namespace. +if LIBMESH_ENABLE_KOKKOS +nobase_include_HEADERS = \ + gpu/kokkos_scalar_types.h \ + gpu/kokkos_fe_types.h \ + gpu/kokkos_fe_shape_dispatch.h \ + gpu/kokkos_fe_base.h \ + gpu/kokkos_fe_evaluator.h \ + gpu/kokkos_fe_lagrange_1d.h \ + gpu/kokkos_fe_lagrange_2d.h \ + gpu/kokkos_fe_lagrange_3d.h \ + gpu/kokkos_fe_monomial.h \ + gpu/kokkos_fe_face_map.h \ + gpu/kokkos_fe_map.h \ + gpu/kokkos_quadrature.h \ + gpu/kokkos_hilbert_system.h \ + gpu/kokkos_parsed_function.h +endif + # special handholding for prefix_config.m4 generated files # so that 'make clean ; make' works as does 'make distcheck' # libmesh_config.h is made by ./configure, so it should get diff --git a/include/Makefile.in b/include/Makefile.in index 15e2ded0d9e..81e0e777867 100644 --- a/include/Makefile.in +++ b/include/Makefile.in @@ -157,7 +157,8 @@ am__aclocal_m4_deps = \ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \ - $(noinst_HEADERS) $(am__DIST_COMMON) + $(am__nobase_include_HEADERS_DIST) $(noinst_HEADERS) \ + $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = libmesh_config.h.tmp CONFIG_CLEAN_FILES = @@ -215,8 +216,17 @@ am__uninstall_files_from_dir = { \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && echo $$files | $(am__xargs_n) 40 $(am__rm_f); }; \ } -am__installdirs = "$(DESTDIR)$(includedir)" -HEADERS = $(include_HEADERS) $(noinst_HEADERS) +am__installdirs = "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)" 
+am__nobase_include_HEADERS_DIST = gpu/kokkos_scalar_types.h \ + gpu/kokkos_fe_types.h gpu/kokkos_fe_shape_dispatch.h \ + gpu/kokkos_fe_base.h gpu/kokkos_fe_evaluator.h \ + gpu/kokkos_fe_lagrange_1d.h gpu/kokkos_fe_lagrange_2d.h \ + gpu/kokkos_fe_lagrange_3d.h gpu/kokkos_fe_monomial.h \ + gpu/kokkos_fe_face_map.h gpu/kokkos_fe_map.h \ + gpu/kokkos_quadrature.h gpu/kokkos_hilbert_system.h \ + gpu/kokkos_parsed_function.h +HEADERS = $(include_HEADERS) $(nobase_include_HEADERS) \ + $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive am__recursive_targets = \ @@ -380,11 +390,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -432,6 +450,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -598,6 +617,26 @@ vtkmajor = @vtkmajor@ vtkversion = @vtkversion@ SUBDIRS = libmesh +# GPU (Kokkos) FE math headers — installed preserving the gpu/ subdirectory so +# downstream code can use #include "libmesh/gpu/kokkos_fe_types.h" etc. +# nobase_ is used instead of the standard flat install to keep the namespace. 
+@LIBMESH_ENABLE_KOKKOS_TRUE@nobase_include_HEADERS = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_scalar_types.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_types.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_shape_dispatch.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_base.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_evaluator.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_lagrange_1d.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_lagrange_2d.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_lagrange_3d.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_monomial.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_face_map.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_fe_map.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_quadrature.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_hilbert_system.h \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ gpu/kokkos_parsed_function.h + + # special handholding for prefix_config.m4 generated files # so that 'make clean ; make' works as does 'make distcheck' # libmesh_config.h is made by ./configure, so it should get @@ -635,6 +674,7 @@ include_HEADERS = \ base/libmesh_abort.h \ base/libmesh_base.h \ base/libmesh_common.h \ + base/libmesh_device.h \ base/libmesh_documentation.h \ base/libmesh_exceptions.h \ base/libmesh_logging.h \ @@ -656,6 +696,7 @@ include_HEADERS = \ enums/enum_elem_quality.h \ enums/enum_elem_type.h \ enums/enum_error_estimator_type.h \ + enums/enum_fe_elem_class.h \ enums/enum_fe_family.h \ enums/enum_inf_map_type.h \ enums/enum_io_package.h \ @@ -694,8 +735,13 @@ include_HEADERS = \ fe/fe_interface.h \ fe/fe_interface_macros.h \ fe/fe_lagrange_shape_1D.h \ + fe/fe_reference_element_traits.h \ + fe/fe_serendipity_lagrange.h \ + fe/fe_simplex_lagrange.h \ + fe/fe_tensor_product_lagrange.h \ fe/fe_macro.h \ fe/fe_map.h \ + fe/fe_shape_traits.h \ fe/fe_transformation_base.h \ fe/fe_type.h \ fe/fe_xyz_map.h \ @@ -781,6 +827,11 @@ include_HEADERS = \ geom/sphere.h \ geom/stored_range.h \ geom/surface.h \ + gpu/kokkos_linalg_base.h \ 
+ gpu/kokkos_storage.h \ + gpu/kokkos_storage_policy.h \ + gpu/kokkos_tensor_ops.h \ + gpu/kokkos_vector_ops.h \ ghosting/default_coupling.h \ ghosting/ghost_point_neighbors.h \ ghosting/ghosting_functor.h \ @@ -877,6 +928,7 @@ include_HEADERS = \ numerics/parsed_fem_function.h \ numerics/parsed_fem_function_parameter.h \ numerics/parsed_function.h \ + numerics/parsed_function_program.h \ numerics/parsed_function_parameter.h \ numerics/petsc_macro.h \ numerics/petsc_matrix.h \ @@ -929,7 +981,6 @@ include_HEADERS = \ parallel/threads_allocators.h \ parallel/threads_none.h \ parallel/threads_pthread.h \ - parallel/threads_spin_mutex_forward.h \ parallel/threads_tbb.h \ partitioning/centroid_partitioner.h \ partitioning/hilbert_sfc_partitioner.h \ @@ -950,6 +1001,7 @@ include_HEADERS = \ quadrature/quadrature_composite.h \ quadrature/quadrature_conical.h \ quadrature/quadrature_gauss.h \ + quadrature/quadrature_gauss_rules.h \ quadrature/quadrature_gauss_lobatto.h \ quadrature/quadrature_gm.h \ quadrature/quadrature_grid.h \ @@ -1215,6 +1267,30 @@ uninstall-includeHEADERS: @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) +install-nobase_includeHEADERS: $(nobase_include_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \ + fi; \ + $(am__nobase_list) | while read dir files; do \ + xfiles=; for file in $$files; do \ + if test -f "$$file"; then xfiles="$$xfiles $$file"; \ + else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ + test -z "$$xfiles" || { \ + test "x$$dir" = x. 
|| { \ + echo " $(MKDIR_P) '$(DESTDIR)$(includedir)/$$dir'"; \ + $(MKDIR_P) "$(DESTDIR)$(includedir)/$$dir"; }; \ + echo " $(INSTALL_HEADER) $$xfiles '$(DESTDIR)$(includedir)/$$dir'"; \ + $(INSTALL_HEADER) $$xfiles "$(DESTDIR)$(includedir)/$$dir" || exit $$?; }; \ + done + +uninstall-nobase_includeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(nobase_include_HEADERS)'; test -n "$(includedir)" || list=; \ + $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ + dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run 'make' without going through this Makefile. @@ -1381,7 +1457,7 @@ check: check-recursive all-am: Makefile $(HEADERS) libmesh_config.h.tmp installdirs: installdirs-recursive installdirs-am: - for dir in "$(DESTDIR)$(includedir)"; do \ + for dir in "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive @@ -1435,7 +1511,7 @@ info: info-recursive info-am: -install-data-am: install-includeHEADERS +install-data-am: install-includeHEADERS install-nobase_includeHEADERS install-dvi: install-dvi-recursive @@ -1479,7 +1555,7 @@ ps: ps-recursive ps-am: -uninstall-am: uninstall-includeHEADERS +uninstall-am: uninstall-includeHEADERS uninstall-nobase_includeHEADERS .MAKE: $(am__recursive_targets) all install-am install-strip @@ -1491,12 +1567,13 @@ uninstall-am: uninstall-includeHEADERS install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am \ install-includeHEADERS install-info install-info-am \ - install-man install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installcheck-local installdirs installdirs-am maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-generic \ - mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ - uninstall-am uninstall-includeHEADERS + install-man 
install-nobase_includeHEADERS install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installcheck-local installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-am uninstall uninstall-am \ + uninstall-includeHEADERS uninstall-nobase_includeHEADERS .PRECIOUS: Makefile diff --git a/include/base/dof_map.h b/include/base/dof_map.h index 566d3fcba6c..6f33ff529e1 100644 --- a/include/base/dof_map.h +++ b/include/base/dof_map.h @@ -40,6 +40,10 @@ #include "libmesh/mesh_subdivision_support.h" #include "libmesh/dof_map_base.h" +#ifdef LIBMESH_HAVE_KOKKOS +#include "libmesh/kokkos_storage_policy.h" +#endif + // TIMPI includes #include "timpi/parallel_implementation.h" #include "timpi/parallel_sync.h" @@ -50,6 +54,7 @@ #include #include #include +#include #include #include @@ -1813,6 +1818,50 @@ class DofMap : public DofMapBase, */ void reinit_static_condensation(); +#ifdef LIBMESH_HAVE_KOKKOS + struct KokkosDofIndexCache + { + using elem_id_view = ::Kokkos::View; + using elem_dof_id_view = ::Kokkos::View; + using elem_dof_count_view = ::Kokkos::View; + using elem_subdomain_view = ::Kokkos::View; + + elem_id_view element_ids; + elem_dof_id_view element_dof_indices; + elem_dof_count_view element_n_dofs; + elem_subdomain_view element_subdomains; + std::vector host_element_ids; + std::vector host_element_dof_indices; + std::vector host_element_n_dofs; + std::vector host_element_subdomains; + unsigned int max_dofs = 0; + }; + + struct KokkosLocalIndexCache + { + using elem_local_index_view = ::Kokkos::View; + + elem_local_index_view element_local_indices; + unsigned int max_dofs = 0; + }; + + const KokkosDofIndexCache * + get_kokkos_dof_index_cache(const unsigned int vn = libMesh::invalid_uint) const; + + const KokkosLocalIndexCache * + get_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn = 
libMesh::invalid_uint) const; + + const KokkosLocalIndexCache * + require_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn = libMesh::invalid_uint) const; + + void prepare_kokkos_dof_index_caches() const; + void prepare_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn = libMesh::invalid_uint) const; + void clear_kokkos_caches() const; +#endif + private: /** @@ -2139,6 +2188,13 @@ class DofMap : public DofMapBase, */ MeshBase & _mesh; +#ifdef LIBMESH_HAVE_KOKKOS + mutable std::map> _kokkos_dof_index_caches; + mutable std::map *>, + std::unique_ptr> _kokkos_local_index_caches; +#endif + /** * Additional matrices handled by this object. These pointers do \e * not handle the memory, instead, \p System, who diff --git a/include/base/libmesh_common.h b/include/base/libmesh_common.h index d907f4a5fe3..1d40d85c4ec 100644 --- a/include/base/libmesh_common.h +++ b/include/base/libmesh_common.h @@ -30,6 +30,10 @@ // The library configuration options #include "libmesh/libmesh_config.h" +// Device compilation support — must be included before assert macros +// so that LIBMESH_DEVICE_ASSERT is available for the Kokkos path. 
+#include "libmesh/libmesh_device.h" + // Use actual timestamps or constant dummies (to aid ccache) #ifdef LIBMESH_ENABLE_TIMESTAMPS # define LIBMESH_TIME __TIME__ @@ -183,33 +187,33 @@ typedef std::complex COMPLEX; // Helper functions for complex/real numbers // to clean up #ifdef LIBMESH_USE_COMPLEX_NUMBERS elsewhere -template inline T libmesh_real(T a) { return a; } -template inline T libmesh_imag(T /*a*/) { return 0; } -template inline T libmesh_conj(T a) { return a; } +template LIBMESH_DEVICE_INLINE T libmesh_real(T a) { return a; } +template LIBMESH_DEVICE_INLINE T libmesh_imag(T /*a*/) { return 0; } +template LIBMESH_DEVICE_INLINE T libmesh_conj(T a) { return a; } template -inline T libmesh_real(std::complex a) { return std::real(a); } +LIBMESH_DEVICE_INLINE T libmesh_real(std::complex a) { return std::real(a); } template -inline T libmesh_imag(std::complex a) { return std::imag(a); } +LIBMESH_DEVICE_INLINE T libmesh_imag(std::complex a) { return std::imag(a); } template -inline std::complex libmesh_conj(std::complex a) { return std::conj(a); } +LIBMESH_DEVICE_INLINE std::complex libmesh_conj(std::complex a) { return std::conj(a); } // std::isnan() is in as of C++11. template -inline bool libmesh_isnan(T x) { return std::isnan(x); } +LIBMESH_DEVICE_INLINE bool libmesh_isnan(T x) { return std::isnan(x); } template -inline bool libmesh_isnan(std::complex a) +LIBMESH_DEVICE_INLINE bool libmesh_isnan(std::complex a) { return (std::isnan(std::real(a)) || std::isnan(std::imag(a))); } // std::isinf() is in as of C++11. template -inline bool libmesh_isinf(T x) { return std::isinf(x); } +LIBMESH_DEVICE_INLINE bool libmesh_isinf(T x) { return std::isinf(x); } template -inline bool libmesh_isinf(std::complex a) +LIBMESH_DEVICE_INLINE bool libmesh_isinf(std::complex a) { return (std::isinf(std::real(a)) || std::isinf(std::imag(a))); } // Define the value type for unknowns in simulations. 
#elif defined(LIBMESH_DEVICE_ASSERT)

// Kokkos compilation: use the device-safe assert from libmesh_device.h.
//
// This branch is reached only when NDEBUG is not defined and
// LIBMESH_DEVICE_ASSERT is.  Every variant below forwards just the
// condition to LIBMESH_DEVICE_ASSERT; the msg argument is accepted so
// that call sites keep the same shape as in the other branches, but it
// does not appear in any expansion here.

// Single-condition forms: the condition is passed through unchanged.
#define libmesh_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted)
#define libmesh_exceptionless_assert_msg(asserted, msg) LIBMESH_DEVICE_ASSERT(asserted)

// Two-operand comparison forms: each operand is parenthesized before
// the comparison operator is applied, and the resulting expression is
// what LIBMESH_DEVICE_ASSERT receives.
#define libmesh_assert_equal_to_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) == (expr2))
#define libmesh_assert_not_equal_to_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) != (expr2))
#define libmesh_assert_less_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) < (expr2))
#define libmesh_assert_greater_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) > (expr2))
#define libmesh_assert_less_equal_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) <= (expr2))
#define libmesh_assert_greater_equal_msg(expr1,expr2, msg) LIBMESH_DEVICE_ASSERT((expr1) >= (expr2))
LIBMESH_DEVICE_ERROR_MSG(msg); \ + } while (0) +#else #define libmesh_error_msg(msg) \ do { \ std::stringstream message_stream; \ @@ -411,6 +439,7 @@ struct casting_compare { libMesh::MacroFunctions::report_error(__FILE__, __LINE__, LIBMESH_DATE, LIBMESH_TIME, message_stream); \ LIBMESH_THROW(libMesh::LogicError(message_stream.str())); \ } while (0) +#endif #define libmesh_error() libmesh_error_msg("") @@ -649,7 +678,6 @@ inline Tnew libmesh_cast_int (Told oldvar) return cast_int(oldvar); } - /** * restrict_int checks that the value of the castee is within the * bounds which are exactly representable by the output type, even in @@ -673,8 +701,6 @@ inline Tnew restrict_int (Told oldvar) return oldvar; } - - /** * This is a helper variable template for cases when we want to use a default compile-time * error with constexpr-based if conditions. The templating delays the triggering diff --git a/include/base/libmesh_device.h b/include/base/libmesh_device.h new file mode 100644 index 00000000000..f41d4c70b01 --- /dev/null +++ b/include/base/libmesh_device.h @@ -0,0 +1,74 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_LIBMESH_DEVICE_H +#define LIBMESH_LIBMESH_DEVICE_H + +// Defines LIBMESH_DEVICE_INLINE, mirroring MetaPhysicL's METAPHYSICL_INLINE +// pattern (metaphysicl_device.h / METAPHYSICL_KOKKOS_COMPILATION). +// +// When compiling a .K translation unit (LIBMESH_KOKKOS_COMPILATION is defined +// by kokkos.mk), this expands to KOKKOS_INLINE_FUNCTION so that annotated +// methods are callable from both host and device code. In all other +// translation units it expands to plain `inline`. +#ifdef LIBMESH_KOKKOS_COMPILATION +# include +# include +# define LIBMESH_DEVICE_INLINE KOKKOS_INLINE_FUNCTION + +// Backend-neutral device-code detection for Kokkos .K translation units. +// This lets error/exception plumbing share a single predicate instead of +// hardcoding per-backend checks in multiple headers. +# if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || defined(__SYCL_DEVICE_ONLY__) +# define LIBMESH_IN_DEVICE_CODE 1 +# else +# define LIBMESH_IN_DEVICE_CODE 0 +# endif + +// Device-safe assert: uses printf (supported on CUDA/HIP) and +// Kokkos::abort() for backend-portable device termination. +// Defined here (not in libmesh_common.h) because Kokkos headers +// are only available in .K translation units. 
// LIBMESH_DEVICE_ASSERT(asserted)
//
// Without NDEBUG: if the condition is false, print the stringified
// condition together with __FILE__ and __LINE__ via printf, then call
// ::Kokkos::abort().  With NDEBUG: expands to a no-op, so the condition
// is not evaluated at all.
//
// NOTE(review): this header also treats __SYCL_DEVICE_ONLY__ as device
// code; confirm that plain printf is usable in that backend's device
// code as well.
# ifndef NDEBUG
#  define LIBMESH_DEVICE_ASSERT(asserted)                               \
  do { if (!(asserted)) {                                               \
    printf("libMesh assert failed: %s, file %s, line %d\n",            \
           #asserted, __FILE__, __LINE__);                              \
    ::Kokkos::abort("libmesh_assert failed");                           \
  } } while (0)
# else
#  define LIBMESH_DEVICE_ASSERT(asserted) ((void) 0)
# endif

// LIBMESH_DEVICE_ERROR_MSG(msg)
//
// Print msg with the current file and line, then abort through Kokkos.
// msg is consumed by a "%s" conversion and is also handed to
// ::Kokkos::abort(), so it has to be usable as a C string.
//
// msg is expanded twice, once in each statement.  Each expansion is
// evaluated inside its own call expression, so an argument such as
// (e).what() is read while the object it belongs to is still alive in
// that statement.  Arguments with side effects will run them twice.
# define LIBMESH_DEVICE_ERROR_MSG(msg)                                  \
  do {                                                                  \
    printf("libMesh error: %s, file %s, line %d\n",                     \
           msg, __FILE__, __LINE__);                                    \
    ::Kokkos::abort(msg);                                               \
  } while (0)

// LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg)
//
// Conditional form: runs LIBMESH_DEVICE_ERROR_MSG(msg) only when cond
// is true; msg is not evaluated otherwise.
# define LIBMESH_DEVICE_ERROR_MSG_IF(cond, msg)                         \
  do { if (cond) { LIBMESH_DEVICE_ERROR_MSG(msg); } } while (0)
+#define LIBMESH_THROW(e) do { LIBMESH_DEVICE_ERROR_MSG((e).what()); } while (0) +#else #define LIBMESH_THROW(e) do { throw e; } while (0) +#endif + #define libmesh_rethrow throw #define libmesh_try try #define libmesh_catch(e) catch(e) #else +#if LIBMESH_IN_DEVICE_CODE +#define LIBMESH_THROW(e) do { LIBMESH_DEVICE_ERROR_MSG((e).what()); } while (0) +#else #define LIBMESH_THROW(e) do { libMesh::err << e.what(); libMesh::libmesh_abort(); } while (0) +#endif #define libmesh_rethrow #define libmesh_try #define libmesh_catch(e) if (0) diff --git a/include/enums/enum_fe_elem_class.h b/include/enums/enum_fe_elem_class.h new file mode 100644 index 00000000000..2b1b2e96d12 --- /dev/null +++ b/include/enums/enum_fe_elem_class.h @@ -0,0 +1,50 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + +#ifndef LIBMESH_ENUM_FE_ELEM_CLASS_H +#define LIBMESH_ENUM_FE_ELEM_CLASS_H + +namespace libMesh { + +/** + * \enum libMesh::FEElemClass groups element types by topological class, + * independent of polynomial order. + * + * e.g. QUAD4, QUAD8, QUAD9 all map to QUAD; TRI3, TRI6, TRI7 all map to TRI. 
/**
 * Topological class of a finite element, with the polynomial order
 * factored out: every element type of a given shape shares one
 * enumerator.
 *
 * The underlying type is fixed to unsigned int, which permits an
 * opaque forward declaration:
 *   enum class FEElemClass : unsigned int;
 *
 * Enumerators are numbered consecutively from zero, so N_CLASSES, the
 * last one, equals the number of real classes that precede it.
 */
enum class FEElemClass : unsigned int
{
  EDGE,      // 0
  TRI,       // 1
  QUAD,      // 2
  TET,       // 3
  HEX,       // 4
  PRISM,     // 5
  PYRAMID,   // 6
  N_CLASSES  // 7: count of the classes above, not a class itself
};
fe_lagrange_1D_quadratic_shape_deriv(const unsigned int i, } -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_cubic_shape_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real xi) @@ -196,7 +197,7 @@ Real fe_lagrange_1D_cubic_shape_deriv(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_shape_deriv(const Order order, const unsigned int i, const unsigned int j, @@ -224,7 +225,7 @@ Real fe_lagrange_1D_shape_deriv(const Order order, // fe_lagrange_1D_linear_shape_second_deriv is 0 -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_quadratic_shape_second_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real) @@ -249,7 +250,7 @@ Real fe_lagrange_1D_quadratic_shape_second_deriv(const unsigned int i, } -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_cubic_shape_second_deriv(const unsigned int i, const unsigned int libmesh_dbg_var(j), const Real xi) @@ -278,7 +279,7 @@ Real fe_lagrange_1D_cubic_shape_second_deriv(const unsigned int i, -inline +LIBMESH_DEVICE_INLINE Real fe_lagrange_1D_shape_second_deriv(const Order order, const unsigned int i, const unsigned int j, diff --git a/include/fe/fe_reference_element_traits.h b/include/fe/fe_reference_element_traits.h new file mode 100644 index 00000000000..f20d7a87adc --- /dev/null +++ b/include/fe/fe_reference_element_traits.h @@ -0,0 +1,1365 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
// Per-side node counts, one table per element type.
//
// Entry [s] of <type>_side_node_counts is the number of nodes on local
// side s of that element type; the array extent is the number of sides.
// side_node_count_or_zero() is the bounds-checked accessor for these
// tables.
//
// NOTE(review): these are namespace-scope constexpr arrays that the
// LIBMESH_DEVICE_INLINE accessors index with run-time values; confirm
// that every device compiler being targeted accepts that usage.

// 1D: each of the two sides is a single end node.
constexpr unsigned int edge2_side_node_counts[2] = {1, 1};
constexpr unsigned int edge3_side_node_counts[2] = {1, 1};
constexpr unsigned int edge4_side_node_counts[2] = {1, 1};

// Triangles: three sides.
constexpr unsigned int tri3_side_node_counts[3] = {2, 2, 2};
constexpr unsigned int tri6_side_node_counts[3] = {3, 3, 3};
constexpr unsigned int tri7_side_node_counts[3] = {3, 3, 3};

// Quadrilaterals: four sides.
constexpr unsigned int quad4_side_node_counts[4] = {2, 2, 2, 2};
constexpr unsigned int quad8_side_node_counts[4] = {3, 3, 3, 3};
constexpr unsigned int quad9_side_node_counts[4] = {3, 3, 3, 3};

// Tetrahedra: four sides.
constexpr unsigned int tet4_side_node_counts[4] = {3, 3, 3, 3};
constexpr unsigned int tet10_side_node_counts[4] = {6, 6, 6, 6};
constexpr unsigned int tet14_side_node_counts[4] = {7, 7, 7, 7};

// Hexahedra: six sides.
constexpr unsigned int hex8_side_node_counts[6] = {4, 4, 4, 4, 4, 4};
constexpr unsigned int hex20_side_node_counts[6] = {8, 8, 8, 8, 8, 8};
constexpr unsigned int hex27_side_node_counts[6] = {9, 9, 9, 9, 9, 9};

// Prisms: five sides with mixed counts; sides 0 and 4 carry fewer nodes
// than sides 1-3.
constexpr unsigned int prism6_side_node_counts[5] = {3, 4, 4, 4, 3};
constexpr unsigned int prism15_side_node_counts[5] = {6, 8, 8, 8, 6};
constexpr unsigned int prism18_side_node_counts[5] = {6, 9, 9, 9, 6};
constexpr unsigned int prism20_side_node_counts[5] = {7, 9, 9, 9, 7};
constexpr unsigned int prism21_side_node_counts[5] = {7, 9, 9, 9, 7};

// Pyramids: five sides with mixed counts; side 4 differs from sides 0-3.
constexpr unsigned int pyramid5_side_node_counts[5] = {3, 3, 3, 3, 4};
constexpr unsigned int pyramid13_side_node_counts[5] = {6, 6, 6, 6, 8};
constexpr unsigned int pyramid14_side_node_counts[5] = {6, 6, 6, 6, 9};
constexpr unsigned int pyramid18_side_node_counts[5] = {7, 7, 7, 7, 9};
tet14_edge_node_counts[6] = {3, 3, 3, 3, 3, 3}; +constexpr unsigned int hex20_edge_node_counts[12] = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int hex27_edge_node_counts[12] = {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism15_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism18_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism20_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int prism21_edge_node_counts[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int pyramid13_edge_node_counts[8] = {3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int pyramid14_edge_node_counts[8] = {3, 3, 3, 3, 3, 3, 3, 3}; +constexpr unsigned int pyramid18_edge_node_counts[8] = {3, 3, 3, 3, 3, 3, 3, 3}; + +constexpr unsigned int prism6_side_nodes[5][4] = + { + {0, 2, 1, 99}, + {0, 1, 4, 3}, + {1, 2, 5, 4}, + {2, 0, 3, 5}, + {3, 4, 5, 99} + }; + +constexpr unsigned int prism15_side_nodes[5][8] = + { + {0, 2, 1, 8, 7, 6, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9}, + {1, 2, 5, 4, 7, 11, 13, 10}, + {2, 0, 3, 5, 8, 9, 14, 11}, + {3, 4, 5, 12, 13, 14, 99, 99} + }; + +constexpr unsigned int prism18_side_nodes[5][9] = + { + {0, 2, 1, 8, 7, 6, 99, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9, 15}, + {1, 2, 5, 4, 7, 11, 13, 10, 16}, + {2, 0, 3, 5, 8, 9, 14, 11, 17}, + {3, 4, 5, 12, 13, 14, 99, 99, 99} + }; + +constexpr unsigned int prism20_side_nodes[5][9] = + { + {0, 2, 1, 8, 7, 6, 18, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9, 15}, + {1, 2, 5, 4, 7, 11, 13, 10, 16}, + {2, 0, 3, 5, 8, 9, 14, 11, 17}, + {3, 4, 5, 12, 13, 14, 19, 99, 99} + }; + +constexpr unsigned int prism21_side_nodes[5][9] = + { + {0, 2, 1, 8, 7, 6, 18, 99, 99}, + {0, 1, 4, 3, 6, 10, 12, 9, 15}, + {1, 2, 5, 4, 7, 11, 13, 10, 16}, + {2, 0, 3, 5, 8, 9, 14, 11, 17}, + {3, 4, 5, 12, 13, 14, 19, 99, 99} + }; + +constexpr unsigned int pyramid5_side_nodes[5][4] = + { + {0, 1, 4, 99}, + {1, 2, 4, 99}, + {2, 3, 4, 99}, + {3, 0, 4, 99}, + 
{0, 3, 2, 1} + }; + +constexpr unsigned int pyramid13_side_nodes[5][8] = + { + {0, 1, 4, 5, 10, 9, 99, 99}, + {1, 2, 4, 6, 11, 10, 99, 99}, + {2, 3, 4, 7, 12, 11, 99, 99}, + {3, 0, 4, 8, 9, 12, 99, 99}, + {0, 3, 2, 1, 8, 7, 6, 5} + }; + +constexpr unsigned int pyramid14_side_nodes[5][9] = + { + {0, 1, 4, 5, 10, 9, 99, 99, 99}, + {1, 2, 4, 6, 11, 10, 99, 99, 99}, + {2, 3, 4, 7, 12, 11, 99, 99, 99}, + {3, 0, 4, 8, 9, 12, 99, 99, 99}, + {0, 3, 2, 1, 8, 7, 6, 5, 13} + }; + +constexpr unsigned int pyramid18_side_nodes[5][9] = + { + {0, 1, 4, 5, 10, 9, 14, 99, 99}, + {1, 2, 4, 6, 11, 10, 15, 99, 99}, + {2, 3, 4, 7, 12, 11, 16, 99, 99}, + {3, 0, 4, 8, 9, 12, 17, 99, 99}, + {0, 3, 2, 1, 8, 7, 6, 5, 13} + }; + +constexpr unsigned int tri3_side_nodes[3][2] = + { + {0, 1}, + {1, 2}, + {2, 0} + }; + +constexpr unsigned int tri6_side_nodes[3][3] = + { + {0, 1, 3}, + {1, 2, 4}, + {2, 0, 5} + }; + +constexpr unsigned int tri7_side_nodes[3][3] = + { + {0, 1, 3}, + {1, 2, 4}, + {2, 0, 5} + }; + +constexpr unsigned int quad4_side_nodes[4][2] = + { + {0, 1}, + {1, 2}, + {2, 3}, + {3, 0} + }; + +constexpr unsigned int quad8_side_nodes[4][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {2, 3, 6}, + {3, 0, 7} + }; + +constexpr unsigned int quad9_side_nodes[4][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {2, 3, 6}, + {3, 0, 7} + }; + +constexpr unsigned int tet4_side_nodes[4][3] = + { + {0, 2, 1}, + {0, 1, 3}, + {1, 2, 3}, + {2, 0, 3} + }; + +constexpr unsigned int tet10_side_nodes[4][6] = + { + {0, 2, 1, 6, 5, 4}, + {0, 1, 3, 4, 8, 7}, + {1, 2, 3, 5, 9, 8}, + {2, 0, 3, 6, 7, 9} + }; + +constexpr unsigned int tet14_side_nodes[4][7] = + { + {0, 2, 1, 6, 5, 4, 10}, + {0, 1, 3, 4, 8, 7, 11}, + {1, 2, 3, 5, 9, 8, 12}, + {2, 0, 3, 6, 7, 9, 13} + }; + +constexpr unsigned int hex8_side_nodes[6][4] = + { + {0, 3, 2, 1}, + {0, 1, 5, 4}, + {1, 2, 6, 5}, + {2, 3, 7, 6}, + {3, 0, 4, 7}, + {4, 5, 6, 7} + }; + +constexpr unsigned int hex20_side_nodes[6][8] = + { + {0, 3, 2, 1, 11, 10, 9, 8}, + {0, 1, 5, 4, 8, 13, 16, 
12}, + {1, 2, 6, 5, 9, 14, 17, 13}, + {2, 3, 7, 6, 10, 15, 18, 14}, + {3, 0, 4, 7, 11, 12, 19, 15}, + {4, 5, 6, 7, 16, 17, 18, 19} + }; + +constexpr unsigned int hex27_side_nodes[6][9] = + { + {0, 3, 2, 1, 11, 10, 9, 8, 20}, + {0, 1, 5, 4, 8, 13, 16, 12, 21}, + {1, 2, 6, 5, 9, 14, 17, 13, 22}, + {2, 3, 7, 6, 10, 15, 18, 14, 23}, + {3, 0, 4, 7, 11, 12, 19, 15, 24}, + {4, 5, 6, 7, 16, 17, 18, 19, 25} + }; + +constexpr unsigned int edge2_side_nodes[2][1] = + { + {0}, + {1} + }; + +constexpr unsigned int edge3_side_nodes[2][1] = + { + {0}, + {1} + }; + +constexpr unsigned int edge4_side_nodes[2][1] = + { + {0}, + {1} + }; + +constexpr unsigned int tet10_edge_nodes[6][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {0, 2, 6}, + {0, 3, 7}, + {1, 3, 8}, + {2, 3, 9} + }; + +constexpr unsigned int tet14_edge_nodes[6][3] = + { + {0, 1, 4}, + {1, 2, 5}, + {0, 2, 6}, + {0, 3, 7}, + {1, 3, 8}, + {2, 3, 9} + }; + +constexpr unsigned int hex20_edge_nodes[12][3] = + { + {0, 1, 8}, + {1, 2, 9}, + {2, 3, 10}, + {0, 3, 11}, + {0, 4, 12}, + {1, 5, 13}, + {2, 6, 14}, + {3, 7, 15}, + {4, 5, 16}, + {5, 6, 17}, + {6, 7, 18}, + {4, 7, 19} + }; + +constexpr unsigned int hex27_edge_nodes[12][3] = + { + {0, 1, 8}, + {1, 2, 9}, + {2, 3, 10}, + {0, 3, 11}, + {0, 4, 12}, + {1, 5, 13}, + {2, 6, 14}, + {3, 7, 15}, + {4, 5, 16}, + {5, 6, 17}, + {6, 7, 18}, + {4, 7, 19} + }; + +constexpr unsigned int prism15_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int prism18_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int prism20_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + {0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int prism21_edge_nodes[9][3] = + { + {0, 1, 6}, + {1, 2, 7}, + {0, 2, 8}, + 
{0, 3, 9}, + {1, 4, 10}, + {2, 5, 11}, + {3, 4, 12}, + {4, 5, 13}, + {3, 5, 14} + }; + +constexpr unsigned int pyramid13_edge_nodes[8][3] = + { + {0, 1, 5}, + {1, 2, 6}, + {2, 3, 7}, + {0, 3, 8}, + {0, 4, 9}, + {1, 4, 10}, + {2, 4, 11}, + {3, 4, 12} + }; + +constexpr unsigned int pyramid14_edge_nodes[8][3] = + { + {0, 1, 5}, + {1, 2, 6}, + {2, 3, 7}, + {0, 3, 8}, + {0, 4, 9}, + {1, 4, 10}, + {2, 4, 11}, + {3, 4, 12} + }; + +constexpr unsigned int pyramid18_edge_nodes[8][3] = + { + {0, 1, 5}, + {1, 2, 6}, + {2, 3, 7}, + {0, 3, 8}, + {0, 4, 9}, + {1, 4, 10}, + {2, 4, 11}, + {3, 4, 12} + }; + +LIBMESH_DEVICE_INLINE bool +requires_side_specific_topology(ElemType parent) +{ + switch (parent) + { + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE ElemType +side_topology_or_invalid(ElemType parent, + unsigned int side) +{ + switch (parent) + { + case PRISM6: + switch (side) + { + case 0: + case 4: + return TRI3; + case 1: + case 2: + case 3: + return QUAD4; + default: + return INVALID_ELEM; + } + + case PRISM15: + switch (side) + { + case 0: + case 4: + return TRI6; + case 1: + case 2: + case 3: + return QUAD8; + default: + return INVALID_ELEM; + } + + case PRISM18: + switch (side) + { + case 0: + case 4: + return TRI6; + case 1: + case 2: + case 3: + return QUAD9; + default: + return INVALID_ELEM; + } + + case PRISM20: + case PRISM21: + switch (side) + { + case 0: + case 4: + return TRI7; + case 1: + case 2: + case 3: + return QUAD9; + default: + return INVALID_ELEM; + } + + case PYRAMID5: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI3; + case 4: + return QUAD4; + default: + return INVALID_ELEM; + } + + case PYRAMID13: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI6; + case 4: + return QUAD8; + default: + return INVALID_ELEM; + } + + case PYRAMID14: 
+ switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI6; + case 4: + return QUAD9; + default: + return INVALID_ELEM; + } + + case PYRAMID18: + switch (side) + { + case 0: + case 1: + case 2: + case 3: + return TRI7; + case 4: + return QUAD9; + default: + return INVALID_ELEM; + } + + default: + return INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +side_node_count_or_zero(ElemType parent, + unsigned int side) +{ + switch (parent) + { + case EDGE2: + return side < 2 ? edge2_side_node_counts[side] : 0; + case EDGE3: + return side < 2 ? edge3_side_node_counts[side] : 0; + case EDGE4: + return side < 2 ? edge4_side_node_counts[side] : 0; + case TRI3: + case TRISHELL3: + return side < 3 ? tri3_side_node_counts[side] : 0; + case TRI6: + return side < 3 ? tri6_side_node_counts[side] : 0; + case TRI7: + return side < 3 ? tri7_side_node_counts[side] : 0; + case QUAD4: + case QUADSHELL4: + return side < 4 ? quad4_side_node_counts[side] : 0; + case QUAD8: + case QUADSHELL8: + return side < 4 ? quad8_side_node_counts[side] : 0; + case QUAD9: + case QUADSHELL9: + return side < 4 ? quad9_side_node_counts[side] : 0; + case TET4: + return side < 4 ? tet4_side_node_counts[side] : 0; + case TET10: + return side < 4 ? tet10_side_node_counts[side] : 0; + case TET14: + return side < 4 ? tet14_side_node_counts[side] : 0; + case HEX8: + return side < 6 ? hex8_side_node_counts[side] : 0; + case HEX20: + return side < 6 ? hex20_side_node_counts[side] : 0; + case HEX27: + return side < 6 ? hex27_side_node_counts[side] : 0; + case PRISM6: + return side < 5 ? prism6_side_node_counts[side] : 0; + case PRISM15: + return side < 5 ? prism15_side_node_counts[side] : 0; + case PRISM18: + return side < 5 ? prism18_side_node_counts[side] : 0; + case PRISM20: + return side < 5 ? prism20_side_node_counts[side] : 0; + case PRISM21: + return side < 5 ? prism21_side_node_counts[side] : 0; + case PYRAMID5: + return side < 5 ? 
pyramid5_side_node_counts[side] : 0; + case PYRAMID13: + return side < 5 ? pyramid13_side_node_counts[side] : 0; + case PYRAMID14: + return side < 5 ? pyramid14_side_node_counts[side] : 0; + case PYRAMID18: + return side < 5 ? pyramid18_side_node_counts[side] : 0; + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +edge_node_count_or_zero(ElemType parent, + unsigned int edge) +{ + switch (parent) + { + case TET10: + return edge < 6 ? tet10_edge_node_counts[edge] : 0; + case TET14: + return edge < 6 ? tet14_edge_node_counts[edge] : 0; + case HEX20: + return edge < 12 ? hex20_edge_node_counts[edge] : 0; + case HEX27: + return edge < 12 ? hex27_edge_node_counts[edge] : 0; + case PRISM15: + return edge < 9 ? prism15_edge_node_counts[edge] : 0; + case PRISM18: + return edge < 9 ? prism18_edge_node_counts[edge] : 0; + case PRISM20: + return edge < 9 ? prism20_edge_node_counts[edge] : 0; + case PRISM21: + return edge < 9 ? prism21_edge_node_counts[edge] : 0; + case PYRAMID13: + return edge < 8 ? pyramid13_edge_node_counts[edge] : 0; + case PYRAMID14: + return edge < 8 ? pyramid14_edge_node_counts[edge] : 0; + case PYRAMID18: + return edge < 8 ? 
pyramid18_edge_node_counts[edge] : 0; + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE bool +try_local_side_node(ElemType parent, + unsigned int side, + unsigned int side_node, + unsigned int & node) +{ + const unsigned int count = side_node_count_or_zero(parent, side); + if (!count || side_node >= count) + return false; + + switch (parent) + { + case EDGE2: + node = edge2_side_nodes[side][side_node]; + return true; + case EDGE3: + node = edge3_side_nodes[side][side_node]; + return true; + case EDGE4: + node = edge4_side_nodes[side][side_node]; + return true; + case TRI3: + case TRISHELL3: + node = tri3_side_nodes[side][side_node]; + return true; + case TRI6: + node = tri6_side_nodes[side][side_node]; + return true; + case TRI7: + node = tri7_side_nodes[side][side_node]; + return true; + case QUAD4: + case QUADSHELL4: + node = quad4_side_nodes[side][side_node]; + return true; + case QUAD8: + case QUADSHELL8: + node = quad8_side_nodes[side][side_node]; + return true; + case QUAD9: + case QUADSHELL9: + node = quad9_side_nodes[side][side_node]; + return true; + case TET4: + node = tet4_side_nodes[side][side_node]; + return true; + case TET10: + node = tet10_side_nodes[side][side_node]; + return true; + case TET14: + node = tet14_side_nodes[side][side_node]; + return true; + case HEX8: + node = hex8_side_nodes[side][side_node]; + return true; + case HEX20: + node = hex20_side_nodes[side][side_node]; + return true; + case HEX27: + node = hex27_side_nodes[side][side_node]; + return true; + case PRISM6: + node = prism6_side_nodes[side][side_node]; + return true; + case PRISM15: + node = prism15_side_nodes[side][side_node]; + return true; + case PRISM18: + node = prism18_side_nodes[side][side_node]; + return true; + case PRISM20: + node = prism20_side_nodes[side][side_node]; + return true; + case PRISM21: + node = prism21_side_nodes[side][side_node]; + return true; + case PYRAMID5: + node = pyramid5_side_nodes[side][side_node]; + return true; + case PYRAMID13: + node 
= pyramid13_side_nodes[side][side_node]; + return true; + case PYRAMID14: + node = pyramid14_side_nodes[side][side_node]; + return true; + case PYRAMID18: + node = pyramid18_side_nodes[side][side_node]; + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +try_local_edge_node(ElemType parent, + unsigned int edge, + unsigned int edge_node, + unsigned int & node) +{ + const unsigned int count = edge_node_count_or_zero(parent, edge); + if (!count || edge_node >= count) + return false; + + switch (parent) + { + case TET10: + node = tet10_edge_nodes[edge][edge_node]; + return true; + case TET14: + node = tet14_edge_nodes[edge][edge_node]; + return true; + case HEX20: + node = hex20_edge_nodes[edge][edge_node]; + return true; + case HEX27: + node = hex27_edge_nodes[edge][edge_node]; + return true; + case PRISM15: + node = prism15_edge_nodes[edge][edge_node]; + return true; + case PRISM18: + node = prism18_edge_nodes[edge][edge_node]; + return true; + case PRISM20: + node = prism20_edge_nodes[edge][edge_node]; + return true; + case PRISM21: + node = prism21_edge_nodes[edge][edge_node]; + return true; + case PYRAMID13: + node = pyramid13_edge_nodes[edge][edge_node]; + return true; + case PYRAMID14: + node = pyramid14_edge_nodes[edge][edge_node]; + return true; + case PYRAMID18: + node = pyramid18_edge_nodes[edge][edge_node]; + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +try_reference_node(ElemType type, + unsigned int node, + Point & pt) +{ + switch (type) + { + case EDGE2: + case EDGE3: + case EDGE4: + switch (node) + { + case 0: + pt = Point(-1.0); + return true; + case 1: + pt = Point(1.0); + return true; + case 2: + if (type == EDGE3) + { + pt = Point(0.0); + return true; + } + if (type == EDGE4) + { + pt = Point(-1. / 3.); + return true; + } + return false; + case 3: + if (type == EDGE4) + { + pt = Point(1. 
/ 3.); + return true; + } + return false; + default: + return false; + } + + case TRI3: + case TRI6: + case TRI7: + switch (node) + { + case 0: + pt = Point(0.0, 0.0); + return true; + case 1: + pt = Point(1.0, 0.0); + return true; + case 2: + pt = Point(0.0, 1.0); + return true; + case 3: + pt = Point(0.5, 0.0); + return true; + case 4: + pt = Point(0.5, 0.5); + return true; + case 5: + pt = Point(0.0, 0.5); + return true; + case 6: + if (type == TRI7) + { + pt = Point(1. / 3., 1. / 3.); + return true; + } + return false; + default: + return false; + } + + case QUAD4: + case QUAD8: + case QUAD9: + switch (node) + { + case 0: + pt = Point(-1.0, -1.0); + return true; + case 1: + pt = Point(1.0, -1.0); + return true; + case 2: + pt = Point(1.0, 1.0); + return true; + case 3: + pt = Point(-1.0, 1.0); + return true; + case 4: + pt = Point(0.0, -1.0); + return true; + case 5: + pt = Point(1.0, 0.0); + return true; + case 6: + pt = Point(0.0, 1.0); + return true; + case 7: + pt = Point(-1.0, 0.0); + return true; + case 8: + if (type == QUAD9) + { + pt = Point(0.0, 0.0); + return true; + } + return false; + default: + return false; + } + + case TET4: + case TET10: + case TET14: + switch (node) + { + case 0: + pt = Point(0.0, 0.0, 0.0); + return true; + case 1: + pt = Point(1.0, 0.0, 0.0); + return true; + case 2: + pt = Point(0.0, 1.0, 0.0); + return true; + case 3: + pt = Point(0.0, 0.0, 1.0); + return true; + case 4: + pt = Point(0.5, 0.0, 0.0); + return true; + case 5: + pt = Point(0.5, 0.5, 0.0); + return true; + case 6: + pt = Point(0.0, 0.5, 0.0); + return true; + case 7: + pt = Point(0.0, 0.0, 0.5); + return true; + case 8: + pt = Point(0.5, 0.0, 0.5); + return true; + case 9: + pt = Point(0.0, 0.5, 0.5); + return true; + case 10: + if (type == TET14) + { + pt = Point(1. / 3., 1. / 3., 0.0); + return true; + } + return false; + case 11: + if (type == TET14) + { + pt = Point(1. / 3., 0.0, 1. 
/ 3.); + return true; + } + return false; + case 12: + if (type == TET14) + { + pt = Point(1. / 3., 1. / 3., 1. / 3.); + return true; + } + return false; + case 13: + if (type == TET14) + { + pt = Point(0.0, 1. / 3., 1. / 3.); + return true; + } + return false; + default: + return false; + } + + case HEX8: + case HEX20: + case HEX27: + switch (node) + { + case 0: + pt = Point(-1.0, -1.0, -1.0); + return true; + case 1: + pt = Point(1.0, -1.0, -1.0); + return true; + case 2: + pt = Point(1.0, 1.0, -1.0); + return true; + case 3: + pt = Point(-1.0, 1.0, -1.0); + return true; + case 4: + pt = Point(-1.0, -1.0, 1.0); + return true; + case 5: + pt = Point(1.0, -1.0, 1.0); + return true; + case 6: + pt = Point(1.0, 1.0, 1.0); + return true; + case 7: + pt = Point(-1.0, 1.0, 1.0); + return true; + case 8: + pt = Point(0.0, -1.0, -1.0); + return true; + case 9: + pt = Point(1.0, 0.0, -1.0); + return true; + case 10: + pt = Point(0.0, 1.0, -1.0); + return true; + case 11: + pt = Point(-1.0, 0.0, -1.0); + return true; + case 12: + pt = Point(-1.0, -1.0, 0.0); + return true; + case 13: + pt = Point(1.0, -1.0, 0.0); + return true; + case 14: + pt = Point(1.0, 1.0, 0.0); + return true; + case 15: + pt = Point(-1.0, 1.0, 0.0); + return true; + case 16: + pt = Point(0.0, -1.0, 1.0); + return true; + case 17: + pt = Point(1.0, 0.0, 1.0); + return true; + case 18: + pt = Point(0.0, 1.0, 1.0); + return true; + case 19: + pt = Point(-1.0, 0.0, 1.0); + return true; + case 20: + if (type == HEX27) + { + pt = Point(0.0, 0.0, -1.0); + return true; + } + return false; + case 21: + if (type == HEX27) + { + pt = Point(0.0, -1.0, 0.0); + return true; + } + return false; + case 22: + if (type == HEX27) + { + pt = Point(1.0, 0.0, 0.0); + return true; + } + return false; + case 23: + if (type == HEX27) + { + pt = Point(0.0, 1.0, 0.0); + return true; + } + return false; + case 24: + if (type == HEX27) + { + pt = Point(-1.0, 0.0, 0.0); + return true; + } + return false; + case 25: + if (type == 
HEX27) + { + pt = Point(0.0, 0.0, 1.0); + return true; + } + return false; + case 26: + if (type == HEX27) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + default: + return false; + } + + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + switch (node) + { + case 0: + pt = Point(-1.0, -1.0, 0.0); + return true; + case 1: + pt = Point(1.0, -1.0, 0.0); + return true; + case 2: + pt = Point(1.0, 1.0, 0.0); + return true; + case 3: + pt = Point(-1.0, 1.0, 0.0); + return true; + case 4: + pt = Point(0.0, 0.0, 1.0); + return true; + case 5: + pt = Point(0.0, -1.0, 0.0); + return true; + case 6: + pt = Point(1.0, 0.0, 0.0); + return true; + case 7: + pt = Point(0.0, 1.0, 0.0); + return true; + case 8: + pt = Point(-1.0, 0.0, 0.0); + return true; + case 9: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(-0.5, -0.5, 0.5); + return true; + } + return false; + case 10: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(0.5, -0.5, 0.5); + return true; + } + return false; + case 11: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(0.5, 0.5, 0.5); + return true; + } + return false; + case 12: + if (type == PYRAMID13 || type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(-0.5, 0.5, 0.5); + return true; + } + return false; + case 13: + if (type == PYRAMID14 || type == PYRAMID18) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + case 14: + if (type == PYRAMID18) + { + pt = Point(-2. / 3., 0.0, 1. / 3.); + return true; + } + return false; + case 15: + if (type == PYRAMID18) + { + pt = Point(0.0, 2. / 3., 1. / 3.); + return true; + } + return false; + case 16: + if (type == PYRAMID18) + { + pt = Point(2. / 3., 0.0, 1. / 3.); + return true; + } + return false; + case 17: + if (type == PYRAMID18) + { + pt = Point(0.0, -2. / 3., 1. 
/ 3.); + return true; + } + return false; + default: + return false; + } + + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + switch (node) + { + case 0: + pt = Point(0.0, 0.0, -1.0); + return true; + case 1: + pt = Point(1.0, 0.0, -1.0); + return true; + case 2: + pt = Point(0.0, 1.0, -1.0); + return true; + case 3: + pt = Point(0.0, 0.0, 1.0); + return true; + case 4: + pt = Point(1.0, 0.0, 1.0); + return true; + case 5: + pt = Point(0.0, 1.0, 1.0); + return true; + case 6: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.0, -1.0); + return true; + } + return false; + case 7: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.5, -1.0); + return true; + } + return false; + case 8: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.5, -1.0); + return true; + } + return false; + case 9: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + case 10: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(1.0, 0.0, 0.0); + return true; + } + return false; + case 11: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 1.0, 0.0); + return true; + } + return false; + case 12: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.0, 1.0); + return true; + } + return false; + case 13: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.5, 1.0); + return true; + } + return false; + case 14: + if (type == PRISM15 || type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.5, 1.0); + return true; + } + return false; + case 15: + if (type == PRISM18 || type == PRISM20 || type == 
PRISM21) + { + pt = Point(0.5, 0.0, 0.0); + return true; + } + return false; + case 16: + if (type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.5, 0.5, 0.0); + return true; + } + return false; + case 17: + if (type == PRISM18 || type == PRISM20 || type == PRISM21) + { + pt = Point(0.0, 0.5, 0.0); + return true; + } + return false; + case 18: + if (type == PRISM20 || type == PRISM21) + { + pt = Point(1. / 3., 1. / 3., -1.0); + return true; + } + return false; + case 19: + if (type == PRISM20 || type == PRISM21) + { + pt = Point(1. / 3., 1. / 3., 1.0); + return true; + } + return false; + case 20: + if (type == PRISM21) + { + pt = Point(1. / 3., 1. / 3., 0.0); + return true; + } + return false; + default: + return false; + } + + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +try_refspace_node(ElemType type, + unsigned int node, + Point & pt) +{ + switch (type) + { + case NODEELEM: + if (!node) + { + pt = Point(0.0, 0.0, 0.0); + return true; + } + return false; + + case TRISHELL3: + return try_reference_node(TRI3, node, pt); + + case QUADSHELL4: + return try_reference_node(QUAD4, node, pt); + + case QUADSHELL8: + return try_reference_node(QUAD8, node, pt); + + case QUADSHELL9: + return try_reference_node(QUAD9, node, pt); + + default: + return try_reference_node(type, node, pt); + } +} + +LIBMESH_DEVICE_INLINE bool +try_reference_side_node(ElemType parent, + unsigned int side, + unsigned int side_node, + Point & pt) +{ + unsigned int node = libMesh::invalid_uint; + if (!try_local_side_node(parent, side, side_node, node)) + return false; + + return try_reference_node(parent, node, pt); +} + +} // namespace libMesh + +#endif // LIBMESH_FE_REFERENCE_ELEMENT_TRAITS_H diff --git a/include/fe/fe_serendipity_lagrange.h b/include/fe/fe_serendipity_lagrange.h new file mode 100644 index 00000000000..f1a44f6cd1f --- /dev/null +++ b/include/fe/fe_serendipity_lagrange.h @@ -0,0 +1,417 @@ +// The libMesh Finite Element Library. 
+// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
+
+#ifndef LIBMESH_FE_SERENDIPITY_LAGRANGE_H
+#define LIBMESH_FE_SERENDIPITY_LAGRANGE_H
+
+#include "libmesh/point.h"
+
+namespace libMesh
+{
+namespace detail
+{
+// Value of the i-th (i < 8) QUAD8 serendipity shape function at reference point (xi, eta); indices 0-3 are the corner functions, 4-7 the mid-edge functions.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad8_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta)
+{
+  libmesh_assert_less(i, 8);
+
+  switch (i)
+    {
+    case 0: return 0.25 * (1.0 - xi) * (1.0 - eta) * (-1.0 - xi - eta);
+    case 1: return 0.25 * (1.0 + xi) * (1.0 - eta) * (-1.0 + xi - eta);
+    case 2: return 0.25 * (1.0 + xi) * (1.0 + eta) * (-1.0 + xi + eta);
+    case 3: return 0.25 * (1.0 - xi) * (1.0 + eta) * (-1.0 - xi + eta);
+    case 4: return 0.5 * (1.0 - xi * xi) * (1.0 - eta);
+    case 5: return 0.5 * (1.0 + xi) * (1.0 - eta * eta);
+    case 6: return 0.5 * (1.0 - xi * xi) * (1.0 + eta);
+    default: return 0.5 * (1.0 - xi) * (1.0 - eta * eta); // i == 7 (any larger i also lands here when the assert is compiled out)
+    }
+}
+// First derivative of the i-th QUAD8 shape function: j == 0 is d/dxi, any other j (asserted < 2) is d/deta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad8_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta)
+{
+  libmesh_assert_less(i, 8);
+  libmesh_assert_less(j, 2);
+
+  switch (j)
+    {
+    case 0: // d/dxi
+      switch (i)
+        {
+        case 0: return 0.25 * (1.0 - eta) * (2.0 * xi + eta);
+        case 1: return 0.25 * (1.0 - eta) * (2.0 * xi - eta);
+        case 2: return 0.25 * (1.0 + eta) * (2.0 * xi + eta);
+        case 3: return 0.25 * (1.0 + eta) * (2.0 * xi - eta);
+        case 4: return -xi * (1.0 - eta);
+        case 5: return 0.5 * (1.0 - eta * eta);
+        case 6: return -xi * (1.0 + eta);
+        default: return -0.5 * (1.0 - eta * eta);
+        }
+
+    default: // d/deta
+      switch (i)
+        {
+        case 0: return 0.25 * (1.0 - xi) * (xi + 2.0 * eta);
+        case 1: return 0.25 * (1.0 + xi) * (2.0 * eta - xi);
+        case 2: return 0.25 * (1.0 + xi) * (xi + 2.0 * eta);
+        case 3: return 0.25 * (1.0 - xi) * (2.0 * eta - xi);
+        case 4: return -0.5 * (1.0 - xi * xi);
+        case 5: return -eta * (1.0 + xi);
+        case 6: return 0.5 * (1.0 - xi * xi);
+        default: return -eta * (1.0 - xi);
+        }
+    }
+}
+// Second derivative of the i-th QUAD8 shape function: j == 0 is d2/dxi2, j == 1 is d2/(dxi deta), any other j (asserted < 3) is d2/deta2.
+#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad8_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j,
+                                          const Real xi,
+                                          const Real eta)
+{
+  libmesh_assert_less(i, 8);
+  libmesh_assert_less(j, 3);
+
+  switch (j)
+    {
+    case 0: // d2/dxi2
+      switch (i)
+        {
+        case 0:
+        case 1:
+          return 0.5 * (1.0 - eta);
+        case 2:
+        case 3:
+          return 0.5 * (1.0 + eta);
+        case 4:
+          return eta - 1.0;
+        case 6:
+          return -1.0 - eta;
+        default: // functions 5 and 7 are linear in xi
+          return 0.0;
+        }
+
+    case 1: // d2/(dxi deta)
+      switch (i)
+        {
+        case 0: return 0.25 * (1.0 - 2.0 * xi - 2.0 * eta);
+        case 1: return 0.25 * (-1.0 - 2.0 * xi + 2.0 * eta);
+        case 2: return 0.25 * (1.0 + 2.0 * xi + 2.0 * eta);
+        case 3: return 0.25 * (-1.0 + 2.0 * xi - 2.0 * eta);
+        case 4: return xi;
+        case 5: return -eta;
+        case 6: return -xi;
+        default: return eta;
+        }
+
+    default: // d2/deta2
+      switch (i)
+        {
+        case 0:
+        case 3:
+          return 0.5 * (1.0 - xi);
+        case 1:
+        case 2:
+          return 0.5 * (1.0 + xi);
+        case 5:
+          return -1.0 - xi;
+        case 7:
+          return xi - 1.0;
+        default: // functions 4 and 6 are linear in eta
+          return 0.0;
+        }
+    }
+}
+#endif
+// Value of the i-th (i < 20) HEX20 serendipity shape function at reference point (xi, eta, zeta); indices 0-7 are the corner functions, 8-19 the mid-edge functions.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex20_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta,
+                             const Real zeta)
+{
+  libmesh_assert_less(i, 20);
+
+  const Real x = 0.5 * (xi + 1.0); // formulas below are written on the unit cube: x, y, z in [0, 1] for xi, eta, zeta in [-1, 1]
+  const Real y = 0.5 * (eta + 1.0);
+  const Real z = 0.5 * (zeta + 1.0);
+
+  switch (i)
+    {
+    case 0: return (1.0 - x) * (1.0 - y) * (1.0 - z) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z);
+    case 1: return x * (1.0 - y) * (1.0 - z) * (2.0 * x - 2.0 * y - 2.0 * z - 1.0);
+    case 2: return x * y * (1.0 - z) * (2.0 * x + 2.0 * y - 2.0 * z - 3.0);
+    case 3: return (1.0 - x) * y * (1.0 - z) * (2.0 * y - 2.0 * x - 2.0 * z - 1.0);
+    case 4: return (1.0 - x) * (1.0 - y) * z * (2.0 * z - 2.0 * x - 2.0 * y - 1.0);
+    case 5: return x * (1.0 - y) * z * (2.0 * x - 2.0 * y + 2.0 * z - 3.0);
+    case 6: return x * y * z * (2.0 * x + 2.0 * y + 2.0 * z - 5.0);
+    case 7: return (1.0 - x) * y * z * (2.0 * y - 2.0 * x + 2.0 * z - 3.0);
+    case 8: return 4.0 * x * (1.0 - x) * (1.0 - y) * (1.0 - z);
+    case 9: return 4.0 * x * y * (1.0 - y) * (1.0 - z);
+    case 10: return 4.0 * x * (1.0 - x) * y * (1.0 - z);
+    case 11: return 4.0 * (1.0 - x) * y * (1.0 - y) * (1.0 - z);
+    case 12: return 4.0 * (1.0 - x) * (1.0 - y) * z * (1.0 - z);
+    case 13: return 4.0 * x * (1.0 - y) * z * (1.0 - z);
+    case 14: return 4.0 * x * y * z * (1.0 - z);
+    case 15: return 4.0 * (1.0 - x) * y * z * (1.0 - z);
+    case 16: return 4.0 * x * (1.0 - x) * (1.0 - y) * z;
+    case 17: return 4.0 * x * y * (1.0 - y) * z;
+    case 18: return 4.0 * x * (1.0 - x) * y * z;
+    default: return 4.0 * (1.0 - x) * y * (1.0 - y) * z; // i == 19
+    }
+}
+// First derivative of the i-th HEX20 shape function with respect to the reference coordinates: j == 0 is d/dxi, j == 1 is d/deta, any other j (asserted < 3) is d/dzeta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex20_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta,
+                                   const Real zeta)
+{
+  libmesh_assert_less(i, 20);
+  libmesh_assert_less(j, 3);
+
+  const Real x = 0.5 * (xi + 1.0); // unit-cube coordinates; dx/dxi = 0.5 is the chain-rule factor folded into every expression below
+  const Real y = 0.5 * (eta + 1.0);
+  const Real z = 0.5 * (zeta + 1.0);
+
+  switch (j)
+    {
+    case 0: // d/dxi
+      switch (i)
+        {
+        case 0: return 0.5 * (1.0 - y) * (1.0 - z) * ((1.0 - x) * (-2.0) + (-1.0) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z));
+        case 1: return 0.5 * (1.0 - y) * (1.0 - z) * (x * 2.0 + (2.0 * x - 2.0 * y - 2.0 * z - 1.0));
+        case 2: return 0.5 * y * (1.0 - z) * (x * 2.0 + (2.0 * x + 2.0 * y - 2.0 * z - 3.0));
+        case 3: return 0.5 * y * (1.0 - z) * ((1.0 - x) * (-2.0) + (-1.0) * (2.0 * y - 2.0 * x - 2.0 * z - 1.0));
+        case 4: return 0.5 * (1.0 - y) * z * ((1.0 - x) * (-2.0) + (-1.0) * (2.0 * z - 2.0 * x - 2.0 * y - 1.0));
+        case 5: return 0.5 * (1.0 - y) * z * (x * 2.0 + (2.0 * x - 2.0 * y + 2.0 * z - 3.0));
+        case 6: return 0.5 * y * z * (x * 2.0 + (2.0 * x + 2.0 * y + 2.0 * z - 5.0));
+        case 7: return 0.5 * y * z * ((1.0 - x) * (-2.0) + (-1.0) * (2.0 * y - 2.0 * x + 2.0 * z - 3.0));
+        case 8: return 2.0 * (1.0 - y) * (1.0 - z) * (1.0 - 2.0 * x);
+        case 9: return 2.0 * y * (1.0 - y) * (1.0 - z);
+        case 10: return 2.0 * y * (1.0 - z) * (1.0 - 2.0 * x);
+        case 11: return -2.0 * y * (1.0 - y) * (1.0 - z);
+        case 12: return -2.0 * (1.0 - y) * z * (1.0 - z);
+        case 13: return 2.0 * (1.0 - y) * z * (1.0 - z);
+        case 14: return 2.0 * y * z * (1.0 - z);
+        case 15: return -2.0 * y * z * (1.0 - z);
+        case 16: return 2.0 * (1.0 - y) * z * (1.0 - 2.0 * x);
+        case 17: return 2.0 * y * (1.0 - y) * z;
+        case 18: return 2.0 * y * z * (1.0 - 2.0 * x);
+        default: return -2.0 * y * (1.0 - y) * z;
+        }
+
+    case 1: // d/deta
+      switch (i)
+        {
+        case 0: return 0.5 * (1.0 - x) * (1.0 - z) * ((1.0 - y) * (-2.0) + (-1.0) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z));
+        case 1: return 0.5 * x * (1.0 - z) * ((1.0 - y) * (-2.0) + (-1.0) * (2.0 * x - 2.0 * y - 2.0 * z - 1.0));
+        case 2: return 0.5 * x * (1.0 - z) * (y * 2.0 + (2.0 * x + 2.0 * y - 2.0 * z - 3.0));
+        case 3: return 0.5 * (1.0 - x) * (1.0 - z) * (y * 2.0 + (2.0 * y - 2.0 * x - 2.0 * z - 1.0));
+        case 4: return 0.5 * (1.0 - x) * z * ((1.0 - y) * (-2.0) + (-1.0) * (2.0 * z - 2.0 * x - 2.0 * y - 1.0));
+        case 5: return 0.5 * x * z * ((1.0 - y) * (-2.0) + (-1.0) * (2.0 * x - 2.0 * y + 2.0 * z - 3.0));
+        case 6: return 0.5 * x * z * (y * 2.0 + (2.0 * x + 2.0 * y + 2.0 * z - 5.0));
+        case 7: return 0.5 * (1.0 - x) * z * (y * 2.0 + (2.0 * y - 2.0 * x + 2.0 * z - 3.0));
+        case 8: return -2.0 * x * (1.0 - x) * (1.0 - z);
+        case 9: return 2.0 * x * (1.0 - z) * (1.0 - 2.0 * y);
+        case 10: return 2.0 * x * (1.0 - x) * (1.0 - z);
+        case 11: return 2.0 * (1.0 - x) * (1.0 - z) * (1.0 - 2.0 * y);
+        case 12: return -2.0 * (1.0 - x) * z * (1.0 - z);
+        case 13: return -2.0 * x * z * (1.0 - z);
+        case 14: return 2.0 * x * z * (1.0 - z);
+        case 15: return 2.0 * (1.0 - x) * z * (1.0 - z);
+        case 16: return -2.0 * x * (1.0 - x) * z;
+        case 17: return 2.0 * x * z * (1.0 - 2.0 * y);
+        case 18: return 2.0 * x * (1.0 - x) * z;
+        default: return 2.0 * (1.0 - x) * z * (1.0 - 2.0 * y);
+        }
+
+    default: // d/dzeta
+      switch (i)
+        {
+        case 0: return 0.5 * (1.0 - x) * (1.0 - y) * ((1.0 - z) * (-2.0) + (-1.0) * (1.0 - 2.0 * x - 2.0 * y - 2.0 * z));
+        case 1: return 0.5 * x * (1.0 - y) * ((1.0 - z) * (-2.0) + (-1.0) * (2.0 * x - 2.0 * y - 2.0 * z - 1.0));
+        case 2: return 0.5 * x * y * ((1.0 - z) * (-2.0) + (-1.0) * (2.0 * x + 2.0 * y - 2.0 * z - 3.0));
+        case 3: return 0.5 * (1.0 - x) * y * ((1.0 - z) * (-2.0) + (-1.0) * (2.0 * y - 2.0 * x - 2.0 * z - 1.0));
+        case 4: return 0.5 * (1.0 - x) * (1.0 - y) * (z * 2.0 + (2.0 * z - 2.0 * x - 2.0 * y - 1.0));
+        case 5: return 0.5 * x * (1.0 - y) * (z * 2.0 + (2.0 * x - 2.0 * y + 2.0 * z - 3.0));
+        case 6: return 0.5 * x * y * (z * 2.0 + (2.0 * x + 2.0 * y + 2.0 * z - 5.0));
+        case 7: return 0.5 * (1.0 - x) * y * (z * 2.0 + (2.0 * y - 2.0 * x + 2.0 * z - 3.0));
+        case 8: return -2.0 * x * (1.0 - x) * (1.0 - y);
+        case 9: return -2.0 * x * y * (1.0 - y);
+        case 10: return -2.0 * x * (1.0 - x) * y;
+        case 11: return -2.0 * (1.0 - x) * y * (1.0 - y);
+        case 12: return 2.0 * (1.0 - x) * (1.0 - y) * (1.0 - 2.0 * z);
+        case 13: return 2.0 * x * (1.0 - y) * (1.0 - 2.0 * z);
+        case 14: return 2.0 * x * y * (1.0 - 2.0 * z);
+        case 15: return 2.0 * (1.0 - x) * y * (1.0 - 2.0 * z);
+        case 16: return 2.0 * x * (1.0 - x) * (1.0 - y);
+        case 17: return 2.0 * x * y * (1.0 - y);
+        case 18: return 2.0 * x * (1.0 - x) * y;
+        default: return 2.0 * (1.0 - x) * y * (1.0 - y);
+        }
+    }
+}
+// Second derivative of the i-th HEX20 shape function; j selects the pair: 0 xi-xi, 1 xi-eta, 2 eta-eta, 3 xi-zeta, 4 eta-zeta, any other j (asserted < 6) zeta-zeta.
+#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex20_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j,
+                                          const Real xi,
+                                          const Real eta,
+                                          const Real zeta)
+{
+  libmesh_assert_less(i, 20);
+  libmesh_assert_less(j, 6);
+
+  const Real x = 0.5 * (xi + 1.0); // unit-cube coordinates; the chain-rule factor 0.5 * 0.5 is folded into every expression below
+  const Real y = 0.5 * (eta + 1.0);
+  const Real z = 0.5 * (zeta + 1.0);
+
+  switch (j)
+    {
+    case 0: // d2/dxi2
+      switch (i)
+        {
+        case 0:
+        case 1: return (1.0 - y) * (1.0 - z);
+        case 2:
+        case 3: return y * (1.0 - z);
+        case 4:
+        case 5: return (1.0 - y) * z;
+        case 6:
+        case 7: return y * z;
+        case 8: return -2.0 * (1.0 - y) * (1.0 - z);
+        case 10: return -2.0 * y * (1.0 - z);
+        case 16: return -2.0 * (1.0 - y) * z;
+        case 18: return -2.0 * y * z;
+        default: return 0.0; // remaining edge functions are linear in x
+        }
+
+    case 1: // d2/(dxi deta)
+      switch (i)
+        {
+        case 0: return (1.25 - x - y - 0.5 * z) * (1.0 - z);
+        case 1: return (-x + y + 0.5 * z - 0.25) * (1.0 - z);
+        case 2: return (x + y - 0.5 * z - 0.75) * (1.0 - z);
+        case 3: return (-y + x + 0.5 * z - 0.25) * (1.0 - z);
+        case 4: return -0.25 * z * (4.0 * x + 4.0 * y - 2.0 * z - 3.0);
+        case 5: return -0.25 * z * (-4.0 * y + 4.0 * x + 2.0 * z - 1.0);
+        case 6: return 0.25 * z * (-5.0 + 4.0 * x + 4.0 * y + 2.0 * z);
+        case 7: return 0.25 * z * (4.0 * x - 4.0 * y - 2.0 * z + 1.0);
+        case 8: return (-1.0 + 2.0 * x) * (1.0 - z);
+        case 9: return (1.0 - 2.0 * y) * (1.0 - z);
+        case 10: return (1.0 - 2.0 * x) * (1.0 - z);
+        case 11: return (-1.0 + 2.0 * y) * (1.0 - z);
+        case 12: return z * (1.0 - z);
+        case 13: return -z * (1.0 - z);
+        case 14: return z * (1.0 - z);
+        case 15: return -z * (1.0 - z);
+        case 16: return (-1.0 + 2.0 * x) * z;
+        case 17: return (1.0 - 2.0 * y) * z;
+        case 18: return (1.0 - 2.0 * x) * z;
+        default: return (-1.0 + 2.0 * y) * z;
+        }
+
+    case 2: // d2/deta2
+      switch (i)
+        {
+        case 0:
+        case 3: return (1.0 - x) * (1.0 - z);
+        case 1:
+        case 2: return x * (1.0 - z);
+        case 4:
+        case 7: return (1.0 - x) * z;
+        case 5:
+        case 6: return x * z;
+        case 9: return -2.0 * x * (1.0 - z);
+        case 11: return -2.0 * (1.0 - x) * (1.0 - z);
+        case 17: return -2.0 * x * z;
+        case 19: return -2.0 * (1.0 - x) * z;
+        default: return 0.0; // remaining edge functions are linear in y
+        }
+
+    case 3: // d2/(dxi dzeta)
+      switch (i)
+        {
+        case 0: return (1.25 - x - 0.5 * y - z) * (1.0 - y);
+        case 1: return (-x + 0.5 * y + z - 0.25) * (1.0 - y);
+        case 2: return -0.25 * y * (2.0 * y + 4.0 * x - 4.0 * z - 1.0);
+        case 3: return -0.25 * y * (-2.0 * y + 4.0 * x + 4.0 * z - 3.0);
+        case 4: return (-z + x + 0.5 * y - 0.25) * (1.0 - y);
+        case 5: return (x - 0.5 * y + z - 0.75) * (1.0 - y);
+        case 6: return 0.25 * y * (2.0 * y + 4.0 * x + 4.0 * z - 5.0);
+        case 7: return 0.25 * y * (-2.0 * y + 4.0 * x - 4.0 * z + 1.0);
+        case 8: return (-1.0 + 2.0 * x) * (1.0 - y);
+        case 9: return -y * (1.0 - y);
+        case 10: return (-1.0 + 2.0 * x) * y;
+        case 11: return y * (1.0 - y);
+        case 12: return (-1.0 + 2.0 * z) * (1.0 - y);
+        case 13: return (1.0 - 2.0 * z) * (1.0 - y);
+        case 14: return (1.0 - 2.0 * z) * y;
+        case 15: return (-1.0 + 2.0 * z) * y;
+        case 16: return (1.0 - 2.0 * x) * (1.0 - y);
+        case 17: return y * (1.0 - y);
+        case 18: return (1.0 - 2.0 * x) * y;
+        default: return -y * (1.0 - y);
+        }
+
+    case 4: // d2/(deta dzeta)
+      switch (i)
+        {
+        case 0: return (1.25 - 0.5 * x - y - z) * (1.0 - x);
+        case 1: return 0.25 * x * (2.0 * x - 4.0 * y - 4.0 * z + 3.0);
+        case 2: return -0.25 * x * (2.0 * x + 4.0 * y - 4.0 * z - 1.0);
+        case 3: return (-y + 0.5 * x + z - 0.25) * (1.0 - x);
+        case 4: return (-z + 0.5 * x + y - 0.25) * (1.0 - x);
+        case 5: return -0.25 * x * (2.0 * x - 4.0 * y + 4.0 * z - 1.0);
+        case 6: return 0.25 * x * (2.0 * x + 4.0 * y + 4.0 * z - 5.0);
+        case 7: return (y - 0.5 * x + z - 0.75) * (1.0 - x);
+        case 8: return x * (1.0 - x);
+        case 9: return (-1.0 + 2.0 * y) * x;
+        case 10: return -x * (1.0 - x);
+        case 11: return (-1.0 + 2.0 * y) * (1.0 - x);
+        case 12: return (-1.0 + 2.0 * z) * (1.0 - x);
+        case 13: return (-1.0 + 2.0 * z) * x;
+        case 14: return (1.0 - 2.0 * z) * x;
+        case 15: return (1.0 - 2.0 * z) * (1.0 - x);
+        case 16: return -x * (1.0 - x);
+        case 17: return (1.0 - 2.0 * y) * x;
+        case 18: return x * (1.0 - x);
+        default: return (1.0 - 2.0 * y) * (1.0 - x);
+        }
+
+    default: // d2/dzeta2
+      switch (i)
+        {
+        case 0:
+        case 4: return (1.0 - x) * (1.0 - y);
+        case 1:
+        case 5: return x * (1.0 - y);
+        case 2:
+        case 6: return x * y;
+        case 3:
+        case 7: return (1.0 - x) * y;
+        case 12: return -2.0 * (1.0 - x) * (1.0 - y);
+        case 13: return -2.0 * x * (1.0 - y);
+        case 14: return -2.0 * x * y;
+        case 15: return -2.0 * (1.0 - x) * y;
+        default: return 0.0; // remaining edge functions are linear in z
+        }
+    }
+}
+#endif
+
+} // namespace detail
+} // namespace libMesh
+
+#endif // LIBMESH_FE_SERENDIPITY_LAGRANGE_H
diff --git a/include/fe/fe_shape_traits.h b/include/fe/fe_shape_traits.h
new
file mode 100644 index 00000000000..bd300f31859 --- /dev/null +++ b/include/fe/fe_shape_traits.h @@ -0,0 +1,719 @@ +#ifndef LIBMESH_FE_SHAPE_TRAITS_H +#define LIBMESH_FE_SHAPE_TRAITS_H + +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_elem_class.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_order.h" +#include "libmesh/libmesh_device.h" + +namespace libMesh +{ + +struct FEShapeKey +{ + FEFamily family; + ElemType elem_type; + Order order; +}; + +LIBMESH_DEVICE_INLINE bool +is_monomial_2d_elem_type(ElemType elem_type) +{ + switch (elem_type) + { + case C0POLYGON: + case TRI3: + case TRISHELL3: + case TRI6: + case TRI7: + case QUAD4: + case QUADSHELL4: + case QUAD8: + case QUADSHELL8: + case QUAD9: + case QUADSHELL9: + return true; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +is_monomial_3d_elem_type(ElemType elem_type, + bool include_pyramid18 = true) +{ + switch (elem_type) + { + case TET4: + case TET10: + case TET14: + case HEX8: + case HEX20: + case HEX27: + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case C0POLYHEDRON: + return true; + case PYRAMID18: + return include_pyramid18; + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE ElemType +side_topology_or_invalid(ElemType parent) +{ + switch (parent) + { + case EDGE2: + case EDGE3: + case EDGE4: + return NODEELEM; + + case TRI3: + case QUAD4: + return EDGE2; + + case TRI6: + case TRI7: + case QUAD8: + case QUAD9: + return EDGE3; + + case TET4: + return TRI3; + case HEX8: + return QUAD4; + + case TET10: + return TRI6; + case TET14: + return TRI7; + case HEX20: + return QUAD8; + case HEX27: + return QUAD9; + + default: + return INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE FEElemClass +class_from_topology_or_invalid(ElemType topo) +{ + switch (topo) + { + case EDGE2: + case EDGE3: + case EDGE4: + return FEElemClass::EDGE; + + case TRI3: + case TRI6: + case TRI7: + 
return FEElemClass::TRI; + + case QUAD4: + case QUAD8: + case QUAD9: + return FEElemClass::QUAD; + + case TET4: + case TET10: + case TET14: + return FEElemClass::TET; + + case HEX8: + case HEX20: + case HEX27: + return FEElemClass::HEX; + + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + return FEElemClass::PRISM; + + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return FEElemClass::PYRAMID; + + default: + return FEElemClass::N_CLASSES; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +elem_class_dim_or_zero(FEElemClass cls) +{ + switch (cls) + { + case FEElemClass::EDGE: + return 1; + case FEElemClass::TRI: + case FEElemClass::QUAD: + return 2; + case FEElemClass::TET: + case FEElemClass::HEX: + case FEElemClass::PRISM: + case FEElemClass::PYRAMID: + return 3; + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +topology_dim_or_zero(ElemType topo) +{ + return elem_class_dim_or_zero(class_from_topology_or_invalid(topo)); +} + +LIBMESH_DEVICE_INLINE constexpr ElemType +lagrange_shape_topology_or_invalid(FEShapeKey key) +{ + switch (key.order) + { + case CONSTANT: + case FIRST: + switch (key.elem_type) + { + case EDGE2: + case EDGE3: + case EDGE4: + return EDGE2; + + case TRI3: + case TRI6: + case TRI7: + return TRI3; + + case QUAD4: + case QUAD8: + case QUAD9: + return QUAD4; + + case TET4: + case TET10: + case TET14: + return TET4; + + case HEX8: + case HEX20: + case HEX27: + return HEX8; + + default: + return INVALID_ELEM; + } + + case SECOND: + switch (key.elem_type) + { + case EDGE3: + return EDGE3; + + case TRI6: + case TRI7: + return TRI6; + + case QUAD8: + return QUAD8; + + case QUAD9: + return QUAD9; + + case TET10: + case TET14: + return TET10; + + case HEX20: + return HEX20; + + case HEX27: + return HEX27; + + default: + return INVALID_ELEM; + } + + case THIRD: + switch (key.elem_type) + { + case EDGE4: + return EDGE4; + + default: + return INVALID_ELEM; + } + + default: + return 
INVALID_ELEM; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +lagrange_exact_n_dofs_or_zero(ElemType elem_type, + Order order) +{ + switch (order) + { + case CONSTANT: + return (elem_type == NODEELEM) ? 1u : 0u; + + case FIRST: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 2; + + case TRI3: + case TRI6: + case TRI7: + return 3; + + case QUAD4: + case QUAD8: + case QUAD9: + return 4; + + case TET4: + case TET10: + case TET14: + return 4; + + case HEX8: + case HEX20: + case HEX27: + return 8; + + case PRISM6: + case PRISM15: + case PRISM18: + case PRISM20: + case PRISM21: + return 6; + + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return 5; + + default: + return 0; + } + + case SECOND: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE3: + return 3; + + case TRI6: + case TRI7: + return 6; + + case QUAD8: + return 8; + + case QUAD9: + return 9; + + case TET10: + case TET14: + return 10; + + case HEX20: + return 20; + + case HEX27: + return 27; + + case PRISM15: + return 15; + + case PRISM18: + case PRISM20: + case PRISM21: + return 18; + + case PYRAMID13: + return 13; + + case PYRAMID14: + case PYRAMID18: + return 14; + + default: + return 0; + } + + case THIRD: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE4: + return 4; + + case TRI7: + return 7; + + case TET14: + return 14; + + case PRISM20: + return 20; + + case PRISM21: + return 21; + + case PYRAMID18: + return 18; + + default: + return 0; + } + + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE unsigned int +monomial_exact_n_dofs_or_zero(ElemType elem_type, + Order order) +{ + if (elem_type == INVALID_ELEM) + return 0; + if (order < CONSTANT) + return 0; + + switch (order) + { + case CONSTANT: + return 1; + + case FIRST: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 2; + + default: + break; + } + + if 
(is_monomial_2d_elem_type(elem_type)) + return 3; + if (is_monomial_3d_elem_type(elem_type)) + return 4; + return 0; + + case SECOND: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 3; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 6; + if (is_monomial_3d_elem_type(elem_type)) + return 10; + return 0; + + case THIRD: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + case EDGE4: + return 4; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 10; + if (is_monomial_3d_elem_type(elem_type)) + return 20; + return 0; + + case FOURTH: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + return 5; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 15; + if (is_monomial_3d_elem_type(elem_type, false)) + return 35; + return 0; + + case FIFTH: + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + return 6; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return 21; + if (is_monomial_3d_elem_type(elem_type, false)) + return 56; + return 0; + + default: + { + const unsigned int p = static_cast(order); + + switch (elem_type) + { + case NODEELEM: + return 1; + + case EDGE2: + case EDGE3: + return p + 1; + + default: + break; + } + + if (is_monomial_2d_elem_type(elem_type)) + return (p + 1) * (p + 2) / 2; + if (is_monomial_3d_elem_type(elem_type, false)) + return (p + 1) * (p + 2) * (p + 3) / 6; + return 0; + } + } +} + +LIBMESH_DEVICE_INLINE constexpr unsigned int +monomial_evaluator_dim_or_zero(ElemType elem_type) +{ + switch (elem_type) + { + case EDGE2: + case EDGE3: + case EDGE4: + return 1; + + case TRI3: + case TRI6: + case TRI7: + case QUAD4: + case QUAD8: + case QUAD9: + return 2; + + case TET4: + case TET10: + case TET14: + case HEX8: + case HEX20: + case HEX27: + case PRISM6: + case PRISM15: + case 
PRISM18: + case PRISM20: + case PRISM21: + case PYRAMID5: + case PYRAMID13: + case PYRAMID14: + case PYRAMID18: + return 3; + + default: + return 0; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_shape(FEShapeKey key); + +LIBMESH_DEVICE_INLINE bool +supports_lagrange_map_topology(ElemType topo) +{ + switch (topo) + { + case EDGE2: + case EDGE3: + case EDGE4: + case TRI3: + case TRI6: + case QUAD4: + case QUAD8: + case QUAD9: + case TET4: + case TET10: + case HEX8: + case HEX20: + case HEX27: + return true; + + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_lagrange_face_map_topology(ElemType topo) +{ + return supports_lagrange_map_topology(topo); +} + +template +LIBMESH_DEVICE_INLINE auto +dispatch_lagrange_map_topology_or(ElemType topo, + const Op & op, + const Unsupported & unsupported) + -> decltype(op.template operator()()) +{ + switch (topo) + { + case EDGE2: + return op.template operator()(); + case EDGE3: + return op.template operator()(); + case EDGE4: + return op.template operator()(); + case TRI3: + return op.template operator()(); + case TRI6: + return op.template operator()(); + case QUAD4: + return op.template operator()(); + case QUAD8: + return op.template operator()(); + case QUAD9: + return op.template operator()(); + case TET4: + return op.template operator()(); + case TET10: + return op.template operator()(); + case HEX8: + return op.template operator()(); + case HEX20: + return op.template operator()(); + case HEX27: + return op.template operator()(); + default: + return unsupported(topo); + } +} + +LIBMESH_DEVICE_INLINE bool +supports_shape_with_lagrange_map(FEShapeKey key) +{ + return supports_shape(key) && + supports_lagrange_map_topology(key.elem_type); +} + +LIBMESH_DEVICE_INLINE bool +supports_shape(FEShapeKey key) +{ + switch (key.family) + { + case LAGRANGE: + return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && + lagrange_shape_topology_or_invalid(key) != INVALID_ELEM; + + case MONOMIAL: + 
return monomial_exact_n_dofs_or_zero(key.elem_type, key.order) != 0 && + monomial_evaluator_dim_or_zero(key.elem_type) != 0 && + key.order >= CONSTANT && + key.order <= FIFTH; + + default: + return false; + } +} + +LIBMESH_DEVICE_INLINE bool +supports_grad_shape(FEShapeKey key) +{ + return supports_shape(key); +} + +LIBMESH_DEVICE_INLINE bool +supports_n_dofs(FEShapeKey key) +{ + return supports_shape(key); +} + +LIBMESH_DEVICE_INLINE unsigned int +n_dofs_or_zero(FEShapeKey key) +{ + switch (key.family) + { + case LAGRANGE: + return lagrange_exact_n_dofs_or_zero(key.elem_type, key.order); + + case MONOMIAL: + return monomial_exact_n_dofs_or_zero(key.elem_type, key.order); + + default: + return 0; + } +} + +} // namespace libMesh + +#endif // LIBMESH_FE_SHAPE_TRAITS_H diff --git a/include/fe/fe_simplex_lagrange.h b/include/fe/fe_simplex_lagrange.h new file mode 100644 index 00000000000..ce29d0605fb --- /dev/null +++ b/include/fe/fe_simplex_lagrange.h @@ -0,0 +1,462 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner
+
+#ifndef LIBMESH_FE_SIMPLEX_LAGRANGE_H
+#define LIBMESH_FE_SIMPLEX_LAGRANGE_H
+// NOTE(review): Real, LIBMESH_DEVICE_INLINE and libmesh_assert_less are not declared here -- presumably reached via point.h; confirm.
+#include "libmesh/point.h"
+
+namespace libMesh
+{
+namespace detail
+{
+// tri_dzeta[b][j]: derivative of barycentric coordinate b of {1 - xi - eta, xi, eta} w.r.t. xi (j = 0) or eta (j = 1).
+constexpr Real tri_dzeta[3][2] =
+  {
+    {-1., -1.},
+    { 1., 0.},
+    { 0., 1.}
+  };
+// Barycentric index pair {m, n} used by Tri6 shape function i (m == n for the first three functions).
+constexpr unsigned short tri6_zeta_indices[6][2] =
+  {
+    {0, 0},
+    {1, 1},
+    {2, 2},
+    {0, 1},
+    {1, 2},
+    {2, 0}
+  };
+// The three barycentric indices multiplied together to form the Tri7 bubble term.
+constexpr unsigned short tri7_bubble_zeta_indices[1][3] =
+  {
+    {0, 1, 2}
+  };
+// tet_dzeta[b][j]: derivative of barycentric coordinate b of {1 - xi - eta - zeta, xi, eta, zeta} w.r.t. xi, eta, zeta (j = 0, 1, 2).
+constexpr Real tet_dzeta[4][3] =
+  {
+    {-1., -1., -1.},
+    { 1., 0., 0.},
+    { 0., 1., 0.},
+    { 0., 0., 1.}
+  };
+// Barycentric index pair {m, n} used by Tet10 shape function i (m == n for the first four functions).
+constexpr unsigned short tet10_zeta_indices[10][2] =
+  {
+    {0, 0},
+    {1, 1},
+    {2, 2},
+    {3, 3},
+    {0, 1},
+    {1, 2},
+    {2, 0},
+    {0, 3},
+    {1, 3},
+    {2, 3}
+  };
+// For each of the four Tet14 bubble terms, the three barycentric indices whose product forms it.
+constexpr unsigned short tet14_bubble_zeta_indices[4][3] =
+  {
+    {0, 1, 2},
+    {0, 1, 3},
+    {1, 2, 3},
+    {0, 2, 3}
+  };
+// For Tet14 functions i < 4: the three bubble terms that are added (times 3) to the Tet10 value.
+constexpr unsigned short tet14_vertex_bubble_indices[4][3] =
+  {
+    {0, 1, 3},
+    {0, 1, 2},
+    {0, 2, 3},
+    {1, 2, 3}
+  };
+// For Tet14 functions 4 <= i < 10 (row i - 4): the two bubble terms subtracted (times 12) from the Tet10 value.
+constexpr unsigned short tet14_edge_bubble_indices[6][2] =
+  {
+    {0, 1},
+    {0, 2},
+    {0, 3},
+    {1, 3},
+    {1, 2},
+    {3, 2}
+  };
+// Second-derivative slot j -> direction pair {my_j, my_k}, read by the tet second-derivative functions.
+#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
+constexpr unsigned short tet_second_deriv_indices[6][2] =
+  {
+    {0, 0},
+    {0, 1},
+    {1, 1},
+    {0, 2},
+    {1, 2},
+    {2, 2}
+  };
+#endif
+// Tri3 shape function i at (xi, eta): 1 - xi - eta, xi, or eta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri3_shape(const unsigned int i,
+                            const Real xi,
+                            const Real eta)
+{
+  libmesh_assert_less(i, 3);
+
+  switch (i)
+    {
+    case 0: return 1. - xi - eta;
+    case 1: return xi;
+    default: return eta;
+    }
+}
+// Derivative of Tri3 shape function i in direction j; constant, read from tri_dzeta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri3_shape_deriv(const unsigned int i,
+                                  const unsigned int j)
+{
+  libmesh_assert_less(i, 3);
+  libmesh_assert_less(j, 2);
+
+  return tri_dzeta[i][j];
+}
+// Tri6 shape function i: b_m (2 b_m - 1) for i < 3, otherwise 4 b_m b_n, with b the barycentric coordinates.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri6_shape(const unsigned int i,
+                            const Real xi,
+                            const Real eta)
+{
+  libmesh_assert_less(i, 6);
+
+  const Real bary[3] = {1. - xi - eta, xi, eta};
+  const unsigned short m = tri6_zeta_indices[i][0];
+  const unsigned short n = tri6_zeta_indices[i][1];
+
+  if (i < 3)
+    return bary[m] * (2. * bary[m] - 1.);
+
+  return 4. * bary[m] * bary[n];
+}
+// Derivative in direction j of Tri6 shape function i (chain/product rule using tri_dzeta).
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri6_shape_deriv(const unsigned int i,
+                                  const unsigned int j,
+                                  const Real xi,
+                                  const Real eta)
+{
+  libmesh_assert_less(i, 6);
+  libmesh_assert_less(j, 2);
+
+  const Real bary[3] = {1. - xi - eta, xi, eta};
+  const unsigned short m = tri6_zeta_indices[i][0];
+  const unsigned short n = tri6_zeta_indices[i][1];
+
+  if (i < 3)
+    return (4. * bary[m] - 1.) * tri_dzeta[m][j];
+
+  return 4. * bary[n] * tri_dzeta[m][j] + 4. * bary[m] * tri_dzeta[n][j];
+}
+// Tri7 shape function i: Tri6 value + 3 B (i < 3), Tri6 value - 12 B (i < 6), or 27 B, where B = b_0 b_1 b_2.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri7_shape(const unsigned int i,
+                            const Real xi,
+                            const Real eta)
+{
+  libmesh_assert_less(i, 7);
+
+  const Real bary[3] = {1. - xi - eta, xi, eta};
+  const auto & bubble_indices = tri7_bubble_zeta_indices[0];
+  const Real bubble =
+    bary[bubble_indices[0]] * bary[bubble_indices[1]] * bary[bubble_indices[2]];
+
+  if (i < 3)
+    return fe_lagrange_tri6_shape(i, xi, eta) + 3. * bubble;
+
+  if (i < 6)
+    return fe_lagrange_tri6_shape(i, xi, eta) - 12. * bubble;
+
+  return 27. * bubble;
+}
+// Derivative in direction j of Tri7 shape function i; same combination as the value, applied to the bubble's derivative.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri7_shape_deriv(const unsigned int i,
+                                  const unsigned int j,
+                                  const Real xi,
+                                  const Real eta)
+{
+  libmesh_assert_less(i, 7);
+  libmesh_assert_less(j, 2);
+
+  const Real bary[3] = {1. - xi - eta, xi, eta};
+  const auto & bubble_indices = tri7_bubble_zeta_indices[0];
+  const Real bubble_deriv = // product rule: one term per differentiated factor
+    tri_dzeta[bubble_indices[0]][j] * bary[bubble_indices[1]] * bary[bubble_indices[2]] +
+    bary[bubble_indices[0]] * tri_dzeta[bubble_indices[1]][j] * bary[bubble_indices[2]] +
+    bary[bubble_indices[0]] * bary[bubble_indices[1]] * tri_dzeta[bubble_indices[2]][j];
+
+  if (i < 3)
+    return fe_lagrange_tri6_shape_deriv(i, j, xi, eta) + 3. * bubble_deriv;
+
+  if (i < 6)
+    return fe_lagrange_tri6_shape_deriv(i, j, xi, eta) - 12. * bubble_deriv;
+
+  return 27. * bubble_deriv;
+}
+// Second derivative of Tri6 shape function i; slot j = 0, 1, 2 selects direction pair (0,0), (0,1), (1,1). Independent of xi, eta.
+#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri6_shape_second_deriv(const unsigned int i,
+                                         const unsigned int j)
+{
+  libmesh_assert_less(i, 6);
+  libmesh_assert_less(j, 3);
+
+  const unsigned short my_j = j == 2 ? 1 : 0;
+  const unsigned short my_k = j == 0 ? 0 : 1;
+
+  if (i < 3)
+    return 4. * tri_dzeta[i][my_j] * tri_dzeta[i][my_k];
+
+  const unsigned short m = tri6_zeta_indices[i][0];
+  const unsigned short n = tri6_zeta_indices[i][1];
+
+  return 4. * (tri_dzeta[n][my_j] * tri_dzeta[m][my_k] +
+               tri_dzeta[m][my_j] * tri_dzeta[n][my_k]);
+}
+// Second derivative of Tri7 shape function i; combines the Tri6 term with the bubble's second derivative.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tri7_shape_second_deriv(const unsigned int i,
+                                         const unsigned int j,
+                                         const Real xi,
+                                         const Real eta)
+{
+  libmesh_assert_less(i, 7);
+  libmesh_assert_less(j, 3);
+
+  const unsigned short my_j = j == 2 ? 1 : 0;
+  const unsigned short my_k = j == 0 ? 0 : 1;
+  const Real bary[3] = {1. - xi - eta, xi, eta};
+  const auto & bubble_indices = tri7_bubble_zeta_indices[0];
+  const Real bubble_second_deriv = // six terms: each ordered choice of two distinct factors to differentiate
+    tri_dzeta[bubble_indices[0]][my_j] * tri_dzeta[bubble_indices[1]][my_k] * bary[bubble_indices[2]] +
+    tri_dzeta[bubble_indices[0]][my_j] * bary[bubble_indices[1]] * tri_dzeta[bubble_indices[2]][my_k] +
+    bary[bubble_indices[0]] * tri_dzeta[bubble_indices[1]][my_j] * tri_dzeta[bubble_indices[2]][my_k] +
+    tri_dzeta[bubble_indices[0]][my_k] * tri_dzeta[bubble_indices[1]][my_j] * bary[bubble_indices[2]] +
+    tri_dzeta[bubble_indices[0]][my_k] * bary[bubble_indices[1]] * tri_dzeta[bubble_indices[2]][my_j] +
+    bary[bubble_indices[0]] * tri_dzeta[bubble_indices[1]][my_k] * tri_dzeta[bubble_indices[2]][my_j];
+
+  if (i < 3)
+    return fe_lagrange_tri6_shape_second_deriv(i, j) + 3. * bubble_second_deriv;
+
+  if (i < 6)
+    return fe_lagrange_tri6_shape_second_deriv(i, j) - 12. * bubble_second_deriv;
+
+  return 27. * bubble_second_deriv;
+}
+#endif
+// Tet4 shape function i at (xi, eta, zeta): 1 - xi - eta - zeta, xi, eta, or zeta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet4_shape(const unsigned int i,
+                            const Real xi,
+                            const Real eta,
+                            const Real zeta)
+{
+  libmesh_assert_less(i, 4);
+
+  switch (i)
+    {
+    case 0: return 1. - xi - eta - zeta;
+    case 1: return xi;
+    case 2: return eta;
+    default: return zeta;
+    }
+}
+// Derivative of Tet4 shape function i in direction j; constant, read from tet_dzeta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet4_shape_deriv(const unsigned int i,
+                                  const unsigned int j)
+{
+  libmesh_assert_less(i, 4);
+  libmesh_assert_less(j, 3);
+
+  return tet_dzeta[i][j];
+}
+// Tet10 shape function i: b_m (2 b_m - 1) for i < 4, otherwise 4 b_m b_n, with b the barycentric coordinates.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet10_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta,
+                             const Real zeta)
+{
+  libmesh_assert_less(i, 10);
+
+  const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta};
+  const unsigned short m = tet10_zeta_indices[i][0];
+  const unsigned short n = tet10_zeta_indices[i][1];
+
+  if (i < 4)
+    return bary[m] * (2. * bary[m] - 1.);
+
+  return 4. * bary[m] * bary[n];
+}
+// Derivative in direction j of Tet10 shape function i (chain/product rule using tet_dzeta).
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet10_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta,
+                                   const Real zeta)
+{
+  libmesh_assert_less(i, 10);
+  libmesh_assert_less(j, 3);
+
+  const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta};
+  const unsigned short m = tet10_zeta_indices[i][0];
+  const unsigned short n = tet10_zeta_indices[i][1];
+
+  if (i < 4)
+    return (4. * bary[m] - 1.) * tet_dzeta[m][j];
+
+  return 4. * bary[n] * tet_dzeta[m][j] + 4. * bary[m] * tet_dzeta[n][j];
+}
+// Tet14 shape function i: Tet10 value + 3 (three bubbles) for i < 4, - 12 (two bubbles) for i < 10, else 27 * bubble[i - 10].
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet14_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta,
+                             const Real zeta)
+{
+  libmesh_assert_less(i, 14);
+
+  const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta};
+  Real bubble[4];
+
+  for (unsigned short b = 0; b != 4; ++b)
+    bubble[b] =
+      bary[tet14_bubble_zeta_indices[b][0]] *
+      bary[tet14_bubble_zeta_indices[b][1]] *
+      bary[tet14_bubble_zeta_indices[b][2]];
+
+  if (i < 4)
+    {
+      const auto & bubble_ids = tet14_vertex_bubble_indices[i];
+      return fe_lagrange_tet10_shape(i, xi, eta, zeta) +
+        3. * (bubble[bubble_ids[0]] + bubble[bubble_ids[1]] + bubble[bubble_ids[2]]);
+    }
+
+  if (i < 10)
+    {
+      const auto & bubble_ids = tet14_edge_bubble_indices[i - 4];
+      return fe_lagrange_tet10_shape(i, xi, eta, zeta) -
+        12. * (bubble[bubble_ids[0]] + bubble[bubble_ids[1]]);
+    }
+
+  return 27. * bubble[i - 10];
+}
+// Derivative in direction j of Tet14 shape function i; same combination as the value, applied to the bubble derivatives.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet14_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta,
+                                   const Real zeta)
+{
+  libmesh_assert_less(i, 14);
+  libmesh_assert_less(j, 3);
+
+  const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta};
+  Real bubble_deriv[4];
+
+  for (unsigned short b = 0; b != 4; ++b)
+    {
+      const auto & bubble_ids = tet14_bubble_zeta_indices[b];
+      bubble_deriv[b] = // product rule: one term per differentiated factor
+        tet_dzeta[bubble_ids[0]][j] * bary[bubble_ids[1]] * bary[bubble_ids[2]] +
+        bary[bubble_ids[0]] * tet_dzeta[bubble_ids[1]][j] * bary[bubble_ids[2]] +
+        bary[bubble_ids[0]] * bary[bubble_ids[1]] * tet_dzeta[bubble_ids[2]][j];
+    }
+
+  if (i < 4)
+    {
+      const auto & bubble_ids = tet14_vertex_bubble_indices[i];
+      return fe_lagrange_tet10_shape_deriv(i, j, xi, eta, zeta) +
+        3. * (bubble_deriv[bubble_ids[0]] + bubble_deriv[bubble_ids[1]] + bubble_deriv[bubble_ids[2]]);
+    }
+
+  if (i < 10)
+    {
+      const auto & bubble_ids = tet14_edge_bubble_indices[i - 4];
+      return fe_lagrange_tet10_shape_deriv(i, j, xi, eta, zeta) -
+        12. * (bubble_deriv[bubble_ids[0]] + bubble_deriv[bubble_ids[1]]);
+    }
+
+  return 27. * bubble_deriv[i - 10];
+}
+// Second derivative of Tet10 shape function i; slot j is decoded through tet_second_deriv_indices. Independent of position.
+#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet10_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j)
+{
+  libmesh_assert_less(i, 10);
+  libmesh_assert_less(j, 6);
+
+  const unsigned short my_j = tet_second_deriv_indices[j][0];
+  const unsigned short my_k = tet_second_deriv_indices[j][1];
+
+  if (i < 4)
+    return 4. * tet_dzeta[i][my_j] * tet_dzeta[i][my_k];
+
+  const unsigned short m = tet10_zeta_indices[i][0];
+  const unsigned short n = tet10_zeta_indices[i][1];
+
+  return 4. * (tet_dzeta[n][my_j] * tet_dzeta[m][my_k] +
+               tet_dzeta[m][my_j] * tet_dzeta[n][my_k]);
+}
+// Second derivative of Tet14 shape function i; combines the Tet10 term with the bubbles' second derivatives.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_tet14_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j,
+                                          const Real xi,
+                                          const Real eta,
+                                          const Real zeta)
+{
+  libmesh_assert_less(i, 14);
+  libmesh_assert_less(j, 6);
+
+  const unsigned short my_j = tet_second_deriv_indices[j][0];
+  const unsigned short my_k = tet_second_deriv_indices[j][1];
+  const Real bary[4] = {1. - xi - eta - zeta, xi, eta, zeta};
+  Real bubble_second_deriv[4];
+
+  for (unsigned short b = 0; b != 4; ++b)
+    {
+      const auto & bubble_ids = tet14_bubble_zeta_indices[b];
+      bubble_second_deriv[b] = // six terms: each ordered choice of two distinct factors to differentiate
+        tet_dzeta[bubble_ids[0]][my_j] * tet_dzeta[bubble_ids[1]][my_k] * bary[bubble_ids[2]] +
+        tet_dzeta[bubble_ids[0]][my_j] * bary[bubble_ids[1]] * tet_dzeta[bubble_ids[2]][my_k] +
+        bary[bubble_ids[0]] * tet_dzeta[bubble_ids[1]][my_j] * tet_dzeta[bubble_ids[2]][my_k] +
+        tet_dzeta[bubble_ids[0]][my_k] * tet_dzeta[bubble_ids[1]][my_j] * bary[bubble_ids[2]] +
+        tet_dzeta[bubble_ids[0]][my_k] * bary[bubble_ids[1]] * tet_dzeta[bubble_ids[2]][my_j] +
+        bary[bubble_ids[0]] * tet_dzeta[bubble_ids[1]][my_k] * tet_dzeta[bubble_ids[2]][my_j];
+    }
+
+  if (i < 4)
+    {
+      const auto & bubble_ids = tet14_vertex_bubble_indices[i];
+      return fe_lagrange_tet10_shape_second_deriv(i, j) +
+        3. * (bubble_second_deriv[bubble_ids[0]] + bubble_second_deriv[bubble_ids[1]] + bubble_second_deriv[bubble_ids[2]]);
+    }
+
+  if (i < 10)
+    {
+      const auto & bubble_ids = tet14_edge_bubble_indices[i - 4];
+      return fe_lagrange_tet10_shape_second_deriv(i, j) -
+        12. * (bubble_second_deriv[bubble_ids[0]] + bubble_second_deriv[bubble_ids[1]]);
+    }
+
+  return 27. * bubble_second_deriv[i - 10];
+}
+#endif
+
+} // namespace detail
+} // namespace libMesh
+
+#endif // LIBMESH_FE_SIMPLEX_LAGRANGE_H
diff --git a/include/fe/fe_tensor_product_lagrange.h b/include/fe/fe_tensor_product_lagrange.h
new file mode 100644
index 00000000000..2e4efa2dfcc
--- /dev/null
+++ b/include/fe/fe_tensor_product_lagrange.h
@@ -0,0 +1,320 @@
+// The libMesh Finite Element Library.
+// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
+
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+
+#ifndef LIBMESH_FE_TENSOR_PRODUCT_LAGRANGE_H
+#define LIBMESH_FE_TENSOR_PRODUCT_LAGRANGE_H
+
+#include "libmesh/fe_lagrange_shape_1D.h"
+
+namespace libMesh
+{
+namespace detail
+{
+// Per-node 1D basis indices along xi (_i0), eta (_i1) and zeta (_i2) for each tensor-product element.
+constexpr unsigned int quad4_i0[4] = {0, 1, 1, 0};
+constexpr unsigned int quad4_i1[4] = {0, 0, 1, 1};
+
+constexpr unsigned int quad9_i0[9] = {0, 1, 1, 0, 2, 1, 2, 0, 2};
+constexpr unsigned int quad9_i1[9] = {0, 0, 1, 1, 0, 2, 1, 2, 2};
+
+constexpr unsigned int hex8_i0[8] = {0, 1, 1, 0, 0, 1, 1, 0};
+constexpr unsigned int hex8_i1[8] = {0, 0, 1, 1, 0, 0, 1, 1};
+constexpr unsigned int hex8_i2[8] = {0, 0, 0, 0, 1, 1, 1, 1};
+
+constexpr unsigned int hex27_i0[27] =
+  {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2};
+constexpr unsigned int hex27_i1[27] =
+  {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2};
+constexpr unsigned int hex27_i2[27] =
+  {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2};
+// Quad4 shape function i at (xi, eta): product of the two 1D linear factors selected for node i.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad4_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta)
+{
+  libmesh_assert_less(i, 4);
+  const auto factor_xi = fe_lagrange_1D_linear_shape(quad4_i0[i], xi);
+  const auto factor_eta = fe_lagrange_1D_linear_shape(quad4_i1[i], eta);
+  return factor_xi * factor_eta;
+}
+// Derivative of Quad4 shape function i: with respect to xi when j == 0, otherwise with respect to eta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad4_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta)
+{
+  libmesh_assert_less(i, 4);
+  libmesh_assert_less(j, 2);
+
+  const unsigned int node_xi = quad4_i0[i];
+  const unsigned int node_eta = quad4_i1[i];
+
+  // Differentiate exactly one 1D factor and leave the other untouched.
+  if (j == 0)
+    return fe_lagrange_1D_linear_shape_deriv(node_xi, 0, xi) *
+      fe_lagrange_1D_linear_shape(node_eta, eta);
+
+  return fe_lagrange_1D_linear_shape(node_xi, xi) *
+    fe_lagrange_1D_linear_shape_deriv(node_eta, 0, eta);
+}
+// Quad9 shape function i at (xi, eta): product of the two 1D quadratic factors selected for node i.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad9_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta)
+{
+  libmesh_assert_less(i, 9);
+  const auto factor_xi = fe_lagrange_1D_quadratic_shape(quad9_i0[i], xi);
+  const auto factor_eta = fe_lagrange_1D_quadratic_shape(quad9_i1[i], eta);
+  return factor_xi * factor_eta;
+}
+
+LIBMESH_DEVICE_INLINE // Derivative of Quad9 shape function i: w.r.t. xi when j == 0, otherwise w.r.t. eta.
+Real fe_lagrange_quad9_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta)
+{
+  libmesh_assert_less(i, 9);
+  libmesh_assert_less(j, 2);
+
+  switch (j)
+    {
+    case 0:
+      return fe_lagrange_1D_quadratic_shape_deriv(quad9_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape(quad9_i1[i], eta);
+
+    default:
+      return fe_lagrange_1D_quadratic_shape(quad9_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape_deriv(quad9_i1[i], 0, eta);
+    }
+}
+// Hex8 shape function i at (xi, eta, zeta): product of the three 1D linear factors selected for node i.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex8_shape(const unsigned int i,
+                            const Real xi,
+                            const Real eta,
+                            const Real zeta)
+{
+  libmesh_assert_less(i, 8);
+
+  return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) *
+    fe_lagrange_1D_linear_shape(hex8_i1[i], eta) *
+    fe_lagrange_1D_linear_shape(hex8_i2[i], zeta);
+}
+// Derivative of Hex8 shape function i: j = 0 differentiates the xi factor, 1 the eta factor, anything else the zeta factor.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex8_shape_deriv(const unsigned int i,
+                                  const unsigned int j,
+                                  const Real xi,
+                                  const Real eta,
+                                  const Real zeta)
+{
+  libmesh_assert_less(i, 8);
+  libmesh_assert_less(j, 3);
+
+  switch (j)
+    {
+    case 0:
+      return fe_lagrange_1D_linear_shape_deriv(hex8_i0[i], 0, xi) *
+        fe_lagrange_1D_linear_shape(hex8_i1[i], eta) *
+        fe_lagrange_1D_linear_shape(hex8_i2[i], zeta);
+
+    case 1:
+      return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) *
+        fe_lagrange_1D_linear_shape_deriv(hex8_i1[i], 0, eta) *
+        fe_lagrange_1D_linear_shape(hex8_i2[i], zeta);
+
+    default:
+      return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) *
+        fe_lagrange_1D_linear_shape(hex8_i1[i], eta) *
+        fe_lagrange_1D_linear_shape_deriv(hex8_i2[i], 0, zeta);
+    }
+}
+// Hex27 shape function i at (xi, eta, zeta): product of the three 1D quadratic factors selected for node i.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex27_shape(const unsigned int i,
+                             const Real xi,
+                             const Real eta,
+                             const Real zeta)
+{
+  libmesh_assert_less(i, 27);
+
+  return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) *
+    fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) *
+    fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta);
+}
+// Derivative of Hex27 shape function i: j = 0 differentiates the xi factor, 1 the eta factor, anything else the zeta factor.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex27_shape_deriv(const unsigned int i,
+                                   const unsigned int j,
+                                   const Real xi,
+                                   const Real eta,
+                                   const Real zeta)
+{
+  libmesh_assert_less(i, 27);
+  libmesh_assert_less(j, 3);
+
+  switch (j)
+    {
+    case 0:
+      return fe_lagrange_1D_quadratic_shape_deriv(hex27_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) *
+        fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta);
+
+    case 1:
+      return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape_deriv(hex27_i1[i], 0, eta) *
+        fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta);
+
+    default:
+      return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) *
+        fe_lagrange_1D_quadratic_shape_deriv(hex27_i2[i], 0, zeta);
+    }
+}
+
+#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
+// Second derivative of Quad4 shape function i: slots 0 and 2 return 0; the remaining slot multiplies the two 1D first derivatives.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad4_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j,
+                                          const Real xi,
+                                          const Real eta)
+{
+  libmesh_assert_less(i, 4);
+  libmesh_assert_less(j, 3);
+
+  switch (j)
+    {
+    case 0:
+    case 2:
+      return 0.;
+
+    default:
+      return fe_lagrange_1D_linear_shape_deriv(quad4_i0[i], 0, xi) *
+        fe_lagrange_1D_linear_shape_deriv(quad4_i1[i], 0, eta);
+    }
+}
+// Second derivative of Quad9 shape function i: slot 0 = xi-xi, 1 = xi-eta, otherwise eta-eta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_quad9_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j,
+                                          const Real xi,
+                                          const Real eta)
+{
+  libmesh_assert_less(i, 9);
+  libmesh_assert_less(j, 3);
+
+  switch (j)
+    {
+    case 0:
+      return fe_lagrange_1D_quadratic_shape_second_deriv(quad9_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape(quad9_i1[i], eta);
+
+    case 1:
+      return fe_lagrange_1D_quadratic_shape_deriv(quad9_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape_deriv(quad9_i1[i], 0, eta);
+
+    default:
+      return fe_lagrange_1D_quadratic_shape(quad9_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape_second_deriv(quad9_i1[i], 0, eta);
+    }
+}
+// Second derivative of Hex8 shape function i: slots 0, 2, 5 return 0; 1 = xi-eta, 3 = xi-zeta, otherwise eta-zeta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex8_shape_second_deriv(const unsigned int i,
+                                         const unsigned int j,
+                                         const Real xi,
+                                         const Real eta,
+                                         const Real zeta)
+{
+  libmesh_assert_less(i, 8);
+  libmesh_assert_less(j, 6);
+
+  switch (j)
+    {
+    case 0:
+    case 2:
+    case 5:
+      return 0.;
+
+    case 1:
+      return fe_lagrange_1D_linear_shape_deriv(hex8_i0[i], 0, xi) *
+        fe_lagrange_1D_linear_shape_deriv(hex8_i1[i], 0, eta) *
+        fe_lagrange_1D_linear_shape(hex8_i2[i], zeta);
+
+    case 3:
+      return fe_lagrange_1D_linear_shape_deriv(hex8_i0[i], 0, xi) *
+        fe_lagrange_1D_linear_shape(hex8_i1[i], eta) *
+        fe_lagrange_1D_linear_shape_deriv(hex8_i2[i], 0, zeta);
+
+    default:
+      return fe_lagrange_1D_linear_shape(hex8_i0[i], xi) *
+        fe_lagrange_1D_linear_shape_deriv(hex8_i1[i], 0, eta) *
+        fe_lagrange_1D_linear_shape_deriv(hex8_i2[i], 0, zeta);
+    }
+}
+// Second derivative of Hex27 shape function i: slot 0 = xi-xi, 1 = xi-eta, 2 = eta-eta, 3 = xi-zeta, 4 = eta-zeta, otherwise zeta-zeta.
+LIBMESH_DEVICE_INLINE
+Real fe_lagrange_hex27_shape_second_deriv(const unsigned int i,
+                                          const unsigned int j,
+                                          const Real xi,
+                                          const Real eta,
+                                          const Real zeta)
+{
+  libmesh_assert_less(i, 27);
+  libmesh_assert_less(j, 6);
+
+  switch (j)
+    {
+    case 0:
+      return fe_lagrange_1D_quadratic_shape_second_deriv(hex27_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) *
+        fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta);
+
+    case 1:
+      return fe_lagrange_1D_quadratic_shape_deriv(hex27_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape_deriv(hex27_i1[i], 0, eta) *
+        fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta);
+
+    case 2:
+      return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape_second_deriv(hex27_i1[i], 0, eta) *
+        fe_lagrange_1D_quadratic_shape(hex27_i2[i], zeta);
+
+    case 3:
+      return fe_lagrange_1D_quadratic_shape_deriv(hex27_i0[i], 0, xi) *
+        fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) *
+        fe_lagrange_1D_quadratic_shape_deriv(hex27_i2[i], 0, zeta);
+
+    case 4:
+      return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape_deriv(hex27_i1[i], 0, eta) *
+        fe_lagrange_1D_quadratic_shape_deriv(hex27_i2[i], 0, zeta);
+
+    default:
+      return fe_lagrange_1D_quadratic_shape(hex27_i0[i], xi) *
+        fe_lagrange_1D_quadratic_shape(hex27_i1[i], eta) *
+        fe_lagrange_1D_quadratic_shape_second_deriv(hex27_i2[i], 0, zeta);
+    }
+}
+
+#endif // LIBMESH_ENABLE_SECOND_DERIVATIVES
+
+} // namespace detail
+} // namespace libMesh
+
+#endif // LIBMESH_FE_TENSOR_PRODUCT_LAGRANGE_H
diff --git a/include/geom/cell_hex20.h b/include/geom/cell_hex20.h
index 70c37c23f70..cbbf7bcb5b7 100644
--- a/include/geom/cell_hex20.h
+++ b/include/geom/cell_hex20.h
@@ -145,13 +145,13 @@ class Hex20 final : public Hex
   virtual Order default_order() const override;
 
   /**
-   * \returns \p Hex20::side_nodes_map[side][side_node] after doing some range checking.
+   * \returns The requested local side node after doing some range checking.
    */
   virtual unsigned int local_side_node(unsigned int side,
                                        unsigned int side_node) const override;
 
   /**
-   * \returns \p Hex20::edge_nodes_map[edge][edge_node] after doing some range checking.
+   * \returns The requested local edge node after doing some range checking.
    */
   virtual unsigned int local_edge_node(unsigned int edge,
                                        unsigned int edge_node) const override;
@@ -216,18 +216,6 @@ class Hex20 final : public Hex
   static const int nodes_per_side = 8;
   static const int nodes_per_edge = 3;
 
-  /**
-   * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to
-   * element node numbers.
-   */
-  static const unsigned int side_nodes_map[num_sides][nodes_per_side];
-
-  /**
-   * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to
-   * element node numbers.
-   */
-  static const unsigned int edge_nodes_map[num_edges][nodes_per_edge];
-
   /**
    * A specialization for computing the volume of a Hex20.
    */
diff --git a/include/geom/cell_hex27.h b/include/geom/cell_hex27.h
index 0777540a303..63268b48495 100644
--- a/include/geom/cell_hex27.h
+++ b/include/geom/cell_hex27.h
@@ -160,13 +160,13 @@ class Hex27 final : public Hex
   virtual dof_id_type key (const unsigned int s) const override;
 
   /**
-   * \returns \p Hex27::side_nodes_map[side][side_node] after doing some range checking.
+ * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Hex27::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -231,18 +231,6 @@ class Hex27 final : public Hex static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Hex27. */ diff --git a/include/geom/cell_prism15.h b/include/geom/cell_prism15.h index 68374b06f6f..3c8707cdad4 100644 --- a/include/geom/cell_prism15.h +++ b/include/geom/cell_prism15.h @@ -150,13 +150,13 @@ class Prism15 final : public Prism virtual Order default_order() const override; /** - * \returns \p Prism15::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism15::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. 
*/ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -221,18 +221,6 @@ class Prism15 final : public Prism static const int nodes_per_side = 8; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Prism15. */ diff --git a/include/geom/cell_prism18.h b/include/geom/cell_prism18.h index 530f6f37970..4d6e53ef9f9 100644 --- a/include/geom/cell_prism18.h +++ b/include/geom/cell_prism18.h @@ -165,13 +165,13 @@ class Prism18 final : public Prism virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Prism18::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism18::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -236,18 +236,6 @@ class Prism18 final : public Prism static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. 
- */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Prism18. */ diff --git a/include/geom/cell_prism20.h b/include/geom/cell_prism20.h index af1bedf2634..d58193f94b6 100644 --- a/include/geom/cell_prism20.h +++ b/include/geom/cell_prism20.h @@ -169,13 +169,13 @@ class Prism20 final : public Prism virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Prism20::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism20::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -241,18 +241,6 @@ class Prism20 final : public Prism static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/cell_prism21.h b/include/geom/cell_prism21.h index 894f86789fd..1b34fccc470 100644 --- a/include/geom/cell_prism21.h +++ b/include/geom/cell_prism21.h @@ -172,13 +172,13 @@ class Prism21 final : public Prism virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Prism21::side_nodes_map[side][side_node] after doing some range checking. 
+ * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Prism21::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -244,18 +244,6 @@ class Prism21 final : public Prism static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/cell_pyramid13.h b/include/geom/cell_pyramid13.h index f0d2819fb2e..971ddba79a9 100644 --- a/include/geom/cell_pyramid13.h +++ b/include/geom/cell_pyramid13.h @@ -149,13 +149,13 @@ class Pyramid13 final : public Pyramid virtual Order default_order() const override; /** - * \returns \p Pyramid13::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Pyramid13::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. 
*/ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -209,18 +209,6 @@ class Pyramid13 final : public Pyramid static const int nodes_per_side = 8; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * Specialization for computing the volume of a Pyramid13. */ diff --git a/include/geom/cell_pyramid14.h b/include/geom/cell_pyramid14.h index 87547dc63e4..8c58f2300e6 100644 --- a/include/geom/cell_pyramid14.h +++ b/include/geom/cell_pyramid14.h @@ -167,13 +167,13 @@ class Pyramid14 final : public Pyramid virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Pyramid14::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Pyramid14::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -227,18 +227,6 @@ class Pyramid14 final : public Pyramid static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. 
- */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * Specialization for computing the volume of a Pyramid14. */ diff --git a/include/geom/cell_pyramid18.h b/include/geom/cell_pyramid18.h index 33f5c21e707..eb049a5aac2 100644 --- a/include/geom/cell_pyramid18.h +++ b/include/geom/cell_pyramid18.h @@ -173,13 +173,13 @@ class Pyramid18 final : public Pyramid virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Pyramid18::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Pyramid18::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -234,18 +234,6 @@ class Pyramid18 final : public Pyramid static const int nodes_per_side = 9; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/cell_tet10.h b/include/geom/cell_tet10.h index 5f454fe755c..b7c381df29d 100644 --- a/include/geom/cell_tet10.h +++ b/include/geom/cell_tet10.h @@ -146,13 +146,13 @@ class Tet10 final : public Tet virtual Order default_order() const override; /** - * \returns \p Tet10::side_nodes_map[side][side_node] after doing some range checking. 
+ * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Tet10::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. */ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -217,18 +217,6 @@ class Tet10 final : public Tet static const int nodes_per_side = 6; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - /** * A specialization for computing the volume of a Tet10. */ diff --git a/include/geom/cell_tet14.h b/include/geom/cell_tet14.h index 43245751eec..800044de525 100644 --- a/include/geom/cell_tet14.h +++ b/include/geom/cell_tet14.h @@ -152,13 +152,13 @@ class Tet14 final : public Tet virtual Order default_order() const override; /** - * \returns \p Tet14::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; /** - * \returns \p Tet14::edge_nodes_map[edge][edge_node] after doing some range checking. + * \returns The requested local edge node after doing some range checking. 
*/ virtual unsigned int local_edge_node(unsigned int edge, unsigned int edge_node) const override; @@ -222,18 +222,6 @@ class Tet14 final : public Tet static const int nodes_per_side = 7; static const int nodes_per_edge = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ edge to - * element node numbers. - */ - static const unsigned int edge_nodes_map[num_edges][nodes_per_edge]; - virtual void permute(unsigned int perm_num) override final; virtual void flip(BoundaryInfo *) override final; diff --git a/include/geom/elem.h b/include/geom/elem.h index 5ce4e229aee..add84059882 100644 --- a/include/geom/elem.h +++ b/include/geom/elem.h @@ -2787,9 +2787,10 @@ Elem::simple_build_side_ptr (const unsigned int i) { libmesh_assert_less (i, this->n_sides()); + Subclass & real_me = cast_ref(*this); std::unique_ptr face = std::make_unique(); for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Subclass::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(real_me.local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -2817,8 +2818,9 @@ Elem::simple_build_side_ptr (std::unique_ptr & side, { side->set_interior_parent(this); side->inherit_data_from(*this); + Subclass & real_me = cast_ref(*this); for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Subclass::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(real_me.local_side_node(i, n))); } } @@ -2841,9 +2843,9 @@ Elem::simple_side_ptr (std::unique_ptr & side, else { side->subdomain_id() = this->subdomain_id(); - + Subclass & real_me = cast_ref(*this); for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Mapclass::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(real_me.local_side_node(i, n))); } } @@ -2881,10 
+2883,11 @@ Elem::simple_build_edge_ptr (const unsigned int i) { libmesh_assert_less (i, this->n_edges()); + Subclass & real_me = cast_ref(*this); std::unique_ptr edge = std::make_unique(); for (auto n : edge->node_index_range()) - edge->set_node(n, this->node_ptr(Subclass::edge_nodes_map[i][n])); + edge->set_node(n, this->node_ptr(real_me.local_edge_node(i, n))); edge->set_interior_parent(this); edge->inherit_data_from(*this); @@ -2912,8 +2915,9 @@ Elem::simple_build_edge_ptr (std::unique_ptr & edge, else { edge->inherit_data_from(*this); + Subclass & real_me = cast_ref(*this); for (auto n : edge->node_index_range()) - edge->set_node(n, this->node_ptr(Subclass::edge_nodes_map[i][n])); + edge->set_node(n, this->node_ptr(real_me.local_edge_node(i, n))); } } diff --git a/include/geom/face_quad8.h b/include/geom/face_quad8.h index 4bcdaa39433..4b510aa8eb7 100644 --- a/include/geom/face_quad8.h +++ b/include/geom/face_quad8.h @@ -141,7 +141,7 @@ class Quad8 : public Quad virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Quad8::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -190,12 +190,6 @@ class Quad8 : public Quad static const int num_nodes = 8; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * An optimized method for approximating the area of a * QUAD8 using quadrature. 
diff --git a/include/geom/face_quad9.h b/include/geom/face_quad9.h index e065d8bafa5..bc666861761 100644 --- a/include/geom/face_quad9.h +++ b/include/geom/face_quad9.h @@ -149,7 +149,7 @@ class Quad9 : public Quad virtual dof_id_type key () const override; /** - * \returns \p Quad9::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -197,12 +197,6 @@ class Quad9 : public Quad static const int num_nodes = 9; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * An optimized method for approximating the area of a * QUAD9 using quadrature. diff --git a/include/geom/face_tri6.h b/include/geom/face_tri6.h index 6417999e9fd..2573d9b7f70 100644 --- a/include/geom/face_tri6.h +++ b/include/geom/face_tri6.h @@ -151,7 +151,7 @@ class Tri6 : public Tri virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Tri6::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -200,12 +200,6 @@ class Tri6 : public Tri static const int num_nodes = 6; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * An optimized method for approximating the area of a * TRI6 using quadrature. 
diff --git a/include/geom/face_tri7.h b/include/geom/face_tri7.h index 833bb3ac3b6..0556ccb2f09 100644 --- a/include/geom/face_tri7.h +++ b/include/geom/face_tri7.h @@ -156,7 +156,7 @@ class Tri7 : public Tri virtual dof_id_type key (const unsigned int s) const override; /** - * \returns \p Tri7::side_nodes_map[side][side_node] after doing some range checking. + * \returns The requested local side node after doing some range checking. */ virtual unsigned int local_side_node(unsigned int side, unsigned int side_node) const override; @@ -204,12 +204,6 @@ class Tri7 : public Tri static const int num_nodes = 7; static const int nodes_per_side = 3; - /** - * This maps the \f$ j^{th} \f$ node of the \f$ i^{th} \f$ side to - * element node numbers. - */ - static const unsigned int side_nodes_map[num_sides][nodes_per_side]; - /** * \returns A bounding box (not necessarily the minimal bounding box) * containing the geometric element. diff --git a/include/geom/point.h b/include/geom/point.h index a305deea3a7..57796757869 100644 --- a/include/geom/point.h +++ b/include/geom/point.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/hashing.h" +#include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" namespace libMesh @@ -44,6 +45,7 @@ class Point : public TypeVector * Constructor. By default sets all entries to 0. Gives the point * 0 in \p LIBMESH_DIM dimensions. */ + LIBMESH_DEVICE_INLINE Point (const Real x=0., const Real y=0., const Real z=0.) : @@ -53,11 +55,13 @@ class Point : public TypeVector /** * Trivial copy-constructor. */ + LIBMESH_DEVICE_INLINE Point (const Point & p) = default; /** * Copy-constructor from non-point Typevector. */ + LIBMESH_DEVICE_INLINE Point (const TypeVector & p) : TypeVector (p) {} @@ -65,6 +69,7 @@ class Point : public TypeVector /** * Copy-assignment operator. 
*/ + LIBMESH_DEVICE_INLINE Point& operator=(const Point & p) = default; /** @@ -73,6 +78,7 @@ class Point : public TypeVector template ::value,void>::type> + LIBMESH_DEVICE_INLINE Point (const T x) : TypeVector (x,0,0) {} diff --git a/include/gpu/kokkos_fe_base.h b/include/gpu/kokkos_fe_base.h new file mode 100644 index 00000000000..140c47c7d3d --- /dev/null +++ b/include/gpu/kokkos_fe_base.h @@ -0,0 +1,84 @@ +// Primary FEEvaluator template for Kokkos device-compatible shape functions. +// +// Uses libMesh's own ElemType and FEFamily enums as non-type template +// parameters — no separate tag structs are needed. +// +// All uses must be explicit specializations defined in the kokkos_fe_lagrange_*.h +// and kokkos_fe_monomial.h headers. Every specialization must provide: +// +// static constexpr unsigned int n_dofs() +// +// LIBMESH_DEVICE_INLINE +// static Real shape(unsigned int i, Real xi, Real eta, Real zeta) +// +// LIBMESH_DEVICE_INLINE +// static RealVector grad_shape(unsigned int i, Real xi, Real eta, Real zeta) +// +// Reference-element coordinate conventions (matching libMesh): +// Edge: xi in [-1, 1] +// Quad: (xi, eta) in [-1,1]^2 +// Hex: (xi, eta, zeta) in [-1,1]^3 +// Tri: (xi, eta) in unit triangle, xi >= 0, eta >= 0, xi+eta <= 1 +// Tet: (xi, eta, zeta) in unit tetrahedron +// +// Unused coordinate arguments (e.g. zeta on a 2D element) are accepted but +// ignored, so call sites can always pass all three without special-casing. 
+// +#ifndef LIBMESH_KOKKOS_FE_BASE_H +#define LIBMESH_KOKKOS_FE_BASE_H + +#include "libmesh/libmesh_device.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/kokkos_tensor_ops.h" +#include "libmesh/kokkos_vector_ops.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" + +namespace libMesh::Kokkos +{ + +using Real = libMesh::Real; +using RealVector = libMesh::TypeVector; +using RealTensor = libMesh::TypeTensor; + +LIBMESH_DEVICE_INLINE +RealVector zero_vector() +{ + return zero_vector_value(); +} + +LIBMESH_DEVICE_INLINE +RealVector make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + RealVector v = zero_vector(); + + v(0) = x; + +#if LIBMESH_DIM > 1 + v(1) = y; +#else + libmesh_assert_equal_to(y, Real(0)); +#endif + +#if LIBMESH_DIM > 2 + v(2) = z; +#else + libmesh_assert_equal_to(z, Real(0)); +#endif + + return v; +} + +LIBMESH_DEVICE_INLINE +RealTensor zero_tensor() +{ + return zero_tensor_value(); +} + +template +struct FEEvaluator; // forward declaration only; instantiation requires a specialization + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_BASE_H diff --git a/include/gpu/kokkos_fe_evaluator.h b/include/gpu/kokkos_fe_evaluator.h new file mode 100644 index 00000000000..6d7e8bf110f --- /dev/null +++ b/include/gpu/kokkos_fe_evaluator.h @@ -0,0 +1,425 @@ +// Kokkos on-device FE shape function dispatch (fe_evaluator.h). +// +// Provides: +// map_shape — isoparametric Lagrange shape (topology-based) +// grad_map_shape — isoparametric Lagrange gradient (topology-based) +// shape — physics FE shape (FEShapeKey-based) +// grad_shape — physics FE gradient (FEShapeKey-based) +// +// All functions are LIBMESH_DEVICE_INLINE and dispatch via switch statements +// that compile to fast GPU branch logic. +// +// These helpers are intended for Kokkos-enabled code paths. Device execution +// happens from .K translation units, but the header is also parsed by host code. 
+ +#ifndef LIBMESH_KOKKOS_FE_EVALUATOR_H +#define LIBMESH_KOKKOS_FE_EVALUATOR_H + +#include "kokkos_fe_base.h" +#include "kokkos_fe_types.h" +#include "kokkos_fe_lagrange_1d.h" +#include "kokkos_fe_lagrange_2d.h" +#include "kokkos_fe_lagrange_3d.h" +#include "kokkos_fe_monomial.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" + +namespace libMesh::Kokkos +{ + +LIBMESH_DEVICE_INLINE libMesh::ElemType +lagrange_shape_topology_for_key(FEShapeKey key); + +LIBMESH_DEVICE_INLINE Real +eval_lagrange_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta); + +LIBMESH_DEVICE_INLINE RealVector +eval_lagrange_grad_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta); + +namespace detail +{ + +template +LIBMESH_DEVICE_INLINE auto +dispatch_lagrange_topology(libMesh::ElemType topo, const Op & op) + -> decltype(op.template operator()()) +{ + return libMesh::dispatch_lagrange_map_topology_or( + topo, + op, + [&](libMesh::ElemType) -> decltype(op.template operator()()) + { + detail::abort_unsupported("dispatch_lagrange_topology(): unsupported evaluator topology"); + return op.template operator()(); + }); +} + +template +LIBMESH_DEVICE_INLINE auto +dispatch_monomial_order(libMesh::Order order, const Op & op) + -> decltype(op.template operator()()) +{ + switch (order) + { + case libMesh::CONSTANT: return op.template operator()(); + case libMesh::FIRST: return op.template operator()(); + case libMesh::SECOND: return op.template operator()(); + case libMesh::THIRD: return op.template operator()(); + case libMesh::FOURTH: return op.template operator()(); + case libMesh::FIFTH: return op.template operator()(); + default: + detail::abort_unsupported("dispatch_monomial_order(): unsupported MONOMIAL order"); + return op.template operator()(); + } +} + +template +LIBMESH_DEVICE_INLINE auto +dispatch_monomial(libMesh::ElemType elem_type, libMesh::Order order, const Op & op) + -> decltype(op.template 
operator()<1, 0>()) +{ + switch (monomial_evaluator_dim_or_zero(elem_type)) + { + case 1: return dispatch_monomial_order<1>(order, op); + case 2: return dispatch_monomial_order<2>(order, op); + case 3: return dispatch_monomial_order<3>(order, op); + default: + detail::abort_unsupported("dispatch_monomial(): unsupported MONOMIAL element topology"); + return op.template operator()<1, 0>(); + } +} + +struct LagrangeShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE Real operator()() const + { + return FEEvaluator::shape(i, xi, eta, zeta); + } +}; + +struct LagrangeGradShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + return FEEvaluator::grad_shape(i, xi, eta, zeta); + } +}; + +struct MonomialShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE Real operator()() const + { + if constexpr (Dim == 1) + return MonomialImpl1D::shape(i, xi, eta, zeta); + else if constexpr (Dim == 2) + return MonomialImpl2D::shape(i, xi, eta, zeta); + else + return MonomialImpl3D::shape(i, xi, eta, zeta); + } +}; + +struct MonomialGradShapeOp +{ + unsigned int i; + Real xi; + Real eta; + Real zeta; + + template + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + if constexpr (Dim == 1) + return MonomialImpl1D::grad_shape(i, xi, eta, zeta); + else if constexpr (Dim == 2) + return MonomialImpl2D::grad_shape(i, xi, eta, zeta); + else + return MonomialImpl3D::grad_shape(i, xi, eta, zeta); + } +}; + +template +LIBMESH_DEVICE_INLINE auto +dispatch_shape_family(libMesh::FEShapeKey key, + const LagrangeOp & lagrange_op, + const MonomialOp & monomial_op, + const char * unsupported_message) + -> decltype(lagrange_op()) +{ + switch (key.family) + { + case libMesh::LAGRANGE: + return lagrange_op(); + + case libMesh::MONOMIAL: + return monomial_op(); + + default: + detail::abort_unsupported(unsupported_message); 
+ return lagrange_op(); + } +} + +struct KeyedLagrangeShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE Real operator()() const + { + return eval_lagrange_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + } +}; + +struct KeyedLagrangeGradShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + return eval_lagrange_grad_shape(lagrange_shape_topology_for_key(key), i, xi, eta, zeta); + } +}; + +struct KeyedMonomialShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE Real operator()() const + { + return detail::dispatch_monomial(key.elem_type, + key.order, + detail::MonomialShapeOp{i, xi, eta, zeta}); + } +}; + +struct KeyedMonomialGradShapeOp +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int i; + Real xi; + Real eta; + Real zeta; + + LIBMESH_DEVICE_INLINE RealVector operator()() const + { + return detail::dispatch_monomial(key.elem_type, + key.order, + detail::MonomialGradShapeOp{i, xi, eta, zeta}); + } +}; + +} // namespace detail + +// ── On-device helpers: element class -> spatial dimension ───────────────────── + +LIBMESH_DEVICE_INLINE unsigned int +dim_from_class(FEElemClass cls) +{ + const unsigned int dim = libMesh::elem_class_dim_or_zero(cls); + + if (!dim) + { + detail::abort_unsupported("dim_from_class(): unsupported element class"); + return 0; + } + + return dim; +} + +LIBMESH_DEVICE_INLINE unsigned int +dim_from_topology(libMesh::ElemType topo) +{ + const unsigned int dim = libMesh::topology_dim_or_zero(topo); + + if (!dim) + { + detail::abort_unsupported("dim_from_topology(): unsupported element type"); + return 0; + } + + return dim; +} + +// ── On-device helper: exact libMesh Lagrange key -> evaluator topology ───────── + +LIBMESH_DEVICE_INLINE libMesh::ElemType 
+lagrange_shape_topology_for_key(FEShapeKey key) +{ + const libMesh::ElemType topo = lagrange_shape_topology_or_invalid(key); + + if (topo == libMesh::INVALID_ELEM) + { + detail::abort_unsupported("lagrange_shape_topology_for_key(): unsupported LAGRANGE key for current Kokkos evaluator support boundary"); + return libMesh::INVALID_ELEM; + } + + return topo; +} + +LIBMESH_DEVICE_INLINE Real +eval_lagrange_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return detail::dispatch_lagrange_topology(topo, detail::LagrangeShapeOp{i, xi, eta, zeta}); +} + +LIBMESH_DEVICE_INLINE RealVector +eval_lagrange_grad_shape(libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return detail::dispatch_lagrange_topology(topo, detail::LagrangeGradShapeOp{i, xi, eta, zeta}); +} + +// ── Geometry-only shape dispatch (mapping-type + topology) ──────────────────── +// +// Used by map_face_qp_to_parent() for the isoparametric mapping from face reference +// coordinates to parent reference coordinates. +// +// The mapping_type parameter selects the geometric map family. Currently only +// LAGRANGE_MAP is supported; RATIONAL_BERNSTEIN_MAP requires additional +// rational-weight data that is not yet threaded through the device path. + +// ── Compile-time topology versions (preferred for GPU) ─────────────────── +// Template on FEFamily and ElemType so gpu compiler only instantiates the specific +// FEEvaluator specialization. No topology switch means no stack pressure. + +/// Compile-time map shape evaluation. +template +LIBMESH_DEVICE_INLINE Real +map_shape(unsigned int i, Real xi, Real eta, Real zeta) +{ + return FEEvaluator::shape(i, xi, eta, zeta); +} + +/// Compile-time map gradient evaluation. 
+template +LIBMESH_DEVICE_INLINE RealVector +grad_map_shape(unsigned int i, Real xi, Real eta, Real zeta) +{ + return FEEvaluator::grad_shape(i, xi, eta, zeta); +} + +// ── Runtime topology versions (larger GPU stack usage) ─────────────────── + +/// Evaluate the i-th geometric map shape function at (xi, eta, zeta). +LIBMESH_DEVICE_INLINE Real +map_shape(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (mapping_type) + { + case libMesh::LAGRANGE_MAP: + return eval_lagrange_shape(topo, i, xi, eta, zeta); + default: + detail::abort_unsupported("map_shape(): only LAGRANGE_MAP is implemented"); + return Real(0); + } +} + +/// Evaluate the reference-space gradient of the i-th geometric map shape function. +LIBMESH_DEVICE_INLINE RealVector +grad_map_shape(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + switch (mapping_type) + { + case libMesh::LAGRANGE_MAP: + return eval_lagrange_grad_shape(topo, i, xi, eta, zeta); + default: + detail::abort_unsupported("grad_map_shape(): only LAGRANGE_MAP is implemented"); + return zero_vector(); + } +} + +// ── Physics shape dispatch (FEShapeKey-based) ───────────────────────────────── + +/// Evaluate the i-th physics shape function at (xi, eta, zeta). +LIBMESH_DEVICE_INLINE Real +shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) +{ + if (!supports_shape(key)) + { + detail::abort_unsupported("shape(): unsupported FE key for current Kokkos evaluator support boundary"); + return Real(0); + } + + return detail::dispatch_shape_family( + key, + detail::KeyedLagrangeShapeOp{key, i, xi, eta, zeta}, + detail::KeyedMonomialShapeOp{key, i, xi, eta, zeta}, + "shape(): unsupported FE family"); +} + +/// Evaluate the reference-space gradient of the i-th physics shape function. 
+/// With J from jacobian(), rows are reference derivatives, so +/// grad_ref = J * grad_phys and grad_phys = J.inverse(dim) * grad_ref. +LIBMESH_DEVICE_INLINE RealVector +grad_shape(FEShapeKey key, unsigned int i, Real xi, Real eta, Real zeta) +{ + if (!supports_grad_shape(key)) + { + detail::abort_unsupported("grad_shape(): unsupported FE key for current Kokkos evaluator support boundary"); + return zero_vector(); + } + + return detail::dispatch_shape_family( + key, + detail::KeyedLagrangeGradShapeOp{key, i, xi, eta, zeta}, + detail::KeyedMonomialGradShapeOp{key, i, xi, eta, zeta}, + "grad_shape(): unsupported FE family"); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_EVALUATOR_H diff --git a/include/gpu/kokkos_fe_face_map.h b/include/gpu/kokkos_fe_face_map.h new file mode 100644 index 00000000000..b09abbd4833 --- /dev/null +++ b/include/gpu/kokkos_fe_face_map.h @@ -0,0 +1,135 @@ +#ifndef LIBMESH_KOKKOS_FE_FACE_MAP_H +#define LIBMESH_KOKKOS_FE_FACE_MAP_H + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "kokkos_fe_evaluator.h" +#include "libmesh/elem.h" +#include "libmesh/fe_reference_element_traits.h" + +namespace libMesh::Kokkos +{ + +LIBMESH_DEVICE_INLINE +RealVector point_to_real_vector(const libMesh::Point & pt) +{ +#if LIBMESH_DIM == 1 + return make_vector(pt(0)); +#elif LIBMESH_DIM == 2 + return make_vector(pt(0), pt(1)); +#else + return make_vector(pt(0), pt(1), pt(2)); +#endif +} + +inline unsigned int +parent_local_side_node(const libMesh::Elem & parent, + unsigned int side, + unsigned int side_node) +{ + unsigned int node = libMesh::invalid_uint; + if (libMesh::try_local_side_node(parent.type(), side, side_node, node)) + return node; + + detail::abort_unsupported("map_face_qp_to_parent(): unsupported parent element type in local side-node lookup"); + return libMesh::invalid_uint; +} + +inline unsigned int +recover_parent_side(const libMesh::Elem & parent, + const libMesh::Elem & side_in_parent) +{ + for (unsigned int side = 0; side < 
parent.n_sides(); ++side) + { + if (get_side_topology(parent.type(), side) != side_in_parent.type() || + ((libMesh::side_node_count_or_zero(parent.type(), side) && + libMesh::side_node_count_or_zero(parent.type(), side) != side_in_parent.n_nodes()))) + continue; + + bool same_side = true; + for (unsigned int k = 0; k < side_in_parent.n_nodes(); ++k) + if (parent.node_ptr(parent_local_side_node(parent, side, k)) != side_in_parent.node_ptr(k)) + { + same_side = false; + break; + } + + if (same_side) + return side; + } + + return libMesh::invalid_uint; +} + +inline libMesh::Point +parent_refspace_node(const libMesh::Elem & parent, unsigned int node) +{ + libMesh::Point pt; + if (libMesh::try_reference_node(parent.type(), node, pt)) + return pt; + + detail::abort_unsupported("map_face_qp_to_parent(): unsupported parent element type in reference-node lookup"); + return libMesh::Point(); +} + +/** + * Map a face quadrature point from the side element's reference coordinate system + * to the parent element's reference coordinate system. + * + * side_in_parent must be obtained via build_side_ptr() (not side_ptr()), so that + * second-order sides carry their midpoint nodes. Parent reference coordinates + * are reconstructed from shared libMesh reference-element traits. They are not + * reconstructed from side_in_parent.point(k), which lives in physical space. + * Element types outside the Kokkos FE support boundary are rejected rather + * than silently falling back to generic Elem runtime helpers. 
+ * + * @param side_in_parent The side element as embedded in the parent (from build_side_ptr()) + * @param mapping_type Geometric mapping type (LAGRANGE_MAP, RATIONAL_BERNSTEIN_MAP) + * @param side_topo Topology of the side element (libMesh::ElemType) + * @param face_qpt Quadrature point in the side element's reference coordinates + * @returns Corresponding point in the parent element's reference coordinates + */ +inline RealVector +map_face_qp_to_parent(const libMesh::Elem & side_in_parent, + libMesh::ElemMappingType mapping_type, + libMesh::ElemType side_topo, + RealVector face_qpt) +{ + const libMesh::Elem * parent = side_in_parent.interior_parent(); + libmesh_error_msg_if(!parent, + "map_face_qp_to_parent(): side element must carry an interior_parent() from build_side_ptr()"); + + const unsigned int side = recover_parent_side(*parent, side_in_parent); + libmesh_error_msg_if(side == libMesh::invalid_uint, + "map_face_qp_to_parent(): could not recover parent side for the provided side element"); + + const unsigned int n = side_in_parent.n_nodes(); + RealVector parent_pt = zero_vector(); + + // 1-D elements: the "side" is a single vertex node. There is only one + // point-side reference coordinate, (0,0,0), so we map directly to the + // corresponding parent vertex in the parent reference element. 
+ if (n == 1) + { + const libMesh::Point pt = parent_refspace_node(*parent, parent_local_side_node(*parent, side, 0)); + return point_to_real_vector(pt); + } + + for (unsigned int k = 0; k < n; ++k) + { + const Real s = face_qpt(0); + const Real t = face_qpt(1); + const Real psi = map_shape(mapping_type, side_topo, k, s, t, 0.0); + + const libMesh::Point pt = parent_refspace_node(*parent, parent_local_side_node(*parent, side, k)); + parent_pt.add_scaled(point_to_real_vector(pt), psi); + } + + return parent_pt; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_FE_FACE_MAP_H diff --git a/include/gpu/kokkos_fe_lagrange_1d.h b/include/gpu/kokkos_fe_lagrange_1d.h new file mode 100644 index 00000000000..b98e2f89acd --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_1d.h @@ -0,0 +1,94 @@ +// Kokkos FEEvaluator specializations for 1-D Lagrange elements. +// +// Covers EDGE2 (linear), EDGE3 (quadratic), and EDGE4 (cubic). +// Reference-element coordinate convention (libMesh-compatible): +// EDGE2/EDGE3: xi in [-1, 1] +// +// EDGE3 node ordering (libMesh non-sequential): +// index 0 -> xi = -1 (left node) +// index 1 -> xi = +1 (right node) +// index 2 -> xi = 0 (midpoint) + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_1D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_1D_H + +#include "kokkos_fe_base.h" +#include "libmesh/fe_lagrange_shape_1D.h" + +namespace libMesh::Kokkos +{ + +// ── EDGE2 (linear edge, 2 nodes) ───────────────────────────────────────────── + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 2; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return libMesh::fe_lagrange_1D_linear_shape(i, xi); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return make_vector(libMesh::fe_lagrange_1D_linear_shape_deriv(i, 0, xi), 0.0, 0.0); + } +#endif 
+}; + +// ── EDGE3 (quadratic edge, 3 nodes) ────────────────────────────────────────── +// Node ordering matches libMesh: 0->left(-1), 1->right(+1), 2->mid(0) +// L_0(xi) = 0.5*xi*(xi-1) dL_0/dxi = xi - 0.5 +// L_1(xi) = 0.5*xi*(xi+1) dL_1/dxi = xi + 0.5 +// L_2(xi) = 1 - xi² dL_2/dxi = -2*xi + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 3; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return libMesh::fe_lagrange_1D_quadratic_shape(i, xi); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return make_vector(libMesh::fe_lagrange_1D_quadratic_shape_deriv(i, 0, xi), 0.0, 0.0); + } +#endif +}; + +// ── EDGE4 (cubic edge, 4 nodes) ────────────────────────────────────────────── +// Node ordering matches libMesh: 0->left(-1), 1->right(+1), 2->(-1/3), 3->(+1/3) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return libMesh::fe_lagrange_1D_cubic_shape(i, xi); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return make_vector(libMesh::fe_lagrange_1D_cubic_shape_deriv(i, 0, xi), 0.0, 0.0); + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_1D_H diff --git a/include/gpu/kokkos_fe_lagrange_2d.h b/include/gpu/kokkos_fe_lagrange_2d.h new file mode 100644 index 00000000000..fda9baacc8a --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_2d.h @@ -0,0 +1,163 @@ +// Kokkos FEEvaluator specializations for 2-D Lagrange elements. +// +// Covers TRI3, TRI6, QUAD4, QUAD8, QUAD9. 
+// Reference-element coordinate conventions (libMesh-compatible): +// Tri: xi >= 0, eta >= 0, xi+eta <= 1 (unit triangle) +// Quad: (xi, eta) in [-1,1]² + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_2D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_2D_H + +#include "kokkos_fe_base.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" + +namespace libMesh::Kokkos +{ + +// ── TRI3 (linear triangle, 3 nodes) ────────────────────────────────────────── +// Barycentric: zeta0 = 1-xi-eta, zeta1 = xi, zeta2 = eta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 3; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return libMesh::detail::fe_lagrange_tri3_shape(i, xi, eta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return make_vector(libMesh::detail::fe_lagrange_tri3_shape_deriv(i, 0), + libMesh::detail::fe_lagrange_tri3_shape_deriv(i, 1), + 0.0); + } +#endif +}; + +// ── TRI6 (quadratic triangle, 6 nodes) ─────────────────────────────────────── +// Barycentric: z0=1-xi-eta, z1=xi, z2=eta +// phi_0 = z0*(2*z0-1) = (1-xi-eta)*(1-2*xi-2*eta) +// phi_1 = z1*(2*z1-1) = xi*(2*xi-1) +// phi_2 = z2*(2*z2-1) = eta*(2*eta-1) +// phi_3 = 4*z0*z1 = 4*(1-xi-eta)*xi +// phi_4 = 4*z1*z2 = 4*xi*eta +// phi_5 = 4*z2*z0 = 4*eta*(1-xi-eta) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 6; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return libMesh::detail::fe_lagrange_tri6_shape(i, xi, eta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return make_vector(libMesh::detail::fe_lagrange_tri6_shape_deriv(i, 0, xi, eta), + 
libMesh::detail::fe_lagrange_tri6_shape_deriv(i, 1, xi, eta), + 0.0); + } +#endif +}; + +// ── QUAD4 (bilinear quadrilateral, 4 nodes) ─────────────────────────────────── +// Tensor product of two EDGE2 bases. libMesh node ordering: +// node 0: (-1,-1) node 1: (+1,-1) +// node 2: (+1,+1) node 3: (-1,+1) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return libMesh::detail::fe_lagrange_quad4_shape(i, xi, eta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return make_vector(libMesh::detail::fe_lagrange_quad4_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_quad4_shape_deriv(i, 1, xi, eta), + 0.0); + } +#endif +}; + +// ── QUAD8 (serendipity quadrilateral, 8 nodes) ──────────────────────────────── +// Node ordering: +// 0: (-1,-1) 1: (+1,-1) 2: (+1,+1) 3: (-1,+1) +// 4: ( 0,-1) 5: (+1, 0) 6: ( 0,+1) 7: (-1, 0) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 8; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return libMesh::detail::fe_lagrange_quad8_shape(i, xi, eta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return make_vector(libMesh::detail::fe_lagrange_quad8_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_quad8_shape_deriv(i, 1, xi, eta), + 0.0); + } +#endif +}; + +// ── QUAD9 (biquadratic quadrilateral, 9 nodes) ──────────────────────────────── +// Tensor product of two EDGE3 bases. 
libMesh node ordering: +// i0[] = {0,1,1,0, 2,1,2,0, 2} +// i1[] = {0,0,1,1, 0,2,1,2, 2} +// +// 1D basis (libMesh non-sequential ordering): +// L_0(t) = 0.5*t*(t-1) dL_0/dt = t - 0.5 +// L_1(t) = 0.5*t*(t+1) dL_1/dt = t + 0.5 +// L_2(t) = 1 - t² dL_2/dt = -2*t + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 9; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return libMesh::detail::fe_lagrange_quad9_shape(i, xi, eta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + return make_vector(libMesh::detail::fe_lagrange_quad9_shape_deriv(i, 0, xi, eta), + libMesh::detail::fe_lagrange_quad9_shape_deriv(i, 1, xi, eta), + 0.0); + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_2D_H diff --git a/include/gpu/kokkos_fe_lagrange_3d.h b/include/gpu/kokkos_fe_lagrange_3d.h new file mode 100644 index 00000000000..cde05985aa0 --- /dev/null +++ b/include/gpu/kokkos_fe_lagrange_3d.h @@ -0,0 +1,164 @@ +// Kokkos FEEvaluator specializations for 3-D Lagrange elements. +// +// Covers TET4, TET10, HEX8, HEX20, HEX27. 
+// Reference-element coordinate conventions (libMesh-compatible): +// Tet: xi >= 0, eta >= 0, zeta >= 0, xi+eta+zeta <= 1 (unit tetrahedron) +// Hex: (xi, eta, zeta) in [-1,1]³ + +#ifndef LIBMESH_KOKKOS_FE_LAGRANGE_3D_H +#define LIBMESH_KOKKOS_FE_LAGRANGE_3D_H + +#include "kokkos_fe_base.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" + +namespace libMesh::Kokkos +{ + +// ── TET4 (linear tetrahedron, 4 nodes) ─────────────────────────────────────── +// Barycentric: z0=1-xi-eta-zeta, z1=xi, z2=eta, z3=zeta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 4; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return libMesh::detail::fe_lagrange_tet4_shape(i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real /*xi*/, Real /*eta*/, Real /*zeta*/) + { + return make_vector(libMesh::detail::fe_lagrange_tet4_shape_deriv(i, 0), + libMesh::detail::fe_lagrange_tet4_shape_deriv(i, 1), + libMesh::detail::fe_lagrange_tet4_shape_deriv(i, 2)); + } +#endif +}; + +// ── TET10 (quadratic tetrahedron, 10 nodes) ─────────────────────────────────── +// Barycentric: z0=1-xi-eta-zeta, z1=xi, z2=eta, z3=zeta +// phi_0 = z0*(2*z0-1) +// phi_1 = z1*(2*z1-1) = xi*(2*xi-1) +// phi_2 = z2*(2*z2-1) = eta*(2*eta-1) +// phi_3 = z3*(2*z3-1) = zeta*(2*zeta-1) +// phi_4 = 4*z0*z1 = 4*(1-xi-eta-zeta)*xi +// phi_5 = 4*z1*z2 = 4*xi*eta +// phi_6 = 4*z2*z0 = 4*eta*(1-xi-eta-zeta) +// phi_7 = 4*z0*z3 = 4*(1-xi-eta-zeta)*zeta +// phi_8 = 4*z1*z3 = 4*xi*zeta +// phi_9 = 4*z2*z3 = 4*eta*zeta + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 10; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return libMesh::detail::fe_lagrange_tet10_shape(i, 
xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return make_vector(libMesh::detail::fe_lagrange_tet10_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_tet10_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_tet10_shape_deriv(i, 2, xi, eta, zeta)); + } +#endif +}; + +// ── HEX8 (trilinear hexahedron, 8 nodes) ───────────────────────────────────── +// Tensor product of three EDGE2 bases. +// Node ordering (same as libMesh): +// 0:(-1,-1,-1) 1:(+1,-1,-1) 2:(+1,+1,-1) 3:(-1,+1,-1) +// 4:(-1,-1,+1) 5:(+1,-1,+1) 6:(+1,+1,+1) 7:(-1,+1,+1) + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 8; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return libMesh::detail::fe_lagrange_hex8_shape(i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return make_vector(libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 2, xi, eta, zeta)); + } +#endif +}; + +// ── HEX20 (serendipity hexahedron, 20 nodes) ───────────────────────────────── +// Corner nodes: phi = 0.125*(1+sx*xi)*(1+sy*eta)*(1+sz*zeta)*(sx*xi+sy*eta+sz*zeta-2) +// Node ordering follows libMesh (nodes 0-7 corners, 8-19 midside). 
+ +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 20; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return libMesh::detail::fe_lagrange_hex20_shape(i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return make_vector(libMesh::detail::fe_lagrange_hex20_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex20_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex20_shape_deriv(i, 2, xi, eta, zeta)); + } +#endif +}; + +// ── HEX27 (triquadratic hexahedron, 27 nodes) ───────────────────────────────── +// Tensor product of three EDGE3 bases. +// Index tables (libMesh fe_lagrange_shape_3D.C): +// i0[] = {0,1,1,0, 0,1,1,0, 2,1,2,0, 0,1,1,0, 2,1,2,0, 2,2,1,2,0,2,2} +// i1[] = {0,0,1,1, 0,0,1,1, 0,2,1,2, 0,0,1,1, 0,2,1,2, 2,0,2,1,2,2,2} +// i2[] = {0,0,0,0, 1,1,1,1, 0,0,0,0, 2,2,2,2, 1,1,1,1, 0,2,2,2,2,1,2} + +template <> +struct FEEvaluator +{ + static constexpr unsigned int n_dofs() { return 27; } + +#ifdef LIBMESH_HAVE_KOKKOS + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return libMesh::detail::fe_lagrange_hex27_shape(i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + return make_vector(libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 0, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 1, xi, eta, zeta), + libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 2, xi, eta, zeta)); + } +#endif +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_LAGRANGE_3D_H diff --git a/include/gpu/kokkos_fe_map.h b/include/gpu/kokkos_fe_map.h new file mode 100644 index 00000000000..7ff539bfbcc --- /dev/null +++ b/include/gpu/kokkos_fe_map.h @@ -0,0 +1,464 @@ +// Kokkos device-compatible physical map 
evaluation. +// +// All functions are LIBMESH_DEVICE_INLINE — callable from both host and GPU. +// +// Two API levels: +// 1. Template on ElemType (preferred): eliminates the topology switch at +// compile time, producing small inlined functions with no stack pressure. +// 2. Runtime ElemType dispatch: convenient but requires increased CUDA +// stack size due to the large switch in map_shape. +// +// Given node coordinates and a reference-space point, these functions compute: +// - Physical coordinates (xyz) +// - Jacobian matrix (reference -> physical) +// - Jacobian measures and JxW +// - Outward normal helpers for face/edge integrals + +#ifndef LIBMESH_KOKKOS_FE_MAP_H +#define LIBMESH_KOKKOS_FE_MAP_H + +#include "kokkos_fe_evaluator.h" +#include "kokkos_storage.h" + +#include + +namespace libMesh::Kokkos +{ + +namespace detail +{ + +LIBMESH_DEVICE_INLINE const RealVector & +node_at(const RealVector * nodes, unsigned int i) +{ + return nodes[i]; +} + +template +LIBMESH_DEVICE_INLINE const RealVector & +node_at(const RealVector (&nodes)[N], unsigned int i) +{ + return nodes[i]; +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealVector +node_at(const NodeStorage & nodes, unsigned int i) +{ + return load_vector(nodes, i); +} + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point_impl(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(i, xi, eta, zeta) * node_at(nodes, i); + return xyz; +} + +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian_impl(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), + node_at(nodes, k)); + return J; +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian_impl(const NodeStorage & 
nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(k, xi, eta, zeta); + const RealVector grad = grad_map_shape(k, xi, eta, zeta); + const RealVector node = node_at(nodes, k); + xyz += phi * node; + J += libMesh::outer_product(grad, node); + } +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian_impl(const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(k, xi, eta, zeta), + node_at(face_nodes, k)); + return J; +} + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealVector xyz = zero_vector(); + for (unsigned int i = 0; i < n_nodes; ++i) + xyz += map_shape(mapping_type, topo, i, xi, eta, zeta) * node_at(nodes, i); + return xyz; +} + +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, topo, k, xi, eta, zeta), + node_at(nodes, k)); + return J; +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + xyz = zero_vector(); + J = zero_tensor(); + for (unsigned int k = 0; k < n_nodes; ++k) + { + const Real phi = map_shape(mapping_type, topo, k, xi, eta, 
zeta); + const RealVector grad = grad_map_shape(mapping_type, topo, k, xi, eta, zeta); + const RealVector node = node_at(nodes, k); + xyz += phi * node; + J += libMesh::outer_product(grad, node); + } +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian_impl(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + RealTensor J = zero_tensor(); + for (unsigned int k = 0; k < n_face_nodes; ++k) + J += libMesh::outer_product(grad_map_shape(mapping_type, face_topo, k, xi, eta, zeta), + node_at(face_nodes, k)); + return J; +} + +} // namespace detail + +template +LIBMESH_DEVICE_INLINE RealVector +physical_point(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealVector +physical_point(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(nodes, n_nodes, xi, eta, zeta); +} + +// ========================================================================= +// Compile-time dispatch (preferred for GPU — no switch overhead) +// +// Template on FEFamily and ElemType so nvcc only instantiates the specific +// FEEvaluator specialization. No topology switch means no stack pressure. 
+// ========================================================================= + +template +LIBMESH_DEVICE_INLINE RealTensor +jacobian(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::jacobian_impl(nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +jacobian(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::jacobian_impl(nodes, n_nodes, xi, eta, zeta); +} + +template +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + detail::physical_point_and_jacobian_impl(nodes, n_nodes, xi, eta, zeta, xyz, J); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + detail::physical_point_and_jacobian_impl(nodes, n_nodes, xi, eta, zeta, xyz, J); +} + +template +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(const RealVector * face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::face_jacobian_impl(face_nodes, n_face_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::face_jacobian_impl(face_nodes, n_face_nodes, xi, eta, zeta); +} + +// ========================================================================= +// Runtime topology dispatch (convenient, but larger GPU stack usage) +// ========================================================================= + +/// Compute physical coordinate (runtime topology). 
+LIBMESH_DEVICE_INLINE RealVector +physical_point(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealVector +physical_point(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::physical_point_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); +} + +/// Compute Jacobian matrix (runtime topology), with rows d(x)/d(xi_r). +LIBMESH_DEVICE_INLINE RealTensor +jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta); +} + +/// Compute physical point and Jacobian together (runtime topology). 
+LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const RealVector * nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + detail::physical_point_and_jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta, xyz, J); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE void +physical_point_and_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType topo, + const NodeStorage & nodes, + unsigned int n_nodes, + Real xi, Real eta, Real zeta, + RealVector & xyz, + RealTensor & J) +{ + detail::physical_point_and_jacobian_impl(mapping_type, topo, nodes, n_nodes, xi, eta, zeta, xyz, J); +} + +/// Face Jacobian (runtime topology). +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const RealVector * face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::face_jacobian_impl(mapping_type, face_topo, face_nodes, n_face_nodes, xi, eta, zeta); +} + +template > && + !std::is_array_v>, + int> = 0> +LIBMESH_DEVICE_INLINE RealTensor +face_jacobian(libMesh::ElemMappingType mapping_type, + libMesh::ElemType face_topo, + const NodeStorage & face_nodes, + unsigned int n_face_nodes, + Real xi, Real eta, Real zeta) +{ + return detail::face_jacobian_impl(mapping_type, face_topo, face_nodes, n_face_nodes, xi, eta, zeta); +} + +// ========================================================================= +// Geometry helpers (topology-independent) +// ========================================================================= + +/// libMesh FEMap-compatible volume measure * quadrature_weight. 
+/// 3D: det(J) * weight +/// 2D: ||J_row0 x J_row1|| * weight +/// 1D: ||J_row0|| * weight +/// 0D: weight +LIBMESH_DEVICE_INLINE Real +volume_jxw(const RealTensor & J, unsigned int dim, Real quad_weight) +{ + if (dim == 3) + return detail::leading_determinant(J, 3) * quad_weight; + else if (dim == 2) + return J.row(0).cross(J.row(1)).norm() * quad_weight; + else if (dim == 1) + return J.row(0).norm() * quad_weight; + else + return quad_weight; +} + +/// Face JxW: surface measure * quadrature_weight +/// 3D: ||J_row0 x J_row1|| * weight +/// 2D: ||J_row0|| * weight +/// 1D: weight (face is a point) +LIBMESH_DEVICE_INLINE Real +face_jxw(const RealTensor & J, unsigned int parent_dim, Real quad_weight) +{ + if (parent_dim == 3) + return J.row(0).cross(J.row(1)).norm() * quad_weight; + else if (parent_dim == 2) + return J.row(0).norm() * quad_weight; + else + return quad_weight; +} + +/// Outward unit normal for a 3D face from the face Jacobian. +LIBMESH_DEVICE_INLINE RealVector +face_normal(const RealTensor & J, unsigned int parent_dim) +{ + if (parent_dim != 3) + { + detail::abort_unsupported("face_normal(): only 3D face normals are defined from face Jacobians alone; use edge_normal_on_parent_surface() for 2D parent elements"); + return zero_vector(); + } + + RealVector n = J.row(0).cross(J.row(1)); + + const Real len = n.norm(); + if (len > 0.0) + n *= 1.0 / len; + return n; +} + +/// Outward edge normal for a 2D parent element embedded in 3D. +/// Requires the edge Jacobian and the parent surface Jacobian at the mapped +/// parent-reference point. 
+LIBMESH_DEVICE_INLINE RealVector +edge_normal_on_parent_surface(const RealTensor & edge_J, + const RealTensor & parent_J) +{ + RealVector surface_normal = parent_J.row(0).cross(parent_J.row(1)); + const Real surface_len = surface_normal.norm(); + if (surface_len > 0.0) + surface_normal *= 1.0 / surface_len; + + RealVector n = edge_J.row(0).cross(surface_normal); + + const Real len = n.norm(); + if (len > 0.0) + n *= 1.0 / len; + return n; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_MAP_H diff --git a/include/gpu/kokkos_fe_monomial.h b/include/gpu/kokkos_fe_monomial.h new file mode 100644 index 00000000000..b6e97436b6f --- /dev/null +++ b/include/gpu/kokkos_fe_monomial.h @@ -0,0 +1,260 @@ +// Kokkos FEEvaluator specializations for MONOMIAL elements. +// +// MONOMIAL uses the complete total-degree polynomial space P_p. Following +// libMesh's FE, the basis is parameterized by spatial +// dimension, not element class: TRI and QUAD share the 2-D implementation, +// while TET/HEX/PRISM/PYRAMID share the 3-D implementation. +// +// The implementation below mirrors the generic index-to-exponent decoding used +// by libMesh's host-side MONOMIAL FE code, so the Kokkos layer reuses the same +// basis ordering without hand-expanding every order into bespoke tables. 
+ +#ifndef LIBMESH_KOKKOS_FE_MONOMIAL_H +#define LIBMESH_KOKKOS_FE_MONOMIAL_H + +#include "kokkos_fe_base.h" +#include "libmesh/enum_elem_type.h" + +namespace libMesh::Kokkos +{ + +namespace detail +{ + +LIBMESH_DEVICE_INLINE Real +pow_unsigned(Real base, unsigned int exponent) +{ + Real value = 1; + for (unsigned int i = 0; i < exponent; ++i) + value *= base; + return value; +} + +template +struct monomial_exponents; + +template <> +struct monomial_exponents<1> +{ + unsigned int nx; + + LIBMESH_DEVICE_INLINE static monomial_exponents decode(unsigned int i) + { + return {i}; + } +}; + +template <> +struct monomial_exponents<2> +{ + unsigned int nx; + unsigned int ny; + + LIBMESH_DEVICE_INLINE static monomial_exponents decode(unsigned int i) + { + unsigned int degree = 0; + for (; i >= (degree + 1) * (degree + 2) / 2; ++degree) {} + + const unsigned int ny = i - (degree * (degree + 1) / 2); + const unsigned int nx = degree - ny; + return {nx, ny}; + } +}; + +template <> +struct monomial_exponents<3> +{ + unsigned int nx; + unsigned int ny; + unsigned int nz; + + LIBMESH_DEVICE_INLINE static monomial_exponents decode(unsigned int i) + { + unsigned int degree = 0; + for (; i >= (degree + 1) * (degree + 2) * (degree + 3) / 6; ++degree) {} + + const unsigned int degree_offset = degree * (degree + 1) * (degree + 2) / 6; + const unsigned int local_index = i - degree_offset; + + unsigned int block = degree; + unsigned int nz = 0; + for (; block < local_index; block += (degree - nz + 1)) + ++nz; + + const unsigned int nx = block - local_index; + const unsigned int ny = degree - nx - nz; + return {nx, ny, nz}; + } +}; + +} // namespace detail + +template +struct MonomialImpl1D +{ + static constexpr unsigned int n_dofs() { return N + 1; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real /*eta*/, Real /*zeta*/) + { + return detail::pow_unsigned(xi, i); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real /*eta*/, 
Real /*zeta*/) + { + if (!i) + return zero_vector(); + + return make_vector(i * detail::pow_unsigned(xi, i - 1), 0.0, 0.0); + } +}; + +template +struct MonomialImpl2D +{ + static constexpr unsigned int n_dofs() { return (N + 1) * (N + 2) / 2; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + const auto exponents = detail::monomial_exponents<2>::decode(i); + return detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real /*zeta*/) + { + const auto exponents = detail::monomial_exponents<2>::decode(i); + const Real dx = exponents.nx + ? exponents.nx * + detail::pow_unsigned(xi, exponents.nx - 1) * + detail::pow_unsigned(eta, exponents.ny) + : 0.0; + const Real dy = exponents.ny + ? exponents.ny * + detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny - 1) + : 0.0; + return make_vector(dx, dy, 0.0); + } +}; + +template +struct MonomialImpl3D +{ + static constexpr unsigned int n_dofs() { return (N + 1) * (N + 2) * (N + 3) / 6; } + + LIBMESH_DEVICE_INLINE static Real + shape(unsigned int i, Real xi, Real eta, Real zeta) + { + const auto exponents = detail::monomial_exponents<3>::decode(i); + return detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny) * + detail::pow_unsigned(zeta, exponents.nz); + } + + LIBMESH_DEVICE_INLINE static RealVector + grad_shape(unsigned int i, Real xi, Real eta, Real zeta) + { + const auto exponents = detail::monomial_exponents<3>::decode(i); + const Real dx = exponents.nx + ? exponents.nx * + detail::pow_unsigned(xi, exponents.nx - 1) * + detail::pow_unsigned(eta, exponents.ny) * + detail::pow_unsigned(zeta, exponents.nz) + : 0.0; + const Real dy = exponents.ny + ? 
exponents.ny * + detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny - 1) * + detail::pow_unsigned(zeta, exponents.nz) + : 0.0; + const Real dz = exponents.nz + ? exponents.nz * + detail::pow_unsigned(xi, exponents.nx) * + detail::pow_unsigned(eta, exponents.ny) * + detail::pow_unsigned(zeta, exponents.nz - 1) + : 0.0; + return make_vector(dx, dy, dz); + } +}; + +// Per-topology FEEvaluator delegating specializations + +template +struct FEEvaluator : MonomialImpl1D {}; + +template +struct FEEvaluator : MonomialImpl1D {}; + +template +struct FEEvaluator : MonomialImpl1D {}; + +template +struct FEEvaluator : MonomialImpl2D {}; + +template +struct FEEvaluator : MonomialImpl2D {}; + +template +struct FEEvaluator : MonomialImpl2D {}; + +template +struct FEEvaluator : MonomialImpl2D {}; + +template +struct FEEvaluator : MonomialImpl2D {}; + +template +struct FEEvaluator : MonomialImpl2D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +template +struct FEEvaluator : MonomialImpl3D {}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_MONOMIAL_H diff --git a/include/gpu/kokkos_fe_shape_dispatch.h b/include/gpu/kokkos_fe_shape_dispatch.h new file mode 100644 index 00000000000..2989eb6d193 --- /dev/null +++ 
b/include/gpu/kokkos_fe_shape_dispatch.h @@ -0,0 +1,618 @@ +// Shared Kokkos FE shape dispatch helpers. +// +// These helpers capture the supported Kokkos FE evaluator boundary in one +// place so production code and oracle tests can dispatch exact FE keys without +// duplicating the support matrix. + +#ifndef LIBMESH_KOKKOS_FE_SHAPE_DISPATCH_H +#define LIBMESH_KOKKOS_FE_SHAPE_DISPATCH_H + +#include "libmesh/fe_shape_traits.h" +#include "libmesh/kokkos_fe_evaluator.h" + +namespace libMesh::Kokkos +{ + +template +struct monomial_order_evaluator; + +#define LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(dim_value, exact_order, impl_suffix, impl_order) \ + template <> \ + struct monomial_order_evaluator \ + { \ + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, \ + libMesh::Real xi, \ + libMesh::Real eta, \ + libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::shape(i, xi, eta, zeta); \ + } \ + \ + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape( \ + unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) \ + { \ + return libMesh::Kokkos::impl_suffix::grad_shape(i, xi, eta, zeta); \ + } \ + } + +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, CONSTANT, MonomialImpl1D, 0); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, FIRST, MonomialImpl1D, 1); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, SECOND, MonomialImpl1D, 2); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, THIRD, MonomialImpl1D, 3); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, FOURTH, MonomialImpl1D, 4); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(1, FIFTH, MonomialImpl1D, 5); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, CONSTANT, MonomialImpl2D, 0); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, FIRST, MonomialImpl2D, 1); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, SECOND, MonomialImpl2D, 2); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, THIRD, MonomialImpl2D, 3); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, FOURTH, MonomialImpl2D, 4); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(2, FIFTH, MonomialImpl2D, 5); 
+LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, CONSTANT, MonomialImpl3D, 0); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, FIRST, MonomialImpl3D, 1); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, SECOND, MonomialImpl3D, 2); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, THIRD, MonomialImpl3D, 3); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, FOURTH, MonomialImpl3D, 4); +LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE(3, FIFTH, MonomialImpl3D, 5); + +#undef LIBMESH_KOKKOS_MONOMIAL_ORDER_CASE + +template +struct exact_shape_evaluator; + +template +struct exact_shape_evaluator +{ + static constexpr libMesh::FEShapeKey exact_key{ libMesh::LAGRANGE, ExactTopo, ExactOrder }; + static constexpr libMesh::ElemType evaluator_topology = + libMesh::lagrange_shape_topology_or_invalid(exact_key); + + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return map_shape( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return grad_map_shape( + i, xi, eta, zeta); + } +}; + +template +struct exact_shape_evaluator +{ + static constexpr unsigned int evaluator_dim = + libMesh::monomial_evaluator_dim_or_zero(ExactTopo); + + LIBMESH_DEVICE_INLINE static libMesh::Real shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::shape( + i, xi, eta, zeta); + } + + LIBMESH_DEVICE_INLINE static libMesh::Kokkos::RealVector grad_shape(unsigned int i, + libMesh::Real xi, + libMesh::Real eta, + libMesh::Real zeta) + { + return monomial_order_evaluator::grad_shape( + i, xi, eta, zeta); + } +}; + +template +LIBMESH_DEVICE_INLINE libMesh::Real +shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::shape(i, xi, eta, zeta); +} + +template +LIBMESH_DEVICE_INLINE libMesh::Kokkos::RealVector 
+grad_shape_for_key(unsigned int i, libMesh::Real xi, libMesh::Real eta, libMesh::Real zeta) +{ + return exact_shape_evaluator::grad_shape(i, xi, eta, zeta); +} + +template +inline int +dispatch_supported_monomial_order(libMesh::Order order, const Dispatcher & dispatcher) +{ + switch (order) + { + case libMesh::CONSTANT: + return dispatcher.template operator()(); + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + case libMesh::FOURTH: + return dispatcher.template operator()(); + case libMesh::FIFTH: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(libMesh::FEShapeKey{ libMesh::MONOMIAL, ExactTopo, order }); + } +} + +template +inline int +dispatch_exact_lagrange_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return 
dispatcher.unsupported_key(key); + } + case libMesh::TRI7: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET14: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + 
switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_exact_lagrange_shape_key_with_map(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + switch (key.elem_type) + { + case libMesh::EDGE2: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::EDGE4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::THIRD: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI3: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TRI6: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::QUAD9: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template 
operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET4: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::TET10: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX8: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX20: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + case libMesh::HEX27: + switch (key.order) + { + case libMesh::FIRST: + return dispatcher.template operator()(); + case libMesh::SECOND: + return dispatcher.template operator()(); + default: + return dispatcher.unsupported_key(key); + } + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_exact_monomial_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI7: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return 
dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM15: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM18: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PRISM21: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID5: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID13: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID14: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::PYRAMID18: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_exact_monomial_shape_key_with_map(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + switch (key.elem_type) + { + case libMesh::EDGE2: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::EDGE3: + return dispatch_supported_monomial_order(key.order, 
dispatcher); + case libMesh::EDGE4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI3: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TRI6: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::QUAD9: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET4: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::TET10: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX8: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX20: + return dispatch_supported_monomial_order(key.order, dispatcher); + case libMesh::HEX27: + return dispatch_supported_monomial_order(key.order, dispatcher); + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_exact_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_exact_lagrange_shape_key(key, dispatcher); + + case libMesh::MONOMIAL: + return dispatch_exact_monomial_shape_key(key, dispatcher); + + default: + return dispatcher.unsupported_key(key); + } +} + +template +inline int +dispatch_supported_shape_key(libMesh::FEShapeKey key, const Dispatcher & dispatcher) +{ + if (!libMesh::supports_shape(key)) + return dispatcher.unsupported_key(key); + + return dispatch_exact_shape_key(key, dispatcher); +} + +inline bool +is_supported_lagrange_map_topology(libMesh::ElemType topo) +{ + return libMesh::supports_lagrange_map_topology(topo); +} + +inline bool +supports_shape_key_with_lagrange_map(libMesh::FEShapeKey key) +{ + return libMesh::supports_shape_with_lagrange_map(key); +} + +template +inline int 
+dispatch_supported_shape_key_with_lagrange_map(libMesh::FEShapeKey key, + const Dispatcher & dispatcher) +{ + if (!supports_shape_key_with_lagrange_map(key)) + return dispatcher.unsupported_key(key); + + switch (key.family) + { + case libMesh::LAGRANGE: + return dispatch_exact_lagrange_shape_key_with_map(key, dispatcher); + + case libMesh::MONOMIAL: + return dispatch_exact_monomial_shape_key_with_map(key, dispatcher); + + default: + return dispatcher.unsupported_key(key); + } +} + +inline bool +is_supported_lagrange_face_map_topology(libMesh::ElemType topo) +{ + return libMesh::supports_lagrange_face_map_topology(topo); +} + +template +inline int +dispatch_supported_lagrange_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + return libMesh::dispatch_lagrange_map_topology_or( + topo, + dispatcher, + [&](libMesh::ElemType unsupported) { return dispatcher.unsupported_topology(unsupported); }); +} + +template +inline int +dispatch_supported_lagrange_face_map_topology(libMesh::ElemType topo, + const Dispatcher & dispatcher) +{ + if (!is_supported_lagrange_face_map_topology(topo)) + return dispatcher.unsupported_topology(topo); + + return dispatch_supported_lagrange_map_topology(topo, dispatcher); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_SHAPE_DISPATCH_H diff --git a/include/gpu/kokkos_fe_types.h b/include/gpu/kokkos_fe_types.h new file mode 100644 index 00000000000..d61d163c20e --- /dev/null +++ b/include/gpu/kokkos_fe_types.h @@ -0,0 +1,105 @@ +// Kokkos FE type helpers. +// +// Shared FE topology/order support metadata lives in fe_shape_traits.h. +// This header keeps the Kokkos-facing hard-fail wrappers and namespace +// compatibility for existing FE device code. 
+ +#ifndef LIBMESH_KOKKOS_FE_TYPES_H +#define LIBMESH_KOKKOS_FE_TYPES_H + +#include "libmesh/enum_fe_elem_class.h" +#include "libmesh/fe_reference_element_traits.h" +#include "libmesh/fe_shape_traits.h" +#include "libmesh/libmesh_device.h" +#ifndef LIBMESH_KOKKOS_COMPILATION +# include "libmesh/libmesh_common.h" +#endif + +namespace libMesh::Kokkos +{ + +using libMesh::FEElemClass; +using libMesh::FEShapeKey; +using libMesh::is_monomial_2d_elem_type; +using libMesh::is_monomial_3d_elem_type; +using libMesh::lagrange_shape_topology_or_invalid; +using libMesh::lagrange_exact_n_dofs_or_zero; +using libMesh::monomial_exact_n_dofs_or_zero; +using libMesh::monomial_evaluator_dim_or_zero; +using libMesh::supports_shape; +using libMesh::supports_grad_shape; +using libMesh::supports_n_dofs; + +namespace detail +{ + +LIBMESH_DEVICE_INLINE void +abort_unsupported(const char * msg) +{ +#ifdef LIBMESH_KOKKOS_COMPILATION + ::Kokkos::abort(msg); +#else + libmesh_error_msg(msg); +#endif +} + +} // namespace detail + +LIBMESH_DEVICE_INLINE libMesh::ElemType +get_side_topology(libMesh::ElemType parent) +{ + const libMesh::ElemType side_topology = libMesh::side_topology_or_invalid(parent); + + if (side_topology == libMesh::INVALID_ELEM) + { + if (requires_side_specific_topology(parent)) + detail::abort_unsupported("get_side_topology(): mixed-face elements require side-specific topology"); + else + detail::abort_unsupported("get_side_topology(): unsupported element type"); + return libMesh::INVALID_ELEM; + } + + return side_topology; +} + +LIBMESH_DEVICE_INLINE libMesh::ElemType +get_side_topology(libMesh::ElemType parent, + unsigned int side) +{ + const libMesh::ElemType side_topology = libMesh::side_topology_or_invalid(parent, side); + + if (side_topology != libMesh::INVALID_ELEM) + return side_topology; + + return get_side_topology(parent); +} + +LIBMESH_DEVICE_INLINE libMesh::FEElemClass +class_from_topology(libMesh::ElemType topo) +{ + const libMesh::FEElemClass elem_class = 
libMesh::class_from_topology_or_invalid(topo); + + if (elem_class == libMesh::FEElemClass::N_CLASSES) + { + detail::abort_unsupported("class_from_topology(): unsupported element type"); + return libMesh::FEElemClass::N_CLASSES; + } + + return elem_class; +} + +LIBMESH_DEVICE_INLINE unsigned int +n_dofs(FEShapeKey key) +{ + if (!supports_n_dofs(key)) + { + detail::abort_unsupported("n_dofs(FEShapeKey): unsupported FE key for current Kokkos evaluator support boundary"); + return 0; + } + + return libMesh::n_dofs_or_zero(key); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_FE_TYPES_H diff --git a/include/gpu/kokkos_hilbert_assembly.h b/include/gpu/kokkos_hilbert_assembly.h new file mode 100644 index 00000000000..c28f67c9e43 --- /dev/null +++ b/include/gpu/kokkos_hilbert_assembly.h @@ -0,0 +1,284 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_KOKKOS_HILBERT_ASSEMBLY_H +#define LIBMESH_KOKKOS_HILBERT_ASSEMBLY_H + +#include "../systems/hilbert_assembly_kernel.h" + +#include "kokkos_fe_evaluator.h" +#include "kokkos_fe_map.h" +#include "kokkos_quadrature.h" + +#include +#include + +namespace libMesh::Kokkos::detail +{ + +LIBMESH_DEVICE_INLINE bool +supports_hilbert_local_assembly(libMesh::FEShapeKey key, + libMesh::ElemMappingType mapping_type, + const unsigned int quadrature_order) +{ + return mapping_type == libMesh::LAGRANGE_MAP && + libMesh::supports_shape_with_lagrange_map(key) && + libMesh::supports_grad_shape(key) && + libMesh::Kokkos::GaussQuadrature::n_points(key.elem_type, quadrature_order) > 0; +} + +template +class HilbertFEAccess +{ +public: + using node_storage_type = std::decay_t; + class QpData + { + public: + LIBMESH_DEVICE_INLINE + QpData(const HilbertFEAccess & fe, + const unsigned int qp, + const bool need_gradients) + : _fe(fe), + _qp(qp), + _qp_ref(GaussQuadrature::point(fe._key.elem_type, fe._quadrature_order, qp)), + _JxW(0.), + _need_gradients(need_gradients) + { + RealVector xyz = zero_vector(); + RealTensor J = zero_tensor(); + + physical_point_and_jacobian(fe._mapping_type, + fe._key.elem_type, + fe._nodes, + fe._n_nodes, + _qp_ref(0), + _qp_ref(1), + _qp_ref(2), + xyz, + J); + + _xyz = Point(xyz(0), xyz(1), xyz(2)); + _JxW = + volume_jxw(J, + fe._dim, + GaussQuadrature::weight(fe._key.elem_type, fe._quadrature_order, qp)); + + if (_need_gradients) + _Jinv = libMesh::Kokkos::inverse(J, fe._dim); + } + + LIBMESH_DEVICE_INLINE + Real JxW() const + { + return _JxW; + } + + LIBMESH_DEVICE_INLINE + Real phi(const unsigned int i) const + { + return shape(_fe._key, i, _qp_ref(0), _qp_ref(1), _qp_ref(2)); + } + + LIBMESH_DEVICE_INLINE + Gradient 
dphi(const unsigned int i) const + { + libmesh_assert(_need_gradients); + return _Jinv * grad_shape(_fe._key, i, _qp_ref(0), _qp_ref(1), _qp_ref(2)); + } + + LIBMESH_DEVICE_INLINE + const Point & xyz() const + { + return _xyz; + } + + LIBMESH_DEVICE_INLINE + const RealVector & reference_point() const + { + return _qp_ref; + } + + LIBMESH_DEVICE_INLINE + const RealTensor & inverse_jacobian() const + { + libmesh_assert(_need_gradients); + return _Jinv; + } + + LIBMESH_DEVICE_INLINE + unsigned int qp_index() const + { + return _qp; + } + + LIBMESH_DEVICE_INLINE + unsigned int elem_index() const + { + return _fe._elem_index; + } + + private: + const HilbertFEAccess & _fe; + unsigned int _qp; + RealVector _qp_ref; + Point _xyz; + Real _JxW; + RealTensor _Jinv; + bool _need_gradients; + }; + + LIBMESH_DEVICE_INLINE + HilbertFEAccess(libMesh::FEShapeKey key, + libMesh::ElemMappingType mapping_type, + const NodeStorage & nodes, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const unsigned int elem_index = 0) + : _key(key), + _mapping_type(mapping_type), + _nodes(nodes), + _n_nodes(n_nodes), + _quadrature_order(quadrature_order), + _dim(dim_from_topology(key.elem_type)), + _elem_index(elem_index) + { + } + + LIBMESH_DEVICE_INLINE + unsigned int n_qpoints() const + { + return GaussQuadrature::n_points(_key.elem_type, _quadrature_order); + } + + LIBMESH_DEVICE_INLINE + unsigned int n_dofs() const + { + return libMesh::Kokkos::n_dofs(_key); + } + + LIBMESH_DEVICE_INLINE + QpData qp_data(const unsigned int qp, + const bool need_gradients) const + { + return QpData(*this, qp, need_gradients); + } + +private: + libMesh::FEShapeKey _key; + libMesh::ElemMappingType _mapping_type; + node_storage_type _nodes; + unsigned int _n_nodes; + unsigned int _quadrature_order; + unsigned int _dim; + unsigned int _elem_index; +}; + +template +using HilbertSolutionAccess = libMesh::detail::HilbertSolutionAccess; + +template +LIBMESH_DEVICE_INLINE auto 
+make_hilbert_solution_access(const FEAccess & fe, + CoeffStorage && coeff, + const Number solution_derivative) +{ + return libMesh::detail::make_hilbert_solution_access( + fe, + std::forward(coeff), + solution_derivative); +} + +template +using AnalyticHilbertGoalAccess = + libMesh::detail::HilbertAnalyticGoalAccess; + +template +LIBMESH_DEVICE_INLINE auto +make_hilbert_analytic_goal_access(GoalFunction && goal_func, + GoalGradient && goal_grad) +{ + return libMesh::detail::make_hilbert_analytic_goal_access( + std::forward(goal_func), + std::forward(goal_grad)); +} + +template +class LocalHilbertAccumulator +{ +public: + LIBMESH_DEVICE_INLINE + explicit LocalHilbertAccumulator(const unsigned int n_dofs) + : _n_dofs(n_dofs) + { + zero(); + } + + LIBMESH_DEVICE_INLINE + void zero() + { + for (unsigned int i = 0; i != MaxDofs; ++i) + { + _F[i] = 0.; + for (unsigned int j = 0; j != MaxDofs; ++j) + _K[i][j] = 0.; + } + } + + LIBMESH_DEVICE_INLINE + void add_residual(const unsigned int i, + const Number value) + { + _F[i] += value; + } + + LIBMESH_DEVICE_INLINE + void add_jacobian(const unsigned int i, + const unsigned int j, + const Number value) + { + _K[i][j] += value; + } + + LIBMESH_DEVICE_INLINE + Number residual(const unsigned int i) const + { + return _F[i]; + } + + LIBMESH_DEVICE_INLINE + Number jacobian(const unsigned int i, + const unsigned int j) const + { + return _K[i][j]; + } + + LIBMESH_DEVICE_INLINE + unsigned int n_dofs() const + { + return _n_dofs; + } + +private: + Number _F[MaxDofs]; + Number _K[MaxDofs][MaxDofs]; + unsigned int _n_dofs; +}; + +} // namespace libMesh::Kokkos::detail + +#endif // LIBMESH_KOKKOS_HILBERT_ASSEMBLY_H diff --git a/include/gpu/kokkos_hilbert_system.h b/include/gpu/kokkos_hilbert_system.h new file mode 100644 index 00000000000..61f6c37fd16 --- /dev/null +++ b/include/gpu/kokkos_hilbert_system.h @@ -0,0 +1,876 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_KOKKOS_HILBERT_SYSTEM_H +#define LIBMESH_KOKKOS_HILBERT_SYSTEM_H + +#include "libmesh/libmesh_common.h" + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "kokkos_hilbert_assembly.h" +#include "kokkos_parsed_function.h" + +namespace libMesh::Kokkos::detail +{ + +template +LIBMESH_DEVICE_INLINE decltype(auto) +storage_at(const Storage & storage, + const unsigned int i) +{ + return storage(i); +} + +template +LIBMESH_DEVICE_INLINE const T & +storage_at(const T * storage, + const unsigned int i) +{ + return storage[i]; +} + +template +struct StaticArrayAccess +{ + using value_type = T; + T values[N] = {}; + unsigned int size = 0; + + LIBMESH_DEVICE_INLINE + const T & operator()(const unsigned int i) const + { + return values[i]; + } +}; + +template +class ElementNodeAccess +{ +public: + LIBMESH_DEVICE_INLINE + ElementNodeAccess(NodeCoordinateView node_coordinates, + ElemNodeIdView element_node_ids, + const unsigned int elem_index) + : _node_coordinates(node_coordinates), + _element_node_ids(element_node_ids), + _elem_index(elem_index) + { + } + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int node, + const unsigned int component) const + { + return _node_coordinates(_element_node_ids(_elem_index, 
node), component); + } + +private: + NodeCoordinateView _node_coordinates; + ElemNodeIdView _element_node_ids; + unsigned int _elem_index; +}; + +template +LIBMESH_DEVICE_INLINE auto +make_element_node_access(NodeCoordinateView node_coordinates, + ElemNodeIdView element_node_ids, + const unsigned int elem_index) +{ + return ElementNodeAccess(node_coordinates, + element_node_ids, + elem_index); +} + +template +class GatheredCoeffAccess +{ +public: + LIBMESH_DEVICE_INLINE + GatheredCoeffAccess(GlobalCoeffView global_coeffs, + LocalIndexView local_indices, + const unsigned int elem_index) + : _global_coeffs(global_coeffs), + _local_indices(local_indices), + _elem_index(elem_index) + { + } + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int i) const + { + return storage_at(_global_coeffs, _local_indices(_elem_index, i)); + } + +private: + GlobalCoeffView _global_coeffs; + LocalIndexView _local_indices; + unsigned int _elem_index; +}; + +template +LIBMESH_DEVICE_INLINE auto +make_gathered_coeff_access(GlobalCoeffView global_coeffs, + LocalIndexView local_indices, + const unsigned int elem_index) +{ + return GatheredCoeffAccess(global_coeffs, + local_indices, + elem_index); +} + +template +struct DenseElementOutputSink +{ + ResidualView residual; + JacobianView jacobian; + unsigned int n_dofs = 0; + bool request_jacobian = false; + + template + LIBMESH_DEVICE_INLINE + void write(const Accumulator & accum) const + { + for (unsigned int i = 0; i != n_dofs; ++i) + { + residual(i) = accum.residual(i); + if (request_jacobian) + for (unsigned int j = 0; j != n_dofs; ++j) + jacobian(i, j) = accum.jacobian(i, j); + } + } +}; + +template +struct FlatDeviceValueSink +{ + ResidualView residual; + JacobianView jacobian; + unsigned int n_dofs = 0; + + template + LIBMESH_DEVICE_INLINE + void write(const Accumulator & accum) const + { + for (unsigned int i = 0; i != n_dofs; ++i) + { + residual(i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + 
jacobian(i * n_dofs + j) = accum.jacobian(i, j); + } + } +}; + +template +struct DirectScatterAccess +{ + View values; + + LIBMESH_DEVICE_INLINE + void add(const std::size_t slot, + const Number value) const + { + ::Kokkos::atomic_add(&values(slot), value); + } +}; + +template +struct SplitScatterAccess +{ + LocalView local_values; + RemoteView remote_values; + std::size_t local_size = 0; + + LIBMESH_DEVICE_INLINE + void add(const std::size_t slot, + const Number value) const + { + if (slot < local_size) + ::Kokkos::atomic_add(&local_values(slot), value); + else + ::Kokkos::atomic_add(&remote_values(slot - local_size), value); + } +}; + +template +struct SplitMatrixScatterAccess +{ + DiagView diag_values; + OffdiagView offdiag_values; + RemoteView remote_values; + std::size_t diag_size = 0; + std::size_t offdiag_base = 0; + + LIBMESH_DEVICE_INLINE + void add(const std::size_t slot, + const Number value) const + { + if (slot < diag_size) + ::Kokkos::atomic_add(&diag_values(slot), value); + else if (slot < offdiag_base) + ::Kokkos::atomic_add(&offdiag_values(slot - diag_size), value); + else + ::Kokkos::atomic_add(&remote_values(slot - offdiag_base), value); + } +}; + +struct ZeroCoeffAccess +{ + LIBMESH_DEVICE_INLINE + Number operator()(const unsigned int) const + { + return Number(0); + } +}; + +template +class GatheredParsedFEMGoalAccess +{ +public: + LIBMESH_DEVICE_INLINE + GatheredParsedFEMGoalAccess(FieldKeyStorage field_keys, + FieldDofStorage field_dofs, + GlobalCoeffView global_coeffs, + const LocalIndexView * field_local_indices, + GoalFunction goal) + : _field_keys(field_keys), + _field_dofs(field_dofs), + _global_coeffs(global_coeffs), + _goal(goal) + { + for (unsigned int i = 0; i != MaxFieldVariables; ++i) + _field_local_indices[i] = field_local_indices[i]; + } + + template + LIBMESH_DEVICE_INLINE + Number value(const QpData & qp_data, const Point & xyz) const + { + Number vars[LIBMESH_DIM + 1 + MaxFieldVariables] = {}; + fill_variables(qp_data, xyz, 
vars); + return _goal.value(vars); + } + + template + LIBMESH_DEVICE_INLINE + Gradient gradient(const QpData & qp_data, const Point & xyz) const + { + Number vars[LIBMESH_DIM + 1 + MaxFieldVariables] = {}; + Gradient field_gradients[MaxFieldVariables]; + fill_variables(qp_data, xyz, vars); + + for (unsigned int field = 0; field != _goal.n_field_variables(); ++field) + field_gradients[field] = sample_field_gradient(qp_data, field); + + return _goal.gradient(vars, field_gradients); + } + +private: + template + LIBMESH_DEVICE_INLINE + void fill_variables(const QpData & qp_data, + const Point & xyz, + Number * vars) const + { + vars[0] = xyz(0); +#if LIBMESH_DIM > 1 + vars[1] = xyz(1); +#endif +#if LIBMESH_DIM > 2 + vars[2] = xyz(2); +#endif + vars[LIBMESH_DIM] = _goal.time(); + + for (unsigned int field = 0; field != _goal.n_field_variables(); ++field) + vars[LIBMESH_DIM + 1 + field] = sample_field_value(qp_data, field); + } + + template + LIBMESH_DEVICE_INLINE + Number sample_field_value(const QpData & qp_data, + const unsigned int field) const + { + const auto & qp_ref = qp_data.reference_point(); + const auto field_key = _field_keys(field); + const unsigned int n_dofs = _field_dofs(field); + const unsigned int elem_index = qp_data.elem_index(); + + Number value = 0.; + for (unsigned int i = 0; i != n_dofs; ++i) + value += storage_at(_global_coeffs, _field_local_indices[field](elem_index, i)) * + shape(field_key, i, qp_ref(0), qp_ref(1), qp_ref(2)); + + return value; + } + + template + LIBMESH_DEVICE_INLINE + Gradient sample_field_gradient(const QpData & qp_data, + const unsigned int field) const + { + Gradient grad; + grad.zero(); + + const auto & qp_ref = qp_data.reference_point(); + const auto & Jinv = qp_data.inverse_jacobian(); + const auto field_key = _field_keys(field); + const unsigned int n_dofs = _field_dofs(field); + const unsigned int elem_index = qp_data.elem_index(); + + for (unsigned int i = 0; i != n_dofs; ++i) + grad.add_scaled(Jinv * 
grad_shape(field_key, i, qp_ref(0), qp_ref(1), qp_ref(2)), + storage_at(_global_coeffs, _field_local_indices[field](elem_index, i))); + + return grad; + } + + FieldKeyStorage _field_keys; + FieldDofStorage _field_dofs; + GlobalCoeffView _global_coeffs; + LocalIndexView _field_local_indices[MaxFieldVariables] = {}; + GoalFunction _goal; +}; + +template +bool +run_hilbert_system_assembly(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const unsigned int elem_index, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const CoeffStorage & coeff, + const Number solution_derivative, + GoalAccess goal_access, + const bool request_jacobian, + const Sink & sink, + const char * const kernel_name) +{ + if (sink.n_dofs > MaxDofs) + return false; + + if (!supports_hilbert_local_assembly(key, mapping_type, quadrature_order)) + return false; + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe(key, + mapping_type, + elem_nodes, + n_nodes, + quadrature_order, + elem_index); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, 1), + KOKKOS_LAMBDA(const int) { + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, coeff, solution_derivative); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(sink.n_dofs); + libMesh::detail::assemble_hilbert_element(fe, + solution, + goal_access, + request_jacobian, + hilbert_order, + accum); + sink.write(accum); + }); + + return true; +} + +template +void +run_hilbert_system_value_batch(const libMesh::FEFamily family, + const libMesh::Order base_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemTypeStorage & element_types, + const 
ElemMappingTypeStorage & element_mapping_types, + const ElemNodeCountStorage & element_n_nodes, + const ElemPLevelStorage & element_p_levels, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const QuadratureOrderStorage & quadrature_orders, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const unsigned int hilbert_order, + GoalAccess goal_access, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + const auto key = + libMesh::FEShapeKey{family, + element_types(elem_index), + static_cast(base_order + + cast_int(element_p_levels(elem_index)))}; + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, + element_mapping_types(elem_index), + elem_nodes, + element_n_nodes(elem_index), + quadrature_orders(record_index), + elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); +} + +template +void 
+run_hilbert_system_bucket_scatter_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const SlotStorage & rhs_slots, + const SlotStorage & mat_slots, + const unsigned int hilbert_order, + GoalAccess goal_access, + ResidualScatterAccess rhs_scatter, + JacobianScatterAccess mat_scatter, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_scatter.add(rhs_slots(rhs_offset + i), -accum.residual(i)); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_scatter.add(mat_slots(mat_offset + i * n_dofs + j), accum.jacobian(i, j)); + } + }); +} + +template +void +run_hilbert_system_bucket_value_batch(const 
libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const unsigned int hilbert_order, + GoalAccess goal_access, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); +} + +template +void +run_hilbert_system_fem_value_batch(const libMesh::FEFamily family, + const libMesh::Order base_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const 
ElemTypeStorage & element_types, + const ElemMappingTypeStorage & element_mapping_types, + const ElemNodeCountStorage & element_n_nodes, + const ElemPLevelStorage & element_p_levels, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const QuadratureOrderStorage & quadrature_orders, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const FieldKeyRecordStorage & field_keys, + const FieldDofRecordStorage & field_dofs, + const FieldLocalIndexStorage & field_local_indices, + const GlobalCoeffStorage & global_coeffs, + GoalFunction goal_function, + const unsigned int hilbert_order, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + const auto key = + libMesh::FEShapeKey{family, + element_types(elem_index), + static_cast(base_order + + cast_int(element_p_levels(elem_index)))}; + + StaticArrayAccess record_field_keys; + StaticArrayAccess record_field_dofs; + record_field_keys.size = goal_function.n_field_variables(); + record_field_dofs.size = goal_function.n_field_variables(); + for (unsigned int field = 0; field != goal_function.n_field_variables(); ++field) + { + record_field_keys.values[field] = field_keys(field, record_index); + record_field_dofs.values[field] = field_dofs(field, record_index); + } + + const auto goal_access = + GatheredParsedFEMGoalAccess(record_field_keys, + record_field_dofs, + global_coeffs, + field_local_indices.values, + goal_function); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const 
libMesh::Kokkos::detail::HilbertFEAccess fe( + key, + element_mapping_types(elem_index), + elem_nodes, + element_n_nodes(elem_index), + quadrature_orders(record_index), + elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); +} + +template +void +run_hilbert_system_fem_bucket_scatter_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + const SlotStorage & rhs_slots, + const SlotStorage & mat_slots, + FieldKeyStorage field_keys, + FieldDofStorage field_dofs, + const FieldLocalIndexStorage & field_local_indices, + const GlobalCoeffStorage & global_coeffs, + GoalFunction goal_function, + const unsigned int hilbert_order, + ResidualScatterAccess rhs_scatter, + JacobianScatterAccess mat_scatter, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned 
int n_dofs = elem_n_dofs(record_index); + + const auto goal_access = + GatheredParsedFEMGoalAccess(field_keys, + field_dofs, + global_coeffs, + field_local_indices.values, + goal_function); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_scatter.add(rhs_slots(rhs_offset + i), -accum.residual(i)); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_scatter.add(mat_slots(mat_offset + i * n_dofs + j), accum.jacobian(i, j)); + } + }); +} + +template +void +run_hilbert_system_fem_bucket_value_batch(const libMesh::FEShapeKey key, + const libMesh::ElemMappingType mapping_type, + const unsigned int n_nodes, + const unsigned int quadrature_order, + const NodeCoordinateStorage & node_coordinates, + const ElemNodeIdStorage & element_node_ids, + const ElemIndexStorage & elem_indices, + const ElemDofCountStorage & elem_n_dofs, + const OffsetStorage & rhs_offsets, + const OffsetStorage & mat_offsets, + FieldKeyStorage field_keys, + FieldDofStorage field_dofs, + const FieldLocalIndexStorage & field_local_indices, + const GlobalCoeffStorage & global_coeffs, + GoalFunction goal_function, + const unsigned int hilbert_order, + ResidualView rhs_values, + JacobianView mat_values, + const char * const kernel_name) +{ + const auto n_records = elem_indices.extent(0); + + ::Kokkos::parallel_for( + kernel_name, + ::Kokkos::RangePolicy<>(0, cast_int(n_records)), + 
KOKKOS_LAMBDA(const int raw_record_index) { + const unsigned int record_index = cast_int(raw_record_index); + const unsigned int elem_index = elem_indices(record_index); + const unsigned int n_dofs = elem_n_dofs(record_index); + + const auto goal_access = + GatheredParsedFEMGoalAccess(field_keys, + field_dofs, + global_coeffs, + field_local_indices.values, + goal_function); + + const auto elem_nodes = + make_element_node_access(node_coordinates, element_node_ids, elem_index); + const libMesh::Kokkos::detail::HilbertFEAccess fe( + key, mapping_type, elem_nodes, n_nodes, quadrature_order, elem_index); + + const auto solution = + libMesh::Kokkos::detail::make_hilbert_solution_access(fe, ZeroCoeffAccess{}, Number(1.)); + libMesh::Kokkos::detail::LocalHilbertAccumulator accum(n_dofs); + libMesh::detail::assemble_hilbert_element( + fe, solution, goal_access, true, hilbert_order, accum); + + const auto rhs_offset = rhs_offsets(record_index); + const auto mat_offset = mat_offsets(record_index); + for (unsigned int i = 0; i != n_dofs; ++i) + { + rhs_values(rhs_offset + i) = -accum.residual(i); + for (unsigned int j = 0; j != n_dofs; ++j) + mat_values(mat_offset + i * n_dofs + j) = accum.jacobian(i, j); + } + }); +} + +} // namespace libMesh::Kokkos::detail + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_HILBERT_SYSTEM_H diff --git a/include/gpu/kokkos_linalg_base.h b/include/gpu/kokkos_linalg_base.h new file mode 100644 index 00000000000..70a634f1f19 --- /dev/null +++ b/include/gpu/kokkos_linalg_base.h @@ -0,0 +1,472 @@ +// libMesh Kokkos compile-time linalg foundation. +// +// This header defines the small access/materialization layer that sits +// underneath richer vector/tensor algebra. It is intentionally limited to +// component access, storage-backed references, and conversion between +// vector-like/tensor-like objects and libMesh semantic types. 
+ +#ifndef LIBMESH_KOKKOS_LINALG_BASE_H +#define LIBMESH_KOKKOS_LINALG_BASE_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/point.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" +#include "libmesh/vector_value.h" + +#include +#include + +namespace libMesh::Kokkos +{ + +namespace detail +{ + +template +using remove_cvref_t = + typename std::remove_cv::type>::type; + +template +using remove_ref_t = typename std::remove_reference::type; + +template +using vector_view_value_t = + remove_cvref_t()(0, 0))>; + +template +using tensor_view_value_t = + remove_cvref_t()(0, 0, 0))>; + +} // namespace detail + +template +struct vector_traits; + +template +struct tensor_traits; + +template +struct is_vector_like : std::false_type +{ +}; + +template +struct is_tensor_like : std::false_type +{ +}; + +template +struct is_vector_ref : std::false_type +{ +}; + +template +struct is_tensor_ref : std::false_type +{ +}; + +template +inline constexpr bool is_vector_like_v = is_vector_like>::value; + +template +inline constexpr bool is_tensor_like_v = is_tensor_like>::value; + +template +inline constexpr bool is_vector_ref_v = is_vector_ref>::value; + +template +inline constexpr bool is_tensor_ref_v = is_tensor_ref>::value; + +template +class vector_ref +{ +public: + using view_type = ViewType; + using value_type = detail::vector_view_value_t; + + LIBMESH_DEVICE_INLINE + vector_ref(ViewType view, const unsigned int index) : _view(view), _index(index) {} + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int component) const + { + return _view(_index, component); + } + + template + LIBMESH_DEVICE_INLINE + void set(const unsigned int component, const Scalar & value) + { + static_assert(std::is_assignable::value, + "Cannot write through a vector_ref built from a read-only view"); + _view(_index, component) = value; + } + + template + LIBMESH_DEVICE_INLINE + void 
assign(const RightVector & right); + + template + LIBMESH_DEVICE_INLINE + void add(const RightVector & right); + + template + LIBMESH_DEVICE_INLINE + void add_scaled(const RightVector & right, const value_type & factor); + + template + LIBMESH_DEVICE_INLINE + void subtract(const RightVector & right); + + template + LIBMESH_DEVICE_INLINE + void subtract_scaled(const RightVector & right, const value_type & factor); + + LIBMESH_DEVICE_INLINE + void zero(); + + template + LIBMESH_DEVICE_INLINE + auto contract(const RightVector & right) const; + + LIBMESH_DEVICE_INLINE + auto norm() const; + + LIBMESH_DEVICE_INLINE + auto norm_sq() const; + + LIBMESH_DEVICE_INLINE + auto l1_norm() const; + + LIBMESH_DEVICE_INLINE + bool is_zero() const; + + LIBMESH_DEVICE_INLINE + auto unit() const; + + template + LIBMESH_DEVICE_INLINE + auto cross(const RightVector & right) const; + + LIBMESH_DEVICE_INLINE + unsigned int index() const + { + return _index; + } + +private: + ViewType _view; + unsigned int _index; +}; + +template +class tensor_ref +{ +public: + using view_type = ViewType; + using value_type = detail::tensor_view_value_t; + + LIBMESH_DEVICE_INLINE + tensor_ref(ViewType view, const unsigned int index) : _view(view), _index(index) {} + + LIBMESH_DEVICE_INLINE + decltype(auto) operator()(const unsigned int row, const unsigned int col) const + { + return _view(_index, row, col); + } + + template + LIBMESH_DEVICE_INLINE + void set(const unsigned int row, const unsigned int col, const Scalar & value) + { + static_assert(std::is_assignable::value, + "Cannot write through a tensor_ref built from a read-only view"); + _view(_index, row, col) = value; + } + + template + LIBMESH_DEVICE_INLINE + void assign(const RightTensor & right); + + template + LIBMESH_DEVICE_INLINE + void add(const RightTensor & right); + + template + LIBMESH_DEVICE_INLINE + void add_scaled(const RightTensor & right, const value_type & factor); + + template + LIBMESH_DEVICE_INLINE + void subtract(const 
RightTensor & right); + + template + LIBMESH_DEVICE_INLINE + void subtract_scaled(const RightTensor & right, const value_type & factor); + + LIBMESH_DEVICE_INLINE + void zero(); + + template + LIBMESH_DEVICE_INLINE + auto contract(const RightTensor & right) const; + + LIBMESH_DEVICE_INLINE + auto norm() const; + + LIBMESH_DEVICE_INLINE + auto norm_sq() const; + + LIBMESH_DEVICE_INLINE + bool is_zero() const; + + LIBMESH_DEVICE_INLINE + auto transpose() const; + + LIBMESH_DEVICE_INLINE + auto det(const unsigned int dim = LIBMESH_DIM) const; + + LIBMESH_DEVICE_INLINE + auto tr() const; + + LIBMESH_DEVICE_INLINE + auto inverse(const unsigned int dim = LIBMESH_DIM) const; + + template + LIBMESH_DEVICE_INLINE + void solve(const VectorLike & b, ResultVector & x) const; + + LIBMESH_DEVICE_INLINE + auto row(const unsigned int i) const; + + LIBMESH_DEVICE_INLINE + auto column(const unsigned int i) const; + + template + LIBMESH_DEVICE_INLINE + auto left_multiply(const VectorLike & v) const; + + LIBMESH_DEVICE_INLINE + unsigned int index() const + { + return _index; + } + +private: + ViewType _view; + unsigned int _index; +}; + +template +struct vector_traits> +{ + using value_type = T; + using semantic_type = libMesh::TypeVector; +}; + +template +struct vector_traits> +{ + using value_type = T; + using semantic_type = libMesh::VectorValue; +}; + +template <> +struct vector_traits +{ + using value_type = libMesh::Real; + using semantic_type = libMesh::Point; +}; + +template +struct vector_traits> +{ + using value_type = typename vector_ref::value_type; + using semantic_type = libMesh::TypeVector; +}; + +template +struct is_vector_like> : std::true_type +{ +}; + +template +struct is_vector_like> : std::true_type +{ +}; + +template <> +struct is_vector_like : std::true_type +{ +}; + +template +struct is_vector_like> : std::true_type +{ +}; + +template +struct is_vector_ref> : std::true_type +{ +}; + +template +struct tensor_traits> +{ + using value_type = T; + using 
semantic_type = libMesh::TypeTensor; +}; + +template +struct tensor_traits> +{ + using value_type = T; + using semantic_type = libMesh::TensorValue; +}; + +template +struct tensor_traits> +{ + using value_type = typename tensor_ref::value_type; + using semantic_type = libMesh::TypeTensor; +}; + +template +struct is_tensor_like> : std::true_type +{ +}; + +template +struct is_tensor_like> : std::true_type +{ +}; + +template +struct is_tensor_like> : std::true_type +{ +}; + +template +struct is_tensor_ref> : std::true_type +{ +}; + +template +using vector_value_type_t = typename vector_traits>::value_type; + +template +using tensor_value_type_t = typename tensor_traits>::value_type; + +template +using vector_semantic_type_t = typename vector_traits>::semantic_type; + +template +using tensor_semantic_type_t = typename tensor_traits>::semantic_type; + +template +LIBMESH_DEVICE_INLINE +decltype(auto) +vector_get_component(const T & v, const unsigned int component) +{ + return v(component); +} + +template +LIBMESH_DEVICE_INLINE +void vector_set_component(T & v, const unsigned int component, const Scalar & value) +{ + v(component) = value; +} + +template +LIBMESH_DEVICE_INLINE +void vector_set_component(vector_ref v, + const unsigned int component, + const Scalar & value) +{ + v.set(component, value); +} + +template +LIBMESH_DEVICE_INLINE +decltype(auto) +tensor_get_component(const T & T_in, const unsigned int row, const unsigned int col) +{ + return T_in(row, col); +} + +template +LIBMESH_DEVICE_INLINE +void tensor_set_component(T & T_out, + const unsigned int row, + const unsigned int col, + const Scalar & value) +{ + T_out(row, col) = value; +} + +template +LIBMESH_DEVICE_INLINE +void tensor_set_component(tensor_ref T_out, + const unsigned int row, + const unsigned int col, + const Scalar & value) +{ + T_out.set(row, col, value); +} + +template +LIBMESH_DEVICE_INLINE +vector_ref> +make_vector_ref(ViewType && view, const unsigned int index) +{ + return 
vector_ref>(std::forward(view), index); +} + +template +LIBMESH_DEVICE_INLINE +tensor_ref> +make_tensor_ref(ViewType && view, const unsigned int index) +{ + return tensor_ref>(std::forward(view), index); +} + +template +LIBMESH_DEVICE_INLINE +OutputVector materialize_vector(const VectorLike & v) +{ + static_assert(is_vector_like>::value, + "materialize_vector() requires a vector-like input type"); + + OutputVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, vector_get_component(v, component)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +OutputTensor materialize_tensor(const TensorLike & T_in) +{ + static_assert(is_tensor_like>::value, + "materialize_tensor() requires a tensor-like input type"); + + OutputTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, row, col)); + + return out; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_LINALG_BASE_H diff --git a/include/gpu/kokkos_parsed_function.h b/include/gpu/kokkos_parsed_function.h new file mode 100644 index 00000000000..1942ea0fca0 --- /dev/null +++ b/include/gpu/kokkos_parsed_function.h @@ -0,0 +1,832 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_KOKKOS_PARSED_FUNCTION_H +#define LIBMESH_KOKKOS_PARSED_FUNCTION_H + +#include "libmesh/libmesh_common.h" + +#ifdef LIBMESH_HAVE_KOKKOS + +#include "libmesh/libmesh_device.h" +#include "libmesh/parsed_function_program.h" +#include "libmesh/point.h" +#include "libmesh/vector_value.h" + +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include + +namespace libMesh::Kokkos +{ +namespace detail +{ + +template +struct DeviceParsedFunctionProgram +{ + ::Kokkos::View bytecode; + ::Kokkos::View immediates; + unsigned int stack_size = 0; + unsigned int n_variables = 0; + Scalar epsilon = 0; + + bool empty() const { return bytecode.extent(0) == 0; } +}; + +template +inline void +validate_program_stack(const DeviceParsedFunctionProgram & program, + const char * program_name, + const unsigned int max_stack) +{ + libmesh_error_msg_if(program.stack_size > max_stack, + "KokkosParsedFunction requires a larger MaxStack bound for " << + program_name << " bytecode"); +} + +template +inline void +validate_coordinate_program_variables(const DeviceParsedFunctionProgram & program, + const char * program_name) +{ + libmesh_error_msg_if(program.n_variables > LIBMESH_DIM + 1, + "KokkosParsedFunction currently supports only x/y/z/t variables in " << + program_name << " bytecode"); +} + +template +inline ::Kokkos::View +upload_scalar_buffer(const std::vector & values, + const std::string & label) +{ + ::Kokkos::View d(label, values.size()); + auto h = ::Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + + ::Kokkos::deep_copy(d, h); + return d; +} + +template +inline DeviceParsedFunctionProgram +make_device_program(const libMesh::ParsedFunctionProgram & program, 
+ const std::string & label) +{ + DeviceParsedFunctionProgram d_program; + d_program.bytecode = upload_scalar_buffer(program.bytecode, label + "_bytecode"); + d_program.immediates = upload_scalar_buffer(program.immediates, label + "_immediates"); + d_program.stack_size = program.stack_size; + d_program.n_variables = program.n_variables; + d_program.epsilon = program.epsilon; + return d_program; +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_abs(const Scalar x) +{ + using std::abs; + return abs(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_floor(const Scalar x) +{ + using std::floor; + return floor(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_ceil(const Scalar x) +{ + using std::ceil; + return ceil(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_log(const Scalar x) +{ + using std::log; + return log(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_log10(const Scalar x) +{ + using std::log10; + return log10(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_log2(const Scalar x) +{ + using std::log2; + return log2(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_sin(const Scalar x) +{ + using std::sin; + return sin(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_cos(const Scalar x) +{ + using std::cos; + return cos(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_tan(const Scalar x) +{ + using std::tan; + return tan(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_sinh(const Scalar x) +{ + using std::sinh; + return sinh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_cosh(const Scalar x) +{ + using std::cosh; + return cosh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_tanh(const Scalar x) +{ + using std::tanh; + return tanh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_exp(const Scalar x) +{ + using std::exp; + return exp(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_exp2(const Scalar x) +{ + using std::exp2; + return exp2(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_sqrt(const Scalar x) +{ 
+ using std::sqrt; + return sqrt(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_pow(const Scalar x, + const Scalar y) +{ + using std::pow; + return pow(x, y); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_hypot(const Scalar x, + const Scalar y) +{ + using std::hypot; + return hypot(x, y); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_cbrt(const Scalar x) +{ + using std::cbrt; + return cbrt(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_asin(const Scalar x) +{ + using std::asin; + return asin(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_acos(const Scalar x) +{ + using std::acos; + return acos(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_atan(const Scalar x) +{ + using std::atan; + return atan(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_atan2(const Scalar y, + const Scalar x) +{ + using std::atan2; + return atan2(y, x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_asinh(const Scalar x) +{ + using std::asinh; + return asinh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_acosh(const Scalar x) +{ + using std::acosh; + return acosh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_atanh(const Scalar x) +{ + using std::atanh; + return atanh(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_mod(const Scalar x, + const Scalar y) +{ + using std::fmod; + return fmod(x, y); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_trunc(const Scalar x) +{ + return x < Scalar(0) ? pf_ceil(x) : pf_floor(x); +} + +template +LIBMESH_DEVICE_INLINE Scalar +pf_int(const Scalar x) +{ + return x < Scalar(0) ? 
pf_ceil(x - Scalar(0.5)) : pf_floor(x + Scalar(0.5)); +} + +template +LIBMESH_DEVICE_INLINE bool +pf_equal(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return pf_abs(x - y) <= epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_nequal(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return pf_abs(x - y) > epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_less(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return x < y - epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_less_or_eq(const Scalar x, + const Scalar y, + const Scalar epsilon) +{ + return x <= y + epsilon; +} + +template +LIBMESH_DEVICE_INLINE bool +pf_truth(const Scalar x) +{ + return pf_abs(x) >= Scalar(0.5); +} + +template +LIBMESH_DEVICE_INLINE bool +pf_abs_truth(const Scalar x) +{ + return x >= Scalar(0.5); +} + +template +LIBMESH_DEVICE_INLINE Scalar +eval_parsed_function_program(const DeviceParsedFunctionProgram & program, + const Scalar * vars) +{ + if (program.empty()) + return 0; + + Scalar stack[MaxStack]; + unsigned int dp = 0; + int sp = -1; + + for (unsigned int ip = 0; ip < program.bytecode.extent(0); ++ip) + { + const unsigned int opcode = program.bytecode(ip); + + if (libMesh::parsed_function_is_var_opcode(opcode)) + { + stack[++sp] = vars[opcode - libMesh::parsed_function_var_begin()]; + continue; + } + + switch (static_cast(opcode)) + { + case libMesh::ParsedFunctionOpcode::cAbs: stack[sp] = pf_abs(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAcos: stack[sp] = pf_acos(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAcosh: stack[sp] = pf_acosh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAsin: stack[sp] = pf_asin(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAsinh: stack[sp] = pf_asinh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAtan: stack[sp] = pf_atan(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cAtan2: stack[sp - 1] = pf_atan2(stack[sp - 1], 
stack[sp]); --sp; break; + case libMesh::ParsedFunctionOpcode::cAtanh: stack[sp] = pf_atanh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCbrt: stack[sp] = pf_cbrt(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCeil: stack[sp] = pf_ceil(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCos: stack[sp] = pf_cos(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCosh: stack[sp] = pf_cosh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCot: stack[sp] = Scalar(1) / pf_tan(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cCsc: stack[sp] = Scalar(1) / pf_sin(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cExp: stack[sp] = pf_exp(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cExp2: stack[sp] = pf_exp2(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cFloor: stack[sp] = pf_floor(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cHypot: stack[sp - 1] = pf_hypot(stack[sp - 1], stack[sp]); --sp; break; + + case libMesh::ParsedFunctionOpcode::cIf: + if (pf_truth(stack[sp--])) + ip += 2; + else + { + const unsigned int jump_ip = program.bytecode(ip + 1); + const unsigned int jump_dp = program.bytecode(ip + 2); + ip = jump_ip; + dp = jump_dp; + } + break; + + case libMesh::ParsedFunctionOpcode::cInt: stack[sp] = pf_int(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cLog: stack[sp] = pf_log(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cLog10: stack[sp] = pf_log10(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cLog2: stack[sp] = pf_log2(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cMax: stack[sp - 1] = stack[sp - 1] > stack[sp] ? stack[sp - 1] : stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cMin: stack[sp - 1] = stack[sp - 1] < stack[sp] ? 
stack[sp - 1] : stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cPow: stack[sp - 1] = pf_pow(stack[sp - 1], stack[sp]); --sp; break; + case libMesh::ParsedFunctionOpcode::cSec: stack[sp] = Scalar(1) / pf_cos(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cSin: stack[sp] = pf_sin(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cSinh: stack[sp] = pf_sinh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cSqrt: stack[sp] = pf_sqrt(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cTan: stack[sp] = pf_tan(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cTanh: stack[sp] = pf_tanh(stack[sp]); break; + case libMesh::ParsedFunctionOpcode::cTrunc: stack[sp] = pf_trunc(stack[sp]); break; + + case libMesh::ParsedFunctionOpcode::cImmed: stack[++sp] = program.immediates(dp++); break; + case libMesh::ParsedFunctionOpcode::cJump: + dp = program.bytecode(ip + 2); /* both jump operands follow the cJump opcode (same layout the cIf branch reads), so fetch dp while ip still indexes that opcode */ + ip = program.bytecode(ip + 1); /* only now redirect ip to the jump target; the loop's ++ip then advances past it */ + break; + + case libMesh::ParsedFunctionOpcode::cNeg: stack[sp] = -stack[sp]; break; + case libMesh::ParsedFunctionOpcode::cAdd: stack[sp - 1] += stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cSub: stack[sp - 1] -= stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cMul: stack[sp - 1] *= stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cDiv: stack[sp - 1] /= stack[sp]; --sp; break; + case libMesh::ParsedFunctionOpcode::cMod: stack[sp - 1] = pf_mod(stack[sp - 1], stack[sp]); --sp; break; + case libMesh::ParsedFunctionOpcode::cEqual: stack[sp - 1] = Scalar(pf_equal(stack[sp - 1], stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cNEqual: stack[sp - 1] = Scalar(pf_nequal(stack[sp - 1], stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cLess: stack[sp - 1] = Scalar(pf_less(stack[sp - 1], stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cLessOrEq: stack[sp - 1] = Scalar(pf_less_or_eq(stack[sp - 1], 
stack[sp], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cGreater: stack[sp - 1] = Scalar(pf_less(stack[sp], stack[sp - 1], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cGreaterOrEq: stack[sp - 1] = Scalar(pf_less_or_eq(stack[sp], stack[sp - 1], program.epsilon)); --sp; break; + case libMesh::ParsedFunctionOpcode::cNot: stack[sp] = Scalar(!pf_truth(stack[sp])); break; + case libMesh::ParsedFunctionOpcode::cAnd: stack[sp - 1] = Scalar(pf_truth(stack[sp - 1]) && pf_truth(stack[sp])); --sp; break; + case libMesh::ParsedFunctionOpcode::cOr: stack[sp - 1] = Scalar(pf_truth(stack[sp - 1]) || pf_truth(stack[sp])); --sp; break; + case libMesh::ParsedFunctionOpcode::cNotNot: stack[sp] = Scalar(pf_truth(stack[sp])); break; + + case libMesh::ParsedFunctionOpcode::cDeg: stack[sp] = stack[sp] * Scalar(180.) / libMesh::pi; break; + case libMesh::ParsedFunctionOpcode::cRad: stack[sp] = stack[sp] * libMesh::pi / Scalar(180.); break; + + case libMesh::ParsedFunctionOpcode::cPopNMov: + { + const unsigned int target = program.bytecode(++ip); + const unsigned int source = program.bytecode(++ip); + stack[target] = stack[source]; + sp = static_cast(target); + break; + } + + case libMesh::ParsedFunctionOpcode::cLog2by: + stack[sp - 1] = pf_log2(stack[sp - 1]) * stack[sp]; + --sp; + break; + + case libMesh::ParsedFunctionOpcode::cNop: + break; + + case libMesh::ParsedFunctionOpcode::cSinCos: + stack[sp + 1] = pf_cos(stack[sp]); + stack[sp] = pf_sin(stack[sp]); + ++sp; + break; + + case libMesh::ParsedFunctionOpcode::cSinhCosh: + stack[sp + 1] = pf_cosh(stack[sp]); + stack[sp] = pf_sinh(stack[sp]); + ++sp; + break; + + case libMesh::ParsedFunctionOpcode::cAbsNot: stack[sp] = Scalar(!pf_abs_truth(stack[sp])); break; + case libMesh::ParsedFunctionOpcode::cAbsNotNot: stack[sp] = Scalar(pf_abs_truth(stack[sp])); break; + case libMesh::ParsedFunctionOpcode::cAbsAnd: stack[sp - 1] = Scalar(pf_abs_truth(stack[sp - 1]) && pf_abs_truth(stack[sp])); 
--sp; break; + case libMesh::ParsedFunctionOpcode::cAbsOr: stack[sp - 1] = Scalar(pf_abs_truth(stack[sp - 1]) || pf_abs_truth(stack[sp])); --sp; break; + + case libMesh::ParsedFunctionOpcode::cAbsIf: + if (pf_abs_truth(stack[sp--])) + ip += 2; + else + { + const unsigned int jump_ip = program.bytecode(ip + 1); + const unsigned int jump_dp = program.bytecode(ip + 2); + ip = jump_ip; + dp = jump_dp; + } + break; + + case libMesh::ParsedFunctionOpcode::cDup: stack[sp + 1] = stack[sp]; ++sp; break; + + case libMesh::ParsedFunctionOpcode::cFetch: + { + const unsigned int stack_offset = program.bytecode(++ip); + stack[sp + 1] = stack[stack_offset]; + ++sp; + break; + } + + case libMesh::ParsedFunctionOpcode::cInv: stack[sp] = Scalar(1) / stack[sp]; break; + case libMesh::ParsedFunctionOpcode::cSqr: stack[sp] = stack[sp] * stack[sp]; break; + case libMesh::ParsedFunctionOpcode::cRDiv: stack[sp - 1] = stack[sp] / stack[sp - 1]; --sp; break; + case libMesh::ParsedFunctionOpcode::cRSub: stack[sp - 1] = stack[sp] - stack[sp - 1]; --sp; break; + case libMesh::ParsedFunctionOpcode::cRSqrt: stack[sp] = Scalar(1) / pf_sqrt(stack[sp]); break; + + default: + return Scalar(0); + } + } + + return stack[sp]; +} + +template +LIBMESH_DEVICE_INLINE Scalar +eval_coordinate_parsed_function_program(const DeviceParsedFunctionProgram & program, + const Point & p, + const Real time) +{ + Scalar vars[LIBMESH_DIM + 1]; + vars[0] = p(0); +#if LIBMESH_DIM > 1 + vars[1] = p(1); +#endif +#if LIBMESH_DIM > 2 + vars[2] = p(2); +#endif + vars[LIBMESH_DIM] = time; + return eval_parsed_function_program(program, vars); +} + +} // namespace detail + +template +class KokkosParsedFunction; + +template +class KokkosParsedScalarProgram +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedScalarProgram() = default; + + explicit KokkosParsedScalarProgram(const libMesh::ParsedFunctionProgram & program, + const std::string & label) + : _program(detail::make_device_program(program, label)) + { + 
detail::validate_program_stack(_program, label.c_str(), MaxStack); + } + + LIBMESH_DEVICE_INLINE + unsigned int n_variables() const + { + return _program.n_variables; + } + + template + LIBMESH_DEVICE_INLINE + Scalar operator()(const VariableStorage & vars) const + { + return detail::eval_parsed_function_program(_program, vars); + } + +private: + detail::DeviceParsedFunctionProgram _program; +}; + +template +class KokkosParsedGradient +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedGradient() = default; + + LIBMESH_DEVICE_INLINE + explicit KokkosParsedGradient(const KokkosParsedFunction & func) + : _func(func) + { + } + + LIBMESH_DEVICE_INLINE + Gradient operator()(const Point & p) const; + +private: + KokkosParsedFunction _func; +}; + +template +class KokkosParsedFunction +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedFunction() = default; + + explicit KokkosParsedFunction(const libMesh::ParsedFunctionProgramBundle & program_bundle, + const Real time = 0.) + : _value(detail::make_device_program(program_bundle.value, "parsed_function_value")), + _dx(detail::make_device_program(program_bundle.dx, "parsed_function_dx")), +#if LIBMESH_DIM > 1 + _dy(detail::make_device_program(program_bundle.dy, "parsed_function_dy")), +#endif +#if LIBMESH_DIM > 2 + _dz(detail::make_device_program(program_bundle.dz, "parsed_function_dz")), +#endif + _dt(detail::make_device_program(program_bundle.dt, "parsed_function_dt")), + _time(time) + { + detail::validate_program_stack(_value, "value", MaxStack); + detail::validate_coordinate_program_variables(_value, "value"); + detail::validate_program_stack(_dx, "dx", MaxStack); + detail::validate_coordinate_program_variables(_dx, "dx"); +#if LIBMESH_DIM > 1 + detail::validate_program_stack(_dy, "dy", MaxStack); + detail::validate_coordinate_program_variables(_dy, "dy"); +#endif +#if LIBMESH_DIM > 2 + detail::validate_program_stack(_dz, "dz", MaxStack); + detail::validate_coordinate_program_variables(_dz, "dz"); +#endif + 
detail::validate_program_stack(_dt, "dt", MaxStack); + detail::validate_coordinate_program_variables(_dt, "dt"); + } + + KokkosParsedFunction + with_time(const Real time) const + { + auto copy = *this; + copy._time = time; + return copy; + } + + LIBMESH_DEVICE_INLINE + Scalar operator()(const Point & p) const + { + return detail::eval_coordinate_parsed_function_program(_value, p, _time); + } + + LIBMESH_DEVICE_INLINE + Scalar time_derivative(const Point & p) const + { + return detail::eval_coordinate_parsed_function_program(_dt, p, _time); + } + + LIBMESH_DEVICE_INLINE + Gradient gradient(const Point & p) const + { + Gradient g; + g(0) = detail::eval_coordinate_parsed_function_program(_dx, p, _time); +#if LIBMESH_DIM > 1 + g(1) = detail::eval_coordinate_parsed_function_program(_dy, p, _time); +#endif +#if LIBMESH_DIM > 2 + g(2) = detail::eval_coordinate_parsed_function_program(_dz, p, _time); +#endif + return g; + } + + LIBMESH_DEVICE_INLINE + KokkosParsedGradient gradient_function() const + { + return KokkosParsedGradient(*this); + } + +private: + detail::DeviceParsedFunctionProgram _value; + detail::DeviceParsedFunctionProgram _dx; +#if LIBMESH_DIM > 1 + detail::DeviceParsedFunctionProgram _dy; +#endif +#if LIBMESH_DIM > 2 + detail::DeviceParsedFunctionProgram _dz; +#endif + detail::DeviceParsedFunctionProgram _dt; + Real _time = 0.; + + friend class KokkosParsedGradient; +}; + +template +LIBMESH_DEVICE_INLINE +Gradient +KokkosParsedGradient::operator()(const Point & p) const +{ + return _func.gradient(p); +} + +template +class KokkosParsedFEMFunction +{ +public: + LIBMESH_DEVICE_INLINE + KokkosParsedFEMFunction() = default; + + explicit KokkosParsedFEMFunction(const libMesh::ParsedFEMFunctionProgramBundle & program_bundle, + const Real time = 0.) 
+ : _value(program_bundle.value, "parsed_fem_function_value"), + _dx(program_bundle.dx, "parsed_fem_function_dx"), +#if LIBMESH_DIM > 1 + _dy(program_bundle.dy, "parsed_fem_function_dy"), +#endif +#if LIBMESH_DIM > 2 + _dz(program_bundle.dz, "parsed_fem_function_dz"), +#endif + _dt(program_bundle.dt, "parsed_fem_function_dt"), + _n_field_variables(cast_int(program_bundle.value_variable_numbers.size())), + _time(time) + { + libmesh_error_msg_if(!program_bundle.supports_kokkos_value_goal(), + "KokkosParsedFEMFunction currently supports only value-based ParsedFEMFunction expressions"); + libmesh_error_msg_if(_n_field_variables > MaxFieldVariables, + "KokkosParsedFEMFunction exceeds MaxFieldVariables"); + + for (unsigned int i = 0; i != _n_field_variables; ++i) + { + _field_variable_numbers[i] = program_bundle.value_variable_numbers[i]; + _field_value_derivatives[i] = + KokkosParsedScalarProgram( + program_bundle.value_variable_derivatives[i], + "parsed_fem_function_dvalue_" + std::to_string(i)); + } + } + + LIBMESH_DEVICE_INLINE + KokkosParsedFEMFunction + with_time(const Real time) const + { + auto copy = *this; + copy._time = time; + return copy; + } + + LIBMESH_DEVICE_INLINE + unsigned int n_field_variables() const + { + return _n_field_variables; + } + + LIBMESH_DEVICE_INLINE + Real time() const + { + return _time; + } + + LIBMESH_DEVICE_INLINE + unsigned int field_variable_number(const unsigned int i) const + { + return _field_variable_numbers[i]; + } + + template + LIBMESH_DEVICE_INLINE + Scalar value(const VariableStorage & vars) const + { + return _value(vars); + } + + template + LIBMESH_DEVICE_INLINE + Gradient gradient(const VariableStorage & vars, + const Gradient * field_gradients) const + { + Gradient g; + g(0) = _dx(vars); +#if LIBMESH_DIM > 1 + g(1) = _dy(vars); +#endif +#if LIBMESH_DIM > 2 + g(2) = _dz(vars); +#endif + + for (unsigned int i = 0; i != _n_field_variables; ++i) + g.add_scaled(field_gradients[i], _field_value_derivatives[i](vars)); + + 
return g; + } + +private: + KokkosParsedScalarProgram _value; + KokkosParsedScalarProgram _dx; +#if LIBMESH_DIM > 1 + KokkosParsedScalarProgram _dy; +#endif +#if LIBMESH_DIM > 2 + KokkosParsedScalarProgram _dz; +#endif + KokkosParsedScalarProgram _dt; + KokkosParsedScalarProgram _field_value_derivatives[MaxFieldVariables]; + unsigned int _field_variable_numbers[MaxFieldVariables] = {}; + unsigned int _n_field_variables = 0; + Real _time = 0.; +}; + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_HAVE_KOKKOS + +#endif // LIBMESH_KOKKOS_PARSED_FUNCTION_H diff --git a/include/gpu/kokkos_quadrature.h b/include/gpu/kokkos_quadrature.h new file mode 100644 index 00000000000..e0d24db8aeb --- /dev/null +++ b/include/gpu/kokkos_quadrature.h @@ -0,0 +1,208 @@ +// Kokkos FE access to the shared libMesh Gauss quadrature rule tables. + +#ifndef LIBMESH_KOKKOS_QUADRATURE_H +#define LIBMESH_KOKKOS_QUADRATURE_H + +#include "kokkos_fe_base.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/quadrature_gauss_rules.h" + +#include + +namespace libMesh::Kokkos +{ + +struct GaussLegendre1D +{ + LIBMESH_DEVICE_INLINE static unsigned int n_points(unsigned int alg_order) + { + return Quadrature::Gauss::gauss_legendre_rule(alg_order).count; + } + + LIBMESH_DEVICE_INLINE static Real point(unsigned int alg_order, unsigned int i) + { + const auto rule = Quadrature::Gauss::gauss_legendre_rule(alg_order); + return (i < rule.count) ? rule.points[i] : 0.0; + } + + LIBMESH_DEVICE_INLINE static Real weight(unsigned int alg_order, unsigned int i) + { + const auto rule = Quadrature::Gauss::gauss_legendre_rule(alg_order); + return (i < rule.count) ? 
rule.weights[i] : 0.0; + } +}; + +struct GaussQuadrature +{ + LIBMESH_DEVICE_INLINE static unsigned int + n_points(libMesh::ElemType topo, unsigned int order) + { + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + return GaussLegendre1D::n_points(order); + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return n * n; + } + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + { + const unsigned int n = GaussLegendre1D::n_points(order); + return n * n * n; + } + + case libMesh::TRI3: + case libMesh::TRI6: + return Quadrature::Gauss::triangle_rule(order).count; + + case libMesh::TET4: + case libMesh::TET10: + return Quadrature::Gauss::tetrahedron_rule(order).count; + + default: + return 0; + } + } + + LIBMESH_DEVICE_INLINE static RealVector + point(libMesh::ElemType topo, unsigned int order, unsigned int qp) + { + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + return make_vector(GaussLegendre1D::point(order, qp), 0.0, 0.0); + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + { + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return zero_vector(); + const unsigned int i = qp % n; + const unsigned int j = qp / n; + return make_vector(GaussLegendre1D::point(order, i), + GaussLegendre1D::point(order, j), + 0.0); + } + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + { + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return zero_vector(); + const unsigned int i = qp % n; + const unsigned int j = (qp / n) % n; + const unsigned int k = qp / (n * n); + return make_vector(GaussLegendre1D::point(order, i), + GaussLegendre1D::point(order, j), + GaussLegendre1D::point(order, k)); + } + + case libMesh::TRI3: + case libMesh::TRI6: + { + const auto rule = 
Quadrature::Gauss::triangle_rule(order); + return (qp < rule.count) ? make_vector(rule.points[qp].x, rule.points[qp].y, 0.0) : zero_vector(); + } + + case libMesh::TET4: + case libMesh::TET10: + { + const auto rule = Quadrature::Gauss::tetrahedron_rule(order); + return (qp < rule.count) + ? make_vector(rule.points[qp].x, rule.points[qp].y, rule.points[qp].z) + : zero_vector(); + } + + default: + return zero_vector(); + } + } + + LIBMESH_DEVICE_INLINE static Real + weight(libMesh::ElemType topo, unsigned int order, unsigned int qp) + { + switch (topo) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + return GaussLegendre1D::weight(order, qp); + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + { + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return 0.0; + return GaussLegendre1D::weight(order, qp % n) * + GaussLegendre1D::weight(order, qp / n); + } + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + { + const auto rule = Quadrature::Gauss::gauss_legendre_rule(order); + const unsigned int n = rule.count; + if (!n) + return 0.0; + return GaussLegendre1D::weight(order, qp % n) * + GaussLegendre1D::weight(order, (qp / n) % n) * + GaussLegendre1D::weight(order, qp / (n * n)); + } + + case libMesh::TRI3: + case libMesh::TRI6: + { + const auto rule = Quadrature::Gauss::triangle_rule(order); + return (qp < rule.count) ? rule.points[qp].w : 0.0; + } + + case libMesh::TET4: + case libMesh::TET10: + { + const auto rule = Quadrature::Gauss::tetrahedron_rule(order); + return (qp < rule.count) ? 
rule.points[qp].w : 0.0; + } + + default: + return 0.0; + } + } +}; + +inline void +fill_quadrature(libMesh::ElemType topo, + unsigned int order, + std::vector & qpts, + std::vector & weights) +{ + const unsigned int nqp = GaussQuadrature::n_points(topo, order); + qpts.resize(nqp); + weights.resize(nqp); + for (unsigned int q = 0; q < nqp; ++q) + { + qpts[q] = GaussQuadrature::point(topo, order, q); + weights[q] = GaussQuadrature::weight(topo, order, q); + } +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_QUADRATURE_H diff --git a/include/gpu/kokkos_scalar_types.h b/include/gpu/kokkos_scalar_types.h new file mode 100644 index 00000000000..7584819413b --- /dev/null +++ b/include/gpu/kokkos_scalar_types.h @@ -0,0 +1,186 @@ +// libMesh Kokkos device-compatible scalar types. +// +// This header provides dimension-aware Kokkos aliases/helpers that mirror +// libMesh host numerics at LIBMESH_DIM=1/2/3. + +#ifndef LIBMESH_KOKKOS_SCALAR_TYPES_H +#define LIBMESH_KOKKOS_SCALAR_TYPES_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/type_vector.h" +#include "libmesh/type_tensor.h" + +namespace libMesh::Kokkos +{ + +using Real = libMesh::Real; +using RealVector = libMesh::TypeVector; +using RealTensor = libMesh::TypeTensor; + +template +LIBMESH_DEVICE_INLINE +VectorType load_vector(const ViewType & view, const unsigned int i) +{ + VectorType v; + v.zero(); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + v(d) = view(i, d); + + return v; +} + +template +LIBMESH_DEVICE_INLINE +void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) +{ + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + view(i, d) = v(d); +} + +template +LIBMESH_DEVICE_INLINE +Real vector_component(const ViewType & view, const unsigned int i, const unsigned int component) +{ + if (component < LIBMESH_DIM) + return view(i, component); + + return Real(0); +} + +template +LIBMESH_DEVICE_INLINE +TensorType 
load_tensor(const ViewType & view, const unsigned int i) +{ + TensorType T; + T.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + T(row, col) = view(i, row, col); + + return T; +} + +template +LIBMESH_DEVICE_INLINE +void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + view(i, row, col) = T(row, col); +} + +template +LIBMESH_DEVICE_INLINE +Real tensor_component(const ViewType & view, + const unsigned int i, + const unsigned int row, + const unsigned int col) +{ + if (row < LIBMESH_DIM && col < LIBMESH_DIM) + return view(i, row, col); + + return Real(0); +} + +LIBMESH_DEVICE_INLINE +RealVector zero_vector() +{ + RealVector v; + v.zero(); + return v; +} + +LIBMESH_DEVICE_INLINE +RealVector make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + RealVector v = zero_vector(); + + v(0) = x; + +#if LIBMESH_DIM > 1 + v(1) = y; +#else + libmesh_assert_equal_to(y, Real(0)); +#endif + +#if LIBMESH_DIM > 2 + v(2) = z; +#else + libmesh_assert_equal_to(z, Real(0)); +#endif + + return v; +} + +LIBMESH_DEVICE_INLINE +RealTensor zero_tensor() +{ + RealTensor J; + J.zero(); + return J; +} + +LIBMESH_DEVICE_INLINE +RealTensor leading_identity(const unsigned int dim = LIBMESH_DIM) +{ + libmesh_assert_less_equal(dim, LIBMESH_DIM); + + RealTensor I = zero_tensor(); + for (unsigned int i = 0; i < dim; ++i) + I(i, i) = Real(1); + + return I; +} + +LIBMESH_DEVICE_INLINE +Real leading_determinant(const RealTensor & J, const unsigned int dim = LIBMESH_DIM) +{ + libmesh_assert_less_equal(dim, LIBMESH_DIM); + + if (dim == 0) + return Real(1); + + if (dim == 1) + return J(0, 0); + + if (dim == 2) + return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); + + return J.det(); +} + +LIBMESH_DEVICE_INLINE +RealTensor leading_inverse(const RealTensor & J, const unsigned int dim = 
LIBMESH_DIM) +{ + libmesh_assert_less_equal(dim, LIBMESH_DIM); + + if (dim == 0) + return leading_identity(0); + + if (dim == 1) + { + RealTensor inv = zero_tensor(); + inv(0, 0) = Real(1) / J(0, 0); + return inv; + } + + if (dim == 2) + { + const Real inv_det = Real(1) / leading_determinant(J, dim); + RealTensor inv = zero_tensor(); + inv(0, 0) = J(1, 1) * inv_det; + inv(0, 1) = -J(0, 1) * inv_det; + inv(1, 0) = -J(1, 0) * inv_det; + inv(1, 1) = J(0, 0) * inv_det; + return inv; + } + + return J.inverse(); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_SCALAR_TYPES_H diff --git a/include/gpu/kokkos_storage.h b/include/gpu/kokkos_storage.h new file mode 100644 index 00000000000..23e59aabf8c --- /dev/null +++ b/include/gpu/kokkos_storage.h @@ -0,0 +1,53 @@ +// libMesh Kokkos storage helpers for dimension-aware vector/tensor views. + +#ifndef LIBMESH_KOKKOS_STORAGE_H +#define LIBMESH_KOKKOS_STORAGE_H + +#include "libmesh/kokkos_linalg_base.h" + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" +#include "libmesh/type_tensor.h" +#include "libmesh/type_vector.h" + +namespace libMesh::Kokkos +{ + +template +LIBMESH_DEVICE_INLINE +VectorType load_vector(const ViewType & view, const unsigned int i) +{ + return materialize_vector(make_vector_ref(view, i)); +} + +template +LIBMESH_DEVICE_INLINE +void store_vector(const ViewType & view, const unsigned int i, const VectorType & v) +{ + auto out = make_vector_ref(view, i); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + vector_set_component(out, d, vector_get_component(v, d)); +} + +template +LIBMESH_DEVICE_INLINE +TensorType load_tensor(const ViewType & view, const unsigned int i) +{ + return materialize_tensor(make_tensor_ref(view, i)); +} + +template +LIBMESH_DEVICE_INLINE +void store_tensor(const ViewType & view, const unsigned int i, const TensorType & T) +{ + auto out = make_tensor_ref(view, i); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col 
= 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T, row, col)); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_STORAGE_H diff --git a/include/gpu/kokkos_storage_policy.h b/include/gpu/kokkos_storage_policy.h new file mode 100644 index 00000000000..6bfec5a6df0 --- /dev/null +++ b/include/gpu/kokkos_storage_policy.h @@ -0,0 +1,124 @@ +// libMesh Kokkos compile-time storage policies for fixed-dimension linalg data. +// +// These policies keep storage selection separate from the linalg algorithms: +// kernels operate on refs/materialized values, while the backend policy chooses +// the underlying Kokkos view layout. + +#ifndef LIBMESH_KOKKOS_STORAGE_POLICY_H +#define LIBMESH_KOKKOS_STORAGE_POLICY_H + +#include "libmesh/libmesh_common.h" + +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include + +namespace libMesh::Kokkos +{ + +template +struct static_dim_storage_policy +{ + using scalar_type = Scalar; + using layout_type = Layout; + using vector_view = ::Kokkos::View; + using tensor_view = ::Kokkos::View; + + static constexpr const char * + name() + { + return std::is_same::value ? "layoutleft" : + std::is_same::value ? 
"layoutright" : + "layoutcustom"; + } +}; + +using layout_left_storage_policy = static_dim_storage_policy; +using layout_right_storage_policy = static_dim_storage_policy; +using default_storage_policy = layout_right_storage_policy; + +template +constexpr const char * +storage_policy_name() +{ + return StoragePolicy::name(); +} + +template +inline typename StoragePolicy::vector_view +make_vector_storage(const char * label, const std::size_t n) +{ + return typename StoragePolicy::vector_view(std::string(label), n); +} + +inline default_storage_policy::vector_view +make_vector_storage(const char * label, const std::size_t n) +{ + return make_vector_storage(label, n); +} + +template +inline typename StoragePolicy::tensor_view +make_tensor_storage(const char * label, const std::size_t n) +{ + return typename StoragePolicy::tensor_view(std::string(label), n); +} + +inline default_storage_policy::tensor_view +make_tensor_storage(const char * label, const std::size_t n) +{ + return make_tensor_storage(label, n); +} + +template +inline typename StoragePolicy::vector_view +upload_vector_storage(const std::vector & values, const char * label) +{ + auto d = make_vector_storage(label, values.size()); + auto h = ::Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + h(i, component) = values[i](component); + + ::Kokkos::deep_copy(d, h); + return d; +} + +template +inline default_storage_policy::vector_view +upload_vector_storage(const std::vector & values, const char * label) +{ + return upload_vector_storage(values, label); +} + +template +inline typename StoragePolicy::tensor_view +upload_tensor_storage(const std::vector & values, const char * label) +{ + auto d = make_tensor_storage(label, values.size()); + auto h = ::Kokkos::create_mirror_view(d); + + for (std::size_t i = 0; i < values.size(); ++i) + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int 
col = 0; col < LIBMESH_DIM; ++col) + h(i, row, col) = values[i](row, col); + + ::Kokkos::deep_copy(d, h); + return d; +} + +template +inline default_storage_policy::tensor_view +upload_tensor_storage(const std::vector & values, const char * label) +{ + return upload_tensor_storage(values, label); +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_STORAGE_POLICY_H diff --git a/include/gpu/kokkos_tensor_ops.h b/include/gpu/kokkos_tensor_ops.h new file mode 100644 index 00000000000..9b62289c36c --- /dev/null +++ b/include/gpu/kokkos_tensor_ops.h @@ -0,0 +1,869 @@ +// libMesh Kokkos generic tensor operations. +// +// These free functions build tensor algebra on top of the primitive +// access/materialization layer in kokkos_linalg_base.h. They are written +// against tensor-like and vector-like inputs so both libMesh owning types and +// storage-backed refs can participate in the same math. + +#ifndef LIBMESH_KOKKOS_TENSOR_OPS_H +#define LIBMESH_KOKKOS_TENSOR_OPS_H + +#include "libmesh/kokkos_linalg_base.h" +#include "libmesh/kokkos_vector_ops.h" + +#include "libmesh/tensor_tools.h" + +#include + +namespace libMesh::Kokkos +{ + +// Construction and materialization + +template +LIBMESH_DEVICE_INLINE +ResultTensor zero_tensor_value() +{ + ResultTensor out; + out.zero(); + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor tensor_identity(const unsigned int dim = LIBMESH_DIM) +{ + ResultTensor out; + out.zero(); + + for (unsigned int i = 0; i < dim; ++i) + tensor_set_component(out, i, i, tensor_value_type_t(1)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor copy_tensor(const TensorLike & T_in) +{ + return materialize_tensor(T_in); +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +tensor_semantic_type_t copy_tensor(const TensorLike & T_in) +{ + return copy_tensor>(T_in); +} + +namespace detail +{ + +template +LIBMESH_DEVICE_INLINE +auto leading_determinant(const TensorLike & T_in, const unsigned int dim = 
LIBMESH_DIM) +{ + static_assert(is_tensor_like_v, + "detail::leading_determinant() requires a tensor-like input"); + + if (dim == 0) + return tensor_value_type_t(1); + + if (dim == 1) + return tensor_get_component(T_in, 0, 0); + + if (dim == 2) + return tensor_get_component(T_in, 0, 0) * tensor_get_component(T_in, 1, 1) - + tensor_get_component(T_in, 0, 1) * tensor_get_component(T_in, 1, 0); + +#if LIBMESH_DIM > 2 + const auto a00 = tensor_get_component(T_in, 0, 0); + const auto a01 = tensor_get_component(T_in, 0, 1); + const auto a02 = tensor_get_component(T_in, 0, 2); + const auto a10 = tensor_get_component(T_in, 1, 0); + const auto a11 = tensor_get_component(T_in, 1, 1); + const auto a12 = tensor_get_component(T_in, 1, 2); + const auto a20 = tensor_get_component(T_in, 2, 0); + const auto a21 = tensor_get_component(T_in, 2, 1); + const auto a22 = tensor_get_component(T_in, 2, 2); + + return a00 * (a11 * a22 - a12 * a21) - + a01 * (a10 * a22 - a12 * a20) + + a02 * (a10 * a21 - a11 * a20); +#else + libmesh_ignore(T_in); + return tensor_value_type_t(0); +#endif +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor outer_product(const LeftVector & left, const RightVector & right) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + vector_get_component(left, row) * libmesh_conj(vector_get_component(right, col))); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) +{ + static_assert(is_tensor_like_v, "detail::inverse() requires a tensor-like input"); + + ResultTensor out; + out.zero(); + + if (dim == 0) + return out; + + if (dim == 1) + { + tensor_set_component(out, 0, 0, tensor_value_type_t(1) / tensor_get_component(T_in, 0, 0)); + return out; + } + + const auto det = leading_determinant(T_in, dim); + + if (dim == 2) + { + 
tensor_set_component(out, 0, 0, tensor_get_component(T_in, 1, 1) / det); + tensor_set_component(out, 0, 1, -tensor_get_component(T_in, 0, 1) / det); + tensor_set_component(out, 1, 0, -tensor_get_component(T_in, 1, 0) / det); + tensor_set_component(out, 1, 1, tensor_get_component(T_in, 0, 0) / det); + return out; + } + +#if LIBMESH_DIM > 2 + const auto a00 = tensor_get_component(T_in, 0, 0); + const auto a01 = tensor_get_component(T_in, 0, 1); + const auto a02 = tensor_get_component(T_in, 0, 2); + const auto a10 = tensor_get_component(T_in, 1, 0); + const auto a11 = tensor_get_component(T_in, 1, 1); + const auto a12 = tensor_get_component(T_in, 1, 2); + const auto a20 = tensor_get_component(T_in, 2, 0); + const auto a21 = tensor_get_component(T_in, 2, 1); + const auto a22 = tensor_get_component(T_in, 2, 2); + + tensor_set_component(out, 0, 0, (a11 * a22 - a12 * a21) / det); + tensor_set_component(out, 0, 1, (a02 * a21 - a01 * a22) / det); + tensor_set_component(out, 0, 2, (a01 * a12 - a02 * a11) / det); + tensor_set_component(out, 1, 0, (a12 * a20 - a10 * a22) / det); + tensor_set_component(out, 1, 1, (a00 * a22 - a02 * a20) / det); + tensor_set_component(out, 1, 2, (a02 * a10 - a00 * a12) / det); + tensor_set_component(out, 2, 0, (a10 * a21 - a11 * a20) / det); + tensor_set_component(out, 2, 1, (a01 * a20 - a00 * a21) / det); + tensor_set_component(out, 2, 2, (a00 * a11 - a01 * a10) / det); +#else + libmesh_ignore(T_in); +#endif + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor transpose(const TensorLike & T_in) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, col, row)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor multiply_tensors(const LeftTensor & left, const RightTensor & right) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < 
LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + auto value = tensor_get_component(left, row, 0) * tensor_get_component(right, 0, col); + for (unsigned int k = 1; k < LIBMESH_DIM; ++k) + value += tensor_get_component(left, row, k) * tensor_get_component(right, k, col); + tensor_set_component(out, row, col, value); + } + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector row(const TensorLike & T_in, const unsigned int row_index) +{ + ResultVector out; + out.zero(); + + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + vector_set_component(out, col, tensor_get_component(T_in, row_index, col)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector column(const TensorLike & T_in, const unsigned int col_index) +{ + ResultVector out; + out.zero(); + + for (unsigned int row_index = 0; row_index < LIBMESH_DIM; ++row_index) + vector_set_component(out, row_index, tensor_get_component(T_in, row_index, col_index)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector multiply_tensor_vector(const TensorLike & T_in, const VectorLike & v) +{ + ResultVector out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + { + auto value = tensor_get_component(T_in, row, 0) * vector_get_component(v, 0); + for (unsigned int col = 1; col < LIBMESH_DIM; ++col) + value += tensor_get_component(T_in, row, col) * vector_get_component(v, col); + vector_set_component(out, row, value); + } + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector multiply_vector_tensor(const VectorLike & v, const TensorLike & T_in) +{ + ResultVector out; + out.zero(); + + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + auto value = vector_get_component(v, 0) * tensor_get_component(T_in, 0, col); + for (unsigned int row = 1; row < LIBMESH_DIM; ++row) + value += vector_get_component(v, row) * tensor_get_component(T_in, row, col); + vector_set_component(out, col, value); + } + + return out; +} + 
+template +LIBMESH_DEVICE_INLINE +void assign_tensor_components(LeftTensor & left, const RightTensor & right) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, row, col, tensor_get_component(right, row, col)); +} + +template +LIBMESH_DEVICE_INLINE +void fill_tensor_components(TensorLike & T_in, const Scalar & value) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(T_in, row, col, value); +} + +template +LIBMESH_DEVICE_INLINE +void update_tensor_components(LeftTensor & left, const RightTensor & right, const Scalar & factor) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(left, + row, + col, + tensor_get_component(left, row, col) + + factor * tensor_get_component(right, row, col)); +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor combine_tensors(const ScalarA & alpha, + const TensorA & A, + const ScalarB & beta, + const TensorB & B) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, + row, + col, + alpha * tensor_get_component(A, row, col) + + beta * tensor_get_component(B, row, col)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor scale_tensor(const Scalar & alpha, const TensorLike & T_in) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, alpha * tensor_get_component(T_in, row, col)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultTensor divide_tensor(const TensorLike & T_in, const Scalar & alpha) +{ + ResultTensor out; + out.zero(); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int 
col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(out, row, col, tensor_get_component(T_in, row, col) / alpha); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +void scale_tensor_components(TensorLike & T_in, const Scalar & alpha) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(T_in, row, col, tensor_get_component(T_in, row, col) * alpha); +} + +template +LIBMESH_DEVICE_INLINE +void divide_tensor_components(TensorLike & T_in, const Scalar & alpha) +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + tensor_set_component(T_in, row, col, tensor_get_component(T_in, row, col) / alpha); +} + +// Tensor reductions and predicates + +template +LIBMESH_DEVICE_INLINE +auto tensor_contract(const LeftTensor & left, const RightTensor & right) +{ + static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like left input"); + static_assert(is_tensor_like_v, "tensor_contract() requires a tensor-like right input"); + + using sum_type = + detail::remove_cvref_t; + + sum_type sum = sum_type(0); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + sum += tensor_get_component(left, row, col) * tensor_get_component(right, row, col); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_norm_sq(const TensorLike & T_in) +{ + static_assert(is_tensor_like_v, "tensor_norm_sq() requires a tensor-like input"); + + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + sum += libMesh::TensorTools::norm_sq(tensor_get_component(T_in, row, col)); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_norm(const TensorLike & T_in) +{ + using std::sqrt; + return sqrt(tensor_norm_sq(T_in)); +} + +template 
+LIBMESH_DEVICE_INLINE +auto tensor_trace(const TensorLike & T_in) +{ + static_assert(is_tensor_like_v, "tensor_trace() requires a tensor-like input"); + + using trace_type = detail::remove_cvref_t; + trace_type sum = trace_type(0); + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + sum += tensor_get_component(T_in, i, i); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +bool tensor_is_zero(const TensorLike & T_in) +{ + static_assert(is_tensor_like_v, "tensor_is_zero() requires a tensor-like input"); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (tensor_get_component(T_in, row, col) != tensor_value_type_t(0)) + return false; + + return true; +} + +} // namespace detail + +// libMesh-like convenience wrappers + +template +LIBMESH_DEVICE_INLINE +auto contract(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v, + decltype(detail::tensor_contract(left, right))> +{ + return detail::tensor_contract(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto norm_sq(const TensorLike & T_in) + -> std::enable_if_t, decltype(detail::tensor_norm_sq(T_in))> +{ + return detail::tensor_norm_sq(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto norm(const TensorLike & T_in) + -> std::enable_if_t, decltype(detail::tensor_norm(T_in))> +{ + return detail::tensor_norm(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto is_zero(const TensorLike & T_in) + -> std::enable_if_t, bool> +{ + return detail::tensor_is_zero(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto outer_product(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v, ResultTensor> +{ + return detail::outer_product(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto outer_product(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v, + libMesh::TypeTensor>> +{ + return outer_product>>(left, right); +} + +template 
+LIBMESH_DEVICE_INLINE +auto transpose(const TensorLike & T_in) + -> std::enable_if_t, ResultTensor> +{ + return detail::transpose(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto transpose(const TensorLike & T_in) + -> std::enable_if_t, tensor_semantic_type_t> +{ + return transpose>(T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto det(const TensorLike & T_in) + -> std::enable_if_t, decltype(T_in.det())> +{ + return T_in.det(); +} + +template +LIBMESH_DEVICE_INLINE +auto inverse(const TensorLike & T_in, const unsigned int dim = LIBMESH_DIM) + -> std::enable_if_t, + std::conditional_t::value, + tensor_semantic_type_t, + ResultTensor>> +{ + using output_type = std::conditional_t::value, + tensor_semantic_type_t, + ResultTensor>; + return detail::inverse(T_in, dim); +} + +template +LIBMESH_DEVICE_INLINE +auto row(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, ResultVector> +{ + return detail::row(T_in, i); +} + +template +LIBMESH_DEVICE_INLINE +auto row(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, libMesh::TypeVector>> +{ + return row>>(T_in, i); +} + +template +LIBMESH_DEVICE_INLINE +auto column(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, ResultVector> +{ + return detail::column(T_in, i); +} + +template +LIBMESH_DEVICE_INLINE +auto column(const TensorLike & T_in, const unsigned int i) + -> std::enable_if_t, libMesh::TypeVector>> +{ + return column>>(T_in, i); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::assign(const RightTensor & right) +{ + detail::assign_tensor_components(*this, right); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::add(const RightTensor & right) +{ + detail::update_tensor_components(*this, right, value_type(1)); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::add_scaled(const RightTensor & right, const value_type & factor) +{ + detail::update_tensor_components(*this, right, factor); +} + +template +template 
+LIBMESH_DEVICE_INLINE +void tensor_ref::subtract(const RightTensor & right) +{ + detail::update_tensor_components(*this, right, value_type(-1)); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::subtract_scaled(const RightTensor & right, const value_type & factor) +{ + detail::update_tensor_components(*this, right, -factor); +} + +template +LIBMESH_DEVICE_INLINE +void tensor_ref::zero() +{ + detail::fill_tensor_components(*this, value_type(0)); +} + +template +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::contract(const RightTensor & right) const +{ + return detail::tensor_contract(*this, right); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::norm() const +{ + return detail::tensor_norm(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::norm_sq() const +{ + return detail::tensor_norm_sq(*this); +} + +template +LIBMESH_DEVICE_INLINE +bool tensor_ref::is_zero() const +{ + return detail::tensor_is_zero(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::transpose() const +{ + return libMesh::Kokkos::transpose(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::det(const unsigned int dim) const +{ + return detail::leading_determinant(*this, dim); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::tr() const +{ + return detail::tensor_trace(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::inverse(const unsigned int dim) const +{ + return libMesh::Kokkos::inverse(*this, dim); +} + +template +template +LIBMESH_DEVICE_INLINE +void tensor_ref::solve(const VectorLike & b, ResultVector & x) const +{ + const auto solution = + detail::multiply_tensor_vector>(this->inverse(), b); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(x, component, vector_get_component(solution, component)); +} + +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::row(const unsigned int i) const +{ + return libMesh::Kokkos::row(*this, i); +} + +template 
+LIBMESH_DEVICE_INLINE +auto tensor_ref::column(const unsigned int i) const +{ + return libMesh::Kokkos::column(*this, i); +} + +template +template +LIBMESH_DEVICE_INLINE +auto tensor_ref::left_multiply(const VectorLike & v) const +{ + return v * *this; +} + +// Operator-compatible wrappers for storage-backed refs and mixed ref/owning math. + +template +LIBMESH_DEVICE_INLINE +auto operator-(const TensorLike & T_in) + -> std::enable_if_t && is_tensor_ref_v, + tensor_semantic_type_t> +{ + return detail::scale_tensor>(tensor_value_type_t(-1), T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + tensor_semantic_type_t> +{ + return detail::combine_tensors>( + tensor_value_type_t(1), left, tensor_value_type_t(1), right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator-(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + tensor_semantic_type_t> +{ + return detail::combine_tensors>( + tensor_value_type_t(1), left, tensor_value_type_t(-1), right); +} + +template && !is_tensor_like_v && + is_tensor_like_v && is_tensor_ref_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const Scalar & alpha, const TensorLike & T_in) +{ + return detail::scale_tensor>(alpha, T_in); +} + +template && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const TensorLike & T_in, const Scalar & alpha) +{ + return detail::scale_tensor>(alpha, T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto operator/(const TensorLike & T_in, const Scalar & alpha) + -> std::enable_if_t && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + tensor_semantic_type_t> +{ + return detail::divide_tensor>(T_in, alpha); +} + +template && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + 
int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const LeftTensor & left, const RightTensor & right) +{ + return detail::multiply_tensors>(left, right); +} + +template && is_vector_like_v && + (is_tensor_ref_v || is_vector_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const TensorLike & T_in, const VectorLike & v) +{ + return detail::multiply_tensor_vector>(T_in, v); +} + +template && is_tensor_like_v && + (is_vector_ref_v || is_tensor_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const VectorLike & v, const TensorLike & T_in) +{ + return detail::multiply_vector_tensor>(v, T_in); +} + +template +LIBMESH_DEVICE_INLINE +auto operator==(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + bool> +{ + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (tensor_get_component(left, row, col) != tensor_get_component(right, row, col)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +auto operator!=(const LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + bool> +{ + return !(left == right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+=(LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + LeftTensor &> +{ + detail::update_tensor_components(left, right, tensor_value_type_t(1)); + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator-=(LeftTensor & left, const RightTensor & right) + -> std::enable_if_t && is_tensor_like_v && + (is_tensor_ref_v || is_tensor_ref_v), + LeftTensor &> +{ + detail::update_tensor_components(left, right, tensor_value_type_t(-1)); + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator*=(LeftTensor & left, const Scalar & alpha) + -> std::enable_if_t && 
is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftTensor &> +{ + detail::scale_tensor_components(left, alpha); + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator/=(LeftTensor & left, const Scalar & alpha) + -> std::enable_if_t && is_tensor_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftTensor &> +{ + detail::divide_tensor_components(left, alpha); + return left; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_TENSOR_OPS_H diff --git a/include/gpu/kokkos_vector_ops.h b/include/gpu/kokkos_vector_ops.h new file mode 100644 index 00000000000..2c68a0341f2 --- /dev/null +++ b/include/gpu/kokkos_vector_ops.h @@ -0,0 +1,633 @@ +// libMesh Kokkos generic vector operations. +// +// These free functions build vector algebra on top of the primitive +// access/materialization layer in kokkos_linalg_base.h. They are written +// against vector-like inputs so both libMesh owning types and storage-backed +// refs can participate in the same math. 
+ +#ifndef LIBMESH_KOKKOS_VECTOR_OPS_H +#define LIBMESH_KOKKOS_VECTOR_OPS_H + +#include "libmesh/kokkos_linalg_base.h" + +#include "libmesh/tensor_tools.h" + +#include + +namespace libMesh::Kokkos +{ + +// Construction and materialization + +template +LIBMESH_DEVICE_INLINE +ResultVector zero_vector_value() +{ + ResultVector out; + out.zero(); + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector copy_vector(const VectorLike & v) +{ + return materialize_vector(v); +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t copy_vector(const VectorLike & v) +{ + return copy_vector>(v); +} + +namespace detail +{ + +template +LIBMESH_DEVICE_INLINE +void assign_vector_components(LeftVector & left, const RightVector & right) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, component, vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +void fill_vector_components(VectorLike & v, const Scalar & value) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(v, component, value); +} + +template +LIBMESH_DEVICE_INLINE +void update_vector_components(LeftVector & left, const RightVector & right, const Scalar & factor) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(left, + component, + vector_get_component(left, component) + + factor * vector_get_component(right, component)); +} + +template +LIBMESH_DEVICE_INLINE +ResultVector linear_combination(const ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, + component, + alpha * vector_get_component(a, component) + + beta * vector_get_component(b, component)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector linear_combination(const 
ScalarA & alpha, + const VectorA & a, + const ScalarB & beta, + const VectorB & b, + const ScalarC & gamma, + const VectorC & c) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, + component, + alpha * vector_get_component(a, component) + + beta * vector_get_component(b, component) + + gamma * vector_get_component(c, component)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector scale_vector(const Scalar & alpha, const VectorLike & v) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, alpha * vector_get_component(v, component)); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector divide_vector(const VectorLike & v, const Scalar & alpha) +{ + ResultVector out; + out.zero(); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(out, component, vector_get_component(v, component) / alpha); + + return out; +} + +template +LIBMESH_DEVICE_INLINE +void scale_vector_components(VectorLike & v, const Scalar & alpha) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(v, component, vector_get_component(v, component) * alpha); +} + +template +LIBMESH_DEVICE_INLINE +void divide_vector_components(VectorLike & v, const Scalar & alpha) +{ + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + vector_set_component(v, component, vector_get_component(v, component) / alpha); +} + +} // namespace detail + +// Reductions and predicates + +template +LIBMESH_DEVICE_INLINE +auto vector_dot(const LeftVector & left, const RightVector & right) +{ + static_assert(is_vector_like_v, "vector_dot() requires a vector-like left input"); + static_assert(is_vector_like_v, "vector_dot() requires a vector-like right input"); + + using sum_type = + detail::remove_cvref_t; + + 
sum_type sum = sum_type(0); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + sum += vector_get_component(left, component) * vector_get_component(right, component); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto vector_norm_sq(const VectorLike & v) +{ + static_assert(is_vector_like_v, "vector_norm_sq() requires a vector-like input"); + + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + sum += libMesh::TensorTools::norm_sq(vector_get_component(v, component)); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +auto vector_norm(const VectorLike & v) +{ + using std::sqrt; + return sqrt(vector_norm_sq(v)); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_l1_norm(const VectorLike & v) +{ + static_assert(is_vector_like_v, "vector_l1_norm() requires a vector-like input"); + + using std::abs; + using norm_type = detail::remove_cvref_t; + + norm_type sum = norm_type(0); + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + sum += abs(vector_get_component(v, component)); + + return sum; +} + +template +LIBMESH_DEVICE_INLINE +bool vector_is_zero(const VectorLike & v) +{ + static_assert(is_vector_like_v, "vector_is_zero() requires a vector-like input"); + + for (unsigned int component = 0; component < LIBMESH_DIM; ++component) + if (vector_get_component(v, component) != vector_value_type_t(0)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_unit(const VectorLike & v) +{ + const auto length = vector_norm(v); + libmesh_assert_not_equal_to(length, static_cast(0.)); + return detail::divide_vector(v, length); +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_unit(const VectorLike & v) +{ + return vector_unit>(v); +} + +// Geometry + +template +LIBMESH_DEVICE_INLINE +ResultVector vector_cross(const LeftVector & left, const 
RightVector & right) +{ + ResultVector out; + out.zero(); + +#if LIBMESH_DIM == 3 + vector_set_component(out, + 0, + vector_get_component(left, 1) * vector_get_component(right, 2) - + vector_get_component(left, 2) * vector_get_component(right, 1)); + vector_set_component(out, + 1, + -vector_get_component(left, 0) * vector_get_component(right, 2) + + vector_get_component(left, 2) * vector_get_component(right, 0)); + vector_set_component(out, + 2, + vector_get_component(left, 0) * vector_get_component(right, 1) - + vector_get_component(left, 1) * vector_get_component(right, 0)); +#else + libmesh_ignore(left); + libmesh_ignore(right); +#endif + + return out; +} + +template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE +vector_semantic_type_t vector_cross(const LeftVector & left, const RightVector & right) +{ + return vector_cross>(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_triple_product(const LeftVector & left, + const MiddleVector & middle, + const RightVector & right) +{ +#if LIBMESH_DIM == 3 + return vector_get_component(left, 0) * + (vector_get_component(middle, 1) * vector_get_component(right, 2) - + vector_get_component(middle, 2) * vector_get_component(right, 1)) - + vector_get_component(left, 1) * + (vector_get_component(middle, 0) * vector_get_component(right, 2) - + vector_get_component(middle, 2) * vector_get_component(right, 0)) + + vector_get_component(left, 2) * + (vector_get_component(middle, 0) * vector_get_component(right, 1) - + vector_get_component(middle, 1) * vector_get_component(right, 0)); +#else + libmesh_ignore(left, middle, right); + using value_type = + detail::remove_cvref_t; + return value_type(0); +#endif +} + +template +LIBMESH_DEVICE_INLINE +auto vector_cross_norm_sq(const LeftVector & left, const RightVector & right) +{ + const auto z = vector_get_component(left, 0) * vector_get_component(right, 1) - + vector_get_component(left, 1) * vector_get_component(right, 0); + +#if LIBMESH_DIM == 3 + const auto x = 
vector_get_component(left, 1) * vector_get_component(right, 2) - + vector_get_component(left, 2) * vector_get_component(right, 1); + const auto y = vector_get_component(left, 0) * vector_get_component(right, 2) - + vector_get_component(left, 2) * vector_get_component(right, 0); + return x * x + y * y + z * z; +#else + return z * z; +#endif +} + +template +LIBMESH_DEVICE_INLINE +auto vector_solid_angle(const VectorA & v01, const VectorB & v02, const VectorC & v03) +{ + using std::atan; + + const auto norm01 = vector_norm(v01); + const auto norm02 = vector_norm(v02); + const auto norm03 = vector_norm(v03); + const auto tan_half_angle = + vector_triple_product(v01, v02, v03) / + (vector_dot(v01, v02) * norm03 + + vector_dot(v01, v03) * norm02 + + vector_dot(v02, v03) * norm01 + + norm01 * norm02 * norm03); + + return Real(2) * atan(tan_half_angle); +} + +// libMesh-like convenience wrappers + +template +LIBMESH_DEVICE_INLINE +auto contract(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v, + decltype(vector_dot(left, right))> +{ + return vector_dot(left, right); +} + +template +LIBMESH_DEVICE_INLINE +auto norm_sq(const VectorLike & v) + -> std::enable_if_t, decltype(vector_norm_sq(v))> +{ + return vector_norm_sq(v); +} + +template +LIBMESH_DEVICE_INLINE +auto norm(const VectorLike & v) + -> std::enable_if_t, decltype(vector_norm(v))> +{ + return vector_norm(v); +} + +template +LIBMESH_DEVICE_INLINE +auto is_zero(const VectorLike & v) + -> std::enable_if_t, bool> +{ + return vector_is_zero(v); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::assign(const RightVector & right) +{ + detail::assign_vector_components(*this, right); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::add(const RightVector & right) +{ + detail::update_vector_components(*this, right, value_type(1)); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::add_scaled(const RightVector & right, const value_type & 
factor) +{ + detail::update_vector_components(*this, right, factor); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::subtract(const RightVector & right) +{ + detail::update_vector_components(*this, right, value_type(-1)); +} + +template +template +LIBMESH_DEVICE_INLINE +void vector_ref::subtract_scaled(const RightVector & right, const value_type & factor) +{ + detail::update_vector_components(*this, right, -factor); +} + +template +LIBMESH_DEVICE_INLINE +void vector_ref::zero() +{ + detail::fill_vector_components(*this, value_type(0)); +} + +template +template +LIBMESH_DEVICE_INLINE +auto vector_ref::contract(const RightVector & right) const +{ + return vector_dot(*this, right); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::norm() const +{ + return vector_norm(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::norm_sq() const +{ + return vector_norm_sq(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::l1_norm() const +{ + return vector_l1_norm(*this); +} + +template +LIBMESH_DEVICE_INLINE +bool vector_ref::is_zero() const +{ + return vector_is_zero(*this); +} + +template +LIBMESH_DEVICE_INLINE +auto vector_ref::unit() const +{ + return vector_unit(*this); +} + +template +template +LIBMESH_DEVICE_INLINE +auto vector_ref::cross(const RightVector & right) const +{ + return vector_cross(*this, right); +} + +// Operator-compatible wrappers for storage-backed refs and mixed ref/owning math. 
+ +template +LIBMESH_DEVICE_INLINE +auto operator-(const VectorLike & v) + -> std::enable_if_t && is_vector_ref_v, + vector_semantic_type_t> +{ + return detail::scale_vector>(vector_value_type_t(-1), v); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + vector_semantic_type_t> +{ + return detail::linear_combination>( + vector_value_type_t(1), left, vector_value_type_t(1), right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator-(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + vector_semantic_type_t> +{ + return detail::linear_combination>( + vector_value_type_t(1), left, vector_value_type_t(-1), right); +} + +template && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const LeftVector & left, const RightVector & right) +{ + return vector_dot(left, right); +} + +template && !is_tensor_like_v && + is_vector_like_v && is_vector_ref_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const Scalar & alpha, const VectorLike & v) +{ + return detail::scale_vector>(alpha, v); +} + +template && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator*(const VectorLike & v, const Scalar & alpha) +{ + return detail::scale_vector>(alpha, v); +} + +template && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + int>::type = 0> +LIBMESH_DEVICE_INLINE +auto operator/(const VectorLike & v, const Scalar & alpha) +{ + return detail::divide_vector>(v, alpha); +} + +template +LIBMESH_DEVICE_INLINE +auto operator==(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + bool> +{ + for (unsigned int component = 0; component < 
LIBMESH_DIM; ++component) + if (vector_get_component(left, component) != vector_get_component(right, component)) + return false; + + return true; +} + +template +LIBMESH_DEVICE_INLINE +auto operator!=(const LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + bool> +{ + return !(left == right); +} + +template +LIBMESH_DEVICE_INLINE +auto operator+=(LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + LeftVector &> +{ + detail::update_vector_components(left, right, vector_value_type_t(1)); + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator-=(LeftVector & left, const RightVector & right) + -> std::enable_if_t && is_vector_like_v && + (is_vector_ref_v || is_vector_ref_v), + LeftVector &> +{ + detail::update_vector_components(left, right, vector_value_type_t(-1)); + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator*=(LeftVector & left, const Scalar & alpha) + -> std::enable_if_t && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftVector &> +{ + detail::scale_vector_components(left, alpha); + return left; +} + +template +LIBMESH_DEVICE_INLINE +auto operator/=(LeftVector & left, const Scalar & alpha) + -> std::enable_if_t && is_vector_ref_v && + !is_vector_like_v && !is_tensor_like_v, + LeftVector &> +{ + detail::divide_vector_components(left, alpha); + return left; +} + +} // namespace libMesh::Kokkos + +#endif // LIBMESH_KOKKOS_VECTOR_OPS_H diff --git a/include/include_HEADERS b/include/include_HEADERS index 115b473ba2e..ca88bcfba61 100644 --- a/include/include_HEADERS +++ b/include/include_HEADERS @@ -28,6 +28,7 @@ include_HEADERS = \ base/libmesh_abort.h \ base/libmesh_base.h \ base/libmesh_common.h \ + base/libmesh_device.h \ base/libmesh_documentation.h \ base/libmesh_exceptions.h \ base/libmesh_logging.h \ @@ -49,6 +50,7 @@ include_HEADERS = \ 
enums/enum_elem_quality.h \ enums/enum_elem_type.h \ enums/enum_error_estimator_type.h \ + enums/enum_fe_elem_class.h \ enums/enum_fe_family.h \ enums/enum_inf_map_type.h \ enums/enum_io_package.h \ @@ -87,8 +89,13 @@ include_HEADERS = \ fe/fe_interface.h \ fe/fe_interface_macros.h \ fe/fe_lagrange_shape_1D.h \ + fe/fe_reference_element_traits.h \ + fe/fe_serendipity_lagrange.h \ + fe/fe_simplex_lagrange.h \ + fe/fe_tensor_product_lagrange.h \ fe/fe_macro.h \ fe/fe_map.h \ + fe/fe_shape_traits.h \ fe/fe_transformation_base.h \ fe/fe_type.h \ fe/fe_xyz_map.h \ @@ -174,6 +181,11 @@ include_HEADERS = \ geom/sphere.h \ geom/stored_range.h \ geom/surface.h \ + gpu/kokkos_linalg_base.h \ + gpu/kokkos_storage.h \ + gpu/kokkos_storage_policy.h \ + gpu/kokkos_tensor_ops.h \ + gpu/kokkos_vector_ops.h \ ghosting/default_coupling.h \ ghosting/ghost_point_neighbors.h \ ghosting/ghosting_functor.h \ @@ -270,6 +282,7 @@ include_HEADERS = \ numerics/parsed_fem_function.h \ numerics/parsed_fem_function_parameter.h \ numerics/parsed_function.h \ + numerics/parsed_function_program.h \ numerics/parsed_function_parameter.h \ numerics/petsc_macro.h \ numerics/petsc_matrix.h \ @@ -322,7 +335,6 @@ include_HEADERS = \ parallel/threads_allocators.h \ parallel/threads_none.h \ parallel/threads_pthread.h \ - parallel/threads_spin_mutex_forward.h \ parallel/threads_tbb.h \ partitioning/centroid_partitioner.h \ partitioning/hilbert_sfc_partitioner.h \ @@ -343,6 +355,7 @@ include_HEADERS = \ quadrature/quadrature_composite.h \ quadrature/quadrature_conical.h \ quadrature/quadrature_gauss.h \ + quadrature/quadrature_gauss_rules.h \ quadrature/quadrature_gauss_lobatto.h \ quadrature/quadrature_gm.h \ quadrature/quadrature_grid.h \ diff --git a/include/libmesh/Makefile.am b/include/libmesh/Makefile.am index 7b8880c3a42..58f7d2a315a 100644 --- a/include/libmesh/Makefile.am +++ b/include/libmesh/Makefile.am @@ -19,6 +19,7 @@ BUILT_SOURCES = \ libmesh_augment_std_namespace.h \ libmesh_base.h \ 
libmesh_common.h \ + libmesh_device.h \ libmesh_documentation.h \ libmesh_exceptions.h \ libmesh_logging.h \ @@ -40,6 +41,7 @@ BUILT_SOURCES = \ enum_elem_quality.h \ enum_elem_type.h \ enum_error_estimator_type.h \ + enum_fe_elem_class.h \ enum_fe_family.h \ enum_inf_map_type.h \ enum_io_package.h \ @@ -80,6 +82,11 @@ BUILT_SOURCES = \ fe_lagrange_shape_1D.h \ fe_macro.h \ fe_map.h \ + fe_reference_element_traits.h \ + fe_serendipity_lagrange.h \ + fe_shape_traits.h \ + fe_simplex_lagrange.h \ + fe_tensor_product_lagrange.h \ fe_transformation_base.h \ fe_type.h \ fe_xyz_map.h \ @@ -172,6 +179,23 @@ BUILT_SOURCES = \ overlap_coupling.h \ point_neighbor_coupling.h \ sibling_coupling.h \ + kokkos_fe_base.h \ + kokkos_fe_evaluator.h \ + kokkos_fe_face_map.h \ + kokkos_fe_lagrange_1d.h \ + kokkos_fe_lagrange_2d.h \ + kokkos_fe_lagrange_3d.h \ + kokkos_fe_map.h \ + kokkos_fe_monomial.h \ + kokkos_parsed_function.h \ + kokkos_fe_shape_dispatch.h \ + kokkos_fe_types.h \ + kokkos_linalg_base.h \ + kokkos_quadrature.h \ + kokkos_storage.h \ + kokkos_storage_policy.h \ + kokkos_tensor_ops.h \ + kokkos_vector_ops.h \ abaqus_io.h \ boundary_info.h \ boundary_mesh.h \ @@ -263,6 +287,7 @@ BUILT_SOURCES = \ parsed_fem_function.h \ parsed_fem_function_parameter.h \ parsed_function.h \ + parsed_function_program.h \ parsed_function_parameter.h \ petsc_macro.h \ petsc_matrix.h \ @@ -317,7 +342,6 @@ BUILT_SOURCES = \ threads_allocators.h \ threads_none.h \ threads_pthread.h \ - threads_spin_mutex_forward.h \ threads_tbb.h \ centroid_partitioner.h \ hilbert_sfc_partitioner.h \ @@ -340,6 +364,7 @@ BUILT_SOURCES = \ quadrature_conical.h \ quadrature_gauss.h \ quadrature_gauss_lobatto.h \ + quadrature_gauss_rules.h \ quadrature_gm.h \ quadrature_grid.h \ quadrature_jacobi.h \ @@ -503,6 +528,7 @@ BUILT_SOURCES = \ statistics.h \ string_to_enum.h \ thread_buffered_syncbuf.h \ + threads_spin_mutex_forward.h \ timestamp.h \ topology_map.h \ tree.h \ @@ -657,6 +683,9 @@ libmesh_base.h: 
$(top_srcdir)/include/base/libmesh_base.h libmesh_common.h: $(top_srcdir)/include/base/libmesh_common.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +libmesh_device.h: $(top_srcdir)/include/base/libmesh_device.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + libmesh_documentation.h: $(top_srcdir)/include/base/libmesh_documentation.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -720,6 +749,9 @@ enum_elem_type.h: $(top_srcdir)/include/enums/enum_elem_type.h enum_error_estimator_type.h: $(top_srcdir)/include/enums/enum_error_estimator_type.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +enum_fe_elem_class.h: $(top_srcdir)/include/enums/enum_fe_elem_class.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + enum_fe_family.h: $(top_srcdir)/include/enums/enum_fe_family.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -840,6 +872,21 @@ fe_macro.h: $(top_srcdir)/include/fe/fe_macro.h fe_map.h: $(top_srcdir)/include/fe/fe_map.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +fe_reference_element_traits.h: $(top_srcdir)/include/fe/fe_reference_element_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_serendipity_lagrange.h: $(top_srcdir)/include/fe/fe_serendipity_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_shape_traits.h: $(top_srcdir)/include/fe/fe_shape_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_simplex_lagrange.h: $(top_srcdir)/include/fe/fe_simplex_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_tensor_product_lagrange.h: $(top_srcdir)/include/fe/fe_tensor_product_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + fe_transformation_base.h: $(top_srcdir)/include/fe/fe_transformation_base.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1116,6 +1163,57 @@ point_neighbor_coupling.h: $(top_srcdir)/include/ghosting/point_neighbor_couplin sibling_coupling.h: $(top_srcdir)/include/ghosting/sibling_coupling.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_fe_base.h: $(top_srcdir)/include/gpu/kokkos_fe_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + 
+kokkos_fe_evaluator.h: $(top_srcdir)/include/gpu/kokkos_fe_evaluator.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_face_map.h: $(top_srcdir)/include/gpu/kokkos_fe_face_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_1d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_1d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_2d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_2d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_3d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_3d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_map.h: $(top_srcdir)/include/gpu/kokkos_fe_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_parsed_function.h: $(top_srcdir)/include/gpu/kokkos_parsed_function.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_monomial.h: $(top_srcdir)/include/gpu/kokkos_fe_monomial.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_shape_dispatch.h: $(top_srcdir)/include/gpu/kokkos_fe_shape_dispatch.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_types.h: $(top_srcdir)/include/gpu/kokkos_fe_types.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_linalg_base.h: $(top_srcdir)/include/gpu/kokkos_linalg_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_quadrature.h: $(top_srcdir)/include/gpu/kokkos_quadrature.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage.h: $(top_srcdir)/include/gpu/kokkos_storage.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage_policy.h: $(top_srcdir)/include/gpu/kokkos_storage_policy.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_tensor_ops.h: $(top_srcdir)/include/gpu/kokkos_tensor_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_vector_ops.h: $(top_srcdir)/include/gpu/kokkos_vector_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + abaqus_io.h: $(top_srcdir)/include/mesh/abaqus_io.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1389,6 +1487,9 @@ parsed_fem_function_parameter.h: 
$(top_srcdir)/include/numerics/parsed_fem_funct parsed_function.h: $(top_srcdir)/include/numerics/parsed_function.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +parsed_function_program.h: $(top_srcdir)/include/numerics/parsed_function_program.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + parsed_function_parameter.h: $(top_srcdir)/include/numerics/parsed_function_parameter.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1551,9 +1652,6 @@ threads_none.h: $(top_srcdir)/include/parallel/threads_none.h threads_pthread.h: $(top_srcdir)/include/parallel/threads_pthread.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ -threads_spin_mutex_forward.h: $(top_srcdir)/include/parallel/threads_spin_mutex_forward.h - $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - threads_tbb.h: $(top_srcdir)/include/parallel/threads_tbb.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1620,6 +1718,9 @@ quadrature_gauss.h: $(top_srcdir)/include/quadrature/quadrature_gauss.h quadrature_gauss_lobatto.h: $(top_srcdir)/include/quadrature/quadrature_gauss_lobatto.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +quadrature_gauss_rules.h: $(top_srcdir)/include/quadrature/quadrature_gauss_rules.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + quadrature_gm.h: $(top_srcdir)/include/quadrature/quadrature_gm.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -2109,6 +2210,9 @@ string_to_enum.h: $(top_srcdir)/include/utils/string_to_enum.h thread_buffered_syncbuf.h: $(top_srcdir)/include/utils/thread_buffered_syncbuf.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +threads_spin_mutex_forward.h: $(top_srcdir)/include/parallel/threads_spin_mutex_forward.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + timestamp.h: $(top_srcdir)/include/utils/timestamp.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -2138,4 +2242,3 @@ xdr_cxx.h: $(top_srcdir)/include/utils/xdr_cxx.h parallel_communicator_specializations: $(top_srcdir)/include/timpi_shims/parallel_communicator_specializations $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - diff --git a/include/libmesh/Makefile.in 
b/include/libmesh/Makefile.in index 0e95a2a8ef6..3b2be5283c9 100644 --- a/include/libmesh/Makefile.in +++ b/include/libmesh/Makefile.in @@ -309,11 +309,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -361,6 +369,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -530,21 +539,22 @@ EXTRA_DIST = rebuild_makefile.sh BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ dof_object.h factory.h float128_shims.h getpot.h id_types.h \ libmesh.h libmesh_abort.h libmesh_augment_std_namespace.h \ - libmesh_base.h libmesh_common.h libmesh_documentation.h \ - libmesh_exceptions.h libmesh_logging.h libmesh_singleton.h \ - libmesh_version.h multi_predicates.h periodic_boundaries.h \ - periodic_boundary.h periodic_boundary_base.h print_trace.h \ + libmesh_base.h libmesh_common.h libmesh_device.h \ + libmesh_documentation.h libmesh_exceptions.h libmesh_logging.h \ + libmesh_singleton.h libmesh_version.h multi_predicates.h \ + periodic_boundaries.h periodic_boundary.h \ + periodic_boundary_base.h print_trace.h \ reference_counted_object.h reference_counter.h \ single_predicates.h sparsity_pattern.h variable.h \ variant_filter_iterator.h enum_convergence_flags.h \ enum_eigen_solver_type.h enum_elem_quality.h enum_elem_type.h \ - enum_error_estimator_type.h 
enum_fe_family.h \ - enum_inf_map_type.h enum_io_package.h enum_matrix_build_type.h \ - enum_norm_type.h enum_order.h enum_parallel_type.h \ - enum_partitioner_type.h enum_point_locator_type.h \ - enum_preconditioner_type.h enum_quadrature_type.h \ - enum_solver_package.h enum_solver_type.h \ - enum_subset_solve_mode.h enum_xdr_mode.h \ + enum_error_estimator_type.h enum_fe_elem_class.h \ + enum_fe_family.h enum_inf_map_type.h enum_io_package.h \ + enum_matrix_build_type.h enum_norm_type.h enum_order.h \ + enum_parallel_type.h enum_partitioner_type.h \ + enum_point_locator_type.h enum_preconditioner_type.h \ + enum_quadrature_type.h enum_solver_package.h \ + enum_solver_type.h enum_subset_solve_mode.h enum_xdr_mode.h \ adjoint_refinement_estimator.h \ adjoint_residual_error_estimator.h discontinuity_measure.h \ error_estimator.h exact_error_estimator.h exact_solution.h \ @@ -555,7 +565,10 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ weighted_patch_recovery_error_estimator.h fe.h fe_abstract.h \ fe_base.h fe_compute_data.h fe_interface.h \ fe_interface_macros.h fe_lagrange_shape_1D.h fe_macro.h \ - fe_map.h fe_transformation_base.h fe_type.h fe_xyz_map.h \ + fe_map.h fe_reference_element_traits.h \ + fe_serendipity_lagrange.h fe_shape_traits.h \ + fe_simplex_lagrange.h fe_tensor_product_lagrange.h \ + fe_transformation_base.h fe_type.h fe_xyz_map.h \ h1_fe_transformation.h hcurl_fe_transformation.h \ hdiv_fe_transformation.h inf_fe.h inf_fe_instantiate_1D.h \ inf_fe_instantiate_2D.h inf_fe_instantiate_3D.h inf_fe_macro.h \ @@ -580,23 +593,30 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ remote_elem.h sphere.h stored_range.h surface.h \ default_coupling.h ghost_point_neighbors.h ghosting_functor.h \ non_manifold_coupling.h overlap_coupling.h \ - point_neighbor_coupling.h sibling_coupling.h abaqus_io.h \ - boundary_info.h boundary_mesh.h checkpoint_io.h \ - distributed_mesh.h dyna_io.h ensight_io.h exodusII_io.h \ - 
exodusII_io_helper.h exodus_header_info.h fro_io.h gmsh_io.h \ - gmv_io.h gnuplot_io.h inf_elem_builder.h matlab_io.h \ - medit_io.h mesh.h mesh_base.h mesh_communication.h \ - mesh_function.h mesh_generation.h mesh_input.h \ - mesh_modification.h mesh_netgen_interface.h mesh_output.h \ - mesh_refinement.h mesh_serializer.h mesh_smoother.h \ - mesh_smoother_laplace.h mesh_smoother_vsmoother.h \ - mesh_subdivision_support.h mesh_tet_interface.h \ - mesh_tetgen_interface.h mesh_tetgen_wrapper.h mesh_tools.h \ - mesh_triangle_holes.h mesh_triangle_interface.h \ - mesh_triangle_wrapper.h namebased_io.h nemesis_io.h \ - nemesis_io_helper.h off_io.h parallel_mesh.h patch.h \ - poly2tri_triangulator.h postscript_io.h replicated_mesh.h \ - serial_mesh.h sides_to_elem_map.h simplex_refiner.h stl_io.h \ + point_neighbor_coupling.h sibling_coupling.h kokkos_fe_base.h \ + kokkos_fe_evaluator.h kokkos_fe_face_map.h \ + kokkos_fe_lagrange_1d.h kokkos_fe_lagrange_2d.h \ + kokkos_fe_lagrange_3d.h kokkos_fe_map.h kokkos_fe_monomial.h \ + kokkos_parsed_function.h kokkos_fe_shape_dispatch.h \ + kokkos_fe_types.h kokkos_linalg_base.h kokkos_quadrature.h \ + kokkos_storage.h kokkos_storage_policy.h kokkos_tensor_ops.h \ + kokkos_vector_ops.h abaqus_io.h boundary_info.h \ + boundary_mesh.h checkpoint_io.h distributed_mesh.h dyna_io.h \ + ensight_io.h exodusII_io.h exodusII_io_helper.h \ + exodus_header_info.h fro_io.h gmsh_io.h gmv_io.h gnuplot_io.h \ + inf_elem_builder.h matlab_io.h medit_io.h mesh.h mesh_base.h \ + mesh_communication.h mesh_function.h mesh_generation.h \ + mesh_input.h mesh_modification.h mesh_netgen_interface.h \ + mesh_output.h mesh_refinement.h mesh_serializer.h \ + mesh_smoother.h mesh_smoother_laplace.h \ + mesh_smoother_vsmoother.h mesh_subdivision_support.h \ + mesh_tet_interface.h mesh_tetgen_interface.h \ + mesh_tetgen_wrapper.h mesh_tools.h mesh_triangle_holes.h \ + mesh_triangle_interface.h mesh_triangle_wrapper.h \ + namebased_io.h nemesis_io.h 
nemesis_io_helper.h off_io.h \ + parallel_mesh.h patch.h poly2tri_triangulator.h \ + postscript_io.h replicated_mesh.h serial_mesh.h \ + sides_to_elem_map.h simplex_refiner.h stl_io.h \ sync_refinement_flags.h tecplot_io.h tetgen_io.h \ triangulator_interface.h ucd_io.h unstructured_mesh.h unv_io.h \ vtk_io.h xdr_io.h analytic_function.h composite_fem_function.h \ @@ -610,13 +630,14 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ function_base.h laspack_matrix.h laspack_vector.h \ lumped_mass_matrix.h numeric_vector.h parsed_fem_function.h \ parsed_fem_function_parameter.h parsed_function.h \ - parsed_function_parameter.h petsc_macro.h petsc_matrix.h \ - petsc_matrix_base.h petsc_matrix_shell_matrix.h \ - petsc_mffd_matrix.h petsc_preconditioner.h \ - petsc_shell_matrix.h petsc_solver_exception.h petsc_vector.h \ - preconditioner.h raw_accessor.h refinement_selector.h \ - shell_matrix.h sparse_matrix.h sparse_shell_matrix.h \ - static_condensation.h static_condensation_dof_map.h \ + parsed_function_program.h parsed_function_parameter.h \ + petsc_macro.h petsc_matrix.h petsc_matrix_base.h \ + petsc_matrix_shell_matrix.h petsc_mffd_matrix.h \ + petsc_preconditioner.h petsc_shell_matrix.h \ + petsc_solver_exception.h petsc_vector.h preconditioner.h \ + raw_accessor.h refinement_selector.h shell_matrix.h \ + sparse_matrix.h sparse_shell_matrix.h static_condensation.h \ + static_condensation_dof_map.h \ static_condensation_preconditioner.h sum_shell_matrix.h \ tensor_shell_matrix.h tensor_tools.h tensor_value.h \ trilinos_epetra_matrix.h trilinos_epetra_vector.h \ @@ -629,26 +650,27 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ parallel_ghost_sync.h parallel_hilbert.h parallel_histogram.h \ parallel_node.h parallel_object.h parallel_only.h \ parallel_sort.h threads.h threads_allocators.h threads_none.h \ - threads_pthread.h threads_spin_mutex_forward.h threads_tbb.h \ - centroid_partitioner.h hilbert_sfc_partitioner.h \ - 
linear_partitioner.h mapped_subdomain_partitioner.h \ - metis_csr_graph.h metis_partitioner.h morton_sfc_partitioner.h \ - parmetis_helper.h parmetis_partitioner.h partitioner.h \ - sfc_partitioner.h subdomain_partitioner.h diff_physics.h \ - diff_qoi.h fem_physics.h quadrature.h quadrature_clough.h \ + threads_pthread.h threads_tbb.h centroid_partitioner.h \ + hilbert_sfc_partitioner.h linear_partitioner.h \ + mapped_subdomain_partitioner.h metis_csr_graph.h \ + metis_partitioner.h morton_sfc_partitioner.h parmetis_helper.h \ + parmetis_partitioner.h partitioner.h sfc_partitioner.h \ + subdomain_partitioner.h diff_physics.h diff_qoi.h \ + fem_physics.h quadrature.h quadrature_clough.h \ quadrature_composite.h quadrature_conical.h quadrature_gauss.h \ - quadrature_gauss_lobatto.h quadrature_gm.h quadrature_grid.h \ - quadrature_jacobi.h quadrature_monomial.h quadrature_nodal.h \ - quadrature_simpson.h quadrature_trap.h rb_assembly_expansion.h \ - rb_construction.h rb_construction_base.h \ - rb_data_deserialization.h rb_data_serialization.h \ - rb_eim_assembly.h rb_eim_construction.h rb_eim_evaluation.h \ - rb_eim_theta.h rb_evaluation.h rb_parameters.h \ - rb_parametrized.h rb_parametrized_function.h \ - rb_scm_construction.h rb_scm_evaluation.h \ - rb_temporal_discretization.h rb_theta.h rb_theta_expansion.h \ - transient_rb_assembly_expansion.h transient_rb_construction.h \ - transient_rb_evaluation.h transient_rb_theta_expansion.h \ + quadrature_gauss_lobatto.h quadrature_gauss_rules.h \ + quadrature_gm.h quadrature_grid.h quadrature_jacobi.h \ + quadrature_monomial.h quadrature_nodal.h quadrature_simpson.h \ + quadrature_trap.h rb_assembly_expansion.h rb_construction.h \ + rb_construction_base.h rb_data_deserialization.h \ + rb_data_serialization.h rb_eim_assembly.h \ + rb_eim_construction.h rb_eim_evaluation.h rb_eim_theta.h \ + rb_evaluation.h rb_parameters.h rb_parametrized.h \ + rb_parametrized_function.h rb_scm_construction.h \ + rb_scm_evaluation.h 
rb_temporal_discretization.h rb_theta.h \ + rb_theta_expansion.h transient_rb_assembly_expansion.h \ + transient_rb_construction.h transient_rb_evaluation.h \ + transient_rb_theta_expansion.h \ boundary_volume_solution_transfer.h direct_solution_transfer.h \ dtk_adapter.h dtk_evaluator.h dtk_solution_transfer.h \ meshfree_interpolation.h meshfree_interpolation_function.h \ @@ -701,8 +723,9 @@ BUILT_SOURCES = dirichlet_boundaries.h dof_map.h dof_map_base.h \ point_locator_tree.h pointer_to_pointer_iter.h \ pool_allocator.h restore_warnings.h simple_range.h \ statistics.h string_to_enum.h thread_buffered_syncbuf.h \ - timestamp.h topology_map.h tree.h tree_base.h tree_node.h \ - utility.h vectormap.h win_gettimeofday.h xdr_cxx.h \ + threads_spin_mutex_forward.h timestamp.h topology_map.h tree.h \ + tree_base.h tree_node.h utility.h vectormap.h \ + win_gettimeofday.h xdr_cxx.h \ parallel_communicator_specializations $(am__append_1) \ $(am__append_3) $(am__append_5) $(am__append_7) \ $(am__append_9) $(am__append_11) $(am__append_13) \ @@ -992,6 +1015,9 @@ libmesh_base.h: $(top_srcdir)/include/base/libmesh_base.h libmesh_common.h: $(top_srcdir)/include/base/libmesh_common.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +libmesh_device.h: $(top_srcdir)/include/base/libmesh_device.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + libmesh_documentation.h: $(top_srcdir)/include/base/libmesh_documentation.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1055,6 +1081,9 @@ enum_elem_type.h: $(top_srcdir)/include/enums/enum_elem_type.h enum_error_estimator_type.h: $(top_srcdir)/include/enums/enum_error_estimator_type.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +enum_fe_elem_class.h: $(top_srcdir)/include/enums/enum_fe_elem_class.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + enum_fe_family.h: $(top_srcdir)/include/enums/enum_fe_family.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1175,6 +1204,21 @@ fe_macro.h: $(top_srcdir)/include/fe/fe_macro.h fe_map.h: $(top_srcdir)/include/fe/fe_map.h 
$(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +fe_reference_element_traits.h: $(top_srcdir)/include/fe/fe_reference_element_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_serendipity_lagrange.h: $(top_srcdir)/include/fe/fe_serendipity_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_shape_traits.h: $(top_srcdir)/include/fe/fe_shape_traits.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_simplex_lagrange.h: $(top_srcdir)/include/fe/fe_simplex_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +fe_tensor_product_lagrange.h: $(top_srcdir)/include/fe/fe_tensor_product_lagrange.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + fe_transformation_base.h: $(top_srcdir)/include/fe/fe_transformation_base.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1451,6 +1495,57 @@ point_neighbor_coupling.h: $(top_srcdir)/include/ghosting/point_neighbor_couplin sibling_coupling.h: $(top_srcdir)/include/ghosting/sibling_coupling.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +kokkos_fe_base.h: $(top_srcdir)/include/gpu/kokkos_fe_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_evaluator.h: $(top_srcdir)/include/gpu/kokkos_fe_evaluator.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_face_map.h: $(top_srcdir)/include/gpu/kokkos_fe_face_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_1d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_1d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_2d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_2d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_lagrange_3d.h: $(top_srcdir)/include/gpu/kokkos_fe_lagrange_3d.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_map.h: $(top_srcdir)/include/gpu/kokkos_fe_map.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_parsed_function.h: $(top_srcdir)/include/gpu/kokkos_parsed_function.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_monomial.h: $(top_srcdir)/include/gpu/kokkos_fe_monomial.h + $(AM_V_GEN)rm -f $@ && $(LN_S) 
-f $< $@ + +kokkos_fe_shape_dispatch.h: $(top_srcdir)/include/gpu/kokkos_fe_shape_dispatch.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_fe_types.h: $(top_srcdir)/include/gpu/kokkos_fe_types.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_linalg_base.h: $(top_srcdir)/include/gpu/kokkos_linalg_base.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_quadrature.h: $(top_srcdir)/include/gpu/kokkos_quadrature.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage.h: $(top_srcdir)/include/gpu/kokkos_storage.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_storage_policy.h: $(top_srcdir)/include/gpu/kokkos_storage_policy.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_tensor_ops.h: $(top_srcdir)/include/gpu/kokkos_tensor_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + +kokkos_vector_ops.h: $(top_srcdir)/include/gpu/kokkos_vector_ops.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + abaqus_io.h: $(top_srcdir)/include/mesh/abaqus_io.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1724,6 +1819,9 @@ parsed_fem_function_parameter.h: $(top_srcdir)/include/numerics/parsed_fem_funct parsed_function.h: $(top_srcdir)/include/numerics/parsed_function.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +parsed_function_program.h: $(top_srcdir)/include/numerics/parsed_function_program.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + parsed_function_parameter.h: $(top_srcdir)/include/numerics/parsed_function_parameter.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1886,9 +1984,6 @@ threads_none.h: $(top_srcdir)/include/parallel/threads_none.h threads_pthread.h: $(top_srcdir)/include/parallel/threads_pthread.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ -threads_spin_mutex_forward.h: $(top_srcdir)/include/parallel/threads_spin_mutex_forward.h - $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ - threads_tbb.h: $(top_srcdir)/include/parallel/threads_tbb.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -1955,6 +2050,9 @@ quadrature_gauss.h: 
$(top_srcdir)/include/quadrature/quadrature_gauss.h quadrature_gauss_lobatto.h: $(top_srcdir)/include/quadrature/quadrature_gauss_lobatto.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +quadrature_gauss_rules.h: $(top_srcdir)/include/quadrature/quadrature_gauss_rules.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + quadrature_gm.h: $(top_srcdir)/include/quadrature/quadrature_gm.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ @@ -2444,6 +2542,9 @@ string_to_enum.h: $(top_srcdir)/include/utils/string_to_enum.h thread_buffered_syncbuf.h: $(top_srcdir)/include/utils/thread_buffered_syncbuf.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ +threads_spin_mutex_forward.h: $(top_srcdir)/include/parallel/threads_spin_mutex_forward.h + $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ + timestamp.h: $(top_srcdir)/include/utils/timestamp.h $(AM_V_GEN)rm -f $@ && $(LN_S) -f $< $@ diff --git a/include/libmesh_config.h.in b/include/libmesh_config.h.in index 9adaa9efe05..a8e72c8859c 100644 --- a/include/libmesh_config.h.in +++ b/include/libmesh_config.h.in @@ -434,6 +434,9 @@ /* Define to 1 if you have the header file. 
*/ #undef HAVE_INTTYPES_H +/* Define if Kokkos support is enabled in libMesh */ +#undef HAVE_KOKKOS + /* Flag indicating whether the library will be compiled with LASPACK support */ #undef HAVE_LASPACK diff --git a/include/mesh/mesh_base.h b/include/mesh/mesh_base.h index 59d727a4282..de6c6fd6baf 100644 --- a/include/mesh/mesh_base.h +++ b/include/mesh/mesh_base.h @@ -31,10 +31,16 @@ #include "libmesh/parallel_object.h" #include "libmesh/simple_range.h" +#ifdef LIBMESH_HAVE_KOKKOS +#include "libmesh/kokkos_storage_policy.h" +#endif + // C++ Includes #include #include #include +#include +#include #include "libmesh/vector_value.h" @@ -188,6 +194,48 @@ class MeshBase : public ParallelObject */ virtual void clear (); +#ifdef LIBMESH_HAVE_KOKKOS + struct KokkosGeometryCache + { + using node_id_view = ::Kokkos::View; + using elem_id_view = ::Kokkos::View; + using node_coord_view = ::Kokkos::View; + using elem_node_id_view = ::Kokkos::View; + using elem_type_view = ::Kokkos::View; + using elem_mapping_type_view = ::Kokkos::View; + using elem_n_nodes_view = ::Kokkos::View; + using elem_p_level_view = ::Kokkos::View; + using elem_subdomain_view = ::Kokkos::View; + + node_id_view node_ids; + elem_id_view element_ids; + node_coord_view node_coordinates; + elem_node_id_view element_node_ids; + elem_type_view element_types; + elem_mapping_type_view element_mapping_types; + elem_n_nodes_view element_n_nodes; + elem_p_level_view element_p_levels; + elem_subdomain_view element_subdomains; + std::vector host_node_ids; + std::vector host_element_ids; + std::unordered_map node_lookup; + std::unordered_map element_lookup; + unsigned int max_nodes = 0; + }; + + const KokkosGeometryCache & get_kokkos_geometry_cache() const; + unsigned int get_kokkos_elem_index(const Elem & elem) const; + void prepare_kokkos_geometry_cache() const; +#else + void prepare_kokkos_geometry_cache() const {} +#endif + void clear_kokkos_geometry_cache() const + { +#ifdef LIBMESH_HAVE_KOKKOS + 
_kokkos_geometry_cache.reset(); +#endif + } + /** * Deletes all the element data that is currently stored. * @@ -240,7 +288,10 @@ class MeshBase : public ParallelObject * generally more efficient to mark finer-grained settings instead. */ void unset_is_prepared() - { _preparation = false; } + { + _preparation = false; + this->clear_kokkos_geometry_cache(); + } /** * Tells this we have done some operation creating unpartitioned @@ -250,7 +301,10 @@ class MeshBase : public ParallelObject * them too or call this method. */ void unset_is_partitioned() - { _preparation.is_partitioned = false; } + { + _preparation.is_partitioned = false; + this->clear_kokkos_geometry_cache(); + } /** * Tells this we have done some operation (e.g. adding objects to a @@ -2145,6 +2199,10 @@ class MeshBase : public ParallelObject */ mutable std::unique_ptr _point_locator; +#ifdef LIBMESH_HAVE_KOKKOS + mutable std::unique_ptr _kokkos_geometry_cache; +#endif + /** * Do we count lower dimensional elements in point locator refinement? * This is relevant in tree-based point locators, for example. 
diff --git a/include/numerics/parsed_fem_function.h b/include/numerics/parsed_fem_function.h index 9f5646b1bd6..8e0f3ce4778 100644 --- a/include/numerics/parsed_fem_function.h +++ b/include/numerics/parsed_fem_function.h @@ -25,12 +25,13 @@ // Local Includes #include "libmesh/fem_function_base.h" #include "libmesh/int_range.h" +#include "libmesh/parsed_function_program.h" #include "libmesh/point.h" #include "libmesh/system.h" #ifdef LIBMESH_HAVE_FPARSER // FParser includes -#include "libmesh/fparser.hh" +#include "libmesh/fparser_ad.hh" #endif // C++ includes @@ -106,6 +107,10 @@ class ParsedFEMFunction : public FEMFunctionBase const std::string & expression() { return _expression; } +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + libMesh::ParsedFEMFunctionProgramBundle build_program_bundle() const; +#endif + /** * \returns The value of an inline variable. * @@ -440,6 +445,68 @@ ParsedFEMFunction::clone () const (_sys, _expression, &_additional_vars, &_initial_vals); } +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +template +inline +libMesh::ParsedFEMFunctionProgramBundle +ParsedFEMFunction::build_program_bundle() const +{ + libmesh_error_msg_if(_subexpressions.size() != 1, + "Kokkos ParsedFEMFunction export currently supports scalar expressions only"); + + libMesh::ParsedFEMFunctionProgramBundle bundle; + bundle.uses_field_gradients = _n_requested_grad_components > 0; + bundle.uses_field_hessians = _n_requested_hess_components > 0; + bundle.uses_normals = _requested_normals; + bundle.uses_additional_variables = !_additional_vars.empty(); + + auto fp = std::make_unique>(); + fp->AddConstant("NaN", std::numeric_limits::quiet_NaN()); + fp->AddConstant("pi", std::acos(Real(-1))); + fp->AddConstant("e", std::exp(Real(1))); + libmesh_error_msg_if + (fp->Parse(_subexpressions.front(), variables) != -1, // -1 for success + "ERROR: FunctionParser is 
unable to parse expression for Kokkos export: " + << _subexpressions.front() << '\n' << fp->ErrorMsg()); + + fp->SetADFlags(FunctionParserADBase::ADSilenceErrors | + FunctionParserADBase::ADAutoOptimize); + fp->Optimize(); + bundle.value = build_parsed_function_program(*fp); + + auto dx_fp = std::make_unique>(*fp); + dx_fp->AutoDiff("x"); + bundle.dx = build_parsed_function_program(*dx_fp); +#if LIBMESH_DIM > 1 + auto dy_fp = std::make_unique>(*fp); + dy_fp->AutoDiff("y"); + bundle.dy = build_parsed_function_program(*dy_fp); +#endif +#if LIBMESH_DIM > 2 + auto dz_fp = std::make_unique>(*fp); + dz_fp->AutoDiff("z"); + bundle.dz = build_parsed_function_program(*dz_fp); +#endif + auto dt_fp = std::make_unique>(*fp); + dt_fp->AutoDiff("t"); + bundle.dt = build_parsed_function_program(*dt_fp); + + for (unsigned int v = 0; v != _n_vars; ++v) + { + if (!_need_var[v]) + continue; + + const std::string & varname = _sys.variable_name(v); + auto dvar_fp = std::make_unique>(*fp); + dvar_fp->AutoDiff(varname); + bundle.value_variable_numbers.push_back(v); + bundle.value_variable_derivatives.push_back(build_parsed_function_program(*dvar_fp)); + } + + return bundle; +} +#endif + template inline Output diff --git a/include/numerics/parsed_function.h b/include/numerics/parsed_function.h index cc9f34f61d4..ef0206955f3 100644 --- a/include/numerics/parsed_function.h +++ b/include/numerics/parsed_function.h @@ -26,6 +26,7 @@ // Local includes #include "libmesh/dense_vector.h" #include "libmesh/int_range.h" +#include "libmesh/parsed_function_program.h" #include "libmesh/vector_value.h" #include "libmesh/point.h" @@ -88,7 +89,7 @@ class ParsedFunction : public FunctionBase /** * Query if the automatic derivative generation was successful. 
*/ - virtual bool has_derivatives() { return _valid_derivatives; } + virtual bool has_derivatives() const { return _valid_derivatives; } virtual Output dot(const Point & p, const Real time = 0); @@ -113,6 +114,12 @@ class ParsedFunction : public FunctionBase virtual std::unique_ptr> clone() const override; + ParsedFunctionProgram + build_program(unsigned int component = 0) const; + + ParsedFunctionProgramBundle + build_program_bundle(unsigned int component = 0) const; + /** * \returns The value of an inline variable. * @@ -379,6 +386,37 @@ ParsedFunction::clone() const &_initial_vals); } +template +inline +ParsedFunctionProgram +ParsedFunction::build_program(const unsigned int component) const +{ + libmesh_assert_less(component, parsers.size()); + return libMesh::build_parsed_function_program(*parsers[component]); +} + +template +inline +ParsedFunctionProgramBundle +ParsedFunction::build_program_bundle(const unsigned int component) const +{ + libmesh_assert_less(component, parsers.size()); + libmesh_error_msg_if(!this->has_derivatives(), + "Cannot build a parsed-function program bundle without valid derivative programs"); + + ParsedFunctionProgramBundle bundle; + bundle.value = this->build_program(component); + bundle.dx = libMesh::build_parsed_function_program(*dx_parsers[component]); +#if LIBMESH_DIM > 1 + bundle.dy = libMesh::build_parsed_function_program(*dy_parsers[component]); +#endif +#if LIBMESH_DIM > 2 + bundle.dz = libMesh::build_parsed_function_program(*dz_parsers[component]); +#endif + bundle.dt = libMesh::build_parsed_function_program(*dt_parsers[component]); + return bundle; +} + template inline Output diff --git a/include/numerics/parsed_function_program.h b/include/numerics/parsed_function_program.h new file mode 100644 index 00000000000..78ddbc5e9c2 --- /dev/null +++ b/include/numerics/parsed_function_program.h @@ -0,0 +1,189 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. 
Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_PARSED_FUNCTION_PROGRAM_H +#define LIBMESH_PARSED_FUNCTION_PROGRAM_H + +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" + +#include + +template +class FunctionParserBase; +template +class FunctionParserADBase; + +namespace libMesh +{ + +enum class ParsedFunctionOpcode : unsigned int +{ + cAbs, + cAcos, + cAcosh, + cArg, + cAsin, + cAsinh, + cAtan, + cAtan2, + cAtanh, + cCbrt, + cCeil, + cConj, + cCos, + cCosh, + cCot, + cCsc, + cExp, + cExp2, + cFloor, + cHypot, + cIf, + cImag, + cInt, + cLog, + cLog10, + cLog2, + cMax, + cMin, + cPolar, + cPow, + cReal, + cSec, + cSin, + cSinh, + cSqrt, + cTan, + cTanh, + cTrunc, + cImmed, + cJump, + cNeg, + cAdd, + cSub, + cMul, + cDiv, + cMod, + cEqual, + cNEqual, + cLess, + cLessOrEq, + cGreater, + cGreaterOrEq, + cNot, + cAnd, + cOr, + cNotNot, + cDeg, + cRad, + cFCall, + cPCall, + cPopNMov, + cLog2by, + cNop, + cSinCos, + cSinhCosh, + cAbsAnd, + cAbsOr, + cAbsNot, + cAbsNotNot, + cAbsIf, + cDup, + cFetch, + cInv, + cSqr, + cRDiv, + cRSub, + cRSqrt, + VarBegin +}; + +LIBMESH_DEVICE_INLINE constexpr unsigned int +parsed_function_var_begin() +{ + return static_cast(ParsedFunctionOpcode::VarBegin); +} + +LIBMESH_DEVICE_INLINE constexpr bool 
+parsed_function_is_var_opcode(const unsigned int opcode) +{ + return opcode >= parsed_function_var_begin(); +} + +template +struct ParsedFunctionProgram +{ + std::vector bytecode; + std::vector immediates; + unsigned int stack_size = 0; + unsigned int n_variables = 0; + Scalar epsilon = 0; + + bool empty() const { return bytecode.empty(); } +}; + +template +struct ParsedFunctionProgramBundle +{ + ParsedFunctionProgram value; + ParsedFunctionProgram dx; +#if LIBMESH_DIM > 1 + ParsedFunctionProgram dy; +#endif +#if LIBMESH_DIM > 2 + ParsedFunctionProgram dz; +#endif + ParsedFunctionProgram dt; +}; + +template +struct ParsedFEMFunctionProgramBundle +{ + ParsedFunctionProgram value; + ParsedFunctionProgram dx; +#if LIBMESH_DIM > 1 + ParsedFunctionProgram dy; +#endif +#if LIBMESH_DIM > 2 + ParsedFunctionProgram dz; +#endif + ParsedFunctionProgram dt; + std::vector value_variable_numbers; + std::vector> value_variable_derivatives; + bool uses_field_gradients = false; + bool uses_field_hessians = false; + bool uses_normals = false; + bool uses_additional_variables = false; + + bool supports_kokkos_value_goal() const + { + return !uses_field_gradients && + !uses_field_hessians && + !uses_normals && + !uses_additional_variables && + value_variable_numbers.size() == value_variable_derivatives.size(); + } +}; + +template +ParsedFunctionProgram +build_parsed_function_program(const FunctionParserADBase & parser); + +} // namespace libMesh + +#endif // LIBMESH_PARSED_FUNCTION_PROGRAM_H diff --git a/include/numerics/petsc_matrix_base.h b/include/numerics/petsc_matrix_base.h index 9099d94a3df..7768b07d791 100644 --- a/include/numerics/petsc_matrix_base.h +++ b/include/numerics/petsc_matrix_base.h @@ -131,6 +131,27 @@ class PetscMatrixBase : public SparseMatrix */ void set_destroy_mat_on_exit(bool destroy = true); + /** + * Replace the underlying PETSc Mat with a prebuilt object. 
+ * + * This is intended for advanced users that need to construct a Mat + * outside of the normal PetscMatrix initialization path while still + * using the libMesh wrapper as the owning interface. + */ + void reset_mat(Mat m, bool destroy_on_exit = true) + { + if (_mat == m && _destroy_mat_on_exit == destroy_on_exit) + return; + + this->clear(); + _mat = m; + _destroy_mat_on_exit = destroy_on_exit; + this->_is_initialized = (_mat != nullptr); + + if (_mat) + this->set_context(); + } + /** * Swaps the internal data pointers of two PetscMatrices, no actual * values are swapped. diff --git a/include/numerics/petsc_vector.h b/include/numerics/petsc_vector.h index 4fd5631acc5..f3c363e6015 100644 --- a/include/numerics/petsc_vector.h +++ b/include/numerics/petsc_vector.h @@ -44,6 +44,10 @@ # undef I // Avoid complex.h contamination #endif +#ifdef LIBMESH_HAVE_KOKKOS +#include +#endif + // C++ includes #include #include @@ -227,6 +231,157 @@ class PetscVector final : public NumericVector */ const PetscScalar * get_array_read() const; + /** + * Query PETSc for the memory type backing this vector. + * + * \note If the raw array is currently borrowed via get_array() or + * get_array_read(), this method is not valid. 
+ */ + PetscMemType get_mem_type() const; + + bool supports_kokkos_access() const; + +#ifdef LIBMESH_HAVE_KOKKOS + using kokkos_read_view = + ::Kokkos::View>; + using kokkos_write_view = + ::Kokkos::View>; + + class KokkosReadViewGuard + { + public: + explicit KokkosReadViewGuard(PetscVector & vector) + : _vector(vector) + { + _borrowed_vec = vector._vec; + if (vector.is_effectively_ghosted()) + { + LibmeshPetscCallA(_vector.comm().get(), VecGhostGetLocalForm(vector._vec, &_borrowed_vec)); + PetscInt my_local_size = 0; + LibmeshPetscCallA(_vector.comm().get(), VecGetLocalSize(_borrowed_vec, &my_local_size)); + _local_size = static_cast(my_local_size); + } + else + _local_size = vector.local_size(); + + const PetscScalar * data = nullptr; + LibmeshPetscCallA(_vector.comm().get(), + VecGetArrayReadAndMemType(_borrowed_vec, &data, &_mem_type)); + _data = reinterpret_cast(data); + const bool host_inaccessible = + PetscMemTypeHost(_mem_type) && + !::Kokkos::SpaceAccessibility::accessible; + libmesh_error_msg_if(host_inaccessible, + "PetscVector Kokkos read access requires host-accessible execution " + "space for host PETSc memory."); + _view = kokkos_read_view(_data, _local_size); + } + + KokkosReadViewGuard(const KokkosReadViewGuard &) = delete; + KokkosReadViewGuard & operator=(const KokkosReadViewGuard &) = delete; + + ~KokkosReadViewGuard() + { + const PetscScalar * data = reinterpret_cast(_data); + const auto restore_ierr = VecRestoreArrayReadAndMemType(_borrowed_vec, &data); + libmesh_ignore(restore_ierr); + if (_vector.is_effectively_ghosted()) + { + const auto ghost_ierr = VecGhostRestoreLocalForm(_vector._vec, &_borrowed_vec); + libmesh_ignore(ghost_ierr); + } + } + + const kokkos_read_view & view() const + { + return _view; + } + + private: + PetscVector & _vector; + Vec _borrowed_vec = nullptr; + const T * _data = nullptr; + PetscMemType _mem_type = PETSC_MEMTYPE_HOST; + numeric_index_type _local_size = 0; + kokkos_read_view _view; + }; + + 
KokkosReadViewGuard make_kokkos_read_view_guard() + { + return KokkosReadViewGuard(*this); + } + + class KokkosWriteViewGuard + { + public: + explicit KokkosWriteViewGuard(PetscVector & vector) + : _vector(vector) + { + _borrowed_vec = vector._vec; + if (vector.is_effectively_ghosted()) + { + LibmeshPetscCallA(_vector.comm().get(), VecGhostGetLocalForm(vector._vec, &_borrowed_vec)); + PetscInt my_local_size = 0; + LibmeshPetscCallA(_vector.comm().get(), VecGetLocalSize(_borrowed_vec, &my_local_size)); + _local_size = static_cast(my_local_size); + } + else + _local_size = vector.local_size(); + + PetscScalar * data = nullptr; + LibmeshPetscCallA(_vector.comm().get(), + VecGetArrayWriteAndMemType(_borrowed_vec, &data, &_mem_type)); + _data = reinterpret_cast(data); + const bool host_inaccessible = + PetscMemTypeHost(_mem_type) && + !::Kokkos::SpaceAccessibility::accessible; + libmesh_error_msg_if(host_inaccessible, + "PetscVector Kokkos write access requires host-accessible execution " + "space for host PETSc memory."); + _view = kokkos_write_view(_data, _local_size); + } + + KokkosWriteViewGuard(const KokkosWriteViewGuard &) = delete; + KokkosWriteViewGuard & operator=(const KokkosWriteViewGuard &) = delete; + + ~KokkosWriteViewGuard() + { + PetscScalar * data = reinterpret_cast(_data); + const auto restore_ierr = VecRestoreArrayWriteAndMemType(_borrowed_vec, &data); + libmesh_ignore(restore_ierr); + if (_vector.is_effectively_ghosted()) + { + const auto ghost_ierr = VecGhostRestoreLocalForm(_vector._vec, &_borrowed_vec); + libmesh_ignore(ghost_ierr); + } + } + + const kokkos_write_view & view() const + { + return _view; + } + + private: + PetscVector & _vector; + Vec _borrowed_vec = nullptr; + T * _data = nullptr; + PetscMemType _mem_type = PETSC_MEMTYPE_HOST; + numeric_index_type _local_size = 0; + kokkos_write_view _view; + }; + + KokkosWriteViewGuard make_kokkos_write_view_guard() + { + return KokkosWriteViewGuard(*this); + } +#endif + /** * Restore the data 
array. * @@ -1196,6 +1351,42 @@ const PetscScalar * PetscVector::get_array_read() const return _read_only_values; } +template +inline +PetscMemType PetscVector::get_mem_type() const +{ + libmesh_error_msg_if(_values_manually_retrieved, + "Cannot query PetscVector memory type while a raw array is borrowed"); + +#ifdef LIBMESH_HAVE_CXX11_THREAD + const bool array_is_present = _array_is_present.load(std::memory_order_acquire); +#else + const bool array_is_present = _array_is_present; +#endif + + if (array_is_present) + _restore_array(); + + PetscScalar * dummyarray = nullptr; + PetscMemType mem_type = PETSC_MEMTYPE_HOST; + LibmeshPetscCall(VecGetArrayAndMemType(_vec, &dummyarray, &mem_type)); + LibmeshPetscCall(VecRestoreArrayAndMemType(_vec, &dummyarray)); + return mem_type; +} + +template +inline +bool PetscVector::supports_kokkos_access() const +{ +#ifdef LIBMESH_HAVE_KOKKOS + return !PetscMemTypeHost(this->get_mem_type()) || + ::Kokkos::SpaceAccessibility::accessible; +#else + return false; +#endif +} + template inline void PetscVector::restore_array() diff --git a/include/numerics/tensor_tools.h b/include/numerics/tensor_tools.h index 7617116f10d..f183380a84d 100644 --- a/include/numerics/tensor_tools.h +++ b/include/numerics/tensor_tools.h @@ -45,92 +45,92 @@ namespace TensorTools // Vector specializations will follow. 
template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if::value && ScalarTraits::value, typename CompareTypes::supertype>::type inner_product(const T & a, const T2& b) { return a * b; } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeVector & a, const TypeVector & b) { return a * b; } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeTensor & a, const TypeTensor & b) { return a.contract(b); } template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype inner_product(const TypeNTensor & a, const TypeNTensor & b) { return a.contract(b); } template -inline +LIBMESH_DEVICE_INLINE auto norm(const T & a) { using std::abs; return abs(a); } template -inline +LIBMESH_DEVICE_INLINE T norm(std::complex a) { using std::abs; return abs(a); } template -inline +LIBMESH_DEVICE_INLINE auto norm(const TypeVector & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const VectorValue & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const TypeTensor & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm(const TensorValue & a) -> decltype(TensorTools::norm(T())) {using std::sqrt; return sqrt(a.norm_sq());} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const T & a) -{ using std::norm; return norm(a); } +{ return a * libmesh_conj(a); } template -inline +LIBMESH_DEVICE_INLINE T norm_sq(std::complex a) { using std::norm; return norm(a); } template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TypeVector & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const VectorValue & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TypeTensor & a) {return a.norm_sq();} 
template -inline +LIBMESH_DEVICE_INLINE auto norm_sq(const TensorValue & a) {return a.norm_sq();} template -inline +LIBMESH_DEVICE_INLINE bool is_zero(const T & a){ return a.is_zero();} // Any tensor-rank-independent code will need to include diff --git a/include/numerics/tensor_value.h b/include/numerics/tensor_value.h index 3a0d680476d..c99e0cac003 100644 --- a/include/numerics/tensor_value.h +++ b/include/numerics/tensor_value.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/type_tensor.h" +#include "libmesh/libmesh_device.h" #include "libmesh/libmesh.h" // for pi #ifdef LIBMESH_HAVE_METAPHYSICL @@ -93,12 +94,14 @@ class TensorValue : public TypeTensor * Constructor. Takes 1 row vector for LIBMESH_DIM=1 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx); /** * Constructor. Takes 2 row vectors for LIBMESH_DIM=2 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx, const TypeVector & vy); @@ -106,6 +109,7 @@ class TensorValue : public TypeTensor * Constructor. Takes 3 row vectors for LIBMESH_DIM=3 */ template + LIBMESH_DEVICE_INLINE TensorValue (const TypeVector & vx, const TypeVector & vy, const TypeVector & vz); @@ -134,11 +138,11 @@ class TensorValue : public TypeTensor const TypeTensor & p_im); #endif - /** * Assignment-from-scalar operator. Used only to zero out tensors. 
*/ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TensorValue &>::type @@ -211,7 +215,7 @@ typedef NumberTensorValue Tensor; //------------------------------------------------------ // Inline functions template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue () : TypeTensor () { @@ -220,7 +224,7 @@ TensorValue::TensorValue () : template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const T & xx, const T & xy, const T & xz, @@ -237,7 +241,7 @@ TensorValue::TensorValue (const T & xx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const Scalar & xx, const Scalar & xy, const Scalar & xz, @@ -257,7 +261,7 @@ TensorValue::TensorValue (const Scalar & xx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TensorValue & p) : TypeTensor (p) { @@ -267,7 +271,7 @@ TensorValue::TensorValue (const TensorValue & p) : template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx) : TypeTensor (vx) { @@ -277,7 +281,7 @@ TensorValue::TensorValue (const TypeVector & vx) : template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx, const TypeVector & vy) : TypeTensor (vx, vy) @@ -288,7 +292,7 @@ TensorValue::TensorValue (const TypeVector & vx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeVector & vx, const TypeVector & vy, const TypeVector & vz) : @@ -300,7 +304,7 @@ TensorValue::TensorValue (const TypeVector & vx, template template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeTensor & p) : TypeTensor (p) { @@ -309,7 +313,7 @@ TensorValue::TensorValue (const TypeTensor & p) : #ifdef LIBMESH_USE_COMPLEX_NUMBERS template -inline +LIBMESH_DEVICE_INLINE TensorValue::TensorValue (const TypeTensor & p_re, const TypeTensor & p_im) : TypeTensor (Complex (p_re(0,0), p_im(0,0)), diff --git a/include/numerics/type_tensor.h b/include/numerics/type_tensor.h 
index 470b745f120..ac6dc145428 100644 --- a/include/numerics/type_tensor.h +++ b/include/numerics/type_tensor.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" #include "libmesh/type_vector.h" // C++ includes @@ -101,13 +102,16 @@ class TypeTensor * many vectors are needed. */ template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx); template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx, const TypeVector & vy); template + LIBMESH_DEVICE_INLINE TypeTensor(const TypeVector & vx, const TypeVector & vy, const TypeVector & vz); @@ -133,12 +137,14 @@ class TypeTensor /** * Destructor. */ + LIBMESH_DEVICE_INLINE ~TypeTensor(); /** * Assign to this tensor without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void assign (const TypeTensor &); /** @@ -147,6 +153,7 @@ class TypeTensor * \returns A reference to *this. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor &>::type @@ -166,11 +173,13 @@ class TypeTensor /** * \returns A proxy for the \f$ i^{th} \f$ column of the tensor. */ + LIBMESH_DEVICE_INLINE ConstTypeTensorColumn slice (const unsigned int i) const; /** * \returns A writable proxy for the \f$ i^{th} \f$ column of the tensor. */ + LIBMESH_DEVICE_INLINE TypeTensorColumn slice (const unsigned int i); /** @@ -181,6 +190,7 @@ class TypeTensor /** * \returns A copy of one column of the tensor as a TypeVector. */ + LIBMESH_DEVICE_INLINE TypeVector column(const unsigned int r) const; /** @@ -210,6 +220,7 @@ class TypeTensor * Add a scaled tensor to this tensor without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void add_scaled (const TypeTensor &, const T &); /** @@ -240,6 +251,7 @@ class TypeTensor * temporary. 
*/ template + LIBMESH_DEVICE_INLINE void subtract_scaled (const TypeTensor &, const T &); /** @@ -265,6 +277,7 @@ class TypeTensor */ template ::value, int>::type = 0> + LIBMESH_DEVICE_INLINE const TypeTensor & operator *= (const Scalar & factor) { for (unsigned int i=0; i + LIBMESH_DEVICE_INLINE typename CompareTypes::supertype contract (const TypeTensor &) const; @@ -339,6 +353,7 @@ class TypeTensor * \returns A copy of the result vector, this tensor is unchanged. */ template + LIBMESH_DEVICE_INLINE TypeVector::supertype> left_multiply (const TypeVector & p) const; @@ -358,6 +373,7 @@ class TypeTensor * * \returns The solution in the \p x vector. */ + LIBMESH_DEVICE_INLINE void solve(const TypeVector & b, TypeVector & x) const; /** @@ -375,6 +391,7 @@ class TypeTensor /** * \returns True if all values in the tensor are zero */ + LIBMESH_DEVICE_INLINE bool is_zero() const; /** @@ -393,11 +410,13 @@ class TypeTensor /** * Set all entries of the tensor to 0. */ + LIBMESH_DEVICE_INLINE void zero(); /** * \returns \p true if two tensors are equal, \p false otherwise. 
*/ + LIBMESH_DEVICE_INLINE bool operator == (const TypeTensor & rhs) const; /** @@ -513,7 +532,7 @@ class ConstTypeTensorColumn //------------------------------------------------------ // Inline functions template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor () { _coords[0] = {}; @@ -536,7 +555,7 @@ TypeTensor::TypeTensor () template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const T & xx, const T & xy, const T & xz, @@ -582,7 +601,7 @@ TypeTensor::TypeTensor (const T & xx, template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const Scalar & xx, const Scalar & xy, const Scalar & xz, @@ -631,7 +650,7 @@ TypeTensor::TypeTensor (const Scalar & xx, template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor (const TypeTensor & p) { // copy the nodes from vector p to me @@ -642,6 +661,7 @@ TypeTensor::TypeTensor (const TypeTensor & p) template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx) { libmesh_assert_equal_to (LIBMESH_DIM, 1); @@ -650,6 +670,7 @@ TypeTensor::TypeTensor(const TypeVector & vx) template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx, const TypeVector & vy) { @@ -666,6 +687,7 @@ TypeTensor::TypeTensor(const TypeVector & vx, template template +LIBMESH_DEVICE_INLINE TypeTensor::TypeTensor(const TypeVector & vx, const TypeVector & vy, const TypeVector & vz) @@ -690,7 +712,7 @@ TypeTensor::TypeTensor(const TypeVector & vx, template -inline +LIBMESH_DEVICE_INLINE TypeTensor::~TypeTensor () { } @@ -699,7 +721,7 @@ TypeTensor::~TypeTensor () template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::assign (const TypeTensor & p) { for (unsigned int i=0; i::assign (const TypeTensor & p) template -inline +LIBMESH_DEVICE_INLINE const T & TypeTensor::operator () (const unsigned int i, const unsigned int j) const { @@ -728,14 +750,14 @@ const T & TypeTensor::operator () (const unsigned int i, template -inline +LIBMESH_DEVICE_INLINE T & 
TypeTensor::operator () (const unsigned int i, const unsigned int j) { #if LIBMESH_DIM < 3 - libmesh_error_msg_if(i >= LIBMESH_DIM || j >= LIBMESH_DIM, - "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); + LIBMESH_DEVICE_ERROR_MSG_IF(i >= LIBMESH_DIM || j >= LIBMESH_DIM, + "ERROR: You are assigning to a tensor component that is out of range for the compiled LIBMESH_DIM!"); #endif @@ -747,7 +769,7 @@ T & TypeTensor::operator () (const unsigned int i, template -inline +LIBMESH_DEVICE_INLINE ConstTypeTensorColumn TypeTensor::slice (const unsigned int i) const { @@ -757,7 +779,7 @@ TypeTensor::slice (const unsigned int i) const template -inline +LIBMESH_DEVICE_INLINE TypeTensorColumn TypeTensor::slice (const unsigned int i) { @@ -767,7 +789,7 @@ TypeTensor::slice (const unsigned int i) template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeTensor::row(const unsigned int r) const { @@ -781,7 +803,7 @@ TypeTensor::row(const unsigned int r) const template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeTensor::column(const unsigned int r) const { @@ -796,7 +818,7 @@ TypeTensor::column(const unsigned int r) const template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> TypeTensor::operator + (const TypeTensor & p) const { @@ -831,7 +853,7 @@ TypeTensor::operator + (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator += (const TypeTensor & p) { this->add (p); @@ -843,7 +865,7 @@ const TypeTensor & TypeTensor::operator += (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::add (const TypeTensor & p) { for (unsigned int i=0; i::add (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::add_scaled (const TypeTensor & p, const T & factor) { for (unsigned int i=0; i::add_scaled (const TypeTensor & p, const T & factor) template template -inline +LIBMESH_DEVICE_INLINE 
TypeTensor::supertype> TypeTensor::operator - (const TypeTensor & p) const { @@ -901,7 +923,7 @@ TypeTensor::operator - (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator -= (const TypeTensor & p) { this->subtract (p); @@ -913,7 +935,7 @@ const TypeTensor & TypeTensor::operator -= (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::subtract (const TypeTensor & p) { for (unsigned int i=0; i::subtract (const TypeTensor & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::subtract_scaled (const TypeTensor & p, const T & factor) { for (unsigned int i=0; i::subtract_scaled (const TypeTensor & p, const T & factor) template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::operator - () const { @@ -967,7 +989,7 @@ TypeTensor TypeTensor::operator - () const template template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::operator * (const Scalar & factor) const -> typename std::enable_if< ScalarTraits::value, @@ -1003,7 +1025,7 @@ TypeTensor::operator * (const Scalar & factor) const -> typename std::enable_ template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor::supertype>>::type @@ -1015,7 +1037,7 @@ operator * (const Scalar & factor, template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeTensor::supertype>>::type @@ -1053,7 +1075,7 @@ TypeTensor::operator / (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::transpose() const { #if LIBMESH_DIM == 1 @@ -1083,7 +1105,7 @@ TypeTensor TypeTensor::transpose() const template -inline +LIBMESH_DEVICE_INLINE TypeTensor TypeTensor::inverse() const { #if LIBMESH_DIM == 1 @@ -1132,7 +1154,7 @@ TypeTensor TypeTensor::inverse() const template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::solve(const TypeVector & b, TypeVector & x) const { #if LIBMESH_DIM == 1 @@ -1183,7 +1205,7 @@ 
void TypeTensor::solve(const TypeVector & b, TypeVector & x) const template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator /= (const T & factor) { libmesh_assert_not_equal_to (factor, static_cast(0.)); @@ -1199,7 +1221,7 @@ const TypeTensor & TypeTensor::operator /= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeTensor::operator * (const TypeVector & p) const { @@ -1213,7 +1235,7 @@ TypeTensor::operator * (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeTensor::left_multiply (const TypeVector & p) const { @@ -1226,7 +1248,7 @@ TypeTensor::left_multiply (const TypeVector & p) const } template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> operator * (const TypeVector & a, const TypeTensor & b) { @@ -1235,7 +1257,7 @@ operator * (const TypeVector & a, const TypeTensor & b) template template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> TypeTensor::operator * (const TypeTensor & p) const { @@ -1250,7 +1272,7 @@ TypeTensor::operator * (const TypeTensor & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeTensor & TypeTensor::operator *= (const TypeTensor & p) { TypeTensor temp; @@ -1270,7 +1292,7 @@ const TypeTensor & TypeTensor::operator *= (const TypeTensor & p) */ template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeTensor::contract (const TypeTensor & t) const { @@ -1283,7 +1305,7 @@ TypeTensor::contract (const TypeTensor & t) const template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::norm() const { using std::sqrt; @@ -1292,7 +1314,7 @@ auto TypeTensor::norm() const template -inline +LIBMESH_DEVICE_INLINE bool TypeTensor::is_zero() const { for (const auto & val : _coords) @@ -1302,7 +1324,7 @@ bool TypeTensor::is_zero() const } template -inline +LIBMESH_DEVICE_INLINE T TypeTensor::det() const { #if LIBMESH_DIM == 1 @@ -1325,7 +1347,7 @@ T TypeTensor::det() const } 
template -inline +LIBMESH_DEVICE_INLINE T TypeTensor::tr() const { #if LIBMESH_DIM == 1 @@ -1342,7 +1364,7 @@ T TypeTensor::tr() const } template -inline +LIBMESH_DEVICE_INLINE void TypeTensor::zero() { for (unsigned int i=0; i::zero() template -inline +LIBMESH_DEVICE_INLINE auto TypeTensor::norm_sq () const { Real sum = 0.; @@ -1364,7 +1386,7 @@ auto TypeTensor::norm_sq () const template -inline +LIBMESH_DEVICE_INLINE bool TypeTensor::operator == (const TypeTensor & rhs) const { #if LIBMESH_DIM == 1 @@ -1436,7 +1458,7 @@ void TypeTensor::print(std::ostream & os) const } template -inline +LIBMESH_DEVICE_INLINE TypeTensor::supertype> outer_product(const TypeVector & a, const TypeVector & b) { diff --git a/include/numerics/type_vector.h b/include/numerics/type_vector.h index aaf79a9fd22..a9ae1bb2518 100644 --- a/include/numerics/type_vector.h +++ b/include/numerics/type_vector.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" #include "libmesh/compare_types.h" #include "libmesh/tensor_tools.h" #include "libmesh/int_range.h" @@ -141,12 +142,14 @@ class TypeVector * Assign to this vector without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void assign (const TypeVector &); /** * Assignment-from-scalar operator. Used only to zero out vectors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector &>::type @@ -157,12 +160,14 @@ class TypeVector * \returns A const reference to the \f$ i^{th} \f$ entry of the vector. */ const T & operator () (const unsigned int i) const; + LIBMESH_DEVICE_INLINE const T & slice (const unsigned int i) const { return (*this)(i); } /** * \returns A writable reference to the \f$ i^{th} \f$ entry of the vector. 
*/ T & operator () (const unsigned int i); + LIBMESH_DEVICE_INLINE T & slice (const unsigned int i) { return (*this)(i); } /** @@ -192,6 +197,7 @@ class TypeVector * Add a scaled value to this vector without creating a temporary. */ template + LIBMESH_DEVICE_INLINE void add_scaled (const TypeVector &, const T &); /** @@ -222,6 +228,7 @@ class TypeVector * temporary. */ template + LIBMESH_DEVICE_INLINE void subtract_scaled (const TypeVector &, const T &); /** @@ -279,6 +286,7 @@ class TypeVector * \returns The result of TypeVector::operator*(). */ template + LIBMESH_DEVICE_INLINE typename CompareTypes::supertype contract (const TypeVector &) const; @@ -292,6 +300,7 @@ class TypeVector /** * \returns A unit vector in the direction of *this. */ + LIBMESH_DEVICE_INLINE TypeVector unit() const; /** @@ -309,16 +318,19 @@ class TypeVector /** * \returns The L1 norm of the vector */ + LIBMESH_DEVICE_INLINE auto l1_norm() const; /** * \returns True if all values in the vector are zero */ + LIBMESH_DEVICE_INLINE bool is_zero() const; /** * Set all entries of the vector to 0. */ + LIBMESH_DEVICE_INLINE void zero(); /** @@ -342,11 +354,13 @@ class TypeVector * \note For floating point types T, the function \p absolute_fuzzy_equals() * may be a more appropriate choice. 
*/ + LIBMESH_DEVICE_INLINE bool operator == (const TypeVector & rhs) const; /** * \returns !(*this == rhs) */ + LIBMESH_DEVICE_INLINE bool operator != (const TypeVector & rhs) const; /** @@ -425,7 +439,7 @@ class TypeVector // Inline functions template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector () { _coords[0] = {}; @@ -442,7 +456,7 @@ TypeVector::TypeVector () template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const T & x, const T & y, const T & z) @@ -467,7 +481,7 @@ TypeVector::TypeVector (const T & x, template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (typename std::enable_if::value, const Scalar1>::type & x, @@ -497,7 +511,7 @@ TypeVector::TypeVector (typename template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const Scalar & x, typename std::enable_if::value, @@ -518,7 +532,7 @@ TypeVector::TypeVector (const Scalar & x, template template -inline +LIBMESH_DEVICE_INLINE TypeVector::TypeVector (const TypeVector & p) { // copy the nodes from vector p to me @@ -530,7 +544,7 @@ TypeVector::TypeVector (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::assign (const TypeVector & p) { for (unsigned int i=0; i::assign (const TypeVector & p) template -inline +LIBMESH_DEVICE_INLINE const T & TypeVector::operator () (const unsigned int i) const { libmesh_assert_less (i, LIBMESH_DIM); @@ -551,7 +565,7 @@ const T & TypeVector::operator () (const unsigned int i) const template -inline +LIBMESH_DEVICE_INLINE T & TypeVector::operator () (const unsigned int i) { libmesh_assert_less (i, LIBMESH_DIM); @@ -563,7 +577,7 @@ T & TypeVector::operator () (const unsigned int i) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::operator + (const TypeVector & p) const { @@ -589,7 +603,7 @@ TypeVector::operator + (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator += (const TypeVector & p) 
{ this->add (p); @@ -601,7 +615,7 @@ const TypeVector & TypeVector::operator += (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::add (const TypeVector & p) { #if LIBMESH_DIM == 1 @@ -625,7 +639,7 @@ void TypeVector::add (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::add_scaled (const TypeVector & p, const T & factor) { #if LIBMESH_DIM == 1 @@ -649,7 +663,7 @@ void TypeVector::add_scaled (const TypeVector & p, const T & factor) template template -inline +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::operator - (const TypeVector & p) const { @@ -676,7 +690,7 @@ TypeVector::operator - (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator -= (const TypeVector & p) { this->subtract (p); @@ -688,7 +702,7 @@ const TypeVector & TypeVector::operator -= (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::subtract (const TypeVector & p) { for (unsigned int i=0; i::subtract (const TypeVector & p) template template -inline +LIBMESH_DEVICE_INLINE void TypeVector::subtract_scaled (const TypeVector & p, const T & factor) { for (unsigned int i=0; i::subtract_scaled (const TypeVector & p, const T & factor) template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeVector::operator - () const { @@ -734,7 +748,7 @@ TypeVector TypeVector::operator - () const template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -761,7 +775,7 @@ TypeVector::operator * (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -774,7 +788,7 @@ operator * (const Scalar & factor, template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator *= (const T & factor) { #if LIBMESH_DIM == 1 @@ -799,7 +813,7 @@ const TypeVector & 
TypeVector::operator *= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, TypeVector::supertype>>::type @@ -830,7 +844,7 @@ TypeVector::operator / (const Scalar & factor) const template -inline +LIBMESH_DEVICE_INLINE const TypeVector & TypeVector::operator /= (const T & factor) { @@ -847,7 +861,7 @@ TypeVector::operator /= (const T & factor) template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeVector::operator * (const TypeVector & p) const { @@ -869,7 +883,7 @@ TypeVector::operator * (const TypeVector & p) const template template -inline +LIBMESH_DEVICE_INLINE typename CompareTypes::supertype TypeVector::contract(const TypeVector & p) const { @@ -880,6 +894,7 @@ TypeVector::contract(const TypeVector & p) const template template +LIBMESH_DEVICE_INLINE TypeVector::supertype> TypeVector::cross(const TypeVector & p) const { @@ -903,7 +918,7 @@ TypeVector::cross(const TypeVector & p) const template -inline +LIBMESH_DEVICE_INLINE auto TypeVector::norm() const { using std::sqrt; @@ -913,7 +928,7 @@ auto TypeVector::norm() const template -inline +LIBMESH_DEVICE_INLINE void TypeVector::zero() { for (unsigned int i=0; i::zero() template -inline +LIBMESH_DEVICE_INLINE auto TypeVector::norm_sq() const { #if LIBMESH_DIM == 1 @@ -944,7 +959,7 @@ auto TypeVector::norm_sq() const template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::is_zero() const { for (const auto & val : _coords) @@ -958,6 +973,7 @@ auto TypeVector::l1_norm() const; template +LIBMESH_DEVICE_INLINE auto TypeVector::l1_norm() const { @@ -988,7 +1004,7 @@ bool TypeVector::relative_fuzzy_equals(const TypeVector & rhs, Real tol) c template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::operator == (const TypeVector & rhs) const { #if LIBMESH_DIM == 1 @@ -1010,7 +1026,7 @@ bool TypeVector::operator == (const TypeVector & rhs) const template -inline +LIBMESH_DEVICE_INLINE bool TypeVector::operator != (const TypeVector 
& rhs) const { return (!(*this == rhs)); @@ -1027,7 +1043,7 @@ bool TypeVector::operator != (const TypeVector & rhs) const // [b0, b1, b2] // [c0, c1, c2] template -inline +LIBMESH_DEVICE_INLINE T triple_product(const TypeVector & a, const TypeVector & b, const TypeVector & c) @@ -1049,7 +1065,7 @@ T triple_product(const TypeVector & a, // to be positive if the vectors are obey the right-hand rule, or // negative for a left-hand orientation. template -inline +LIBMESH_DEVICE_INLINE T solid_angle(const TypeVector & v01, const TypeVector & v02, const TypeVector & v03) @@ -1075,7 +1091,7 @@ T solid_angle(const TypeVector & v01, * calling b.cross(c).norm_sq(). */ template -inline +LIBMESH_DEVICE_INLINE T cross_norm_sq(const TypeVector & b, const TypeVector & c) { @@ -1096,7 +1112,7 @@ T cross_norm_sq(const TypeVector & b, * Calls cross_norm_sq() and takes the square root of the result. */ template -inline +LIBMESH_DEVICE_INLINE T cross_norm(const TypeVector & b, const TypeVector & c) { @@ -1105,7 +1121,7 @@ T cross_norm(const TypeVector & b, } template -inline +LIBMESH_DEVICE_INLINE TypeVector TypeVector::unit() const { @@ -1167,6 +1183,7 @@ struct CompareTypes, TypeVector> }; template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE TypeVector::supertype> outer_product(const T & a, const TypeVector & b) { @@ -1178,6 +1195,7 @@ outer_product(const T & a, const TypeVector & b) } template ::value, int>::type = 0> +LIBMESH_DEVICE_INLINE TypeVector::supertype> outer_product(const TypeVector & a, const T2 & b) { @@ -1208,6 +1226,7 @@ l1_norm_diff(const TypeVector & vec1, const TypeVector & vec2) namespace std { template +LIBMESH_DEVICE_INLINE auto norm(const libMesh::TypeVector & vector) -> decltype(std::norm(T())) { // Yea I agree it's dumb that the standard returns the square of the Euclidean norm diff --git a/include/numerics/vector_value.h b/include/numerics/vector_value.h index c93f17313dc..45116e1a737 100644 --- a/include/numerics/vector_value.h +++ 
b/include/numerics/vector_value.h @@ -22,6 +22,7 @@ // Local includes #include "libmesh/type_vector.h" +#include "libmesh/libmesh_device.h" #include "libmesh/compare_types.h" #ifdef LIBMESH_HAVE_METAPHYSICL @@ -124,6 +125,7 @@ class VectorValue : public TypeVector * Assignment-from-scalar operator. Used only to zero out vectors. */ template + LIBMESH_DEVICE_INLINE typename std::enable_if< ScalarTraits::value, VectorValue &>::type @@ -146,7 +148,7 @@ typedef NumberVectorValue Gradient; // Inline functions template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue () : TypeVector () { @@ -154,7 +156,7 @@ VectorValue::VectorValue () : template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const T & x, const T & y, const T & z) : @@ -166,7 +168,7 @@ VectorValue::VectorValue (const T & x, template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (typename std::enable_if::value, const Scalar1>::type & x, @@ -183,7 +185,7 @@ VectorValue::VectorValue (typename template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const Scalar & x, typename std::enable_if::value, @@ -194,7 +196,7 @@ VectorValue::VectorValue (const Scalar & x, template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const VectorValue & p) : TypeVector (p) { @@ -204,7 +206,7 @@ VectorValue::VectorValue (const VectorValue & p) : template template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const TypeVector & p) : TypeVector (p) { @@ -212,7 +214,7 @@ VectorValue::VectorValue (const TypeVector & p) : #ifdef LIBMESH_USE_COMPLEX_NUMBERS template -inline +LIBMESH_DEVICE_INLINE VectorValue::VectorValue (const TypeVector & p_re, const TypeVector & p_im) : TypeVector (Complex (p_re(0), p_im(0)), diff --git a/include/quadrature/quadrature_gauss_rules.h b/include/quadrature/quadrature_gauss_rules.h new file mode 100644 index 00000000000..a1acfcf4adb --- /dev/null +++ b/include/quadrature/quadrature_gauss_rules.h @@ -0,0 +1,373 @@ 
+#ifndef LIBMESH_QUADRATURE_GAUSS_RULES_H +#define LIBMESH_QUADRATURE_GAUSS_RULES_H + /* Compile-time Gauss quadrature tables and the small view structs used to hand them out. */ +#include "libmesh/libmesh_common.h" +#include "libmesh/libmesh_device.h" + +namespace libMesh::Quadrature::Gauss +{ + /* 1D rule view: `count` entries in each of the parallel `points` and `weights` arrays. */ +struct Rule1D +{ + unsigned int count; + const Real * points; + const Real * weights; +}; + /* One 2D quadrature point (x, y) with its weight w. */ +struct PointWeight2D +{ + Real x; + Real y; + Real w; +}; + /* 2D rule view: `count` PointWeight2D entries starting at `points`. */ +struct Rule2D +{ + unsigned int count; + const PointWeight2D * points; +}; + /* One 3D quadrature point (x, y, z) with its weight w. */ +struct PointWeight3D +{ + Real x; + Real y; + Real z; + Real w; +}; + /* 3D rule view: `count` PointWeight3D entries starting at `points`. */ +struct Rule3D +{ + unsigned int count; + const PointWeight3D * points; +}; + /* gauss_legendre_points_N / gauss_legendre_weights_N: N-point 1D tables, points listed in ascending order, weights summing to 2. */ +inline constexpr Real gauss_legendre_points_1[] = {0._R}; +inline constexpr Real gauss_legendre_weights_1[] = {2._R}; + +inline constexpr Real gauss_legendre_points_2[] = { + -5.7735026918962576450914878050196e-01_R, + 5.7735026918962576450914878050196e-01_R +}; +inline constexpr Real gauss_legendre_weights_2[] = {1._R, 1._R}; + +inline constexpr Real gauss_legendre_points_3[] = { + -7.7459666924148337703585307995648e-01_R, + 0._R, + 7.7459666924148337703585307995648e-01_R +}; +inline constexpr Real gauss_legendre_weights_3[] = { + 5.5555555555555555555555555555556e-01_R, + 8.8888888888888888888888888888889e-01_R, + 5.5555555555555555555555555555556e-01_R +}; + +inline constexpr Real gauss_legendre_points_4[] = { + -8.6113631159405257522394648889281e-01_R, + -3.3998104358485626480266575910324e-01_R, + 3.3998104358485626480266575910324e-01_R, + 8.6113631159405257522394648889281e-01_R +}; +inline constexpr Real gauss_legendre_weights_4[] = { + 3.4785484513745385737306394922200e-01_R, + 6.5214515486254614262693605077800e-01_R, + 6.5214515486254614262693605077800e-01_R, + 3.4785484513745385737306394922200e-01_R +}; + +inline constexpr Real gauss_legendre_points_5[] = { + -9.0617984593866399279762687829939e-01_R, + -5.3846931010568309103631442070021e-01_R, + 0._R, + 5.3846931010568309103631442070021e-01_R, + 9.0617984593866399279762687829939e-01_R +}; +inline constexpr Real
gauss_legendre_weights_5[] = { + 2.3692688505618908751426404071992e-01_R, + 4.7862867049936646804129151483564e-01_R, + 5.6888888888888888888888888888889e-01_R, + 4.7862867049936646804129151483564e-01_R, + 2.3692688505618908751426404071992e-01_R +}; + +inline constexpr Real gauss_legendre_points_6[] = { + -9.3246951420315202781230155449399e-01_R, + -6.6120938646626451366139959501991e-01_R, + -2.3861918608319690863050172168071e-01_R, + 2.3861918608319690863050172168071e-01_R, + 6.6120938646626451366139959501991e-01_R, + 9.3246951420315202781230155449399e-01_R +}; +inline constexpr Real gauss_legendre_weights_6[] = { + 1.7132449237917034504029614217273e-01_R, + 3.6076157304813860756983351383772e-01_R, + 4.6791393457269104738987034398955e-01_R, + 4.6791393457269104738987034398955e-01_R, + 3.6076157304813860756983351383772e-01_R, + 1.7132449237917034504029614217273e-01_R +}; + +inline constexpr Real gauss_legendre_points_7[] = { + -9.4910791234275852452618968404785e-01_R, + -7.4153118559939443986386477328079e-01_R, + -4.0584515137739716690660641207696e-01_R, + 0._R, + 4.0584515137739716690660641207696e-01_R, + 7.4153118559939443986386477328079e-01_R, + 9.4910791234275852452618968404785e-01_R +}; +inline constexpr Real gauss_legendre_weights_7[] = { + 1.2948496616886969327061143267908e-01_R, + 2.7970539148927666790146777142378e-01_R, + 3.8183005050511894495036977548898e-01_R, + 4.1795918367346938775510204081633e-01_R, + 3.8183005050511894495036977548898e-01_R, + 2.7970539148927666790146777142378e-01_R, + 1.2948496616886969327061143267908e-01_R +}; + +inline constexpr PointWeight2D tri_rule_1[] = { + {Real(1) / 3, Real(1) / 3, Real(1) / 2} +}; + +inline constexpr PointWeight2D tri_rule_2[] = { + {Real(2) / 3, Real(1) / 6, Real(1) / 6}, + {Real(1) / 6, Real(2) / 3, Real(1) / 6}, + {Real(1) / 6, Real(1) / 6, Real(1) / 6} +}; + +inline constexpr PointWeight2D tri_rule_3[] = { + {1.5505102572168219018027159252941e-01_R, 1.7855872826361642311703513337422e-01_R, 
1.5902069087198858469718450103758e-01_R}, + {6.4494897427831780981972840747059e-01_R, 7.5031110222608118177475598324603e-02_R, 9.0979309128011415302815498962418e-02_R}, + {1.5505102572168219018027159252941e-01_R, 6.6639024601470138670269327409637e-01_R, 1.5902069087198858469718450103758e-01_R}, + {6.4494897427831780981972840747059e-01_R, 2.8001991549907407200279599420481e-01_R, 9.0979309128011415302815498962418e-02_R} +}; + /* tri_rule_4: six (x, y, w) rows from two three-point families; each family uses a coordinate a and its partner b = 1 - 2a. */ +inline constexpr Real tri4_a1 = 4.4594849091596488631832925388305199e-01_R; +inline constexpr Real tri4_b1 = 1._R - 2._R * tri4_a1; +inline constexpr Real tri4_a2 = 9.1576213509770743459571463402201508e-02_R; +inline constexpr Real tri4_b2 = 1._R - 2._R * tri4_a2; +inline constexpr PointWeight2D tri_rule_4[] = { + {tri4_a1, tri4_a1, 1.1169079483900573284750350421656140e-01_R}, + {tri4_a1, tri4_b1, 1.1169079483900573284750350421656140e-01_R}, + {tri4_b1, tri4_a1, 1.1169079483900573284750350421656140e-01_R}, + {tri4_a2, tri4_a2, 5.4975871827660933819163162450105264e-02_R}, + {tri4_a2, tri4_b2, 5.4975871827660933819163162450105264e-02_R}, + {tri4_b2, tri4_a2, 5.4975871827660933819163162450105264e-02_R} +}; + /* tri_rule_5: centroid point plus two three-point families whose coordinates and weights are closed-form expressions in sqrt(15). */ +inline constexpr Real tri5_sqrt15 = 3.8729833462074168851792653997824_R; /* sqrt(15) to 32 significant digits, matching the other triangle constants, so the derived points and weights are not capped at double accuracy when Real is wider */ +inline constexpr Real tri5_a1 = Real(2) / 7 + tri5_sqrt15 / 21; +inline constexpr Real tri5_b1 = 1._R - 2._R * tri5_a1; +inline constexpr Real tri5_a2 = Real(2) / 7 - tri5_sqrt15 / 21; +inline constexpr Real tri5_b2 = 1._R - 2._R * tri5_a2; +inline constexpr Real tri5_w1 = Real(31) / 480 + tri5_sqrt15 / 2400; +inline constexpr Real tri5_w2 = Real(31) / 480 - tri5_sqrt15 / 2400; +inline constexpr PointWeight2D tri_rule_5[] = { + {Real(1) / 3, Real(1) / 3, Real(9) / 80}, + {tri5_a1, tri5_a1, tri5_w1}, + {tri5_a1, tri5_b1, tri5_w1}, + {tri5_b1, tri5_a1, tri5_w1}, + {tri5_a2, tri5_a2, tri5_w2}, + {tri5_a2, tri5_b2, tri5_w2}, + {tri5_b2, tri5_a2, tri5_w2} +}; + +inline constexpr Real tri6_a1 = 2.4928674517091042129163855310701908e-01_R; +inline constexpr Real tri6_b1 = 1._R - 2._R *
tri6_a1; +inline constexpr Real tri6_a2 = 6.3089014491502228340331602870819157e-02_R; +inline constexpr Real tri6_b2 = 1._R - 2._R * tri6_a2; +inline constexpr Real tri6_a3 = 3.1035245103378440541660773395655215e-01_R; +inline constexpr Real tri6_b3 = 6.3650249912139864723014259441204970e-01_R; +inline constexpr Real tri6_c3 = 1._R - tri6_a3 - tri6_b3; +inline constexpr PointWeight2D tri_rule_6[] = { + {tri6_a1, tri6_a1, 5.8393137863189683012644805692789721e-02_R}, + {tri6_a1, tri6_b1, 5.8393137863189683012644805692789721e-02_R}, + {tri6_b1, tri6_a1, 5.8393137863189683012644805692789721e-02_R}, + {tri6_a2, tri6_a2, 2.5422453185103408460468404553434492e-02_R}, + {tri6_a2, tri6_b2, 2.5422453185103408460468404553434492e-02_R}, + {tri6_b2, tri6_a2, 2.5422453185103408460468404553434492e-02_R}, + {tri6_a3, tri6_b3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_b3, tri6_a3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_a3, tri6_c3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_c3, tri6_a3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_b3, tri6_c3, 4.1425537809186787596776728210221227e-02_R}, + {tri6_c3, tri6_b3, 4.1425537809186787596776728210221227e-02_R} +}; + +inline constexpr PointWeight2D tri_rule_7[] = { + {6.2382265094402118173683000996350e-02_R, 6.7517867073916085442557131050869e-02_R, 2.6517028157436251428754180460739e-02_R}, + {8.7009986783168172748385986795285e-01_R, 6.2382265094402118173683000996350e-02_R, 2.6517028157436251428754180460739e-02_R}, + {6.7517867073916085442557131050869e-02_R, 8.7009986783168172748385986795285e-01_R, 2.6517028157436251428754180460739e-02_R}, + {5.5225456656926611737479190275645e-02_R, 3.2150249385198182266630784919920e-01_R, 4.3881408714446055036769903139288e-02_R}, + {6.2327204949109156559621296052516e-01_R, 5.5225456656926611737479190275645e-02_R, 4.3881408714446055036769903139288e-02_R}, + {3.2150249385198182266630784919920e-01_R, 6.2327204949109156559621296052516e-01_R, 
4.3881408714446055036769903139288e-02_R}, + {3.4324302945097146469630642483938e-02_R, 6.6094919618673565761198031019780e-01_R, 2.8775042784981585738445496900219e-02_R}, + {3.0472650086816719591838904731826e-01_R, 3.4324302945097146469630642483938e-02_R, 2.8775042784981585738445496900219e-02_R}, + {6.6094919618673565761198031019780e-01_R, 3.0472650086816719591838904731826e-01_R, 2.8775042784981585738445496900219e-02_R}, + {5.1584233435359177925746338682643e-01_R, 2.7771616697639178256958187139372e-01_R, 6.7493187009802774462697086166421e-02_R}, + {2.0644149867001643817295474177985e-01_R, 5.1584233435359177925746338682643e-01_R, 6.7493187009802774462697086166421e-02_R}, + {2.7771616697639178256958187139372e-01_R, 2.0644149867001643817295474177985e-01_R, 6.7493187009802774462697086166421e-02_R} +}; + /* Tetrahedron tables: (x, y, z, w) rows; the weights of each rule sum to 1/6. */ +inline constexpr PointWeight3D tet_rule_1[] = { + {0.25_R, 0.25_R, 0.25_R, Real(1) / 6} +}; + /* tet_rule_2: four equally weighted points; b = (1 - 1/sqrt(5)) / 4 and a = 1 - 3b. */ +inline constexpr Real tet2_b = 0.25_R * (1._R - 1._R / 2.2360679774997896964091736687313_R); /* divisor is sqrt(5), carried to 32 significant digits so wider Real types keep full accuracy */ +inline constexpr Real tet2_a = 1._R - 3._R * tet2_b; +inline constexpr PointWeight3D tet_rule_2[] = { + {tet2_a, tet2_b, tet2_b, Real(1) / 24}, + {tet2_b, tet2_a, tet2_b, Real(1) / 24}, + {tet2_b, tet2_b, tet2_a, Real(1) / 24}, + {tet2_b, tet2_b, tet2_b, Real(1) / 24} +}; + /* tet_rule_3: the centroid row carries a negative weight (-2/15). */ +inline constexpr PointWeight3D tet_rule_3[] = { + {0.25_R, 0.25_R, 0.25_R, Real(-2) / 15}, + {0.5_R, Real(1) / 6, Real(1) / 6, 0.075_R}, + {Real(1) / 6, 0.5_R, Real(1) / 6, 0.075_R}, + {Real(1) / 6, Real(1) / 6, 0.5_R, 0.075_R}, + {Real(1) / 6, Real(1) / 6, Real(1) / 6, 0.075_R} +}; + /* tet_rule_4: the centroid row carries a negative weight as well. */ +inline constexpr PointWeight3D tet_rule_4[] = { + {2.5e-01_R, 2.5e-01_R, 2.5e-01_R, -1.31555555555555556e-02_R}, + {7.85714285714285714e-01_R, 7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.62222222222222222e-03_R}, + {7.14285714285714285e-02_R, 7.85714285714285714e-01_R, 7.14285714285714285e-02_R, 7.62222222222222222e-03_R}, + {7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.85714285714285714e-01_R, 7.62222222222222222e-03_R},
+ {7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.14285714285714285e-02_R, 7.62222222222222222e-03_R}, + {3.99403576166799219e-01_R, 3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 2.48888888888888889e-02_R}, + {3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 1.00596423833200785e-01_R, 2.48888888888888889e-02_R}, + {1.00596423833200785e-01_R, 1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 2.48888888888888889e-02_R}, + {1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 2.48888888888888889e-02_R}, + {1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 3.99403576166799219e-01_R, 2.48888888888888889e-02_R}, + {3.99403576166799219e-01_R, 1.00596423833200785e-01_R, 3.99403576166799219e-01_R, 2.48888888888888889e-02_R} +}; + +inline constexpr PointWeight3D tet_rule_5[] = { + {3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 1.8781320953002641800e-02_R}, + {3.1088591926330060980e-01_R, 6.7342242201009817060e-02_R, 3.1088591926330060980e-01_R, 1.8781320953002641800e-02_R}, + {6.7342242201009817060e-02_R, 3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 1.8781320953002641800e-02_R}, + {3.1088591926330060980e-01_R, 3.1088591926330060980e-01_R, 6.7342242201009817060e-02_R, 1.8781320953002641800e-02_R}, + {9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 1.2248840519393658257e-02_R}, + {9.2735250310891226402e-02_R, 7.2179424906732632079e-01_R, 9.2735250310891226402e-02_R, 1.2248840519393658257e-02_R}, + {7.2179424906732632079e-01_R, 9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 1.2248840519393658257e-02_R}, + {9.2735250310891226402e-02_R, 9.2735250310891226402e-02_R, 7.2179424906732632079e-01_R, 1.2248840519393658257e-02_R}, + {4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 4.5449629587435035051e-01_R, 7.0910034628469110730e-03_R}, + {4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 
4.5503704125649649492e-02_R, 7.0910034628469110730e-03_R}, + {4.5503704125649649492e-02_R, 4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 7.0910034628469110730e-03_R}, + {4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 7.0910034628469110730e-03_R}, + {4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 4.5449629587435035051e-01_R, 7.0910034628469110730e-03_R}, + {4.5449629587435035051e-01_R, 4.5449629587435035051e-01_R, 4.5503704125649649492e-02_R, 7.0910034628469110730e-03_R} +}; + +inline constexpr PointWeight3D tet_rule_6[] = { + {3.56191386222544953e-01_R, 2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 6.65379170969464506e-03_R}, + {2.14602871259151684e-01_R, 3.56191386222544953e-01_R, 2.14602871259151684e-01_R, 6.65379170969464506e-03_R}, + {2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 3.56191386222544953e-01_R, 6.65379170969464506e-03_R}, + {2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 2.14602871259151684e-01_R, 6.65379170969464506e-03_R}, + {8.77978124396165982e-01_R, 4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 1.67953517588677620e-03_R}, + {4.06739585346113397e-02_R, 8.77978124396165982e-01_R, 4.06739585346113397e-02_R, 1.67953517588677620e-03_R}, + {4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 8.77978124396165982e-01_R, 1.67953517588677620e-03_R}, + {4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 4.06739585346113397e-02_R, 1.67953517588677620e-03_R}, + {3.29863295731730594e-02_R, 3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 9.22619692394239843e-03_R}, + {3.22337890142275646e-01_R, 3.29863295731730594e-02_R, 3.22337890142275646e-01_R, 9.22619692394239843e-03_R}, + {3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 3.29863295731730594e-02_R, 9.22619692394239843e-03_R}, + {3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 3.22337890142275646e-01_R, 9.22619692394239843e-03_R}, + {6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 
2.69672331458315867e-01_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 8.03571428571428248e-03_R}, + {2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 6.03005664791649076e-01_R, 8.03571428571428248e-03_R}, + {6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 2.69672331458315867e-01_R, 8.03571428571428248e-03_R}, + {2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 6.03005664791649076e-01_R, 8.03571428571428248e-03_R}, + {2.69672331458315867e-01_R, 6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R}, + {6.03005664791649076e-01_R, 6.36610018750175299e-02_R, 2.69672331458315867e-01_R, 8.03571428571428248e-03_R}, + {6.03005664791649076e-01_R, 2.69672331458315867e-01_R, 6.36610018750175299e-02_R, 8.03571428571428248e-03_R} +}; + +LIBMESH_DEVICE_INLINE +Rule1D gauss_legendre_rule(const unsigned int order) +{ + switch (order) + { + case 0: + case 1: + return {1u, gauss_legendre_points_1, gauss_legendre_weights_1}; + case 2: + case 3: + return {2u, gauss_legendre_points_2, gauss_legendre_weights_2}; + case 4: + case 5: + return {3u, gauss_legendre_points_3, gauss_legendre_weights_3}; + case 6: + case 7: + return {4u, gauss_legendre_points_4, gauss_legendre_weights_4}; + case 8: + case 9: + return {5u, gauss_legendre_points_5, gauss_legendre_weights_5}; + case 10: + case 11: + return {6u, gauss_legendre_points_6, gauss_legendre_weights_6}; + case 12: + case 13: + return {7u, gauss_legendre_points_7, gauss_legendre_weights_7}; 
+ default: /* order above 13: no table, so an empty rule is returned */ + return {0u, nullptr, nullptr}; + } +} + /* Returns the triangle table selected for `order` (0 through 7; orders 0 and 1 share the one-point rule). Any other order yields {0, nullptr}, so callers must check `count`. */ +LIBMESH_DEVICE_INLINE +Rule2D triangle_rule(const unsigned int order) +{ + switch (order) + { + case 0: + case 1: + return {1u, tri_rule_1}; + case 2: + return {3u, tri_rule_2}; + case 3: + return {4u, tri_rule_3}; + case 4: + return {6u, tri_rule_4}; + case 5: + return {7u, tri_rule_5}; + case 6: + return {12u, tri_rule_6}; + case 7: + return {12u, tri_rule_7}; + default: + return {0u, nullptr}; + } +} + /* Returns the tetrahedron table selected for `order` (0 through 6); any other order yields {0, nullptr}. The order 3 and 4 tables contain a negative weight, so when allow_negative_weights is false those two orders also yield {0, nullptr}. NOTE(review): the positive-weight order-5 table is not substituted in that case - confirm callers handle count == 0 rather than integrating over zero points. */ +LIBMESH_DEVICE_INLINE +Rule3D tetrahedron_rule(const unsigned int order, + const bool allow_negative_weights = true) +{ + switch (order) + { + case 0: + case 1: + return {1u, tet_rule_1}; + case 2: + return {4u, tet_rule_2}; + case 3: + return allow_negative_weights ? Rule3D{5u, tet_rule_3} : Rule3D{0u, nullptr}; + case 4: + return allow_negative_weights ? Rule3D{11u, tet_rule_4} : Rule3D{0u, nullptr}; + case 5: + return {14u, tet_rule_5}; + case 6: + return {24u, tet_rule_6}; + default: + return {0u, nullptr}; + } +} + +} // namespace libMesh::Quadrature::Gauss + +#endif // LIBMESH_QUADRATURE_GAUSS_RULES_H diff --git a/include/systems/hilbert_assembly.h b/include/systems/hilbert_assembly.h new file mode 100644 index 00000000000..21c0882fba7 --- /dev/null +++ b/include/systems/hilbert_assembly.h @@ -0,0 +1,352 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details.
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#ifndef LIBMESH_HILBERT_ASSEMBLY_H +#define LIBMESH_HILBERT_ASSEMBLY_H + +#include "hilbert_assembly_kernel.h" + +#include "libmesh/fdm_gradient.h" +#include "libmesh/fe_abstract.h" +#include "libmesh/fem_context.h" +#include "libmesh/fem_function_base.h" +#include "libmesh/function_base.h" +#include "libmesh/libmesh_common.h" +#include "libmesh/quadrature.h" +#include "libmesh/tensor_tools.h" + +namespace libMesh +{ +namespace detail +{ + /* Read-only view over one variable's element FE data (JxW, phi, xyz, reference quadrature points, FE map, optionally dphi) obtained from an FEMContext. */ +class HostHilbertFEAccess +{ +public: /* QpData: accessor bound to a single quadrature point. It forwards every query to the owning HostHilbertFEAccess at index _qp and stores the inverse Jacobian built once in its constructor. It keeps a reference to that owner, so it must not outlive it. NOTE(review): these members are LIBMESH_DEVICE_INLINE but forward to owner accessors that carry no such annotation - confirm this is intended for the host-only path. */ + class QpData + { + public: + LIBMESH_DEVICE_INLINE + QpData(const HostHilbertFEAccess & fe, + const unsigned int qp) + : _fe(fe), + _qp(qp), + _Jinv(fe.build_inverse_jacobian(qp)) + { + } + + LIBMESH_DEVICE_INLINE + Real JxW() const + { + return _fe.JxW(_qp); + } + + LIBMESH_DEVICE_INLINE + Real phi(const unsigned int i) const + { + return _fe.phi(i, _qp); + } + + LIBMESH_DEVICE_INLINE + const RealGradient & dphi(const unsigned int i) const + { + return _fe.dphi(i, _qp); + } + + LIBMESH_DEVICE_INLINE + const Point & xyz() const + { + return _fe.xyz(_qp); + } + + LIBMESH_DEVICE_INLINE + unsigned int qp_index() const + { + return _qp; + } + + LIBMESH_DEVICE_INLINE + const Point & reference_point() const + { + return _fe.reference_point(_qp); + } + + LIBMESH_DEVICE_INLINE + const RealTensor & inverse_jacobian() const + { + return _Jinv; + } + + LIBMESH_DEVICE_INLINE + unsigned int elem_index() const + { + return _fe.elem_index(); + } + + private: + const HostHilbertFEAccess & _fe; + unsigned int _qp; + RealTensor _Jinv; + }; + /* Binds to the element FE data of variable `var` in context `c`. Shape-function gradients are only looked up when hilbert_order > 0; elem_index defaults to libMesh::invalid_uint. */ + HostHilbertFEAccess(FEMContext & c, + const unsigned int var, + const unsigned int hilbert_order, + const unsigned int elem_index = libMesh::invalid_uint) + : _n_dofs(c.n_dof_indices(var)), + _elem_index(elem_index), +
_JxW(c.get_element_fe(var)->get_JxW()), + _phi(c.get_element_fe(var)->get_phi()), + _xyz(c.get_element_fe(var)->get_xyz()), + _reference_points(c.get_element_qrule().get_points()), + _fe_map(c.get_element_fe(var)->get_fe_map()), + _dphi(hilbert_order > 0 ? &c.get_element_fe(var)->get_dphi() : nullptr) + { + } + + unsigned int n_qpoints() const + { + return cast_int(_JxW.size()); + } + + unsigned int n_dofs() const + { + return _n_dofs; + } + + Real JxW(const unsigned int qp) const + { + return _JxW[qp]; + } + + Real phi(const unsigned int i, const unsigned int qp) const + { + return _phi[i][qp]; + } + + const RealGradient & dphi(const unsigned int i, const unsigned int qp) const + { + libmesh_assert(_dphi); + return (*_dphi)[i][qp]; + } + + const Point & xyz(const unsigned int qp) const + { + return _xyz[qp]; + } + + const Point & reference_point(const unsigned int qp) const + { + return _reference_points[qp]; + } + + unsigned int elem_index() const + { + return _elem_index; + } + + RealTensor build_inverse_jacobian(const unsigned int qp) const + { + RealTensor Jinv; + Jinv(0, 0) = _fe_map.get_dxidx()[qp]; +#if LIBMESH_DIM > 1 + Jinv(0, 1) = _fe_map.get_dxidy()[qp]; + Jinv(1, 0) = _fe_map.get_detadx()[qp]; + Jinv(1, 1) = _fe_map.get_detady()[qp]; +#endif +#if LIBMESH_DIM > 2 + Jinv(0, 2) = _fe_map.get_dxidz()[qp]; + Jinv(1, 2) = _fe_map.get_detadz()[qp]; + Jinv(2, 0) = _fe_map.get_dzetadx()[qp]; + Jinv(2, 1) = _fe_map.get_dzetady()[qp]; + Jinv(2, 2) = _fe_map.get_dzetadz()[qp]; +#endif + return Jinv; + } + + LIBMESH_DEVICE_INLINE + QpData qp_data(const unsigned int qp, + const bool) const + { + return QpData(*this, qp); + } + +private: + const unsigned int _n_dofs; + const unsigned int _elem_index; + const std::vector & _JxW; + const std::vector> & _phi; + const std::vector & _xyz; + const std::vector & _reference_points; + const FEMap & _fe_map; + const std::vector> * _dphi; +}; + +class HostHilbertGoalAccess +{ +public: + HostHilbertGoalAccess(FEMFunctionBase & 
goal_func, + FDMGradient * goal_grad, + FEMContext & input_context) + : _goal_func(goal_func), + _goal_grad(goal_grad), + _input_context(input_context) + { + } + + template + Number value(const QpData &, const Point & p) + { + return _goal_func(_input_context, p); + } + + template + Gradient gradient(const QpData &, const Point & p) + { + libmesh_assert(_goal_grad); + return (*_goal_grad)(_input_context, p); + } + +private: + FEMFunctionBase & _goal_func; + FDMGradient * const _goal_grad; + FEMContext & _input_context; +}; + +template +class FunctionFDMGradient : public FunctionBase +{ +public: + typedef typename TensorTools::DecrementRank::type ValType; + + FunctionFDMGradient(FunctionBase & value_func, + const Real eps) + : _val_func(value_func.clone()), + _eps(eps) + { + } + + virtual std::unique_ptr> clone() const override + { + return std::make_unique>(*_val_func, _eps); + } + + virtual GradType operator()(const Point & p, + const Real time = 0.) override + { + GradType g; + + auto & val = *_val_func; + const Real one_over_dim = Real(0.5) / _eps; + + g(0) = (val(p + Point(_eps), time) - + val(p + Point(-_eps), time)) * one_over_dim; +#if LIBMESH_DIM > 1 + g(1) = (val(p + Point(0, _eps), time) - + val(p + Point(0, -_eps), time)) * one_over_dim; +#endif +#if LIBMESH_DIM > 2 + g(2) = (val(p + Point(0, 0, _eps), time) - + val(p + Point(0, 0, -_eps), time)) * one_over_dim; +#endif + + return g; + } + + virtual void operator()(const Point & p, + const Real time, + DenseVector & output) override + { + const unsigned int sz = cast_int(output.size()); + DenseVector v(sz); + + auto & val = *_val_func; + + val(p + Point(_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + output(i)(0) = v(i); + + val(p + Point(-_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + { + output(i)(0) -= v(i); + output(i)(0) /= (2 * _eps); + } + +#if LIBMESH_DIM > 1 + val(p + Point(0, _eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + output(i)(1) = v(i); + + val(p + 
Point(0, -_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + { + output(i)(1) -= v(i); + output(i)(1) /= (2 * _eps); + } +#endif +#if LIBMESH_DIM > 2 + val(p + Point(0, 0, _eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + output(i)(2) = v(i); + + val(p + Point(0, 0, -_eps), time, v); + for (unsigned int i = 0; i != sz; ++i) + { + output(i)(2) -= v(i); + output(i)(2) /= (2 * _eps); + } +#endif + } + +private: + std::unique_ptr> _val_func; + Real _eps; +}; + +class HostHilbertAccumulator +{ +public: + HostHilbertAccumulator(DenseSubVector & F, + DenseSubMatrix & K) + : _F(F), + _K(K) + { + } + + void add_residual(const unsigned int i, + const Number value) + { + _F(i) += value; + } + + void add_jacobian(const unsigned int i, + const unsigned int j, + const Number value) + { + _K(i, j) += value; + } + +private: + DenseSubVector & _F; + DenseSubMatrix & _K; +}; + +} // namespace detail +} // namespace libMesh + +#endif // LIBMESH_HILBERT_ASSEMBLY_H diff --git a/include/systems/hilbert_assembly_kernel.h b/include/systems/hilbert_assembly_kernel.h new file mode 100644 index 00000000000..9b8c57a48e5 --- /dev/null +++ b/include/systems/hilbert_assembly_kernel.h @@ -0,0 +1,239 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+#ifndef LIBMESH_HILBERT_ASSEMBLY_KERNEL_H
+#define LIBMESH_HILBERT_ASSEMBLY_KERNEL_H
+
+#include "libmesh/libmesh_common.h"
+#include "libmesh/libmesh_device.h"
+#include "libmesh/point.h"
+#include "libmesh/function_base.h"
+#include "libmesh/vector_value.h"
+
+#include
+#include
+
+namespace libMesh
+{
+namespace detail
+{
+// Storage type used when an accessor keeps a copy of a forwarded argument (see the make_* factories further down).
+template
+using hilbert_storage_t =
+  std::conditional_t, T, std::decay_t>;
+// coeff_at overload for storage indexed with call syntax, coeff(i); pointers and arrays use the overloads below.
+template > &&
+                           !std::is_array_v>,
+                           int> = 0>
+LIBMESH_DEVICE_INLINE decltype(auto)
+coeff_at(const CoeffStorage & coeff, const unsigned int i)
+{
+  return coeff(i);
+}
+// coeff_at overload for raw pointers: plain subscript.
+template
+LIBMESH_DEVICE_INLINE const Scalar &
+coeff_at(const Scalar * coeff, const unsigned int i)
+{
+  return coeff[i];
+}
+// coeff_at overload for built-in arrays: plain subscript; the extent N is not used for bounds checking.
+template
+LIBMESH_DEVICE_INLINE const Scalar &
+coeff_at(const Scalar (&coeff)[N], const unsigned int i)
+{
+  libmesh_ignore(N);
+  return coeff[i];
+}
+// Return the sum over i < n_dofs of coeff_i * phi_i at the quadrature point described by qp_data.
+template
+LIBMESH_DEVICE_INLINE
+Number interpolate_hilbert_value(const QpData & qp_data,
+                                 const CoeffStorage & coeff,
+                                 const unsigned int n_dofs)
+{
+  Number u = 0.;
+
+  for (unsigned int i = 0; i != n_dofs; ++i)
+    u += coeff_at(coeff, i) * qp_data.phi(i);
+
+  return u;
+}
+// Return the sum over i < n_dofs of coeff_i * dphi_i at the quadrature point described by qp_data.
+template
+LIBMESH_DEVICE_INLINE
+Gradient interpolate_hilbert_gradient(const QpData & qp_data,
+                                      const CoeffStorage & coeff,
+                                      const unsigned int n_dofs)
+{
+  Gradient grad_u;
+  grad_u.zero();
+
+  for (unsigned int i = 0; i != n_dofs; ++i)
+    grad_u.add_scaled(qp_data.dphi(i), coeff_at(coeff, i));
+
+  return grad_u;
+}
+// Solution accessor: interpolates element coefficients through the FE accessor and carries the scalar later used to scale Jacobian entries.
+template
+class HilbertSolutionAccess
+{
+public:
+  LIBMESH_DEVICE_INLINE
+  HilbertSolutionAccess(const FEAccess & fe,
+                        CoeffStorage coeff,
+                        const Number solution_derivative)
+    : _fe(fe),
+      _coeff(coeff),
+      _solution_derivative(solution_derivative)
+  {
+  }
+
+  template
+  LIBMESH_DEVICE_INLINE
+  Number value(const QpData & qp_data) const
+  {
+    return interpolate_hilbert_value(qp_data, _coeff, _fe.n_dofs());
+  }
+
+  template
+  LIBMESH_DEVICE_INLINE
+  Gradient gradient(const QpData & qp_data) const
+  {
+    return interpolate_hilbert_gradient(qp_data, _coeff, _fe.n_dofs());
+  }
+
+  LIBMESH_DEVICE_INLINE
+  Number solution_derivative() const
+  {
+    return _solution_derivative;
+  }
+
+private:
+  const FEAccess & _fe; // held by reference: the FE accessor must outlive this object
+  CoeffStorage _coeff;
+  Number _solution_derivative;
+};
+// Factory that builds a HilbertSolutionAccess, forwarding `coeff` into the accessor's storage.
+template
+LIBMESH_DEVICE_INLINE auto
+make_hilbert_solution_access(const FEAccess & fe,
+                             CoeffStorage && coeff,
+                             const Number solution_derivative)
+{
+  return HilbertSolutionAccess>(
+    fe,
+    std::forward(coeff),
+    solution_derivative);
+}
+// Goal accessor backed by two callables evaluated at the physical point only; the QpData argument is ignored.
+template
+class HilbertAnalyticGoalAccess
+{
+public:
+  LIBMESH_DEVICE_INLINE
+  HilbertAnalyticGoalAccess(GoalFunction goal_func,
+                            GoalGradient goal_grad)
+    : _goal_func(goal_func),
+      _goal_grad(goal_grad)
+  {
+  }
+
+  template
+  LIBMESH_DEVICE_INLINE
+  Number value(const QpData &, const Point & p) const
+  {
+    return _goal_func(p);
+  }
+
+  template
+  LIBMESH_DEVICE_INLINE
+  Gradient gradient(const QpData &, const Point & p) const
+  {
+    return _goal_grad(p);
+  }
+
+private:
+  GoalFunction _goal_func;
+  GoalGradient _goal_grad;
+};
+// Factory that builds a HilbertAnalyticGoalAccess from the two forwarded callables.
+template
+LIBMESH_DEVICE_INLINE auto
+make_hilbert_analytic_goal_access(GoalFunction && goal_func,
+                                  GoalGradient && goal_grad)
+{
+  return HilbertAnalyticGoalAccess,
+                                   hilbert_storage_t>(
+    std::forward(goal_func),
+    std::forward(goal_grad));
+}
+// Accumulate one element's residual, and its Jacobian if requested, for the projection of `goal` onto the FE space; gradient terms are added when hilbert_order > 0.
+template
+LIBMESH_DEVICE_INLINE void
+assemble_hilbert_element(const FEAccess & fe,
+                         const SolutionAccess & solution,
+                         GoalAccess & goal,
+                         const bool request_jacobian,
+                         const unsigned int hilbert_order,
+                         Accumulator & accum)
+{
+  const unsigned int n_qpoints = fe.n_qpoints();
+  const unsigned int n_u_dofs = fe.n_dofs();
+
+  for (unsigned int qp = 0; qp != n_qpoints; qp++)
+    {
+      const auto qp_data = fe.qp_data(qp, hilbert_order > 0);
+      const Point & xyz = qp_data.xyz();
+      const Number err_u = solution.value(qp_data) - goal.value(qp_data, xyz);
+      // value term: JxW * (u_h - goal) * phi_i
+      for (unsigned int i = 0; i != n_u_dofs; i++)
+        accum.add_residual(i, qp_data.JxW() * (err_u * qp_data.phi(i)));
+
+      if (hilbert_order > 0)
+        {
+          const Gradient err_grad_u =
+            solution.gradient(qp_data) - goal.gradient(qp_data, xyz);
+          // gradient term: JxW * (grad u_h - grad goal) . dphi_i
+          for (unsigned int i = 0; i != n_u_dofs; i++)
+            accum.add_residual(i, qp_data.JxW() * (err_grad_u * qp_data.dphi(i)));
+        }
+
+      if (request_jacobian)
+        {
+          const Number JxWxD = qp_data.JxW() * solution.solution_derivative();
+          // mass-like term phi_i * phi_j, scaled by JxW and the solution derivative
+          for (unsigned int i = 0; i != n_u_dofs; i++)
+            for (unsigned int j = 0; j != n_u_dofs; ++j)
+              accum.add_jacobian(i, j, JxWxD * (qp_data.phi(i) * qp_data.phi(j)));
+
+          if (hilbert_order > 0) // plus the dphi_i . dphi_j term with the same scaling
+            for (unsigned int i = 0; i != n_u_dofs; i++)
+              for (unsigned int j = 0; j != n_u_dofs; ++j)
+                accum.add_jacobian(i, j, JxWxD * (qp_data.dphi(i) * qp_data.dphi(j)));
+        }
+    }
+}
+
+} // namespace detail
+} // namespace libMesh
+
+#endif // LIBMESH_HILBERT_ASSEMBLY_KERNEL_H
diff --git a/m4/libmesh_optional_packages.m4 b/m4/libmesh_optional_packages.m4
index 2c569d088c0..a6e6c7703f5 100644
--- a/m4/libmesh_optional_packages.m4
+++ b/m4/libmesh_optional_packages.m4
@@ -861,6 +861,300 @@ AM_CONDITIONAL(LIBMESH_ENABLE_METAPHYSICL, test x$enablemetaphysicl = xyes)
+# -------------------------------------------------------------
+# Kokkos -- optional, enables the native Kokkos FE math path
+# -------------------------------------------------------------
+AC_ARG_WITH([kokkos],
+            AS_HELP_STRING([--with-kokkos=DIR],
+                           [Enable Kokkos support using the installation at DIR]),
+            [KOKKOS_DIR="$withval"],
+            [KOKKOS_DIR="no"])
+
+AC_ARG_WITH([kokkos-backend],
+            AS_HELP_STRING([--with-kokkos-backend=BACKEND],
+                           [cuda|hip|sycl|openmp|serial (default: auto-detect from KokkosCore_config.h)]),
+            [KOKKOS_BACKEND="$withval"], [KOKKOS_BACKEND="auto"])
+
+dnl Allow callers to provide the full Kokkos toolchain directly.
+AC_ARG_VAR([KOKKOS_CXX], [Compiler for compiling Kokkos translation units])
+AC_ARG_VAR([KOKKOS_CPPFLAGS], [Preprocessor flags for compiling Kokkos translation units])
+AC_ARG_VAR([KOKKOS_CXXFLAGS], [C++ flags for compiling Kokkos translation units])
+AC_ARG_VAR([KOKKOS_LDFLAGS], [Linker flags for linking Kokkos translation units])
+AC_ARG_VAR([KOKKOS_LIBS], [Libraries for linking Kokkos translation units])
+
+LIBMESH_KOKKOS_BUILD_CPPFLAGS=""
+LIBMESH_KOKKOS_BUILD_CXXFLAGS=""
+LIBMESH_KOKKOS_BUILD_LDFLAGS=""
+
+dnl Allow the caller (e.g. MOOSE's configure_libmesh.sh) to pre-set the
+dnl Kokkos compiler and flags via environment variables. If KOKKOS_CXX is
+dnl already set, we skip auto-detection entirely — the caller knows best.
+dnl The KOKKOS_* variables are declared with AC_ARG_VAR above; the values
+dnl settled on here are exported below as LIBMESH_KOKKOS_BUILD_* substitutions.
+
+AS_IF([test "x$KOKKOS_DIR" != "xno"],
+      [
+        libmesh_kokkos_include_dirs="-I$KOKKOS_DIR/include"
+        AS_IF([test -n "$PETSC_ARCH" && test -d "$KOKKOS_DIR/$PETSC_ARCH/include"],
+              [libmesh_kokkos_include_dirs="$libmesh_kokkos_include_dirs -I$KOKKOS_DIR/$PETSC_ARCH/include"])
+
+        libmesh_kokkos_lib_dirs=
+        AS_IF([test -d "$KOKKOS_DIR/lib"],
+              [libmesh_kokkos_lib_dirs="-L$KOKKOS_DIR/lib"])
+        AS_IF([test -n "$PETSC_ARCH" && test -d "$KOKKOS_DIR/$PETSC_ARCH/lib"],
+              [libmesh_kokkos_lib_dirs="$libmesh_kokkos_lib_dirs -L$KOKKOS_DIR/$PETSC_ARCH/lib"])
+
+        KOKKOS_CFG="$KOKKOS_DIR/include/KokkosCore_config.h"
+        AS_IF([! test -r "$KOKKOS_CFG" && test -n "$PETSC_ARCH" &&
+               test -r "$KOKKOS_DIR/$PETSC_ARCH/include/KokkosCore_config.h"],
+              [KOKKOS_CFG="$KOKKOS_DIR/$PETSC_ARCH/include/KokkosCore_config.h"])
+
+        AS_IF([test -r "$KOKKOS_DIR/include/Kokkos_Core.hpp" ||
+               (test -n "$PETSC_ARCH" &&
+                test -r "$KOKKOS_DIR/$PETSC_ARCH/include/Kokkos_Core.hpp")],
+              [
+                enablekokkos=yes
+                libmesh_optional_INCLUDES="$libmesh_optional_INCLUDES $libmesh_kokkos_include_dirs"
+
+                dnl Only auto-detect if KOKKOS_CXX was not pre-set by the caller
+                AS_IF([test "x$KOKKOS_CXX" = "x"],
+                      [
+                        dnl Auto-detect backend from real define lines only: CMake leaves a commented-out undef line naming every disabled option
+                        AS_IF([test "x$KOKKOS_BACKEND" = "xauto"],
+                              [
+                                AS_IF([test -r "$KOKKOS_CFG"],
+                                      [
+                                        AS_IF([grep -E -q 'define +KOKKOS_ENABLE_CUDA( |$)' "$KOKKOS_CFG"],
+                                              [KOKKOS_BACKEND=cuda],
+                                              [AS_IF([grep -E -q 'define +KOKKOS_ENABLE_HIP( |$)' "$KOKKOS_CFG"],
+                                                     [KOKKOS_BACKEND=hip],
+                                                     [AS_IF([grep -E -q 'define +KOKKOS_ENABLE_SYCL( |$)' "$KOKKOS_CFG"],
+                                                            [KOKKOS_BACKEND=sycl],
+                                                            [AS_IF([grep -E -q 'define +KOKKOS_ENABLE_OPENMP( |$)' "$KOKKOS_CFG"],
+                                                                   [KOKKOS_BACKEND=openmp],
+                                                                   [KOKKOS_BACKEND=serial])])])])
+                                      ],
+                                      [KOKKOS_BACKEND=serial])
+                              ])
+
+                        AC_MSG_RESULT([Kokkos backend: $KOKKOS_BACKEND])
+
+                        dnl Check if Kokkos was built with OpenMP (again matching define lines only)
+                        have_kokkos_openmp=no
+                        AS_IF([test -r "$KOKKOS_CFG"],
+                              [AS_IF([grep -E -q 'define +KOKKOS_ENABLE_OPENMP( |$)' "$KOKKOS_CFG"],
+                                     [have_kokkos_openmp=yes])])
+
+                        case "$KOKKOS_BACKEND" in
+                          cuda)
+                            AC_PATH_PROG([NVCC_WRAPPER],[nvcc_wrapper],[no],[$PATH])
+                            AS_IF([test "x$NVCC_WRAPPER" != "xno"],
+                                  [
+                                    libmesh_kokkos_host_cxx=""
+                                    AS_IF([test "x$enablempi" = "xyes"],
+                                          [
+                                            libmesh_kokkos_host_cxx=`$CXX --showme:command 2>/dev/null`
+                                            AS_IF([test "x$libmesh_kokkos_host_cxx" = "x"],
+                                                  [libmesh_kokkos_host_cxx=`$CXX -show 2>/dev/null | sed 's/ .*//'`])
+                                          ],
+                                          [libmesh_kokkos_host_cxx="$CXX"])
+
+                                    AC_MSG_CHECKING([for host compiler usable with nvcc_wrapper])
+                                    AS_IF([test "x$libmesh_kokkos_host_cxx" = "x"],
+                                          [
+                                            AC_MSG_RESULT([not found])
+                                            AC_MSG_ERROR([Could not determine a host compiler to pass to nvcc_wrapper. Set KOKKOS_CXXFLAGS with a suitable -ccbin value or provide NVCC_WRAPPER_DEFAULT_COMPILER in the environment.])
+                                          ],
+                                          [AC_MSG_RESULT([$libmesh_kokkos_host_cxx])])
+
+                                    libmesh_kokkos_wrapper_shim=`cd "$srcdir" && pwd`/build-aux/libmesh_nvcc_wrapper
+                                    dnl Route through a tiny libMesh-owned shim so Automake's
+                                    dnl dependency-tracking flags do not trip nvcc_wrapper up on host-only
+                                    dnl .C files. The shim path above is absolute whether or not srcdir is.
+                                    KOKKOS_CXX="$SHELL $libmesh_kokkos_wrapper_shim $NVCC_WRAPPER"
+                                    dnl nvcc_wrapper already mediates between nvcc and the host
+                                    dnl compiler; passing raw nvcc forwarding flags through the
+                                    dnl wrapper can leak them to g++ and fail. Keep only the
+                                    dnl CUDA flags the wrapper recognizes here.
+                                    KOKKOS_CXXFLAGS="--extended-lambda --expt-relaxed-constexpr --disable-warnings -x cu -ccbin $libmesh_kokkos_host_cxx"
+                                    KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs"
+                                  ],
+                                  [
+                                    AC_MSG_ERROR([nvcc_wrapper was not found but Kokkos CUDA backend was requested. libMesh's project-wide Kokkos CUDA build requires nvcc_wrapper (or an explicitly provided CUDA-capable KOKKOS_CXX) rather than raw nvcc.])
+                                  ])
+                            AS_IF([test "x$have_kokkos_openmp" = "xyes"],
+                                  [
+                                    KOKKOS_CXXFLAGS="$KOKKOS_CXXFLAGS -fopenmp"
+                                    KOKKOS_LDFLAGS="$KOKKOS_LDFLAGS -fopenmp"
+                                  ])
+                            ;;
+                          hip)
+                            AC_PATH_PROG([HIPCC],[hipcc],[no],[$PATH])
+                            AS_IF([test "x$HIPCC" = "xno"],
+                                  [AC_MSG_ERROR([hipcc not found but Kokkos HIP backend requested])])
+                            KOKKOS_CXX="$HIPCC"
+                            KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs"
+                            ;;
+                          sycl)
+                            AC_PATH_PROG([ICPX],[icpx],[no],[$PATH])
+                            AS_IF([test "x$ICPX" = "xno"],
+                                  [AC_MSG_ERROR([icpx not found but Kokkos SYCL backend requested])])
+                            KOKKOS_CXX="$ICPX"
+                            KOKKOS_CXXFLAGS="-fsycl"
+                            KOKKOS_LDFLAGS="-fsycl $libmesh_kokkos_lib_dirs"
+                            ;;
+                          openmp)
+                            KOKKOS_CXX="${CXX}"
+                            KOKKOS_CXXFLAGS="-fopenmp -x c++"
+                            KOKKOS_LDFLAGS="-fopenmp $libmesh_kokkos_lib_dirs"
+                            ;;
+                          serial|*)
+                            KOKKOS_CXX="${CXX}"
+                            KOKKOS_CXXFLAGS="-x c++"
+                            KOKKOS_LDFLAGS="$libmesh_kokkos_lib_dirs"
+                            ;;
+                        esac
+                      ],
+                      [AC_MSG_RESULT([Using caller-provided KOKKOS_CXX=$KOKKOS_CXX])])
+
+                dnl Set defaults for any variables not provided by caller or auto-detect
+                KOKKOS_CPPFLAGS="${KOKKOS_CPPFLAGS:--DLIBMESH_KOKKOS_COMPILATION $libmesh_kokkos_include_dirs}"
+                KOKKOS_LDFLAGS="${KOKKOS_LDFLAGS:-$libmesh_kokkos_lib_dirs}"
+                KOKKOS_LIBS="${KOKKOS_LIBS:--lkokkoscore}"
+                libmesh_optional_LIBS="$libmesh_optional_LIBS $KOKKOS_LDFLAGS $KOKKOS_LIBS"
+
+                dnl If KOKKOS_CXX differs from the main compiler, it may not be the MPI
+                dnl wrapper and thus may need the wrapper's flags explicitly to find mpi.h
+                dnl and resolve MPI symbols. Query the primary CXX wrapper first, then fall
+                dnl back to the configure-time MPI variables. Queries that echo a whole
+                dnl command line have the leading compiler word removed; the sed bracket expression uses quadrigraphs because m4 strips bare brackets.
+                KOKKOS_MPI_CPPFLAGS=""
+                KOKKOS_MPI_LIBS=""
+                AS_IF([test "x$enablempi" = "xyes" && test "x$KOKKOS_CXX" != "x$CXX"],
+                      [
+                        AC_MSG_CHECKING([for MPI compile flags usable with KOKKOS_CXX])
+                        KOKKOS_MPI_CPPFLAGS=`$CXX -showme:compile 2>/dev/null`
+                        AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"],
+                              [KOKKOS_MPI_CPPFLAGS=`$CXX -compile_info 2>/dev/null | sed 's/^@<:@^ @:>@* //'`])
+                        AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"],
+                              [KOKKOS_MPI_CPPFLAGS=`$CXX -show 2>/dev/null | sed 's/^@<:@^ @:>@* //'`])
+                        AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"],
+                              [KOKKOS_MPI_CPPFLAGS="$MPI_INCLUDES"])
+                        AS_IF([test "x$KOKKOS_MPI_CPPFLAGS" = "x"],
+                              [AC_MSG_RESULT([not found])],
+                              [AC_MSG_RESULT([$KOKKOS_MPI_CPPFLAGS])])
+
+                        AC_MSG_CHECKING([for MPI link flags usable with KOKKOS_CXX])
+                        KOKKOS_MPI_LIBS=`$CXX -showme:link 2>/dev/null`
+                        AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"],
+                              [KOKKOS_MPI_LIBS=`$CXX -link_info 2>/dev/null | sed 's/^@<:@^ @:>@* //'`])
+                        AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"],
+                              [KOKKOS_MPI_LIBS=`$CXX -show 2>/dev/null | sed 's/^@<:@^ @:>@* //'`])
+                        AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"],
+                              [KOKKOS_MPI_LIBS="$MPI_LDFLAGS $MPI_LIBS"])
+                        AS_IF([test "x$KOKKOS_MPI_LIBS" = "x"],
+                              [AC_MSG_RESULT([not found])],
+                              [AC_MSG_RESULT([$KOKKOS_MPI_LIBS])])
+                      ])
+
+                libmesh_optional_LIBS="$libmesh_optional_LIBS $KOKKOS_MPI_LIBS"
+
+                dnl Fail configure early if the chosen Kokkos compiler/flags/libs cannot
+                dnl actually compile and link a minimal Kokkos program.
+                AC_MSG_CHECKING([whether the Kokkos compiler configuration works])
+                libmesh_save_CXX="$CXX"
+                libmesh_save_CPPFLAGS="$CPPFLAGS"
+                libmesh_save_CXXFLAGS="$CXXFLAGS"
+                libmesh_save_LDFLAGS="$LDFLAGS"
+                libmesh_save_LIBS="$LIBS"
+
+                CXX="$KOKKOS_CXX"
+                CPPFLAGS="$CPPFLAGS $KOKKOS_CPPFLAGS $KOKKOS_MPI_CPPFLAGS"
+                CXXFLAGS="$CXXFLAGS $KOKKOS_CXXFLAGS"
+                LDFLAGS="$LDFLAGS $KOKKOS_LDFLAGS"
+                LIBS="$LIBS $KOKKOS_LIBS $KOKKOS_MPI_LIBS"
+                AC_LANG_PUSH([C++])
+
+                AS_IF([test "x$enablempi" = "xyes"],
+                      [
+                        LDFLAGS="$LDFLAGS $MPI_LDFLAGS"
+                        LIBS="$LIBS $MPI_LIBS"
+                        AC_LINK_IFELSE(
+                          [AC_LANG_SOURCE([[
+#include <mpi.h>
+#include <Kokkos_Core.hpp>
+int main(int argc, char ** argv)
+{
+  MPI_Init(&argc, &argv);
+  Kokkos::initialize(argc, argv);
+  Kokkos::finalize();
+  MPI_Finalize();
+  return 0;
+}
+]])],
+                          [kokkos_config_works=yes],
+                          [kokkos_config_works=no])
+                      ],
+                      [
+                        AC_LINK_IFELSE(
+                          [AC_LANG_SOURCE([[
+#include <Kokkos_Core.hpp>
+int main(int argc, char ** argv)
+{
+  Kokkos::initialize(argc, argv);
+  Kokkos::finalize();
+  return 0;
+}
+]])],
+                          [kokkos_config_works=yes],
+                          [kokkos_config_works=no])
+                      ])
+                AC_LANG_POP([C++])
+
+                CXX="$libmesh_save_CXX"
+                CPPFLAGS="$libmesh_save_CPPFLAGS"
+                CXXFLAGS="$libmesh_save_CXXFLAGS"
+                LDFLAGS="$libmesh_save_LDFLAGS"
+                LIBS="$libmesh_save_LIBS"
+
+                AS_IF([test "x$kokkos_config_works" = "xyes"],
+                      [AC_MSG_RESULT([yes])],
+                      [AC_MSG_ERROR([configured Kokkos compiler/flags failed to compile and link a minimal test program])])
+
+                dnl Use the validated Kokkos compiler as the project-wide C++ compiler
+                dnl for Kokkos-enabled builds. Automake-generated rules compile .C
+                dnl translation units with $(CXX), so the switch has to happen at
+                dnl configure time rather than in Makefile.am.
+                CXX="$KOKKOS_CXX"
+                LIBMESH_KOKKOS_BUILD_CPPFLAGS="$KOKKOS_MPI_CPPFLAGS $KOKKOS_CPPFLAGS"
+                LIBMESH_KOKKOS_BUILD_CXXFLAGS="$KOKKOS_CXXFLAGS"
+                LIBMESH_KOKKOS_BUILD_LDFLAGS="$KOKKOS_LDFLAGS"
+
+                AC_DEFINE([HAVE_KOKKOS], [1],
+                          [Define if Kokkos support is enabled in libMesh])
+                AC_MSG_RESULT(<<< Configuring library with Kokkos support >>>)
+              ],
+              [
+                AC_MSG_WARN([Kokkos not found at $KOKKOS_DIR -- disabling Kokkos FE support])
+                enablekokkos=no
+              ])
+      ],
+      [enablekokkos=no])
+
+AC_SUBST([KOKKOS_CXX])
+AC_SUBST([KOKKOS_CPPFLAGS])
+AC_SUBST([KOKKOS_CXXFLAGS])
+AC_SUBST([KOKKOS_LDFLAGS])
+AC_SUBST([KOKKOS_LIBS])
+AC_SUBST([KOKKOS_MPI_CPPFLAGS])
+AC_SUBST([LIBMESH_KOKKOS_BUILD_CPPFLAGS])
+AC_SUBST([LIBMESH_KOKKOS_BUILD_CXXFLAGS])
+AC_SUBST([LIBMESH_KOKKOS_BUILD_LDFLAGS])
+AM_CONDITIONAL(LIBMESH_ENABLE_KOKKOS, test x$enablekokkos = xyes)
+# -------------------------------------------------------------
+
+
 AS_IF([test "$enableoptional" != no],
       [
         AC_MSG_RESULT(----------------------------------------------)
diff --git a/m4/poly2tri.m4 b/m4/poly2tri.m4
index 9c57750b511..9792bf44e74 100644
--- a/m4/poly2tri.m4
+++ b/m4/poly2tri.m4
@@ -19,7 +19,7 @@ AC_DEFUN([CONFIGURE_POLY2TRI],
 [
   dnl The poly2tri API is distributed with libmesh, so we don't have to guess
   dnl where it might be installed...
-  POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified"
+  POLY2TRI_INCLUDE="-I\$(top_builddir)/contrib/poly2tri/modified -I\$(top_srcdir)/contrib/poly2tri/poly2tri"
   AC_DEFINE(HAVE_POLY2TRI, 1, [Flag indicating whether the library will be compiled with poly2tri support])
   AC_MSG_RESULT(<<< Configuring library with poly2tri support >>>)
 ])
diff --git a/src/apps/L2system.C b/src/apps/L2system.C
index 5d1bae58f71..e930828d2d7 100644
--- a/src/apps/L2system.C
+++ b/src/apps/L2system.C
@@ -17,22 +17,2289 @@
 #include "L2system.h"
+#include "libmesh/dof_map.h"
 #include "libmesh/elem.h"
 #include "libmesh/fe_base.h"
 #include "libmesh/fe_interface.h"
 #include "libmesh/fem_context.h"
 #include "libmesh/getpot.h"
+#include "libmesh/linear_solver.h"
 #include "libmesh/mesh.h"
+#include "libmesh/numeric_vector.h"
+#include "libmesh/parallel_sync.h"
 #include "libmesh/quadrature.h"
 #include "libmesh/string_to_enum.h"
 #include "libmesh/utility.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef LIBMESH_HAVE_PETSC
+#include "libmesh/petsc_matrix_base.h"
+#include "libmesh/petsc_macro.h"
+#include "libmesh/petsc_vector.h"
+#include
+#endif
+
+#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS)
+#include "../../include/gpu/kokkos_hilbert_system.h"
+#include "libmesh/fe_shape_traits.h"
+
+#define PETSC_SKIP_CXX_COMPLEX_FIX 1
+#include
+#undef __CUDACC_VER__
+#endif
+
 using namespace libMesh;
+// Everything below is compiled only for real-number builds with Kokkos enabled.
+#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS)
+constexpr unsigned int kokkos_hilbert_max_dofs = 27; // per-element DoF limit checked in build_hilbert_record_quadrature_orders
+constexpr unsigned int kokkos_parsed_fem_max_fields = 16; // capacity of the fixed-size field arrays used by the parsed-goal access below
+
+template
+using KokkosScalarView = ::Kokkos::View;
+
+using KokkosDenseJacobianView = ::Kokkos::View;
+using KokkosFlatJacobianView = ::Kokkos::View;
+using KokkosUnsignedIntView = ::Kokkos::View;
+using KokkosPetscIntView = ::Kokkos::View;
+using KokkosSizeView = ::Kokkos::View;
+using KokkosFieldKeyRecordView = ::Kokkos::View;
+using KokkosFieldDofRecordView = ::Kokkos::View;
+using KokkosFieldKeyStorage =
+  libMesh::Kokkos::detail::StaticArrayAccess;
+using KokkosFieldDofStorage =
+  libMesh::Kokkos::detail::StaticArrayAccess;
+using KokkosLocalIndexView = DofMap::KokkosLocalIndexCache::elem_local_index_view;
+// One element's slot in the flattened assembly buffers; the offsets are running totals assigned by build_hilbert_element_records.
+struct HilbertElementAssemblyRecord
+{
+  dof_id_type elem_id = DofObject::invalid_id;
+  std::size_t rhs_offset = 0;
+  std::size_t mat_offset = 0;
+  unsigned int elem_index = libMesh::invalid_uint;
+  unsigned int quadrature_order = 0;
+  unsigned int n_dofs = 0;
+};
+// A [begin, end) range plus the FE/element properties it is tagged with; presumably the range indexes the record list -- confirm at the point of use.
+struct KokkosHilbertAssemblyBucket
+{
+  std::size_t begin = 0;
+  std::size_t end = 0;
+  FEShapeKey key;
+  ElemType elem_type = INVALID_ELEM;
+  ElemMappingType mapping_type = LAGRANGE_MAP;
+  unsigned int n_nodes = 0;
+  unsigned int elem_p_level = 0;
+  unsigned int quadrature_order = 0;
+};
+// Build the shape key {family, element type, FE order + p-level} for a concrete element.
+FEShapeKey
+make_hilbert_shape_key(const Elem & elem,
+                       const FEType & fe_type)
+{
+  return {fe_type.family,
+          elem.type(),
+          static_cast(fe_type.order.get_order() + cast_int(elem.p_level()))};
+}
+// Same key built from an element type and p-level, for callers that have no Elem object.
+FEShapeKey
+make_hilbert_shape_key(const ElemType elem_type,
+                       const unsigned int elem_p_level,
+                       const FEType & fe_type)
+{
+  return {fe_type.family,
+          elem_type,
+          static_cast(fe_type.order.get_order() + cast_int(elem_p_level))};
+}
+// Copy device results to host mirrors and add them into F, and into K when request_jacobian is set; F.size() bounds every loop.
+void
+accumulate_hilbert_dense_outputs(const KokkosScalarView & d_F,
+                                 const KokkosDenseJacobianView & d_K,
+                                 const bool request_jacobian,
+                                 DenseSubVector & F,
+                                 DenseSubMatrix & K)
+{
+  auto h_F = ::Kokkos::create_mirror_view(d_F);
+  ::Kokkos::deep_copy(h_F, d_F);
+
+  for (unsigned int i = 0; i != F.size(); ++i)
+    F(i) += h_F(i);
+
+  if (!request_jacobian) // skip the matrix copy entirely when no Jacobian is wanted
+    return;
+
+  auto h_K = ::Kokkos::create_mirror_view(d_K);
+  ::Kokkos::deep_copy(h_K, d_K);
+
+  for (unsigned int i = 0; i != F.size(); ++i)
+    for (unsigned int j = 0; j != F.size(); ++j)
+      K(i, j) += h_K(i, j);
+}
+// Append one record per cached element that has DoFs and passes the subdomain filter; resets both totals, but `records` is appended to, not cleared.
+void
+build_hilbert_element_records(HilbertSystem & sys,
+                              std::vector & records,
+                              std::size_t & total_rhs_entries,
+                              std::size_t & total_mat_entries)
+{
+  DofMap & dof_map = sys.get_dof_map();
+  const auto * dof_index_cache = dof_map.get_kokkos_dof_index_cache(0);
+  libmesh_assert(dof_index_cache);
+  total_rhs_entries = 0;
+  total_mat_entries = 0;
+
+  for (auto elem_index : index_range(dof_index_cache->host_element_ids))
+    {
+      if (!sys.subdomains_list().empty() && // an empty list means no subdomain restriction
+          !sys.subdomains_list().count(dof_index_cache->host_element_subdomains[elem_index]))
+        continue;
+
+      const unsigned int n_dofs = dof_index_cache->host_element_n_dofs[elem_index];
+      if (!n_dofs)
+        continue;
+
+      HilbertElementAssemblyRecord record;
+      record.elem_id = dof_index_cache->host_element_ids[elem_index];
+      record.rhs_offset = total_rhs_entries;
+      record.mat_offset = total_mat_entries;
+      record.elem_index = cast_int(elem_index);
+      record.n_dofs = n_dofs;
+      total_rhs_entries += n_dofs;
+      total_mat_entries += n_dofs * n_dofs; // dense n_dofs x n_dofs block per element
+      records.push_back(std::move(record));
+    }
+}
+// Stamp each record with the quadrature order; returns false at the first record the Kokkos local assembly cannot handle.
+bool
+build_hilbert_record_quadrature_orders(HilbertSystem & sys,
+                                       std::vector & records)
+{
+  const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache();
+  const FEType fe_type = sys.variable_type(0);
+  const int quadrature_order =
+    cast_int(fe_type.default_quadrature_order()) + sys.extra_quadrature_order;
+
+  for (auto & record : records)
+    {
+      libmesh_error_msg_if(quadrature_order < 0, // loop-invariant check: it only runs when there is at least one record
+                           "Negative quadrature order is not supported for Kokkos Hilbert assembly");
+      record.quadrature_order = cast_int(quadrature_order);
+
+      const FEShapeKey shape_key =
+        make_hilbert_shape_key(geometry_cache.element_types(record.elem_index),
+                               geometry_cache.element_p_levels(record.elem_index),
+                               fe_type);
+      if (!libMesh::Kokkos::detail::supports_hilbert_local_assembly(
+            shape_key,
+            geometry_cache.element_mapping_types(record.elem_index),
+            record.quadrature_order) ||
+          record.n_dofs > kokkos_hilbert_max_dofs)
+        return false;
+    }
+
+  return true;
+}
+// Return the cached goal, building it on first use; yields nullptr when there is no host goal or the dynamic_cast to the parsed type fails.
+template
+const CachedGoal *
+ensure_kokkos_goal_cache(std::unique_ptr & cache,
+                         const HostGoalPtr & host_goal,
+                         BuildCache && build_cache)
+{
+  if (cache)
+    return cache.get();
+
+  if (!host_goal)
+    return nullptr;
+
+  const auto * parsed_goal = dynamic_cast(host_goal.get());
+  if (!parsed_goal)
+    return nullptr;
+
+  cache = build_cache(*parsed_goal);
+  return cache.get();
+}
+// Ask the DofMaps to prepare local-index caches: variable 0 of this system, then every field variable of the FEM goal on the input system.
+void
+prewarm_kokkos_hilbert_entities(HilbertSystem & sys,
+                                const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal)
+{
+  if (sys.current_local_solution)
+    sys.get_dof_map().prepare_kokkos_local_index_cache(*sys.current_local_solution, 0);
+
+  if (!fem_goal || !sys.input_system || !sys.input_system->current_local_solution)
+    return;
+
+  for (unsigned int field = 0; field != fem_goal->n_field_variables(); ++field)
+    sys.input_system->get_dof_map().prepare_kokkos_local_index_cache(
+      *sys.input_system->current_local_solution,
+      fem_goal->field_variable_number(field));
+}
+// Host goal accessor that evaluates a KokkosParsedFEMFunction with field values and gradients read from the input FEMContext.
+class HostExactParsedFEMGoalAccess
+{
+public:
+  HostExactParsedFEMGoalAccess(const libMesh::Kokkos::KokkosParsedFEMFunction & goal,
+                               FEMContext & input_context)
+    : _goal(goal),
+      _input_context(input_context)
+  {
+  }
+
+  template
+  Number value(const QpData & qp_data, const Point & xyz) const
+  {
+    Number vars[LIBMESH_DIM + 1 + kokkos_parsed_fem_max_fields] = {};
+    fill_variables(qp_data, xyz, vars);
+    return _goal.value(vars);
+  }
+
+  template
+  Gradient gradient(const QpData & qp_data, const Point & xyz) const
+  {
+    Number vars[LIBMESH_DIM + 1 + kokkos_parsed_fem_max_fields] = {};
+    Gradient field_gradients[kokkos_parsed_fem_max_fields];
+    fill_variables(qp_data, xyz, vars);
+    // NOTE(review): nothing here checks n_field_variables() <= kokkos_parsed_fem_max_fields; presumably enforced where the goal is built -- confirm
+    for (unsigned int field = 0; field != _goal.n_field_variables(); ++field)
+      field_gradients[field] =
+        _input_context.interior_gradient(_goal.field_variable_number(field), qp_data.qp_index());
+
+    return _goal.gradient(vars, field_gradients);
+  }
+
+private:
+  template
+  void fill_variables(const QpData & qp_data, // layout: coordinates, then time at index LIBMESH_DIM, then one value per field
+                      const Point & xyz,
+                      Number * vars) const
+  {
+    vars[0] = xyz(0);
+#if LIBMESH_DIM > 1
+    vars[1] = xyz(1);
+#endif
+#if LIBMESH_DIM > 2
+    vars[2] = xyz(2);
+#endif
+    vars[LIBMESH_DIM] = _goal.time();
+
+    for (unsigned int field = 0; field != _goal.n_field_variables(); ++field)
+      vars[LIBMESH_DIM + 1 + field] =
+        _input_context.interior_value(_goal.field_variable_number(field), qp_data.qp_index());
+  }
+
+  const libMesh::Kokkos::KokkosParsedFEMFunction & _goal;
+  FEMContext & _input_context;
+};
+// Non-owning bundle of per-element state; every pointer defaults to null and the indices to their invalid values.
+struct KokkosElementAssemblyState
+{
+  const MeshBase::KokkosGeometryCache * geometry_cache = nullptr;
+  const DofMap::KokkosLocalIndexCache * solution_local_indices = nullptr;
+  PetscVector * solution_vector = nullptr;
+  unsigned int elem_index = libMesh::invalid_uint;
+  ElemType elem_type = INVALID_ELEM;
+  ElemMappingType mapping_type = LAGRANGE_MAP;
+  unsigned int elem_n_nodes = 0;
+  unsigned int elem_p_level = 0;
+};
+
+struct KokkosFEMGoalData
+{
+  KokkosFieldKeyStorage field_keys;
+  KokkosFieldDofStorage field_dofs;
+  std::array field_local_indices;
+  PetscVector * input_vector = nullptr;
+};
+
+struct KokkosHilbertBatchData
+{
+  KokkosUnsignedIntView elem_indices;
+  KokkosUnsignedIntView elem_n_dofs;
+  KokkosUnsignedIntView quadrature_orders;
+  KokkosSizeView rhs_offsets;
+  KokkosSizeView mat_offsets;
+};
+
+struct KokkosFEMGoalBatchData
+{
+  std::vector bucket_field_keys;
+  std::vector bucket_field_dofs;
+  libMesh::Kokkos::detail::StaticArrayAccess
+    field_local_indices;
+  PetscVector * input_vector = nullptr;
+};
+
+enum class KokkosDirectMatrixLayout
+{
+  none,
+  seq_aij,
+  mpi_aij
+};
+// Hash functor for an integer pair; the mix has the same shape as boost::hash_combine with a 64-bit constant.
+struct PetscIntPairHash
+{
+  std::size_t operator()(const std::pair & values) const noexcept
+  {
+    const auto first_hash = std::hash{}(values.first);
+    const auto second_hash = std::hash{}(values.second);
+    return first_hash ^ (second_hash + 0x9e3779b97f4a7c15ULL + (first_hash << 6) + (first_hash >> 2));
+  }
+};
+// Assembly plan state: element records and buckets, sparsity arrays with device views, remote-contribution buffers, and the PetscSF handles it destroys.
+struct KokkosPetscAssemblyPlan
+{
+  std::vector records;
+  std::vector buckets;
+  std::size_t total_rhs_entries = 0;
+  std::size_t total_mat_entries = 0;
+  KokkosHilbertBatchData batch_data;
+  std::vector local_row_offsets;
+  std::vector local_column_indices;
+  std::vector diag_row_offsets;
+  std::vector diag_column_indices;
+  std::vector offdiag_row_offsets;
+  std::vector offdiag_column_indices;
+  std::vector offdiag_global_columns;
+  KokkosPetscIntView local_row_offsets_view;
+  KokkosPetscIntView local_column_indices_view;
+  KokkosPetscIntView diag_row_offsets_view;
+  KokkosPetscIntView diag_column_indices_view;
+  KokkosPetscIntView offdiag_row_offsets_view;
+  KokkosPetscIntView offdiag_column_indices_view;
+  KokkosPetscIntView offdiag_global_columns_view;
+  KokkosSizeView rhs_local_slots;
+  KokkosSizeView mat_value_slots;
+  ::Kokkos::View rhs_remote_values;
+  ::Kokkos::View mat_remote_values;
+  ::Kokkos::View rhs_remote_root_values;
+  ::Kokkos::View mat_remote_root_values;
+  std::vector rhs_remote_rows;
+  std::vector mat_remote_rows;
+  std::vector mat_remote_cols;
+  std::vector mat_remote_root_indices;
+  KokkosPetscIntView rhs_remote_rows_view;
+  KokkosPetscIntView mat_remote_rows_view;
+  KokkosPetscIntView mat_remote_cols_view;
+  KokkosPetscIntView mat_remote_root_indices_view;
+  std::vector rhs_remote_owners;
+  std::vector mat_remote_owners;
+  PetscSF rhs_remote_sf = nullptr; // destroyed in the destructor
+  PetscSF mat_remote_sf = nullptr; // destroyed in the destructor
+  KokkosFEMGoalBatchData fem_goal_batch_data;
+  const void * geometry_cache_id = nullptr;
+  const void * dof_index_cache_id = nullptr;
+  FEType fe_type;
+  unsigned int hilbert_order = 0;
+  int extra_quadrature_order = 0;
+  std::set subdomains;
+  const void * graph_matrix_target = nullptr;
+  const void * direct_matrix_target = nullptr;
+  const void * direct_rhs_target = nullptr;
+  const void * fem_goal_target = nullptr;
+  const void * input_vector_target = nullptr;
+  PetscInt row_start = 0;
+  PetscInt row_stop = 0;
+  PetscInt col_start = 0;
+  PetscInt col_stop = 0;
+  std::size_t rhs_local_size = 0;
+  std::size_t mat_diag_size = 0;
+  std::size_t mat_offdiag_size = 0;
+  KokkosDirectMatrixLayout direct_matrix_layout = KokkosDirectMatrixLayout::none;
+  bool direct_storage_active = false;
+  // NOTE(review): copy operations are not deleted, so a copied plan would destroy the same PetscSF twice -- confirm plans are never copied.
+  ~KokkosPetscAssemblyPlan()
+  {
+    if (rhs_remote_sf)
+      {
+        PetscErrorCode ierr = PetscSFDestroy(&rhs_remote_sf);
+        libmesh_ignore(ierr); // the error code is dropped: a destructor cannot report it
+      }
+    if (mat_remote_sf)
+      {
+        PetscErrorCode ierr = PetscSFDestroy(&mat_remote_sf);
+        libmesh_ignore(ierr);
+      }
+  }
+};
+
+using RemoteRhsContribution = std::pair;
+using RemoteMatContribution = std::tuple;
+
+constexpr PetscMemType
+kokkos_default_petsc_mem_type()
+{
+  return PETSC_MEMTYPE_KOKKOS;
+}
+// Destroy both remote PetscSF handles (ignoring PETSc error codes), null them, and reset the root-side buffers and remote index views.
+void
+clear_kokkos_petsc_remote_sf(KokkosPetscAssemblyPlan & plan)
+{
+  if (plan.rhs_remote_sf)
+    {
+      const auto ierr = PetscSFDestroy(&plan.rhs_remote_sf);
+      libmesh_ignore(ierr);
+      plan.rhs_remote_sf = nullptr;
+    }
+  if (plan.mat_remote_sf)
+    {
+      const auto ierr = PetscSFDestroy(&plan.mat_remote_sf);
+      libmesh_ignore(ierr);
+      plan.mat_remote_sf = nullptr;
+    }
+  plan.rhs_remote_root_values = {};
+  plan.mat_remote_root_values = {};
+  plan.mat_remote_root_indices.clear();
+  plan.rhs_remote_rows_view = {};
+  plan.mat_remote_rows_view = {};
+  plan.mat_remote_cols_view = {};
+  plan.mat_remote_root_indices_view = {};
+}
+// Reallocate device_view under `name` at host_values.size() and fill it from the host vector through a mirror view.
+void
+sync_kokkos_petsc_int_view(const std::vector & host_values,
+                           KokkosPetscIntView & device_view,
+                           const std::string & name)
+{
+  device_view = KokkosPetscIntView(name, host_values.size());
+  auto host_view = ::Kokkos::create_mirror_view(device_view);
+  for (auto i : index_range(host_values))
+    host_view(i) = host_values[i];
+  ::Kokkos::deep_copy(device_view, host_view);
+}
+
+void
+sync_kokkos_petsc_owned_graph_views(KokkosPetscAssemblyPlan & plan)
+{
+  sync_kokkos_petsc_int_view(plan.local_row_offsets,
+                             plan.local_row_offsets_view,
+                             "hilbert_local_row_offsets");
+  sync_kokkos_petsc_int_view(plan.local_column_indices,
+                             plan.local_column_indices_view,
+                             "hilbert_local_column_indices");
+  sync_kokkos_petsc_int_view(plan.diag_row_offsets,
+                             plan.diag_row_offsets_view,
+                             "hilbert_diag_row_offsets");
+
sync_kokkos_petsc_int_view(plan.diag_column_indices, + plan.diag_column_indices_view, + "hilbert_diag_column_indices"); + sync_kokkos_petsc_int_view(plan.offdiag_row_offsets, + plan.offdiag_row_offsets_view, + "hilbert_offdiag_row_offsets"); + sync_kokkos_petsc_int_view(plan.offdiag_column_indices, + plan.offdiag_column_indices_view, + "hilbert_offdiag_column_indices"); + sync_kokkos_petsc_int_view(plan.offdiag_global_columns, + plan.offdiag_global_columns_view, + "hilbert_offdiag_global_columns"); +} + +void +sync_kokkos_petsc_remote_slot_views(KokkosPetscAssemblyPlan & plan) +{ + sync_kokkos_petsc_int_view(plan.rhs_remote_rows, + plan.rhs_remote_rows_view, + "hilbert_rhs_remote_rows"); + sync_kokkos_petsc_int_view(plan.mat_remote_rows, + plan.mat_remote_rows_view, + "hilbert_mat_remote_rows"); + sync_kokkos_petsc_int_view(plan.mat_remote_cols, + plan.mat_remote_cols_view, + "hilbert_mat_remote_cols"); + sync_kokkos_petsc_int_view(plan.mat_remote_root_indices, + plan.mat_remote_root_indices_view, + "hilbert_mat_remote_root_indices"); +} + +bool +build_kokkos_petsc_owned_csr_graph(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + const KokkosDirectMatrixLayout layout, + const PetscInt row_start, + const PetscInt row_stop, + const PetscInt col_start, + const PetscInt col_stop) +{ + const auto * sp = sys.get_dof_map().get_sparsity_pattern(); + if (!sp) + return false; + + const auto & graph = sp->get_sparsity_pattern(); + plan.local_row_offsets.assign(graph.size() + 1, 0); + plan.local_column_indices.clear(); + plan.diag_row_offsets.clear(); + plan.diag_column_indices.clear(); + plan.offdiag_row_offsets.clear(); + plan.offdiag_column_indices.clear(); + plan.offdiag_global_columns.clear(); + plan.row_start = row_start; + plan.row_stop = row_stop; + plan.col_start = col_start; + plan.col_stop = col_stop; + + std::vector> row_columns(graph.size()); + std::vector> row_offdiag_globals; + + if (layout == KokkosDirectMatrixLayout::mpi_aij) + 
row_offdiag_globals.resize(graph.size()); + + std::vector all_offdiag_globals; + for (auto local_row : index_range(graph)) + { + auto & cols = row_columns[local_row]; + cols.reserve(graph[local_row].size()); + for (const auto dof : graph[local_row]) + cols.push_back(cast_int(dof)); + + std::sort(cols.begin(), cols.end()); + cols.erase(std::unique(cols.begin(), cols.end()), cols.end()); + plan.local_row_offsets[local_row + 1] = + plan.local_row_offsets[local_row] + cast_int(cols.size()); + plan.local_column_indices.insert(plan.local_column_indices.end(), cols.begin(), cols.end()); + + if (layout != KokkosDirectMatrixLayout::mpi_aij) + continue; + + auto & offdiag_globals = row_offdiag_globals[local_row]; + for (const auto col : cols) + if (col < col_start || col >= col_stop) + { + offdiag_globals.push_back(col); + all_offdiag_globals.push_back(col); + } + } + + if (layout != KokkosDirectMatrixLayout::mpi_aij) + return true; + + std::sort(all_offdiag_globals.begin(), all_offdiag_globals.end()); + all_offdiag_globals.erase(std::unique(all_offdiag_globals.begin(), all_offdiag_globals.end()), + all_offdiag_globals.end()); + plan.offdiag_global_columns = std::move(all_offdiag_globals); + + std::unordered_map offdiag_column_map; + offdiag_column_map.reserve(plan.offdiag_global_columns.size()); + for (auto i : index_range(plan.offdiag_global_columns)) + offdiag_column_map.emplace(plan.offdiag_global_columns[i], cast_int(i)); + + plan.diag_row_offsets.assign(graph.size() + 1, 0); + plan.offdiag_row_offsets.assign(graph.size() + 1, 0); + + for (auto local_row : index_range(graph)) + { + const auto & cols = row_columns[local_row]; + const auto & offdiag_globals = row_offdiag_globals[local_row]; + for (const auto col : cols) + if (col >= col_start && col < col_stop) + plan.diag_column_indices.push_back(col - col_start); + + for (const auto col : offdiag_globals) + plan.offdiag_column_indices.push_back(offdiag_column_map.at(col)); + + plan.diag_row_offsets[local_row + 1] = + 
cast_int(plan.diag_column_indices.size()); + plan.offdiag_row_offsets[local_row + 1] = + cast_int(plan.offdiag_column_indices.size()); + } + + sync_kokkos_petsc_owned_graph_views(plan); + return true; +} + +void +finalize_kokkos_direct_slots(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + const std::vector & rhs_local_slots, + const std::vector & mat_value_slots) +{ + plan.rhs_local_slots = KokkosSizeView("hilbert_rhs_local_slots", rhs_local_slots.size()); + plan.mat_value_slots = KokkosSizeView("hilbert_mat_value_slots", mat_value_slots.size()); + + auto h_rhs_local_slots = ::Kokkos::create_mirror_view(plan.rhs_local_slots); + auto h_mat_value_slots = ::Kokkos::create_mirror_view(plan.mat_value_slots); + for (auto i : index_range(rhs_local_slots)) + h_rhs_local_slots(i) = rhs_local_slots[i]; + for (auto i : index_range(mat_value_slots)) + h_mat_value_slots(i) = mat_value_slots[i]; + + ::Kokkos::deep_copy(plan.rhs_local_slots, h_rhs_local_slots); + ::Kokkos::deep_copy(plan.mat_value_slots, h_mat_value_slots); + + plan.rhs_remote_owners.resize(plan.rhs_remote_rows.size()); + for (auto i : index_range(plan.rhs_remote_rows)) + plan.rhs_remote_owners[i] = + sys.get_dof_map().dof_owner(cast_int(plan.rhs_remote_rows[i])); + + plan.mat_remote_owners.resize(plan.mat_remote_rows.size()); + for (auto i : index_range(plan.mat_remote_rows)) + plan.mat_remote_owners[i] = + sys.get_dof_map().dof_owner(cast_int(plan.mat_remote_rows[i])); + + if (!plan.rhs_remote_rows.empty()) + plan.rhs_remote_values = + ::Kokkos::View("hilbert_rhs_remote_values", plan.rhs_remote_rows.size()); + + if (!plan.mat_remote_rows.empty()) + plan.mat_remote_values = + ::Kokkos::View("hilbert_mat_remote_values", plan.mat_remote_rows.size()); +} + +std::size_t +lookup_kokkos_owned_matrix_slot(const KokkosPetscAssemblyPlan & plan, + const PetscInt row, + const PetscInt col) +{ + const PetscInt local_row = row - plan.row_start; + libmesh_error_msg_if(local_row < 0 || row >= plan.row_stop, + 
"HilbertSystem Kokkos remote matrix slot lookup received a nonlocal row."); + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + { + const PetscInt row_begin = plan.local_row_offsets[local_row]; + const PetscInt row_end = plan.local_row_offsets[local_row + 1]; + const auto slot_it = std::lower_bound(plan.local_column_indices.begin() + row_begin, + plan.local_column_indices.begin() + row_end, + col); + libmesh_error_msg_if(slot_it == plan.local_column_indices.begin() + row_end || *slot_it != col, + "HilbertSystem Kokkos owned CSR graph is missing a sequential " + "remote matrix coupling."); + return cast_int(std::distance(plan.local_column_indices.begin(), slot_it)); + } + + if (col >= plan.col_start && col < plan.col_stop) + { + const PetscInt local_col = col - plan.col_start; + const PetscInt row_begin = plan.diag_row_offsets[local_row]; + const PetscInt row_end = plan.diag_row_offsets[local_row + 1]; + const auto slot_it = std::lower_bound(plan.diag_column_indices.begin() + row_begin, + plan.diag_column_indices.begin() + row_end, + local_col); + libmesh_error_msg_if(slot_it == plan.diag_column_indices.begin() + row_end || *slot_it != local_col, + "HilbertSystem Kokkos owned CSR graph is missing a diagonal MPI " + "remote matrix coupling."); + return cast_int(std::distance(plan.diag_column_indices.begin(), slot_it)); + } + + const auto offdiag_col_it = + std::lower_bound(plan.offdiag_global_columns.begin(), plan.offdiag_global_columns.end(), col); + libmesh_error_msg_if(offdiag_col_it == plan.offdiag_global_columns.end() || *offdiag_col_it != col, + "HilbertSystem Kokkos owned CSR graph is missing an off-diagonal MPI " + "remote matrix column."); + const PetscInt offdiag_local_col = + cast_int(std::distance(plan.offdiag_global_columns.begin(), offdiag_col_it)); + const PetscInt row_begin = plan.offdiag_row_offsets[local_row]; + const PetscInt row_end = plan.offdiag_row_offsets[local_row + 1]; + const auto slot_it = 
std::lower_bound(plan.offdiag_column_indices.begin() + row_begin, + plan.offdiag_column_indices.begin() + row_end, + offdiag_local_col); + libmesh_error_msg_if(slot_it == plan.offdiag_column_indices.begin() + row_end || + *slot_it != offdiag_local_col, + "HilbertSystem Kokkos owned CSR graph is missing an off-diagonal MPI " + "remote matrix coupling."); + return plan.mat_diag_size + + cast_int(std::distance(plan.offdiag_column_indices.begin(), slot_it)); +} + +bool +build_kokkos_petsc_remote_rhs_sf(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + if (plan.direct_matrix_layout != KokkosDirectMatrixLayout::mpi_aij || + plan.rhs_remote_rows.empty()) + return true; + + std::vector remote_nodes(plan.rhs_remote_rows.size()); + for (auto i : index_range(plan.rhs_remote_rows)) + { + const processor_id_type owner = plan.rhs_remote_owners[i]; + remote_nodes[i].rank = cast_int(owner); + remote_nodes[i].index = + cast_int(plan.rhs_remote_rows[i] - sys.get_dof_map().first_dof(owner)); + } + + LibmeshPetscCall2(sys.comm(), PetscSFCreate(sys.comm().get(), &plan.rhs_remote_sf)); + LibmeshPetscCall2(sys.comm(), + PetscSFSetGraph(plan.rhs_remote_sf, + cast_int(plan.rhs_local_size), + cast_int(plan.rhs_remote_rows.size()), + nullptr, + PETSC_COPY_VALUES, + remote_nodes.data(), + PETSC_COPY_VALUES)); + LibmeshPetscCall2(sys.comm(), PetscSFSetUp(plan.rhs_remote_sf)); + plan.rhs_remote_root_values = + ::Kokkos::View("hilbert_rhs_remote_root_values", plan.rhs_local_size); + return true; +} + +bool +build_kokkos_petsc_remote_mat_sf(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + if (plan.direct_matrix_layout != KokkosDirectMatrixLayout::mpi_aij || + plan.mat_remote_rows.empty()) + return true; + + const PetscInt local_row_count = plan.row_stop - plan.row_start; + std::vector setup_nodes(plan.mat_remote_rows.size()); + for (auto i : index_range(plan.mat_remote_rows)) + { + const processor_id_type owner = plan.mat_remote_owners[i]; + setup_nodes[i].rank = 
cast_int(owner); + setup_nodes[i].index = + cast_int(plan.mat_remote_rows[i] - sys.get_dof_map().first_dof(owner)); + } + + PetscSF setup_sf = nullptr; + LibmeshPetscCall2(sys.comm(), PetscSFCreate(sys.comm().get(), &setup_sf)); + LibmeshPetscCall2(sys.comm(), + PetscSFSetGraph(setup_sf, + local_row_count, + cast_int(plan.mat_remote_rows.size()), + nullptr, + PETSC_COPY_VALUES, + setup_nodes.data(), + PETSC_COPY_VALUES)); + LibmeshPetscCall2(sys.comm(), PetscSFSetUp(setup_sf)); + + const PetscInt * degrees = nullptr; + LibmeshPetscCall2(sys.comm(), PetscSFComputeDegreeBegin(setup_sf, &degrees)); + LibmeshPetscCall2(sys.comm(), PetscSFComputeDegreeEnd(setup_sf, &degrees)); + + std::vector gathered_offsets(local_row_count + 1, 0); + for (PetscInt local_row = 0; local_row != local_row_count; ++local_row) + gathered_offsets[local_row + 1] = gathered_offsets[local_row] + degrees[local_row]; + + std::vector gathered_cols(gathered_offsets.back(), -1); + if (!plan.mat_remote_cols.empty()) + { + LibmeshPetscCall2(sys.comm(), + PetscSFGatherBegin(setup_sf, + MPIU_INT, + plan.mat_remote_cols.data(), + gathered_cols.data())); + LibmeshPetscCall2(sys.comm(), + PetscSFGatherEnd(setup_sf, + MPIU_INT, + plan.mat_remote_cols.data(), + gathered_cols.data())); + } + + std::vector reply_slots(gathered_cols.size(), -1); + for (PetscInt local_row = 0; local_row != local_row_count; ++local_row) + { + const PetscInt global_row = plan.row_start + local_row; + for (PetscInt k = gathered_offsets[local_row]; k != gathered_offsets[local_row + 1]; ++k) + reply_slots[k] = + cast_int(lookup_kokkos_owned_matrix_slot(plan, global_row, gathered_cols[k])); + } + + std::vector remote_root_indices(plan.mat_remote_rows.size(), -1); + if (!remote_root_indices.empty()) + { + LibmeshPetscCall2(sys.comm(), + PetscSFScatterBegin(setup_sf, + MPIU_INT, + reply_slots.data(), + remote_root_indices.data())); + LibmeshPetscCall2(sys.comm(), + PetscSFScatterEnd(setup_sf, + MPIU_INT, + reply_slots.data(), + 
remote_root_indices.data())); + } + LibmeshPetscCall2(sys.comm(), PetscSFDestroy(&setup_sf)); + + std::vector remote_nodes(plan.mat_remote_rows.size()); + for (auto i : index_range(plan.mat_remote_rows)) + { + libmesh_error_msg_if(remote_root_indices[i] < 0, + "HilbertSystem Kokkos remote matrix slot setup did not receive an " + "owner slot index."); + remote_nodes[i].rank = cast_int(plan.mat_remote_owners[i]); + remote_nodes[i].index = remote_root_indices[i]; + } + + LibmeshPetscCall2(sys.comm(), PetscSFCreate(sys.comm().get(), &plan.mat_remote_sf)); + LibmeshPetscCall2(sys.comm(), + PetscSFSetGraph(plan.mat_remote_sf, + cast_int(plan.mat_diag_size + plan.mat_offdiag_size), + cast_int(plan.mat_remote_rows.size()), + nullptr, + PETSC_COPY_VALUES, + remote_nodes.data(), + PETSC_COPY_VALUES)); + LibmeshPetscCall2(sys.comm(), PetscSFSetUp(plan.mat_remote_sf)); + plan.mat_remote_root_indices = std::move(remote_root_indices); + plan.mat_remote_root_values = + ::Kokkos::View("hilbert_mat_remote_root_values", + plan.mat_diag_size + plan.mat_offdiag_size); + sync_kokkos_petsc_remote_slot_views(plan); + return true; +} + +bool +build_kokkos_petsc_remote_sf(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + clear_kokkos_petsc_remote_sf(plan); + if (!build_kokkos_petsc_remote_rhs_sf(sys, plan)) + return false; + if (!build_kokkos_petsc_remote_mat_sf(sys, plan)) + return false; + sync_kokkos_petsc_remote_slot_views(plan); + return true; +} + +bool +bind_kokkos_direct_slots_from_plan_graph(HilbertSystem & sys, + const DofMap::KokkosDofIndexCache & dof_index_cache, + KokkosPetscAssemblyPlan & plan, + const numeric_index_type rhs_first_local, + const numeric_index_type rhs_last_local, + std::vector & rhs_slots, + std::vector & mat_slots) +{ + plan.rhs_remote_rows.clear(); + plan.mat_remote_rows.clear(); + plan.mat_remote_cols.clear(); + plan.rhs_remote_owners.clear(); + plan.mat_remote_owners.clear(); + plan.rhs_remote_values = {}; + plan.mat_remote_values = {}; + 
clear_kokkos_petsc_remote_sf(plan); + + std::unordered_map rhs_remote_slot_map; + std::unordered_map, std::size_t, PetscIntPairHash> mat_remote_slot_map; + std::unordered_map offdiag_column_map; + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij) + { + offdiag_column_map.reserve(plan.offdiag_global_columns.size()); + for (auto i : index_range(plan.offdiag_global_columns)) + offdiag_column_map.emplace(plan.offdiag_global_columns[i], cast_int(i)); + } + + for (const auto & record : plan.records) + { + const auto elem_dofs = + &dof_index_cache.host_element_dof_indices[record.elem_index * dof_index_cache.max_dofs]; + + for (unsigned int i = 0; i != record.n_dofs; ++i) + { + const auto row = elem_dofs[i]; + if (row >= rhs_first_local && row < rhs_last_local) + rhs_slots[record.rhs_offset + i] = row - rhs_first_local; + else + { + const PetscInt petsc_row = cast_int(row); + const auto [it, inserted] = + rhs_remote_slot_map.emplace(petsc_row, plan.rhs_remote_rows.size()); + if (inserted) + plan.rhs_remote_rows.push_back(petsc_row); + rhs_slots[record.rhs_offset + i] = (rhs_last_local - rhs_first_local) + it->second; + } + } + + for (unsigned int i = 0; i != record.n_dofs; ++i) + { + const PetscInt row = cast_int(elem_dofs[i]); + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij && + (row < plan.row_start || row >= plan.row_stop)) + { + for (unsigned int j = 0; j != record.n_dofs; ++j) + { + const PetscInt col = cast_int(elem_dofs[j]); + const auto key = std::make_pair(row, col); + const auto [it, inserted] = + mat_remote_slot_map.emplace(key, plan.mat_remote_rows.size()); + if (inserted) + { + plan.mat_remote_rows.push_back(row); + plan.mat_remote_cols.push_back(col); + } + mat_slots[record.mat_offset + i * record.n_dofs + j] = + plan.mat_diag_size + plan.mat_offdiag_size + it->second; + } + continue; + } + + const PetscInt local_row = row - plan.row_start; + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + { + 
const PetscInt row_begin = plan.local_row_offsets[local_row]; + const PetscInt row_end = plan.local_row_offsets[local_row + 1]; + for (unsigned int j = 0; j != record.n_dofs; ++j) + { + const PetscInt col = cast_int(elem_dofs[j]); + const auto slot_it = std::lower_bound(plan.local_column_indices.begin() + row_begin, + plan.local_column_indices.begin() + row_end, + col); + libmesh_error_msg_if(slot_it == plan.local_column_indices.begin() + row_end || + *slot_it != col, + "HilbertSystem Kokkos owned CSR graph is missing a " + "sequential matrix coupling."); + mat_slots[record.mat_offset + i * record.n_dofs + j] = + cast_int(std::distance(plan.local_column_indices.begin(), slot_it)); + } + continue; + } + + const PetscInt diag_row_begin = plan.diag_row_offsets[local_row]; + const PetscInt diag_row_end = plan.diag_row_offsets[local_row + 1]; + const PetscInt offdiag_row_begin = plan.offdiag_row_offsets[local_row]; + const PetscInt offdiag_row_end = plan.offdiag_row_offsets[local_row + 1]; + for (unsigned int j = 0; j != record.n_dofs; ++j) + { + const PetscInt col = cast_int(elem_dofs[j]); + if (col >= plan.col_start && col < plan.col_stop) + { + const PetscInt local_col = col - plan.col_start; + const auto slot_it = + std::lower_bound(plan.diag_column_indices.begin() + diag_row_begin, + plan.diag_column_indices.begin() + diag_row_end, + local_col); + libmesh_error_msg_if(slot_it == plan.diag_column_indices.begin() + diag_row_end || + *slot_it != local_col, + "HilbertSystem Kokkos owned CSR graph is missing a " + "diagonal MPI matrix coupling."); + mat_slots[record.mat_offset + i * record.n_dofs + j] = + cast_int(std::distance(plan.diag_column_indices.begin(), slot_it)); + } + else + { + const auto offdiag_col_it = offdiag_column_map.find(col); + libmesh_error_msg_if(offdiag_col_it == offdiag_column_map.end(), + "HilbertSystem Kokkos owned CSR graph is missing an " + "off-diagonal MPI matrix column."); + const PetscInt offdiag_local_col = offdiag_col_it->second; + 
const auto slot_it = + std::lower_bound(plan.offdiag_column_indices.begin() + offdiag_row_begin, + plan.offdiag_column_indices.begin() + offdiag_row_end, + offdiag_local_col); + libmesh_error_msg_if(slot_it == plan.offdiag_column_indices.begin() + offdiag_row_end || + *slot_it != offdiag_local_col, + "HilbertSystem Kokkos owned CSR graph is missing an " + "off-diagonal MPI matrix coupling."); + mat_slots[record.mat_offset + i * record.n_dofs + j] = + plan.mat_diag_size + + cast_int(std::distance(plan.offdiag_column_indices.begin(), slot_it)); + } + } + } + } + + return true; +} + +bool +ensure_kokkos_petsc_owned_matrix(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + PetscMatrixBase & system_matrix) +{ + if (plan.graph_matrix_target == &system_matrix) + return true; + + const char * mat_type = nullptr; + const char * options_prefix = nullptr; + LibmeshPetscCall2(sys.comm(), MatGetType(system_matrix.mat(), &mat_type)); + LibmeshPetscCall2(sys.comm(), MatGetOptionsPrefix(system_matrix.mat(), &options_prefix)); + + Mat new_mat = nullptr; + LibmeshPetscCall2(sys.comm(), MatCreate(sys.comm().get(), &new_mat)); + LibmeshPetscCall2(sys.comm(), + MatSetSizes(new_mat, + cast_int(system_matrix.local_m()), + cast_int(system_matrix.local_n()), + cast_int(system_matrix.m()), + cast_int(system_matrix.n()))); + LibmeshPetscCall2(sys.comm(), MatSetBlockSize(new_mat, 1)); + if (options_prefix) + LibmeshPetscCall2(sys.comm(), MatSetOptionsPrefix(new_mat, options_prefix)); + LibmeshPetscCall2(sys.comm(), MatSetType(new_mat, mat_type)); + LibmeshPetscCall2(sys.comm(), MatSetFromOptions(new_mat)); + + if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij) + LibmeshPetscCall2(sys.comm(), + MatSeqAIJSetPreallocationCSR(new_mat, + plan.local_row_offsets.data(), + plan.local_column_indices.data(), + nullptr)); + else if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij) + LibmeshPetscCall2(sys.comm(), + MatMPIAIJSetPreallocationCSR(new_mat, + 
plan.local_row_offsets.data(), + plan.local_column_indices.data(), + nullptr)); + else + { + LibmeshPetscCall2(sys.comm(), MatDestroy(&new_mat)); + return false; + } + + LibmeshPetscCall2(sys.comm(), MatSetOption(new_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); + LibmeshPetscCall2(sys.comm(), MatAssemblyBegin(new_mat, MAT_FINAL_ASSEMBLY)); + LibmeshPetscCall2(sys.comm(), MatAssemblyEnd(new_mat, MAT_FINAL_ASSEMBLY)); + system_matrix.reset_mat(new_mat, true); + plan.graph_matrix_target = &system_matrix; + return true; +} + +bool +ensure_kokkos_petsc_direct_storage(HilbertSystem & sys, + const DofMap::KokkosDofIndexCache & dof_index_cache, + KokkosPetscAssemblyPlan & plan, + PetscMatrixBase & system_matrix, + PetscVector & system_rhs) +{ + if (plan.direct_matrix_target == &system_matrix && + plan.direct_rhs_target == &system_rhs && + plan.direct_storage_active) + return true; + + plan.direct_matrix_target = &system_matrix; + plan.direct_rhs_target = &system_rhs; + plan.direct_storage_active = false; + plan.direct_matrix_layout = KokkosDirectMatrixLayout::none; + plan.rhs_local_size = 0; + plan.mat_diag_size = 0; + plan.mat_offdiag_size = 0; + plan.rhs_remote_rows.clear(); + plan.mat_remote_rows.clear(); + plan.mat_remote_cols.clear(); + plan.rhs_remote_owners.clear(); + plan.mat_remote_owners.clear(); + plan.rhs_remote_values = {}; + plan.mat_remote_values = {}; + + const char * mat_type = nullptr; + LibmeshPetscCall2(sys.comm(), MatGetType(system_matrix.mat(), &mat_type)); + PetscBool is_seq_aij = PETSC_FALSE; + PetscBool is_seq_aijkokkos = PETSC_FALSE; + PetscBool is_mpi_aij = PETSC_FALSE; + PetscBool is_mpi_aijkokkos = PETSC_FALSE; + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJ, &is_seq_aij)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATSEQAIJKOKKOS, &is_seq_aijkokkos)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATMPIAIJ, &is_mpi_aij)); + LibmeshPetscCall2(sys.comm(), PetscStrcmp(mat_type, MATMPIAIJKOKKOS, 
&is_mpi_aijkokkos)); + if (!is_seq_aij && !is_seq_aijkokkos && !is_mpi_aij && !is_mpi_aijkokkos) + return false; + plan.direct_matrix_layout = + (is_seq_aij || is_seq_aijkokkos) ? KokkosDirectMatrixLayout::seq_aij : + KokkosDirectMatrixLayout::mpi_aij; + + PetscInt rhs_first_local_petsc = 0; + PetscInt rhs_last_local_petsc = 0; + LibmeshPetscCall2(sys.comm(), + VecGetOwnershipRange(system_rhs.vec(), &rhs_first_local_petsc, &rhs_last_local_petsc)); + const numeric_index_type rhs_first_local = cast_int(rhs_first_local_petsc); + const numeric_index_type rhs_last_local = cast_int(rhs_last_local_petsc); + PetscInt row_start = 0; + PetscInt row_stop = 0; + PetscInt col_start = 0; + PetscInt col_stop = 0; + LibmeshPetscCall2(sys.comm(), MatGetOwnershipRange(system_matrix.mat(), &row_start, &row_stop)); + LibmeshPetscCall2(sys.comm(), + MatGetOwnershipRangeColumn(system_matrix.mat(), &col_start, &col_stop)); + if (!build_kokkos_petsc_owned_csr_graph( + sys, plan, plan.direct_matrix_layout, row_start, row_stop, col_start, col_stop)) + return false; + + std::vector rhs_local_slots(plan.total_rhs_entries); + std::vector mat_value_slots(plan.total_mat_entries); + plan.rhs_local_size = cast_int(rhs_last_local - rhs_first_local); + plan.mat_diag_size = + plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij ? + cast_int(plan.local_row_offsets.empty() ? 0 : plan.local_row_offsets.back()) : + cast_int(plan.diag_row_offsets.empty() ? 0 : plan.diag_row_offsets.back()); + plan.mat_offdiag_size = + plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij ? + 0 : + cast_int(plan.offdiag_row_offsets.empty() ? 
0 : plan.offdiag_row_offsets.back()); + + if (!bind_kokkos_direct_slots_from_plan_graph( + sys, dof_index_cache, plan, rhs_first_local, rhs_last_local, rhs_local_slots, mat_value_slots)) + return false; + + if (!ensure_kokkos_petsc_owned_matrix(sys, plan, system_matrix)) + return false; + + finalize_kokkos_direct_slots(sys, plan, rhs_local_slots, mat_value_slots); + if (!build_kokkos_petsc_remote_sf(sys, plan)) + return false; + plan.direct_storage_active = true; + return true; +} + +namespace +{ + +bool +build_kokkos_element_state(HilbertSystem & sys, + const Elem & elem, + KokkosElementAssemblyState & state) +{ + auto * solution_vector = dynamic_cast *>(sys.current_local_solution.get()); + if (!solution_vector || !solution_vector->supports_kokkos_access()) + return false; + + const DofMap & dof_map = sys.get_dof_map(); + state.geometry_cache = &sys.get_mesh().get_kokkos_geometry_cache(); + state.elem_index = sys.get_mesh().get_kokkos_elem_index(elem); + if (state.elem_index == libMesh::invalid_uint) + return false; + + state.elem_type = state.geometry_cache->element_types(state.elem_index); + state.mapping_type = state.geometry_cache->element_mapping_types(state.elem_index); + state.elem_n_nodes = state.geometry_cache->element_n_nodes(state.elem_index); + state.elem_p_level = state.geometry_cache->element_p_levels(state.elem_index); + + state.solution_local_indices = + dof_map.require_kokkos_local_index_cache(*sys.current_local_solution, 0); + if (!state.solution_local_indices) + return false; + + state.solution_vector = solution_vector; + return true; +} + +bool +build_kokkos_fem_goal_data(const ElemType elem_type, + const ElemMappingType mapping_type, + const unsigned int elem_p_level, + const unsigned int hilbert_order, + System & input_system, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + KokkosFEMGoalData & goal_data) +{ + const unsigned int n_fields = goal_function.n_field_variables(); + if (n_fields > kokkos_parsed_fem_max_fields) + 
return false; + + auto * input_vector = dynamic_cast *>(input_system.current_local_solution.get()); + if (!input_vector || !input_vector->supports_kokkos_access()) + return false; + + goal_data.field_keys.size = n_fields; + goal_data.field_dofs.size = n_fields; + goal_data.input_vector = input_vector; + + for (unsigned int field = 0; field != n_fields; ++field) + { + const unsigned int var_num = goal_function.field_variable_number(field); + const FEType field_type = input_system.variable_type(var_num); + const FEShapeKey field_key = make_hilbert_shape_key(elem_type, elem_p_level, field_type); + + if (mapping_type != LAGRANGE_MAP || + !supports_shape_with_lagrange_map(field_key) || + (hilbert_order > 0 && !supports_grad_shape(field_key))) + return false; + + const unsigned int field_n_dofs = + FEInterface::n_dofs(libMesh::Kokkos::dim_from_topology(elem_type), field_type, elem_type); + if (field_n_dofs > kokkos_hilbert_max_dofs) + return false; + + const auto * local_index_cache = + input_system.get_dof_map().require_kokkos_local_index_cache( + *input_system.current_local_solution, var_num); + if (!local_index_cache) + return false; + + goal_data.field_keys.values[field] = field_key; + goal_data.field_dofs.values[field] = field_n_dofs; + goal_data.field_local_indices[field] = local_index_cache->element_local_indices; + } + + return true; +} + +auto +make_hilbert_bucket_sort_key(const MeshBase::KokkosGeometryCache & geometry_cache, + const FEType & fe_type, + const HilbertElementAssemblyRecord & record) +{ + const unsigned int elem_index = record.elem_index; + return std::make_tuple(cast_int(geometry_cache.element_types(elem_index)), + cast_int(geometry_cache.element_mapping_types(elem_index)), + cast_int(fe_type.order.get_order() + + cast_int(geometry_cache.element_p_levels(elem_index))), + record.quadrature_order, + geometry_cache.element_n_nodes(elem_index)); +} + +void +sort_hilbert_element_records(const MeshBase::KokkosGeometryCache & geometry_cache, + const 
FEType & fe_type, + std::vector & records) +{ + std::sort(records.begin(), + records.end(), + [&geometry_cache, &fe_type](const auto & lhs, const auto & rhs) + { + return make_hilbert_bucket_sort_key(geometry_cache, fe_type, lhs) < + make_hilbert_bucket_sort_key(geometry_cache, fe_type, rhs); + }); +} + +void +build_hilbert_assembly_buckets(const MeshBase::KokkosGeometryCache & geometry_cache, + const FEType & fe_type, + const std::vector & records, + std::vector & buckets) +{ + buckets.clear(); + if (records.empty()) + return; + + auto fill_bucket = [&geometry_cache, &fe_type, &records](KokkosHilbertAssemblyBucket & bucket, + const std::size_t begin, + const std::size_t end) + { + bucket.begin = begin; + bucket.end = end; + const auto & record = records[begin]; + const unsigned int elem_index = record.elem_index; + bucket.key = make_hilbert_shape_key(geometry_cache.element_types(elem_index), + geometry_cache.element_p_levels(elem_index), + fe_type); + bucket.elem_type = geometry_cache.element_types(elem_index); + bucket.mapping_type = geometry_cache.element_mapping_types(elem_index); + bucket.n_nodes = geometry_cache.element_n_nodes(elem_index); + bucket.elem_p_level = geometry_cache.element_p_levels(elem_index); + bucket.quadrature_order = record.quadrature_order; + }; + + std::size_t bucket_begin = 0; + auto current_key = make_hilbert_bucket_sort_key(geometry_cache, fe_type, records.front()); + for (std::size_t i = 1; i != records.size(); ++i) + { + const auto next_key = make_hilbert_bucket_sort_key(geometry_cache, fe_type, records[i]); + if (next_key == current_key) + continue; + + buckets.emplace_back(); + fill_bucket(buckets.back(), bucket_begin, i); + bucket_begin = i; + current_key = next_key; + } + + buckets.emplace_back(); + fill_bucket(buckets.back(), bucket_begin, records.size()); +} + +void +build_hilbert_batch_data(const std::vector & records, + KokkosHilbertBatchData & batch_data) +{ + batch_data.elem_indices = 
KokkosUnsignedIntView("hilbert_elem_indices", records.size()); + batch_data.elem_n_dofs = KokkosUnsignedIntView("hilbert_elem_n_dofs", records.size()); + batch_data.quadrature_orders = + KokkosUnsignedIntView("hilbert_quadrature_orders", records.size()); + batch_data.rhs_offsets = KokkosSizeView("hilbert_rhs_offsets", records.size()); + batch_data.mat_offsets = KokkosSizeView("hilbert_mat_offsets", records.size()); + + auto h_elem_indices = ::Kokkos::create_mirror_view(batch_data.elem_indices); + auto h_elem_n_dofs = ::Kokkos::create_mirror_view(batch_data.elem_n_dofs); + auto h_quadrature_orders = ::Kokkos::create_mirror_view(batch_data.quadrature_orders); + auto h_rhs_offsets = ::Kokkos::create_mirror_view(batch_data.rhs_offsets); + auto h_mat_offsets = ::Kokkos::create_mirror_view(batch_data.mat_offsets); + + for (auto record_index : index_range(records)) + { + const auto & record = records[record_index]; + h_elem_indices(record_index) = record.elem_index; + h_elem_n_dofs(record_index) = record.n_dofs; + h_quadrature_orders(record_index) = record.quadrature_order; + h_rhs_offsets(record_index) = record.rhs_offset; + h_mat_offsets(record_index) = record.mat_offset; + } + + ::Kokkos::deep_copy(batch_data.elem_indices, h_elem_indices); + ::Kokkos::deep_copy(batch_data.elem_n_dofs, h_elem_n_dofs); + ::Kokkos::deep_copy(batch_data.quadrature_orders, h_quadrature_orders); + ::Kokkos::deep_copy(batch_data.rhs_offsets, h_rhs_offsets); + ::Kokkos::deep_copy(batch_data.mat_offsets, h_mat_offsets); +} + +bool +kokkos_petsc_plan_matches(const HilbertSystem & sys, + const MeshBase::KokkosGeometryCache & geometry_cache, + const DofMap::KokkosDofIndexCache & dof_index_cache, + const KokkosPetscAssemblyPlan & plan) +{ + return plan.geometry_cache_id == &geometry_cache && + plan.dof_index_cache_id == &dof_index_cache && + plan.fe_type == sys.variable_type(0) && + plan.hilbert_order == sys.hilbert_order() && + plan.extra_quadrature_order == sys.extra_quadrature_order && + 
plan.subdomains == sys.subdomains_list(); +} + +bool +build_kokkos_petsc_assembly_plan(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan) +{ + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + const auto * dof_index_cache = sys.get_dof_map().get_kokkos_dof_index_cache(0); + if (!dof_index_cache) + return false; + + std::size_t total_rhs_entries = 0; + std::size_t total_mat_entries = 0; + std::vector records; + build_hilbert_element_records(sys, records, total_rhs_entries, total_mat_entries); + if (!build_hilbert_record_quadrature_orders(sys, records)) + return false; + sort_hilbert_element_records(geometry_cache, sys.variable_type(0), records); + + KokkosHilbertBatchData batch_data; + build_hilbert_batch_data(records, batch_data); + std::vector buckets; + build_hilbert_assembly_buckets(geometry_cache, sys.variable_type(0), records, buckets); + + plan.records = std::move(records); + plan.buckets = std::move(buckets); + plan.total_rhs_entries = total_rhs_entries; + plan.total_mat_entries = total_mat_entries; + plan.batch_data = std::move(batch_data); + plan.rhs_local_slots = KokkosSizeView(); + plan.mat_value_slots = KokkosSizeView(); + plan.geometry_cache_id = &geometry_cache; + plan.dof_index_cache_id = dof_index_cache; + plan.fe_type = sys.variable_type(0); + plan.hilbert_order = sys.hilbert_order(); + plan.extra_quadrature_order = sys.extra_quadrature_order; + plan.subdomains = sys.subdomains_list(); + plan.graph_matrix_target = nullptr; + plan.direct_matrix_target = nullptr; + plan.direct_rhs_target = nullptr; + plan.fem_goal_target = nullptr; + plan.input_vector_target = nullptr; + plan.direct_storage_active = false; + return true; +} + +bool +build_kokkos_fem_goal_batch_data(HilbertSystem & sys, + const KokkosPetscAssemblyPlan & plan, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + KokkosFEMGoalBatchData & batch_data) +{ + libmesh_assert(sys.input_system); + System & input_system = *sys.input_system; + const 
unsigned int n_fields = goal_function.n_field_variables(); + if (n_fields > kokkos_parsed_fem_max_fields) + return false; + + auto * input_vector = dynamic_cast *>(input_system.current_local_solution.get()); + if (!input_vector || !input_vector->supports_kokkos_access()) + return false; + + batch_data.input_vector = input_vector; + batch_data.field_local_indices.size = n_fields; + batch_data.bucket_field_keys.assign(plan.buckets.size(), KokkosFieldKeyStorage{}); + batch_data.bucket_field_dofs.assign(plan.buckets.size(), KokkosFieldDofStorage{}); + + for (unsigned int field = 0; field != n_fields; ++field) + { + const unsigned int var_num = goal_function.field_variable_number(field); + const FEType field_type = input_system.variable_type(var_num); + const auto * local_index_cache = + input_system.get_dof_map().require_kokkos_local_index_cache( + *input_system.current_local_solution, var_num); + if (!local_index_cache) + return false; + + batch_data.field_local_indices.values[field] = local_index_cache->element_local_indices; + + for (auto bucket_index : index_range(plan.buckets)) + { + const auto & bucket = plan.buckets[bucket_index]; + const FEShapeKey field_key = + make_hilbert_shape_key(bucket.elem_type, bucket.elem_p_level, field_type); + + if (bucket.mapping_type != LAGRANGE_MAP || + !supports_shape_with_lagrange_map(field_key) || + (sys.hilbert_order() > 0 && !supports_grad_shape(field_key))) + return false; + + const unsigned int field_n_dofs = + FEInterface::n_dofs(libMesh::Kokkos::dim_from_topology(bucket.elem_type), + field_type, + bucket.elem_type); + if (field_n_dofs > kokkos_hilbert_max_dofs) + return false; + + batch_data.bucket_field_keys[bucket_index].size = n_fields; + batch_data.bucket_field_dofs[bucket_index].size = n_fields; + batch_data.bucket_field_keys[bucket_index].values[field] = field_key; + batch_data.bucket_field_dofs[bucket_index].values[field] = field_n_dofs; + } + } + + return true; +} + +bool 
+ensure_kokkos_fem_goal_batch_data(HilbertSystem & sys, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + KokkosPetscAssemblyPlan & plan) +{ + libmesh_assert(sys.input_system); + const void * input_vector_target = sys.input_system->current_local_solution.get(); + if (plan.fem_goal_target == &goal_function && plan.input_vector_target == input_vector_target) + return true; + + KokkosFEMGoalBatchData batch_data; + if (!build_kokkos_fem_goal_batch_data(sys, plan, goal_function, batch_data)) + return false; + + plan.fem_goal_batch_data = std::move(batch_data); + plan.fem_goal_target = &goal_function; + plan.input_vector_target = input_vector_target; + return true; +} + +bool +assemble_kokkos_hilbert_element(const FEShapeKey key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const Number solution_derivative, + const libMesh::Kokkos::KokkosParsedFunction & goal_function, + const KokkosElementAssemblyState & state, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ + const unsigned int n_dofs = F.size(); + KokkosScalarView d_F("hilbert_residual", n_dofs); + KokkosDenseJacobianView d_K("hilbert_jacobian", n_dofs, n_dofs); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(goal_function, + goal_function.gradient_function()); + const libMesh::Kokkos::detail::DenseElementOutputSink sink{ + d_F, d_K, n_dofs, request_jacobian}; + + auto coeff_guard = state.solution_vector->make_kokkos_read_view_guard(); + const auto coeff = + libMesh::Kokkos::detail::make_gathered_coeff_access( + coeff_guard.view(), state.solution_local_indices->element_local_indices, state.elem_index); + + const bool success = + libMesh::Kokkos::detail::run_hilbert_system_assembly( + key, + state.mapping_type, + state.geometry_cache->node_coordinates, + state.geometry_cache->element_node_ids, + state.elem_index, + state.elem_n_nodes, + quadrature_order, + hilbert_order, + coeff, + 
solution_derivative, + goal_access, + request_jacobian, + sink, + "hilbert_local_assembly"); + + if (!success) + return false; + + accumulate_hilbert_dense_outputs(d_F, d_K, request_jacobian, F, K); + return true; +} + +bool +assemble_kokkos_hilbert_fem_goal_element(const FEShapeKey output_key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const Number solution_derivative, + System & input_system, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + const KokkosElementAssemblyState & state, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ + KokkosFEMGoalData goal_data; + if (!build_kokkos_fem_goal_data(state.elem_type, + state.mapping_type, + state.elem_p_level, + hilbert_order, + input_system, + goal_function, + goal_data)) + return false; + + const unsigned int n_dofs = F.size(); + KokkosScalarView d_F("hilbert_residual", n_dofs); + KokkosDenseJacobianView d_K("hilbert_jacobian", n_dofs, n_dofs); + const libMesh::Kokkos::detail::DenseElementOutputSink sink{ + d_F, d_K, n_dofs, request_jacobian}; + + const auto assemble_with_input_coeffs = [&](const auto & coeff_values, + const auto & input_coeff_values) + { + const auto coeff = + libMesh::Kokkos::detail::make_gathered_coeff_access(coeff_values, + state.solution_local_indices + ->element_local_indices, + state.elem_index); + + const auto goal_access = + libMesh::Kokkos::detail::GatheredParsedFEMGoalAccess, + KokkosLocalIndexView, + libMesh::Kokkos::KokkosParsedFEMFunction, + kokkos_parsed_fem_max_fields>( + goal_data.field_keys, + goal_data.field_dofs, + input_coeff_values, + goal_data.field_local_indices.data(), + goal_function); + + return libMesh::Kokkos::detail::run_hilbert_system_assembly( + output_key, + state.mapping_type, + state.geometry_cache->node_coordinates, + state.geometry_cache->element_node_ids, + state.elem_index, + state.elem_n_nodes, + quadrature_order, + hilbert_order, + coeff, + solution_derivative, + goal_access, + 
request_jacobian, + sink, + "hilbert_local_fem_goal_assembly"); + }; + + auto coeff_guard = state.solution_vector->make_kokkos_read_view_guard(); + const bool success = + (goal_data.input_vector == state.solution_vector) + ? assemble_with_input_coeffs(coeff_guard.view(), coeff_guard.view()) + : [&]() + { + auto input_guard = goal_data.input_vector->make_kokkos_read_view_guard(); + return assemble_with_input_coeffs(coeff_guard.view(), input_guard.view()); + }(); + + if (!success) + return false; + + accumulate_hilbert_dense_outputs(d_F, d_K, request_jacobian, F, K); + return true; +} + +#if defined(LIBMESH_HAVE_PETSC) +bool +assemble_host_exact_parsed_fem_goal_element(HilbertSystem & sys, + FEMContext & c, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function) +{ + if (!sys.input_system) + return false; + + FEMContext * goal_context_ptr = sys.get_input_context(c); + if (!goal_context_ptr) + return false; + + FEMContext & goal_context = *goal_context_ptr; + goal_context.pre_fe_reinit(*sys.input_system, &c.get_elem()); + goal_context.elem_fe_reinit(); + + detail::HostHilbertFEAccess fe(c, 0, sys.hilbert_order()); + detail::HostHilbertAccumulator accum(F, K); + auto solution = + detail::make_hilbert_solution_access(fe, + c.get_elem_solution(0), + c.get_elem_solution_derivative()); + HostExactParsedFEMGoalAccess goal_access(goal_function, goal_context); + detail::assemble_hilbert_element(fe, + solution, + goal_access, + request_jacobian, + sys.hilbert_order(), + accum); + return true; +} +#endif + +template +bool +assemble_kokkos_hilbert_element_device_values(const FEShapeKey key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const unsigned int elem_index, + const MeshBase::KokkosGeometryCache & geometry_cache, + const libMesh::Kokkos::KokkosParsedFunction & goal_function, + ResidualView d_rhs_values, + JacobianView d_mat_values) +{ + const unsigned int 
n_dofs = cast_int(d_rhs_values.extent(0)); + const auto goal_access = + libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(goal_function, + goal_function.gradient_function()); + const libMesh::Kokkos::detail::FlatDeviceValueSink sink{ + d_rhs_values, d_mat_values, n_dofs}; + + return libMesh::Kokkos::detail::run_hilbert_system_assembly( + key, + geometry_cache.element_mapping_types(elem_index), + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + elem_index, + geometry_cache.element_n_nodes(elem_index), + quadrature_order, + hilbert_order, + libMesh::Kokkos::detail::ZeroCoeffAccess{}, + Number(1.), + goal_access, + true, + sink, + "hilbert_device_values"); +} + +template +bool +assemble_kokkos_hilbert_fem_goal_device_values(const FEShapeKey output_key, + const unsigned int quadrature_order, + const unsigned int hilbert_order, + const unsigned int elem_index, + const MeshBase::KokkosGeometryCache & geometry_cache, + System & input_system, + const libMesh::Kokkos::KokkosParsedFEMFunction & goal_function, + ResidualView d_rhs_values, + JacobianView d_mat_values) +{ + KokkosFEMGoalData goal_data; + if (!build_kokkos_fem_goal_data(geometry_cache.element_types(elem_index), + geometry_cache.element_mapping_types(elem_index), + geometry_cache.element_p_levels(elem_index), + hilbert_order, + input_system, + goal_function, + goal_data)) + return false; + + const unsigned int n_dofs = cast_int(d_rhs_values.extent(0)); + const libMesh::Kokkos::detail::FlatDeviceValueSink sink{ + d_rhs_values, d_mat_values, n_dofs}; + + auto input_guard = goal_data.input_vector->make_kokkos_read_view_guard(); + const auto goal_access = + libMesh::Kokkos::detail::GatheredParsedFEMGoalAccess, + kokkos_parsed_fem_max_fields>( + goal_data.field_keys, + goal_data.field_dofs, + input_guard.view(), + goal_data.field_local_indices.data(), + goal_function); + + return libMesh::Kokkos::detail::run_hilbert_system_assembly( + output_key, + 
geometry_cache.element_mapping_types(elem_index), + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + elem_index, + geometry_cache.element_n_nodes(elem_index), + quadrature_order, + hilbert_order, + libMesh::Kokkos::detail::ZeroCoeffAccess{}, + Number(1.), + goal_access, + true, + sink, + "hilbert_device_fem_goal_values"); +} + +template +bool +assemble_kokkos_hilbert_record_values(HilbertSystem & sys, + const HilbertElementAssemblyRecord & record, + const unsigned int quadrature_order, + const libMesh::Kokkos::KokkosParsedFunction * analytic_goal, + const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal, + ResidualView rhs_slice, + JacobianView mat_slice) +{ + const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache(); + const FEShapeKey shape_key = + make_hilbert_shape_key(geometry_cache.element_types(record.elem_index), + geometry_cache.element_p_levels(record.elem_index), + sys.variable_type(0)); + + return analytic_goal ? + assemble_kokkos_hilbert_element_device_values(shape_key, + quadrature_order, + sys.hilbert_order(), + record.elem_index, + geometry_cache, + analytic_goal->with_time(sys.time), + rhs_slice, + mat_slice) : + assemble_kokkos_hilbert_fem_goal_device_values(shape_key, + quadrature_order, + sys.hilbert_order(), + record.elem_index, + geometry_cache, + *sys.input_system, + fem_goal->with_time(sys.time), + rhs_slice, + mat_slice); +} + +bool +assemble_kokkos_petsc_global_system(HilbertSystem & sys, + KokkosPetscAssemblyPlan & plan, + const libMesh::Kokkos::KokkosParsedFunction * analytic_goal, + const libMesh::Kokkos::KokkosParsedFEMFunction * fem_goal, + PetscMatrixBase & system_matrix, + PetscVector & system_rhs, + HilbertSystem::KokkosAssemblyPath & assembly_path) +{ + libmesh_error_msg_if(sys.has_static_condensation(), + "HilbertSystem Kokkos direct PETSc storage does not support static " + "condensation."); + libmesh_error_msg_if(sys.get_dof_map().n_constrained_dofs(), + "HilbertSystem Kokkos direct PETSc 
storage does not yet support " + "constrained dofs."); + libmesh_error_msg_if(!analytic_goal && !fem_goal, + "HilbertSystem Kokkos direct PETSc storage requires a parsed analytic " + "or parsed FEM goal."); + + const auto * dof_index_cache = sys.get_dof_map().get_kokkos_dof_index_cache(0); + libmesh_assert(dof_index_cache); + + if (!ensure_kokkos_petsc_direct_storage( + sys, *dof_index_cache, plan, system_matrix, system_rhs)) + libmesh_error_msg("Failed to build the HilbertSystem Kokkos direct PETSc storage path. " + "The COO fallback path has been removed; this case must be supported " + "directly or fail."); + + assembly_path = HilbertSystem::KokkosAssemblyPath::petsc_direct_storage; + using KokkosVectorWriteView = + typename PetscVector::kokkos_write_view; + using KokkosMatrixWriteView = + ::Kokkos::View>; + const auto make_matrix_write_view = + [](PetscScalar * values, const PetscMemType mem_type, const std::size_t size, const char * name) + { + const bool host_inaccessible = + PetscMemTypeHost(mem_type) && + !::Kokkos::SpaceAccessibility::accessible; + if (host_inaccessible) + libmesh_error_msg(std::string("HilbertSystem Kokkos direct PETSc storage requires ") + + name + " to be accessible from the active Kokkos execution space."); + return KokkosMatrixWriteView(reinterpret_cast(values), size); + }; + PetscInt rhs_first_local_petsc = 0; + PetscInt rhs_last_local_petsc = 0; + LibmeshPetscCall2(sys.comm(), + VecGetOwnershipRange(system_rhs.vec(), &rhs_first_local_petsc, &rhs_last_local_petsc)); + + { + auto rhs_guard = system_rhs.make_kokkos_write_view_guard(); + KokkosVectorWriteView local_rhs_values = rhs_guard.view(); + KokkosVectorWriteView remote_rhs_values; + KokkosMatrixWriteView diag_matrix_values; + KokkosMatrixWriteView offdiag_matrix_values; + KokkosMatrixWriteView remote_matrix_values; + PetscScalar * diag_values_ptr = nullptr; + PetscScalar * offdiag_values_ptr = nullptr; + PetscInt row_start = 0; + PetscInt row_stop = 0; + PetscInt col_start = 0; + 
PetscInt col_stop = 0;
+    const PetscInt * diag_row_offsets = nullptr;
+    const PetscInt * diag_col_indices = nullptr;
+    const PetscInt * offdiag_row_offsets = nullptr;
+    const PetscInt * offdiag_col_indices = nullptr;
+    // Column table handed back by MatMPIAIJGetSeqAIJ; it is requested but not
+    // consumed by this assembly path (see libmesh_ignore below).
+    const PetscInt * offdiag_global_columns = nullptr;
+
+    // Reset the staging buffer for RHS contributions that target remote rows
+    // and wrap its storage in a write view for the scatter kernels.
+    if (plan.rhs_remote_values.extent(0))
+      {
+        ::Kokkos::deep_copy(plan.rhs_remote_values, Number(0));
+        remote_rhs_values = KokkosVectorWriteView(plan.rhs_remote_values.data(),
+                                                  plan.rhs_remote_values.extent(0));
+      }
+
+    // Same for the matrix contributions that target remote rows.
+    if (plan.mat_remote_values.extent(0))
+      {
+        ::Kokkos::deep_copy(plan.mat_remote_values, Number(0));
+        remote_matrix_values = KokkosMatrixWriteView(plan.mat_remote_values.data(),
+                                                     plan.mat_remote_values.extent(0));
+      }
+
+    if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::seq_aij)
+      {
+        // Sequential AIJ: a single CSR value array backs the whole matrix.
+        const PetscInt * row_offsets = nullptr;
+        const PetscInt * col_indices = nullptr;
+        PetscScalar * values = nullptr;
+        PetscMemType mem_type = PETSC_MEMTYPE_HOST;
+        LibmeshPetscCall2(sys.comm(),
+                          MatSeqAIJGetCSRAndMemType(
+                            system_matrix.mat(), &row_offsets, &col_indices, &values, &mem_type));
+        libmesh_ignore(col_indices);
+        diag_values_ptr = values;
+        diag_matrix_values = make_matrix_write_view(values, mem_type, plan.mat_diag_size, "PETSc matrix values");
+      }
+    else if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij)
+      {
+        // Parallel AIJ: fetch the diagonal and off-diagonal sequential blocks
+        // and expose each block's value array as its own write view.
+        Mat diagonal = nullptr;
+        Mat offdiagonal = nullptr;
+        LibmeshPetscCall2(sys.comm(),
+                          MatMPIAIJGetSeqAIJ(system_matrix.mat(),
+                                             &diagonal,
+                                             &offdiagonal,
+                                             &offdiag_global_columns));
+        libmesh_ignore(offdiag_global_columns);
+
+        PetscMemType mem_type = PETSC_MEMTYPE_HOST;
+        LibmeshPetscCall2(sys.comm(),
+                          MatSeqAIJGetCSRAndMemType(
+                            diagonal, &diag_row_offsets, &diag_col_indices, &diag_values_ptr, &mem_type));
+        libmesh_ignore(diag_col_indices);
+        diag_matrix_values = make_matrix_write_view(diag_values_ptr,
+                                                    mem_type,
+                                                    plan.mat_diag_size,
+                                                    "PETSc diagonal matrix values");
+
+        mem_type = PETSC_MEMTYPE_HOST;
+        LibmeshPetscCall2(sys.comm(),
+                          MatSeqAIJGetCSRAndMemType(
+                            offdiagonal,
+                            &offdiag_row_offsets,
+                            &offdiag_col_indices,
+                            &offdiag_values_ptr,
+                            &mem_type));
+        libmesh_ignore(offdiag_col_indices);
+        offdiag_matrix_values = make_matrix_write_view(offdiag_values_ptr,
+                                                       mem_type,
+                                                       plan.mat_offdiag_size,
+                                                       "PETSc off-diagonal matrix values");
+
+        LibmeshPetscCall2(sys.comm(), MatGetOwnershipRange(system_matrix.mat(), &row_start, &row_stop));
+        LibmeshPetscCall2(sys.comm(),
+                          MatGetOwnershipRangeColumn(system_matrix.mat(), &col_start, &col_stop));
+        // The global-column -> local-index hash map that used to be rebuilt
+        // here on every assembly was never read afterwards: the scatter
+        // kernels below address matrix entries through plan.mat_value_slots.
+        // The map and the MatGetSize query that only fed it are dropped.
+      }
+    else
+      libmesh_error_msg("HilbertSystem Kokkos direct PETSc storage was built without a valid "
+                        "PETSc AIJ storage layout.");
+
+    // Scatter targets: entries are routed to the local, off-diagonal or
+    // remote staging storage according to the size thresholds passed here.
+    const auto rhs_scatter =
+      libMesh::Kokkos::detail::SplitScatterAccess{
+        local_rhs_values, remote_rhs_values, plan.rhs_local_size};
+    const auto mat_scatter =
+      libMesh::Kokkos::detail::SplitMatrixScatterAccess{
+        diag_matrix_values,
+        offdiag_matrix_values,
+        remote_matrix_values,
+        plan.mat_diag_size,
+        plan.mat_diag_size + plan.mat_offdiag_size};
+
+    const auto & geometry_cache = sys.get_mesh().get_kokkos_geometry_cache();
+    if (analytic_goal)
+      {
+        // Analytic goal: one batched launch per bucket of the plan.
+        const auto timed_goal = analytic_goal->with_time(sys.time);
+        const auto goal_access =
+          libMesh::Kokkos::detail::make_hilbert_analytic_goal_access(
+            timed_goal, timed_goal.gradient_function());
+
+        for (const auto & bucket : plan.buckets)
+          libMesh::Kokkos::detail::run_hilbert_system_bucket_scatter_batch<
+            kokkos_hilbert_max_dofs>(
+              bucket.key,
+              bucket.mapping_type,
+              bucket.n_nodes,
+              bucket.quadrature_order, +
geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + plan.rhs_local_slots, + plan.mat_value_slots, + sys.hilbert_order(), + goal_access, + rhs_scatter, + mat_scatter, + "hilbert_direct_scatter_bucket_batch"); + } + else + { + const auto timed_goal = fem_goal->with_time(sys.time); + if (!ensure_kokkos_fem_goal_batch_data(sys, *fem_goal, plan)) + libmesh_error_msg("HilbertSystem Kokkos direct PETSc storage could not build parsed " + "FEM goal batch data."); + + auto input_guard = plan.fem_goal_batch_data.input_vector->make_kokkos_read_view_guard(); + for (auto bucket_index : index_range(plan.buckets)) + { + const auto & bucket = plan.buckets[bucket_index]; + libMesh::Kokkos::detail::run_hilbert_system_fem_bucket_scatter_batch< + kokkos_hilbert_max_dofs, + kokkos_parsed_fem_max_fields>( + bucket.key, + bucket.mapping_type, + bucket.n_nodes, + bucket.quadrature_order, + geometry_cache.node_coordinates, + geometry_cache.element_node_ids, + ::Kokkos::subview(plan.batch_data.elem_indices, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.elem_n_dofs, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.rhs_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + ::Kokkos::subview(plan.batch_data.mat_offsets, + ::Kokkos::make_pair(bucket.begin, bucket.end)), + plan.rhs_local_slots, + plan.mat_value_slots, + plan.fem_goal_batch_data.bucket_field_keys[bucket_index], + plan.fem_goal_batch_data.bucket_field_dofs[bucket_index], + plan.fem_goal_batch_data.field_local_indices, + input_guard.view(), + 
timed_goal,
+              sys.hilbert_order(),
+              rhs_scatter,
+              mat_scatter,
+              "hilbert_direct_scatter_fem_bucket_batch");
+          }
+      }
+
+    // All bucket kernels must have finished writing the staging buffers
+    // before their contents are handed to PETSc below.
+    ::Kokkos::fence();
+
+    if (plan.direct_matrix_layout == KokkosDirectMatrixLayout::mpi_aij)
+      {
+        // NOTE(review): PetscSFReduceBegin/End are documented as collective,
+        // but both guards below test this rank's own remote-buffer extent.
+        // Confirm the plan keeps that extent non-zero (or the SF null) on
+        // every rank consistently, otherwise ranks could disagree on whether
+        // to enter the reduction.
+        if (plan.rhs_remote_sf && plan.rhs_remote_values.extent(0))
+          {
+            ::Kokkos::deep_copy(plan.rhs_remote_root_values, Number(0));
+            LibmeshPetscCall2(sys.comm(),
+                              PetscSFReduceWithMemTypeBegin(plan.rhs_remote_sf,
+                                                            MPIU_SCALAR,
+                                                            kokkos_default_petsc_mem_type(),
+                                                            plan.rhs_remote_values.data(),
+                                                            kokkos_default_petsc_mem_type(),
+                                                            plan.rhs_remote_root_values.data(),
+                                                            MPIU_SUM));
+            LibmeshPetscCall2(sys.comm(),
+                              PetscSFReduceEnd(plan.rhs_remote_sf,
+                                               MPIU_SCALAR,
+                                               plan.rhs_remote_values.data(),
+                                               plan.rhs_remote_root_values.data(),
+                                               MPIU_SUM));
+            // KOKKOS_LAMBDA captures by value and `plan` is a reference, so
+            // naming `plan` inside the lambda would copy the entire host-side
+            // plan object into the closure. Capture only a shallow copy of
+            // the view handle instead.
+            const auto rhs_root_values = plan.rhs_remote_root_values;
+            ::Kokkos::parallel_for("hilbert_apply_rhs_remote_reduce",
+                                   ::Kokkos::RangePolicy<>(0, rhs_root_values.extent(0)),
+                                   KOKKOS_LAMBDA(const std::size_t i)
+                                   {
+                                     local_rhs_values(i) += rhs_root_values(i);
+                                   });
+          }
+
+        if (plan.mat_remote_sf && plan.mat_remote_values.extent(0))
+          {
+            ::Kokkos::deep_copy(plan.mat_remote_root_values, Number(0));
+            LibmeshPetscCall2(sys.comm(),
+                              PetscSFReduceWithMemTypeBegin(plan.mat_remote_sf,
+                                                            MPIU_SCALAR,
+                                                            kokkos_default_petsc_mem_type(),
+                                                            plan.mat_remote_values.data(),
+                                                            kokkos_default_petsc_mem_type(),
+                                                            plan.mat_remote_root_values.data(),
+                                                            MPIU_SUM));
+            LibmeshPetscCall2(sys.comm(),
+                              PetscSFReduceEnd(plan.mat_remote_sf,
+                                               MPIU_SCALAR,
+                                               plan.mat_remote_values.data(),
+                                               plan.mat_remote_root_values.data(),
+                                               MPIU_SUM));
+            // Same capture hygiene as above: copy the view handle and the
+            // diagonal/off-diagonal split point into locals so the device
+            // closure does not drag the whole plan along.
+            const auto mat_root_values = plan.mat_remote_root_values;
+            const auto mat_diag_size = plan.mat_diag_size;
+            ::Kokkos::parallel_for("hilbert_apply_mat_remote_reduce",
+                                   ::Kokkos::RangePolicy<>(0, mat_root_values.extent(0)),
+                                   KOKKOS_LAMBDA(const std::size_t i)
+                                   {
+                                     // Root entries below mat_diag_size belong
+                                     // to the diagonal block, the rest to the
+                                     // off-diagonal block.
+                                     if (i < mat_diag_size)
+                                       diag_matrix_values(i) += mat_root_values(i);
+                                     else
+                                       offdiag_matrix_values(i - mat_diag_size) +=
+                                         mat_root_values(i);
+                                   });
+          }
+
+        // Wait for the apply kernels before the write guards go out of scope.
+        ::Kokkos::fence();
+      }
+  }
+
+  return true;
+}
+} // anonymous
namespace +#endif + HilbertSystem::~HilbertSystem () = default; +HilbertSystem::HilbertSystem(libMesh::EquationSystems & es, + const std::string & name, + const unsigned int number) + : libMesh::FEMSystem(es, name, number), + input_system(nullptr), + _fe_family("LAGRANGE"), + _fe_order(1), + _hilbert_order(0), + _use_kokkos_backend(false), + _use_exact_parsed_fem_host_path(false), + _fdm_eps(libMesh::TOLERANCE), + _subdomains_list() +{ +} + +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +const libMesh::Kokkos::KokkosParsedFunction * +HilbertSystem::ensure_kokkos_goal_func() +{ + return ensure_kokkos_goal_cache>( + _kokkos_goal_func, + _analytic_goal_func, + [](const auto & parsed_goal) -> std::unique_ptr> + { +#ifdef LIBMESH_HAVE_FPARSER + return std::make_unique>( + parsed_goal.build_program_bundle()); +#else + libmesh_ignore(parsed_goal); + return nullptr; +#endif + }); +} + +const libMesh::Kokkos::KokkosParsedFEMFunction * +HilbertSystem::ensure_kokkos_fem_goal_func() +{ + return ensure_kokkos_goal_cache>( + _kokkos_fem_goal_func, + _goal_func, + [](const auto & parsed_goal) + -> std::unique_ptr> + { +#ifdef LIBMESH_HAVE_FPARSER + const auto program_bundle = parsed_goal.build_program_bundle(); + if (!program_bundle.supports_kokkos_value_goal() || + program_bundle.value_variable_numbers.size() > kokkos_parsed_fem_max_fields) + return nullptr; + + return std::make_unique>( + program_bundle); +#else + libmesh_ignore(parsed_goal); + return nullptr; +#endif + }); +} + +KokkosPetscAssemblyPlan * +HilbertSystem::ensure_kokkos_petsc_plan(bool * rebuilt) +{ + const auto & geometry_cache = this->get_mesh().get_kokkos_geometry_cache(); + const auto * dof_index_cache = this->get_dof_map().get_kokkos_dof_index_cache(0); + if (!dof_index_cache) + return nullptr; + + if (_kokkos_petsc_plan && + kokkos_petsc_plan_matches(*this, geometry_cache, *dof_index_cache, *_kokkos_petsc_plan)) + { + if (rebuilt) + *rebuilt = false; + return 
_kokkos_petsc_plan.get(); + } + + auto plan = std::make_unique(); + if (!build_kokkos_petsc_assembly_plan(*this, *plan)) + return nullptr; + + _kokkos_petsc_plan = std::move(plan); + if (rebuilt) + *rebuilt = true; + return _kokkos_petsc_plan.get(); +} + +bool +HilbertSystem::try_kokkos_element_assembly(FEMContext & c, + const bool request_jacobian, + DenseSubVector & F, + DenseSubMatrix & K) +{ +#if defined(LIBMESH_HAVE_PETSC) + const Elem & elem = c.get_elem(); + const unsigned int quadrature_order = + cast_int(c.get_element_qrule().get_order()); + KokkosElementAssemblyState state; + if (!build_kokkos_element_state(*this, elem, state)) + return false; + const FEShapeKey shape_key = + make_hilbert_shape_key(state.elem_type, state.elem_p_level, this->variable_type(0)); + + if (const auto * kokkos_goal = this->ensure_kokkos_goal_func(); + kokkos_goal && + assemble_kokkos_hilbert_element(shape_key, + quadrature_order, + _hilbert_order, + c.get_elem_solution_derivative(), + kokkos_goal->with_time(this->time), + state, + request_jacobian, + F, + K)) + return true; + + if (!input_system) + return false; + + if (const auto * kokkos_goal = this->ensure_kokkos_fem_goal_func(); + kokkos_goal && + assemble_kokkos_hilbert_fem_goal_element(shape_key, + quadrature_order, + _hilbert_order, + c.get_elem_solution_derivative(), + *input_system, + kokkos_goal->with_time(this->time), + state, + request_jacobian, + F, + K)) + return true; + + return false; +#else + libmesh_ignore(c, request_jacobian, F, K); + return false; +#endif +} + +#if defined(LIBMESH_HAVE_PETSC) +bool +HilbertSystem::try_kokkos_petsc_solve() +{ + using clock = std::chrono::steady_clock; + auto * petsc_matrix = dynamic_cast *>(this->matrix); + auto * petsc_rhs = dynamic_cast *>(this->rhs); + auto * petsc_solution = dynamic_cast *>(this->solution.get()); + + const auto * analytic_goal = this->ensure_kokkos_goal_func(); + const auto * fem_goal = this->ensure_kokkos_fem_goal_func(); + + 
libmesh_error_msg_if(!petsc_matrix || !petsc_rhs || !petsc_solution, + "HilbertSystem Kokkos direct PETSc storage requires PETSc-backed matrix, " + "RHS, and solution objects."); + libmesh_error_msg_if(!(analytic_goal || fem_goal), + "HilbertSystem Kokkos direct PETSc storage requires a parsed analytic " + "goal or a parsed FEM goal with Kokkos support."); + + prewarm_kokkos_hilbert_entities(*this, fem_goal); + this->_last_kokkos_timing = {}; + const auto total_start = clock::now(); + petsc_matrix->zero(); + petsc_rhs->zero(); + petsc_solution->zero(); + + bool rebuilt_plan = false; + const auto plan_start = clock::now(); + auto * plan = this->ensure_kokkos_petsc_plan(&rebuilt_plan); + libmesh_error_msg_if(!plan, + "HilbertSystem Kokkos direct PETSc storage could not build a supported " + "assembly plan for the current FE/mapping/quadrature configuration."); + const auto plan_stop = clock::now(); + this->_last_kokkos_timing.plan_seconds = + rebuilt_plan ? + std::chrono::duration_cast>(plan_stop - plan_start).count() : + 0.; + + const auto assembly_start = clock::now(); + auto assembly_path = HilbertSystem::KokkosAssemblyPath::none; + if (!assemble_kokkos_petsc_global_system(*this, + *plan, + analytic_goal, + fem_goal, + *petsc_matrix, + *petsc_rhs, + assembly_path)) + libmesh_error_msg("HilbertSystem Kokkos direct PETSc storage assembly failed."); + const auto assembly_stop = clock::now(); + this->_last_kokkos_timing.assembly_seconds = + std::chrono::duration_cast>(assembly_stop - assembly_start).count(); + this->_last_kokkos_timing.assembly_path = assembly_path; + + petsc_matrix->close(); + petsc_rhs->close(); + petsc_solution->close(); + + LinearSolver * solver = this->get_linear_solver(); + if (this->prefix_with_name()) + solver->init(this->prefix().c_str()); + else + solver->init(); + + const auto [maxlinearits, linear_tol] = this->get_linear_solve_parameters(); + const auto solve_start = clock::now(); + solver->solve(*this->matrix, + *this->solution, + 
*this->rhs, + linear_tol, + maxlinearits); + const auto solve_stop = clock::now(); + this->_last_kokkos_timing.solve_seconds = + std::chrono::duration_cast>(solve_stop - solve_start).count(); + + this->update(); + this->mesh_position_set(); + const auto total_stop = clock::now(); + this->_last_kokkos_timing.total_seconds = + std::chrono::duration_cast>(total_stop - total_start).count(); + return true; +} +#endif +#endif + void HilbertSystem::init_data () { + this->get_dof_map().full_sparsity_pattern_needed(); this->add_variable ("u", static_cast(_fe_order), Utility::string_to_enum(_fe_family)); @@ -76,10 +2343,27 @@ void HilbertSystem::init_context(DiffContext & context) if (input_system && !input_context) { input_context = std::make_unique(*input_system); + } + + libmesh_assert(_goal_func || _analytic_goal_func); + + if (_goal_func) + _goal_func->init_context(input_system ? *input_context : c); - libmesh_assert(_goal_func.get()); - _goal_func->init_context(*input_context); +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (input_system && + this->_hilbert_order > 0 && + dynamic_cast *>(_goal_func.get()) && + this->ensure_kokkos_fem_goal_func()) + { + for (const auto & dim : elem_dims) + for (unsigned int var = 0; var != input_system->n_vars(); ++var) + { + input_context->get_element_fe(var, my_fe, dim); + my_fe->get_dphi(); + } } +#endif FEMSystem::init_context(context); } @@ -96,74 +2380,106 @@ bool HilbertSystem::element_time_derivative (bool request_jacobian, !_subdomains_list.count(elem.subdomain_id())) return request_jacobian; - // First we get some references to cell-specific data that - // will be used to assemble the linear system. 
- - // Element Jacobian * quadrature weights for interior integration - const std::vector & JxW = c.get_element_fe(0)->get_JxW(); - - const std::vector> & phi = c.get_element_fe(0)->get_phi(); - - const std::vector & xyz = c.get_element_fe(0)->get_xyz(); - - // The number of local degrees of freedom in each variable - const unsigned int n_u_dofs = c.n_dof_indices(0); - // The subvectors and submatrices we need to fill: DenseSubMatrix & K = c.get_elem_jacobian(0, 0); DenseSubVector & F = c.get_elem_residual(0); - unsigned int n_qpoints = c.get_element_qrule().n_points(); - - FEMContext & input_c = *libmesh_map_find(input_contexts, &c); - if (input_system) +#ifdef LIBMESH_HAVE_KOKKOS + if (_use_kokkos_backend) { - input_c.pre_fe_reinit(*input_system, &elem); - input_c.elem_fe_reinit(); +#if !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (this->try_kokkos_element_assembly(c, request_jacobian, F, K)) + return request_jacobian; +#else + if (_analytic_goal_func && + dynamic_cast *>(_analytic_goal_func.get())) + libmesh_error_msg("HilbertSystem Kokkos backend does not support ParsedFunction goals " + "when libMesh is built with complex Number."); +#endif } +#endif - for (unsigned int qp=0; qp != n_qpoints; qp++) - { - const Number u = c.interior_value(0, qp); - const Number ufunc = (*_goal_func)(input_c, xyz[qp]); - const Number err_u = u - ufunc; + detail::HostHilbertFEAccess fe(c, 0, _hilbert_order); + const auto assemble_with_goal = [&](auto & goal) + { + auto solution = + detail::make_hilbert_solution_access(fe, + c.get_elem_solution(0), + c.get_elem_solution_derivative()); + detail::HostHilbertAccumulator accum(F, K); + detail::assemble_hilbert_element(fe, + solution, + goal, + request_jacobian, + _hilbert_order, + accum); + }; - for (unsigned int i=0; i != n_u_dofs; i++) - F(i) += JxW[qp] * (err_u * phi[i][qp]); +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (const auto * kokkos_goal = this->ensure_kokkos_goal_func()) + { + const auto 
parsed_goal = kokkos_goal->with_time(this->time); + auto goal = detail::make_hilbert_analytic_goal_access(parsed_goal, + parsed_goal.gradient_function()); + assemble_with_goal(goal); + return request_jacobian; + } - if (_hilbert_order > 0) - { - const std::vector> & dphi = - c.get_element_fe(0)->get_dphi(); +#if defined(LIBMESH_HAVE_PETSC) + if (_use_exact_parsed_fem_host_path && input_system) + if (const auto * kokkos_fem_goal = this->ensure_kokkos_fem_goal_func(); + kokkos_fem_goal && + assemble_host_exact_parsed_fem_goal_element(*this, + c, + request_jacobian, + F, + K, + kokkos_fem_goal->with_time(this->time))) + return request_jacobian; +#endif +#endif - const Gradient grad_u = c.interior_gradient(0, qp); - Gradient ufuncgrad = (*_goal_grad)(input_c, xyz[qp]); - const Gradient err_grad_u = grad_u - ufuncgrad; + if (_analytic_goal_func) + { + auto goal = detail::make_hilbert_analytic_goal_access(*_analytic_goal_func, + *_analytic_goal_grad); + assemble_with_goal(goal); + } + else + { + FEMContext & goal_context = + input_system ? 
*libmesh_map_find(input_contexts, &c) : c; - for (unsigned int i=0; i != n_u_dofs; i++) - F(i) += JxW[qp] * (err_grad_u * dphi[i][qp]); + if (input_system) + { + goal_context.pre_fe_reinit(*input_system, &elem); + goal_context.elem_fe_reinit(); } - if (request_jacobian) - { - const Number JxWxD = JxW[qp] * - context.get_elem_solution_derivative(); + detail::HostHilbertGoalAccess goal(*_goal_func, _goal_grad.get(), goal_context); + assemble_with_goal(goal); + } - for (unsigned int i=0; i != n_u_dofs; i++) - for (unsigned int j=0; j != n_u_dofs; ++j) - K(i,j) += JxWxD * (phi[i][qp] * phi[j][qp]); + return request_jacobian; +} - if (_hilbert_order > 0) - { - const std::vector> & dphi = - c.get_element_fe(0)->get_dphi(); +void HilbertSystem::solve() +{ + _last_kokkos_timing = {}; +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + if (_use_kokkos_backend) + { + if (this->try_kokkos_petsc_solve()) + return; - for (unsigned int i=0; i != n_u_dofs; i++) - for (unsigned int j=0; j != n_u_dofs; ++j) - K(i,j) += JxWxD * (dphi[i][qp] * dphi[j][qp]); - } - } - } // end of the quadrature point qp-loop + libmesh_error_msg("HilbertSystem Kokkos backend did not complete the direct PETSc " + "storage solve path."); + } +#else + libmesh_error_msg_if(_use_kokkos_backend, + "HilbertSystem Kokkos backend requires a libMesh build with Kokkos, " + "PETSc, and real Number support."); +#endif - return request_jacobian; + FEMSystem::solve(); } diff --git a/src/apps/L2system.h b/src/apps/L2system.h index c3d197c521a..4e5f86b2b44 100644 --- a/src/apps/L2system.h +++ b/src/apps/L2system.h @@ -16,10 +16,19 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // libMesh includes +#include "../../include/systems/hilbert_assembly.h" +#include "../../include/numerics/parsed_fem_function.h" +#include "../../include/numerics/parsed_function.h" + +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) 
+#include "../../include/gpu/kokkos_parsed_function.h" +#endif + #include "libmesh/enum_fe_family.h" #include "libmesh/fdm_gradient.h" #include "libmesh/fem_function_base.h" #include "libmesh/fem_system.h" +#include "libmesh/function_base.h" #include "libmesh/libmesh_common.h" // C++ includes @@ -29,19 +38,32 @@ // FEMSystem, TimeSolver and NewtonSolver will handle most tasks, // but we must specify element residuals +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +struct KokkosPetscAssemblyPlan; +#endif + class HilbertSystem : public libMesh::FEMSystem { public: + enum class KokkosAssemblyPath + { + none, + petsc_direct_storage + }; + + struct KokkosTimingInfo + { + libMesh::Real plan_seconds = 0.; + libMesh::Real assembly_seconds = 0.; + libMesh::Real solve_seconds = 0.; + libMesh::Real total_seconds = 0.; + KokkosAssemblyPath assembly_path = KokkosAssemblyPath::none; + }; + // Constructor HilbertSystem(libMesh::EquationSystems & es, const std::string & name, - const unsigned int number) - : libMesh::FEMSystem(es, name, number), - input_system(nullptr), - _fe_family("LAGRANGE"), - _fe_order(1), - _hilbert_order(0), - _subdomains_list() {} + const unsigned int number); // Default destructor ~HilbertSystem(); @@ -49,18 +71,39 @@ class HilbertSystem : public libMesh::FEMSystem std::string & fe_family() { return _fe_family; } unsigned int & fe_order() { return _fe_order; } std::set & subdomains_list() { return _subdomains_list; } + const std::set & subdomains_list() const { return _subdomains_list; } unsigned int & hilbert_order() { return _hilbert_order; } + unsigned int hilbert_order() const { return _hilbert_order; } + void use_kokkos_backend(bool use) { _use_kokkos_backend = use; } + bool use_kokkos_backend() const { return _use_kokkos_backend; } + void use_exact_parsed_fem_host_path(bool use) { _use_exact_parsed_fem_host_path = use; } + bool use_exact_parsed_fem_host_path() const { return 
_use_exact_parsed_fem_host_path; } + const KokkosTimingInfo & last_kokkos_timing() const { return _last_kokkos_timing; } + virtual void solve () override; void set_fdm_eps(libMesh::Real eps) { _fdm_eps = eps; - if (_goal_func.get()) - _goal_grad = std::make_unique>(*_goal_func, _fdm_eps); + rebuild_goal_gradient(); + rebuild_analytic_goal_gradient(); } void set_goal_func(libMesh::FEMFunctionBase & goal) { _goal_func = goal.clone(); - _goal_grad = std::make_unique>(*_goal_func, _fdm_eps); + _analytic_goal_func.reset(); + _analytic_goal_grad.reset(); + reset_kokkos_goal_cache(); + rebuild_goal_gradient(); + } + + void set_goal_func(libMesh::FunctionBase & goal) + { + _analytic_goal_func = goal.clone(); + _analytic_goal_func->init(); + _goal_func.reset(); + _goal_grad.reset(); + reset_kokkos_goal_cache(); + rebuild_analytic_goal_gradient(); } // We want to be able to project functions based on *other* systems' @@ -70,22 +113,29 @@ class HilbertSystem : public libMesh::FEMSystem // case) for that system. libMesh::System * input_system; + libMesh::FEMContext * get_input_context(libMesh::FEMContext & c) + { + const auto it = input_contexts.find(&c); + return (it == input_contexts.end()) ? 
nullptr : it->second.get(); + } + protected: std::unique_ptr > _goal_func; + std::unique_ptr> _analytic_goal_func; std::map> input_contexts; // System initialization - virtual void init_data (); + virtual void init_data () override; // Context initialization - virtual void init_context (libMesh::DiffContext & context); + virtual void init_context (libMesh::DiffContext & context) override; // Element residual and jacobian calculations // Time dependent parts virtual bool element_time_derivative (bool request_jacobian, - libMesh::DiffContext & context); + libMesh::DiffContext & context) override; // The FE type to use std::string _fe_family; @@ -94,9 +144,20 @@ class HilbertSystem : public libMesh::FEMSystem // The Hilbert order our subclass will project with unsigned int _hilbert_order; + bool _use_kokkos_backend; + bool _use_exact_parsed_fem_host_path; + // The function we will call to finite difference our goal // function std::unique_ptr> _goal_grad; + std::unique_ptr> _analytic_goal_grad; +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + std::unique_ptr> _kokkos_goal_func; + std::unique_ptr> _kokkos_fem_goal_func; +#if defined(LIBMESH_HAVE_PETSC) + std::unique_ptr _kokkos_petsc_plan; +#endif +#endif // The perturbation we will use when finite differencing our goal // function @@ -104,4 +165,48 @@ class HilbertSystem : public libMesh::FEMSystem // Which subdomains to integrate on (all subdomains, if empty()) std::set _subdomains_list; + KokkosTimingInfo _last_kokkos_timing; + +private: +#if defined(LIBMESH_HAVE_KOKKOS) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + const libMesh::Kokkos::KokkosParsedFunction * ensure_kokkos_goal_func(); + const libMesh::Kokkos::KokkosParsedFEMFunction * ensure_kokkos_fem_goal_func(); + bool try_kokkos_element_assembly(libMesh::FEMContext & c, + bool request_jacobian, + libMesh::DenseSubVector & F, + libMesh::DenseSubMatrix & K); +#if defined(LIBMESH_HAVE_PETSC) + bool try_kokkos_petsc_solve(); + 
KokkosPetscAssemblyPlan * ensure_kokkos_petsc_plan(bool * rebuilt = nullptr); +#endif + + void reset_kokkos_goal_cache() + { + _kokkos_goal_func.reset(); + _kokkos_fem_goal_func.reset(); + } +#else + void reset_kokkos_goal_cache() {} +#endif + + void rebuild_goal_gradient() + { + if (_goal_func) + _goal_grad = std::make_unique>(*_goal_func, _fdm_eps); + else + _goal_grad.reset(); + } + + void rebuild_analytic_goal_gradient() + { + if (_analytic_goal_func) + { + _analytic_goal_grad = + std::make_unique>(*_analytic_goal_func, + _fdm_eps); + _analytic_goal_grad->init(); + } + else + _analytic_goal_grad.reset(); + } }; diff --git a/src/apps/calculator.C b/src/apps/calculator.C index 639910a81fd..827d37ce0e5 100644 --- a/src/apps/calculator.C +++ b/src/apps/calculator.C @@ -53,6 +53,11 @@ #include #include +#ifdef LIBMESH_HAVE_KOKKOS +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ +#endif using namespace libMesh; @@ -81,6 +86,7 @@ void usage_error(const char * progname) << " Hilbert order [default: off]\n" << " --jump_slits calculate jumps across slits [default: off]\n" << " --integral only calculate func integral, not projection\n" + << " --kokkos use Kokkos local element assembly when supported\n" << std::endl; exit(1); @@ -159,11 +165,44 @@ private: Number _integral; }; +#ifdef LIBMESH_HAVE_KOKKOS +struct KokkosScope +{ + KokkosScope(int & argc, + char ** & argv, + const bool enable) + : _enabled(enable) + { + if (_enabled) + ::Kokkos::initialize(argc, argv); + } + + ~KokkosScope() + { + if (_enabled) + ::Kokkos::finalize(); + } + + bool _enabled; +}; +#endif + int main(int argc, char ** argv) { LibMeshInit init(argc, argv); + const bool use_kokkos = libMesh::on_command_line("--kokkos"); + +#ifndef LIBMESH_HAVE_KOKKOS + libmesh_error_msg_if(use_kokkos, + "--kokkos was requested, but this libMesh build does not have Kokkos enabled"); +#endif + +#ifdef LIBMESH_HAVE_KOKKOS + KokkosScope kokkos_scope(argc, argv, use_kokkos); +#endif + // In case 
the mesh file doesn't let us auto-infer dimension, we let // the user specify it on the command line const unsigned char requested_dim = @@ -228,6 +267,7 @@ int main(int argc, char ** argv) const unsigned int order = libMesh::command_line_next("--order", 1u); std::unique_ptr> goal_function; + std::unique_ptr> analytic_goal_function; if (solnname != "") { @@ -270,8 +310,10 @@ int main(int argc, char ** argv) old_es.print_info(); + analytic_goal_function = + std::make_unique>(calcfunc); goal_function = - std::make_unique>(ParsedFunction(calcfunc)); + std::make_unique>(*analytic_goal_function); } libMesh::out << "Calculating with system " << current_sys_name << std::endl; @@ -310,8 +352,12 @@ int main(int argc, char ** argv) new_sys.fe_family() = family; new_sys.fe_order() = order; + new_sys.use_kokkos_backend(use_kokkos); - new_sys.set_goal_func(*goal_function); + if (analytic_goal_function) + new_sys.set_goal_func(*analytic_goal_function); + else + new_sys.set_goal_func(*goal_function); const Real fdm_eps = libMesh::command_line_next("--fdm_eps", Real(TOLERANCE)); diff --git a/src/apps/hilbert_kokkos_benchmark.C b/src/apps/hilbert_kokkos_benchmark.C new file mode 100644 index 00000000000..037e37aad87 --- /dev/null +++ b/src/apps/hilbert_kokkos_benchmark.C @@ -0,0 +1,457 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+ +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#include "L2system.h" + +#include "libmesh/libmesh_common.h" +#include "libmesh/diff_solver.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_preconditioner_type.h" +#include "libmesh/enum_solver_type.h" +#include "libmesh/equation_systems.h" +#include "libmesh/explicit_system.h" +#include "libmesh/libmesh.h" +#include "libmesh/linear_solver.h" +#include "libmesh/mesh_generation.h" +#include "libmesh/mesh.h" +#include "libmesh/numeric_vector.h" +#include "libmesh/parsed_fem_function.h" +#include "libmesh/parsed_function.h" +#include "libmesh/steady_solver.h" +#include "libmesh/string_to_enum.h" + +#include +#include +#include +#include + +#ifdef LIBMESH_HAVE_KOKKOS +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ +#endif + +using namespace libMesh; + +namespace +{ + +enum class GoalKind +{ + analytic, + fem +}; + +struct BenchmarkOptions +{ + unsigned int nx = 256; + unsigned int ny = 256; + unsigned int repeats = 1; + unsigned int hilbert_order = 0; + unsigned int fe_order = 1; + unsigned int max_linear_iterations = 5000; + std::string fe_family = "LAGRANGE"; + std::string calc_expression = "sin(pi*x) + 0.25*y"; + std::string input_expression = "sin(pi*x) + 0.5*y"; + ElemType elem_type = QUAD4; + Real linear_tolerance = 1.e-10; + Real fdm_eps = 1.e-7; + SolverType solver_type = CG; + PreconditionerType preconditioner_type = JACOBI_PRECOND; + GoalKind goal_kind = GoalKind::analytic; +}; + +struct BenchmarkResult +{ + std::vector solution; + Real average_solve_seconds = 0.; + Real average_plan_seconds = 0.; + Real average_assembly_seconds = 0.; + Real average_solver_seconds = 0.; + Real average_total_seconds = 0.; + dof_id_type n_dofs = 0; + HilbertSystem::KokkosAssemblyPath 
assembly_path = HilbertSystem::KokkosAssemblyPath::none; +}; + +#ifdef LIBMESH_HAVE_KOKKOS +struct KokkosScope +{ + KokkosScope(int & argc, char ** & argv) + { + ::Kokkos::initialize(argc, argv); + } + + ~KokkosScope() + { + ::Kokkos::finalize(); + } +}; +#endif + +void usage_error(const char * progname) +{ + libMesh::out + << "Options: " << progname << '\n' + << " --nx n number of elements in x [default: 256]\n" + << " --ny n number of elements in y [default: 256]\n" + << " --elem-type type element type [default: QUAD4]\n" + << " --goal kind analytic|fem [default: analytic]\n" + << " --calc expr projection goal expression\n" + << " default analytic: sin(pi*x) + 0.25*y\n" + << " default fem: u*u + x - 0.25*y\n" + << " --input-func expr input field for fem goal [default: sin(pi*x) + 0.5*y]\n" + << " --family fam output FE family [default: LAGRANGE]\n" + << " --order p output FE order [default: 1]\n" + << " --hilbert p Hilbert order [default: 0]\n" + << " --fdm_eps eps fallback finite-difference eps [default: 1e-7]\n" + << " --linear_tol tol linear solve tolerance [default: 1e-10]\n" + << " --max_its n linear max iterations [default: 5000]\n" + << " --solver type solver type [default: CG]\n" + << " --pc type preconditioner type [default: JACOBI_PRECOND]\n" + << " --repeats n number of fresh runs to average [default: 1]\n" + << std::endl; + + std::exit(1); +} + +GoalKind parse_goal_kind(const std::string & goal_string) +{ + if (goal_string == "analytic") + return GoalKind::analytic; + if (goal_string == "fem") + return GoalKind::fem; + + libmesh_error_msg("Unsupported --goal value '" << goal_string << "'. 
Use analytic or fem."); +} + +void configure_hilbert_system(HilbertSystem & sys, + const BenchmarkOptions & options, + const bool use_kokkos) +{ + sys.hilbert_order() = options.hilbert_order; + sys.fe_family() = options.fe_family; + sys.fe_order() = options.fe_order; + sys.use_kokkos_backend(use_kokkos); + sys.set_fdm_eps(options.fdm_eps); + sys.time_solver = std::make_unique(sys); +} + +void configure_linear_solver(HilbertSystem & sys, + const BenchmarkOptions & options) +{ + DiffSolver & solver = *sys.time_solver->diff_solver(); + solver.quiet = true; + solver.verbose = false; + solver.relative_step_tolerance = 1.e-12; + + sys.parameters.set("linear solver maximum iterations") = + options.max_linear_iterations; + sys.parameters.set("linear solver tolerance") = + options.linear_tolerance; + + auto * linear_solver = sys.get_linear_solver(); + linear_solver->set_solver_type(options.solver_type); + linear_solver->set_preconditioner_type(options.preconditioner_type); +} + +const char * +kokkos_assembly_path_name(const HilbertSystem::KokkosAssemblyPath path) +{ + switch (path) + { + case HilbertSystem::KokkosAssemblyPath::none: + return "none"; + case HilbertSystem::KokkosAssemblyPath::petsc_direct_storage: + return "direct PETSc storage"; + } + + return "unknown"; +} + +BenchmarkResult solve_projection_once(const Parallel::Communicator & comm, + const BenchmarkOptions & options, + const bool use_kokkos) +{ + Mesh mesh(comm, 2); + MeshTools::Generation::build_square(mesh, + options.nx, + options.ny, + 0., + 1., + 0., + 1., + options.elem_type); + + EquationSystems es(mesh); + ExplicitSystem * input_system = nullptr; + if (options.goal_kind == GoalKind::fem) + { + input_system = &es.add_system("input"); + input_system->add_variable("u", FIRST, LAGRANGE); + } + + HilbertSystem & projection = es.add_system("projection"); + configure_hilbert_system(projection, options, use_kokkos); + projection.input_system = input_system; + es.init(); + + if (options.goal_kind == 
GoalKind::analytic) + { + ParsedFunction goal(options.calc_expression); + projection.set_goal_func(goal); + } + else + { + libmesh_assert(input_system); + ParsedFunction input_function(options.input_expression); + input_system->project_solution(&input_function); + + ParsedFEMFunction goal(*input_system, options.calc_expression); + projection.set_goal_func(goal); + } + + configure_linear_solver(projection, options); + + const auto start = std::chrono::steady_clock::now(); + projection.solve(); + const auto stop = std::chrono::steady_clock::now(); + + BenchmarkResult result; + result.average_solve_seconds = + std::chrono::duration_cast>(stop - start).count(); + result.average_total_seconds = result.average_solve_seconds; + if (use_kokkos) + { + const auto & timing = projection.last_kokkos_timing(); + result.average_plan_seconds = timing.plan_seconds; + result.average_assembly_seconds = timing.assembly_seconds; + result.average_solver_seconds = timing.solve_seconds; + result.average_total_seconds = timing.total_seconds; + result.assembly_path = timing.assembly_path; + } + result.n_dofs = projection.n_dofs(); + projection.solution->localize(result.solution); + return result; +} + +BenchmarkResult solve_projection(const Parallel::Communicator & comm, + const BenchmarkOptions & options, + const bool use_kokkos) +{ + BenchmarkResult result; + + for (unsigned int repeat = 0; repeat != options.repeats; ++repeat) + { + auto single = solve_projection_once(comm, options, use_kokkos); + result.average_solve_seconds += single.average_solve_seconds; + result.average_plan_seconds += single.average_plan_seconds; + result.average_assembly_seconds += single.average_assembly_seconds; + result.average_solver_seconds += single.average_solver_seconds; + result.average_total_seconds += single.average_total_seconds; + result.n_dofs = single.n_dofs; + result.assembly_path = single.assembly_path; + result.solution = std::move(single.solution); + } + + result.average_solve_seconds /= 
options.repeats; + result.average_plan_seconds /= options.repeats; + result.average_assembly_seconds /= options.repeats; + result.average_solver_seconds /= options.repeats; + result.average_total_seconds /= options.repeats; + return result; +} + +void +print_kokkos_phase_diagnostics(const BenchmarkResult & result) +{ + if (result.average_total_seconds <= 0.) + return; + + const Real plan_fraction = result.average_plan_seconds / result.average_total_seconds; + const Real assembly_fraction = result.average_assembly_seconds / result.average_total_seconds; + const Real solver_fraction = result.average_solver_seconds / result.average_total_seconds; + const Real accounted_fraction = plan_fraction + assembly_fraction + solver_fraction; + const Real other_fraction = std::max(0., 1. - accounted_fraction); + + const char * dominant_name = "plan"; + Real dominant_fraction = plan_fraction; + if (assembly_fraction > dominant_fraction) + { + dominant_name = "assembly"; + dominant_fraction = assembly_fraction; + } + if (solver_fraction > dominant_fraction) + { + dominant_name = "solver"; + dominant_fraction = solver_fraction; + } + if (other_fraction > dominant_fraction) + { + dominant_name = "other"; + dominant_fraction = other_fraction; + } + + libMesh::out << "Kokkos phase fractions:" + << " plan=" << 100. * plan_fraction << '%' + << " assembly=" << 100. * assembly_fraction << '%' + << " solver=" << 100. * solver_fraction << '%' + << " other=" << 100. * other_fraction << '%' + << std::endl; + libMesh::out << "Dominant Kokkos phase: " + << dominant_name + << " (" << 100. 
* dominant_fraction << "% of total)" + << std::endl; +} + +void compute_difference_metrics(const std::vector & host_solution, + const std::vector & kokkos_solution, + Real & max_abs_host, + Real & max_abs_diff) +{ + libmesh_assert_equal_to(host_solution.size(), kokkos_solution.size()); + + max_abs_host = 0.; + max_abs_diff = 0.; + + for (const auto i : index_range(host_solution)) + { + max_abs_host = std::max(max_abs_host, std::abs(libmesh_real(host_solution[i]))); + max_abs_diff = std::max(max_abs_diff, + std::abs(libmesh_real(host_solution[i] - kokkos_solution[i]))); + } +} + +BenchmarkOptions parse_options() +{ + BenchmarkOptions options; + + if (libMesh::on_command_line("--help")) + usage_error("hilbert_kokkos_benchmark"); + + options.nx = libMesh::command_line_next("--nx", options.nx); + options.ny = libMesh::command_line_next("--ny", options.ny); + options.repeats = libMesh::command_line_next("--repeats", options.repeats); + options.hilbert_order = libMesh::command_line_next("--hilbert", options.hilbert_order); + options.fe_order = libMesh::command_line_next("--order", options.fe_order); + options.fe_family = libMesh::command_line_next("--family", options.fe_family); + options.input_expression = libMesh::command_line_next("--input-func", options.input_expression); + options.elem_type = + Utility::string_to_enum(libMesh::command_line_next("--elem-type", + std::string("QUAD4"))); + options.fdm_eps = libMesh::command_line_next("--fdm_eps", options.fdm_eps); + options.linear_tolerance = libMesh::command_line_next("--linear_tol", options.linear_tolerance); + options.max_linear_iterations = + libMesh::command_line_next("--max_its", options.max_linear_iterations); + options.solver_type = + Utility::string_to_enum(libMesh::command_line_next("--solver", + std::string("CG"))); + options.preconditioner_type = + Utility::string_to_enum( + libMesh::command_line_next("--pc", std::string("JACOBI_PRECOND"))); + options.goal_kind = + 
parse_goal_kind(libMesh::command_line_next("--goal", std::string("analytic"))); + + const std::string default_calc = + options.goal_kind == GoalKind::analytic ? + std::string("sin(pi*x) + 0.25*y") : + std::string("u*u + x - 0.25*y"); + options.calc_expression = libMesh::command_line_next("--calc", default_calc); + + libmesh_error_msg_if(options.nx == 0 || options.ny == 0, + "--nx and --ny must both be positive"); + libmesh_error_msg_if(options.repeats == 0, + "--repeats must be positive"); + + return options; +} + +} // namespace + +int main(int argc, char ** argv) +{ +#ifdef LIBMESH_HAVE_KOKKOS + KokkosScope kokkos_scope(argc, argv); +#endif + LibMeshInit init(argc, argv); + +#ifndef LIBMESH_HAVE_KOKKOS + libmesh_error_msg("hilbert_kokkos_benchmark requires a libMesh build with Kokkos enabled"); +#endif +#ifndef LIBMESH_HAVE_PETSC + libmesh_error_msg("hilbert_kokkos_benchmark requires a libMesh build with PETSc enabled"); +#endif +#ifndef LIBMESH_HAVE_FPARSER + libmesh_error_msg("hilbert_kokkos_benchmark requires a libMesh build with FPARSER enabled"); +#endif +#ifdef LIBMESH_USE_COMPLEX_NUMBERS + libmesh_error_msg("hilbert_kokkos_benchmark does not support complex Number builds"); +#endif + + const auto options = parse_options(); + + libMesh::out << std::setprecision(std::numeric_limits::max_digits10); + libMesh::out << "Running Hilbert benchmark with" + << " nx=" << options.nx + << " ny=" << options.ny + << " elem_type=" << Utility::enum_to_string(options.elem_type) + << " goal=" << (options.goal_kind == GoalKind::analytic ? 
"analytic" : "fem") + << " family=" << options.fe_family + << " order=" << options.fe_order + << " hilbert=" << options.hilbert_order + << " solver=" << Utility::enum_to_string(options.solver_type) + << " pc=" << Utility::enum_to_string(options.preconditioner_type) + << " repeats=" << options.repeats + << std::endl; + + libMesh::out << "Starting host projection" << std::endl; + const auto host_result = solve_projection(init.comm(), options, false); + + libMesh::out << "Starting Kokkos projection" << std::endl; + const auto kokkos_result = solve_projection(init.comm(), options, true); + + libmesh_assert_equal_to(host_result.n_dofs, kokkos_result.n_dofs); + libmesh_assert_equal_to(host_result.solution.size(), kokkos_result.solution.size()); + + Real max_abs_host = 0.; + Real max_abs_diff = 0.; + compute_difference_metrics(host_result.solution, + kokkos_result.solution, + max_abs_host, + max_abs_diff); + + libMesh::out << "Degrees of freedom: " << host_result.n_dofs << std::endl; + libMesh::out << "Host solve time: " << host_result.average_solve_seconds << " s" << std::endl; + libMesh::out << "Kokkos solve time: " << kokkos_result.average_solve_seconds << " s" << std::endl; + libMesh::out << "Kokkos assembly path: " + << kokkos_assembly_path_name(kokkos_result.assembly_path) << std::endl; + libMesh::out << "Kokkos plan time: " << kokkos_result.average_plan_seconds << " s" << std::endl; + libMesh::out << "Kokkos assembly: " << kokkos_result.average_assembly_seconds << " s" << std::endl; + libMesh::out << "Kokkos solver: " << kokkos_result.average_solver_seconds << " s" << std::endl; + libMesh::out << "Kokkos total time: " << kokkos_result.average_total_seconds << " s" << std::endl; + print_kokkos_phase_diagnostics(kokkos_result); + + if (kokkos_result.average_solve_seconds > 0.) 
+ libMesh::out << "Host/Kokkos ratio: " + << host_result.average_solve_seconds / kokkos_result.average_solve_seconds + << std::endl; + + libMesh::out << "Max |host|: " << max_abs_host << std::endl; + libMesh::out << "Max |host-kokkos|: " << max_abs_diff << std::endl; + + return 0; +} diff --git a/src/base/dof_map.C b/src/base/dof_map.C index d0443d27be4..c259ba33f47 100644 --- a/src/base/dof_map.C +++ b/src/base/dof_map.C @@ -43,6 +43,14 @@ #include "libmesh/system.h" #include "libmesh/parallel_fe_type.h" +#ifdef LIBMESH_HAVE_KOKKOS +#include "libmesh/kokkos_storage_policy.h" +#endif + +#ifdef LIBMESH_HAVE_PETSC +#include "libmesh/petsc_vector.h" +#endif + // TIMPI includes #include "timpi/parallel_implementation.h" #include "timpi/parallel_sync.h" @@ -211,6 +219,156 @@ DofMap::~DofMap() _mesh.remove_ghosting_functor(*_default_evaluating); } +#ifdef LIBMESH_HAVE_KOKKOS +const DofMap::KokkosDofIndexCache * +DofMap::get_kokkos_dof_index_cache(const unsigned int vn) const +{ + if (auto it = _kokkos_dof_index_caches.find(vn); it != _kokkos_dof_index_caches.end()) + return it->second.get(); + + const auto & geometry_cache = _mesh.get_kokkos_geometry_cache(); + auto cache = std::make_unique(); + std::vector dof_indices; + cache->host_element_ids = geometry_cache.host_element_ids; + + for (const auto elem_id : cache->host_element_ids) + { + const Elem * elem = _mesh.query_elem_ptr(elem_id); + libmesh_assert(elem); + this->dof_indices(elem, dof_indices, vn); + cache->max_dofs = std::max(cache->max_dofs, cast_int(dof_indices.size())); + } + + cache->element_ids = + KokkosDofIndexCache::elem_id_view("dof_map_kokkos_element_ids", + cache->host_element_ids.size()); + cache->element_dof_indices = + KokkosDofIndexCache::elem_dof_id_view("dof_map_kokkos_dof_indices", + cache->host_element_ids.size(), + cache->max_dofs); + cache->element_n_dofs = + KokkosDofIndexCache::elem_dof_count_view("dof_map_kokkos_n_dofs", + cache->host_element_ids.size()); + cache->element_subdomains = + 
KokkosDofIndexCache::elem_subdomain_view("dof_map_kokkos_elem_subdomains", + cache->host_element_ids.size()); + + auto h_element_ids = ::Kokkos::create_mirror_view(cache->element_ids); + auto h_dof_indices = ::Kokkos::create_mirror_view(cache->element_dof_indices); + auto h_n_dofs = ::Kokkos::create_mirror_view(cache->element_n_dofs); + auto h_subdomains = ::Kokkos::create_mirror_view(cache->element_subdomains); + cache->host_element_dof_indices.resize(cache->host_element_ids.size() * cache->max_dofs, + DofObject::invalid_id); + cache->host_element_n_dofs.resize(cache->host_element_ids.size(), 0); + cache->host_element_subdomains.resize(cache->host_element_ids.size(), + static_cast(Elem::invalid_subdomain_id)); + + for (auto elem_index : index_range(cache->host_element_ids)) + { + const dof_id_type elem_id = cache->host_element_ids[elem_index]; + const Elem * elem = _mesh.query_elem_ptr(elem_id); + libmesh_assert(elem); + h_element_ids(cast_int(elem_index)) = elem_id; + h_subdomains(cast_int(elem_index)) = elem->subdomain_id(); + cache->host_element_subdomains[elem_index] = elem->subdomain_id(); + this->dof_indices(elem, dof_indices, vn); + h_n_dofs(cast_int(elem_index)) = cast_int(dof_indices.size()); + cache->host_element_n_dofs[elem_index] = cast_int(dof_indices.size()); + for (auto i : index_range(dof_indices)) + { + h_dof_indices(cast_int(elem_index), cast_int(i)) = + dof_indices[i]; + cache->host_element_dof_indices[elem_index * cache->max_dofs + i] = dof_indices[i]; + } + } + + ::Kokkos::deep_copy(cache->element_ids, h_element_ids); + ::Kokkos::deep_copy(cache->element_dof_indices, h_dof_indices); + ::Kokkos::deep_copy(cache->element_n_dofs, h_n_dofs); + ::Kokkos::deep_copy(cache->element_subdomains, h_subdomains); + + auto [it, inserted] = _kokkos_dof_index_caches.emplace(vn, std::move(cache)); + libmesh_ignore(inserted); + return it->second.get(); +} + +const DofMap::KokkosLocalIndexCache * +DofMap::get_kokkos_local_index_cache(const NumericVector & 
local_vector, + const unsigned int vn) const +{ +#ifdef LIBMESH_HAVE_PETSC + const auto key = std::make_pair(vn, &local_vector); + if (auto it = _kokkos_local_index_caches.find(key); it != _kokkos_local_index_caches.end()) + return it->second.get(); + + const auto * petsc_vector = dynamic_cast *>(&local_vector); + if (!petsc_vector) + return nullptr; + + const auto * dof_index_cache = this->get_kokkos_dof_index_cache(vn); + if (!dof_index_cache) + return nullptr; + + const auto & geometry_cache = _mesh.get_kokkos_geometry_cache(); + auto cache = std::make_unique(); + cache->max_dofs = dof_index_cache->max_dofs; + + cache->element_local_indices = + KokkosLocalIndexCache::elem_local_index_view("dof_map_kokkos_local_indices", + geometry_cache.host_element_ids.size(), + cache->max_dofs); + auto h_local_indices = ::Kokkos::create_mirror_view(cache->element_local_indices); + + for (auto elem_index : index_range(geometry_cache.host_element_ids)) + { + const unsigned int n_dofs = dof_index_cache->host_element_n_dofs[elem_index]; + for (unsigned int i = 0; i != n_dofs; ++i) + h_local_indices(cast_int(elem_index), + i) = + cast_int( + petsc_vector->map_global_to_local_index( + dof_index_cache->host_element_dof_indices[elem_index * cache->max_dofs + i])); + } + + ::Kokkos::deep_copy(cache->element_local_indices, h_local_indices); + auto [it, inserted] = _kokkos_local_index_caches.emplace(key, std::move(cache)); + libmesh_ignore(inserted); + return it->second.get(); +#else + libmesh_ignore(local_vector, vn); + return nullptr; +#endif +} + +const DofMap::KokkosLocalIndexCache * +DofMap::require_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn) const +{ + this->prepare_kokkos_local_index_cache(local_vector, vn); + return this->get_kokkos_local_index_cache(local_vector, vn); +} + +void +DofMap::prepare_kokkos_dof_index_caches() const +{ + for (auto vn : make_range(this->n_variables())) + libmesh_ignore(this->get_kokkos_dof_index_cache(vn)); +} 
+ +void +DofMap::prepare_kokkos_local_index_cache(const NumericVector & local_vector, + const unsigned int vn) const +{ + libmesh_ignore(this->get_kokkos_local_index_cache(local_vector, vn)); +} + +void DofMap::clear_kokkos_caches() const +{ + _kokkos_dof_index_caches.clear(); + _kokkos_local_index_caches.clear(); +} +#endif + #ifdef LIBMESH_ENABLE_PERIODIC @@ -472,6 +630,10 @@ void DofMap::reinit { libmesh_assert (mesh.is_prepared()); +#ifdef LIBMESH_HAVE_KOKKOS + this->clear_kokkos_caches(); +#endif + LOG_SCOPE("reinit()", "DofMap"); // This is the common case and we want to optimize for it @@ -861,6 +1023,10 @@ void DofMap::invalidate_dofs(MeshBase & mesh) const void DofMap::clear() { +#ifdef LIBMESH_HAVE_KOKKOS + this->clear_kokkos_caches(); +#endif + DofMapBase::clear(); // we don't want to clear @@ -939,6 +1105,10 @@ void DofMap::clear() std::size_t DofMap::distribute_dofs (MeshBase & mesh) { +#ifdef LIBMESH_HAVE_KOKKOS + this->clear_kokkos_caches(); +#endif + // This function must be run on all processors at once parallel_object_only(); @@ -1104,6 +1274,10 @@ std::size_t DofMap::distribute_dofs (MeshBase & mesh) // dependencies to the send_list too. 
// this->sort_send_list (); +#ifdef LIBMESH_HAVE_KOKKOS + this->prepare_kokkos_dof_index_caches(); +#endif + return n_dofs; } diff --git a/src/fe/fe_abstract.C b/src/fe/fe_abstract.C index 20d435ae44a..2d39d86f39f 100644 --- a/src/fe/fe_abstract.C +++ b/src/fe/fe_abstract.C @@ -26,6 +26,7 @@ #include "libmesh/dof_map.h" #include "libmesh/elem.h" #include "libmesh/fe_interface.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/numeric_vector.h" #include "libmesh/periodic_boundaries.h" #include "libmesh/periodic_boundary.h" @@ -405,230 +406,10 @@ void FEAbstract::get_refspace_nodes(const ElemType itemType, std::vector Utility::enum_to_string(itemType)); nodes.resize(n_nodes); - switch(itemType) - { - case NODEELEM: - { - nodes[0] = Point (0.,0.,0.); - return; - } - case EDGE3: - { - nodes[2] = Point (0.,0.,0.); - libmesh_fallthrough(); - } - case EDGE2: - { - nodes[0] = Point (-1.,0.,0.); - nodes[1] = Point (1.,0.,0.); - return; - } - case EDGE4: // not nested with EDGE3 - { - nodes[0] = Point (-1.,0.,0.); - nodes[1] = Point (1.,0.,0.); - nodes[2] = Point (-1./3.,0.,0.); - nodes[3] - Point (1./3.,0.,0.); - return; - } - case TRI7: - { - nodes[6] = Point (1./3.,1./3.,0.); - libmesh_fallthrough(); - } - case TRI6: - { - nodes[3] = Point (.5,0.,0.); - nodes[4] = Point (.5,.5,0.); - nodes[5] = Point (0.,.5,0.); - libmesh_fallthrough(); - } - case TRI3: - case TRISHELL3: - { - nodes[0] = Point (0.,0.,0.); - nodes[1] = Point (1.,0.,0.); - nodes[2] = Point (0.,1.,0.); - return; - } - case QUAD9: - case QUADSHELL9: - { - nodes[8] = Point (0.,0.,0.); - libmesh_fallthrough(); - } - case QUAD8: - case QUADSHELL8: - { - nodes[4] = Point (0.,-1.,0.); - nodes[5] = Point (1.,0.,0.); - nodes[6] = Point (0.,1.,0.); - nodes[7] = Point (-1.,0.,0.); - libmesh_fallthrough(); - } - case QUAD4: - case QUADSHELL4: - { - nodes[0] = Point (-1.,-1.,0.); - nodes[1] = Point (1.,-1.,0.); - nodes[2] = Point (1.,1.,0.); - nodes[3] = Point (-1.,1.,0.); - return; - } - case TET14: 
- { - nodes[10] = Point (1/Real(3),1/Real(3),0.); - nodes[11] = Point (1/Real(3),0.,1/Real(3)); - nodes[12] = Point (1/Real(3),1/Real(3),1/Real(3)); - nodes[13] = Point (0.,1/Real(3),1/Real(3)); - libmesh_fallthrough(); - } - case TET10: - { - nodes[4] = Point (.5,0.,0.); - nodes[5] = Point (.5,.5,0.); - nodes[6] = Point (0.,.5,0.); - nodes[7] = Point (0.,0.,.5); - nodes[8] = Point (.5,0.,.5); - nodes[9] = Point (0.,.5,.5); - libmesh_fallthrough(); - } - case TET4: - { - nodes[0] = Point (0.,0.,0.); - nodes[1] = Point (1.,0.,0.); - nodes[2] = Point (0.,1.,0.); - nodes[3] = Point (0.,0.,1.); - return; - } - case HEX27: - { - nodes[20] = Point (0.,0.,-1.); - nodes[21] = Point (0.,-1.,0.); - nodes[22] = Point (1.,0.,0.); - nodes[23] = Point (0.,1.,0.); - nodes[24] = Point (-1.,0.,0.); - nodes[25] = Point (0.,0.,1.); - nodes[26] = Point (0.,0.,0.); - libmesh_fallthrough(); - } - case HEX20: - { - nodes[8] = Point (0.,-1.,-1.); - nodes[9] = Point (1.,0.,-1.); - nodes[10] = Point (0.,1.,-1.); - nodes[11] = Point (-1.,0.,-1.); - nodes[12] = Point (-1.,-1.,0.); - nodes[13] = Point (1.,-1.,0.); - nodes[14] = Point (1.,1.,0.); - nodes[15] = Point (-1.,1.,0.); - nodes[16] = Point (0.,-1.,1.); - nodes[17] = Point (1.,0.,1.); - nodes[18] = Point (0.,1.,1.); - nodes[19] = Point (-1.,0.,1.); - libmesh_fallthrough(); - } - case HEX8: - { - nodes[0] = Point (-1.,-1.,-1.); - nodes[1] = Point (1.,-1.,-1.); - nodes[2] = Point (1.,1.,-1.); - nodes[3] = Point (-1.,1.,-1.); - nodes[4] = Point (-1.,-1.,1.); - nodes[5] = Point (1.,-1.,1.); - nodes[6] = Point (1.,1.,1.); - nodes[7] = Point (-1.,1.,1.); - return; - } - case PRISM21: - { - nodes[20] = Point (1/Real(3),1/Real(3),0); - libmesh_fallthrough(); - } - case PRISM20: - { - nodes[18] = Point (1/Real(3),1/Real(3),-1); - nodes[19] = Point (1/Real(3),1/Real(3),1); - libmesh_fallthrough(); - } - case PRISM18: - { - nodes[15] = Point (.5,0.,0.); - nodes[16] = Point (.5,.5,0.); - nodes[17] = Point (0.,.5,0.); - libmesh_fallthrough(); - } - 
case PRISM15: - { - nodes[6] = Point (.5,0.,-1.); - nodes[7] = Point (.5,.5,-1.); - nodes[8] = Point (0.,.5,-1.); - nodes[9] = Point (0.,0.,0.); - nodes[10] = Point (1.,0.,0.); - nodes[11] = Point (0.,1.,0.); - nodes[12] = Point (.5,0.,1.); - nodes[13] = Point (.5,.5,1.); - nodes[14] = Point (0.,.5,1.); - libmesh_fallthrough(); - } - case PRISM6: - { - nodes[0] = Point (0.,0.,-1.); - nodes[1] = Point (1.,0.,-1.); - nodes[2] = Point (0.,1.,-1.); - nodes[3] = Point (0.,0.,1.); - nodes[4] = Point (1.,0.,1.); - nodes[5] = Point (0.,1.,1.); - return; - } - case PYRAMID18: - { - // triangle centers - nodes[14] = Point (-2/Real(3),0.,1/Real(3)); - nodes[15] = Point (0.,2/Real(3),1/Real(3)); - nodes[16] = Point (2/Real(3),0.,1/Real(3)); - nodes[17] = Point (0.,-2/Real(3),1/Real(3)); - - libmesh_fallthrough(); - } - case PYRAMID14: - { - // base center - nodes[13] = Point (0.,0.,0.); - - libmesh_fallthrough(); - } - case PYRAMID13: - { - // base midedge - nodes[5] = Point (0.,-1.,0.); - nodes[6] = Point (1.,0.,0.); - nodes[7] = Point (0.,1.,0.); - nodes[8] = Point (-1,0.,0.); - - // lateral midedge - nodes[9] = Point (-.5,-.5,.5); - nodes[10] = Point (.5,-.5,.5); - nodes[11] = Point (.5,.5,.5); - nodes[12] = Point (-.5,.5,.5); - - libmesh_fallthrough(); - } - case PYRAMID5: - { - // base corners - nodes[0] = Point (-1.,-1.,0.); - nodes[1] = Point (1.,-1.,0.); - nodes[2] = Point (1.,1.,0.); - nodes[3] = Point (-1.,1.,0.); - // apex - nodes[4] = Point (0.,0.,1.); - return; - } - - default: - libmesh_error_msg("ERROR: Unknown element type " << Utility::enum_to_string(itemType)); - } + for (unsigned int i = 0; i != n_nodes; ++i) + if (!try_refspace_node(itemType, i, nodes[i])) + libmesh_error_msg("ERROR: Unknown reference-space node " << i << " for element type " << + Utility::enum_to_string(itemType)); } diff --git a/src/fe/fe_lagrange_shape_2D.C b/src/fe/fe_lagrange_shape_2D.C index 7749e783582..326fc703c99 100644 --- a/src/fe/fe_lagrange_shape_2D.C +++ 
b/src/fe/fe_lagrange_shape_2D.C @@ -20,6 +20,9 @@ #include "libmesh/fe.h" #include "libmesh/elem.h" #include "libmesh/fe_lagrange_shape_1D.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" #include "libmesh/enum_to_string.h" #include "libmesh/face_c0polygon.h" @@ -346,17 +349,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case QUADSHELL9: { // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 4); - - // 0 1 2 3 - static const unsigned int i0[] = {0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1}; - - return (fe_lagrange_1D_linear_shape(i0[i], xi)* - fe_lagrange_1D_linear_shape(i1[i], eta)); + return libMesh::detail::fe_lagrange_quad4_shape(i, p(0), p(1)); } case TRI3: @@ -364,26 +358,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case TRI6: case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. - zeta1 - zeta2; - libmesh_assert_less (i, 3); - - switch(i) - { - case 0: - return zeta0; - - case 1: - return zeta1; - - case 2: - return zeta2; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_tri3_shape(i, p(0), p(1)); } case C0POLYGON: @@ -434,40 +410,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case QUAD8: case QUADSHELL8: { - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 8); - - switch (i) - { - case 0: - return .25*(1. - xi)*(1. - eta)*(-1. - xi - eta); - - case 1: - return .25*(1. + xi)*(1. - eta)*(-1. + xi - eta); - - case 2: - return .25*(1. + xi)*(1. + eta)*(-1. + xi + eta); - - case 3: - return .25*(1. - xi)*(1. + eta)*(-1. - xi + eta); - - case 4: - return .5*(1. - xi*xi)*(1. - eta); - - case 5: - return .5*(1. + xi)*(1. - eta*eta); - - case 6: - return .5*(1. - xi*xi)*(1. + eta); - - case 7: - return .5*(1. - xi)*(1. 
- eta*eta); - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_quad8_shape(i, p(0), p(1)); } case QUAD4: @@ -477,19 +421,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case QUAD9: case QUADSHELL9: { - - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 9); - - // 0 1 2 3 4 5 6 7 8 - static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - - return (fe_lagrange_1D_quadratic_shape(i0[i], xi)* - fe_lagrange_1D_quadratic_shape(i1[i], eta)); + return libMesh::detail::fe_lagrange_quad9_shape(i, p(0), p(1)); } case TRI3: @@ -499,35 +432,8 @@ Real fe_lagrange_2D_shape(const ElemType type, case TRI6: case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. - zeta1 - zeta2; - libmesh_assert_less (i, 6); - - switch(i) - { - case 0: - return 2.*zeta0*(zeta0-0.5); - - case 1: - return 2.*zeta1*(zeta1-0.5); - - case 2: - return 2.*zeta2*(zeta2-0.5); - - case 3: - return 4.*zeta0*zeta1; - - case 4: - return 4.*zeta1*zeta2; - - case 5: - return 4.*zeta2*zeta0; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_tri6_shape(i, p(0), p(1)); } default: @@ -544,39 +450,8 @@ Real fe_lagrange_2D_shape(const ElemType type, { case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. 
- zeta1 - zeta2; - const Real bubble_27th = zeta0*zeta1*zeta2; - libmesh_assert_less (i, 7); - - switch(i) - { - case 0: - return 2.*zeta0*(zeta0-0.5) + 3.*bubble_27th; - - case 1: - return 2.*zeta1*(zeta1-0.5) + 3.*bubble_27th; - - case 2: - return 2.*zeta2*(zeta2-0.5) + 3.*bubble_27th; - - case 3: - return 4.*zeta0*zeta1 - 12.*bubble_27th; - - case 4: - return 4.*zeta1*zeta2 - 12.*bubble_27th; - - case 5: - return 4.*zeta2*zeta0 - 12.*bubble_27th; - - case 6: - return 27.*bubble_27th; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } + return libMesh::detail::fe_lagrange_tri7_shape(i, p(0), p(1)); } default: @@ -624,31 +499,8 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 4); - - // 0 1 2 3 - static const unsigned int i0[] = {0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1}; - - switch (j) - { - // d()/dxi - case 0: - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape (i1[i], eta)); - - // d()/deta - case 1: - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)); - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad4_shape_deriv(i, j, p(0), p(1)); } case TRI3: @@ -657,56 +509,7 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case TRI7: { libmesh_assert_less (i, 3); - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return dzeta0dxi; - - case 1: - return dzeta1dxi; - - case 2: - return dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - // d()/deta - 
case 1: - { - switch(i) - { - case 0: - return dzeta0deta; - - case 1: - return dzeta1deta; - - case 2: - return dzeta2deta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_tri3_shape_deriv(i, j); } case C0POLYGON: @@ -795,88 +598,8 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case QUAD8: case QUADSHELL8: { - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 8); - - switch (j) - { - // d/dxi - case 0: - switch (i) - { - case 0: - return .25*(1. - eta)*((1. - xi)*(-1.) + - (-1.)*(-1. - xi - eta)); - - case 1: - return .25*(1. - eta)*((1. + xi)*(1.) + - (1.)*(-1. + xi - eta)); - - case 2: - return .25*(1. + eta)*((1. + xi)*(1.) + - (1.)*(-1. + xi + eta)); - - case 3: - return .25*(1. + eta)*((1. - xi)*(-1.) + - (-1.)*(-1. - xi + eta)); - - case 4: - return .5*(-2.*xi)*(1. - eta); - - case 5: - return .5*(1.)*(1. - eta*eta); - - case 6: - return .5*(-2.*xi)*(1. + eta); - - case 7: - return .5*(-1.)*(1. - eta*eta); - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - - // d/deta - case 1: - switch (i) - { - case 0: - return .25*(1. - xi)*((1. - eta)*(-1.) + - (-1.)*(-1. - xi - eta)); - - case 1: - return .25*(1. + xi)*((1. - eta)*(-1.) + - (-1.)*(-1. + xi - eta)); - - case 2: - return .25*(1. + xi)*((1. + eta)*(1.) + - (1.)*(-1. + xi + eta)); - - case 3: - return .25*(1. - xi)*((1. + eta)*(1.) + - (1.)*(-1. - xi + eta)); - - case 4: - return .5*(1. - xi*xi)*(-1.); - - case 5: - return .5*(1. + xi)*(-2.*eta); - - case 6: - return .5*(1. - xi*xi)*(1.); - - case 7: - return .5*(1. 
- xi)*(-2.*eta); - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad8_shape_deriv(i, j, p(0), p(1)); } case QUAD4: @@ -886,31 +609,8 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 9); - - // 0 1 2 3 4 5 6 7 8 - static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - - switch (j) - { - // d()/dxi - case 0: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)); - - // d()/deta - case 1: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)); - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad9_shape_deriv(i, j, p(0), p(1)); } case TRI3: @@ -921,77 +621,7 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case TRI7: { libmesh_assert_less (i, 6); - - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. 
- zeta1 - zeta2; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - - switch(j) - { - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0dxi; - - case 1: - return (4.*zeta1-1.)*dzeta1dxi; - - case 2: - return (4.*zeta2-1.)*dzeta2dxi; - - case 3: - return 4.*zeta1*dzeta0dxi + 4.*zeta0*dzeta1dxi; - - case 4: - return 4.*zeta2*dzeta1dxi + 4.*zeta1*dzeta2dxi; - - case 5: - return 4.*zeta2*dzeta0dxi + 4*zeta0*dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0deta; - - case 1: - return (4.*zeta1-1.)*dzeta1deta; - - case 2: - return (4.*zeta2-1.)*dzeta2deta; - - case 3: - return 4.*zeta1*dzeta0deta + 4.*zeta0*dzeta1deta; - - case 4: - return 4.*zeta2*dzeta1deta + 4.*zeta1*dzeta2deta; - - case 5: - return 4.*zeta2*dzeta0deta + 4*zeta0*dzeta2deta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_tri6_shape_deriv(i, j, p(0), p(1)); } default: @@ -1009,86 +639,7 @@ Real fe_lagrange_2D_shape_deriv(const ElemType type, case TRI7: { libmesh_assert_less (i, 7); - - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta0 = 1. - zeta1 - zeta2; - // const Real bubble_27th = zeta0*zeta1*zeta2; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dbubbledxi = zeta2 * (1. - 2.*zeta1 - zeta2); - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dbubbledeta= zeta1 * (1. 
- zeta1 - 2.*zeta2); - - switch(j) - { - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0dxi + 3.*dbubbledxi; - - case 1: - return (4.*zeta1-1.)*dzeta1dxi + 3.*dbubbledxi; - - case 2: - return (4.*zeta2-1.)*dzeta2dxi + 3.*dbubbledxi; - - case 3: - return 4.*zeta1*dzeta0dxi + 4.*zeta0*dzeta1dxi - 12.*dbubbledxi; - - case 4: - return 4.*zeta2*dzeta1dxi + 4.*zeta1*dzeta2dxi - 12.*dbubbledxi; - - case 5: - return 4.*zeta2*dzeta0dxi + 4*zeta0*dzeta2dxi - 12.*dbubbledxi; - - case 6: - return 27.*dbubbledxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0-1.)*dzeta0deta + 3.*dbubbledeta; - - case 1: - return (4.*zeta1-1.)*dzeta1deta + 3.*dbubbledeta; - - case 2: - return (4.*zeta2-1.)*dzeta2deta + 3.*dbubbledeta; - - case 3: - return 4.*zeta1*dzeta0deta + 4.*zeta0*dzeta1deta - 12.*dbubbledeta; - - case 4: - return 4.*zeta2*dzeta1deta + 4.*zeta1*dzeta2deta - 12.*dbubbledeta; - - case 5: - return 4.*zeta2*dzeta0deta + 4*zeta0*dzeta2deta - 12.*dbubbledeta; - - case 6: - return 27.*dbubbledeta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_tri7_shape_deriv(i, j, p(0), p(1)); } default: @@ -1141,34 +692,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute quad shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 4); - - // 0 1 2 3 - static const unsigned int i0[] = {0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1}; - - switch (j) - { - // d^2() / dxi^2 - case 0: - return 0.; - - // d^2() / dxi deta - case 1: - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)); - - // d^2() / deta^2 - case 2: - return 0.; - - default: - 
libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } + return libMesh::detail::fe_lagrange_quad4_shape_second_deriv(i, j, p(0), p(1)); } // All second derivatives for linear triangles are zero. @@ -1200,106 +725,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case QUAD8: case QUADSHELL8: { - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (j, 3); - - switch (j) - { - // d^2() / dxi^2 - case 0: - { - switch (i) - { - case 0: - case 1: - return 0.5*(1.-eta); - - case 2: - case 3: - return 0.5*(1.+eta); - - case 4: - return eta - 1.; - - case 5: - case 7: - return 0.0; - - case 6: - return -1. - eta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / dxi deta - case 1: - { - switch (i) - { - case 0: - return 0.25*( 1. - 2.*xi - 2.*eta); - - case 1: - return 0.25*(-1. - 2.*xi + 2.*eta); - - case 2: - return 0.25*( 1. + 2.*xi + 2.*eta); - - case 3: - return 0.25*(-1. + 2.*xi - 2.*eta); - - case 4: - return xi; - - case 5: - return -eta; - - case 6: - return -xi; - - case 7: - return eta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / deta^2 - case 2: - { - switch (i) - { - case 0: - case 3: - return 0.5*(1.-xi); - - case 1: - case 2: - return 0.5*(1.+xi); - - case 4: - case 6: - return 0.0; - - case 5: - return -1.0 - xi; - - case 7: - return xi - 1.0; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return libMesh::detail::fe_lagrange_quad8_shape_second_deriv(i, j, p(0), p(1)); } // end case QUAD8 case QUAD4: @@ -1309,36 +736,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case QUAD9: case QUADSHELL9: { - // Compute QUAD9 second derivatives as tensor product - const Real xi = p(0); - const Real eta = p(1); - libmesh_assert_less (i, 9); - - // 0 1 2 3 4 5 6 7 8 - 
static const unsigned int i0[] = {0, 1, 1, 0, 2, 1, 2, 0, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 2, 1, 2, 2}; - - switch (j) - { - // d^2() / dxi^2 - case 0: - return (fe_lagrange_1D_quadratic_shape_second_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)); - - // d^2() / dxi deta - case 1: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)); - - // d^2() / deta^2 - case 2: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_second_deriv(i1[i], 0, eta)); - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return libMesh::detail::fe_lagrange_quad9_shape_second_deriv(i, j, p(0), p(1)); } // end case QUAD9 case TRI3: @@ -1348,105 +747,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case TRI6: case TRI7: { - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - libmesh_assert_less (j, 3); - - switch (j) - { - // d^2() / dxi^2 - case 0: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0dxi; - - case 1: - return 4.*dzeta1dxi*dzeta1dxi; - - case 2: - return 4.*dzeta2dxi*dzeta2dxi; - - case 3: - return 8.*dzeta0dxi*dzeta1dxi; - - case 4: - return 8.*dzeta1dxi*dzeta2dxi; - - case 5: - return 8.*dzeta0dxi*dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / dxi deta - case 1: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0deta; - - case 1: - return 4.*dzeta1dxi*dzeta1deta; - - case 2: - return 4.*dzeta2dxi*dzeta2deta; - - case 3: - return 4.*dzeta1deta*dzeta0dxi + 4.*dzeta0deta*dzeta1dxi; - - case 4: - return 4.*dzeta2deta*dzeta1dxi + 4.*dzeta1deta*dzeta2dxi; - - case 5: - return 4.*dzeta2deta*dzeta0dxi + 4.*dzeta0deta*dzeta2dxi; - - default: - libmesh_error_msg("Invalid shape 
function index i = " << i); - } - } - - // d^2() / deta^2 - case 2: - { - switch (i) - { - case 0: - return 4.*dzeta0deta*dzeta0deta; - - case 1: - return 4.*dzeta1deta*dzeta1deta; - - case 2: - return 4.*dzeta2deta*dzeta2deta; - - case 3: - return 8.*dzeta0deta*dzeta1deta; - - case 4: - return 8.*dzeta1deta*dzeta2deta; - - case 5: - return 8.*dzeta0deta*dzeta2deta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return libMesh::detail::fe_lagrange_tri6_shape_second_deriv(i, j); } // end case TRI6+TRI7 default: @@ -1468,124 +770,8 @@ Real fe_lagrange_2D_shape_second_deriv(const ElemType type, case TRI6: case TRI7: { - const Real zeta1 = p(0); - const Real zeta2 = p(1); - // const Real zeta0 = 1. - zeta1 - zeta2; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - // const Real dbubbledxi = zeta2 * (1. - 2.*zeta1 - zeta2); - const Real d2bubbledxi2 = -2. * zeta2; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - // const Real dbubbledeta= zeta1 * (1. - zeta1 - 2.*zeta2); - const Real d2bubbledeta2 = -2. * zeta1; - - const Real d2bubbledxideta = (1. 
- 2.*zeta1 - 2.*zeta2); - libmesh_assert_less (j, 3); - - switch (j) - { - // d^2() / dxi^2 - case 0: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0dxi + 3.*d2bubbledxi2; - - case 1: - return 4.*dzeta1dxi*dzeta1dxi + 3.*d2bubbledxi2; - - case 2: - return 4.*dzeta2dxi*dzeta2dxi + 3.*d2bubbledxi2; - - case 3: - return 8.*dzeta0dxi*dzeta1dxi - 12.*d2bubbledxi2; - - case 4: - return 8.*dzeta1dxi*dzeta2dxi - 12.*d2bubbledxi2; - - case 5: - return 8.*dzeta0dxi*dzeta2dxi - 12.*d2bubbledxi2; - - case 6: - return 27.*d2bubbledxi2; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / dxi deta - case 1: - { - switch (i) - { - case 0: - return 4.*dzeta0dxi*dzeta0deta + 3.*d2bubbledxideta; - - case 1: - return 4.*dzeta1dxi*dzeta1deta + 3.*d2bubbledxideta; - - case 2: - return 4.*dzeta2dxi*dzeta2deta + 3.*d2bubbledxideta; - - case 3: - return 4.*dzeta1deta*dzeta0dxi + 4.*dzeta0deta*dzeta1dxi - 12.*d2bubbledxideta; - - case 4: - return 4.*dzeta2deta*dzeta1dxi + 4.*dzeta1deta*dzeta2dxi - 12.*d2bubbledxideta; - - case 5: - return 4.*dzeta2deta*dzeta0dxi + 4.*dzeta0deta*dzeta2dxi - 12.*d2bubbledxideta; - - case 6: - return 27.*d2bubbledxideta; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - // d^2() / deta^2 - case 2: - { - switch (i) - { - case 0: - return 4.*dzeta0deta*dzeta0deta + 3.*d2bubbledeta2; - - case 1: - return 4.*dzeta1deta*dzeta1deta + 3.*d2bubbledeta2; - - case 2: - return 4.*dzeta2deta*dzeta2deta + 3.*d2bubbledeta2; - - case 3: - return 8.*dzeta0deta*dzeta1deta - 12.*d2bubbledeta2; - - case 4: - return 8.*dzeta1deta*dzeta2deta - 12.*d2bubbledeta2; - - case 5: - return 8.*dzeta0deta*dzeta2deta - 12.*d2bubbledeta2; - - case 6: - return 27.*d2bubbledeta2; - - default: - libmesh_error_msg("Invalid shape function index i = " << i); - } - } - - default: - libmesh_error_msg("ERROR: Invalid derivative index j = " << j); - } // end switch (j) + return 
libMesh::detail::fe_lagrange_tri7_shape_second_deriv(i, j, p(0), p(1)); } // end case TRI6+TRI7 default: diff --git a/src/fe/fe_lagrange_shape_3D.C b/src/fe/fe_lagrange_shape_3D.C index f4c5a649822..fb45fbc7ae0 100644 --- a/src/fe/fe_lagrange_shape_3D.C +++ b/src/fe/fe_lagrange_shape_3D.C @@ -20,6 +20,9 @@ #include "libmesh/fe.h" #include "libmesh/elem.h" #include "libmesh/fe_lagrange_shape_1D.h" +#include "libmesh/fe_serendipity_lagrange.h" +#include "libmesh/fe_simplex_lagrange.h" +#include "libmesh/fe_tensor_product_lagrange.h" #include "libmesh/enum_to_string.h" #include "libmesh/cell_c0polyhedron.h" #include "libmesh/tensor_value.h" @@ -105,32 +108,12 @@ void FE<3,LAGRANGE>::all_shapes { libmesh_assert_less_equal (n_sf, 8); - // 0 1 2 3 4 5 6 7 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // one_d_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][2] = { - {fe_lagrange_1D_linear_shape(0, xi), - fe_lagrange_1D_linear_shape(1, xi)}, - {fe_lagrange_1D_linear_shape(0, eta), - fe_lagrange_1D_linear_shape(1, eta)}, - {fe_lagrange_1D_linear_shape(0, zeta), - fe_lagrange_1D_linear_shape(1, zeta)}}; for (unsigned int i : make_range(n_sf)) - v[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; + v[i][qp] = libMesh::detail::fe_lagrange_hex8_shape(i, q_point(0), q_point(1), q_point(2)); } return; } @@ -156,38 +139,12 @@ void FE<3,LAGRANGE>::all_shapes { libmesh_assert_less_equal (n_sf, 27); - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! 
- // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // linear_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][3] = { - {fe_lagrange_1D_quadratic_shape(0, xi), - fe_lagrange_1D_quadratic_shape(1, xi), - fe_lagrange_1D_quadratic_shape(2, xi)}, - {fe_lagrange_1D_quadratic_shape(0, eta), - fe_lagrange_1D_quadratic_shape(1, eta), - fe_lagrange_1D_quadratic_shape(2, eta)}, - {fe_lagrange_1D_quadratic_shape(0, zeta), - fe_lagrange_1D_quadratic_shape(1, zeta), - fe_lagrange_1D_quadratic_shape(2, zeta)}}; for (unsigned int i : make_range(n_sf)) - v[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; + v[i][qp] = libMesh::detail::fe_lagrange_hex27_shape(i, q_point(0), q_point(1), q_point(2)); } return; } @@ -273,49 +230,15 @@ void FE<3,LAGRANGE>::all_shape_derivs { libmesh_assert_equal_to (n_sf, 8); - // 0 1 2 3 4 5 6 7 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // one_d_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][2] = { - {fe_lagrange_1D_linear_shape(0, xi), - fe_lagrange_1D_linear_shape(1, 
xi)}, - {fe_lagrange_1D_linear_shape(0, eta), - fe_lagrange_1D_linear_shape(1, eta)}, - {fe_lagrange_1D_linear_shape(0, zeta), - fe_lagrange_1D_linear_shape(1, zeta)}}; - - // one_d_derivs[dim][i] = dphi_i/dxi(p(dim)) - Real one_d_derivs[3][2] = { - {fe_lagrange_1D_linear_shape_deriv(0, 0, xi), - fe_lagrange_1D_linear_shape_deriv(1, 0, xi)}, - {fe_lagrange_1D_linear_shape_deriv(0, 0, eta), - fe_lagrange_1D_linear_shape_deriv(1, 0, eta)}, - {fe_lagrange_1D_linear_shape_deriv(0, 0, zeta), - fe_lagrange_1D_linear_shape_deriv(1, 0, zeta)}}; - - for (unsigned int i : make_range(n_sf)) - { - (*comps[0])[i][qp] = one_d_derivs[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[1])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_derivs[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[2])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_derivs[2][i2[i]]; - } + for (unsigned int i : make_range(n_sf)) + { + (*comps[0])[i][qp] = libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 0, q_point(0), q_point(1), q_point(2)); + (*comps[1])[i][qp] = libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 1, q_point(0), q_point(1), q_point(2)); + (*comps[2])[i][qp] = libMesh::detail::fe_lagrange_hex8_shape_deriv(i, 2, q_point(0), q_point(1), q_point(2)); + } } return; } @@ -341,58 +264,15 @@ void FE<3,LAGRANGE>::all_shape_derivs { libmesh_assert_less_equal (n_sf, 27); - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! 
- // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - for (auto qp : index_range(p)) { const Point & q_point = p[qp]; - // Compute hex shape functions as a tensor-product - const Real xi = q_point(0); - const Real eta = q_point(1); - const Real zeta = q_point(2); - - // one_d_shapes[dim][i] = phi_i(p(dim)) - Real one_d_shapes[3][3] = { - {fe_lagrange_1D_quadratic_shape(0, xi), - fe_lagrange_1D_quadratic_shape(1, xi), - fe_lagrange_1D_quadratic_shape(2, xi)}, - {fe_lagrange_1D_quadratic_shape(0, eta), - fe_lagrange_1D_quadratic_shape(1, eta), - fe_lagrange_1D_quadratic_shape(2, eta)}, - {fe_lagrange_1D_quadratic_shape(0, zeta), - fe_lagrange_1D_quadratic_shape(1, zeta), - fe_lagrange_1D_quadratic_shape(2, zeta)}}; - - // one_d_derivs[dim][i] = dphi_i/dxi(p(dim)) - Real one_d_derivs[3][3] = { - {fe_lagrange_1D_quadratic_shape_deriv(0, 0, xi), - fe_lagrange_1D_quadratic_shape_deriv(1, 0, xi), - fe_lagrange_1D_quadratic_shape_deriv(2, 0, xi)}, - {fe_lagrange_1D_quadratic_shape_deriv(0, 0, eta), - fe_lagrange_1D_quadratic_shape_deriv(1, 0, eta), - fe_lagrange_1D_quadratic_shape_deriv(2, 0, eta)}, - {fe_lagrange_1D_quadratic_shape_deriv(0, 0, zeta), - fe_lagrange_1D_quadratic_shape_deriv(1, 0, zeta), - fe_lagrange_1D_quadratic_shape_deriv(2, 0, zeta)}}; - - for (unsigned int i : make_range(n_sf)) - { - (*comps[0])[i][qp] = one_d_derivs[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[1])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_derivs[1][i1[i]] * - one_d_shapes[2][i2[i]]; - (*comps[2])[i][qp] = one_d_shapes[0][i0[i]] * - one_d_shapes[1][i1[i]] * - one_d_derivs[2][i2[i]]; - 
} + for (unsigned int i : make_range(n_sf)) + { + (*comps[0])[i][qp] = libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 0, q_point(0), q_point(1), q_point(2)); + (*comps[1])[i][qp] = libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 1, q_point(0), q_point(1), q_point(2)); + (*comps[2])[i][qp] = libMesh::detail::fe_lagrange_hex27_shape_deriv(i, 2, q_point(0), q_point(1), q_point(2)); + } } return; } @@ -691,19 +571,7 @@ Real fe_lagrange_3D_shape(const ElemType type, { libmesh_assert_less (i, 8); - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // 0 1 2 3 4 5 6 7 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - - return (fe_lagrange_1D_linear_shape(i0[i], xi)* - fe_lagrange_1D_linear_shape(i1[i], eta)* - fe_lagrange_1D_linear_shape(i2[i], zeta)); + return libMesh::detail::fe_lagrange_hex8_shape(i, p(0), p(1), p(2)); } // linear tetrahedral shape functions @@ -712,30 +580,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case TET14: { libmesh_assert_less (i, 4); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. 
- zeta1 - zeta2 - zeta3; - - switch(i) - { - case 0: - return zeta0; - - case 1: - return zeta1; - - case 2: - return zeta2; - - case 3: - return zeta3; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet4_shape(i, p(0), p(1), p(2)); } // linear prism shape functions @@ -848,82 +693,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case HEX20: { libmesh_assert_less (i, 20); - - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // these functions are defined for (x,y,z) in [0,1]^3 - // so transform the locations - const Real x = .5*(xi + 1.); - const Real y = .5*(eta + 1.); - const Real z = .5*(zeta + 1.); - - switch (i) - { - case 0: - return (1. - x)*(1. - y)*(1. - z)*(1. - 2.*x - 2.*y - 2.*z); - - case 1: - return x*(1. - y)*(1. - z)*(2.*x - 2.*y - 2.*z - 1.); - - case 2: - return x*y*(1. - z)*(2.*x + 2.*y - 2.*z - 3.); - - case 3: - return (1. - x)*y*(1. - z)*(2.*y - 2.*x - 2.*z - 1.); - - case 4: - return (1. - x)*(1. - y)*z*(2.*z - 2.*x - 2.*y - 1.); - - case 5: - return x*(1. - y)*z*(2.*x - 2.*y + 2.*z - 3.); - - case 6: - return x*y*z*(2.*x + 2.*y + 2.*z - 5.); - - case 7: - return (1. - x)*y*z*(2.*y - 2.*x + 2.*z - 3.); - - case 8: - return 4.*x*(1. - x)*(1. - y)*(1. - z); - - case 9: - return 4.*x*y*(1. - y)*(1. - z); - - case 10: - return 4.*x*(1. - x)*y*(1. - z); - - case 11: - return 4.*(1. - x)*y*(1. - y)*(1. - z); - - case 12: - return 4.*(1. - x)*(1. - y)*z*(1. - z); - - case 13: - return 4.*x*(1. - y)*z*(1. - z); - - case 14: - return 4.*x*y*z*(1. - z); - - case 15: - return 4.*(1. - x)*y*z*(1. - z); - - case 16: - return 4.*x*(1. - x)*(1. - y)*z; - - case 17: - return 4.*x*y*(1. - y)*z; - - case 18: - return 4.*x*(1. - x)*y*z; - - case 19: - return 4.*(1. - x)*y*(1. 
- y)*z; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_hex20_shape(i, p(0), p(1), p(2)); } // triquadratic hexahedral shape functions @@ -935,22 +705,7 @@ Real fe_lagrange_3D_shape(const ElemType type, { libmesh_assert_less (i, 27); - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - - return (fe_lagrange_1D_quadratic_shape(i0[i], xi)* - fe_lagrange_1D_quadratic_shape(i1[i], eta)* - fe_lagrange_1D_quadratic_shape(i2[i], zeta)); + return libMesh::detail::fe_lagrange_hex27_shape(i, p(0), p(1), p(2)); } // quadratic tetrahedral shape functions @@ -964,48 +719,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. 
- zeta1 - zeta2 - zeta3; - - switch(i) - { - case 0: - return zeta0*(2.*zeta0 - 1.); - - case 1: - return zeta1*(2.*zeta1 - 1.); - - case 2: - return zeta2*(2.*zeta2 - 1.); - - case 3: - return zeta3*(2.*zeta3 - 1.); - - case 4: - return 4.*zeta0*zeta1; - - case 5: - return 4.*zeta1*zeta2; - - case 6: - return 4.*zeta2*zeta0; - - case 7: - return 4.*zeta0*zeta3; - - case 8: - return 4.*zeta1*zeta3; - - case 9: - return 4.*zeta2*zeta3; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet10_shape(i, p(0), p(1), p(2)); } // "serendipity" prism @@ -1402,66 +1116,7 @@ Real fe_lagrange_3D_shape(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - - // Bubble functions (not yet scaled) on side nodes - const Real bubble_012 = zeta0*zeta1*zeta2; - const Real bubble_013 = zeta0*zeta1*zeta3; - const Real bubble_123 = zeta1*zeta2*zeta3; - const Real bubble_023 = zeta0*zeta2*zeta3; - - switch(i) - { - case 0: - return zeta0*(2.*zeta0 - 1.) + 3.*(bubble_012+bubble_013+bubble_023); - - case 1: - return zeta1*(2.*zeta1 - 1.) + 3.*(bubble_012+bubble_013+bubble_123); - - case 2: - return zeta2*(2.*zeta2 - 1.) + 3.*(bubble_012+bubble_023+bubble_123); - - case 3: - return zeta3*(2.*zeta3 - 1.) 
+ 3.*(bubble_013+bubble_023+bubble_123); - - case 4: - return 4.*zeta0*zeta1 - 12.*(bubble_012+bubble_013); - - case 5: - return 4.*zeta1*zeta2 - 12.*(bubble_012+bubble_123); - - case 6: - return 4.*zeta2*zeta0 - 12.*(bubble_012+bubble_023); - - case 7: - return 4.*zeta0*zeta3 - 12.*(bubble_013+bubble_023); - - case 8: - return 4.*zeta1*zeta3 - 12.*(bubble_013+bubble_123); - - case 9: - return 4.*zeta2*zeta3 - 12.*(bubble_023+bubble_123); - - case 10: - return 27.*bubble_012; - - case 11: - return 27.*bubble_013; - - case 12: - return 27.*bubble_123; - - case 13: - return 27.*bubble_023; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet14_shape(i, p(0), p(1), p(2)); } default: @@ -1508,35 +1163,7 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, { libmesh_assert_less (i, 8); - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - - switch(j) - { - case 0: - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape (i1[i], eta)* - fe_lagrange_1D_linear_shape (i2[i], zeta)); - - case 1: - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_linear_shape (i2[i], zeta)); - - case 2: - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape (i1[i], eta)* - fe_lagrange_1D_linear_shape_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_hex8_shape_deriv(i, j, p(0), p(1), p(2)); } // linear tetrahedral shape functions @@ -1545,94 +1172,7 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, case TET14: { libmesh_assert_less (i, 4); - - // Area coordinates, pg. 205, Vol. 
I, Carey, Oden, Becker FEM - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dzeta3dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dzeta3deta = 0.; - - const Real dzeta0dzeta = -1.; - const Real dzeta1dzeta = 0.; - const Real dzeta2dzeta = 0.; - const Real dzeta3dzeta = 1.; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return dzeta0dxi; - - case 1: - return dzeta1dxi; - - case 2: - return dzeta2dxi; - - case 3: - return dzeta3dxi; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/deta - case 1: - { - switch(i) - { - case 0: - return dzeta0deta; - - case 1: - return dzeta1deta; - - case 2: - return dzeta2deta; - - case 3: - return dzeta3deta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/dzeta - case 2: - { - switch(i) - { - case 0: - return dzeta0dzeta; - - case 1: - return dzeta1dzeta; - - case 2: - return dzeta2dzeta; - - case 3: - return dzeta3dzeta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - default: - libmesh_error_msg("Invalid shape function derivative j = " << j); - } + return libMesh::detail::fe_lagrange_tet4_shape_deriv(i, j); } // linear prism shape functions @@ -1876,458 +1416,32 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, case HEX20: { libmesh_assert_less (i, 20); + return libMesh::detail::fe_lagrange_hex20_shape_deriv(i, j, p(0), p(1), p(2)); + } - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // these functions are defined for (x,y,z) in [0,1]^3 - // so transform the locations - const Real x = .5*(xi + 1.); - const Real y = .5*(eta + 1.); - const Real z = .5*(zeta + 1.); + // triquadratic hexahedral shape functions + case HEX8: + libmesh_assert_msg(T == L2_LAGRANGE, + "High order on first order elements only supported for L2 families"); + libmesh_fallthrough(); + case HEX27: + { + 
libmesh_assert_less (i, 27); - // and don't forget the chain rule! + return libMesh::detail::fe_lagrange_hex27_shape_deriv(i, j, p(0), p(1), p(2)); + } - switch (j) - { - - // d/dx*dx/dxi - case 0: - switch (i) - { - case 0: - return .5*(1. - y)*(1. - z)*((1. - x)*(-2.) + - (-1.)*(1. - 2.*x - 2.*y - 2.*z)); - - case 1: - return .5*(1. - y)*(1. - z)*(x*(2.) + - (1.)*(2.*x - 2.*y - 2.*z - 1.)); - - case 2: - return .5*y*(1. - z)*(x*(2.) + - (1.)*(2.*x + 2.*y - 2.*z - 3.)); - - case 3: - return .5*y*(1. - z)*((1. - x)*(-2.) + - (-1.)*(2.*y - 2.*x - 2.*z - 1.)); - - case 4: - return .5*(1. - y)*z*((1. - x)*(-2.) + - (-1.)*(2.*z - 2.*x - 2.*y - 1.)); - - case 5: - return .5*(1. - y)*z*(x*(2.) + - (1.)*(2.*x - 2.*y + 2.*z - 3.)); - - case 6: - return .5*y*z*(x*(2.) + - (1.)*(2.*x + 2.*y + 2.*z - 5.)); - - case 7: - return .5*y*z*((1. - x)*(-2.) + - (-1.)*(2.*y - 2.*x + 2.*z - 3.)); - - case 8: - return 2.*(1. - y)*(1. - z)*(1. - 2.*x); - - case 9: - return 2.*y*(1. - y)*(1. - z); - - case 10: - return 2.*y*(1. - z)*(1. - 2.*x); - - case 11: - return 2.*y*(1. - y)*(1. - z)*(-1.); - - case 12: - return 2.*(1. - y)*z*(1. - z)*(-1.); - - case 13: - return 2.*(1. - y)*z*(1. - z); - - case 14: - return 2.*y*z*(1. - z); - - case 15: - return 2.*y*z*(1. - z)*(-1.); - - case 16: - return 2.*(1. - y)*z*(1. - 2.*x); - - case 17: - return 2.*y*(1. - y)*z; - - case 18: - return 2.*y*z*(1. - 2.*x); - - case 19: - return 2.*y*(1. - y)*z*(-1.); - - default: - libmesh_error_msg("Invalid i = " << i); - } - - - // d/dy*dy/deta - case 1: - switch (i) - { - case 0: - return .5*(1. - x)*(1. - z)*((1. - y)*(-2.) + - (-1.)*(1. - 2.*x - 2.*y - 2.*z)); - - case 1: - return .5*x*(1. - z)*((1. - y)*(-2.) + - (-1.)*(2.*x - 2.*y - 2.*z - 1.)); - - case 2: - return .5*x*(1. - z)*(y*(2.) + - (1.)*(2.*x + 2.*y - 2.*z - 3.)); - - case 3: - return .5*(1. - x)*(1. - z)*(y*(2.) + - (1.)*(2.*y - 2.*x - 2.*z - 1.)); - - case 4: - return .5*(1. - x)*z*((1. - y)*(-2.) 
+ - (-1.)*(2.*z - 2.*x - 2.*y - 1.)); - - case 5: - return .5*x*z*((1. - y)*(-2.) + - (-1.)*(2.*x - 2.*y + 2.*z - 3.)); - - case 6: - return .5*x*z*(y*(2.) + - (1.)*(2.*x + 2.*y + 2.*z - 5.)); - - case 7: - return .5*(1. - x)*z*(y*(2.) + - (1.)*(2.*y - 2.*x + 2.*z - 3.)); - - case 8: - return 2.*x*(1. - x)*(1. - z)*(-1.); - - case 9: - return 2.*x*(1. - z)*(1. - 2.*y); - - case 10: - return 2.*x*(1. - x)*(1. - z); - - case 11: - return 2.*(1. - x)*(1. - z)*(1. - 2.*y); - - case 12: - return 2.*(1. - x)*z*(1. - z)*(-1.); - - case 13: - return 2.*x*z*(1. - z)*(-1.); - - case 14: - return 2.*x*z*(1. - z); - - case 15: - return 2.*(1. - x)*z*(1. - z); - - case 16: - return 2.*x*(1. - x)*z*(-1.); - - case 17: - return 2.*x*z*(1. - 2.*y); - - case 18: - return 2.*x*(1. - x)*z; - - case 19: - return 2.*(1. - x)*z*(1. - 2.*y); - - default: - libmesh_error_msg("Invalid i = " << i); - } - - - // d/dz*dz/dzeta - case 2: - switch (i) - { - case 0: - return .5*(1. - x)*(1. - y)*((1. - z)*(-2.) + - (-1.)*(1. - 2.*x - 2.*y - 2.*z)); - - case 1: - return .5*x*(1. - y)*((1. - z)*(-2.) + - (-1.)*(2.*x - 2.*y - 2.*z - 1.)); - - case 2: - return .5*x*y*((1. - z)*(-2.) + - (-1.)*(2.*x + 2.*y - 2.*z - 3.)); - - case 3: - return .5*(1. - x)*y*((1. - z)*(-2.) + - (-1.)*(2.*y - 2.*x - 2.*z - 1.)); - - case 4: - return .5*(1. - x)*(1. - y)*(z*(2.) + - (1.)*(2.*z - 2.*x - 2.*y - 1.)); - - case 5: - return .5*x*(1. - y)*(z*(2.) + - (1.)*(2.*x - 2.*y + 2.*z - 3.)); - - case 6: - return .5*x*y*(z*(2.) + - (1.)*(2.*x + 2.*y + 2.*z - 5.)); - - case 7: - return .5*(1. - x)*y*(z*(2.) + - (1.)*(2.*y - 2.*x + 2.*z - 3.)); - - case 8: - return 2.*x*(1. - x)*(1. - y)*(-1.); - - case 9: - return 2.*x*y*(1. - y)*(-1.); - - case 10: - return 2.*x*(1. - x)*y*(-1.); - - case 11: - return 2.*(1. - x)*y*(1. - y)*(-1.); - - case 12: - return 2.*(1. - x)*(1. - y)*(1. - 2.*z); - - case 13: - return 2.*x*(1. - y)*(1. - 2.*z); - - case 14: - return 2.*x*y*(1. - 2.*z); - - case 15: - return 2.*(1. - x)*y*(1. 
- 2.*z); - - case 16: - return 2.*x*(1. - x)*(1. - y); - - case 17: - return 2.*x*y*(1. - y); - - case 18: - return 2.*x*(1. - x)*y; - - case 19: - return 2.*(1. - x)*y*(1. - y); - - default: - libmesh_error_msg("Invalid i = " << i); - } - - default: - libmesh_error_msg("Invalid shape function derivative j = " << j); - } - } - - // triquadratic hexahedral shape functions - case HEX8: - libmesh_assert_msg(T == L2_LAGRANGE, - "High order on first order elements only supported for L2 families"); - libmesh_fallthrough(); - case HEX27: - { - libmesh_assert_less (i, 27); - - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - - switch(j) - { - case 0: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - case 1: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - case 2: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } - } - - // quadratic tetrahedral shape functions - case TET4: - libmesh_assert_msg(T == L2_LAGRANGE, - "High order on first order elements only supported 
for L2 families"); - libmesh_fallthrough(); - case TET10: - case TET14: - { - libmesh_assert_less (i, 10); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dzeta3dxi = 0.; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dzeta3deta = 0.; - - const Real dzeta0dzeta = -1.; - const Real dzeta1dzeta = 0.; - const Real dzeta2dzeta = 0.; - const Real dzeta3dzeta = 1.; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dxi; - - case 1: - return (4.*zeta1 - 1.)*dzeta1dxi; - - case 2: - return (4.*zeta2 - 1.)*dzeta2dxi; - - case 3: - return (4.*zeta3 - 1.)*dzeta3dxi; - - case 4: - return 4.*(zeta0*dzeta1dxi + dzeta0dxi*zeta1); - - case 5: - return 4.*(zeta1*dzeta2dxi + dzeta1dxi*zeta2); - - case 6: - return 4.*(zeta0*dzeta2dxi + dzeta0dxi*zeta2); - - case 7: - return 4.*(zeta0*dzeta3dxi + dzeta0dxi*zeta3); - - case 8: - return 4.*(zeta1*dzeta3dxi + dzeta1dxi*zeta3); - - case 9: - return 4.*(zeta2*dzeta3dxi + dzeta2dxi*zeta3); - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/deta - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0deta; - - case 1: - return (4.*zeta1 - 1.)*dzeta1deta; - - case 2: - return (4.*zeta2 - 1.)*dzeta2deta; - - case 3: - return (4.*zeta3 - 1.)*dzeta3deta; - - case 4: - return 4.*(zeta0*dzeta1deta + dzeta0deta*zeta1); - - case 5: - return 4.*(zeta1*dzeta2deta + dzeta1deta*zeta2); - - case 6: - return 4.*(zeta0*dzeta2deta + dzeta0deta*zeta2); - - case 7: - return 4.*(zeta0*dzeta3deta + dzeta0deta*zeta3); - - case 8: - return 4.*(zeta1*dzeta3deta + dzeta1deta*zeta3); - - case 9: - return 4.*(zeta2*dzeta3deta + dzeta2deta*zeta3); - - default: - 
libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/dzeta - case 2: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dzeta; - - case 1: - return (4.*zeta1 - 1.)*dzeta1dzeta; - - case 2: - return (4.*zeta2 - 1.)*dzeta2dzeta; - - case 3: - return (4.*zeta3 - 1.)*dzeta3dzeta; - - case 4: - return 4.*(zeta0*dzeta1dzeta + dzeta0dzeta*zeta1); - - case 5: - return 4.*(zeta1*dzeta2dzeta + dzeta1dzeta*zeta2); - - case 6: - return 4.*(zeta0*dzeta2dzeta + dzeta0dzeta*zeta2); - - case 7: - return 4.*(zeta0*dzeta3dzeta + dzeta0dzeta*zeta3); - - case 8: - return 4.*(zeta1*dzeta3dzeta + dzeta1dzeta*zeta3); - - case 9: - return 4.*(zeta2*dzeta3dzeta + dzeta2dzeta*zeta3); - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - default: - libmesh_error_msg("Invalid j = " << j); - } - } + // quadratic tetrahedral shape functions + case TET4: + libmesh_assert_msg(T == L2_LAGRANGE, + "High order on first order elements only supported for L2 families"); + libmesh_fallthrough(); + case TET10: + case TET14: + { + libmesh_assert_less (i, 10); + return libMesh::detail::fe_lagrange_tet10_shape_deriv(i, j, p(0), p(1), p(2)); + } // "serendipity" prism @@ -2931,201 +2045,7 @@ Real fe_lagrange_3D_shape_deriv(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // Area coordinates, pg. 205, Vol. I, Carey, Oden, Becker FEM - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. 
- zeta1 - zeta2 - zeta3; - - const Real dzeta0dxi = -1.; - const Real dzeta1dxi = 1.; - const Real dzeta2dxi = 0.; - const Real dzeta3dxi = 0.; - const Real dbubble012dxi = (zeta0-zeta1)*zeta2; - const Real dbubble013dxi = (zeta0-zeta1)*zeta3; - const Real dbubble123dxi = zeta2*zeta3; - const Real dbubble023dxi = -zeta2*zeta3; - - const Real dzeta0deta = -1.; - const Real dzeta1deta = 0.; - const Real dzeta2deta = 1.; - const Real dzeta3deta = 0.; - const Real dbubble012deta = (zeta0-zeta2)*zeta1; - const Real dbubble013deta = -zeta1*zeta3; - const Real dbubble123deta = zeta1*zeta3; - const Real dbubble023deta = (zeta0-zeta2)*zeta3; - - const Real dzeta0dzeta = -1.; - const Real dzeta1dzeta = 0.; - const Real dzeta2dzeta = 0.; - const Real dzeta3dzeta = 1.; - const Real dbubble012dzeta = -zeta1*zeta2; - const Real dbubble013dzeta = (zeta0-zeta3)*zeta1; - const Real dbubble123dzeta = zeta1*zeta2; - const Real dbubble023dzeta = (zeta0-zeta3)*zeta2; - - switch (j) - { - // d()/dxi - case 0: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dxi + 3.*(dbubble012dxi+dbubble013dxi+dbubble023dxi); - - case 1: - return (4.*zeta1 - 1.)*dzeta1dxi + 3.*(dbubble012dxi+dbubble013dxi+dbubble123dxi); - - case 2: - return (4.*zeta2 - 1.)*dzeta2dxi + 3.*(dbubble012dxi+dbubble023dxi+dbubble123dxi); - - case 3: - return (4.*zeta3 - 1.)*dzeta3dxi + 3.*(dbubble013dxi+dbubble023dxi+dbubble123dxi); - - case 4: - return 4.*(zeta0*dzeta1dxi + dzeta0dxi*zeta1) - 12.*(dbubble012dxi+dbubble013dxi); - - case 5: - return 4.*(zeta1*dzeta2dxi + dzeta1dxi*zeta2) - 12.*(dbubble012dxi+dbubble123dxi); - - case 6: - return 4.*(zeta0*dzeta2dxi + dzeta0dxi*zeta2) - 12.*(dbubble012dxi+dbubble023dxi); - - case 7: - return 4.*(zeta0*dzeta3dxi + dzeta0dxi*zeta3) - 12.*(dbubble013dxi+dbubble023dxi); - - case 8: - return 4.*(zeta1*dzeta3dxi + dzeta1dxi*zeta3) - 12.*(dbubble013dxi+dbubble123dxi); - - case 9: - return 4.*(zeta2*dzeta3dxi + dzeta2dxi*zeta3) - 12.*(dbubble023dxi+dbubble123dxi); - - case 
10: - return 27.*dbubble012dxi; - - case 11: - return 27.*dbubble013dxi; - - case 12: - return 27.*dbubble123dxi; - - case 13: - return 27.*dbubble023dxi; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/deta - case 1: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0deta + 3.*(dbubble012deta+dbubble013deta+dbubble023deta);; - - case 1: - return (4.*zeta1 - 1.)*dzeta1deta + 3.*(dbubble012deta+dbubble013deta+dbubble123deta); - - case 2: - return (4.*zeta2 - 1.)*dzeta2deta + 3.*(dbubble012deta+dbubble023deta+dbubble123deta); - - case 3: - return (4.*zeta3 - 1.)*dzeta3deta + 3.*(dbubble013deta+dbubble023deta+dbubble123deta); - - case 4: - return 4.*(zeta0*dzeta1deta + dzeta0deta*zeta1) - 12.*(dbubble012deta+dbubble013deta); - - case 5: - return 4.*(zeta1*dzeta2deta + dzeta1deta*zeta2) - 12.*(dbubble012deta+dbubble123deta); - - case 6: - return 4.*(zeta0*dzeta2deta + dzeta0deta*zeta2) - 12.*(dbubble012deta+dbubble023deta); - - case 7: - return 4.*(zeta0*dzeta3deta + dzeta0deta*zeta3) - 12.*(dbubble013deta+dbubble023deta); - - case 8: - return 4.*(zeta1*dzeta3deta + dzeta1deta*zeta3) - 12.*(dbubble013deta+dbubble123deta); - - case 9: - return 4.*(zeta2*dzeta3deta + dzeta2deta*zeta3) - 12.*(dbubble023deta+dbubble123deta); - - case 10: - return 27.*dbubble012deta; - - case 11: - return 27.*dbubble013deta; - - case 12: - return 27.*dbubble123deta; - - case 13: - return 27.*dbubble023deta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - // d()/dzeta - case 2: - { - switch(i) - { - case 0: - return (4.*zeta0 - 1.)*dzeta0dzeta + 3.*(dbubble012dzeta+dbubble013dzeta+dbubble023dzeta); - - case 1: - return (4.*zeta1 - 1.)*dzeta1dzeta + 3.*(dbubble012dzeta+dbubble013dzeta+dbubble123dzeta); - - case 2: - return (4.*zeta2 - 1.)*dzeta2dzeta + 3.*(dbubble012dzeta+dbubble023dzeta+dbubble123dzeta); - - case 3: - return (4.*zeta3 - 1.)*dzeta3dzeta + 3.*(dbubble013dzeta+dbubble023dzeta+dbubble123dzeta); - - case 4: - return 
4.*(zeta0*dzeta1dzeta + dzeta0dzeta*zeta1) - 12.*(dbubble012dzeta+dbubble013dzeta); - - case 5: - return 4.*(zeta1*dzeta2dzeta + dzeta1dzeta*zeta2) - 12.*(dbubble012dzeta+dbubble123dzeta); - - case 6: - return 4.*(zeta0*dzeta2dzeta + dzeta0dzeta*zeta2) - 12.*(dbubble012dzeta+dbubble023dzeta); - - case 7: - return 4.*(zeta0*dzeta3dzeta + dzeta0dzeta*zeta3) - 12.*(dbubble013dzeta+dbubble023dzeta); - - case 8: - return 4.*(zeta1*dzeta3dzeta + dzeta1dzeta*zeta3) - 12.*(dbubble013dzeta+dbubble123dzeta); - - case 9: - return 4.*(zeta2*dzeta3dzeta + dzeta2dzeta*zeta3) - 12.*(dbubble023dzeta+dbubble123dzeta); - - case 10: - return 27.*dbubble012dzeta; - - case 11: - return 27.*dbubble013dzeta; - - case 12: - return 27.*dbubble123dzeta; - - case 13: - return 27.*dbubble023dzeta; - - default: - libmesh_error_msg("Invalid i = " << i); - } - } - - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_tet14_shape_deriv(i, j, p(0), p(1), p(2)); } case PRISM20: @@ -3412,48 +2332,11 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, // Trilinear shape functions on HEX8s have nonzero mixed second derivatives case HEX8: - case HEX20: - case HEX27: - { - libmesh_assert_less (i, 8); - - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1}; - - switch (j) - { - // All repeated second derivatives are zero on HEX8 - case 0: // d^2()/dxi^2 - case 2: // d^2()/deta^2 - case 5: // d^2()/dzeta^2 - { - return 0.; - } - - case 1: // d^2()/dxideta - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_linear_shape (i2[i], zeta)); - - case 3: // d^2()/dxidzeta - return (fe_lagrange_1D_linear_shape_deriv(i0[i], 0, xi)* - 
fe_lagrange_1D_linear_shape (i1[i], eta)* - fe_lagrange_1D_linear_shape_deriv(i2[i], 0, zeta)); - - case 4: // d^2()/detadzeta - return (fe_lagrange_1D_linear_shape (i0[i], xi)* - fe_lagrange_1D_linear_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_linear_shape_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } + case HEX20: + case HEX27: + { + libmesh_assert_less (i, 8); + return libMesh::detail::fe_lagrange_hex8_shape_second_deriv(i, j, p(0), p(1), p(2)); } // All second derivatives for piecewise-linear polyhedra are @@ -3480,269 +2363,7 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case HEX20: { libmesh_assert_less (i, 20); - - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // these functions are defined for (x,y,z) in [0,1]^3 - // so transform the locations - const Real x = .5*(xi + 1.); - const Real y = .5*(eta + 1.); - const Real z = .5*(zeta + 1.); - - switch(j) - { - case 0: // d^2()/dxi^2 - { - switch(i) - { - case 0: - case 1: - return (1. - y) * (1. - z); - case 2: - case 3: - return y * (1. - z); - case 4: - case 5: - return (1. - y) * z; - case 6: - case 7: - return y * z; - case 8: - return -2. * (1. - y) * (1. - z); - case 10: - return -2. * y * (1. - z); - case 16: - return -2. * (1. - y) * z; - case 18: - return -2. * y * z; - case 9: - case 11: - case 12: - case 13: - case 14: - case 15: - case 17: - case 19: - return 0; - default: - libmesh_error_msg("Invalid i = " << i); - } - } - case 1: // d^2()/dxideta - { - switch(i) - { - case 0: - return (1.25 - x - y - .5*z) * (1. - z); - case 1: - return (-x + y + .5*z - .25) * (1. - z); - case 2: - return (x + y - .5*z - .75) * (1. - z); - case 3: - return (-y + x + .5*z - .25) * (1. - z); - case 4: - return -.25*z * (4.*x + 4.*y - 2.*z - 3); - case 5: - return -.25*z * (-4.*y + 4.*x + 2.*z - 1.); - case 6: - return .25*z * (-5 + 4.*x + 4.*y + 2.*z); - case 7: - return .25*z * (4.*x - 4.*y - 2.*z + 1.); - case 8: - return (-1. 
+ 2.*x) * (1. - z); - case 9: - return (1. - 2.*y) * (1. - z); - case 10: - return (1. - 2.*x) * (1. - z); - case 11: - return (-1. + 2.*y) * (1. - z); - case 12: - return z * (1. - z); - case 13: - return -z * (1. - z); - case 14: - return z * (1. - z); - case 15: - return -z * (1. - z); - case 16: - return (-1. + 2.*x) * z; - case 17: - return (1. - 2.*y) * z; - case 18: - return (1. - 2.*x) * z; - case 19: - return (-1. + 2.*y) * z; - default: - libmesh_error_msg("Invalid i = " << i); - } - } - case 2: // d^2()/deta^2 - switch(i) - { - case 0: - case 3: - return (1. - x) * (1. - z); - case 1: - case 2: - return x * (1. - z); - case 4: - case 7: - return (1. - x) * z; - case 5: - case 6: - return x * z; - case 9: - return -2. * x * (1. - z); - case 11: - return -2. * (1. - x) * (1. - z); - case 17: - return -2. * x * z; - case 19: - return -2. * (1. - x) * z; - case 8: - case 10: - case 12: - case 13: - case 14: - case 15: - case 16: - case 18: - return 0.; - default: - libmesh_error_msg("Invalid i = " << i); - } - case 3: // d^2()/dxidzeta - switch(i) - { - case 0: - return (1.25 - x - .5*y - z) * (1. - y); - case 1: - return (-x + .5*y + z - .25) * (1. - y); - case 2: - return -.25*y * (2.*y + 4.*x - 4.*z - 1.); - case 3: - return -.25*y * (-2.*y + 4.*x + 4.*z - 3); - case 4: - return (-z + x + .5*y - .25) * (1. - y); - case 5: - return (x - .5*y + z - .75) * (1. - y); - case 6: - return .25*y * (2.*y + 4.*x + 4.*z - 5); - case 7: - return .25*y * (-2.*y + 4.*x - 4.*z + 1.); - case 8: - return (-1. + 2.*x) * (1. - y); - case 9: - return -y * (1. - y); - case 10: - return (-1. + 2.*x) * y; - case 11: - return y * (1. - y); - case 12: - return (-1. + 2.*z) * (1. - y); - case 13: - return (1. - 2.*z) * (1. - y); - case 14: - return (1. - 2.*z) * y; - case 15: - return (-1. + 2.*z) * y; - case 16: - return (1. - 2.*x) * (1. - y); - case 17: - return y * (1. - y); - case 18: - return (1. - 2.*x) * y; - case 19: - return -y * (1. 
- y); - default: - libmesh_error_msg("Invalid i = " << i); - } - case 4: // d^2()/detadzeta - switch(i) - { - case 0: - return (1.25 - .5*x - y - z) * (1. - x); - case 1: - return .25*x * (2.*x - 4.*y - 4.*z + 3.); - case 2: - return -.25*x * (2.*x + 4.*y - 4.*z - 1.); - case 3: - return (-y + .5*x + z - .25) * (1. - x); - case 4: - return (-z + .5*x + y - .25) * (1. - x); - case 5: - return -.25*x * (2.*x - 4.*y + 4.*z - 1.); - case 6: - return .25*x * (2.*x + 4.*y + 4.*z - 5); - case 7: - return (y - .5*x + z - .75) * (1. - x); - case 8: - return x * (1. - x); - case 9: - return (-1. + 2.*y) * x; - case 10: - return -x * (1. - x); - case 11: - return (-1. + 2.*y) * (1. - x); - case 12: - return (-1. + 2.*z) * (1. - x); - case 13: - return (-1. + 2.*z) * x; - case 14: - return (1. - 2.*z) * x; - case 15: - return (1. - 2.*z) * (1. - x); - case 16: - return -x * (1. - x); - case 17: - return (1. - 2.*y) * x; - case 18: - return x * (1. - x); - case 19: - return (1. - 2.*y) * (1. - x); - default: - libmesh_error_msg("Invalid i = " << i); - } - case 5: // d^2()/dzeta^2 - switch(i) - { - case 0: - case 4: - return (1. - x) * (1. - y); - case 1: - case 5: - return x * (1. - y); - case 2: - case 6: - return x * y; - case 3: - case 7: - return (1. - x) * y; - case 12: - return -2. * (1. - x) * (1. - y); - case 13: - return -2. * x * (1. - y); - case 14: - return -2. * x * y; - case 15: - return -2. * (1. 
- x) * y; - case 8: - case 9: - case 10: - case 11: - case 16: - case 17: - case 18: - case 19: - return 0.; - default: - libmesh_error_msg("Invalid i = " << i); - } - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_hex20_shape_second_deriv(i, j, p(0), p(1), p(2)); } // triquadratic hexahedral shape functions @@ -3753,61 +2374,7 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case HEX27: { libmesh_assert_less (i, 27); - - // Compute hex shape functions as a tensor-product - const Real xi = p(0); - const Real eta = p(1); - const Real zeta = p(2); - - // The only way to make any sense of this - // is to look at the mgflo/mg2/mgf documentation - // and make the cut-out cube! - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 - static const unsigned int i0[] = {0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 2, 0, 2, 2}; - static const unsigned int i1[] = {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 1, 2, 2, 0, 2, 1, 2, 2, 2}; - static const unsigned int i2[] = {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 0, 2, 2, 2, 2, 1, 2}; - - switch(j) - { - // d^2()/dxi^2 - case 0: - return (fe_lagrange_1D_quadratic_shape_second_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - // d^2()/dxideta - case 1: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - // d^2()/deta^2 - case 2: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_second_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape (i2[i], zeta)); - - // d^2()/dxidzeta - case 3: - return (fe_lagrange_1D_quadratic_shape_deriv(i0[i], 0, xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape_deriv(i2[i], 0, zeta)); - - // d^2()/detadzeta - 
case 4: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape_deriv(i1[i], 0, eta)* - fe_lagrange_1D_quadratic_shape_deriv(i2[i], 0, zeta)); - - // d^2()/dzeta^2 - case 5: - return (fe_lagrange_1D_quadratic_shape (i0[i], xi)* - fe_lagrange_1D_quadratic_shape (i1[i], eta)* - fe_lagrange_1D_quadratic_shape_second_deriv(i2[i], 0, zeta)); - - default: - libmesh_error_msg("Invalid j = " << j); - } + return libMesh::detail::fe_lagrange_hex27_shape_second_deriv(i, j, p(0), p(1), p(2)); } // quadratic tetrahedral shape functions @@ -3818,69 +2385,8 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case TET10: case TET14: { - // The area coordinates are the same as used for the - // shape() and shape_deriv() functions. - // const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - // const Real zeta1 = p(0); - // const Real zeta2 = p(1); - // const Real zeta3 = p(2); - static const Real dzetadxi[4][3] = - { - {-1., -1., -1.}, - {1., 0., 0.}, - {0., 1., 0.}, - {0., 0., 1.} - }; - - // Convert from j -> (j,k) indices for independent variable - // (0=xi, 1=eta, 2=zeta) - static const unsigned short int independent_var_indices[6][2] = - { - {0, 0}, // d^2 phi / dxi^2 - {0, 1}, // d^2 phi / dxi deta - {1, 1}, // d^2 phi / deta^2 - {0, 2}, // d^2 phi / dxi dzeta - {1, 2}, // d^2 phi / deta dzeta - {2, 2} // d^2 phi / dzeta^2 - }; - - // Convert from i -> zeta indices. Each quadratic shape - // function for the Tet10 depends on up to two of the zeta - // area coordinate functions (see the shape() function above). - // This table just tells which two area coords it uses. - static const unsigned short int zeta_indices[10][2] = - { - {0, 0}, - {1, 1}, - {2, 2}, - {3, 3}, - {0, 1}, - {1, 2}, - {2, 0}, - {0, 3}, - {1, 3}, - {2, 3}, - }; - - // Look up the independent variable indices for this value of j. 
- const unsigned int my_j = independent_var_indices[j][0]; - const unsigned int my_k = independent_var_indices[j][1]; - - if (i<4) - { - return 4.*dzetadxi[i][my_j]*dzetadxi[i][my_k]; - } - - else if (i<10) - { - const unsigned short int my_m = zeta_indices[i][0]; - const unsigned short int my_n = zeta_indices[i][1]; - - return 4.*(dzetadxi[my_n][my_j]*dzetadxi[my_m][my_k] + - dzetadxi[my_m][my_j]*dzetadxi[my_n][my_k] ); - } - else - libmesh_error_msg("Invalid shape function index " << i); + libmesh_assert_less (i, 10); + return libMesh::detail::fe_lagrange_tet10_shape_second_deriv(i, j); } @@ -4983,190 +3489,7 @@ Real fe_lagrange_3D_shape_second_deriv(const ElemType type, case TET14: { libmesh_assert_less (i, 14); - - // The area coordinates are the same as used for the - // shape() and shape_deriv() functions. - // const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - // const Real zeta1 = p(0); - // const Real zeta2 = p(1); - // const Real zeta3 = p(2); - static const Real dzetadxi[4][3] = - { - {-1., -1., -1.}, - {1., 0., 0.}, - {0., 1., 0.}, - {0., 0., 1.} - }; - - // Convert from j -> (j,k) indices for independent variable - // (0=xi, 1=eta, 2=zeta) - static const unsigned short int independent_var_indices[6][2] = - { - {0, 0}, // d^2 phi / dxi^2 - {0, 1}, // d^2 phi / dxi deta - {1, 1}, // d^2 phi / deta^2 - {0, 2}, // d^2 phi / dxi dzeta - {1, 2}, // d^2 phi / deta dzeta - {2, 2} // d^2 phi / dzeta^2 - }; - - // Convert from i -> zeta indices. Each quadratic shape - // function for the Tet10 depends on up to two of the zeta - // area coordinate functions (see the shape() function above). - // This table just tells which two area coords it uses. - static const unsigned short int zeta_indices[10][2] = - { - {0, 0}, - {1, 1}, - {2, 2}, - {3, 3}, - {0, 1}, - {1, 2}, - {2, 0}, - {0, 3}, - {1, 3}, - {2, 3}, - }; - - // Look up the independent variable indices for this value of j. 
- const unsigned int my_j = independent_var_indices[j][0]; - const unsigned int my_k = independent_var_indices[j][1]; - - Real returnval = 0; - if (i<4) - returnval = 4.*dzetadxi[i][my_j]*dzetadxi[i][my_k]; - - else if (i<10) - { - const unsigned short int my_m = zeta_indices[i][0]; - const unsigned short int my_n = zeta_indices[i][1]; - - returnval = - 4.*(dzetadxi[my_n][my_j]*dzetadxi[my_m][my_k] + - dzetadxi[my_m][my_j]*dzetadxi[my_n][my_k] ); - } - - const Real zeta1 = p(0); - const Real zeta2 = p(1); - const Real zeta3 = p(2); - const Real zeta0 = 1. - zeta1 - zeta2 - zeta3; - - // Fill these with whichever derivative we're concerned - // with - Real d2bubble012, d2bubble013, d2bubble023, d2bubble123; - switch (j) - { - // d^2()/dxi^2 - case 0: - { - d2bubble012 = -2.*zeta2; - d2bubble013 = -2.*zeta3; - d2bubble023 = 0.; - d2bubble123 = 0.; - break; - } - - // d^2()/dxideta - case 1: - { - d2bubble012 = (zeta0-zeta1)-zeta2; - d2bubble013 = -zeta3; - d2bubble123 = zeta3; - d2bubble023 = -zeta3; - break; - } - - // d^2()/deta^2 - case 2: - { - d2bubble012 = -2.*zeta1; - d2bubble013 = 0.; - d2bubble123 = 0.; - d2bubble023 = -2.*zeta3; - break; - } - - // d^2()/dxi dzeta - case 3: - { - d2bubble012 = -zeta2; - d2bubble013 = (zeta0-zeta3)-zeta1; - d2bubble123 = zeta2; - d2bubble023 = -zeta2; - break; - } - - // d^2()/deta dzeta - case 4: - { - d2bubble012 = -zeta1; - d2bubble013 = -zeta1; - d2bubble123 = zeta1; - d2bubble023 = (zeta0-zeta3)-zeta2; - break; - } - - // d^2()/dzeta^2 - case 5: - { - d2bubble012 = 0.; - d2bubble013 = -2.*zeta1; - d2bubble123 = 0.; - d2bubble023 = -2.*zeta2; - break; - } - - default: - libmesh_error_msg("Invalid j = " << j); - } - - switch (i) - { - case 0: - return returnval + 3.*(d2bubble012+d2bubble013+d2bubble023); - - case 1: - return returnval + 3.*(d2bubble012+d2bubble013+d2bubble123); - - case 2: - return returnval + 3.*(d2bubble012+d2bubble023+d2bubble123); - - case 3: - return returnval + 
3.*(d2bubble013+d2bubble023+d2bubble123); - - case 4: - return returnval - 12.*(d2bubble012+d2bubble013); - - case 5: - return returnval - 12.*(d2bubble012+d2bubble123); - - case 6: - return returnval - 12.*(d2bubble012+d2bubble023); - - case 7: - return returnval - 12.*(d2bubble013+d2bubble023); - - case 8: - return returnval - 12.*(d2bubble013+d2bubble123); - - case 9: - return returnval - 12.*(d2bubble023+d2bubble123); - - case 10: - return 27.*d2bubble012; - - case 11: - return 27.*d2bubble013; - - case 12: - return 27.*d2bubble123; - - case 13: - return 27.*d2bubble023; - - default: - libmesh_error_msg("Invalid i = " << i); - } + return libMesh::detail::fe_lagrange_tet14_shape_second_deriv(i, j, p(0), p(1), p(2)); } case PRISM20: diff --git a/src/geom/cell_hex20.C b/src/geom/cell_hex20.C index d695ce4e15a..a1f54f00fe0 100644 --- a/src/geom/cell_hex20.C +++ b/src/geom/cell_hex20.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_hex20.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad8.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -34,32 +35,6 @@ const int Hex20::num_nodes; const int Hex20::nodes_per_side; const int Hex20::nodes_per_edge; -const unsigned int Hex20::side_nodes_map[Hex20::num_sides][Hex20::nodes_per_side] = - { - {0, 3, 2, 1, 11, 10, 9, 8}, // Side 0 - {0, 1, 5, 4, 8, 13, 16, 12}, // Side 1 - {1, 2, 6, 5, 9, 14, 17, 13}, // Side 2 - {2, 3, 7, 6, 10, 15, 18, 14}, // Side 3 - {3, 0, 4, 7, 11, 12, 19, 15}, // Side 4 - {4, 5, 6, 7, 16, 17, 18, 19} // Side 5 - }; - -const unsigned int Hex20::edge_nodes_map[Hex20::num_edges][Hex20::nodes_per_edge] = - { - {0, 1, 8}, // Edge 0 - {1, 2, 9}, // Edge 1 - {2, 3, 10}, // Edge 2 - {0, 3, 11}, // Edge 3 - {0, 4, 12}, // Edge 4 - {1, 5, 13}, // Edge 5 - {2, 6, 14}, // Edge 6 - {3, 7, 15}, // Edge 7 - {4, 5, 16}, // Edge 8 - {5, 6, 17}, // Edge 9 - {6, 7, 18}, // Edge 10 - {4, 7, 19} // Edge 11 - }; - // 
------------------------------------------------------------ // Hex20 class member functions @@ -86,32 +61,44 @@ bool Hex20::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Hex20::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Hex20::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Hex20::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -182,7 +169,10 @@ unsigned int Hex20::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Hex20::nodes_per_side); - return Hex20::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, 
side_node, node), + "Hex20::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -193,7 +183,10 @@ unsigned int Hex20::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Hex20::nodes_per_edge); - return Hex20::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Hex20::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/cell_hex27.C b/src/geom/cell_hex27.C index c432ddfb7fc..395590ec269 100644 --- a/src/geom/cell_hex27.C +++ b/src/geom/cell_hex27.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_hex27.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -34,32 +35,6 @@ const int Hex27::num_nodes; const int Hex27::nodes_per_side; const int Hex27::nodes_per_edge; -const unsigned int Hex27::side_nodes_map[Hex27::num_sides][Hex27::nodes_per_side] = - { - {0, 3, 2, 1, 11, 10, 9, 8, 20}, // Side 0 - {0, 1, 5, 4, 8, 13, 16, 12, 21}, // Side 1 - {1, 2, 6, 5, 9, 14, 17, 13, 22}, // Side 2 - {2, 3, 7, 6, 10, 15, 18, 14, 23}, // Side 3 - {3, 0, 4, 7, 11, 12, 19, 15, 24}, // Side 4 - {4, 5, 6, 7, 16, 17, 18, 19, 25} // Side 5 - }; - -const unsigned int Hex27::edge_nodes_map[Hex27::num_edges][Hex27::nodes_per_edge] = - { - {0, 1, 8}, // Edge 0 - {1, 2, 9}, // Edge 1 - {2, 3, 10}, // Edge 2 - {0, 3, 11}, // Edge 3 - {0, 4, 12}, // Edge 4 - {1, 5, 13}, // Edge 5 - {2, 6, 14}, // Edge 6 - {3, 7, 15}, // Edge 7 - {4, 5, 16}, // Edge 8 - {5, 6, 17}, // Edge 9 - {6, 7, 18}, // Edge 10 - {4, 7, 19} // Edge 11 - }; - // ------------------------------------------------------------ // Hex27 class member functions @@ -92,32 +67,44 @@ bool Hex27::is_node_on_side(const unsigned int n, const unsigned int s) const { 
libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Hex27::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Hex27::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Hex27::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -225,7 +212,10 @@ unsigned int Hex27::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Hex27::nodes_per_side); - return Hex27::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Hex27::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -236,7 +226,10 @@ unsigned int Hex27::local_edge_node(unsigned int edge, 
libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Hex27::nodes_per_edge); - return Hex27::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Hex27::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/cell_prism15.C b/src/geom/cell_prism15.C index 51f0adb30de..a03dd164a4f 100644 --- a/src/geom/cell_prism15.C +++ b/src/geom/cell_prism15.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism15.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad8.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" @@ -35,28 +36,6 @@ const int Prism15::num_nodes; const int Prism15::nodes_per_side; const int Prism15::nodes_per_edge; -const unsigned int Prism15::side_nodes_map[Prism15::num_sides][Prism15::nodes_per_side] = - { - {0, 2, 1, 8, 7, 6, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11}, // Side 3 - {3, 4, 5, 12, 13, 14, 99, 99} // Side 4 - }; - -const unsigned int Prism15::edge_nodes_map[Prism15::num_edges][Prism15::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism15 class member functions @@ -83,33 +62,44 @@ bool Prism15::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) 
== n) + return true; + return false; } std::vector Prism15::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism15::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism15::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -162,7 +152,10 @@ unsigned int Prism15::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 6); - return Prism15::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism15::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -173,7 +166,10 @@ unsigned int Prism15::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism15::nodes_per_edge); - return Prism15::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism15::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -205,7 +201,7 @@ std::unique_ptr Prism15::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism15::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -252,7 +248,7 @@ void Prism15::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism15::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_prism18.C b/src/geom/cell_prism18.C index 56bce347090..d8651c7fe4a 100644 --- a/src/geom/cell_prism18.C +++ b/src/geom/cell_prism18.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism18.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" @@ -36,28 +37,6 @@ const int Prism18::num_nodes; const int Prism18::nodes_per_side; const int Prism18::nodes_per_edge; -const unsigned int Prism18::side_nodes_map[Prism18::num_sides][Prism18::nodes_per_side] = - { - {0, 2, 1, 8, 7, 
6, 99, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9, 15}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10, 16}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11, 17}, // Side 3 - {3, 4, 5, 12, 13, 14, 99, 99, 99} // Side 4 - }; - -const unsigned int Prism18::edge_nodes_map[Prism18::num_edges][Prism18::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism18 class member functions @@ -88,33 +67,44 @@ bool Prism18::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism18::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 
0 : 3; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism18::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism18::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -201,7 +191,10 @@ unsigned int Prism18::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 6); - return Prism18::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism18::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -212,7 +205,10 @@ unsigned int Prism18::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism18::nodes_per_edge); - return Prism18::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism18::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -244,7 +240,7 @@ std::unique_ptr Prism18::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism18::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -292,7 +288,7 @@ void Prism18::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism18::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_prism20.C b/src/geom/cell_prism20.C index c1cab408568..d1f1866dfe2 100644 --- a/src/geom/cell_prism20.C +++ b/src/geom/cell_prism20.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism20.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" @@ -36,28 +37,6 @@ const int Prism20::num_nodes; const int Prism20::nodes_per_side; const int Prism20::nodes_per_edge; -const unsigned int Prism20::side_nodes_map[Prism20::num_sides][Prism20::nodes_per_side] = - { - {0, 2, 1, 8, 7, 
6, 18, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9, 15}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10, 16}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11, 17}, // Side 3 - {3, 4, 5, 12, 13, 14, 19, 99, 99} // Side 4 - }; - -const unsigned int Prism20::edge_nodes_map[Prism20::num_edges][Prism20::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism20 class member functions @@ -88,33 +67,44 @@ bool Prism20::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism20::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism20::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism20::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -211,7 +201,10 @@ unsigned int Prism20::local_side_node(unsigned int side, // Some sides have 7 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 7); - return Prism20::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism20::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -222,7 +215,10 @@ unsigned int Prism20::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism20::nodes_per_edge); - return Prism20::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism20::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -254,7 +250,7 @@ std::unique_ptr Prism20::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism20::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -302,7 +298,7 @@ void Prism20::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism20::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_prism21.C b/src/geom/cell_prism21.C index 2e6a5777849..ad733084217 100644 --- a/src/geom/cell_prism21.C +++ b/src/geom/cell_prism21.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_prism21.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" @@ -51,28 +52,6 @@ const int Prism21::num_nodes; const int Prism21::nodes_per_side; const int Prism21::nodes_per_edge; -const unsigned int Prism21::side_nodes_map[Prism21::num_sides][Prism21::nodes_per_side] = - { - {0, 2, 1, 8, 7, 
6, 18, 99, 99}, // Side 0 - {0, 1, 4, 3, 6, 10, 12, 9, 15}, // Side 1 - {1, 2, 5, 4, 7, 11, 13, 10, 16}, // Side 2 - {2, 0, 3, 5, 8, 9, 14, 11, 17}, // Side 3 - {3, 4, 5, 12, 13, 14, 19, 99, 99} // Side 4 - }; - -const unsigned int Prism21::edge_nodes_map[Prism21::num_edges][Prism21::nodes_per_edge] = - { - {0, 1, 6}, // Edge 0 - {1, 2, 7}, // Edge 1 - {0, 2, 8}, // Edge 2 - {0, 3, 9}, // Edge 3 - {1, 4, 10}, // Edge 4 - {2, 5, 11}, // Edge 5 - {3, 4, 12}, // Edge 6 - {4, 5, 13}, // Edge 7 - {3, 5, 14} // Edge 8 - }; - // ------------------------------------------------------------ // Prism21 class member functions @@ -105,33 +84,44 @@ bool Prism21::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Prism21::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s > 0 && s < 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Prism21::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Prism21::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -231,7 +221,10 @@ unsigned int Prism21::local_side_node(unsigned int side, // Some sides have 7 nodes. 
libmesh_assert(!(side==0 || side==4) || side_node < 7); - return Prism21::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Prism21::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -242,7 +235,10 @@ unsigned int Prism21::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Prism21::nodes_per_edge); - return Prism21::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Prism21::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -274,7 +270,7 @@ std::unique_ptr Prism21::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Prism21::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -322,7 +318,7 @@ void Prism21::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Prism21::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_pyramid13.C b/src/geom/cell_pyramid13.C index faf84e00f3d..8a69a61590d 100644 --- a/src/geom/cell_pyramid13.C +++ b/src/geom/cell_pyramid13.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_pyramid13.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/face_quad8.h" #include "libmesh/enum_io_package.h" @@ -36,27 +37,6 @@ const int Pyramid13::num_nodes; const int Pyramid13::nodes_per_side; const int Pyramid13::nodes_per_edge; -const unsigned int Pyramid13::side_nodes_map[Pyramid13::num_sides][Pyramid13::nodes_per_side] = - 
{ - {0, 1, 4, 5, 10, 9, 99, 99}, // Side 0 (front) - {1, 2, 4, 6, 11, 10, 99, 99}, // Side 1 (right) - {2, 3, 4, 7, 12, 11, 99, 99}, // Side 2 (back) - {3, 0, 4, 8, 9, 12, 99, 99}, // Side 3 (left) - {0, 3, 2, 1, 8, 7, 6, 5} // Side 4 (base) - }; - -const unsigned int Pyramid13::edge_nodes_map[Pyramid13::num_edges][Pyramid13::nodes_per_edge] = - { - {0, 1, 5}, // Edge 0 - {1, 2, 6}, // Edge 1 - {2, 3, 7}, // Edge 2 - {0, 3, 8}, // Edge 3 - {0, 4, 9}, // Edge 4 - {1, 4, 10}, // Edge 5 - {2, 4, 11}, // Edge 6 - {3, 4, 12} // Edge 7 - }; - // ------------------------------------------------------------ // Pyramid13 class member functions @@ -89,33 +69,44 @@ bool Pyramid13::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Pyramid13::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s == 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Pyramid13::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Pyramid13::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -147,7 +138,10 @@ unsigned int Pyramid13::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(side == 4 || side_node < 6); - return Pyramid13::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Pyramid13::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -158,7 +152,10 @@ unsigned int Pyramid13::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Pyramid13::nodes_per_edge); - return Pyramid13::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Pyramid13::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -190,7 +187,7 @@ std::unique_ptr Pyramid13::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Pyramid13::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -236,7 +233,7 @@ void Pyramid13::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Pyramid13::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_pyramid14.C b/src/geom/cell_pyramid14.C index bcdd7e0f9e6..69d3e164eab 100644 --- a/src/geom/cell_pyramid14.C +++ b/src/geom/cell_pyramid14.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_pyramid14.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" @@ -36,27 +37,6 @@ const int Pyramid14::num_nodes; const int Pyramid14::nodes_per_side; const int Pyramid14::nodes_per_edge; -const unsigned int 
Pyramid14::side_nodes_map[Pyramid14::num_sides][Pyramid14::nodes_per_side] = - { - {0, 1, 4, 5, 10, 9, 99, 99, 99}, // Side 0 (front) - {1, 2, 4, 6, 11, 10, 99, 99, 99}, // Side 1 (right) - {2, 3, 4, 7, 12, 11, 99, 99, 99}, // Side 2 (back) - {3, 0, 4, 8, 9, 12, 99, 99, 99}, // Side 3 (left) - {0, 3, 2, 1, 8, 7, 6, 5, 13} // Side 4 (base) - }; - -const unsigned int Pyramid14::edge_nodes_map[Pyramid14::num_edges][Pyramid14::nodes_per_edge] = - { - {0, 1, 5}, // Edge 0 - {1, 2, 6}, // Edge 1 - {2, 3, 7}, // Edge 2 - {0, 3, 8}, // Edge 3 - {0, 4, 9}, // Edge 4 - {1, 4, 10}, // Edge 5 - {2, 4, 11}, // Edge 6 - {3, 4, 12} // Edge 7 - }; - // ------------------------------------------------------------ // Pyramid14 class member functions @@ -93,33 +73,44 @@ bool Pyramid14::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Pyramid14::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s == 4) ? 
0 : 3; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Pyramid14::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Pyramid14::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } bool Pyramid14::has_affine_map() const @@ -171,7 +162,10 @@ unsigned int Pyramid14::local_side_node(unsigned int side, // Some sides have 6 nodes. 
libmesh_assert(side == 4 || side_node < 6); - return Pyramid14::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Pyramid14::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -182,7 +176,10 @@ unsigned int Pyramid14::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Pyramid14::nodes_per_edge); - return Pyramid14::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Pyramid14::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -214,7 +211,7 @@ std::unique_ptr Pyramid14::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Pyramid14::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -260,7 +257,7 @@ void Pyramid14::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Pyramid14::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_pyramid18.C b/src/geom/cell_pyramid18.C index 12f7ad69f5f..f4c2c5e6d6a 100644 --- a/src/geom/cell_pyramid18.C +++ b/src/geom/cell_pyramid18.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_pyramid18.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri7.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" @@ -36,27 +37,6 @@ const int Pyramid18::num_nodes; const int Pyramid18::nodes_per_side; const int Pyramid18::nodes_per_edge; -const unsigned int 
Pyramid18::side_nodes_map[Pyramid18::num_sides][Pyramid18::nodes_per_side] = - { - {0, 1, 4, 5, 10, 9, 14, 99, 99}, // Side 0 (front) - {1, 2, 4, 6, 11, 10, 15, 99, 99}, // Side 1 (right) - {2, 3, 4, 7, 12, 11, 16, 99, 99}, // Side 2 (back) - {3, 0, 4, 8, 9, 12, 17, 99, 99}, // Side 3 (left) - {0, 3, 2, 1, 8, 7, 6, 5, 13} // Side 4 (base) - }; - -const unsigned int Pyramid18::edge_nodes_map[Pyramid18::num_edges][Pyramid18::nodes_per_edge] = - { - {0, 1, 5}, // Edge 0 - {1, 2, 6}, // Edge 1 - {2, 3, 7}, // Edge 2 - {0, 3, 8}, // Edge 3 - {0, 4, 9}, // Edge 4 - {1, 4, 10}, // Edge 5 - {2, 4, 11}, // Edge 6 - {3, 4, 12} // Edge 7 - }; - // ------------------------------------------------------------ // Pyramid18 class member functions @@ -93,33 +73,44 @@ bool Pyramid18::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Pyramid18::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - auto trim = (s == 4) ? 
0 : 2; - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s]) - trim}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Pyramid18::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Pyramid18::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -173,7 +164,10 @@ unsigned int Pyramid18::local_side_node(unsigned int side, // Some sides have 7 nodes. 
libmesh_assert(side == 4 || side_node < 7); - return Pyramid18::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Pyramid18::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -184,7 +178,10 @@ unsigned int Pyramid18::local_edge_node(unsigned int edge, libmesh_assert_less(edge, this->n_edges()); libmesh_assert_less(edge_node, Pyramid18::nodes_per_edge); - return Pyramid18::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Pyramid18::local_edge_node(): unsupported shared edge-node lookup"); + return node; } @@ -216,7 +213,7 @@ std::unique_ptr Pyramid18::build_side_ptr (const unsigned int i) // Set the nodes for (auto n : face->node_index_range()) - face->set_node(n, this->node_ptr(Pyramid18::side_nodes_map[i][n])); + face->set_node(n, this->node_ptr(this->local_side_node(i, n))); face->set_interior_parent(this); face->inherit_data_from(*this); @@ -262,7 +259,7 @@ void Pyramid18::build_side_ptr (std::unique_ptr & side, // Set the nodes for (auto n : side->node_index_range()) - side->set_node(n, this->node_ptr(Pyramid18::side_nodes_map[i][n])); + side->set_node(n, this->node_ptr(this->local_side_node(i, n))); } diff --git a/src/geom/cell_tet10.C b/src/geom/cell_tet10.C index f876afdcccd..edab2defba8 100644 --- a/src/geom/cell_tet10.C +++ b/src/geom/cell_tet10.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_tet10.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -34,24 +35,6 @@ const int Tet10::num_nodes; const int Tet10::nodes_per_side; const int Tet10::nodes_per_edge; -const unsigned int Tet10::side_nodes_map[Tet10::num_sides][Tet10::nodes_per_side] = - { - {0, 2, 1, 6, 5, 4}, // Side 0 - 
{0, 1, 3, 4, 8, 7}, // Side 1 - {1, 2, 3, 5, 9, 8}, // Side 2 - {2, 0, 3, 6, 7, 9} // Side 3 - }; - -const unsigned int Tet10::edge_nodes_map[Tet10::num_edges][Tet10::nodes_per_edge] = - { - {0, 1, 4}, // Edge 0 - {1, 2, 5}, // Edge 1 - {0, 2, 6}, // Edge 2 - {0, 3, 7}, // Edge 3 - {1, 3, 8}, // Edge 4 - {2, 3, 9} // Edge 5 - }; - // ------------------------------------------------------------ // Tet10 class member functions @@ -78,32 +61,44 @@ bool Tet10::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tet10::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Tet10::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Tet10::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return 
false; } @@ -185,7 +180,10 @@ unsigned int Tet10::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tet10::nodes_per_side); - return Tet10::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tet10::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -196,7 +194,10 @@ unsigned int Tet10::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Tet10::nodes_per_edge); - return Tet10::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Tet10::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/cell_tet14.C b/src/geom/cell_tet14.C index b214ee1c36b..7962ae5e51b 100644 --- a/src/geom/cell_tet14.C +++ b/src/geom/cell_tet14.C @@ -19,6 +19,7 @@ // Local includes #include "libmesh/cell_tet14.h" #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -42,24 +43,6 @@ const int Tet14::num_nodes; const int Tet14::nodes_per_side; const int Tet14::nodes_per_edge; -const unsigned int Tet14::side_nodes_map[Tet14::num_sides][Tet14::nodes_per_side] = - { - {0, 2, 1, 6, 5, 4, 10}, // Side 0 - {0, 1, 3, 4, 8, 7, 11}, // Side 1 - {1, 2, 3, 5, 9, 8, 12}, // Side 2 - {2, 0, 3, 6, 7, 9, 13} // Side 3 - }; - -const unsigned int Tet14::edge_nodes_map[Tet14::num_edges][Tet14::nodes_per_edge] = - { - {0, 1, 4}, // Edge 0 - {1, 2, 5}, // Edge 1 - {0, 2, 6}, // Edge 2 - {0, 3, 7}, // Edge 3 - {1, 3, 8}, // Edge 4 - {2, 3, 9} // Edge 5 - }; - // ------------------------------------------------------------ // Tet14 class member functions @@ -88,32 +71,44 @@ bool 
Tet14::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tet14::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector Tet14::nodes_on_edge(const unsigned int e) const { libmesh_assert_less(e, n_edges()); - return {std::begin(edge_nodes_map[e]), std::end(edge_nodes_map[e])}; + const auto count = edge_node_count_or_zero(this->type(), e); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_edge_node(e, i); + return nodes; } bool Tet14::is_node_on_edge(const unsigned int n, const unsigned int e) const { libmesh_assert_less (e, n_edges()); - return std::find(std::begin(edge_nodes_map[e]), - std::end(edge_nodes_map[e]), - n) != std::end(edge_nodes_map[e]); + const auto count = edge_node_count_or_zero(this->type(), e); + for (unsigned int i = 0; i != count; ++i) + if (this->local_edge_node(e, i) == n) + return true; + return false; } @@ -210,7 +205,10 @@ unsigned int Tet14::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tet14::nodes_per_side); - return Tet14::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tet14::local_side_node(): unsupported shared side-node lookup"); + return node; } @@ -221,7 +219,10 
@@ unsigned int Tet14::local_edge_node(unsigned int edge, libmesh_assert_less (edge, this->n_edges()); libmesh_assert_less (edge_node, Tet14::nodes_per_edge); - return Tet14::edge_nodes_map[edge][edge_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_edge_node(this->type(), edge, edge_node, node), + "Tet14::local_edge_node(): unsupported shared edge-node lookup"); + return node; } diff --git a/src/geom/face_quad8.C b/src/geom/face_quad8.C index 720f77dd4d1..e710f4b1ae4 100644 --- a/src/geom/face_quad8.C +++ b/src/geom/face_quad8.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad8.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -32,15 +33,6 @@ namespace libMesh const int Quad8::num_nodes; const int Quad8::nodes_per_side; -const unsigned int Quad8::side_nodes_map[Quad8::num_sides][Quad8::nodes_per_side] = - { - {0, 1, 4}, // Side 0 - {1, 2, 5}, // Side 1 - {2, 3, 6}, // Side 2 - {3, 0, 7} // Side 3 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Quad8::_embedding_matrix[Quad8::num_children][Quad8::num_nodes][Quad8::num_nodes] = @@ -128,16 +120,22 @@ bool Quad8::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Quad8::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } 
std::vector @@ -212,7 +210,10 @@ unsigned int Quad8::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Quad8::nodes_per_side); - return Quad8::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Quad8::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/geom/face_quad9.C b/src/geom/face_quad9.C index 7182b023a43..60f56aca69f 100644 --- a/src/geom/face_quad9.C +++ b/src/geom/face_quad9.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_quad9.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -32,15 +33,6 @@ namespace libMesh const int Quad9::num_nodes; const int Quad9::nodes_per_side; -const unsigned int Quad9::side_nodes_map[Quad9::num_sides][Quad9::nodes_per_side] = - { - {0, 1, 4}, // Side 0 - {1, 2, 5}, // Side 1 - {2, 3, 6}, // Side 2 - {3, 0, 7} // Side 3 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Quad9::_embedding_matrix[Quad9::num_children][Quad9::num_nodes][Quad9::num_nodes] = @@ -136,16 +128,22 @@ bool Quad9::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Quad9::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, 
i); + return nodes; } std::vector @@ -230,7 +228,10 @@ unsigned int Quad9::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Quad9::nodes_per_side); - return Quad9::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Quad9::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/geom/face_tri6.C b/src/geom/face_tri6.C index fb24d0fe701..f844ebfe635 100644 --- a/src/geom/face_tri6.C +++ b/src/geom/face_tri6.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri6.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -32,14 +33,6 @@ namespace libMesh const int Tri6::num_nodes; const int Tri6::nodes_per_side; -const unsigned int Tri6::side_nodes_map[Tri6::num_sides][Tri6::nodes_per_side] = - { - {0, 1, 3}, // Side 0 - {1, 2, 4}, // Side 1 - {2, 0, 5} // Side 2 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Tri6::_embedding_matrix[Tri6::num_children][Tri6::num_nodes][Tri6::num_nodes] = @@ -119,16 +112,22 @@ bool Tri6::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tri6::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return 
nodes; } std::vector @@ -199,7 +198,10 @@ unsigned int Tri6::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tri6::nodes_per_side); - return Tri6::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tri6::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/geom/face_tri7.C b/src/geom/face_tri7.C index e30c72ced52..e007678d0fc 100644 --- a/src/geom/face_tri7.C +++ b/src/geom/face_tri7.C @@ -17,6 +17,7 @@ // Local includes #include "libmesh/edge_edge3.h" +#include "libmesh/fe_reference_element_traits.h" #include "libmesh/face_tri7.h" #include "libmesh/enum_io_package.h" #include "libmesh/enum_order.h" @@ -38,14 +39,6 @@ namespace libMesh const int Tri7::num_nodes; const int Tri7::nodes_per_side; -const unsigned int Tri7::side_nodes_map[Tri7::num_sides][Tri7::nodes_per_side] = - { - {0, 1, 3}, // Side 0 - {1, 2, 4}, // Side 1 - {2, 0, 5} // Side 2 - }; - - #ifdef LIBMESH_ENABLE_AMR const Real Tri7::_embedding_matrix[Tri7::num_children][Tri7::num_nodes][Tri7::num_nodes] = @@ -143,16 +136,22 @@ bool Tri7::is_node_on_side(const unsigned int n, const unsigned int s) const { libmesh_assert_less (s, n_sides()); - return std::find(std::begin(side_nodes_map[s]), - std::end(side_nodes_map[s]), - n) != std::end(side_nodes_map[s]); + const auto count = side_node_count_or_zero(this->type(), s); + for (unsigned int i = 0; i != count; ++i) + if (this->local_side_node(s, i) == n) + return true; + return false; } std::vector Tri7::nodes_on_side(const unsigned int s) const { libmesh_assert_less(s, n_sides()); - return {std::begin(side_nodes_map[s]), std::end(side_nodes_map[s])}; + const auto count = side_node_count_or_zero(this->type(), s); + std::vector nodes(count); + for (unsigned int i = 0; i != count; ++i) + nodes[i] = this->local_side_node(s, i); + return nodes; } std::vector 
@@ -236,7 +235,10 @@ unsigned int Tri7::local_side_node(unsigned int side, libmesh_assert_less (side, this->n_sides()); libmesh_assert_less (side_node, Tri7::nodes_per_side); - return Tri7::side_nodes_map[side][side_node]; + unsigned int node = invalid_uint; + libmesh_error_msg_if(!try_local_side_node(this->type(), side, side_node, node), + "Tri7::local_side_node(): unsupported shared side-node lookup"); + return node; } diff --git a/src/libmesh_SOURCES b/src/libmesh_SOURCES index d73987f9168..7fbf2e69689 100644 --- a/src/libmesh_SOURCES +++ b/src/libmesh_SOURCES @@ -274,6 +274,7 @@ libmesh_SOURCES = \ src/numerics/laspack_vector.C \ src/numerics/lumped_mass_matrix.C \ src/numerics/numeric_vector.C \ + src/numerics/parsed_function_program.C \ src/numerics/petsc_matrix.C \ src/numerics/petsc_matrix_base.C \ src/numerics/petsc_matrix_shell_matrix.C \ diff --git a/src/mesh/checkpoint_io.C b/src/mesh/checkpoint_io.C index 7e7ef2b2df3..d25dac6ea54 100644 --- a/src/mesh/checkpoint_io.C +++ b/src/mesh/checkpoint_io.C @@ -1194,7 +1194,7 @@ void CheckpointIO::read_connectivity (Xdr & io) cast_int (elem_data[2] % mesh.n_processors()); const subdomain_id_type subdomain_id = - restrict_int(elem_data[3]); + cast_int(elem_data[3]); // Old broken files used processsor_id_type(-1)... 
// But we *know* our first element will be level 0 diff --git a/src/mesh/exodusII_io.C b/src/mesh/exodusII_io.C index 4bf7bcf164e..2c83d16f0cc 100644 --- a/src/mesh/exodusII_io.C +++ b/src/mesh/exodusII_io.C @@ -441,7 +441,7 @@ void ExodusII_IO::read (const std::string & fname) // Read the information for block i exio_helper->read_elem_in_block (i); const subdomain_id_type subdomain_id = - restrict_int(exio_helper->get_block_id(i)); + cast_int(exio_helper->get_block_id(i)); max_subdomain_id = std::max(max_subdomain_id, subdomain_id); // populate the map of names diff --git a/src/mesh/exodusII_io_helper.C b/src/mesh/exodusII_io_helper.C index 13f74055cf0..365c030630e 100644 --- a/src/mesh/exodusII_io_helper.C +++ b/src/mesh/exodusII_io_helper.C @@ -3435,7 +3435,7 @@ void ExodusII_IO_Helper::initialize_element_variables(std::vector n std::set current_set; if (vars_active_subdomains[var_num].empty()) for (auto block_id : block_ids) - current_set.insert(restrict_int(block_id)); + current_set.insert(cast_int(block_id)); else current_set = vars_active_subdomains[var_num]; diff --git a/src/mesh/gmsh_io.C b/src/mesh/gmsh_io.C index 9684b4ed00f..11a0d08c49a 100644 --- a/src/mesh/gmsh_io.C +++ b/src/mesh/gmsh_io.C @@ -274,12 +274,12 @@ void GmshIO::read_mesh(std::istream & in) // conditions. if (s.find("lower_dimensional_block") != std::string::npos) { - lower_dimensional_blocks.insert(restrict_int(phys_id)); + lower_dimensional_blocks.insert(cast_int(phys_id)); // The user has explicitly told us that this // block is a subdomain, so set that association // in the Mesh. - mesh.subdomain_name(restrict_int(phys_id)) = phys_name; + mesh.subdomain_name(cast_int(phys_id)) = phys_name; } } } @@ -795,7 +795,7 @@ void GmshIO::read_mesh(std::istream & in) // If the physical's dimension matches the largest // dimension we've seen, it's a subdomain name. 
if (phys_dim == max_elem_dimension_seen) - mesh.subdomain_name(restrict_int(phys_id)) = phys_name; + mesh.subdomain_name(cast_int(phys_id)) = phys_name; // If it's zero-dimensional then it's a nodeset else if (phys_dim == 0) diff --git a/src/mesh/mesh_base.C b/src/mesh/mesh_base.C index 7dc7a6f31cf..e1ece45bfb7 100644 --- a/src/mesh/mesh_base.C +++ b/src/mesh/mesh_base.C @@ -36,6 +36,7 @@ #include "libmesh/point_locator_base.h" #include "libmesh/sparse_matrix.h" #include "libmesh/threads.h" +#include "libmesh/utility.h" #include "libmesh/enum_elem_type.h" #include "libmesh/enum_point_locator_type.h" #include "libmesh/enum_to_string.h" @@ -192,6 +193,9 @@ MeshBase& MeshBase::operator= (MeshBase && other_mesh) _default_mapping_data = other_mesh.default_mapping_data(); _preparation = other_mesh._preparation; _point_locator = std::move(other_mesh._point_locator); +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif _count_lower_dim_elems_in_point_locator = other_mesh.get_count_lower_dim_elems_in_point_locator(); #ifdef LIBMESH_ENABLE_UNIQUE_ID _next_unique_id = other_mesh.next_unique_id(); @@ -603,6 +607,9 @@ void MeshBase::set_spatial_dimension(unsigned char d) // libMesh will only *increase* the spatial dimension, however, // never decrease it. _spatial_dimension = d; +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif } @@ -998,6 +1005,10 @@ void MeshBase::complete_preparation() MeshTools::libmesh_assert_valid_unique_ids(*this); #endif #endif + +#ifdef LIBMESH_HAVE_KOKKOS + this->prepare_kokkos_geometry_cache(); +#endif } void @@ -1036,8 +1047,127 @@ void MeshBase::clear () // Clear our point locator. 
this->clear_point_locator(); +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif +} + +#ifdef LIBMESH_HAVE_KOKKOS +const MeshBase::KokkosGeometryCache & +MeshBase::get_kokkos_geometry_cache() const +{ + if (_kokkos_geometry_cache) + return *_kokkos_geometry_cache; + + auto cache = std::make_unique(); + cache->host_element_ids.reserve(this->n_active_local_elem()); + + for (const auto & elem : this->active_local_element_ptr_range()) + { + cache->element_lookup.emplace(elem->id(), cast_int(cache->host_element_ids.size())); + cache->host_element_ids.push_back(elem->id()); + cache->max_nodes = std::max(cache->max_nodes, elem->n_nodes()); + + for (unsigned int n = 0; n != elem->n_nodes(); ++n) + { + const dof_id_type node_id = elem->node_id(n); + if (!cache->node_lookup.count(node_id)) + { + cache->node_lookup.emplace(node_id, cast_int(cache->host_node_ids.size())); + cache->host_node_ids.push_back(node_id); + } + } + } + + cache->node_ids = + KokkosGeometryCache::node_id_view("mesh_kokkos_node_ids", cache->host_node_ids.size()); + cache->element_ids = + KokkosGeometryCache::elem_id_view("mesh_kokkos_element_ids", cache->host_element_ids.size()); + cache->node_coordinates = + KokkosGeometryCache::node_coord_view("mesh_kokkos_node_coordinates", + cache->host_node_ids.size(), + LIBMESH_DIM); + cache->element_node_ids = + KokkosGeometryCache::elem_node_id_view("mesh_kokkos_element_node_ids", + cache->host_element_ids.size(), + cache->max_nodes); + cache->element_types = + KokkosGeometryCache::elem_type_view("mesh_kokkos_element_types", cache->host_element_ids.size()); + cache->element_mapping_types = + KokkosGeometryCache::elem_mapping_type_view("mesh_kokkos_element_mapping_types", + cache->host_element_ids.size()); + cache->element_n_nodes = + KokkosGeometryCache::elem_n_nodes_view("mesh_kokkos_element_n_nodes", cache->host_element_ids.size()); + cache->element_p_levels = + KokkosGeometryCache::elem_p_level_view("mesh_kokkos_element_p_levels", 
cache->host_element_ids.size()); + cache->element_subdomains = + KokkosGeometryCache::elem_subdomain_view("mesh_kokkos_element_subdomains", + cache->host_element_ids.size()); + + auto h_node_ids = ::Kokkos::create_mirror_view(cache->node_ids); + auto h_element_ids = ::Kokkos::create_mirror_view(cache->element_ids); + auto h_node_coordinates = ::Kokkos::create_mirror_view(cache->node_coordinates); + auto h_element_node_ids = ::Kokkos::create_mirror_view(cache->element_node_ids); + auto h_element_types = ::Kokkos::create_mirror_view(cache->element_types); + auto h_element_mapping_types = ::Kokkos::create_mirror_view(cache->element_mapping_types); + auto h_element_n_nodes = ::Kokkos::create_mirror_view(cache->element_n_nodes); + auto h_element_p_levels = ::Kokkos::create_mirror_view(cache->element_p_levels); + auto h_element_subdomains = ::Kokkos::create_mirror_view(cache->element_subdomains); + + for (auto node_index : index_range(cache->host_node_ids)) + { + const dof_id_type node_id = cache->host_node_ids[node_index]; + const Node & node = *this->query_node_ptr(node_id); + h_node_ids(cast_int(node_index)) = node_id; + for (unsigned int component = 0; component != LIBMESH_DIM; ++component) + h_node_coordinates(cast_int(node_index), component) = node(component); + } + + for (auto elem_index : index_range(cache->host_element_ids)) + { + const dof_id_type elem_id = cache->host_element_ids[elem_index]; + const Elem & elem = *this->query_elem_ptr(elem_id); + h_element_ids(cast_int(elem_index)) = elem_id; + h_element_types(cast_int(elem_index)) = elem.type(); + h_element_mapping_types(cast_int(elem_index)) = elem.mapping_type(); + h_element_n_nodes(cast_int(elem_index)) = elem.n_nodes(); + h_element_p_levels(cast_int(elem_index)) = elem.p_level(); + h_element_subdomains(cast_int(elem_index)) = elem.subdomain_id(); + for (unsigned int n = 0; n != elem.n_nodes(); ++n) + h_element_node_ids(cast_int(elem_index), n) = + libmesh_map_find(cache->node_lookup, elem.node_id(n)); + 
} + + ::Kokkos::deep_copy(cache->node_ids, h_node_ids); + ::Kokkos::deep_copy(cache->element_ids, h_element_ids); + ::Kokkos::deep_copy(cache->node_coordinates, h_node_coordinates); + ::Kokkos::deep_copy(cache->element_node_ids, h_element_node_ids); + ::Kokkos::deep_copy(cache->element_types, h_element_types); + ::Kokkos::deep_copy(cache->element_mapping_types, h_element_mapping_types); + ::Kokkos::deep_copy(cache->element_n_nodes, h_element_n_nodes); + ::Kokkos::deep_copy(cache->element_p_levels, h_element_p_levels); + ::Kokkos::deep_copy(cache->element_subdomains, h_element_subdomains); + _kokkos_geometry_cache = std::move(cache); + return *_kokkos_geometry_cache; } +void +MeshBase::prepare_kokkos_geometry_cache() const +{ + libmesh_ignore(this->get_kokkos_geometry_cache()); +} + +unsigned int +MeshBase::get_kokkos_elem_index(const Elem & elem) const +{ + const auto & cache = this->get_kokkos_geometry_cache(); + if (auto it = cache.element_lookup.find(elem.id()); it != cache.element_lookup.end()) + return it->second; + + return libMesh::invalid_uint; +} +#endif + bool MeshBase::is_prepared() const { @@ -1741,6 +1871,9 @@ void MeshBase::partition (const unsigned int n_parts) } _preparation.is_partitioned = true; +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif } void MeshBase::all_second_order (const bool full_ordered) @@ -2290,6 +2423,10 @@ MeshBase::post_dofobject_moves(MeshBase && other_mesh) if (other_mesh.partitioner()) _partitioner = std::move(other_mesh.partitioner()); + +#ifdef LIBMESH_HAVE_KOKKOS + _kokkos_geometry_cache.reset(); +#endif } diff --git a/src/mesh/nemesis_io.C b/src/mesh/nemesis_io.C index 1c83c38728f..f05892c357c 100644 --- a/src/mesh/nemesis_io.C +++ b/src/mesh/nemesis_io.C @@ -857,7 +857,7 @@ void Nemesis_IO::read (const std::string & base_filename) // Set subdomain ID based on the block ID. 
subdomain_id_type subdomain_id = - restrict_int(nemhelper->block_ids[i]); + cast_int(nemhelper->block_ids[i]); // Create a type string (this uses the null-terminated string ctor). const std::string type_str ( nemhelper->elem_type.data() ); diff --git a/src/mesh/nemesis_io_helper.C b/src/mesh/nemesis_io_helper.C index c116232393e..46400016eef 100644 --- a/src/mesh/nemesis_io_helper.C +++ b/src/mesh/nemesis_io_helper.C @@ -2285,7 +2285,7 @@ void Nemesis_IO_Helper::write_elements(const MeshBase & mesh, bool /*use_discont // empty string if there is no name associated with the current // block. names_table.push_back_entry - (mesh.subdomain_name(restrict_int(this->global_elem_blk_ids[i]))); + (mesh.subdomain_name(cast_int(this->global_elem_blk_ids[i]))); // Search for the current global block ID in the map if (const auto it = this->block_id_to_elem_connectivity.find( this->global_elem_blk_ids[i] ); @@ -2661,7 +2661,7 @@ Nemesis_IO_Helper::write_element_values(const MeshBase & mesh, for (const int sbd_id_int : global_elem_blk_ids) { const subdomain_id_type sbd_id = - restrict_int(sbd_id_int); + cast_int(sbd_id_int); auto it = subdomain_map.find(sbd_id); const std::vector empty_vec; const std::vector & elem_ids = diff --git a/src/mesh/tetgen_io.C b/src/mesh/tetgen_io.C index 9010d01a330..b61169d75ab 100644 --- a/src/mesh/tetgen_io.C +++ b/src/mesh/tetgen_io.C @@ -257,7 +257,7 @@ void TetGenIO::element_in (std::istream & ele_stream) // Make sure that the id we read can be successfully cast to // an integral value of type subdomain_id_type. - elem->subdomain_id() = restrict_int(region); + elem->subdomain_id() = cast_int(region); } } } diff --git a/src/mesh/ucd_io.C b/src/mesh/ucd_io.C index b0b6feb745b..e6f6cd565c8 100644 --- a/src/mesh/ucd_io.C +++ b/src/mesh/ucd_io.C @@ -221,7 +221,7 @@ void UCDIO::read_implementation (std::istream & in) elems_of_dimension[elem->dim()] = true; // Set the element's subdomain ID based on the material_id. 
- elem->subdomain_id() = restrict_int(material_id); + elem->subdomain_id() = cast_int(material_id); // Add the element to the mesh elem->set_id(i); diff --git a/src/mesh/unv_io.C b/src/mesh/unv_io.C index 4f4344502a0..0d333b2dafd 100644 --- a/src/mesh/unv_io.C +++ b/src/mesh/unv_io.C @@ -519,7 +519,7 @@ void UNVIO::groups_in (std::istream & in_file) // Set the current group number as the lower-dimensional element's subdomain ID. // We will use this later to set a boundary ID. group_elem->subdomain_id() = - restrict_int(group_number); + cast_int(group_number); // Store the lower-dimensional element in the provide_bcs container. provide_bcs.emplace(group_elem->key(), group_elem); @@ -530,7 +530,7 @@ void UNVIO::groups_in (std::istream & in_file) { is_subdomain_group = true; group_elem->subdomain_id() = - restrict_int(group_number); + cast_int(group_number); } else diff --git a/src/mesh/xdr_io.C b/src/mesh/xdr_io.C index 097d7c2f46b..862313e8444 100644 --- a/src/mesh/xdr_io.C +++ b/src/mesh/xdr_io.C @@ -1897,7 +1897,7 @@ XdrIO::read_serialized_connectivity (Xdr & io, cast_int(*it++); const subdomain_id_type subdomain_id = - restrict_int(*it++); + cast_int(*it++); tmp = *it++; #ifdef LIBMESH_ENABLE_AMR diff --git a/src/numerics/parsed_function_program.C b/src/numerics/parsed_function_program.C new file mode 100644 index 00000000000..a93ca327038 --- /dev/null +++ b/src/numerics/parsed_function_program.C @@ -0,0 +1,155 @@ +// The libMesh Finite Element Library. +// Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner + +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+ +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. + +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +#include "libmesh/parsed_function_program.h" + +#include "fparser_ad.hh" +#include "extrasrc/fptypes.hh" + +namespace +{ + +template +void +validate_kokkos_program_opcode(const unsigned int opcode) +{ + using libMesh::ParsedFunctionOpcode; + + if (libMesh::parsed_function_is_var_opcode(opcode)) + return; + + switch (static_cast(opcode)) + { + case ParsedFunctionOpcode::cAbs: + case ParsedFunctionOpcode::cAcos: + case ParsedFunctionOpcode::cAcosh: + case ParsedFunctionOpcode::cAsin: + case ParsedFunctionOpcode::cAsinh: + case ParsedFunctionOpcode::cAtan: + case ParsedFunctionOpcode::cAtan2: + case ParsedFunctionOpcode::cAtanh: + case ParsedFunctionOpcode::cCbrt: + case ParsedFunctionOpcode::cCeil: + case ParsedFunctionOpcode::cCos: + case ParsedFunctionOpcode::cCosh: + case ParsedFunctionOpcode::cCot: + case ParsedFunctionOpcode::cCsc: + case ParsedFunctionOpcode::cExp: + case ParsedFunctionOpcode::cExp2: + case ParsedFunctionOpcode::cFloor: + case ParsedFunctionOpcode::cHypot: + case ParsedFunctionOpcode::cIf: + case ParsedFunctionOpcode::cInt: + case ParsedFunctionOpcode::cLog: + case ParsedFunctionOpcode::cLog10: + case ParsedFunctionOpcode::cLog2: + case ParsedFunctionOpcode::cMax: + case ParsedFunctionOpcode::cMin: + case ParsedFunctionOpcode::cPow: + case ParsedFunctionOpcode::cSec: + case ParsedFunctionOpcode::cSin: + case ParsedFunctionOpcode::cSinh: + case ParsedFunctionOpcode::cSqrt: + case ParsedFunctionOpcode::cTan: + case ParsedFunctionOpcode::cTanh: + case 
ParsedFunctionOpcode::cTrunc: + case ParsedFunctionOpcode::cImmed: + case ParsedFunctionOpcode::cJump: + case ParsedFunctionOpcode::cNeg: + case ParsedFunctionOpcode::cAdd: + case ParsedFunctionOpcode::cSub: + case ParsedFunctionOpcode::cMul: + case ParsedFunctionOpcode::cDiv: + case ParsedFunctionOpcode::cMod: + case ParsedFunctionOpcode::cEqual: + case ParsedFunctionOpcode::cNEqual: + case ParsedFunctionOpcode::cLess: + case ParsedFunctionOpcode::cLessOrEq: + case ParsedFunctionOpcode::cGreater: + case ParsedFunctionOpcode::cGreaterOrEq: + case ParsedFunctionOpcode::cNot: + case ParsedFunctionOpcode::cAnd: + case ParsedFunctionOpcode::cOr: + case ParsedFunctionOpcode::cNotNot: + case ParsedFunctionOpcode::cDeg: + case ParsedFunctionOpcode::cRad: + case ParsedFunctionOpcode::cPopNMov: + case ParsedFunctionOpcode::cLog2by: + case ParsedFunctionOpcode::cNop: + case ParsedFunctionOpcode::cSinCos: + case ParsedFunctionOpcode::cSinhCosh: + case ParsedFunctionOpcode::cAbsAnd: + case ParsedFunctionOpcode::cAbsOr: + case ParsedFunctionOpcode::cAbsNot: + case ParsedFunctionOpcode::cAbsNotNot: + case ParsedFunctionOpcode::cAbsIf: + case ParsedFunctionOpcode::cDup: + case ParsedFunctionOpcode::cFetch: + case ParsedFunctionOpcode::cInv: + case ParsedFunctionOpcode::cSqr: + case ParsedFunctionOpcode::cRDiv: + case ParsedFunctionOpcode::cRSub: + case ParsedFunctionOpcode::cRSqrt: + return; + + case ParsedFunctionOpcode::cArg: + case ParsedFunctionOpcode::cConj: + case ParsedFunctionOpcode::cImag: + case ParsedFunctionOpcode::cPolar: + case ParsedFunctionOpcode::cReal: + libmesh_error_msg("Kokkos parsed-function export does not support complex-valued fparser opcodes"); + + case ParsedFunctionOpcode::cFCall: + case ParsedFunctionOpcode::cPCall: + libmesh_error_msg("Kokkos parsed-function export does not support user-defined or nested parser calls"); + + case ParsedFunctionOpcode::VarBegin: + return; + } + + libmesh_error_msg("Kokkos parsed-function export encountered an unknown 
opcode " << opcode); +} + +} // anonymous namespace + +namespace libMesh +{ + +template +ParsedFunctionProgram +build_parsed_function_program(const FunctionParserADBase & parser) +{ + ParsedFunctionProgram program; + const auto * data = parser.parser_data(); + libmesh_assert(data); + + program.bytecode.assign(data->mByteCode.begin(), data->mByteCode.end()); + program.immediates.assign(data->mImmed.begin(), data->mImmed.end()); + program.stack_size = data->mStackSize; + program.n_variables = data->mVariablesAmount; + program.epsilon = FunctionParserBase::epsilon(); + + for (const auto opcode : program.bytecode) + validate_kokkos_program_opcode(opcode); + + return program; +} + +template ParsedFunctionProgram +build_parsed_function_program(const FunctionParserADBase & parser); + +} // namespace libMesh diff --git a/src/numerics/petsc_matrix_base.C b/src/numerics/petsc_matrix_base.C index 33d439622ec..4666e7a3258 100644 --- a/src/numerics/petsc_matrix_base.C +++ b/src/numerics/petsc_matrix_base.C @@ -93,7 +93,6 @@ void PetscMatrixBase::set_destroy_mat_on_exit(bool destroy) this->_destroy_mat_on_exit = destroy; } - template void PetscMatrixBase::swap(PetscMatrixBase & m_in) { diff --git a/src/numerics/petsc_vector.C b/src/numerics/petsc_vector.C index 129f3abfeab..c9d3161f5d0 100644 --- a/src/numerics/petsc_vector.C +++ b/src/numerics/petsc_vector.C @@ -1178,8 +1178,6 @@ void PetscVector::create_subvector(NumericVector & subvector, petsc_subvector->_is_closed = true; } - - template void PetscVector::_get_array(bool read_only) const { diff --git a/src/quadrature/quadrature_gauss_1D.C b/src/quadrature/quadrature_gauss_1D.C index 0e72fc7c8a4..3292fff2706 100644 --- a/src/quadrature/quadrature_gauss_1D.C +++ b/src/quadrature/quadrature_gauss_1D.C @@ -21,6 +21,7 @@ // Local includes #include "libmesh/quadrature_gauss.h" +#include "libmesh/quadrature_gauss_rules.h" namespace libMesh { @@ -31,6 +32,22 @@ void QGauss::init_1D() { 
//---------------------------------------------------------------------- // 1D quadrature rules + const auto shared_rule = Quadrature::Gauss::gauss_legendre_rule(static_cast(get_order())); + + if (shared_rule.count) + { + _points.resize(shared_rule.count); + _weights.resize(shared_rule.count); + + for (unsigned int i = 0; i < shared_rule.count; ++i) + { + _points[i](0) = shared_rule.points[i]; + _weights[i] = shared_rule.weights[i]; + } + + return; + } + switch(get_order()) { case CONSTANT: diff --git a/src/quadrature/quadrature_gauss_2D.C b/src/quadrature/quadrature_gauss_2D.C index 06e30bff52b..8dd2e5dc1c1 100644 --- a/src/quadrature/quadrature_gauss_2D.C +++ b/src/quadrature/quadrature_gauss_2D.C @@ -20,6 +20,7 @@ // Local includes #include "libmesh/quadrature_gauss.h" #include "libmesh/quadrature_conical.h" +#include "libmesh/quadrature_gauss_rules.h" #include "libmesh/enum_to_string.h" #include "libmesh/face_c0polygon.h" @@ -76,6 +77,24 @@ void QGauss::init_2D() case TRI6: case TRI7: { + const auto shared_rule = + Quadrature::Gauss::triangle_rule(static_cast(get_order())); + + if (shared_rule.count) + { + _points.resize(shared_rule.count); + _weights.resize(shared_rule.count); + + for (unsigned int i = 0; i < shared_rule.count; ++i) + { + _points[i](0) = shared_rule.points[i].x; + _points[i](1) = shared_rule.points[i].y; + _weights[i] = shared_rule.points[i].w; + } + + return; + } + switch(get_order()) { case CONSTANT: diff --git a/src/quadrature/quadrature_gauss_3D.C b/src/quadrature/quadrature_gauss_3D.C index e9986c216cb..39f3d6e139c 100644 --- a/src/quadrature/quadrature_gauss_3D.C +++ b/src/quadrature/quadrature_gauss_3D.C @@ -20,6 +20,7 @@ // Local includes #include "libmesh/quadrature_gauss.h" #include "libmesh/quadrature_conical.h" +#include "libmesh/quadrature_gauss_rules.h" #include "libmesh/quadrature_gm.h" #include "libmesh/enum_to_string.h" #include "libmesh/cell_c0polyhedron.h" @@ -56,6 +57,26 @@ void QGauss::init_3D() case TET10: case TET14: { + 
const auto shared_rule = + Quadrature::Gauss::tetrahedron_rule(static_cast(get_order()), + allow_rules_with_negative_weights); + + if (shared_rule.count) + { + _points.resize(shared_rule.count); + _weights.resize(shared_rule.count); + + for (unsigned int i = 0; i < shared_rule.count; ++i) + { + _points[i](0) = shared_rule.points[i].x; + _points[i](1) = shared_rule.points[i].y; + _points[i](2) = shared_rule.points[i].z; + _weights[i] = shared_rule.points[i].w; + } + + return; + } + switch(get_order()) { // Taken from pg. 222 of "The finite element method," vol. 1 @@ -181,8 +202,6 @@ void QGauss::init_3D() // Note: if !allow_rules_with_negative_weights, fall through to next case. } - - // Originally a Keast rule, // Patrick Keast, // Moderate Degree Tetrahedral Quadrature Formulas, diff --git a/tests/Makefile.am b/tests/Makefile.am index bb12f424833..740c6440af5 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -6,7 +6,11 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) -I$(top_builddir)/include \ $(libmesh_contrib_INCLUDES) $(CPPUNIT_CFLAGS) \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) +AM_CPPFLAGS += $(LIBMESH_KOKKOS_BUILD_CPPFLAGS) +AM_CXXFLAGS += $(LIBMESH_KOKKOS_BUILD_CXXFLAGS) +AM_LDFLAGS += $(LIBMESH_KOKKOS_BUILD_LDFLAGS) LIBS = $(libmesh_optional_LIBS) $(CPPUNIT_LIBS) +KOKKOS_TEST_CPPFLAGS = # We might have turned on -Werror and/or paranoid warnings CXXFLAGS_DBG += $(ACSM_ANY_WERROR_FLAG) $(ACSM_ANY_PARANOID_FLAGS) @@ -134,12 +138,14 @@ unit_tests_sources = \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ + quadrature/quadrature_exactness.h \ quadrature/quadrature_test.C \ solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ systems/periodic_bc_test.C \ 
systems/disjoint_neighbor_test.C \ systems/systems_test.C \ @@ -240,7 +246,7 @@ data = matrices/geom_1_extraction_op.h5 \ unit_tests_data = $(data) # Why isn't this working automatically? -EXTRA_DIST = $(data) +EXTRA_DIST = $(data) fe/kokkos_fe_oracle_test_utils.h if LIBMESH_ENABLE_FPARSER unit_tests_sources += \ @@ -248,6 +254,93 @@ if LIBMESH_ENABLE_FPARSER endif check_PROGRAMS = # empty, append below +TESTS = + +if LIBMESH_ENABLE_KOKKOS + KOKKOS_TEST_CPPFLAGS += -I$(top_srcdir)/include + + check_PROGRAMS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ + kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ + kokkos_fe_contract_unit kokkos_fe_permuted_map_oracle_unit \ + kokkos_fe_reconstruction_oracle_unit \ + kokkos_fe_side_trace_oracle_unit + TESTS += kokkos_fe_types_oracle_unit kokkos_fe_shape_oracle_unit \ + kokkos_fe_map_oracle_unit kokkos_fe_invariant_unit \ + kokkos_fe_contract_unit kokkos_fe_permuted_map_oracle_unit \ + kokkos_fe_reconstruction_oracle_unit \ + kokkos_fe_side_trace_oracle_unit + + kokkos_fe_types_oracle_unit_SOURCES = fe/kokkos_fe_types_oracle_test.K + kokkos_fe_types_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_fe_shape_oracle_unit_SOURCES = fe/kokkos_fe_shape_oracle_test.K + kokkos_fe_shape_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_fe_map_oracle_unit_SOURCES = fe/kokkos_fe_map_oracle_test.K + kokkos_fe_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_map_oracle_unit_LDADD = 
$(top_builddir)/libmesh_opt.la + + kokkos_fe_invariant_unit_SOURCES = fe/kokkos_fe_invariant_test.K + kokkos_fe_invariant_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_fe_contract_unit_SOURCES = fe/kokkos_fe_contract_test.K + kokkos_fe_contract_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_fe_permuted_map_oracle_unit_SOURCES = fe/kokkos_fe_permuted_map_oracle_test.K + kokkos_fe_permuted_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_permuted_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_fe_reconstruction_oracle_unit_SOURCES = fe/kokkos_fe_reconstruction_oracle_test.K + kokkos_fe_reconstruction_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_fe_side_trace_oracle_unit_SOURCES = fe/kokkos_fe_side_trace_oracle_test.K + kokkos_fe_side_trace_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + check_PROGRAMS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ + kokkos_tensor_ops_oracle_unit + TESTS += kokkos_quadrature_oracle_unit kokkos_vector_ops_oracle_unit \ + 
kokkos_tensor_ops_oracle_unit + + kokkos_quadrature_oracle_unit_SOURCES = fe/kokkos_quadrature_oracle_test.K + kokkos_quadrature_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_quadrature_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K + kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_vector_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la + + kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K + kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) + kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) + kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) + kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la +endif # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. Some cppunit versions work fine for us, @@ -358,9 +451,16 @@ $(top_builddir)/libmesh_oprof.la: FORCE if LIBMESH_ENABLE_CPPUNIT -TESTS = run_unit_tests.sh +TESTS += run_unit_tests.sh endif +# Compile .K translation units with the same project-wide compiler model. 
+.K.o: + $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) \ + -c $< -o $@ + CLEANFILES = cube_mesh.xda \ slit_mesh.xda \ slit_solution.xda \ diff --git a/tests/Makefile.in b/tests/Makefile.in index 59ed2e7641e..dbbf441b633 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -95,25 +95,53 @@ target_triplet = @target@ @LIBMESH_ENABLE_FPARSER_TRUE@ fparser/autodiff.C check_PROGRAMS = $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3) \ - $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) + $(am__EXEEXT_4) $(am__EXEEXT_5) $(am__EXEEXT_6) \ + $(am__EXEEXT_7) +TESTS = $(am__EXEEXT_1) $(am__append_11) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_2 = -I$(top_srcdir)/include $(KOKKOS_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_3 = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_types_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_shape_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_invariant_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_contract_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_permuted_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_reconstruction_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_side_trace_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_quadrature_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_vector_ops_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit +@LIBMESH_ENABLE_KOKKOS_TRUE@am__append_4 = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_types_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_shape_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_invariant_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_contract_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_permuted_map_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_reconstruction_oracle_unit \ 
+@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_side_trace_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_quadrature_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_vector_ops_oracle_unit \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit # our GLIBC debugging preprocessor flags seem to potentially conflict # with libcppunit binaries. Some cppunit versions work fine for us, # others segfault and/or hang. By default we will not run # GLIBCXX-debugging builds with cppunit unless specifically # configured to. -@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_2 = unit_tests-dbg -@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_3 = unit_tests-dbg -@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_4 = unit_tests-devel -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__append_5 = unit_tests-prof -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__append_6 = unit_tests-oprof -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__append_7 = unit_tests-opt -@LIBMESH_VPATH_BUILD_TRUE@am__append_8 = .linkstamp +@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_5 = unit_tests-dbg +@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_6 = unit_tests-dbg +@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_7 = unit_tests-devel +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__append_8 = unit_tests-prof +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__append_9 = unit_tests-oprof +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__append_10 = unit_tests-opt +@LIBMESH_ENABLE_CPPUNIT_TRUE@am__append_11 = run_unit_tests.sh +@LIBMESH_VPATH_BUILD_TRUE@am__append_12 = .linkstamp 
###################################################################### # # Don't leave code coverage outputs lying around -@CODE_COVERAGE_ENABLED_TRUE@am__append_9 = */*.gcda */*.gcno +@CODE_COVERAGE_ENABLED_TRUE@am__append_13 = */*.gcda */*.gcno subdir = tests ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = \ @@ -182,12 +210,124 @@ mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/include/libmesh_config.h.tmp CONFIG_CLEAN_FILES = run_unit_tests.sh CONFIG_CLEAN_VPATH_FILES = -@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_1 = unit_tests-dbg$(EXEEXT) -@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) -@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-devel$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_4 = unit_tests-prof$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-oprof$(EXEEXT) -@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_6 = unit_tests-opt$(EXEEXT) +@LIBMESH_ENABLE_KOKKOS_TRUE@am__EXEEXT_1 = kokkos_fe_types_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_shape_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_map_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_invariant_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_contract_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_permuted_map_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_reconstruction_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_fe_side_trace_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_quadrature_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_vector_ops_oracle_unit$(EXEEXT) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ kokkos_tensor_ops_oracle_unit$(EXEEXT) 
+@ACSM_ENABLE_GLIBCXX_DEBUGGING_CPPUNIT_TRUE@@ACSM_ENABLE_GLIBCXX_DEBUGGING_TRUE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_2 = unit_tests-dbg$(EXEEXT) +@ACSM_ENABLE_GLIBCXX_DEBUGGING_FALSE@@LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_3 = unit_tests-dbg$(EXEEXT) +@LIBMESH_DEVEL_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@am__EXEEXT_4 = unit_tests-devel$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_PROF_MODE_TRUE@am__EXEEXT_5 = unit_tests-prof$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPROF_MODE_TRUE@am__EXEEXT_6 = unit_tests-oprof$(EXEEXT) +@LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@am__EXEEXT_7 = unit_tests-opt$(EXEEXT) +am__kokkos_fe_contract_unit_SOURCES_DIST = \ + fe/kokkos_fe_contract_test.K +am__dirstamp = $(am__leading_dot)dirstamp +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_contract_unit_OBJECTS = fe/kokkos_fe_contract_test.$(OBJEXT) +kokkos_fe_contract_unit_OBJECTS = \ + $(am_kokkos_fe_contract_unit_OBJECTS) +am__DEPENDENCIES_1 = +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_invariant_unit_SOURCES_DIST = \ + fe/kokkos_fe_invariant_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_invariant_unit_OBJECTS = fe/kokkos_fe_invariant_test.$(OBJEXT) +kokkos_fe_invariant_unit_OBJECTS = \ + $(am_kokkos_fe_invariant_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_map_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_map_oracle_unit_OBJECTS = fe/kokkos_fe_map_oracle_test.$(OBJEXT) +kokkos_fe_map_oracle_unit_OBJECTS = \ + 
$(am_kokkos_fe_map_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_permuted_map_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_permuted_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_permuted_map_oracle_unit_OBJECTS = fe/kokkos_fe_permuted_map_oracle_test.$(OBJEXT) +kokkos_fe_permuted_map_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_permuted_map_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_reconstruction_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_reconstruction_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_reconstruction_oracle_unit_OBJECTS = fe/kokkos_fe_reconstruction_oracle_test.$(OBJEXT) +kokkos_fe_reconstruction_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_reconstruction_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_shape_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_shape_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_shape_oracle_unit_OBJECTS = fe/kokkos_fe_shape_oracle_test.$(OBJEXT) +kokkos_fe_shape_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_shape_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) 
+am__kokkos_fe_side_trace_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_side_trace_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_side_trace_oracle_unit_OBJECTS = fe/kokkos_fe_side_trace_oracle_test.$(OBJEXT) +kokkos_fe_side_trace_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_side_trace_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_fe_types_oracle_unit_SOURCES_DIST = \ + fe/kokkos_fe_types_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_fe_types_oracle_unit_OBJECTS = fe/kokkos_fe_types_oracle_test.$(OBJEXT) +kokkos_fe_types_oracle_unit_OBJECTS = \ + $(am_kokkos_fe_types_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_quadrature_oracle_unit_SOURCES_DIST = \ + fe/kokkos_quadrature_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_quadrature_oracle_unit_OBJECTS = fe/kokkos_quadrature_oracle_test.$(OBJEXT) +kokkos_quadrature_oracle_unit_OBJECTS = \ + $(am_kokkos_quadrature_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST = \ + numerics/kokkos_tensor_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_tensor_ops_oracle_unit_OBJECTS = numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT) +kokkos_tensor_ops_oracle_unit_OBJECTS = \ + $(am_kokkos_tensor_ops_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_DEPENDENCIES = 
\ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) +am__kokkos_vector_ops_oracle_unit_SOURCES_DIST = \ + numerics/kokkos_vector_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@am_kokkos_vector_ops_oracle_unit_OBJECTS = numerics/kokkos_vector_ops_oracle_test.$(OBJEXT) +kokkos_vector_ops_oracle_unit_OBJECTS = \ + $(am_kokkos_vector_ops_oracle_unit_OBJECTS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_DEPENDENCIES = \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(top_builddir)/libmesh_opt.la \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) \ +@LIBMESH_ENABLE_KOKKOS_TRUE@ $(am__DEPENDENCIES_1) am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ stream_redirector.h test_comm.h base/dof_object_test.h \ base/dof_map_test.C base/default_coupling_test.C \ @@ -253,16 +393,18 @@ am__unit_tests_dbg_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C -am__dirstamp = $(am__leading_dot)dirstamp + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + 
utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_1 = fparser/unit_tests_dbg-autodiff.$(OBJEXT) am__objects_2 = unit_tests_dbg-driver.$(OBJEXT) \ base/unit_tests_dbg-dof_map_test.$(OBJEXT) \ @@ -375,6 +517,7 @@ am__objects_2 = unit_tests_dbg-driver.$(OBJEXT) \ solvers/unit_tests_dbg-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_dbg-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_dbg-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_dbg-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_dbg-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_dbg-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_dbg-systems_test.$(OBJEXT) \ @@ -460,15 +603,18 @@ am__unit_tests_devel_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_3 = 
fparser/unit_tests_devel-autodiff.$(OBJEXT) am__objects_4 = unit_tests_devel-driver.$(OBJEXT) \ base/unit_tests_devel-dof_map_test.$(OBJEXT) \ @@ -581,6 +727,7 @@ am__objects_4 = unit_tests_devel-driver.$(OBJEXT) \ solvers/unit_tests_devel-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_devel-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_devel-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_devel-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_devel-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_devel-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_devel-systems_test.$(OBJEXT) \ @@ -662,15 +809,18 @@ am__unit_tests_oprof_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_5 = fparser/unit_tests_oprof-autodiff.$(OBJEXT) am__objects_6 = unit_tests_oprof-driver.$(OBJEXT) \ base/unit_tests_oprof-dof_map_test.$(OBJEXT) \ @@ -783,6 
+933,7 @@ am__objects_6 = unit_tests_oprof-driver.$(OBJEXT) \ solvers/unit_tests_oprof-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_oprof-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_oprof-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_oprof-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_oprof-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_oprof-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_oprof-systems_test.$(OBJEXT) \ @@ -864,15 +1015,18 @@ am__unit_tests_opt_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_7 = fparser/unit_tests_opt-autodiff.$(OBJEXT) am__objects_8 = unit_tests_opt-driver.$(OBJEXT) \ base/unit_tests_opt-dof_map_test.$(OBJEXT) \ @@ -985,6 +1139,7 @@ am__objects_8 = unit_tests_opt-driver.$(OBJEXT) \ solvers/unit_tests_opt-second_order_unsteady_solver_test.$(OBJEXT) \ 
systems/unit_tests_opt-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_opt-equation_systems_test.$(OBJEXT) \ + systems/unit_tests_opt-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_opt-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_opt-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_opt-systems_test.$(OBJEXT) \ @@ -1066,15 +1221,18 @@ am__unit_tests_prof_SOURCES_DIST = driver.C libmesh_cppunit.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C fparser/autodiff.C + systems/equation_systems_test.C \ + systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C fparser/autodiff.C @LIBMESH_ENABLE_FPARSER_TRUE@am__objects_9 = fparser/unit_tests_prof-autodiff.$(OBJEXT) am__objects_10 = unit_tests_prof-driver.$(OBJEXT) \ base/unit_tests_prof-dof_map_test.$(OBJEXT) \ @@ -1187,6 +1345,7 @@ am__objects_10 = unit_tests_prof-driver.$(OBJEXT) \ solvers/unit_tests_prof-second_order_unsteady_solver_test.$(OBJEXT) \ systems/unit_tests_prof-constraint_operator_test.$(OBJEXT) \ systems/unit_tests_prof-equation_systems_test.$(OBJEXT) \ + 
systems/unit_tests_prof-hilbert_system_kokkos_test.$(OBJEXT) \ systems/unit_tests_prof-periodic_bc_test.$(OBJEXT) \ systems/unit_tests_prof-disjoint_neighbor_test.$(OBJEXT) \ systems/unit_tests_prof-systems_test.$(OBJEXT) \ @@ -1771,26 +1930,31 @@ am__depfiles_remade = ./$(DEPDIR)/unit_tests_dbg-driver.Po \ systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po \ systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_devel-systems_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po \ systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po \ + systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_opt-systems_test.Po \ systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po \ systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po \ systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po \ + 
systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po \ systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po \ systems/$(DEPDIR)/unit_tests_prof-systems_test.Po \ utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po \ @@ -1860,10 +2024,32 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@) am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) am__v_CCLD_0 = @echo " CCLD " $@; am__v_CCLD_1 = -SOURCES = $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ +SOURCES = $(kokkos_fe_contract_unit_SOURCES) \ + $(kokkos_fe_invariant_unit_SOURCES) \ + $(kokkos_fe_map_oracle_unit_SOURCES) \ + $(kokkos_fe_permuted_map_oracle_unit_SOURCES) \ + $(kokkos_fe_reconstruction_oracle_unit_SOURCES) \ + $(kokkos_fe_shape_oracle_unit_SOURCES) \ + $(kokkos_fe_side_trace_oracle_unit_SOURCES) \ + $(kokkos_fe_types_oracle_unit_SOURCES) \ + $(kokkos_quadrature_oracle_unit_SOURCES) \ + $(kokkos_tensor_ops_oracle_unit_SOURCES) \ + $(kokkos_vector_ops_oracle_unit_SOURCES) \ + $(unit_tests_dbg_SOURCES) $(unit_tests_devel_SOURCES) \ $(unit_tests_oprof_SOURCES) $(unit_tests_opt_SOURCES) \ $(unit_tests_prof_SOURCES) -DIST_SOURCES = $(am__unit_tests_dbg_SOURCES_DIST) \ +DIST_SOURCES = $(am__kokkos_fe_contract_unit_SOURCES_DIST) \ + $(am__kokkos_fe_invariant_unit_SOURCES_DIST) \ + $(am__kokkos_fe_map_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_permuted_map_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_reconstruction_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_shape_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_side_trace_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_fe_types_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_quadrature_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_tensor_ops_oracle_unit_SOURCES_DIST) \ + $(am__kokkos_vector_ops_oracle_unit_SOURCES_DIST) \ + $(am__unit_tests_dbg_SOURCES_DIST) \ $(am__unit_tests_devel_SOURCES_DIST) \ $(am__unit_tests_oprof_SOURCES_DIST) \ $(am__unit_tests_opt_SOURCES_DIST) \ @@ -2064,11 +2250,19 @@ HDF5_DIR = @HDF5_DIR@ HDF5_LDFLAGS = @HDF5_LDFLAGS@ HDF5_LIBS = @HDF5_LIBS@ 
HDF5_PREFIX = @HDF5_PREFIX@ +HIPCC = @HIPCC@ +ICPX = @ICPX@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KOKKOS_CPPFLAGS = @KOKKOS_CPPFLAGS@ +KOKKOS_CXX = @KOKKOS_CXX@ +KOKKOS_CXXFLAGS = @KOKKOS_CXXFLAGS@ +KOKKOS_LDFLAGS = @KOKKOS_LDFLAGS@ +KOKKOS_LIBS = @KOKKOS_LIBS@ +KOKKOS_MPI_CPPFLAGS = @KOKKOS_MPI_CPPFLAGS@ LASPACK_INCLUDE = @LASPACK_INCLUDE@ LASPACK_LIB = @LASPACK_LIB@ LD = @LD@ @@ -2116,6 +2310,7 @@ NODEPRECATEDFLAG = @NODEPRECATEDFLAG@ NOX_INCLUDES = @NOX_INCLUDES@ NOX_LIBS = @NOX_LIBS@ NOX_MAKEFILE_EXPORT = @NOX_MAKEFILE_EXPORT@ +NVCC = @NVCC@ NVTX_INCLUDE = @NVTX_INCLUDE@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ @@ -2288,6 +2483,7 @@ AM_CPPFLAGS = $(libmesh_optional_INCLUDES) -I$(top_builddir)/include \ -DLIBMESH_IS_UNIT_TESTING AM_LDFLAGS = $(libmesh_LDFLAGS) $(libmesh_contrib_LDFLAGS) +KOKKOS_TEST_CPPFLAGS = $(am__append_2) unit_tests_sources = driver.C libmesh_cppunit.h stream_redirector.h \ test_comm.h base/dof_object_test.h base/dof_map_test.C \ base/default_coupling_test.C base/getpot_test.C \ @@ -2353,15 +2549,18 @@ unit_tests_sources = driver.C libmesh_cppunit.h stream_redirector.h \ partitioning/morton_sfc_partitioner_test.C \ partitioning/parmetis_partitioner_test.C \ partitioning/sfc_partitioner_test.C \ - quadrature/quadrature_test.C solvers/time_solver_test_common.h \ + quadrature/quadrature_exactness.h quadrature/quadrature_test.C \ + solvers/time_solver_test_common.h \ solvers/first_order_unsteady_solver_test.C \ solvers/second_order_unsteady_solver_test.C \ systems/constraint_operator_test.C \ - systems/equation_systems_test.C systems/periodic_bc_test.C \ - systems/disjoint_neighbor_test.C systems/systems_test.C \ - utils/parameters_test.C utils/point_locator_test.C \ - utils/rb_parameters_test.C utils/transparent_comparator.C \ - utils/vectormap_test.C utils/xdr_test.C $(am__append_1) + systems/equation_systems_test.C \ 
+ systems/hilbert_system_kokkos_test.C \ + systems/periodic_bc_test.C systems/disjoint_neighbor_test.C \ + systems/systems_test.C utils/parameters_test.C \ + utils/point_locator_test.C utils/rb_parameters_test.C \ + utils/transparent_comparator.C utils/vectormap_test.C \ + utils/xdr_test.C $(am__append_1) data = matrices/geom_1_extraction_op.h5 \ matrices/geom_1_extraction_op.m \ matrices/geom_1_extraction_op.m.gz \ @@ -2449,7 +2648,62 @@ data = matrices/geom_1_extraction_op.h5 \ unit_tests_data = $(data) # Why isn't this working automatically? -EXTRA_DIST = $(data) +EXTRA_DIST = $(data) fe/kokkos_fe_oracle_test_utils.h +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_SOURCES = fe/kokkos_fe_types_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_types_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_SOURCES = fe/kokkos_fe_shape_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_shape_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_SOURCES = fe/kokkos_fe_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) 
$(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_SOURCES = fe/kokkos_fe_invariant_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_invariant_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_SOURCES = fe/kokkos_fe_contract_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_contract_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_SOURCES = fe/kokkos_fe_permuted_map_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_permuted_map_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_SOURCES = fe/kokkos_fe_reconstruction_oracle_test.K 
+@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_reconstruction_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_SOURCES = fe/kokkos_fe_side_trace_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_fe_side_trace_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_SOURCES = fe/kokkos_quadrature_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_quadrature_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_SOURCES = numerics/kokkos_vector_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_LDFLAGS 
= $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_vector_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_SOURCES = numerics/kokkos_tensor_ops_oracle_test.K +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_CPPFLAGS = $(AM_CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_CXXFLAGS = $(AM_CXXFLAGS) $(KOKKOS_CXXFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_LDFLAGS = $(AM_LDFLAGS) $(KOKKOS_LDFLAGS) +@LIBMESH_ENABLE_KOKKOS_TRUE@kokkos_tensor_ops_oracle_unit_LDADD = $(top_builddir)/libmesh_opt.la $(KOKKOS_LIBS) $(libmesh_optional_LIBS) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_SOURCES = $(unit_tests_sources) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_CPPFLAGS = $(CPPFLAGS_DBG) $(AM_CPPFLAGS) @LIBMESH_DBG_MODE_TRUE@@LIBMESH_ENABLE_CPPUNIT_TRUE@unit_tests_dbg_CXXFLAGS = $(CXXFLAGS_DBG) @@ -2480,7 +2734,52 @@ EXTRA_DIST = $(data) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_LDADD = $(top_builddir)/libmesh_opt.la @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_optdir = $(datadir) @LIBMESH_ENABLE_CPPUNIT_TRUE@@LIBMESH_OPT_MODE_TRUE@unit_tests_opt_DATA = $(data) -@LIBMESH_ENABLE_CPPUNIT_TRUE@TESTS = run_unit_tests.sh + +# Custom link rules so the Kokkos compiler drives the final link step. 
+kokkos_fe_types_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_types_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_shape_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_shape_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_invariant_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_invariant_unit_LDFLAGS) -o $@ + +kokkos_fe_contract_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_contract_unit_LDFLAGS) -o $@ + +kokkos_fe_permuted_map_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_permuted_map_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_reconstruction_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_reconstruction_oracle_unit_LDFLAGS) -o $@ + +kokkos_fe_side_trace_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_fe_side_trace_oracle_unit_LDFLAGS) -o $@ + +kokkos_quadrature_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_quadrature_oracle_unit_LDFLAGS) -o $@ + +kokkos_vector_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_vector_ops_oracle_unit_LDFLAGS) -o $@ + +kokkos_tensor_ops_oracle_unit_LINK = \ + $(LIBTOOL) --tag=CXX --mode=link $(KOKKOS_CXX) \ + $(LDFLAGS) $(kokkos_tensor_ops_oracle_unit_LDFLAGS) -o $@ + CLEANFILES = cube_mesh.xda slit_mesh.xda slit_solution.xda out.e \ mesh_with_soln.e elemental_from_nodal.e write_elemset_data.e \ write_sideset_data.e write_nodeset_data.e write_edgeset_data.e \ @@ -2514,8 +2813,8 @@ CLEANFILES = cube_mesh.xda slit_mesh.xda slit_solution.xda out.e \ write_exodus_QUADSHELL9.e 
write_exodus_TET10.e \ write_exodus_TET14.e write_exodus_TET4.e write_exodus_TRI3.e \ write_exodus_TRI6.e write_exodus_TRI7.e \ - write_exodus_TRISHELL3.e smoother.out $(am__append_8) \ - $(am__append_9) + write_exodus_TRISHELL3.e smoother.out $(am__append_12) \ + $(am__append_13) # need to link any data files for VPATH builds @LIBMESH_VPATH_BUILD_TRUE@BUILT_SOURCES = .linkstamp @@ -2523,7 +2822,7 @@ all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: -.SUFFIXES: .C .lo .o .obj +.SUFFIXES: .C .K .lo .o .obj $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ @@ -2559,6 +2858,84 @@ run_unit_tests.sh: $(top_builddir)/config.status $(srcdir)/run_unit_tests.sh.in clean-checkPROGRAMS: $(am__rm_f) $(check_PROGRAMS) test -z "$(EXEEXT)" || $(am__rm_f) $(check_PROGRAMS:$(EXEEXT)=) +fe/$(am__dirstamp): + @$(MKDIR_P) fe + @: >>fe/$(am__dirstamp) +fe/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) fe/$(DEPDIR) + @: >>fe/$(DEPDIR)/$(am__dirstamp) +fe/kokkos_fe_contract_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_contract_unit$(EXEEXT): $(kokkos_fe_contract_unit_OBJECTS) $(kokkos_fe_contract_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_contract_unit_DEPENDENCIES) + @rm -f kokkos_fe_contract_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_contract_unit_LINK) $(kokkos_fe_contract_unit_OBJECTS) $(kokkos_fe_contract_unit_LDADD) $(LIBS) +fe/kokkos_fe_invariant_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_invariant_unit$(EXEEXT): $(kokkos_fe_invariant_unit_OBJECTS) $(kokkos_fe_invariant_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_invariant_unit_DEPENDENCIES) + @rm -f kokkos_fe_invariant_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_invariant_unit_LINK) $(kokkos_fe_invariant_unit_OBJECTS) $(kokkos_fe_invariant_unit_LDADD) $(LIBS) +fe/kokkos_fe_map_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + 
+kokkos_fe_map_oracle_unit$(EXEEXT): $(kokkos_fe_map_oracle_unit_OBJECTS) $(kokkos_fe_map_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_map_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_map_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_map_oracle_unit_LINK) $(kokkos_fe_map_oracle_unit_OBJECTS) $(kokkos_fe_map_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_permuted_map_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_permuted_map_oracle_unit$(EXEEXT): $(kokkos_fe_permuted_map_oracle_unit_OBJECTS) $(kokkos_fe_permuted_map_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_permuted_map_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_permuted_map_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_permuted_map_oracle_unit_LINK) $(kokkos_fe_permuted_map_oracle_unit_OBJECTS) $(kokkos_fe_permuted_map_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_reconstruction_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_reconstruction_oracle_unit$(EXEEXT): $(kokkos_fe_reconstruction_oracle_unit_OBJECTS) $(kokkos_fe_reconstruction_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_reconstruction_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_reconstruction_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_reconstruction_oracle_unit_LINK) $(kokkos_fe_reconstruction_oracle_unit_OBJECTS) $(kokkos_fe_reconstruction_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_shape_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_shape_oracle_unit$(EXEEXT): $(kokkos_fe_shape_oracle_unit_OBJECTS) $(kokkos_fe_shape_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_shape_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_shape_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_shape_oracle_unit_LINK) $(kokkos_fe_shape_oracle_unit_OBJECTS) $(kokkos_fe_shape_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_side_trace_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_side_trace_oracle_unit$(EXEEXT): 
$(kokkos_fe_side_trace_oracle_unit_OBJECTS) $(kokkos_fe_side_trace_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_side_trace_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_side_trace_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_side_trace_oracle_unit_LINK) $(kokkos_fe_side_trace_oracle_unit_OBJECTS) $(kokkos_fe_side_trace_oracle_unit_LDADD) $(LIBS) +fe/kokkos_fe_types_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_fe_types_oracle_unit$(EXEEXT): $(kokkos_fe_types_oracle_unit_OBJECTS) $(kokkos_fe_types_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_fe_types_oracle_unit_DEPENDENCIES) + @rm -f kokkos_fe_types_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_fe_types_oracle_unit_LINK) $(kokkos_fe_types_oracle_unit_OBJECTS) $(kokkos_fe_types_oracle_unit_LDADD) $(LIBS) +fe/kokkos_quadrature_oracle_test.$(OBJEXT): fe/$(am__dirstamp) \ + fe/$(DEPDIR)/$(am__dirstamp) + +kokkos_quadrature_oracle_unit$(EXEEXT): $(kokkos_quadrature_oracle_unit_OBJECTS) $(kokkos_quadrature_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_quadrature_oracle_unit_DEPENDENCIES) + @rm -f kokkos_quadrature_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_quadrature_oracle_unit_LINK) $(kokkos_quadrature_oracle_unit_OBJECTS) $(kokkos_quadrature_oracle_unit_LDADD) $(LIBS) +numerics/$(am__dirstamp): + @$(MKDIR_P) numerics + @: >>numerics/$(am__dirstamp) +numerics/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) numerics/$(DEPDIR) + @: >>numerics/$(DEPDIR)/$(am__dirstamp) +numerics/kokkos_tensor_ops_oracle_test.$(OBJEXT): \ + numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) + +kokkos_tensor_ops_oracle_unit$(EXEEXT): $(kokkos_tensor_ops_oracle_unit_OBJECTS) $(kokkos_tensor_ops_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_tensor_ops_oracle_unit_DEPENDENCIES) + @rm -f kokkos_tensor_ops_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_tensor_ops_oracle_unit_LINK) $(kokkos_tensor_ops_oracle_unit_OBJECTS) $(kokkos_tensor_ops_oracle_unit_LDADD) $(LIBS) +numerics/kokkos_vector_ops_oracle_test.$(OBJEXT): \ + 
numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) + +kokkos_vector_ops_oracle_unit$(EXEEXT): $(kokkos_vector_ops_oracle_unit_OBJECTS) $(kokkos_vector_ops_oracle_unit_DEPENDENCIES) $(EXTRA_kokkos_vector_ops_oracle_unit_DEPENDENCIES) + @rm -f kokkos_vector_ops_oracle_unit$(EXEEXT) + $(AM_V_GEN)$(kokkos_vector_ops_oracle_unit_LINK) $(kokkos_vector_ops_oracle_unit_OBJECTS) $(kokkos_vector_ops_oracle_unit_LDADD) $(LIBS) base/$(am__dirstamp): @$(MKDIR_P) base @: >>base/$(am__dirstamp) @@ -2579,12 +2956,6 @@ base/unit_tests_dbg-nonmanifold_coupling_test.$(OBJEXT): \ base/$(am__dirstamp) base/$(DEPDIR)/$(am__dirstamp) base/unit_tests_dbg-multi_evaluable_pred_test.$(OBJEXT): \ base/$(am__dirstamp) base/$(DEPDIR)/$(am__dirstamp) -fe/$(am__dirstamp): - @$(MKDIR_P) fe - @: >>fe/$(am__dirstamp) -fe/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) fe/$(DEPDIR) - @: >>fe/$(DEPDIR)/$(am__dirstamp) fe/unit_tests_dbg-fe_bernstein_test.$(OBJEXT): fe/$(am__dirstamp) \ fe/$(DEPDIR)/$(am__dirstamp) fe/unit_tests_dbg-fe_clough_test.$(OBJEXT): fe/$(am__dirstamp) \ @@ -2727,12 +3098,6 @@ mesh/unit_tests_dbg-project_solution_test.$(OBJEXT): \ mesh/$(am__dirstamp) mesh/$(DEPDIR)/$(am__dirstamp) mesh/unit_tests_dbg-xdrio_test.$(OBJEXT): mesh/$(am__dirstamp) \ mesh/$(DEPDIR)/$(am__dirstamp) -numerics/$(am__dirstamp): - @$(MKDIR_P) numerics - @: >>numerics/$(am__dirstamp) -numerics/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) numerics/$(DEPDIR) - @: >>numerics/$(DEPDIR)/$(am__dirstamp) numerics/unit_tests_dbg-composite_function_test.$(OBJEXT): \ numerics/$(am__dirstamp) numerics/$(DEPDIR)/$(am__dirstamp) numerics/unit_tests_dbg-coupling_matrix_test.$(OBJEXT): \ @@ -2847,6 +3212,8 @@ systems/unit_tests_dbg-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_dbg-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_dbg-hilbert_system_kokkos_test.$(OBJEXT): \ + 
systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_dbg-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_dbg-disjoint_neighbor_test.$(OBJEXT): \ @@ -3111,6 +3478,8 @@ systems/unit_tests_devel-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_devel-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_devel-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_devel-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_devel-disjoint_neighbor_test.$(OBJEXT): \ @@ -3363,6 +3732,8 @@ systems/unit_tests_oprof-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_oprof-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_oprof-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_oprof-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_oprof-disjoint_neighbor_test.$(OBJEXT): \ @@ -3615,6 +3986,8 @@ systems/unit_tests_opt-constraint_operator_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_opt-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_opt-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_opt-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_opt-disjoint_neighbor_test.$(OBJEXT): \ @@ -3867,6 +4240,8 @@ systems/unit_tests_prof-constraint_operator_test.$(OBJEXT): \ 
systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_prof-equation_systems_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) +systems/unit_tests_prof-hilbert_system_kokkos_test.$(OBJEXT): \ + systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_prof-periodic_bc_test.$(OBJEXT): \ systems/$(am__dirstamp) systems/$(DEPDIR)/$(am__dirstamp) systems/unit_tests_prof-disjoint_neighbor_test.$(OBJEXT): \ @@ -4463,26 +4838,31 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ 
@am__quote@systems/$(DEPDIR)/unit_tests_devel-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_opt-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po@am__quote@ # 
am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@systems/$(DEPDIR)/unit_tests_prof-systems_test.Po@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po@am__quote@ # am--include-marker @@ -6100,6 +6480,20 @@ systems/unit_tests_dbg-equation_systems_test.obj: systems/equation_systems_test. @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_dbg-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_dbg-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_dbg-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_dbg-hilbert_system_kokkos_test.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_dbg-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_dbg-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_dbg-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_dbg-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_dbg-periodic_bc_test.o: 
systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_dbg_CPPFLAGS) $(CPPFLAGS) $(unit_tests_dbg_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_dbg-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Tpo -c -o systems/unit_tests_dbg-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po @@ -7794,6 +8188,20 @@ systems/unit_tests_devel-equation_systems_test.obj: systems/equation_systems_tes @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_devel-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_devel-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_devel-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ 
$(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_devel-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_devel-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_devel-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_devel-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_devel-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 
'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_devel-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_devel_CPPFLAGS) $(CPPFLAGS) $(unit_tests_devel_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_devel-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Tpo -c -o systems/unit_tests_devel-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po @@ -9488,6 +9896,20 @@ systems/unit_tests_oprof-equation_systems_test.obj: systems/equation_systems_tes @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_oprof-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_oprof-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_oprof-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_oprof-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_oprof-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_oprof-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_oprof-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) 
$(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_oprof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_oprof-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_oprof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_oprof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_oprof-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Tpo -c -o systems/unit_tests_oprof-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po @@ -11182,6 +11604,20 @@ systems/unit_tests_opt-equation_systems_test.obj: systems/equation_systems_test. 
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_opt-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_opt-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_opt-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_opt-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_opt-hilbert_system_kokkos_test.obj: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_opt-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_opt-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_opt-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_opt-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_opt_CPPFLAGS) $(CPPFLAGS) $(unit_tests_opt_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_opt-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Tpo -c -o systems/unit_tests_opt-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po @@ -12876,6 +13312,20 @@ 
systems/unit_tests_prof-equation_systems_test.obj: systems/equation_systems_test @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_prof-equation_systems_test.obj `if test -f 'systems/equation_systems_test.C'; then $(CYGPATH_W) 'systems/equation_systems_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/equation_systems_test.C'; fi` +systems/unit_tests_prof-hilbert_system_kokkos_test.o: systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_prof-hilbert_system_kokkos_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_prof-hilbert_system_kokkos_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.o `test -f 'systems/hilbert_system_kokkos_test.C' || echo '$(srcdir)/'`systems/hilbert_system_kokkos_test.C + +systems/unit_tests_prof-hilbert_system_kokkos_test.obj: 
systems/hilbert_system_kokkos_test.C +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_prof-hilbert_system_kokkos_test.obj -MD -MP -MF systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Tpo systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='systems/hilbert_system_kokkos_test.C' object='systems/unit_tests_prof-hilbert_system_kokkos_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -c -o systems/unit_tests_prof-hilbert_system_kokkos_test.obj `if test -f 'systems/hilbert_system_kokkos_test.C'; then $(CYGPATH_W) 'systems/hilbert_system_kokkos_test.C'; else $(CYGPATH_W) '$(srcdir)/systems/hilbert_system_kokkos_test.C'; fi` + systems/unit_tests_prof-periodic_bc_test.o: systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(unit_tests_prof_CPPFLAGS) $(CPPFLAGS) $(unit_tests_prof_CXXFLAGS) $(CXXFLAGS) -MT systems/unit_tests_prof-periodic_bc_test.o -MD -MP -MF systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Tpo -c -o systems/unit_tests_prof-periodic_bc_test.o `test -f 'systems/periodic_bc_test.C' || echo '$(srcdir)/'`systems/periodic_bc_test.C @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Tpo systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po @@ -13932,26 +14382,31 @@ distclean: distclean-am -rm -f systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po + -rm -f 
systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-systems_test.Po -rm -f utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po @@ -14584,26 +15039,31 @@ maintainer-clean: maintainer-clean-am -rm -f systems/$(DEPDIR)/unit_tests_dbg-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_dbg-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_dbg-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_devel-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_devel-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_oprof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_oprof-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-constraint_operator_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_opt-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_opt-systems_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-constraint_operator_test.Po -rm -f 
systems/$(DEPDIR)/unit_tests_prof-disjoint_neighbor_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-equation_systems_test.Po + -rm -f systems/$(DEPDIR)/unit_tests_prof-hilbert_system_kokkos_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-periodic_bc_test.Po -rm -f systems/$(DEPDIR)/unit_tests_prof-systems_test.Po -rm -f utils/$(DEPDIR)/unit_tests_dbg-parameters_test.Po @@ -14717,6 +15177,16 @@ $(top_builddir)/libmesh_prof.la: FORCE $(top_builddir)/libmesh_oprof.la: FORCE (cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) libmesh_oprof.la) +# Compile .K translation units with the Kokkos device compiler. +# If KOKKOS_CXX is not the MPI wrapper, configure populates +# $(KOKKOS_MPI_CPPFLAGS) from the wrapper's compile flags so mpi.h and +# any wrapper-provided defines remain visible. +.K.o: + $(KOKKOS_CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(KOKKOS_MPI_CPPFLAGS) $(MPI_INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(KOKKOS_TEST_CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) $(KOKKOS_CXXFLAGS) \ + -c $< -o $@ + @LIBMESH_VPATH_BUILD_TRUE@.linkstamp: @LIBMESH_VPATH_BUILD_TRUE@ -rm -f solutions && $(LN_S) -f $(srcdir)/solutions . @LIBMESH_VPATH_BUILD_TRUE@ -rm -f meshes && $(LN_S) -f $(srcdir)/meshes . diff --git a/tests/driver.C b/tests/driver.C index 3ad324a6e20..220f288152e 100644 --- a/tests/driver.C +++ b/tests/driver.C @@ -13,6 +13,25 @@ #include "libmesh_cppunit.h" #include "test_comm.h" +#ifdef LIBMESH_HAVE_KOKKOS +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +struct KokkosScope +{ + KokkosScope(int & argc, char ** & argv) + { + Kokkos::initialize(argc, argv); + } + + ~KokkosScope() + { + Kokkos::finalize(); + } +}; +#endif + #ifdef LIBMESH_HAVE_CXX11_REGEX // C++ includes @@ -107,6 +126,9 @@ int add_matching_tests_to_runner(CppUnit::Test * test, int main(int argc, char ** argv) { +#ifdef LIBMESH_HAVE_KOKKOS + KokkosScope kokkos_scope(argc, argv); +#endif // Initialize the library. 
This is necessary because the library // may depend on a number of other libraries (i.e. MPI and Petsc) // that require initialization before use. diff --git a/tests/fe/kokkos_fe_contract_test.K b/tests/fe/kokkos_fe_contract_test.K new file mode 100644 index 00000000000..4fd2d0243fd --- /dev/null +++ b/tests/fe/kokkos_fe_contract_test.K @@ -0,0 +1,336 @@ +// Host-side contract tests for libMesh::Kokkos hard-fail paths. +// +// This executable self-spawns child processes that intentionally invoke +// unsupported Kokkos FE entry points. A child succeeds only if the call +// returns normally; the parent test expects those calls to terminate with a +// non-zero exit status instead. +// +// Returns 0 on success, non-zero on failure. + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/node.h" + +#include +#include +#include +#include +#include +#include + +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; + +namespace +{ + +struct contract_case +{ + const char * name; + bool expect_abort; +}; + +struct element_fixture +{ + std::unique_ptr elem; + std::vector> nodes; +}; + +} // anonymous namespace + +static element_fixture +build_master_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(elem_type); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(fixture.elem->n_nodes()); + + std::vector refspace_nodes; + libMesh::FEBase::get_refspace_nodes(elem_type, refspace_nodes); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point refspace = refspace_nodes[i]; + fixture.nodes.push_back(libMesh::Node::build(refspace(0), refspace(1), refspace(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static std::unique_ptr 
+find_side_of_type(const libMesh::Elem & parent, + libMesh::ElemType desired_type, + unsigned int & side_id) +{ + for (unsigned int s = 0; s < parent.n_sides(); ++s) + { + auto side = parent.build_side_ptr(s); + if (side->type() == desired_type) + { + side_id = s; + return side; + } + } + + side_id = libMesh::invalid_uint; + return nullptr; +} + +static void +invoke_face_jacobian_on_side(const libMesh::Elem & side) +{ + constexpr unsigned int max_face_nodes = 9; + + RealVector face_nodes[max_face_nodes]; + for (unsigned int i = 0; i < side.n_nodes(); ++i) + face_nodes[i] = libMesh::Kokkos::point_to_real_vector(side.point(i)); + + (void)libMesh::Kokkos::face_jacobian( + libMesh::LAGRANGE_MAP, side.type(), face_nodes, side.n_nodes(), 0.0, 0.0, 0.0); +} + +static int +run_child_case(const std::string & case_name) +{ + using libMesh::Kokkos::FEShapeKey; + + if (case_name == "noop") + return 0; + + if (case_name == "get_side_topology_prism6") + { + (void)libMesh::Kokkos::get_side_topology(libMesh::PRISM6); + return 0; + } + + if (case_name == "get_side_topology_pyramid5") + { + (void)libMesh::Kokkos::get_side_topology(libMesh::PYRAMID5); + return 0; + } + + if (case_name == "shape_lagrange_edge4_third") + { + (void)libMesh::Kokkos::shape( + FEShapeKey{libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "grad_shape_lagrange_prism6_first") + { + (void)libMesh::Kokkos::grad_shape( + FEShapeKey{libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "shape_monomial_hex27_sixth") + { + (void)libMesh::Kokkos::shape( + FEShapeKey{libMesh::MONOMIAL, libMesh::HEX27, libMesh::SIXTH}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "grad_shape_monomial_tri7_sixth") + { + (void)libMesh::Kokkos::grad_shape( + FEShapeKey{libMesh::MONOMIAL, libMesh::TRI7, libMesh::SIXTH}, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "ndofs_lagrange_prism6_first") + { + 
(void)libMesh::Kokkos::n_dofs( + FEShapeKey{libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST}); + return 0; + } + + if (case_name == "map_shape_rational") + { + (void)libMesh::Kokkos::map_shape( + libMesh::RATIONAL_BERNSTEIN_MAP, libMesh::QUAD4, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "grad_map_shape_rational") + { + (void)libMesh::Kokkos::grad_map_shape( + libMesh::RATIONAL_BERNSTEIN_MAP, libMesh::QUAD4, 0, 0.0, 0.0, 0.0); + return 0; + } + + if (case_name == "face_normal_parent_dim2") + { + RealTensor J = libMesh::Kokkos::zero_tensor(); + J(0, 0) = 1.0; + J(0, 1) = 2.0; + (void)libMesh::Kokkos::face_normal(J, 2); + return 0; + } + + if (case_name == "map_face_qp_to_parent_prism20_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM20); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + (void)libMesh::Kokkos::map_face_qp_to_parent( + *side, libMesh::LAGRANGE_MAP, side->type(), libMesh::Kokkos::zero_vector()); + return 0; + } + + if (case_name == "map_face_qp_to_parent_prism21_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM21); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + (void)libMesh::Kokkos::map_face_qp_to_parent( + *side, libMesh::LAGRANGE_MAP, side->type(), libMesh::Kokkos::zero_vector()); + return 0; + } + + if (case_name == "map_face_qp_to_parent_pyramid18_tri7") + { + auto fixture = build_master_fixture(libMesh::PYRAMID18); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + (void)libMesh::Kokkos::map_face_qp_to_parent( + *side, libMesh::LAGRANGE_MAP, side->type(), libMesh::Kokkos::zero_vector()); + return 0; + } + + if (case_name == "face_jacobian_prism20_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM20); + 
unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + invoke_face_jacobian_on_side(*side); + return 0; + } + + if (case_name == "face_jacobian_prism21_tri7") + { + auto fixture = build_master_fixture(libMesh::PRISM21); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + invoke_face_jacobian_on_side(*side); + return 0; + } + + if (case_name == "face_jacobian_pyramid18_tri7") + { + auto fixture = build_master_fixture(libMesh::PYRAMID18); + unsigned int side_id = libMesh::invalid_uint; + auto side = find_side_of_type(*fixture.elem, libMesh::TRI7, side_id); + if (!side) + return 2; + invoke_face_jacobian_on_side(*side); + return 0; + } + + std::fprintf(stderr, "Unknown child case: %s\n", case_name.c_str()); + return 3; +} + +static int +run_command(const std::string & command) +{ + /* Flush every open output stream before handing the command to std::system(); returns std::system()'s raw status. */ std::fflush(nullptr); + return std::system(command.c_str()); +} + +static bool +expect_child_success(const char * argv0, const char * case_name) +{ + /* Re-run this executable as "--child <case_name>" with output discarded; true only when the raw status is 0. argv0 is single-quoted so a path containing spaces or shell metacharacters reaches sh as one word (a path that itself contains a single quote is still unsupported). */ const std::string command = + "'" + std::string(argv0) + "' --child " + case_name + " >/dev/null 2>&1"; + const int status = run_command(command); + return status == 0; +} + +static bool +expect_child_abort(const char * argv0, const char * case_name) +{ + /* Re-run this executable as "--child <case_name>" with output discarded; true when the child was signalled or exited with a code other than 0, 2 or 3 (the codes run_child_case() returns itself). argv0 is single-quoted for the same reason as in expect_child_success(). */ const std::string command = + "'" + std::string(argv0) + "' --child " + case_name + " >/dev/null 2>&1"; + const int status = run_command(command); + if (status == -1) + return false; + + if (WIFSIGNALED(status)) + return true; + + if (!WIFEXITED(status)) + return false; + + const int exit_code = WEXITSTATUS(status); + return exit_code != 0 && exit_code != 2 && exit_code != 3; +} + +int +main(int argc, char ** argv) +{ + if (argc == 3 && std::string(argv[1]) == "--child") + return run_child_case(argv[2]); + + if (!expect_child_success(argv[0], "noop")) + { + std::printf("[contract_spawn] FAIL (could not successfully respawn
test executable)\n"); + return 1; + } + + const contract_case cases[] = { + { "get_side_topology_prism6", true }, + { "get_side_topology_pyramid5", true }, + { "shape_lagrange_edge4_third", false }, + { "grad_shape_lagrange_prism6_first", true }, + { "shape_monomial_hex27_sixth", true }, + { "grad_shape_monomial_tri7_sixth", true }, + { "ndofs_lagrange_prism6_first", true }, + { "map_shape_rational", true }, + { "grad_map_shape_rational", true }, + { "face_normal_parent_dim2", true }, + { "face_jacobian_prism20_tri7", true }, + { "face_jacobian_prism21_tri7", true }, + { "face_jacobian_pyramid18_tri7", true }, + { "map_face_qp_to_parent_prism20_tri7", true }, + { "map_face_qp_to_parent_prism21_tri7", true }, + { "map_face_qp_to_parent_pyramid18_tri7", true } + }; + + int total_fail = 0; + for (const auto & info : cases) + { + const bool passed = info.expect_abort ? expect_child_abort(argv[0], info.name) + : expect_child_success(argv[0], info.name); + const int fail = passed ? 0 : 1; + std::printf("[%s] [%s] %s (%d failures)\n", + info.expect_abort ? "contract_abort" : "contract_success", + info.name, + passed ? "PASS" : "FAIL", + fail); + total_fail += fail; + } + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_invariant_test.K b/tests/fe/kokkos_fe_invariant_test.K new file mode 100644 index 00000000000..f0c070e4406 --- /dev/null +++ b/tests/fe/kokkos_fe_invariant_test.K @@ -0,0 +1,418 @@ +// Kokkos kernel regression tests for libMesh::Kokkos FE invariants and quadrature exactness. +// The test suite covers: +// A. Partition of unity for all implemented LAGRANGE map topologies. +// B. Zero-sum gradients for the same map topologies. +// C. Nodal Kronecker-delta behavior at master nodes for the same map topologies. +// D. 
Quadrature exactness sweeps for the canonical tensor-product and simplex +// reference topologies using analytic monomial integrals on the reference +// element. +// +// Returns 0 on success, non-zero on failure. + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_quadrature.h" +#include "../quadrature/quadrature_exactness.h" + +#include "libmesh/elem.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::GaussQuadrature; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_qps; +using kokkos_test_utils::build_reference_elem; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::upload_real; + +static constexpr double invariant_tol = 1.0e-13; +static constexpr double exactness_tol = 2.0e-12; +static constexpr unsigned int quadrature_order = 4; + +namespace +{ + +struct map_elem_info +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +struct quadrature_exactness_case +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int max_order; + const char * name; +}; + +static const map_elem_info map_elems[] = { + { libMesh::EDGE2, 1, 2, "EDGE2" }, + { libMesh::EDGE3, 1, 3, "EDGE3" }, + { libMesh::TRI3, 2, 3, "TRI3" }, + { libMesh::TRI6, 2, 6, "TRI6" }, + { libMesh::QUAD4, 2, 4, "QUAD4" }, + { libMesh::QUAD8, 2, 8, "QUAD8" }, + { libMesh::QUAD9, 2, 9, "QUAD9" }, + { libMesh::TET4, 3, 4, "TET4" }, + { libMesh::TET10, 3, 10, "TET10" }, + { libMesh::HEX8, 3, 8, "HEX8" }, + { libMesh::HEX20, 3, 20, "HEX20" }, + { libMesh::HEX27, 3, 27, "HEX27" } +}; + +// These cases sweep the full exactness range provided by the 
current Kokkos +// QGauss helper: +// - tensor-product rules: through order 13 (7-point 1D Gauss-Legendre) +// - simplex rules: through order 6 (highest explicit triangle/tet tables) +static const quadrature_exactness_case quadrature_cases[] = { + { libMesh::EDGE2, 1, 13, "EDGE2" }, + { libMesh::TRI3, 2, 6, "TRI3" }, + { libMesh::QUAD4, 2, 13, "QUAD4" }, + { libMesh::TET4, 3, 6, "TET4" }, + { libMesh::HEX8, 3, 13, "HEX8" } +}; + +} // anonymous namespace + +LIBMESH_DEVICE_INLINE Real +int_pow(Real x, unsigned int p) +{ + Real result = 1.0; + for (unsigned int i = 0; i < p; ++i) + result *= x; + return result; +} + +template +static int +test_partition_of_unity_impl(const map_elem_info & info) +{ + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.topo, info.dim, quadrature_order, xi_h, eta_h, zeta_h); + + auto d_xi = upload_real(xi_h, "unity_xi"); + auto d_eta = upload_real(eta_h, "unity_eta"); + auto d_zeta = upload_real(zeta_h, "unity_zeta"); + + Kokkos::View d_sum(std::string("unity_sum"), nqp); + + const unsigned int n_dofs = info.n_dofs; + Kokkos::parallel_for( + nqp, + KOKKOS_LAMBDA(int q) { + Real sum = 0.0; + for (unsigned int i = 0; i < n_dofs; ++i) + sum += libMesh::Kokkos::map_shape(i, d_xi(q), d_eta(q), d_zeta(q)); + d_sum(q) = sum; + }); + Kokkos::fence(); + + std::vector ref_values(nqp, 1.0); + return compare_device_values(d_sum, ref_values, invariant_tol); +} + +struct partition_of_unity_dispatch +{ + explicit partition_of_unity_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_partition_of_unity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported partition topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_partition_of_unity(const map_elem_info & info) +{ + const partition_of_unity_dispatch dispatch(info); + return 
dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_zero_sum_gradients_impl(const map_elem_info & info) +{ + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.topo, info.dim, quadrature_order, xi_h, eta_h, zeta_h); + + auto d_xi = upload_real(xi_h, "gradsum_xi"); + auto d_eta = upload_real(eta_h, "gradsum_eta"); + auto d_zeta = upload_real(zeta_h, "gradsum_zeta"); + + Kokkos::View d_sum(std::string("gradsum"), info.dim * nqp); + + const unsigned int n_dofs = info.n_dofs; + const unsigned int dim = info.dim; + Kokkos::parallel_for( + nqp, + KOKKOS_LAMBDA(int q) { + RealVector sum = libMesh::Kokkos::zero_vector(); + for (unsigned int i = 0; i < n_dofs; ++i) + sum += libMesh::Kokkos::grad_map_shape( + i, d_xi(q), d_eta(q), d_zeta(q)); + + for (unsigned int d = 0; d < dim; ++d) + d_sum(dim * q + d) = sum(d); + }); + Kokkos::fence(); + + std::vector ref_values(info.dim * nqp, 0.0); + return compare_device_values(d_sum, ref_values, invariant_tol); +} + +struct zero_sum_gradients_dispatch +{ + explicit zero_sum_gradients_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_zero_sum_gradients_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported zero-sum gradient topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_zero_sum_gradients(const map_elem_info & info) +{ + const zero_sum_gradients_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_kronecker_delta_impl(const map_elem_info & info) +{ + std::vector xi_h(info.n_dofs), eta_h(info.n_dofs), zeta_h(info.n_dofs); + for (unsigned int j = 0; j < info.n_dofs; ++j) + { + libMesh::Point p; + libmesh_error_msg_if(!libMesh::try_reference_node(Topo, j, p), + 
"test_kronecker_delta_impl(): unsupported reference-node lookup"); + xi_h[j] = p(0); + eta_h[j] = p(1); + zeta_h[j] = p(2); + } + + auto d_xi = upload_real(xi_h, "delta_xi"); + auto d_eta = upload_real(eta_h, "delta_eta"); + auto d_zeta = upload_real(zeta_h, "delta_zeta"); + + Kokkos::View d_values(std::string("delta_values"), info.n_dofs * info.n_dofs); + + const unsigned int n_dofs = info.n_dofs; + Kokkos::parallel_for( + n_dofs * n_dofs, + KOKKOS_LAMBDA(int idx) { + const unsigned int i = static_cast(idx) / n_dofs; + const unsigned int j = static_cast(idx) % n_dofs; + d_values(idx) = libMesh::Kokkos::map_shape( + i, d_xi(j), d_eta(j), d_zeta(j)); + }); + Kokkos::fence(); + + std::vector ref_values(info.n_dofs * info.n_dofs, 0.0); + for (unsigned int i = 0; i < info.n_dofs; ++i) + ref_values[i * info.n_dofs + i] = 1.0; + + return compare_device_values(d_values, ref_values, invariant_tol); +} + +struct kronecker_delta_dispatch +{ + explicit kronecker_delta_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_kronecker_delta_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported kronecker topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_kronecker_delta(const map_elem_info & info) +{ + const kronecker_delta_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +static double +integrate_monomial_on_device(const quadrature_exactness_case & info, + unsigned int order, + unsigned int a, + unsigned int b, + unsigned int c) +{ + const unsigned int nqp = GaussQuadrature::n_points(info.topo, order); + const libMesh::ElemType topo = info.topo; + + double integral = 0.0; + Kokkos::parallel_reduce( + nqp, + KOKKOS_LAMBDA(int qp, double & local_sum) { + const RealVector pt = GaussQuadrature::point(topo, order, qp); + const 
Real weight = GaussQuadrature::weight(topo, order, qp); + Real monomial = int_pow(pt(0), a); + +#if LIBMESH_DIM > 1 + monomial *= int_pow(pt(1), b); +#else + libmesh_assert_equal_to(b, 0); +#endif + +#if LIBMESH_DIM > 2 + monomial *= int_pow(pt(2), c); +#else + libmesh_assert_equal_to(c, 0); +#endif + + local_sum += static_cast(weight) * static_cast(monomial); + }, + integral); + + return integral; +} + +static int +test_quadrature_exactness(const quadrature_exactness_case & info) +{ + int fail = 0; + + for (unsigned int order = 0; order <= info.max_order; ++order) + switch (info.dim) + { + case 1: + for (unsigned int a = 0; a <= order; ++a) + { + const double actual = integrate_monomial_on_device(info, order, a, 0, 0); + const double expected = quadrature_exactness::monomial_integral(info.topo, a, 0, 0); + if (std::fabs(actual - expected) > exactness_tol) + ++fail; + } + break; + + case 2: + for (unsigned int a = 0; a <= order; ++a) + for (unsigned int b = 0; a + b <= order; ++b) + { + const double actual = integrate_monomial_on_device(info, order, a, b, 0); + const double expected = quadrature_exactness::monomial_integral(info.topo, a, b, 0); + if (std::fabs(actual - expected) > exactness_tol) + ++fail; + } + break; + + case 3: + for (unsigned int a = 0; a <= order; ++a) + for (unsigned int b = 0; a + b <= order; ++b) + for (unsigned int c = 0; a + b + c <= order; ++c) + { + const double actual = integrate_monomial_on_device(info, order, a, b, c); + const double expected = quadrature_exactness::monomial_integral(info.topo, a, b, c); + if (std::fabs(actual - expected) > exactness_tol) + ++fail; + } + break; + + default: + ++fail; + break; + } + + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : map_elems) + { + { + const int f = test_partition_of_unity(info); + std::printf("[partition_of_unity] [%s] %s (%d failures)\n", + 
info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_zero_sum_gradients(info); + std::printf("[zero_sum_gradients] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_kronecker_delta(info); + std::printf("[kronecker_delta] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (const auto & info : quadrature_cases) + { + const int f = test_quadrature_exactness(info); + std::printf("[quadrature_exactness] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_map_oracle_test.K b/tests/fe/kokkos_fe_map_oracle_test.K new file mode 100644 index 00000000000..4658289e82d --- /dev/null +++ b/tests/fe/kokkos_fe_map_oracle_test.K @@ -0,0 +1,527 @@ +// GPU kernel tests for libMesh::Kokkos map helpers across broad topology coverage. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEMap, FEBase, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase +// for all implemented LAGRANGE map topologies. +// B. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles for all +// sides of the implemented 2D and 3D parent topologies. +// C. map_face_qp_to_parent() against libMesh FEBase::side_map() for all +// sides and multiple side quadrature points, including supported +// mixed-face prism and pyramid element types. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_reference_fixture; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::is_supported_lagrange_face_map_topology; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; + +namespace +{ + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_qp_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +} // anonymous namespace + +template +static int +test_map_helpers_case_impl(const map_helper_case & info) +{ + auto fixture = build_reference_fixture(Topo); + const auto context = build_map_helper_context(fixture, info.topo, "map_oracle"); + const int fail = evaluate_map_helper_context(context, "map_oracle_results", tol); + if (fail) + std::printf(" map helper mismatch: topo=%s (%d failures)\n", info.name, fail); + + return 
fail; +} + +struct map_helper_dispatch +{ + explicit map_helper_dispatch(const map_helper_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_helpers_case_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-helper topology: topo=%s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; +}; + +static int +test_map_helpers_case(const map_helper_case & info) +{ + const map_helper_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_face_helper_side_case_3d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, "face_oracle_results", tol); + if (fail) + std::printf(" face helper mismatch: parent=%s side_id=%u side_type=%d (%d failures)\n", + parent_name, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_face_helper_side_case_2d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + libMesh::ElemType side_topo) +{ + const int fail = + evaluate_face_helper_context_2d(context, "face_oracle_results", tol); + if (fail) + std::printf(" face helper mismatch: parent=%s side_id=%u side_type=%d (%d failures)\n", + parent_name, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct face_helper_side_dispatch_3d +{ + face_helper_side_dispatch_3d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_helper_side_case_3d_impl(context, side_id, parent_name, side_topo); + } + 
+ int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper side: parent=%s side_id=%u side_type=%d\n", + parent_name, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + libMesh::ElemType side_topo; +}; + +template +struct face_helper_side_dispatch_2d +{ + face_helper_side_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_helper_side_case_2d_impl( + context, side_id, parent_name, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper side: parent=%s side_id=%u side_type=%d\n", + parent_name, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + libMesh::ElemType side_topo; +}; + +struct face_helper_parent_dispatch_2d +{ + face_helper_parent_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const face_helper_side_dispatch_2d dispatch( + context, side_id, parent_name, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported face-helper parent: parent=%s parent_type=%d side_id=%u\n", + parent_name, + static_cast(topo), + side_id); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const 
char * parent_name; + libMesh::ElemType side_topo; +}; + +static int +test_face_helper_side_case(const element_fixture & fixture, + unsigned int side_id, + const char * parent_name) +{ + auto side = fixture.elem->build_side_ptr(side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, side_id, "face_oracle"); + + if (context.parent_dim == 3) + { + const face_helper_side_dispatch_3d dispatch(context, side_id, parent_name, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const face_helper_parent_dispatch_2d dispatch(context, side_id, parent_name, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported face-helper parent dimension: parent=%s dim=%u side_id=%u\n", + parent_name, + context.parent_dim, + side_id); + return 1; +} + +static int +test_face_helpers_for_parent(const face_parent_case & info) +{ + auto fixture = build_reference_fixture(info.topo); + + int fail = 0; + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + fail += test_face_helper_side_case(fixture, side_id, info.name); + + return fail; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + + // FE::side_map() relies on FEMap::psi_map, which is only populated after + // some mapping quantity (e.g. xyz) has been requested on the FE object. 
+ fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_face_qp_to_parent_case(const char * parent_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + { + std::printf(" face_qp mismatch: parent=%s side_id=%u parent_type=%d side_type=%d\n", + parent_name, + side_id, + static_cast(parent.type()), + static_cast(side.type())); + std::printf(" face_qpt=(%.17g, %.17g, %.17g)\n", + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + std::printf(" host =(%.17g, %.17g, %.17g)\n", + vector_component(host, 0), vector_component(host, 1), vector_component(host, 2)); + std::printf(" kokkos =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0), vector_component(kokkos, 1), vector_component(kokkos, 2)); + std::printf(" side nodes / parent refspace nodes:\n"); + + for (unsigned int k = 0; k < side.n_nodes(); ++k) + { + libMesh::Point parent_refspace; + libmesh_error_msg_if(!libMesh::try_reference_side_node(parent.type(), side_id, k, parent_refspace), + "check_face_qp_to_parent_case(): unsupported parent side-node lookup"); + std::printf(" k=%u side_node_id=%llu parent_refspace=(%.17g, %.17g, %.17g)\n", + k, + 
libMesh::cast_int(side.node_id(k)), + parent_refspace(0), + parent_refspace(1), + parent_refspace(2)); + } + } + + return fail; +} + +static int +test_face_qp_to_parent_for_parent(const face_qp_parent_case & info) +{ + auto fixture = build_reference_fixture(info.topo); + + int fail = 0; + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + + if (side->n_nodes() == 1) + { + fail += + check_face_qp_to_parent_case(info.name, *fixture.elem, *side, side_id, libMesh::Kokkos::zero_vector()); + continue; + } + + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected unsupported face_qp side: parent=%s side_id=%u side_type=%d\n", + info.name, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + libMesh::QGauss qr(side->dim(), libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + for (unsigned int q = 0; q < qr.n_points(); ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? 
qr.qp(q)(2) : Real(0)); + fail += check_face_qp_to_parent_case(info.name, *fixture.elem, *side, side_id, face_qpt); + } + } + + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const map_helper_case cases[] = { + { libMesh::EDGE2, "EDGE2" }, + { libMesh::EDGE3, "EDGE3" }, + { libMesh::EDGE4, "EDGE4" }, + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" } + }; + + for (const auto & info : cases) + { + const int f = test_map_helpers_case(info); + std::printf("[map_helper_breadth] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + { + const face_parent_case cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } + }; + + for (const auto & info : cases) + { + const int f = test_face_helpers_for_parent(info); + std::printf("[face_helper_breadth] [%s] %s (%d failures)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + } + + { + const face_qp_parent_case cases[] = { + { libMesh::EDGE2, "EDGE2" }, + { libMesh::EDGE3, "EDGE3" }, + { libMesh::EDGE4, "EDGE4" }, + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } + }; + + for (const auto & info : cases) + { + const int f = test_face_qp_to_parent_for_parent(info); + std::printf("[face_qp_parent_breadth] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 
1 : 0; +} diff --git a/tests/fe/kokkos_fe_oracle_test_utils.h b/tests/fe/kokkos_fe_oracle_test_utils.h new file mode 100644 index 00000000000..9e2e0915afa --- /dev/null +++ b/tests/fe/kokkos_fe_oracle_test_utils.h @@ -0,0 +1,666 @@ +#ifndef LIBMESH_TESTS_FE_KOKKOS_FE_ORACLE_TEST_UTILS_H +#define LIBMESH_TESTS_FE_KOKKOS_FE_ORACLE_TEST_UTILS_H + +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_shape_dispatch.h" +#include "gpu/kokkos_fe_types.h" +#include "gpu/kokkos_storage_policy.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +#include +#include +#include +#include + +// This header is intended for the standalone Kokkos test executables, which +// include Kokkos before pulling in these helpers. + +namespace kokkos_test_utils +{ + +LIBMESH_DEVICE_INLINE libMesh::Real +vector_component(const libMesh::Kokkos::RealVector & v, unsigned int component) +{ + switch (component) + { + case 0: + return v(0); + case 1: +#if LIBMESH_DIM > 1 + return v(1); +#else + return 0.0; +#endif + case 2: +#if LIBMESH_DIM > 2 + return v(2); +#else + return 0.0; +#endif + default: + return 0.0; + } +} + +LIBMESH_DEVICE_INLINE libMesh::Real +tensor_component(const libMesh::Kokkos::RealTensor & T, unsigned int i, unsigned int j) +{ +#if LIBMESH_DIM > 2 + return T(i, j); +#elif LIBMESH_DIM > 1 + if (i < 2 && j < 2) + return T(i, j); + return 0.0; +#else + if (i == 0 && j == 0) + return T(0, 0); + return 0.0; +#endif +} + +struct element_fixture +{ + std::unique_ptr elem; + std::vector> nodes; +}; + +struct map_helper_context +{ + std::vector ref_values; + libMesh::Kokkos::default_storage_policy::vector_view d_coords; + Kokkos::View d_xi; + Kokkos::View d_eta; + Kokkos::View d_zeta; + Kokkos::View d_w; + unsigned int nqp; + unsigned int dim; + unsigned int n_nodes; +}; + +struct face_helper_context +{ + 
std::vector ref_values; + libMesh::Kokkos::default_storage_policy::vector_view d_face_coords; + libMesh::Kokkos::default_storage_policy::vector_view d_parent_coords; + Kokkos::View d_xi; + Kokkos::View d_eta; + Kokkos::View d_zeta; + Kokkos::View d_w; + Kokkos::View d_parent_xi; + Kokkos::View d_parent_eta; + Kokkos::View d_parent_zeta; + unsigned int nqp; + unsigned int parent_dim; + unsigned int n_parent_nodes; + unsigned int n_face_nodes; +}; + +using libMesh::Kokkos::dispatch_supported_lagrange_face_map_topology; +using libMesh::Kokkos::dispatch_supported_lagrange_map_topology; +using libMesh::Kokkos::dispatch_supported_shape_key; +using libMesh::Kokkos::dispatch_supported_shape_key_with_lagrange_map; +using libMesh::Kokkos::grad_shape_for_key; +using libMesh::Kokkos::is_supported_lagrange_face_map_topology; +using libMesh::Kokkos::is_supported_lagrange_map_topology; +using libMesh::Kokkos::shape_for_key; +using libMesh::Kokkos::supports_shape_key_with_lagrange_map; + +inline int +compare_device_values(const Kokkos::View & d_values, + const std::vector & ref_values, + double tol = 1.0e-13) +{ + auto h_values = Kokkos::create_mirror_view(d_values); + Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (std::fabs(h_values(i) - ref_values[i]) > tol) + ++fail; + + return fail; +} + +inline std::unique_ptr +build_reference_elem(libMesh::ElemType elem_type) +{ + auto elem = libMesh::Elem::build(elem_type); + elem->set_mapping_type(libMesh::LAGRANGE_MAP); + return elem; +} + +inline unsigned int +build_qps(libMesh::ElemType elem_type, + unsigned int dim, + unsigned int quadrature_order, + std::vector & xi_h, + std::vector & eta_h, + std::vector & zeta_h) +{ + libMesh::QGauss qr(dim, static_cast(quadrature_order)); + qr.allow_rules_with_negative_weights = true; + qr.init(elem_type); + + const unsigned int nqp = qr.n_points(); + xi_h.resize(nqp); + eta_h.resize(nqp); + zeta_h.resize(nqp); + + for (unsigned 
int q = 0; q < nqp; ++q) + { + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = (dim >= 3) ? qr.qp(q)(2) : libMesh::Real(0); + } + + return nqp; +} + +inline unsigned int +build_qps(libMesh::ElemType elem_type, + unsigned int dim, + std::vector & xi_h, + std::vector & eta_h, + std::vector & zeta_h) +{ + return build_qps(elem_type, dim, /*quadrature_order=*/4, xi_h, eta_h, zeta_h); +} + +inline unsigned int +build_host_qgauss(libMesh::ElemType topo, + unsigned int dim, + unsigned int order, + std::vector & x_ref, + std::vector & y_ref, + std::vector & z_ref, + std::vector & w_ref) +{ + libMesh::QGauss qr(dim, static_cast(order)); + qr.allow_rules_with_negative_weights = true; + qr.init(topo); + + const unsigned int nqp = qr.n_points(); + x_ref.resize(nqp); + y_ref.resize(nqp); + z_ref.resize(nqp); + w_ref.resize(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + x_ref[q] = qr.qp(q)(0); + y_ref[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + z_ref[q] = (dim >= 3) ? 
qr.qp(q)(2) : libMesh::Real(0); + w_ref[q] = qr.w(q); + } + + return nqp; +} + +inline Kokkos::View +upload_real(const std::vector & values, const char * label) +{ + Kokkos::View d(std::string(label), values.size()); + auto h = Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + Kokkos::deep_copy(d, h); + return d; +} + +inline libMesh::Kokkos::default_storage_policy::vector_view +upload_point_coordinates(const libMesh::Elem & elem, const char * label) +{ + auto d = libMesh::Kokkos::make_vector_storage(label, elem.n_nodes()); + auto h = Kokkos::create_mirror_view(d); + for (unsigned int i = 0; i < elem.n_nodes(); ++i) + { + h(i, 0) = elem.point(i)(0); +#if LIBMESH_DIM > 1 + h(i, 1) = elem.point(i)(1); +#endif +#if LIBMESH_DIM > 2 + h(i, 2) = elem.point(i)(2); +#endif + } + Kokkos::deep_copy(d, h); + return d; +} + +inline std::string +make_label(const char * prefix, const char * suffix) +{ + return std::string(prefix) + suffix; +} + +inline element_fixture +build_reference_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = build_reference_elem(elem_type); + fixture.nodes.reserve(fixture.elem->n_nodes()); + + const unsigned int dim = fixture.elem->dim(); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + const libMesh::Point master = fixture.elem->master_point(i); + const libMesh::Real xi = master(0); + const libMesh::Real eta = master(1); + const libMesh::Real zeta = master(2); + + libMesh::Point xyz; + switch (dim) + { + case 1: + xyz = libMesh::Point( + 0.7 + 0.8 * xi + 0.06 * xi * xi, + -0.3 + 0.25 * xi + 0.04 * xi * xi, + 0.2 + 0.1 * xi - 0.03 * xi * xi); + break; + + case 2: + xyz = libMesh::Point( + 0.4 + 0.9 * xi + 0.15 * eta + 0.04 * xi * eta + 0.03 * eta * eta, + -0.2 + 0.2 * xi + 0.85 * eta + 0.05 * xi * xi + 0.03 * xi * eta, + 0.1 + 0.12 * xi - 0.08 * eta + 0.02 * xi * eta); + break; + + case 3: + xyz = libMesh::Point( + 0.3 + 0.9 * xi + 0.12 * eta + 
0.08 * zeta + 0.03 * xi * eta + 0.02 * zeta * zeta, + -0.1 + 0.18 * xi + 0.8 * eta + 0.11 * zeta + 0.02 * eta * zeta, + 0.2 + 0.10 * xi + 0.14 * eta + 0.85 * zeta + 0.02 * xi * zeta + 0.01 * xi * eta); + break; + + default: + xyz = libMesh::Point(); + break; + } + + fixture.nodes.push_back(libMesh::Node::build(xyz(0), xyz(1), xyz(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +inline element_fixture +build_flat_reference_fixture(libMesh::ElemType elem_type) +{ + element_fixture fixture; + fixture.elem = build_reference_elem(elem_type); + fixture.nodes.reserve(fixture.elem->n_nodes()); + + const unsigned int dim = fixture.elem->dim(); + + for (unsigned int i = 0; i < fixture.elem->n_nodes(); ++i) + { + libMesh::Point master; + libmesh_error_msg_if(!libMesh::try_reference_node(elem_type, i, master), + "build_flat_reference_fixture(): unsupported reference-node lookup"); + const libMesh::Real xi = master(0); + const libMesh::Real eta = master(1); + const libMesh::Real zeta = master(2); + + libMesh::Point xyz; + switch (dim) + { + case 1: + xyz = libMesh::Point(0.7 + 0.8 * xi + 0.06 * xi * xi, + 0.0, + 0.0); + break; + + case 2: + xyz = libMesh::Point(0.4 + 0.9 * xi + 0.15 * eta + 0.04 * xi * eta + 0.03 * eta * eta, + -0.2 + 0.2 * xi + 0.85 * eta + 0.05 * xi * xi + 0.03 * xi * eta, + 0.0); + break; + + case 3: + xyz = libMesh::Point( + 0.3 + 0.9 * xi + 0.12 * eta + 0.08 * zeta + 0.03 * xi * eta + 0.02 * zeta * zeta, + -0.1 + 0.18 * xi + 0.8 * eta + 0.11 * zeta + 0.02 * eta * zeta, + 0.2 + 0.10 * xi + 0.14 * eta + 0.85 * zeta + 0.02 * xi * zeta + 0.01 * xi * eta); + break; + + default: + xyz = libMesh::Point(); + break; + } + + fixture.nodes.push_back(libMesh::Node::build(xyz(0), xyz(1), xyz(2), i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +inline element_fixture +build_permuted_reference_fixture(libMesh::ElemType elem_type, + unsigned int perm_num) +{ + element_fixture fixture 
= build_reference_fixture(elem_type); + fixture.elem->permute(perm_num); + return fixture; +} + +inline map_helper_context +build_map_helper_context(const element_fixture & fixture, + libMesh::ElemType topo, + const char * label_prefix) +{ + map_helper_context context; + + const unsigned int dim = fixture.elem->dim(); + const unsigned int n_nodes = fixture.elem->n_nodes(); + const libMesh::FEType fe_type(fixture.elem->default_order(), + libMesh::FEMap::map_fe_type(*fixture.elem)); + auto fe = libMesh::FEBase::build(dim, fe_type); + + libMesh::QGauss qr(dim, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(topo); + + fe->attach_quadrature_rule(&qr); + fe->get_xyz(); + fe->get_dxyzdxi(); + if (dim >= 2) + fe->get_dxyzdeta(); + if (dim >= 3) + fe->get_dxyzdzeta(); + fe->get_JxW(); + fe->reinit(fixture.elem.get()); + + const unsigned int nqp = qr.n_points(); + const auto & xyz = fe->get_xyz(); + const auto & dxyzdxi = fe->get_dxyzdxi(); + const auto & jxw = fe->get_JxW(); + + context.ref_values.resize(13 * nqp); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp), w_h(nqp); + for (unsigned int q = 0; q < nqp; ++q) + { + libMesh::RealGradient dxyzdeta(0.0); + libMesh::RealGradient dxyzdzeta(0.0); + if (dim >= 2) + dxyzdeta = fe->get_dxyzdeta()[q]; + if (dim >= 3) + dxyzdzeta = fe->get_dxyzdzeta()[q]; + + const unsigned int base = 13 * q; + context.ref_values[base + 0] = xyz[q](0); + context.ref_values[base + 1] = xyz[q](1); + context.ref_values[base + 2] = xyz[q](2); + context.ref_values[base + 3] = dxyzdxi[q](0); + context.ref_values[base + 4] = dxyzdxi[q](1); + context.ref_values[base + 5] = dxyzdxi[q](2); + context.ref_values[base + 6] = dxyzdeta(0); + context.ref_values[base + 7] = dxyzdeta(1); + context.ref_values[base + 8] = dxyzdeta(2); + context.ref_values[base + 9] = dxyzdzeta(0); + context.ref_values[base + 10] = dxyzdzeta(1); + context.ref_values[base + 11] = dxyzdzeta(2); + context.ref_values[base + 12] = jxw[q]; + + xi_h[q] = 
qr.qp(q)(0); + eta_h[q] = (dim >= 2) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = (dim >= 3) ? qr.qp(q)(2) : libMesh::Real(0); + w_h[q] = qr.w(q); + } + + context.d_coords = upload_point_coordinates(*fixture.elem, make_label(label_prefix, "_coords").c_str()); + context.d_xi = upload_real(xi_h, make_label(label_prefix, "_xi").c_str()); + context.d_eta = upload_real(eta_h, make_label(label_prefix, "_eta").c_str()); + context.d_zeta = upload_real(zeta_h, make_label(label_prefix, "_zeta").c_str()); + context.d_w = upload_real(w_h, make_label(label_prefix, "_w").c_str()); + context.nqp = nqp; + context.dim = dim; + context.n_nodes = n_nodes; + + return context; +} + +template +inline int +evaluate_map_helper_context(const map_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_coords = context.d_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const unsigned int dim_ = context.dim; + const unsigned int n_nodes_ = context.n_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + libMesh::Kokkos::RealVector xyz; + libMesh::Kokkos::RealTensor J; + libMesh::Kokkos::physical_point_and_jacobian( + d_coords, n_nodes_, d_xi(q), d_eta(q), d_zeta(q), xyz, J); + + const libMesh::Real jxw_q = libMesh::Kokkos::volume_jxw(J, dim_, d_w(q)); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = vector_component(xyz, 0); + d_results(base + 1) = vector_component(xyz, 1); + d_results(base + 2) = vector_component(xyz, 2); + d_results(base + 3) = tensor_component(J, 0, 0); + d_results(base + 4) = tensor_component(J, 0, 1); + d_results(base + 5) = tensor_component(J, 0, 2); + d_results(base + 6) = tensor_component(J, 1, 0); + d_results(base + 7) = tensor_component(J, 1, 1); + d_results(base + 8) = tensor_component(J, 1, 2); + 
d_results(base + 9) = tensor_component(J, 2, 0); + d_results(base + 10) = tensor_component(J, 2, 1); + d_results(base + 11) = tensor_component(J, 2, 2); + d_results(base + 12) = jxw_q; + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +inline face_helper_context +build_face_helper_context(const element_fixture & fixture, + const libMesh::Elem & side, + unsigned int side_id, + const char * label_prefix) +{ + face_helper_context context; + const unsigned int parent_dim = fixture.elem->dim(); + const libMesh::FEType fe_type(fixture.elem->default_order(), + libMesh::FEMap::map_fe_type(*fixture.elem)); + const unsigned int side_dim = side.dim(); + auto side_fe = libMesh::FEBase::build(parent_dim, fe_type); + + libMesh::QGauss qr(parent_dim - 1, libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side.type()); + + side_fe->attach_quadrature_rule(&qr); + side_fe->get_JxW(); + side_fe->get_normals(); + side_fe->get_dxyzdxi(); + if (parent_dim >= 3) + side_fe->get_dxyzdeta(); + side_fe->reinit(fixture.elem.get(), side_id); + + const unsigned int nqp = qr.n_points(); + const unsigned int n_parent_nodes = fixture.elem->n_nodes(); + const unsigned int n_face_nodes = side.n_nodes(); + + std::vector side_ref_points(nqp); + for (unsigned int q = 0; q < nqp; ++q) + side_ref_points[q] = qr.qp(q); + + std::vector parent_ref_points; + if (parent_dim == 2) + { + auto side_map_fe = libMesh::FEBase::build(parent_dim, fe_type); + side_map_fe->get_xyz(); + side_map_fe->side_map(fixture.elem.get(), &side, side_id, side_ref_points, parent_ref_points); + } + + context.ref_values.resize(13 * nqp); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp), w_h(nqp); + std::vector parent_xi_h(nqp, 0.0), parent_eta_h(nqp, 0.0), parent_zeta_h(nqp, 0.0); + for (unsigned int q = 0; q < nqp; ++q) + { + const libMesh::Point row0 = libMesh::FEMap::map_deriv(side_dim, &side, 0, side_ref_points[q]); + libMesh::Point row1(0.0); + if 
(side_dim >= 2) + row1 = libMesh::FEMap::map_deriv(side_dim, &side, 1, side_ref_points[q]); + const auto & normal = side_fe->get_normals()[q]; + const unsigned int base = 13 * q; + + context.ref_values[base + 0] = row0(0); + context.ref_values[base + 1] = row0(1); + context.ref_values[base + 2] = row0(2); + context.ref_values[base + 3] = row1(0); + context.ref_values[base + 4] = row1(1); + context.ref_values[base + 5] = row1(2); + context.ref_values[base + 6] = 0.0; + context.ref_values[base + 7] = 0.0; + context.ref_values[base + 8] = 0.0; + context.ref_values[base + 9] = side_fe->get_JxW()[q]; + context.ref_values[base + 10] = normal(0); + context.ref_values[base + 11] = normal(1); + context.ref_values[base + 12] = normal(2); + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (parent_dim >= 3) ? qr.qp(q)(1) : libMesh::Real(0); + zeta_h[q] = 0.0; + w_h[q] = qr.w(q); + + if (parent_dim == 2) + { + parent_xi_h[q] = parent_ref_points[q](0); + parent_eta_h[q] = parent_ref_points[q](1); + parent_zeta_h[q] = parent_ref_points[q](2); + } + } + + context.d_face_coords = upload_point_coordinates(side, make_label(label_prefix, "_coords").c_str()); + context.d_parent_coords = upload_point_coordinates(*fixture.elem, make_label(label_prefix, "_parent_coords").c_str()); + context.d_xi = upload_real(xi_h, make_label(label_prefix, "_xi").c_str()); + context.d_eta = upload_real(eta_h, make_label(label_prefix, "_eta").c_str()); + context.d_zeta = upload_real(zeta_h, make_label(label_prefix, "_zeta").c_str()); + context.d_w = upload_real(w_h, make_label(label_prefix, "_w").c_str()); + context.d_parent_xi = upload_real(parent_xi_h, make_label(label_prefix, "_parent_xi").c_str()); + context.d_parent_eta = upload_real(parent_eta_h, make_label(label_prefix, "_parent_eta").c_str()); + context.d_parent_zeta = upload_real(parent_zeta_h, make_label(label_prefix, "_parent_zeta").c_str()); + context.nqp = nqp; + context.parent_dim = parent_dim; + context.n_parent_nodes = n_parent_nodes; + 
context.n_face_nodes = n_face_nodes; + + return context; +} + +template +inline int +evaluate_face_helper_context_2d(const face_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_face_coords = context.d_face_coords; + const auto d_parent_coords = context.d_parent_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const auto d_parent_xi = context.d_parent_xi; + const auto d_parent_eta = context.d_parent_eta; + const auto d_parent_zeta = context.d_parent_zeta; + const unsigned int n_parent_nodes_ = context.n_parent_nodes; + const unsigned int n_face_nodes_ = context.n_face_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( + d_face_coords, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + const libMesh::Kokkos::RealTensor parent_J = libMesh::Kokkos::jacobian( + d_parent_coords, n_parent_nodes_, d_parent_xi(q), d_parent_eta(q), d_parent_zeta(q)); + const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/2u, d_w(q)); + const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::edge_normal_on_parent_surface(J, parent_J); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = tensor_component(J, 0, 0); + d_results(base + 1) = tensor_component(J, 0, 1); + d_results(base + 2) = tensor_component(J, 0, 2); + d_results(base + 3) = tensor_component(J, 1, 0); + d_results(base + 4) = tensor_component(J, 1, 1); + d_results(base + 5) = tensor_component(J, 1, 2); + d_results(base + 6) = tensor_component(J, 2, 0); + d_results(base + 7) = tensor_component(J, 2, 1); + d_results(base + 8) = tensor_component(J, 2, 2); + d_results(base + 9) = jxw_q; + d_results(base + 10) = vector_component(normal_q, 0); + d_results(base + 11) = 
vector_component(normal_q, 1); + d_results(base + 12) = vector_component(normal_q, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +template +inline int +evaluate_face_helper_context_3d(const face_helper_context & context, + const char * result_label, + double tol = 1.0e-13) +{ + Kokkos::View d_results(std::string(result_label), context.ref_values.size()); + const auto d_face_coords = context.d_face_coords; + const auto d_xi = context.d_xi; + const auto d_eta = context.d_eta; + const auto d_zeta = context.d_zeta; + const auto d_w = context.d_w; + const unsigned int n_face_nodes_ = context.n_face_nodes; + + Kokkos::parallel_for( + context.nqp, + KOKKOS_LAMBDA(int q) { + const libMesh::Kokkos::RealTensor J = libMesh::Kokkos::face_jacobian( + d_face_coords, n_face_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + const libMesh::Real jxw_q = libMesh::Kokkos::face_jxw(J, /*parent_dim=*/3u, d_w(q)); + const libMesh::Kokkos::RealVector normal_q = libMesh::Kokkos::face_normal(J, /*parent_dim=*/3u); + const unsigned int base = 13 * static_cast(q); + + d_results(base + 0) = tensor_component(J, 0, 0); + d_results(base + 1) = tensor_component(J, 0, 1); + d_results(base + 2) = tensor_component(J, 0, 2); + d_results(base + 3) = tensor_component(J, 1, 0); + d_results(base + 4) = tensor_component(J, 1, 1); + d_results(base + 5) = tensor_component(J, 1, 2); + d_results(base + 6) = tensor_component(J, 2, 0); + d_results(base + 7) = tensor_component(J, 2, 1); + d_results(base + 8) = tensor_component(J, 2, 2); + d_results(base + 9) = jxw_q; + d_results(base + 10) = vector_component(normal_q, 0); + d_results(base + 11) = vector_component(normal_q, 1); + d_results(base + 12) = vector_component(normal_q, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, context.ref_values, tol); +} + +} // namespace kokkos_test_utils + +#endif diff --git a/tests/fe/kokkos_fe_permuted_map_oracle_test.K 
b/tests/fe/kokkos_fe_permuted_map_oracle_test.K new file mode 100644 index 00000000000..d988bc9543e --- /dev/null +++ b/tests/fe/kokkos_fe_permuted_map_oracle_test.K @@ -0,0 +1,512 @@ +// GPU kernel tests for permuted libMesh::Kokkos map helpers. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEMap, FEBase, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase +// for every non-identity element permutation of the implemented +// LAGRANGE map topologies. +// B. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles for every +// non-identity parent permutation of the supported 2D and 3D parent +// topologies, including mixed-face prism and pyramid parents. +// C. map_face_qp_to_parent() against libMesh FEBase::side_map() for every +// non-identity permutation of those same parent topologies. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_permuted_reference_fixture; +using kokkos_test_utils::build_reference_elem; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::is_supported_lagrange_face_map_topology; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; + +namespace +{ + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_parent_case +{ + libMesh::ElemType topo; + const char * name; +}; + +static const map_helper_case map_cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" } +}; + +static const face_parent_case face_cases[] = { + { libMesh::TRI3, "TRI3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD4, "QUAD4" }, + { 
libMesh::QUAD8, "QUAD8" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET4, "TET4" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX8, "HEX8" }, + { libMesh::HEX20, "HEX20" }, + { libMesh::HEX27, "HEX27" }, + { libMesh::PRISM6, "PRISM6" }, + { libMesh::PRISM15, "PRISM15" }, + { libMesh::PRISM18, "PRISM18" }, + { libMesh::PYRAMID5, "PYRAMID5" }, + { libMesh::PYRAMID13, "PYRAMID13" }, + { libMesh::PYRAMID14, "PYRAMID14" } +}; + +} // anonymous namespace + +template +static int +test_permuted_map_case_impl(const map_helper_case & info, unsigned int perm_num) +{ + auto fixture = build_permuted_reference_fixture(Topo, perm_num); + const auto context = build_map_helper_context(fixture, info.topo, "perm_map"); + const int fail = evaluate_map_helper_context(context, "perm_map_results", tol); + if (fail) + std::printf(" permuted map mismatch: topo=%s perm=%u (%d failures)\n", + info.name, perm_num, fail); + + return fail; +} + +struct permuted_map_dispatch +{ + permuted_map_dispatch(const map_helper_case & in_info, unsigned int in_perm_num) + : info(in_info), perm_num(in_perm_num) + { + } + + template + int operator()() const + { + return test_permuted_map_case_impl(info, perm_num); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted map topology: topo=%s perm=%u type=%d\n", + info.name, + perm_num, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; + unsigned int perm_num; +}; + +static int +test_permuted_map_case(const map_helper_case & info, unsigned int perm_num) +{ + const permuted_map_dispatch dispatch(info, perm_num); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_permuted_face_helper_side_case_3d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + unsigned int perm_num, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, 
"perm_face_results", tol); + if (fail) + std::printf(" permuted face mismatch: parent=%s perm=%u side_id=%u side_type=%d (%d failures)\n", + parent_name, + perm_num, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_permuted_face_helper_side_case_2d_impl(const face_helper_context & context, + unsigned int side_id, + const char * parent_name, + unsigned int perm_num, + libMesh::ElemType side_topo) +{ + const int fail = + evaluate_face_helper_context_2d(context, "perm_face_results", tol); + if (fail) + std::printf(" permuted face mismatch: parent=%s perm=%u side_id=%u side_type=%d (%d failures)\n", + parent_name, + perm_num, + side_id, + static_cast(side_topo), + fail); + + return fail; +} + +struct permuted_face_side_dispatch_3d +{ + permuted_face_side_dispatch_3d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + unsigned int in_perm_num, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + perm_num(in_perm_num), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_permuted_face_helper_side_case_3d_impl( + context, side_id, parent_name, perm_num, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + unsigned int perm_num; + libMesh::ElemType side_topo; +}; + +template +struct permuted_face_side_dispatch_2d +{ + permuted_face_side_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + unsigned int in_perm_num, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + 
parent_name(in_parent_name), + perm_num(in_perm_num), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_permuted_face_helper_side_case_2d_impl( + context, side_id, parent_name, perm_num, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + unsigned int perm_num; + libMesh::ElemType side_topo; +}; + +struct permuted_face_parent_dispatch_2d +{ + permuted_face_parent_dispatch_2d(const face_helper_context & in_context, + unsigned int in_side_id, + const char * in_parent_name, + unsigned int in_perm_num, + libMesh::ElemType in_side_topo) + : context(in_context), + side_id(in_side_id), + parent_name(in_parent_name), + perm_num(in_perm_num), + side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const permuted_face_side_dispatch_2d dispatch( + context, side_id, parent_name, perm_num, side_topo); + return dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported permuted face-helper parent: parent=%s perm=%u side_id=%u parent_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + unsigned int side_id; + const char * parent_name; + unsigned int perm_num; + libMesh::ElemType side_topo; +}; + +static int +test_permuted_face_helper_side_case(const element_fixture & fixture, + unsigned int side_id, + const char * parent_name, + unsigned int perm_num) +{ + auto side = fixture.elem->build_side_ptr(side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, side_id, "perm_face"); + + if 
(context.parent_dim == 3) + { + const permuted_face_side_dispatch_3d dispatch( + context, side_id, parent_name, perm_num, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const permuted_face_parent_dispatch_2d dispatch( + context, side_id, parent_name, perm_num, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported permuted face-helper parent dimension: parent=%s perm=%u side_id=%u dim=%u\n", + parent_name, + perm_num, + side_id, + context.parent_dim); + return 1; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_permuted_face_qp_case(const char * parent_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + unsigned int perm_num, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + std::printf(" permuted face_qp 
mismatch: parent=%s perm=%u side_id=%u side_type=%d\n", + parent_name, + perm_num, + side_id, + static_cast(side.type())); + + return fail; +} + +static int +test_map_helpers_for_all_permutations() +{ + int fail = 0; + for (const auto & info : map_cases) + { + const auto elem = build_reference_elem(info.topo); + for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm) + fail += test_permuted_map_case(info, perm); + } + return fail; +} + +static int +test_face_helpers_for_all_permutations() +{ + int fail = 0; + for (const auto & info : face_cases) + { + const auto elem = build_reference_elem(info.topo); + for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm) + { + auto fixture = build_permuted_reference_fixture(info.topo, perm); + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected unsupported permuted face-helper side: parent=%s perm=%u side_id=%u side_type=%d\n", + info.name, + perm, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + fail += test_permuted_face_helper_side_case(fixture, side_id, info.name, perm); + } + } + } + return fail; +} + +static int +test_face_qp_to_parent_for_all_permutations() +{ + int fail = 0; + for (const auto & info : face_cases) + { + const auto elem = build_reference_elem(info.topo); + for (unsigned int perm = 1; perm < elem->n_permutations(); ++perm) + { + auto fixture = build_permuted_reference_fixture(info.topo, perm); + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + if (side->n_nodes() == 1) + { + fail += check_permuted_face_qp_case( + info.name, *fixture.elem, *side, side_id, perm, libMesh::Kokkos::zero_vector()); + continue; + } + + if (!is_supported_lagrange_face_map_topology(side->type())) + { + std::printf(" unexpected 
unsupported permuted face_qp side: parent=%s perm=%u side_id=%u side_type=%d\n", + info.name, + perm, + side_id, + static_cast(side->type())); + ++fail; + continue; + } + + libMesh::QGauss qr(side->dim(), libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + for (unsigned int q = 0; q < qr.n_points(); ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? qr.qp(q)(2) : Real(0)); + fail += check_permuted_face_qp_case(info.name, *fixture.elem, *side, side_id, perm, face_qpt); + } + } + } + } + return fail; +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int fail = 0; + + const int map_fail = test_map_helpers_for_all_permutations(); + fail += map_fail; + std::printf("[permuted_map_helpers] %s (%d failures)\n", map_fail ? "FAIL" : "OK", map_fail); + + const int face_fail = test_face_helpers_for_all_permutations(); + fail += face_fail; + std::printf("[permuted_face_helpers] %s (%d failures)\n", face_fail ? "FAIL" : "OK", face_fail); + + const int face_qp_fail = test_face_qp_to_parent_for_all_permutations(); + fail += face_qp_fail; + std::printf("[permuted_face_qp] %s (%d failures)\n", face_qp_fail ? "FAIL" : "OK", face_qp_fail); + + Kokkos::finalize(); + return fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_reconstruction_oracle_test.K b/tests/fe/kokkos_fe_reconstruction_oracle_test.K new file mode 100644 index 00000000000..b95311d053a --- /dev/null +++ b/tests/fe/kokkos_fe_reconstruction_oracle_test.K @@ -0,0 +1,315 @@ +// GPU kernel tests for libMesh::Kokkos FE reconstruction on physical elements. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEBase provides the host physical-space oracle values and gradients. +// +// The test suite covers: +// A. 
Reconstruction of solution values on distorted physical elements for +// every exact LAGRANGE key currently supported by the Kokkos evaluator. +// B. Reconstruction of physical-space gradients on the same elements by +// pulling reference-space gradients through the element Jacobian. +// C. The same value/gradient reconstruction parity for representative exact +// MONOMIAL keys across all supported dimensions and orders. +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_base.h" +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using libMesh::Kokkos::FEShapeKey; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_flat_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::dispatch_supported_shape_key_with_lagrange_map; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double value_tol = 5.0e-13; +static constexpr double grad_tol = 5.0e-12; +static constexpr unsigned int quad_order = 4; + +namespace +{ + +struct reconstruction_case +{ + FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) 
+{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +static const reconstruction_case lagrange_cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, 1, 2, "LAGRANGE/EDGE2/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "LAGRANGE/EDGE3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "LAGRANGE/EDGE3/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "LAGRANGE/TRI6/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "LAGRANGE/QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "LAGRANGE/QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "LAGRANGE/TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "LAGRANGE/TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "LAGRANGE/TET10/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "LAGRANGE/HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX27/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, 
"LAGRANGE/HEX27/SECOND" } +}; + +static const reconstruction_case monomial_cases[] = { + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::CONSTANT }, 1, monomial_n_dofs_for_dim(1, libMesh::CONSTANT), "MONOMIAL/EDGE2/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::FIRST }, 1, monomial_n_dofs_for_dim(1, libMesh::FIRST), "MONOMIAL/EDGE2/FIRST" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::SECOND }, 1, monomial_n_dofs_for_dim(1, libMesh::SECOND), "MONOMIAL/EDGE3/SECOND" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FOURTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FOURTH), "MONOMIAL/EDGE3/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FIFTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FIFTH), "MONOMIAL/EDGE3/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, 
libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" } +}; + +} // anonymous namespace + +static std::vector +build_coefficients(const reconstruction_case & info) +{ + std::vector coeffs(info.n_dofs); + const Real family_bias = (info.key.family == libMesh::MONOMIAL) ? Real(0.19) : Real(0.07); + const Real order_bias = Real(static_cast(info.key.order) + 1u) * Real(0.013); + + for (unsigned int i = 0; i < info.n_dofs; ++i) + { + const Real sign = (i % 2) ? Real(-1.0) : Real(1.0); + coeffs[i] = sign * (Real(0.17) + Real(0.041) * Real(i + 1u) + family_bias + order_bias); + } + + return coeffs; +} + +template +static int +test_reconstruction_case_impl(const reconstruction_case & info) +{ + constexpr unsigned int max_geom_nodes = 27; + + element_fixture fixture = build_flat_reference_fixture(ExactTopo); + const unsigned int n_geom_nodes = fixture.elem->n_nodes(); + + const libMesh::FEType fe_type(info.key.order, info.key.family); + auto fe = libMesh::FEBase::build(info.dim, fe_type); + + libMesh::QGauss qr(info.dim, static_cast(quad_order)); + qr.allow_rules_with_negative_weights = true; + qr.init(info.key.elem_type); + + fe->attach_quadrature_rule(&qr); + fe->get_phi(); + fe->get_dphi(); + fe->reinit(fixture.elem.get()); + + const auto & phi = fe->get_phi(); + const auto & dphi = fe->get_dphi(); + const unsigned int nqp = qr.n_points(); + + const std::vector coeffs = build_coefficients(info); + std::vector ref_u(nqp, 0.0); + std::vector ref_gx(nqp, 0.0); + std::vector ref_gy(nqp, 0.0); + std::vector ref_gz(nqp, 0.0); + std::vector xi_h(nqp), eta_h(nqp), zeta_h(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + for (unsigned int i = 0; i < info.n_dofs; ++i) 
+ { + ref_u[q] += phi[i][q] * coeffs[i]; + ref_gx[q] += dphi[i][q](0) * coeffs[i]; + ref_gy[q] += dphi[i][q](1) * coeffs[i]; + ref_gz[q] += dphi[i][q](2) * coeffs[i]; + } + + xi_h[q] = qr.qp(q)(0); + eta_h[q] = (info.dim >= 2) ? qr.qp(q)(1) : Real(0); + zeta_h[q] = (info.dim >= 3) ? qr.qp(q)(2) : Real(0); + } + + auto d_coords = upload_point_coordinates(*fixture.elem, "recon_coords"); + auto d_coeffs = upload_real(coeffs, "recon_coeffs"); + auto d_xi = upload_real(xi_h, "recon_xi"); + auto d_eta = upload_real(eta_h, "recon_eta"); + auto d_zeta = upload_real(zeta_h, "recon_zeta"); + + Kokkos::View d_u(std::string("recon_u"), nqp); + Kokkos::View d_gx(std::string("recon_gx"), nqp); + Kokkos::View d_gy(std::string("recon_gy"), nqp); + Kokkos::View d_gz(std::string("recon_gz"), nqp); + + const unsigned int dim = info.dim; + const unsigned int n_dofs = info.n_dofs; + const unsigned int n_geom_nodes_ = n_geom_nodes; + + Kokkos::parallel_for( + nqp, + KOKKOS_LAMBDA(int q) { + const RealTensor J = libMesh::Kokkos::jacobian( + d_coords, n_geom_nodes_, d_xi(q), d_eta(q), d_zeta(q)); + + Real u = 0.0; + RealVector grad_ref_sum = libMesh::Kokkos::zero_vector(); + for (unsigned int i = 0; i < n_dofs; ++i) + { + const Real coeff = d_coeffs(i); + u += coeff * shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q)); + grad_ref_sum += + coeff * grad_shape_for_key(i, d_xi(q), d_eta(q), d_zeta(q)); + } + + const RealTensor invJ = libMesh::Kokkos::inverse(J, dim); + const RealVector grad_phys = invJ * grad_ref_sum; + + d_u(q) = u; + d_gx(q) = vector_component(grad_phys, 0); + d_gy(q) = vector_component(grad_phys, 1); + d_gz(q) = vector_component(grad_phys, 2); + }); + Kokkos::fence(); + + int fail = 0; + fail += compare_device_values(d_u, ref_u, value_tol); + fail += compare_device_values(d_gx, ref_gx, grad_tol); + fail += compare_device_values(d_gy, ref_gy, grad_tol); + fail += compare_device_values(d_gz, ref_gz, grad_tol); + + if (fail) + std::printf(" reconstruction mismatch: %s (%d 
failures)\n", info.name, fail); + + return fail; +} + +struct reconstruction_dispatch +{ + explicit reconstruction_dispatch(const reconstruction_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_reconstruction_case_impl(info); + } + + int unsupported_key(FEShapeKey key) const + { + std::printf(" unexpected unsupported reconstruction key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const reconstruction_case & info; +}; + +static int +test_reconstruction_case(const reconstruction_case & info) +{ + const reconstruction_dispatch dispatch(info); + return dispatch_supported_shape_key_with_lagrange_map(info.key, dispatch); +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (const auto & info : lagrange_cases) + { + const int f = test_reconstruction_case(info); + std::printf("[reconstruction_lagrange] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + for (const auto & info : monomial_cases) + { + const int f = test_reconstruction_case(info); + std::printf("[reconstruction_monomial] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_shape_oracle_test.K b/tests/fe/kokkos_fe_shape_oracle_test.K new file mode 100644 index 00000000000..8d664723221 --- /dev/null +++ b/tests/fe/kokkos_fe_shape_oracle_test.K @@ -0,0 +1,630 @@ +// GPU kernel oracle tests for libMesh::Kokkos FE shape functions. +// The test suite covers: +// A. Geometry-map shape parity against the libMesh FE map oracle for the +// 12 implemented LAGRANGE map topologies. +// B. 
Geometry-map gradient parity against the libMesh FE map oracle for the +// same topologies. +// C. Physics FE parity for exact libMesh LAGRANGE keys that the Kokkos +// evaluator currently supports. +// D. Physics FE gradient parity for the same exact LAGRANGE keys. +// E. Physics FE parity for Kokkos-supported exact MONOMIAL keys. +// F. Physics FE gradient parity for the same exact MONOMIAL keys. +// +// Unsupported exact keys are expected to hard-abort in the Kokkos path and +// are therefore intentionally not invoked here. + +#include "libmesh/elem.h" +#include "gpu/kokkos_fe_base.h" +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/fe.h" +#include "libmesh/fe_interface.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_order.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" +#include +#include +#include +#include + +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_qps; +using kokkos_test_utils::build_reference_elem; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double tol = 1.0e-13; +static constexpr unsigned int quad_order = 4; + +namespace +{ + +struct map_elem_info +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +struct physics_shape_info +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +static const 
map_elem_info map_elems[] = { + { libMesh::EDGE2, 1, 2, "EDGE2" }, + { libMesh::EDGE3, 1, 3, "EDGE3" }, + { libMesh::TRI3, 2, 3, "TRI3" }, + { libMesh::TRI6, 2, 6, "TRI6" }, + { libMesh::QUAD4, 2, 4, "QUAD4" }, + { libMesh::QUAD8, 2, 8, "QUAD8" }, + { libMesh::QUAD9, 2, 9, "QUAD9" }, + { libMesh::TET4, 3, 4, "TET4" }, + { libMesh::TET10, 3, 10, "TET10" }, + { libMesh::HEX8, 3, 8, "HEX8" }, + { libMesh::HEX20, 3, 20, "HEX20" }, + { libMesh::HEX27, 3, 27, "HEX27" }, +}; +static constexpr unsigned int n_map_elems = sizeof(map_elems) / sizeof(map_elems[0]); + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +// Only exact libMesh LAGRANGE keys whose evaluator topology is implemented in +// the current Kokkos path are included here. +static const physics_shape_info lagrange_physics_cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, 1, 2, "EDGE2/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, 1, 2, "EDGE3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, 1, 3, "EDGE3/SECOND" }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST }, 1, 2, "EDGE4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, 1, 4, "EDGE4/THIRD" }, + + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "TRI6/SECOND" }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::FIRST }, 2, 3, "TRI7/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, 2, 6, "TRI7/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "QUAD4/FIRST" }, + { { libMesh::LAGRANGE, 
libMesh::QUAD8, libMesh::FIRST }, 2, 4, "QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "TET10/SECOND" }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::FIRST }, 3, 4, "TET14/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, 3, 10, "TET14/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "HEX27/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, "HEX27/SECOND" }, +}; +static constexpr unsigned int n_lagrange_physics_cases = + sizeof(lagrange_physics_cases) / sizeof(lagrange_physics_cases[0]); + +// These MONOMIAL cases cover all implemented MonomialImpl paths +// for orders 0..5, plus representative non-simplex exact keys. 
+static const physics_shape_info monomial_physics_cases[] = { + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::CONSTANT }, 1, monomial_n_dofs_for_dim(1, libMesh::CONSTANT), "MONOMIAL/EDGE2/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::FIRST }, 1, monomial_n_dofs_for_dim(1, libMesh::FIRST), "MONOMIAL/EDGE2/FIRST" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::SECOND }, 1, monomial_n_dofs_for_dim(1, libMesh::SECOND), "MONOMIAL/EDGE3/SECOND" }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::THIRD }, 1, monomial_n_dofs_for_dim(1, libMesh::THIRD), "MONOMIAL/EDGE4/THIRD" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FOURTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FOURTH), "MONOMIAL/EDGE3/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::EDGE3, libMesh::FIFTH }, 1, monomial_n_dofs_for_dim(1, libMesh::FIFTH), "MONOMIAL/EDGE3/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::THIRD }, 2, monomial_n_dofs_for_dim(2, libMesh::THIRD), "MONOMIAL/TRI7/THIRD" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FOURTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FOURTH), "MONOMIAL/TRI7/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/TRI7/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, 
monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::THIRD }, 3, monomial_n_dofs_for_dim(3, libMesh::THIRD), "MONOMIAL/TET14/THIRD" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FOURTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FOURTH), "MONOMIAL/TET14/FOURTH" }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/TET14/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::PRISM6, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/PRISM6/FIRST" }, + { { libMesh::MONOMIAL, libMesh::PRISM21, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/PRISM21/FIFTH" }, + { { libMesh::MONOMIAL, libMesh::PYRAMID5, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/PYRAMID5/FIRST" }, + { { libMesh::MONOMIAL, libMesh::PYRAMID14, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/PYRAMID14/FIFTH" }, +}; +static constexpr unsigned int n_monomial_physics_cases = + sizeof(monomial_physics_cases) / sizeof(monomial_physics_cases[0]); + +} // anonymous namespace + +static Real 
+host_map_shape(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return libMesh::FEInterface::shape(fe_type, 0, &elem, i, libMesh::Point(xi, eta, zeta)); +} + +static RealVector +host_map_grad(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int dim, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + const libMesh::Point p(xi, eta, zeta); + const Real gx = libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 0, p); + const Real gy = (dim >= 2) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 1, p) + : Real(0); + const Real gz = (dim >= 3) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 2, p) + : Real(0); + return libMesh::Kokkos::make_vector(gx, gy, gz); +} + +static Real +host_physics_shape(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + return libMesh::FEInterface::shape(fe_type, 0, &elem, i, libMesh::Point(xi, eta, zeta)); +} + +static RealVector +host_physics_grad(const libMesh::Elem & elem, + const libMesh::FEType & fe_type, + unsigned int dim, + unsigned int i, + Real xi, + Real eta, + Real zeta) +{ + const libMesh::Point p(xi, eta, zeta); + const Real gx = libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 0, p); + const Real gy = (dim >= 2) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 1, p) + : Real(0); + const Real gz = (dim >= 3) ? libMesh::FEInterface::shape_deriv(fe_type, 0, &elem, i, 2, p) + : Real(0); + return libMesh::Kokkos::make_vector(gx, gy, gz); +} + +// --------------------------------------------------------------------------- +// Test A: Geometry-map shape parity against libMesh FE map dispatch. 
+// --------------------------------------------------------------------------- +template +static int +test_map_shape_parity_impl(const map_elem_info & e) +{ + auto elem = build_reference_elem(Topo); + const libMesh::FEType fe_type(elem->default_order(), libMesh::FEMap::map_fe_type(*elem)); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(e.topo, e.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = e.n_dofs; + + std::vector ref_phi(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + ref_phi[i * nqp + q] = + host_map_shape(*elem, fe_type, i, xi_h[q], eta_h[q], zeta_h[q]); + + auto d_xi = upload_real(xi_h, "xi"); + auto d_eta = upload_real(eta_h, "eta"); + auto d_zeta = upload_real(zeta_h, "zeta"); + + Kokkos::View d_phi(std::string("dev_phi"), nd * nqp); + + const unsigned int nd_ = nd; + const unsigned int nqp_ = nqp; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + d_phi(idx) = + libMesh::Kokkos::map_shape(static_cast(i), + d_xi(q), + d_eta(q), + d_zeta(q)); + }); + Kokkos::fence(); + + return compare_device_values(d_phi, ref_phi, tol); +} + +struct map_shape_dispatch +{ + explicit map_shape_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_shape_parity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-shape topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_map_shape_parity(const map_elem_info & e) +{ + const map_shape_dispatch dispatch(e); + return dispatch_supported_lagrange_map_topology(e.topo, dispatch); +} + +// --------------------------------------------------------------------------- +// Test B: Geometry-map gradient parity against libMesh FE map dispatch. 
+// --------------------------------------------------------------------------- +template +static int +test_map_grad_parity_impl(const map_elem_info & e) +{ + auto elem = build_reference_elem(Topo); + const libMesh::FEType fe_type(elem->default_order(), libMesh::FEMap::map_fe_type(*elem)); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(e.topo, e.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = e.n_dofs; + const unsigned int dim = e.dim; + + std::vector ref_gx(nd * nqp); + std::vector ref_gy(nd * nqp); + std::vector ref_gz(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector g = host_map_grad(*elem, fe_type, dim, i, xi_h[q], eta_h[q], zeta_h[q]); + ref_gx[i * nqp + q] = vector_component(g, 0); + ref_gy[i * nqp + q] = vector_component(g, 1); + ref_gz[i * nqp + q] = vector_component(g, 2); + } + + auto d_xi = upload_real(xi_h, "xi"); + auto d_eta = upload_real(eta_h, "eta"); + auto d_zeta = upload_real(zeta_h, "zeta"); + + Kokkos::View d_gx(std::string("map_gx"), nd * nqp); + Kokkos::View d_gy(std::string("map_gy"), nd * nqp); + Kokkos::View d_gz(std::string("map_gz"), nd * nqp); + + const unsigned int nd_ = nd; + const unsigned int nqp_ = nqp; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + const RealVector g = + libMesh::Kokkos::grad_map_shape(static_cast(i), + d_xi(q), + d_eta(q), + d_zeta(q)); + d_gx(idx) = vector_component(g, 0); + d_gy(idx) = vector_component(g, 1); + d_gz(idx) = vector_component(g, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_gx, ref_gx, tol) + + compare_device_values(d_gy, ref_gy, tol) + + compare_device_values(d_gz, ref_gz, tol); +} + +struct map_grad_dispatch +{ + explicit map_grad_dispatch(const map_elem_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return 
test_map_grad_parity_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported map-grad topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_elem_info & info; +}; + +static int +test_map_grad_parity(const map_elem_info & e) +{ + const map_grad_dispatch dispatch(e); + return dispatch_supported_lagrange_map_topology(e.topo, dispatch); +} + +// --------------------------------------------------------------------------- +// Test C: Exact-key physics shape parity against libMesh CPU FE dispatch. +// --------------------------------------------------------------------------- +template +static int +test_shape_parity_impl(const physics_shape_info & info) +{ + auto elem = build_reference_elem(info.key.elem_type); + const libMesh::FEType fe_type(info.key.order, info.key.family); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.key.elem_type, info.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = info.n_dofs; + + std::vector ref_phi(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + ref_phi[i * nqp + q] = + host_physics_shape(*elem, fe_type, i, xi_h[q], eta_h[q], zeta_h[q]); + + auto d_xi = upload_real(xi_h, "phys_xi"); + auto d_eta = upload_real(eta_h, "phys_eta"); + auto d_zeta = upload_real(zeta_h, "phys_zeta"); + + Kokkos::View d_phi(std::string("phys_phi"), nd * nqp); + + const unsigned int nqp_ = nqp; + const unsigned int nd_ = nd; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + d_phi(idx) = shape_for_key( + static_cast(i), d_xi(q), d_eta(q), d_zeta(q)); + }); + Kokkos::fence(); + + return compare_device_values(d_phi, ref_phi, tol); +} + +// --------------------------------------------------------------------------- +// Test D: Exact-key physics gradient parity against libMesh CPU FE 
dispatch. +// --------------------------------------------------------------------------- +template +static int +test_grad_shape_parity_impl(const physics_shape_info & info) +{ + auto elem = build_reference_elem(info.key.elem_type); + const libMesh::FEType fe_type(info.key.order, info.key.family); + + std::vector xi_h, eta_h, zeta_h; + const unsigned int nqp = build_qps(info.key.elem_type, info.dim, quad_order, xi_h, eta_h, zeta_h); + const unsigned int nd = info.n_dofs; + + std::vector ref_gx(nd * nqp); + std::vector ref_gy(nd * nqp); + std::vector ref_gz(nd * nqp); + for (unsigned int i = 0; i < nd; ++i) + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector g = + host_physics_grad(*elem, fe_type, info.dim, i, xi_h[q], eta_h[q], zeta_h[q]); + ref_gx[i * nqp + q] = vector_component(g, 0); + ref_gy[i * nqp + q] = vector_component(g, 1); + ref_gz[i * nqp + q] = vector_component(g, 2); + } + + auto d_xi = upload_real(xi_h, "grad_xi"); + auto d_eta = upload_real(eta_h, "grad_eta"); + auto d_zeta = upload_real(zeta_h, "grad_zeta"); + + Kokkos::View d_gx(std::string("phys_gx"), nd * nqp); + Kokkos::View d_gy(std::string("phys_gy"), nd * nqp); + Kokkos::View d_gz(std::string("phys_gz"), nd * nqp); + + const unsigned int nqp_ = nqp; + const unsigned int nd_ = nd; + + Kokkos::parallel_for( + nd_ * nqp_, + KOKKOS_LAMBDA(int idx) { + const int i = idx / static_cast(nqp_); + const int q = idx % static_cast(nqp_); + const RealVector g = grad_shape_for_key( + static_cast(i), d_xi(q), d_eta(q), d_zeta(q)); + d_gx(idx) = vector_component(g, 0); + d_gy(idx) = vector_component(g, 1); + d_gz(idx) = vector_component(g, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_gx, ref_gx, tol) + + compare_device_values(d_gy, ref_gy, tol) + + compare_device_values(d_gz, ref_gz, tol); +} + +struct shape_dispatch +{ + explicit shape_dispatch(const physics_shape_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return 
test_shape_parity_impl(info); + } + + int unsupported_key(libMesh::Kokkos::FEShapeKey key) const + { + std::printf(" unexpected unsupported shape key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const physics_shape_info & info; +}; + +struct grad_shape_dispatch +{ + explicit grad_shape_dispatch(const physics_shape_info & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_grad_shape_parity_impl(info); + } + + int unsupported_key(libMesh::Kokkos::FEShapeKey key) const + { + std::printf(" unexpected unsupported grad-shape key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const physics_shape_info & info; +}; + +static int +test_shape_parity(const physics_shape_info & info) +{ + const shape_dispatch dispatch(info); + return dispatch_supported_shape_key(info.key, dispatch); +} + +static int +test_grad_shape_parity(const physics_shape_info & info) +{ + const grad_shape_dispatch dispatch(info); + return dispatch_supported_shape_key(info.key, dispatch); +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + for (unsigned int e = 0; e < n_map_elems; ++e) + { + const map_elem_info & info = map_elems[e]; + + { + const int f = test_map_shape_parity(info); + std::printf("[map_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_map_grad_parity(info); + std::printf("[map_grad_parity ][%s] %s (%d)\n", + info.name, f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + } + + for (unsigned int c = 0; c < n_lagrange_physics_cases; ++c) + { + const physics_shape_info & info = lagrange_physics_cases[c]; + + { + const int f = test_shape_parity(info); + std::printf("[shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_grad_shape_parity(info); + std::printf("[grad_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + for (unsigned int c = 0; c < n_monomial_physics_cases; ++c) + { + const physics_shape_info & info = monomial_physics_cases[c]; + + { + const int f = test_shape_parity(info); + std::printf("[shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_grad_shape_parity(info); + std::printf("[grad_shape_parity ][%s] %s (%d)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + Kokkos::finalize(); + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_side_trace_oracle_test.K b/tests/fe/kokkos_fe_side_trace_oracle_test.K new file mode 100644 index 00000000000..309eaa49b66 --- /dev/null +++ b/tests/fe/kokkos_fe_side_trace_oracle_test.K @@ -0,0 +1,337 @@ +// GPU kernel tests for libMesh::Kokkos side-trace FE parity. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// FEBase side reinit provides host side-trace oracle values. +// +// The test suite covers: +// A. Side-restricted shape values for supported exact LAGRANGE keys on +// distorted physical elements. +// B. Tangential physical gradients on those same sides, using host FEBase +// side traces as the oracle. +// C. 
The same side value and tangential-gradient parity for representative +// exact MONOMIAL keys whose parent geometry topology is supported by the +// current Kokkos map implementation. +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_base.h" +#include "gpu/kokkos_fe_evaluator.h" +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/libmesh.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using libMesh::Kokkos::FEShapeKey; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_flat_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_shape_key; +using kokkos_test_utils::dispatch_supported_shape_key_with_lagrange_map; +using kokkos_test_utils::grad_shape_for_key; +using kokkos_test_utils::shape_for_key; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::upload_real; +using kokkos_test_utils::vector_component; + +static constexpr double value_tol = 5.0e-13; +static constexpr double grad_tol = 5.0e-12; + +namespace +{ + +struct side_trace_case +{ + FEShapeKey key; + unsigned int dim; + unsigned int n_dofs; + const char * name; +}; + +constexpr unsigned int +monomial_n_dofs_for_dim(unsigned int dim, libMesh::Order order) +{ + const unsigned int p = static_cast(order); + + switch (dim) + { + case 1: + return p + 1; + case 2: + return (p + 1) * (p + 2) / 2; + case 3: + return (p + 1) * (p + 2) * (p + 3) / 6; + default: + return 0; + } +} + +// Restrict to parent geometries currently handled by the Kokkos map path. 
+static const side_trace_case lagrange_cases[] = { + { { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI3/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, 2, 3, "LAGRANGE/TRI6/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, 2, 6, "LAGRANGE/TRI6/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, 2, 8, "LAGRANGE/QUAD8/SECOND" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, 2, 4, "LAGRANGE/QUAD9/FIRST" }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, 2, 9, "LAGRANGE/QUAD9/SECOND" }, + + { { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, 3, 4, "LAGRANGE/TET4/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, 3, 4, "LAGRANGE/TET10/FIRST" }, + { { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, 3, 10, "LAGRANGE/TET10/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX8/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX20/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, 3, 20, "LAGRANGE/HEX20/SECOND" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, 3, 8, "LAGRANGE/HEX27/FIRST" }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, 3, 27, "LAGRANGE/HEX27/SECOND" } +}; + +static const side_trace_case monomial_cases[] = { + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::CONSTANT }, 2, monomial_n_dofs_for_dim(2, libMesh::CONSTANT), "MONOMIAL/TRI3/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TRI3, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/TRI3/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TRI6, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/TRI6/SECOND" }, + { { libMesh::MONOMIAL, 
libMesh::QUAD4, libMesh::FIRST }, 2, monomial_n_dofs_for_dim(2, libMesh::FIRST), "MONOMIAL/QUAD4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::SECOND }, 2, monomial_n_dofs_for_dim(2, libMesh::SECOND), "MONOMIAL/QUAD9/SECOND" }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, 2, monomial_n_dofs_for_dim(2, libMesh::FIFTH), "MONOMIAL/QUAD9/FIFTH" }, + + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::CONSTANT }, 3, monomial_n_dofs_for_dim(3, libMesh::CONSTANT), "MONOMIAL/TET4/CONSTANT" }, + { { libMesh::MONOMIAL, libMesh::TET4, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/TET4/FIRST" }, + { { libMesh::MONOMIAL, libMesh::TET10, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/TET10/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX8, libMesh::FIRST }, 3, monomial_n_dofs_for_dim(3, libMesh::FIRST), "MONOMIAL/HEX8/FIRST" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SECOND }, 3, monomial_n_dofs_for_dim(3, libMesh::SECOND), "MONOMIAL/HEX27/SECOND" }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::FIFTH }, 3, monomial_n_dofs_for_dim(3, libMesh::FIFTH), "MONOMIAL/HEX27/FIFTH" } +}; + +} // anonymous namespace + +LIBMESH_DEVICE_INLINE RealVector +tangential_component(const RealVector & v, const RealVector & normal) +{ + return v - ((v * normal) * normal); +} + +template +static int +test_side_trace_case_impl(const side_trace_case & info) +{ + constexpr unsigned int max_geom_nodes = 27; + + auto fixture = build_flat_reference_fixture(ExactTopo); + const unsigned int n_geom_nodes = fixture.elem->n_nodes(); + const unsigned int parent_dim = fixture.elem->dim(); + + const libMesh::FEType fe_type(info.key.order, info.key.family); + auto side_fe = libMesh::FEBase::build(parent_dim, fe_type); + + int fail = 0; + + for (unsigned int side_id = 0; side_id < fixture.elem->n_sides(); ++side_id) + { + auto side = fixture.elem->build_side_ptr(side_id); + + libMesh::QGauss qr(parent_dim - 1, 
libMesh::FOURTH); + qr.allow_rules_with_negative_weights = true; + qr.init(side->type()); + + side_fe->attach_quadrature_rule(&qr); + side_fe->get_phi(); + side_fe->get_dphi(); + side_fe->get_normals(); + side_fe->reinit(fixture.elem.get(), side_id); + + const auto & phi = side_fe->get_phi(); + const auto & dphi = side_fe->get_dphi(); + const auto & normals = side_fe->get_normals(); + + if (phi.size() != info.n_dofs || dphi.size() != info.n_dofs) + { + std::printf(" unexpected host side FE size: %s side_id=%u phi=%llu dphi=%llu expected=%u\n", + info.name, + side_id, + libMesh::cast_int(phi.size()), + libMesh::cast_int(dphi.size()), + info.n_dofs); + fail += 1; + continue; + } + + const unsigned int nqp = qr.n_points(); + std::vector ref_phi(info.n_dofs * nqp); + std::vector ref_tgx(info.n_dofs * nqp); + std::vector ref_tgy(info.n_dofs * nqp); + std::vector ref_tgz(info.n_dofs * nqp); + std::vector parent_xi_h(nqp), parent_eta_h(nqp), parent_zeta_h(nqp); + std::vector normal_x_h(nqp), normal_y_h(nqp), normal_z_h(nqp); + + for (unsigned int q = 0; q < nqp; ++q) + { + const RealVector face_qpt = libMesh::Kokkos::make_vector( + qr.qp(q)(0), + (side->dim() >= 2) ? qr.qp(q)(1) : Real(0), + (side->dim() >= 3) ? 
qr.qp(q)(2) : Real(0)); + const RealVector parent_qpt = + libMesh::Kokkos::map_face_qp_to_parent(*side, libMesh::LAGRANGE_MAP, side->type(), face_qpt); + + parent_xi_h[q] = vector_component(parent_qpt, 0); + parent_eta_h[q] = vector_component(parent_qpt, 1); + parent_zeta_h[q] = vector_component(parent_qpt, 2); + + const auto & n = normals[q]; + normal_x_h[q] = n(0); + normal_y_h[q] = n(1); + normal_z_h[q] = n(2); + + for (unsigned int i = 0; i < info.n_dofs; ++i) + { + const unsigned int idx = q * info.n_dofs + i; + const RealVector host_tg = tangential_component( + libMesh::Kokkos::make_vector(dphi[i][q](0), dphi[i][q](1), dphi[i][q](2)), + libMesh::Kokkos::make_vector(n(0), n(1), n(2))); + + ref_phi[idx] = phi[i][q]; + ref_tgx[idx] = vector_component(host_tg, 0); + ref_tgy[idx] = vector_component(host_tg, 1); + ref_tgz[idx] = vector_component(host_tg, 2); + } + } + + auto d_coords = upload_point_coordinates(*fixture.elem, "side_trace_coords"); + auto d_parent_xi = upload_real(parent_xi_h, "side_trace_parent_xi"); + auto d_parent_eta = upload_real(parent_eta_h, "side_trace_parent_eta"); + auto d_parent_zeta = upload_real(parent_zeta_h, "side_trace_parent_zeta"); + auto d_normal_x = upload_real(normal_x_h, "side_trace_normal_x"); + auto d_normal_y = upload_real(normal_y_h, "side_trace_normal_y"); + auto d_normal_z = upload_real(normal_z_h, "side_trace_normal_z"); + + Kokkos::View d_phi(std::string("side_trace_phi"), ref_phi.size()); + Kokkos::View d_tgx(std::string("side_trace_tgx"), ref_tgx.size()); + Kokkos::View d_tgy(std::string("side_trace_tgy"), ref_tgy.size()); + Kokkos::View d_tgz(std::string("side_trace_tgz"), ref_tgz.size()); + + const unsigned int n_dofs = info.n_dofs; + const unsigned int n_geom_nodes_ = n_geom_nodes; + + Kokkos::parallel_for( + static_cast(ref_phi.size()), + KOKKOS_LAMBDA(int idx) { + const unsigned int q = static_cast(idx) / n_dofs; + const unsigned int i = static_cast(idx) % n_dofs; + + const Real xi = d_parent_xi(q); + const Real 
eta = d_parent_eta(q); + const Real zeta = d_parent_zeta(q); + const RealTensor J = + libMesh::Kokkos::jacobian(d_coords, n_geom_nodes_, xi, eta, zeta); + const RealVector grad_ref = grad_shape_for_key(i, xi, eta, zeta); + const RealVector grad_phys = libMesh::Kokkos::inverse(J, parent_dim) * grad_ref; + const RealVector normal = libMesh::Kokkos::make_vector(d_normal_x(q), d_normal_y(q), d_normal_z(q)); + const RealVector tangential_grad = tangential_component(grad_phys, normal); + + d_phi(idx) = shape_for_key(i, xi, eta, zeta); + d_tgx(idx) = vector_component(tangential_grad, 0); + d_tgy(idx) = vector_component(tangential_grad, 1); + d_tgz(idx) = vector_component(tangential_grad, 2); + }); + Kokkos::fence(); + + const int side_fail = + compare_device_values(d_phi, ref_phi, value_tol) + + compare_device_values(d_tgx, ref_tgx, grad_tol) + + compare_device_values(d_tgy, ref_tgy, grad_tol) + + compare_device_values(d_tgz, ref_tgz, grad_tol); + + if (side_fail) + std::printf(" side-trace mismatch: %s side_id=%u side_type=%d (%d failures)\n", + info.name, + side_id, + static_cast(side->type()), + side_fail); + + fail += side_fail; + } + + return fail; +} + +struct side_trace_dispatch +{ + explicit side_trace_dispatch(const side_trace_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_side_trace_case_impl(info); + } + + int unsupported_key(FEShapeKey key) const + { + std::printf(" unexpected unsupported side-trace key: %s family=%d elem_type=%d order=%d\n", + info.name, + static_cast(key.family), + static_cast(key.elem_type), + static_cast(key.order)); + return 1; + } + + const side_trace_case & info; +}; + +static int +test_side_trace_case(const side_trace_case & info) +{ + const side_trace_dispatch dispatch(info); + return dispatch_supported_shape_key_with_lagrange_map(info.key, dispatch); +} + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; 
+ + for (const auto & info : lagrange_cases) + { + const int f = test_side_trace_case(info); + std::printf("[side_trace_lagrange] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + for (const auto & info : monomial_cases) + { + const int f = test_side_trace_case(info); + std::printf("[side_trace_monomial] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_fe_types_oracle_test.K b/tests/fe/kokkos_fe_types_oracle_test.K new file mode 100644 index 00000000000..9dfe558e499 --- /dev/null +++ b/tests/fe/kokkos_fe_types_oracle_test.K @@ -0,0 +1,464 @@ +// GPU kernel oracle tests for libMesh::Kokkos FE type helpers. +// The test suite covers: +// A. get_side_topology() against libMesh side topology, with the 1D +// NODEELEM -> EDGE2 surrogate used by the Kokkos path. +// B. class_from_topology() against a class inferred from libMesh +// first-order LAGRANGE FE spaces. +// C. n_dofs(FEShapeKey) for Kokkos-supported exact LAGRANGE keys against +// libMesh::FEInterface::n_dofs(). +// D. n_dofs(FEShapeKey) for Kokkos-supported MONOMIAL keys against +// libMesh::FEInterface::n_dofs(). +// E. supports_shape()/supports_grad_shape()/supports_n_dofs() agree on the +// current Kokkos evaluator support boundary. +// +// Returns 0 on success, non-zero on failure. 
+ +#include "gpu/kokkos_fe_types.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_interface.h" +#include "libmesh/fe_type.h" +#include "libmesh/libmesh.h" + +#include "libmesh/enum_elem_type.h" +#include "libmesh/enum_fe_family.h" +#include "libmesh/enum_order.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include + +using kokkos_test_utils::build_reference_elem; + +namespace +{ + +struct side_topology_case +{ + libMesh::ElemType parent; + libMesh::ElemType expected; +}; + +struct class_from_topology_case +{ + libMesh::ElemType topo; + libMesh::FEElemClass expected; +}; + +struct n_dof_case +{ + libMesh::Kokkos::FEShapeKey key; + unsigned int expected; +}; + +struct support_case +{ + libMesh::Kokkos::FEShapeKey key; + bool expected; +}; + +static libMesh::ElemType +host_side_topology_oracle(libMesh::ElemType parent_type) +{ + return libMesh::side_topology_or_invalid(parent_type); +} + +static libMesh::FEElemClass +host_class_from_topology_oracle(libMesh::ElemType topo) +{ + return libMesh::class_from_topology_or_invalid(topo); +} + +static unsigned int +host_n_dofs_oracle(libMesh::Kokkos::FEShapeKey key) +{ + auto elem = build_reference_elem(key.elem_type); + return libMesh::FEInterface::n_dofs(libMesh::FEType(key.order, key.family), 0, elem.get()); +} + +} // anonymous namespace + +// --------------------------------------------------------------------------- +// Test 1: get_side_topology() against libMesh side topology. 
+// --------------------------------------------------------------------------- +static int +test_side_topology() +{ + static const libMesh::ElemType parents[] = { + libMesh::EDGE2, + libMesh::EDGE3, + libMesh::EDGE4, + libMesh::TRI3, + libMesh::TRI7, + libMesh::QUAD4, + libMesh::TRI6, + libMesh::QUAD8, + libMesh::QUAD9, + libMesh::TET4, + libMesh::HEX8, + libMesh::TET10, + libMesh::TET14, + libMesh::HEX20, + libMesh::HEX27 + }; + constexpr int n_cases = sizeof(parents) / sizeof(parents[0]); + + Kokkos::View d_cases(std::string("side_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + { + h(i).parent = parents[i]; + h(i).expected = host_side_topology_oracle(parents[i]); + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("side_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (get_side_topology(d_cases(i).parent) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 2: class_from_topology() against libMesh FE oracle. 
+// --------------------------------------------------------------------------- +static int +test_class_from_topology() +{ + static const libMesh::ElemType topos[] = { + libMesh::EDGE2, + libMesh::EDGE3, + libMesh::EDGE4, + libMesh::TRI3, + libMesh::TRI6, + libMesh::TRI7, + libMesh::QUAD4, + libMesh::QUAD8, + libMesh::QUAD9, + libMesh::TET4, + libMesh::TET10, + libMesh::TET14, + libMesh::HEX8, + libMesh::HEX20, + libMesh::HEX27, + libMesh::PRISM6, + libMesh::PRISM15, + libMesh::PRISM18, + libMesh::PRISM20, + libMesh::PRISM21, + libMesh::PYRAMID5, + libMesh::PYRAMID13, + libMesh::PYRAMID14, + libMesh::PYRAMID18 + }; + constexpr int n_cases = sizeof(topos) / sizeof(topos[0]); + + Kokkos::View d_cases(std::string("class_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + { + h(i).topo = topos[i]; + h(i).expected = host_class_from_topology_oracle(topos[i]); + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("class_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (class_from_topology(d_cases(i).topo) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 3: n_dofs() for Kokkos-supported exact LAGRANGE keys against +// libMesh FEInterface. 
+// --------------------------------------------------------------------------- +static int +test_n_dofs_lagrange() +{ + using libMesh::Kokkos::FEShapeKey; + + static const FEShapeKey keys[] = { + { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::FIRST }, + + { libMesh::LAGRANGE, libMesh::TRI3, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TRI6, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TRI6, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::TRI7, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::QUAD4, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::QUAD8, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, + + { libMesh::LAGRANGE, libMesh::TET4, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TET10, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TET10, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::TET14, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::HEX8, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::HEX20, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::HEX20, libMesh::SECOND }, + { libMesh::LAGRANGE, libMesh::HEX27, libMesh::FIRST }, + { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND } + }; + constexpr int n_cases = sizeof(keys) / sizeof(keys[0]); + + Kokkos::View d_cases(std::string("lagrange_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + { + h(i).key = keys[i]; + h(i).expected = host_n_dofs_oracle(keys[i]); + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("lagrange_fail")); + Kokkos::deep_copy(d_fail, 0); + + 
Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (n_dofs(d_cases(i).key) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 4: n_dofs() for Kokkos-supported MONOMIAL keys against libMesh +// FEInterface. +// --------------------------------------------------------------------------- +static int +test_n_dofs_monomial() +{ + using libMesh::Kokkos::FEShapeKey; + + static const libMesh::Order orders[] = { + libMesh::CONSTANT, + libMesh::FIRST, + libMesh::SECOND, + libMesh::THIRD, + libMesh::FOURTH, + libMesh::FIFTH + }; + static const libMesh::ElemType higher_dim_topos[] = { + libMesh::TRI7, + libMesh::QUAD9, + libMesh::TET14, + libMesh::HEX27, + libMesh::PRISM21 + }; + + constexpr int n_cases = sizeof(orders) / sizeof(orders[0]) * + (2 + sizeof(higher_dim_topos) / sizeof(higher_dim_topos[0])); + + Kokkos::View d_cases(std::string("monomial_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + int i = 0; + for (const auto order : orders) + { + const libMesh::ElemType edge_topo = + (order <= libMesh::THIRD) ? libMesh::EDGE4 : libMesh::EDGE3; + const libMesh::ElemType pyramid_topo = + (order <= libMesh::THIRD) ? 
libMesh::PYRAMID18 : libMesh::PYRAMID14; + + h(i).key = { libMesh::MONOMIAL, edge_topo, order }; + h(i).expected = host_n_dofs_oracle(h(i).key); + ++i; + + h(i).key = { libMesh::MONOMIAL, pyramid_topo, order }; + h(i).expected = host_n_dofs_oracle(h(i).key); + ++i; + + for (const auto topo : higher_dim_topos) + { + h(i).key = { libMesh::MONOMIAL, topo, order }; + h(i).expected = host_n_dofs_oracle(h(i).key); + ++i; + } + } + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("monomial_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + if (n_dofs(d_cases(i).key) != d_cases(i).expected) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// Test 5: support predicates agree on the Kokkos evaluator boundary. +// --------------------------------------------------------------------------- +static int +test_support_contract() +{ + using libMesh::Kokkos::FEShapeKey; + + static const support_case cases[] = { + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::FIRST }, true }, + { { libMesh::LAGRANGE, libMesh::EDGE3, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::QUAD9, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::HEX27, libMesh::SECOND }, true }, + { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::CONSTANT }, false }, + { { libMesh::LAGRANGE, libMesh::NODEELEM, libMesh::FIRST }, false }, + { { libMesh::LAGRANGE, libMesh::EDGE4, libMesh::THIRD }, true }, + { { libMesh::LAGRANGE, libMesh::TRI7, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::TET14, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::PRISM6, libMesh::FIRST }, 
false }, + { { libMesh::LAGRANGE, libMesh::PRISM15, libMesh::SECOND }, false }, + { { libMesh::LAGRANGE, libMesh::PYRAMID5, libMesh::FIRST }, false }, + { { libMesh::LAGRANGE, libMesh::PYRAMID14, libMesh::SECOND }, false }, + { { libMesh::LAGRANGE, libMesh::PYRAMID18, libMesh::THIRD }, false }, + { { libMesh::LAGRANGE, libMesh::EDGE2, libMesh::INVALID_ORDER }, false }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::THIRD }, true }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::QUAD9, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::TET14, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::PRISM21, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::PYRAMID14, libMesh::FIFTH }, true }, + { { libMesh::MONOMIAL, libMesh::NODEELEM, libMesh::CONSTANT }, false }, + { { libMesh::MONOMIAL, libMesh::EDGE4, libMesh::FOURTH }, false }, + { { libMesh::MONOMIAL, libMesh::PYRAMID18, libMesh::FOURTH }, false }, + { { libMesh::MONOMIAL, libMesh::TRI7, libMesh::SIXTH }, false }, + { { libMesh::MONOMIAL, libMesh::HEX27, libMesh::SIXTH }, false }, + { { libMesh::MONOMIAL, libMesh::EDGE2, libMesh::INVALID_ORDER }, false } + }; + constexpr int n_cases = sizeof(cases) / sizeof(cases[0]); + + Kokkos::View d_cases(std::string("support_cases"), n_cases); + { + auto h = Kokkos::create_mirror_view(d_cases); + for (int i = 0; i < n_cases; ++i) + h(i) = cases[i]; + Kokkos::deep_copy(d_cases, h); + } + + Kokkos::View d_fail(std::string("support_fail")); + Kokkos::deep_copy(d_fail, 0); + + Kokkos::parallel_for( + n_cases, + KOKKOS_LAMBDA(int i) { + using namespace libMesh::Kokkos; + + const bool shape_supported = supports_shape(d_cases(i).key); + const bool grad_supported = supports_grad_shape(d_cases(i).key); + const bool ndofs_supported = supports_n_dofs(d_cases(i).key); + + if (shape_supported != d_cases(i).expected || + grad_supported != d_cases(i).expected || + ndofs_supported != d_cases(i).expected || + 
shape_supported != grad_supported || + shape_supported != ndofs_supported) + Kokkos::atomic_add(&d_fail(), 1); + }); + Kokkos::fence(); + + int fail = 0; + Kokkos::deep_copy(fail, d_fail); + return fail; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const int f = test_side_topology(); + std::printf("[side_topology_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_class_from_topology(); + std::printf("[class_topology_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_n_dofs_lagrange(); + std::printf("[lagrange_ndofs_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_n_dofs_monomial(); + std::printf("[monomial_ndofs_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_support_contract(); + std::printf("[support_contract_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/fe/kokkos_quadrature_oracle_test.K b/tests/fe/kokkos_quadrature_oracle_test.K new file mode 100644 index 00000000000..c4fad7b4aee --- /dev/null +++ b/tests/fe/kokkos_quadrature_oracle_test.K @@ -0,0 +1,734 @@ +// GPU kernel oracle tests for libMesh::Kokkos quadrature and map helpers. +// +// Standalone executable (no CppUnit). Uses libMesh::LibMeshInit so that +// QGauss, FEMap, and FEBase::side_map are available for oracle values. +// +// The test suite covers: +// A. 
GaussQuadrature point and weight tables against libMesh QGauss. +// B. physical_point() and jacobian() against libMesh FEMap::map() and +// FEMap::map_deriv(). +// C. physical_point_and_jacobian() and volume_jxw() against libMesh FEBase. +// D. face_jacobian(), face_jxw(), face_normal(), and +// edge_normal_on_parent_surface() against libMesh FE oracles. +// E. map_face_qp_to_parent() against libMesh FEBase::side_map(). +// +// Returns 0 on success, non-zero on failure. + +#include "libmesh/libmesh_config.h" + +#include "gpu/kokkos_fe_face_map.h" +#include "gpu/kokkos_fe_map.h" +#include "gpu/kokkos_quadrature.h" + +#include "libmesh/elem.h" +#include "libmesh/fe_base.h" +#include "libmesh/fe_map.h" +#include "libmesh/libmesh.h" +#include "libmesh/node.h" +#include "libmesh/quadrature_gauss.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include "kokkos_fe_oracle_test_utils.h" + +#include +#include +#include +#include +#include + +using libMesh::Kokkos::GaussQuadrature; +using libMesh::Kokkos::Real; +using libMesh::Kokkos::RealTensor; +using libMesh::Kokkos::RealVector; +using kokkos_test_utils::build_face_helper_context; +using kokkos_test_utils::build_map_helper_context; +using kokkos_test_utils::build_host_qgauss; +using kokkos_test_utils::build_reference_fixture; +using kokkos_test_utils::compare_device_values; +using kokkos_test_utils::dispatch_supported_lagrange_face_map_topology; +using kokkos_test_utils::dispatch_supported_lagrange_map_topology; +using kokkos_test_utils::element_fixture; +using kokkos_test_utils::evaluate_face_helper_context_2d; +using kokkos_test_utils::evaluate_face_helper_context_3d; +using kokkos_test_utils::evaluate_map_helper_context; +using kokkos_test_utils::face_helper_context; +using kokkos_test_utils::tensor_component; +using kokkos_test_utils::upload_point_coordinates; +using kokkos_test_utils::vector_component; + +static constexpr 
double tol = 1.0e-13; + +namespace +{ + +struct quadrature_case +{ + libMesh::ElemType topo; + unsigned int dim; + unsigned int order; +}; + +struct map_helper_case +{ + libMesh::ElemType topo; + const char * name; +}; + +struct face_helper_case +{ + libMesh::ElemType parent_topo; + unsigned int side_id; + const char * name; +}; + +} // anonymous namespace + +static int +test_quadrature_case(const quadrature_case & info) +{ + std::vector x_ref, y_ref, z_ref, w_ref; + const unsigned int host_nqp = + build_host_qgauss(info.topo, info.dim, info.order, x_ref, y_ref, z_ref, w_ref); + + Kokkos::View d_nqp(std::string("nqp")); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + d_nqp() = GaussQuadrature::n_points(info.topo, info.order); + }); + Kokkos::fence(); + + unsigned int device_nqp = 0; + Kokkos::deep_copy(device_nqp, d_nqp); + + int fail = 0; + if (device_nqp != host_nqp) + ++fail; + + Kokkos::View d_x(std::string("qx"), host_nqp); + Kokkos::View d_y(std::string("qy"), host_nqp); + Kokkos::View d_z(std::string("qz"), host_nqp); + Kokkos::View d_w(std::string("qw"), host_nqp); + + Kokkos::parallel_for( + host_nqp, + KOKKOS_LAMBDA(int qp) { + const RealVector pt = GaussQuadrature::point(info.topo, info.order, qp); + d_x(qp) = vector_component(pt, 0); + d_y(qp) = vector_component(pt, 1); + d_z(qp) = vector_component(pt, 2); + d_w(qp) = GaussQuadrature::weight(info.topo, info.order, qp); + }); + Kokkos::fence(); + + fail += compare_device_values(d_x, x_ref); + fail += compare_device_values(d_y, y_ref); + fail += compare_device_values(d_z, z_ref); + fail += compare_device_values(d_w, w_ref); + + if (fail) + std::printf(" quadrature mismatch: topo=%d dim=%u order=%u (%d failures)\n", + static_cast(info.topo), info.dim, info.order, fail); + + return fail; +} + +static int +test_quadrature_against_qgauss() +{ + int fail = 0; + + const libMesh::ElemType edge_topos[] = { libMesh::EDGE2, libMesh::EDGE3 }; + const libMesh::ElemType quad_topos[] = { libMesh::QUAD4, 
libMesh::QUAD8, libMesh::QUAD9 }; + const libMesh::ElemType hex_topos[] = { libMesh::HEX8, libMesh::HEX20, libMesh::HEX27 }; + const libMesh::ElemType tri_topos[] = { libMesh::TRI3, libMesh::TRI6 }; + const libMesh::ElemType tet_topos[] = { libMesh::TET4, libMesh::TET10 }; + + for (const auto topo : edge_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 1, order }); + + for (const auto topo : quad_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 2, order }); + + for (const auto topo : hex_topos) + for (unsigned int order = 0; order <= 12; ++order) + fail += test_quadrature_case({ topo, 3, order }); + + for (const auto topo : tri_topos) + for (unsigned int order = 0; order <= 6; ++order) + fail += test_quadrature_case({ topo, 2, order }); + + for (const auto topo : tet_topos) + for (unsigned int order = 0; order <= 6; ++order) + fail += test_quadrature_case({ topo, 3, order }); + + return fail; +} + + +static element_fixture +build_hex8_fixture() +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(libMesh::HEX8); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(8); + + static const double coords[8][3] = { + {0.0, 0.0, 0.0}, + {1.0, 0.0, 0.0}, + {1.0, 1.0, 0.0}, + {0.0, 1.0, 0.0}, + {0.0, 0.0, 1.0}, + {1.0, 0.0, 1.0}, + {1.0, 1.0, 1.0}, + {0.0, 1.0, 1.0} + }; + + for (unsigned int i = 0; i < 8; ++i) + { + fixture.nodes.push_back(libMesh::Node::build(coords[i][0], coords[i][1], coords[i][2], i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static element_fixture +build_tri3_fixture() +{ + element_fixture fixture; + fixture.elem = libMesh::Elem::build(libMesh::TRI3); + fixture.elem->set_mapping_type(libMesh::LAGRANGE_MAP); + fixture.nodes.reserve(3); + + static const double coords[3][3] = { + {0.0, 0.0, 0.0}, + {1.0, 0.0, 0.0}, + {0.0, 1.0, 0.0} + }; + + for (unsigned int i = 0; i < 
3; ++i) + { + fixture.nodes.push_back(libMesh::Node::build(coords[i][0], coords[i][1], coords[i][2], i)); + fixture.elem->set_node(i, fixture.nodes.back().get()); + } + + return fixture; +} + +static int +test_physical_map_hex8() +{ + auto fixture = build_hex8_fixture(); + + const libMesh::Point ref_center(0.0, 0.0, 0.0); + const libMesh::Point ref_corner(-1.0, -1.0, -1.0); + + const libMesh::Point host_center = libMesh::FEMap::map(3, fixture.elem.get(), ref_center); + const libMesh::Point host_corner = libMesh::FEMap::map(3, fixture.elem.get(), ref_corner); + const libMesh::Point host_dxi = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 0, ref_center); + const libMesh::Point host_deta = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 1, ref_center); + const libMesh::Point host_dzeta = libMesh::FEMap::map_deriv(3, fixture.elem.get(), 2, ref_center); + + std::vector ref_values = { + host_center(0), host_center(1), host_center(2), + host_dxi(0), host_dxi(1), host_dxi(2), + host_deta(0), host_deta(1), host_deta(2), + host_dzeta(0), host_dzeta(1), host_dzeta(2), + host_corner(0), host_corner(1), host_corner(2) + }; + + auto d_coords = upload_point_coordinates(*fixture.elem, "hex_coords"); + + Kokkos::View d_results(std::string("hex_results"), ref_values.size()); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const RealVector xyz_center = + libMesh::Kokkos::physical_point(d_coords, 8, 0.0, 0.0, 0.0); + const RealTensor J_center = + libMesh::Kokkos::jacobian(d_coords, 8, 0.0, 0.0, 0.0); + const RealVector xyz_corner = libMesh::Kokkos::physical_point( + d_coords, 8, -1.0, -1.0, -1.0); + + d_results(0) = vector_component(xyz_center, 0); + d_results(1) = vector_component(xyz_center, 1); + d_results(2) = vector_component(xyz_center, 2); + d_results(3) = tensor_component(J_center, 0, 0); + d_results(4) = tensor_component(J_center, 0, 1); + d_results(5) = tensor_component(J_center, 0, 2); + d_results(6) = tensor_component(J_center, 1, 0); + d_results(7) = 
tensor_component(J_center, 1, 1); + d_results(8) = tensor_component(J_center, 1, 2); + d_results(9) = tensor_component(J_center, 2, 0); + d_results(10) = tensor_component(J_center, 2, 1); + d_results(11) = tensor_component(J_center, 2, 2); + d_results(12) = vector_component(xyz_corner, 0); + d_results(13) = vector_component(xyz_corner, 1); + d_results(14) = vector_component(xyz_corner, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, ref_values); +} + +static int +test_physical_map_tri3() +{ + auto fixture = build_tri3_fixture(); + + const libMesh::Point ref_pt(1.0 / 3.0, 1.0 / 3.0, 0.0); + + const libMesh::Point host_xyz = libMesh::FEMap::map(2, fixture.elem.get(), ref_pt); + const libMesh::Point host_dxi = libMesh::FEMap::map_deriv(2, fixture.elem.get(), 0, ref_pt); + const libMesh::Point host_deta = libMesh::FEMap::map_deriv(2, fixture.elem.get(), 1, ref_pt); + + std::vector ref_values = { + host_xyz(0), host_xyz(1), host_xyz(2), + host_dxi(0), host_dxi(1), host_dxi(2), + host_deta(0), host_deta(1), host_deta(2) + }; + + auto d_coords = upload_point_coordinates(*fixture.elem, "tri_coords"); + + Kokkos::View d_results(std::string("tri_results"), ref_values.size()); + Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const RealVector xyz = libMesh::Kokkos::physical_point( + d_coords, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + const RealTensor J = libMesh::Kokkos::jacobian( + d_coords, 3, 1.0 / 3.0, 1.0 / 3.0, 0.0); + + d_results(0) = vector_component(xyz, 0); + d_results(1) = vector_component(xyz, 1); + d_results(2) = vector_component(xyz, 2); + d_results(3) = tensor_component(J, 0, 0); + d_results(4) = tensor_component(J, 0, 1); + d_results(5) = tensor_component(J, 0, 2); + d_results(6) = tensor_component(J, 1, 0); + d_results(7) = tensor_component(J, 1, 1); + d_results(8) = tensor_component(J, 1, 2); + }); + Kokkos::fence(); + + return compare_device_values(d_results, ref_values); +} + +template +static int +test_map_helpers_case_impl(const 
map_helper_case & info) +{ + auto fixture = build_reference_fixture(Topo); + const auto context = build_map_helper_context(fixture, info.topo, "map_helper"); + const int fail = evaluate_map_helper_context(context, "map_helper_results", tol); + if (fail) + std::printf(" quadrature map-helper mismatch: %s (%d failures)\n", + info.name, + fail); + return fail; +} + +struct quadrature_map_helper_dispatch +{ + explicit quadrature_map_helper_dispatch(const map_helper_case & in_info) : info(in_info) {} + + template + int operator()() const + { + return test_map_helpers_case_impl(info); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature map-helper topology: %s type=%d\n", + info.name, + static_cast(topo)); + return 1; + } + + const map_helper_case & info; +}; + +static int +test_map_helpers_case(const map_helper_case & info) +{ + const quadrature_map_helper_dispatch dispatch(info); + return dispatch_supported_lagrange_map_topology(info.topo, dispatch); +} + +template +static int +test_face_map_helpers_case_3d_impl(const face_helper_context & context, + const face_helper_case & info, + libMesh::ElemType side_topo) +{ + const int fail = evaluate_face_helper_context_3d(context, "face_helper_results", tol); + if (fail) + std::printf(" quadrature face-helper mismatch: %s parent_type=%d side_id=%u side_type=%d (%d failures)\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(side_topo), + fail); + + return fail; +} + +template +static int +test_face_map_helpers_case_2d_impl(const face_helper_context & context, + const face_helper_case & info, + libMesh::ElemType side_topo) +{ + const int fail = + evaluate_face_helper_context_2d(context, "face_helper_results", tol); + if (fail) + std::printf(" quadrature face-helper mismatch: %s parent_type=%d side_id=%u side_type=%d (%d failures)\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(side_topo), + fail); + + 
return fail; +} + +struct quadrature_face_side_dispatch_3d +{ + quadrature_face_side_dispatch_3d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_map_helpers_case_3d_impl(context, info, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper side: %s parent_type=%d side_id=%u side_type=%d\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +template +struct quadrature_face_side_dispatch_2d +{ + quadrature_face_side_dispatch_2d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + return test_face_map_helpers_case_2d_impl(context, info, side_topo); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper side: %s parent_type=%d side_id=%u side_type=%d\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + static_cast(topo)); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +struct quadrature_face_parent_dispatch_2d +{ + quadrature_face_parent_dispatch_2d(const face_helper_context & in_context, + const face_helper_case & in_info, + libMesh::ElemType in_side_topo) + : context(in_context), info(in_info), side_topo(in_side_topo) + { + } + + template + int operator()() const + { + const quadrature_face_side_dispatch_2d dispatch(context, info, side_topo); + return 
dispatch_supported_lagrange_face_map_topology(side_topo, dispatch); + } + + int unsupported_topology(libMesh::ElemType topo) const + { + std::printf(" unexpected unsupported quadrature face-helper parent: %s parent_type=%d side_id=%u\n", + info.name, + static_cast(topo), + info.side_id); + return 1; + } + + const face_helper_context & context; + const face_helper_case & info; + libMesh::ElemType side_topo; +}; + +static int +test_face_map_helpers_case(const face_helper_case & info) +{ + auto fixture = build_reference_fixture(info.parent_topo); + auto side = fixture.elem->build_side_ptr(info.side_id); + const face_helper_context context = + build_face_helper_context(fixture, *side, info.side_id, "face_helper"); + + if (context.parent_dim == 3) + { + const quadrature_face_side_dispatch_3d dispatch(context, info, side->type()); + return dispatch_supported_lagrange_face_map_topology(side->type(), dispatch); + } + + if (context.parent_dim == 2) + { + const quadrature_face_parent_dispatch_2d dispatch(context, info, side->type()); + return dispatch_supported_lagrange_map_topology(fixture.elem->type(), dispatch); + } + + std::printf(" unexpected unsupported quadrature face-helper parent dimension: %s parent_type=%d side_id=%u dim=%u\n", + info.name, + static_cast(info.parent_topo), + info.side_id, + context.parent_dim); + return 1; +} + +static RealVector +host_face_qp_to_parent_oracle(const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + const libMesh::FEType fe_type(parent.default_order(), libMesh::FEMap::map_fe_type(parent)); + auto fe = libMesh::FEBase::build(parent.dim(), fe_type); + + // FE::side_map() relies on FEMap::psi_map, which is only populated after + // some mapping quantity (e.g. xyz) has been requested on the FE object. 
+ fe->get_xyz(); + + std::vector ref_side_points(1); + ref_side_points[0] = libMesh::Point( + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + + std::vector ref_points; + fe->side_map(&parent, &side, side_id, ref_side_points, ref_points); + + return libMesh::Kokkos::make_vector(ref_points[0](0), ref_points[0](1), ref_points[0](2)); +} + +static int +check_face_qp_to_parent_case(const char * case_name, + const libMesh::Elem & parent, + const libMesh::Elem & side, + unsigned int side_id, + RealVector face_qpt) +{ + using libMesh::Kokkos::map_face_qp_to_parent; + + const RealVector host = host_face_qp_to_parent_oracle(parent, side, side_id, face_qpt); + const RealVector kokkos = + map_face_qp_to_parent(side, libMesh::LAGRANGE_MAP, side.type(), face_qpt); + + int fail = 0; + for (unsigned int d = 0; d < 3; ++d) + if (std::fabs(vector_component(kokkos, d) - vector_component(host, d)) > tol) + ++fail; + + if (fail) + { + std::printf(" face_qp mismatch: case=%s parent_type=%d side_type=%d side_id=%u\n", + case_name, + static_cast(parent.type()), + static_cast(side.type()), + side_id); + std::printf(" face_qpt=(%.17g, %.17g, %.17g)\n", + vector_component(face_qpt, 0), vector_component(face_qpt, 1), vector_component(face_qpt, 2)); + std::printf(" host =(%.17g, %.17g, %.17g)\n", + vector_component(host, 0), vector_component(host, 1), vector_component(host, 2)); + std::printf(" kokkos =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0), vector_component(kokkos, 1), vector_component(kokkos, 2)); + std::printf(" diff =(%.17g, %.17g, %.17g)\n", + vector_component(kokkos, 0) - vector_component(host, 0), + vector_component(kokkos, 1) - vector_component(host, 1), + vector_component(kokkos, 2) - vector_component(host, 2)); + std::printf(" side nodes / parent refspace nodes:\n"); + + for (unsigned int k = 0; k < side.n_nodes(); ++k) + { + libMesh::Point parent_refspace; + 
libmesh_error_msg_if(!libMesh::try_reference_side_node(parent.type(), side_id, k, parent_refspace), + "check_face_qp_to_parent_case(): unsupported parent side-node lookup"); + std::printf(" k=%u side_node_id=%llu parent_refspace=(%.17g, %.17g, %.17g)\n", + k, + libMesh::cast_int(side.node_id(k)), + parent_refspace(0), + parent_refspace(1), + parent_refspace(2)); + } + } + + return fail; +} + +static int +test_face_qp_to_parent_ref_coords() +{ + using libMesh::Elem; + using libMesh::Node; + + int fail = 0; + + { + auto edge = Elem::build(libMesh::EDGE2); + edge->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(3.25, -2.0, 5.0, 0); + auto n1 = Node::build(9.50, 4.0, -1.0, 1); + edge->set_node(0, n0.get()); + edge->set_node(1, n1.get()); + + auto side0 = edge->build_side_ptr(0); + auto side1 = edge->build_side_ptr(1); + + fail += check_face_qp_to_parent_case("edge2_side0", *edge, *side0, 0, libMesh::Kokkos::zero_vector()); + fail += check_face_qp_to_parent_case("edge2_side1", *edge, *side1, 1, libMesh::Kokkos::zero_vector()); + } + + { + auto tri3 = Elem::build(libMesh::TRI3); + tri3->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(10.0, 20.0, 0.0, 0); + auto n1 = Node::build(14.0, 20.0, 0.0, 1); + auto n2 = Node::build(10.0, 23.0, 0.0, 2); + tri3->set_node(0, n0.get()); + tri3->set_node(1, n1.get()); + tri3->set_node(2, n2.get()); + + auto side0 = tri3->build_side_ptr(0); + + fail += check_face_qp_to_parent_case("tri3_side0", *tri3, *side0, 0, libMesh::Kokkos::zero_vector()); + } + + { + auto tri6 = Elem::build(libMesh::TRI6); + tri6->set_mapping_type(libMesh::LAGRANGE_MAP); + auto n0 = Node::build(4.0, 1.0, 0.0, 0); + auto n1 = Node::build(9.0, 2.0, 0.0, 1); + auto n2 = Node::build(3.0, 8.0, 0.0, 2); + auto n3 = Node::build(42.0, -17.0, 5.0, 3); + auto n4 = Node::build(11.0, 11.0, 1.0, 4); + auto n5 = Node::build(-7.0, 4.0, 2.0, 5); + tri6->set_node(0, n0.get()); + tri6->set_node(1, n1.get()); + tri6->set_node(2, n2.get()); + 
tri6->set_node(3, n3.get()); + tri6->set_node(4, n4.get()); + tri6->set_node(5, n5.get()); + + auto side0 = tri6->build_side_ptr(0); + + fail += check_face_qp_to_parent_case("tri6_side0", *tri6, *side0, 0, libMesh::Kokkos::zero_vector()); + } + + return fail; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + int total_fail = 0; + + { + const int f = test_quadrature_against_qgauss(); + std::printf("[quadrature_qgauss_oracle] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_physical_map_hex8(); + std::printf("[physical_map_hex8] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const int f = test_physical_map_tri3(); + std::printf("[physical_map_tri3] %s (%d failures)\n", f ? "FAIL" : "PASS", f); + total_fail += f; + } + { + const map_helper_case cases[] = { + { libMesh::EDGE3, "EDGE3" }, + { libMesh::TRI6, "TRI6" }, + { libMesh::QUAD9, "QUAD9" }, + { libMesh::TET10, "TET10" }, + { libMesh::HEX20, "HEX20" } + }; + + for (const auto & info : cases) + { + const int f = test_map_helpers_case(info); + std::printf("[map_helper_oracle] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + { + const face_helper_case cases[] = { + { libMesh::TRI6, 0, "TRI6/side0" }, + { libMesh::TET10, 0, "TET10/side0" }, + { libMesh::HEX20, 0, "HEX20/side0" } + }; + + for (const auto & info : cases) + { + const int f = test_face_map_helpers_case(info); + std::printf("[face_helper_oracle] [%s] %s (%d failures)\n", + info.name, f ? "FAIL" : "PASS", f); + total_fail += f; + } + } + { + const int f = test_face_qp_to_parent_ref_coords(); + std::printf("[face_qp_parent_oracle] %s (%d failures)\n", f ? 
"FAIL" : "PASS", f); + total_fail += f; + } + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/numerics/kokkos_numerics_oracle_test_utils.h b/tests/numerics/kokkos_numerics_oracle_test_utils.h new file mode 100644 index 00000000000..c25ce2a056e --- /dev/null +++ b/tests/numerics/kokkos_numerics_oracle_test_utils.h @@ -0,0 +1,90 @@ +#ifndef KOKKOS_NUMERICS_ORACLE_TEST_UTILS_H +#define KOKKOS_NUMERICS_ORACLE_TEST_UTILS_H + +#include "libmesh/libmesh.h" + +// Avoid conflicting complex operators between CUDA and PETSc +#define PETSC_SKIP_CXX_COMPLEX_FIX 1 +#include +#undef __CUDACC_VER__ + +#include +#include +#include + +namespace libMeshTest +{ +namespace KokkosOracle +{ + +using libMesh::Real; + +template +inline ::Kokkos::View +upload_objects(const std::vector & values, const char * label) +{ + ::Kokkos::View d(std::string(label), values.size()); + auto h = ::Kokkos::create_mirror_view(d); + for (std::size_t i = 0; i < values.size(); ++i) + h(i) = values[i]; + ::Kokkos::deep_copy(d, h); + return d; +} + +inline int +compare_device_scalars(const ::Kokkos::View & d_values, + const std::vector & ref_values, + const double tol) +{ + auto h_values = ::Kokkos::create_mirror_view(d_values); + ::Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + if (std::fabs(h_values(i) - ref_values[i]) > tol) + ++fail; + + return fail; +} + +template +inline int +compare_device_vectors(const ViewType & d_values, + const std::vector & ref_values, + const double tol) +{ + auto h_values = ::Kokkos::create_mirror_view(d_values); + ::Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + if (std::fabs(h_values(i, d) - ref_values[i](d)) > tol) + ++fail; + + return fail; +} + +template 
+inline int +compare_device_tensors(const ViewType & d_values, + const std::vector & ref_values, + const double tol) +{ + auto h_values = ::Kokkos::create_mirror_view(d_values); + ::Kokkos::deep_copy(h_values, d_values); + + int fail = 0; + for (std::size_t i = 0; i < ref_values.size(); ++i) + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + if (std::fabs(h_values(i, row, col) - ref_values[i](row, col)) > tol) + ++fail; + + return fail; +} + +} // namespace KokkosOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_tensor_ops_oracle_fixtures.h b/tests/numerics/kokkos_tensor_ops_oracle_fixtures.h new file mode 100644 index 00000000000..8a53f37bd83 --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_fixtures.h @@ -0,0 +1,161 @@ +#ifndef KOKKOS_TENSOR_OPS_ORACLE_FIXTURES_H +#define KOKKOS_TENSOR_OPS_ORACLE_FIXTURES_H + +#include "libmesh/libmesh.h" +#include "libmesh/point.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_n_tensor.h" +#include "libmesh/vector_value.h" +#include "gpu/kokkos_tensor_ops.h" +#include "gpu/kokkos_storage.h" +#include "gpu/kokkos_storage_policy.h" + +#include "kokkos_numerics_oracle_test_utils.h" + +#include +#include + +namespace libMeshTest +{ +namespace KokkosTensorOracle +{ + +using libMesh::Real; + +static constexpr double tol = 2.0e-13; + +using oracle_vector = libMesh::TypeVector; +using oracle_tensor = libMesh::TypeTensor; + +inline oracle_vector +make_host_vector(const Real x, const Real y = 0, const Real z = 0) +{ + oracle_vector v; + v.zero(); + v(0) = x; +#if LIBMESH_DIM > 1 + v(1) = y; +#endif +#if LIBMESH_DIM > 2 + v(2) = z; +#endif + return v; +} + +inline oracle_tensor +make_host_tensor(const Real xx, + const Real xy = 0, + const Real xz = 0, + const Real yx = 0, + const Real yy = 0, + const Real yz = 0, + const Real zx = 0, + const Real zy = 0, + const Real zz = 0) +{ + oracle_tensor T; + T.zero(); + T(0, 0) = xx; 
+#if LIBMESH_DIM > 1 + T(0, 1) = xy; + T(1, 0) = yx; + T(1, 1) = yy; +#endif +#if LIBMESH_DIM > 2 + T(0, 2) = xz; + T(1, 2) = yz; + T(2, 0) = zx; + T(2, 1) = zy; + T(2, 2) = zz; +#endif + return T; +} + +struct tensor_dim_case +{ + oracle_tensor J; + unsigned int dim; + const char * name; +}; + +static const tensor_dim_case dim_cases[] = { + { make_host_tensor(1.7, -0.2, 0.5, + 0.3, 1.1, -0.4, + -0.6, 0.8, 0.9), + 1, + "leading_1d" }, +#if LIBMESH_DIM > 1 + { make_host_tensor(2.5, -0.75, 0.4, + 1.2, 1.8, -0.6, + -0.3, 0.9, 1.4), + 2, + "leading_2d" }, +#endif +#if LIBMESH_DIM > 2 + { make_host_tensor(9.08973348886179e-01, 3.36455579239923e-01, 5.16389236893863e-01, + 9.44156071777472e-01, 1.35610910092516e-01, 1.49881119060538e-02, + 1.15988384086146e-01, 6.79845197685518e-03, 3.77028969454745e-01), + 3, + "leading_3d" } +#endif +}; + +inline oracle_tensor +build_identity_tensor(const unsigned int dim) +{ + oracle_tensor I; + I.zero(); + for (unsigned int i = 0; i < dim; ++i) + I(i, i) = Real(1); + return I; +} + +inline Real +host_leading_determinant(const oracle_tensor & J, const unsigned int dim) +{ + if (dim == 0) + return Real(1); + if (dim == 1) + return J(0, 0); + if (dim == 2) + return J(0, 0) * J(1, 1) - J(0, 1) * J(1, 0); +#if LIBMESH_DIM > 2 + return J.det(); +#else + return Real(0); +#endif +} + +inline oracle_tensor +host_leading_inverse(const oracle_tensor & J, const unsigned int dim) +{ + oracle_tensor inv; + inv.zero(); + + if (dim == 1) + { + inv(0, 0) = Real(1) / J(0, 0); + return inv; + } + + if (dim == 2) + { + const Real det = host_leading_determinant(J, dim); + inv(0, 0) = J(1, 1) / det; + inv(0, 1) = -J(0, 1) / det; + inv(1, 0) = -J(1, 0) / det; + inv(1, 1) = J(0, 0) / det; + return inv; + } + +#if LIBMESH_DIM > 2 + return oracle_tensor(J.inverse()); +#else + return inv; +#endif +} + +} // namespace KokkosTensorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_tensor_ops_oracle_runners.h 
b/tests/numerics/kokkos_tensor_ops_oracle_runners.h new file mode 100644 index 00000000000..de867d59f6d --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_runners.h @@ -0,0 +1,499 @@ +#ifndef KOKKOS_TENSOR_OPS_ORACLE_RUNNERS_H +#define KOKKOS_TENSOR_OPS_ORACLE_RUNNERS_H + +#include "kokkos_tensor_ops_oracle_fixtures.h" + +#include +#include + +namespace libMeshTest +{ +namespace KokkosTensorOracle +{ + +template +static int +test_dim_ops() +{ + const unsigned int ncases = sizeof(dim_cases) / sizeof(dim_cases[0]); + + std::vector J_values(ncases); + std::vector dims(ncases); + std::vector ref_det(ncases); + std::vector ref_inv(ncases); + std::vector ref_I(ncases); + std::vector ref_prod_left(ncases); + std::vector ref_prod_right(ncases); + + for (unsigned int c = 0; c < ncases; ++c) + { + const auto & info = dim_cases[c]; + J_values[c] = info.J; + dims[c] = info.dim; + + ref_det[c] = host_leading_determinant(info.J, info.dim); + ref_inv[c] = host_leading_inverse(info.J, info.dim); + ref_I[c] = build_identity_tensor(info.dim); + ref_prod_left[c] = info.J * ref_inv[c]; + ref_prod_right[c] = ref_inv[c] * info.J; + } + + auto d_J = libMesh::Kokkos::upload_tensor_storage(J_values, "tensor_dim_ops_J"); + auto d_dims = libMeshTest::KokkosOracle::upload_objects(dims, "tensor_dim_ops_dim"); + ::Kokkos::View d_det("tensor_dim_ops_det", ncases); + auto d_inv = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_inv", ncases); + auto d_I = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_I", ncases); + auto d_prod_left = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_prod_left", ncases); + auto d_prod_right = libMesh::Kokkos::make_tensor_storage("tensor_dim_ops_prod_right", ncases); + + ::Kokkos::parallel_for( + static_cast(ncases), + KOKKOS_LAMBDA(int c) { + const auto J_ref = libMesh::Kokkos::make_tensor_ref(d_J, c); + const unsigned int dim = d_dims(c); + const Real det = J_ref.det(dim); + const auto inv = J_ref.inverse(dim); + const auto I = 
libMesh::Kokkos::tensor_identity(dim); + const auto prod_left = J_ref * inv; + const auto prod_right = inv * J_ref; + + d_det(c) = det; + libMesh::Kokkos::store_tensor(d_inv, c, inv); + libMesh::Kokkos::store_tensor(d_I, c, I); + libMesh::Kokkos::store_tensor(d_prod_left, c, prod_left); + libMesh::Kokkos::store_tensor(d_prod_right, c, prod_right); + }); + ::Kokkos::fence(); + + return libMeshTest::KokkosOracle::compare_device_scalars(d_det, ref_det, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_inv, ref_inv, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_I, ref_I, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_prod_left, ref_prod_left, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_prod_right, ref_prod_right, tol); +} + +template +static int +test_tensor_ops() +{ + const auto A = make_host_tensor(1.1, -0.4, 0.7, + 0.3, 1.9, -1.2, + -0.8, 0.5, 2.2); + const auto a = make_host_vector(2.0, 3.0, 4.0); + const auto b = make_host_vector(5.0, -6.0, 7.0); + const auto c = make_host_vector(1.25, -0.5, 2.0); + + const auto outer = libMesh::outer_product(a, b); + const auto transpose = A.transpose(); + const auto mix = 1.5 * A - 0.25 * outer; + const auto right = A * c; + const auto left = c * A; + const Real contract = A.contract(outer); + const Real norm = A.norm(); + const auto zero = libMesh::Kokkos::zero_tensor_value(); + + std::vector ref_outer(1, outer); + std::vector ref_transpose(1, transpose); + std::vector ref_mix(1, mix); + std::vector ref_rows(LIBMESH_DIM); + std::vector ref_columns(LIBMESH_DIM); + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + { + ref_rows[i] = A.row(i); + ref_columns[i] = A.column(i); + } + std::vector ref_right(1, right); + std::vector ref_left(1, left); + std::vector ref_scalars = {contract, norm, zero.is_zero() ? 1.0 : 0.0, A.is_zero() ? 
1.0 : 0.0}; + + auto d_A = libMesh::Kokkos::upload_tensor_storage(std::vector{A}, "tensor_ops_A"); + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "tensor_ops_a"); + auto d_b = libMesh::Kokkos::upload_vector_storage(std::vector{b}, "tensor_ops_b"); + auto d_c = libMesh::Kokkos::upload_vector_storage(std::vector{c}, "tensor_ops_c"); + auto d_outer = libMesh::Kokkos::make_tensor_storage("tensor_ops_outer", 1); + auto d_transpose = libMesh::Kokkos::make_tensor_storage("tensor_ops_transpose", 1); + auto d_mix = libMesh::Kokkos::make_tensor_storage("tensor_ops_mix", 1); + auto d_rows = libMesh::Kokkos::make_vector_storage("tensor_ops_rows", LIBMESH_DIM); + auto d_columns = libMesh::Kokkos::make_vector_storage("tensor_ops_columns", LIBMESH_DIM); + auto d_right = libMesh::Kokkos::make_vector_storage("tensor_ops_right", 1); + auto d_left = libMesh::Kokkos::make_vector_storage("tensor_ops_left", 1); + ::Kokkos::View d_scalars("tensor_ops_scalars", 4); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto A_ref = libMesh::Kokkos::make_tensor_ref(d_A, 0); + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); + const auto c_ref = libMesh::Kokkos::make_vector_ref(d_c, 0); + const auto outer_d = libMesh::Kokkos::outer_product(a_ref, b_ref); + const auto transpose_d = A_ref.transpose(); + const auto mix_d = Real(1.5) * A_ref - Real(0.25) * outer_d; + const auto right_d = A_ref * c_ref; + const auto left_d = c_ref * A_ref; + const Real contract_d = A_ref.contract(outer_d); + const Real norm_d = A_ref.norm(); + const bool zero_is_zero_d = libMesh::Kokkos::zero_tensor_value().is_zero(); + const bool A_is_zero_d = A_ref.is_zero(); + + for (unsigned int i = 0; i < LIBMESH_DIM; ++i) + { + libMesh::Kokkos::store_vector(d_rows, i, A_ref.row(i)); + libMesh::Kokkos::store_vector(d_columns, i, A_ref.column(i)); + } + + libMesh::Kokkos::store_tensor(d_outer, 0, outer_d); + 
libMesh::Kokkos::store_tensor(d_transpose, 0, transpose_d); + libMesh::Kokkos::store_tensor(d_mix, 0, mix_d); + libMesh::Kokkos::store_vector(d_right, 0, right_d); + libMesh::Kokkos::store_vector(d_left, 0, left_d); + d_scalars(0) = contract_d; + d_scalars(1) = norm_d; + d_scalars(2) = zero_is_zero_d ? 1.0 : 0.0; + d_scalars(3) = A_is_zero_d ? 1.0 : 0.0; + }); + ::Kokkos::fence(); + + return libMeshTest::KokkosOracle::compare_device_tensors(d_outer, ref_outer, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_transpose, ref_transpose, tol) + + libMeshTest::KokkosOracle::compare_device_tensors(d_mix, ref_mix, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_rows, ref_rows, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_columns, ref_columns, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_right, ref_right, tol) + + libMeshTest::KokkosOracle::compare_device_vectors(d_left, ref_left, tol) + + libMeshTest::KokkosOracle::compare_device_scalars(d_scalars, ref_scalars, tol); +} + +inline int +test_tensor_host_only_ops() +{ + int fail = 0; + +#if LIBMESH_DIM > 2 + { + libMesh::TensorValue tensor(2., 1., 0., + 1., 2., 1., + 0., 1., 2.); + fail += tensor.is_hpd(/*rel_tol=*/0.) ? 0 : 1; + } + + { + libMesh::TensorValue tensor(1., 0., 0., + 0., 0., 1., + 0., 1., 0.); + fail += tensor.is_hpd() ? 1 : 0; + } + + { + const libMesh::Point x(1., 0., 0.); + const auto R = libMesh::RealTensorValue::extrinsic_rotation_matrix(90., 0., 0.); + const auto rotated = R * x; + fail += (std::fabs(rotated(0)) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(1) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(2)) <= tol) ? 0 : 1; + + const auto invR = libMesh::RealTensorValue::inverse_extrinsic_rotation_matrix(90., 0., 0.); + const auto unrotated = invR * rotated; + fail += (std::fabs(unrotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(1)) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(2)) <= tol) ? 
0 : 1; + } + + { + const libMesh::Point x(1., 1., 1.); + const auto R = libMesh::RealTensorValue::extrinsic_rotation_matrix(90., 90., 90.); + const auto rotated = R * x; + fail += (std::fabs(rotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(1) + 1.) <= tol) ? 0 : 1; + fail += (std::fabs(rotated(2) - 1.) <= tol) ? 0 : 1; + + const auto invR = libMesh::RealTensorValue::inverse_extrinsic_rotation_matrix(90., 90., 90.); + const auto unrotated = invR * rotated; + fail += (std::fabs(unrotated(0) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(1) - 1.) <= tol) ? 0 : 1; + fail += (std::fabs(unrotated(2) - 1.) <= tol) ? 0 : 1; + } +#endif + +#ifdef LIBMESH_HAVE_METAPHYSICL + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedType; + constexpr bool assertion = + std::is_same>>::value; + fail += assertion ? 0 : 1; +#endif + + return fail; +} + +template +static int +test_linalg_foundation_storage_roundtrip() +{ + auto d_vector = libMesh::Kokkos::make_vector_storage("foundation_vector", 1); + auto d_tensor = libMesh::Kokkos::make_tensor_storage("foundation_tensor", 1); + + { + auto h_vector = ::Kokkos::create_mirror_view(d_vector); + auto h_tensor = ::Kokkos::create_mirror_view(d_tensor); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + h_vector(0, d) = Real(d + 1) * Real(0.5); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + h_tensor(0, row, col) = Real(10 * row + col + 1) * Real(0.25); + + ::Kokkos::deep_copy(d_vector, h_vector); + ::Kokkos::deep_copy(d_tensor, h_tensor); + } + + auto d_vector_out = libMesh::Kokkos::make_vector_storage("foundation_vector_out", 1); + auto d_tensor_out = libMesh::Kokkos::make_tensor_storage("foundation_tensor_out", 1); + ::Kokkos::View d_fail("foundation_fail"); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + int 
local_fail = 0; + + const auto vector_in = libMesh::Kokkos::make_vector_ref(d_vector, 0); + const auto tensor_in = libMesh::Kokkos::make_tensor_ref(d_tensor, 0); + + const auto as_point = libMesh::Kokkos::materialize_vector(vector_in); + const auto as_vector_value = + libMesh::Kokkos::materialize_vector>(vector_in); + const auto as_type_vector = + libMesh::Kokkos::materialize_vector>(vector_in); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + { + const Real expected = Real(d + 1) * Real(0.5); + local_fail += (std::fabs(as_point(d) - expected) <= tol) ? 0 : 1; + local_fail += (std::fabs(as_vector_value(d) - expected) <= tol) ? 0 : 1; + local_fail += (std::fabs(as_type_vector(d) - expected) <= tol) ? 0 : 1; + } + + const auto as_tensor_value = + libMesh::Kokkos::materialize_tensor>(tensor_in); + const auto as_type_tensor = + libMesh::Kokkos::materialize_tensor>(tensor_in); + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + const Real expected = Real(10 * row + col + 1) * Real(0.25); + local_fail += (std::fabs(as_tensor_value(row, col) - expected) <= tol) ? 0 : 1; + local_fail += (std::fabs(as_type_tensor(row, col) - expected) <= tol) ? 
0 : 1; + } + + auto vector_out = libMesh::Kokkos::make_vector_ref(d_vector_out, 0); + auto tensor_out = libMesh::Kokkos::make_tensor_ref(d_tensor_out, 0); + + vector_out.zero(); + vector_out.assign(as_vector_value); + vector_out.add_scaled(as_type_vector, Real(0)); + vector_out.subtract_scaled(as_type_vector, Real(0)); + + tensor_out.zero(); + tensor_out.assign(as_tensor_value); + tensor_out.add_scaled(as_type_tensor, Real(0)); + tensor_out.subtract_scaled(as_type_tensor, Real(0)); + + d_fail() = local_fail; + }); + ::Kokkos::fence(); + + int fail = 0; + ::Kokkos::deep_copy(fail, d_fail); + + { + auto h_vector_out = ::Kokkos::create_mirror_view(d_vector_out); + auto h_tensor_out = ::Kokkos::create_mirror_view(d_tensor_out); + ::Kokkos::deep_copy(h_vector_out, d_vector_out); + ::Kokkos::deep_copy(h_tensor_out, d_tensor_out); + + for (unsigned int d = 0; d < LIBMESH_DIM; ++d) + { + const Real expected = Real(d + 1) * Real(0.5); + fail += (std::fabs(h_vector_out(0, d) - expected) <= tol) ? 0 : 1; + } + + for (unsigned int row = 0; row < LIBMESH_DIM; ++row) + for (unsigned int col = 0; col < LIBMESH_DIM; ++col) + { + const Real expected = Real(10 * row + col + 1) * Real(0.25); + fail += (std::fabs(h_tensor_out(0, row, col) - expected) <= tol) ? 
0 : 1; + } + } + + return fail; +} + +template +static int +test_mixed_representation_ops() +{ + int fail = 0; + + const auto a = make_host_vector(2.0, 3.0, 4.0); + const auto b = make_host_vector(5.0, -6.0, 7.0); + const auto c = make_host_vector(1.25, -0.5, 2.0); + const auto A = make_host_tensor(1.1, -0.4, 0.7, + 0.3, 1.9, -1.2, + -0.8, 0.5, 2.2); + + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "mixed_ops_a"); + auto d_A = libMesh::Kokkos::upload_tensor_storage(std::vector{A}, "mixed_ops_A"); + + ::Kokkos::View d_scalars("mixed_ops_scalars", 8); + auto d_vectors = libMesh::Kokkos::make_vector_storage("mixed_ops_vectors", 5); + auto d_tensors = libMesh::Kokkos::make_tensor_storage("mixed_ops_tensors", 4); + + const auto ref_dot = a * b; + const auto ref_contract = A.contract(libMesh::outer_product(a, b)); + const auto ref_det = host_leading_determinant(A, LIBMESH_DIM); + const auto ref_right = A * c; + const auto ref_left = A.left_multiply(c); + const auto ref_mix = a + b; + const auto ref_row0 = A.row(0); + const auto ref_col0 = A.column(0); + const auto ref_transpose = A.transpose(); + const auto ref_inverse = host_leading_inverse(A, LIBMESH_DIM); + const auto ref_add = A + ref_transpose; + const auto ref_scaled = 0.5 * A; + const auto ref_trace = A.tr(); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto A_ref = libMesh::Kokkos::make_tensor_ref(d_A, 0); + + const auto mix = a_ref + b; + const auto right = A_ref * c; + const auto left = A_ref.left_multiply(c); + const auto row0 = A_ref.row(0); + const auto col0 = A_ref.column(0); + const auto transpose = A_ref.transpose(); + const auto inverse = A_ref.inverse(); + const auto add = A_ref + ref_transpose; + const auto scaled = Real(0.5) * A_ref; + const auto outer = libMesh::Kokkos::outer_product(a_ref, b); + + d_scalars(0) = a_ref * b; + d_scalars(1) = A_ref.contract(outer); + d_scalars(2) = A_ref.det(); + 
d_scalars(3) = (A_ref == A) ? 1.0 : 0.0; + d_scalars(4) = (A_ref != inverse) ? 1.0 : 0.0; + d_scalars(5) = (row0 == ref_row0) ? 1.0 : 0.0; + d_scalars(6) = (col0 == ref_col0) ? 1.0 : 0.0; + d_scalars(7) = A_ref.tr(); + + libMesh::Kokkos::store_vector(d_vectors, 0, right); + libMesh::Kokkos::store_vector(d_vectors, 1, left); + libMesh::Kokkos::store_vector(d_vectors, 2, mix); + libMesh::Kokkos::store_vector(d_vectors, 3, row0); + libMesh::Kokkos::store_vector(d_vectors, 4, col0); + libMesh::Kokkos::store_tensor(d_tensors, 0, transpose); + libMesh::Kokkos::store_tensor(d_tensors, 1, inverse); + libMesh::Kokkos::store_tensor(d_tensors, 2, add); + libMesh::Kokkos::store_tensor(d_tensors, 3, scaled); + }); + ::Kokkos::fence(); + + fail += libMeshTest::KokkosOracle::compare_device_scalars( + d_scalars, + std::vector{ref_dot, ref_contract, ref_det, 1.0, 1.0, 1.0, 1.0, ref_trace}, + tol); + fail += libMeshTest::KokkosOracle::compare_device_vectors( + d_vectors, + std::vector{ref_right, ref_left, ref_mix, ref_row0, ref_col0}, + tol); + fail += libMeshTest::KokkosOracle::compare_device_tensors( + d_tensors, + std::vector{ref_transpose, ref_inverse, ref_add, ref_scaled}, + tol); + + return fail; +} + +inline int +run_all_oracles() +{ + int total_fail = 0; + + const int dim_fail_left = test_dim_ops(); + std::printf("[tensor_dim_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + dim_fail_left ? "FAIL" : "PASS", + dim_fail_left); + total_fail += dim_fail_left; + + const int dim_fail_right = test_dim_ops(); + std::printf("[tensor_dim_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + dim_fail_right ? "FAIL" : "PASS", + dim_fail_right); + total_fail += dim_fail_right; + + const int tensor_fail_left = test_tensor_ops(); + std::printf("[tensor_ops_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + tensor_fail_left ? 
"FAIL" : "PASS", + tensor_fail_left); + total_fail += tensor_fail_left; + + const int tensor_fail_right = test_tensor_ops(); + std::printf("[tensor_ops_kernel_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + tensor_fail_right ? "FAIL" : "PASS", + tensor_fail_right); + total_fail += tensor_fail_right; + + const int host_fail = test_tensor_host_only_ops(); + std::printf("[tensor_host_ops_oracle] %s (%d failures)\n", + host_fail ? "FAIL" : "PASS", + host_fail); + total_fail += host_fail; + + const int foundation_fail_left = + test_linalg_foundation_storage_roundtrip(); + std::printf("[kokkos_linalg_foundation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + foundation_fail_left ? "FAIL" : "PASS", + foundation_fail_left); + total_fail += foundation_fail_left; + + const int foundation_fail_right = + test_linalg_foundation_storage_roundtrip(); + std::printf("[kokkos_linalg_foundation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + foundation_fail_right ? "FAIL" : "PASS", + foundation_fail_right); + total_fail += foundation_fail_right; + + const int mixed_fail_left = test_mixed_representation_ops(); + std::printf("[kokkos_linalg_mixed_representation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_fail_left ? "FAIL" : "PASS", + mixed_fail_left); + total_fail += mixed_fail_left; + + const int mixed_fail_right = test_mixed_representation_ops(); + std::printf("[kokkos_linalg_mixed_representation_oracle] [%s] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_fail_right ? 
"FAIL" : "PASS", + mixed_fail_right); + total_fail += mixed_fail_right; + + return total_fail; +} + +} // namespace KokkosTensorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_tensor_ops_oracle_test.K b/tests/numerics/kokkos_tensor_ops_oracle_test.K new file mode 100644 index 00000000000..858d4773690 --- /dev/null +++ b/tests/numerics/kokkos_tensor_ops_oracle_test.K @@ -0,0 +1,20 @@ +#include "libmesh/libmesh_config.h" +#include "kokkos_tensor_ops_oracle_runners.h" + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + const int total_fail = libMeshTest::KokkosTensorOracle::run_all_oracles(); + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/numerics/kokkos_vector_ops_oracle_fixtures.h b/tests/numerics/kokkos_vector_ops_oracle_fixtures.h new file mode 100644 index 00000000000..5bce52de341 --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_fixtures.h @@ -0,0 +1,223 @@ +#ifndef KOKKOS_VECTOR_OPS_ORACLE_FIXTURES_H +#define KOKKOS_VECTOR_OPS_ORACLE_FIXTURES_H + +#include "libmesh/libmesh.h" +#include "libmesh/tensor_value.h" +#include "libmesh/type_vector.h" +#include "libmesh/vector_value.h" +#include "gpu/kokkos_vector_ops.h" +#include "gpu/kokkos_storage.h" +#include "gpu/kokkos_storage_policy.h" + +#include "kokkos_numerics_oracle_test_utils.h" + +#include +#include + +namespace libMeshTest +{ +namespace KokkosVectorOracle +{ + +using libMesh::Real; + +static constexpr double tol = 2.0e-13; +static constexpr double unit_tol = 1.0e-14; +static constexpr Real golden_ratio = 1.6180339887498948482; +static constexpr unsigned int solid_angle_results = + 1 + ((LIBMESH_DIM > 1) ? 2u : 0u) + ((LIBMESH_DIM > 2) ? 4u : 0u); +static constexpr unsigned int vector_results = + 11 + ((LIBMESH_DIM > 2) ? 
2u : 0u); +static constexpr unsigned int scalar_results = 11 + solid_angle_results; + +template +LIBMESH_DEVICE_INLINE +Vec +make_vector(const Real x, const Real y = 0, const Real z = 0) +{ + Vec v; + v.zero(); + v(0) = x; +#if LIBMESH_DIM > 1 + v(1) = y; +#endif +#if LIBMESH_DIM > 2 + v(2) = z; +#endif + return v; +} + +inline libMesh::TypeVector +as_type_vector(const libMesh::TypeVector & v) +{ + return v; +} + +inline libMesh::TypeVector +as_type_vector(const libMesh::VectorValue & v) +{ + return make_vector>(v(0) +#if LIBMESH_DIM > 1 + , + v(1) +#endif +#if LIBMESH_DIM > 2 + , + v(2) +#endif + ); +} + +template +struct host_oracle +{ + std::vector vectors; + std::vector scalars; +}; + +struct vector_case +{ + const char * name; + Real ax, ay, az; + Real bx, by, bz; + Real cx, cy, cz; +}; + +static const vector_case cases[] = { +#if LIBMESH_DIM >= 1 + { "line_case_a", 2.0, 0.0, 0.0, -3.0, 0.0, 0.0, 0.5, 0.0, 0.0 }, + { "line_case_b", -1.25, 0.0, 0.0, 4.5, 0.0, 0.0, -2.0, 0.0, 0.0 }, +#endif +#if LIBMESH_DIM >= 2 + { "plane_case_a", 2.0, 3.0, 0.0, 5.0, -6.0, 0.0, 1.25, -0.5, 0.0 }, + { "plane_case_b", -1.0, 4.0, 0.0, 0.5, 2.5, 0.0, -3.0, 1.5, 0.0 }, +#endif +#if LIBMESH_DIM >= 3 + { "space_case_a", 2.0, 3.0, 4.0, 5.0, -6.0, 7.0, 1.25, -0.5, 2.0 }, + { "space_case_b", -1.0, 4.0, 0.75, 0.5, 2.5, -3.5, -3.0, 1.5, 2.25 }, +#endif +}; + +template +inline host_oracle +build_host_oracle(const Vec & a, const Vec & b, const Vec & c) +{ + host_oracle result; + result.vectors.reserve(vector_results); + result.scalars.reserve(scalar_results); + + const auto copied = a; + + Vec mix = a + b; + mix -= c; + + Vec scaled = 1.25 * a; + scaled += (-0.5) * b; + scaled += (0.25) * c; + + Vec plus_assign = a; + plus_assign += b; + + Vec minus_assign = a; + minus_assign -= b; + + Vec accum; + accum.zero(); + accum.add_scaled(a, 1.25); + accum.add_scaled(b, -0.5); + accum.subtract_scaled(c, -0.25); + + const auto divided = a / 5.0; + const auto outer_right = libMesh::outer_product(a, 
5.0); + const auto outer_left = libMesh::outer_product(5.0, a); + + Vec mult_assign = a; + mult_assign *= 5.0; + + Vec div_assign = a; + div_assign /= 5.0; + + Vec assign_zero = a; + assign_zero = 0.0; + + result.vectors.push_back(copied); + result.vectors.push_back(mix); + result.vectors.push_back(scaled); + result.vectors.push_back(accum); + result.vectors.push_back(plus_assign); + result.vectors.push_back(minus_assign); + result.vectors.push_back(divided); + result.vectors.push_back(outer_right); + result.vectors.push_back(outer_left); + result.vectors.push_back(mult_assign); + result.vectors.push_back(div_assign); + + result.scalars.push_back(a * b); + result.scalars.push_back(a.contract(b)); + result.scalars.push_back(mix.norm()); + result.scalars.push_back(mix.norm_sq()); + result.scalars.push_back(make_vector(0.0, 0.0, 0.0).is_zero() ? 1.0 : 0.0); + result.scalars.push_back(mix.is_zero() ? 1.0 : 0.0); + result.scalars.push_back((a == a) ? 1.0 : 0.0); + result.scalars.push_back((a == b) ? 1.0 : 0.0); + result.scalars.push_back((a != a) ? 1.0 : 0.0); + result.scalars.push_back((a != b) ? 1.0 : 0.0); + result.scalars.push_back(assign_zero.is_zero() ? 
1.0 : 0.0); + + const auto xvec = make_vector(1.3); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(xvec), + as_type_vector(xvec))); + +#if LIBMESH_DIM > 1 + const auto yvec = make_vector(0.0, 2.7); + const auto xydiag = make_vector(3.1, 3.1); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(xvec), + as_type_vector(yvec))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(yvec), + as_type_vector(xydiag))); +#endif + +#if LIBMESH_DIM > 2 + const auto xypdiag = make_vector(0.8, -0.8); + const auto zvec = make_vector(0.0, 0.0, 1.1); + const auto xzdiag = make_vector(0.0, 0.7, 0.7); + const auto icosa1 = make_vector(1.0, golden_ratio, 0.0); + const auto icosa2 = make_vector(-1.0, golden_ratio, 0.0); + const auto icosa3 = make_vector(0.0, 1.0, golden_ratio); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xydiag), + as_type_vector(yvec), + as_type_vector(zvec))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xvec), + as_type_vector(yvec), + as_type_vector(xzdiag))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(xypdiag), + as_type_vector(xydiag), + as_type_vector(zvec))); + result.scalars.push_back(libMesh::solid_angle(as_type_vector(icosa1), + as_type_vector(icosa2), + as_type_vector(icosa3))); +#endif + +#if LIBMESH_DIM > 2 + const auto cross = a.cross(b); + auto unit_cross = cross; + if (cross.norm() > unit_tol) + unit_cross = cross.unit(); + + result.vectors.push_back(cross); + result.vectors.push_back(unit_cross); +#endif + + libmesh_assert_equal_to(result.vectors.size(), vector_results); + libmesh_assert_equal_to(result.scalars.size(), scalar_results); + + return result; +} + +} // namespace KokkosVectorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_vector_ops_oracle_runners.h b/tests/numerics/kokkos_vector_ops_oracle_runners.h new file mode 100644 index 
00000000000..5fcd7e45a5e --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_runners.h @@ -0,0 +1,324 @@ +#ifndef KOKKOS_VECTOR_OPS_ORACLE_RUNNERS_H +#define KOKKOS_VECTOR_OPS_ORACLE_RUNNERS_H + +#include "kokkos_vector_ops_oracle_fixtures.h" + +#include + +namespace libMeshTest +{ +namespace KokkosVectorOracle +{ + +template +static int +test_vector_ops_case(const vector_case & info) +{ + const auto a = make_vector(info.ax, info.ay, info.az); + const auto b = make_vector(info.bx, info.by, info.bz); + const auto c = make_vector(info.cx, info.cy, info.cz); + + const auto expected = build_host_oracle(a, b, c); + + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "vector_ops_a"); + auto d_b = libMesh::Kokkos::upload_vector_storage(std::vector{b}, "vector_ops_b"); + auto d_c = libMesh::Kokkos::upload_vector_storage(std::vector{c}, "vector_ops_c"); + auto d_vectors = libMesh::Kokkos::make_vector_storage("vector_ops_vectors", vector_results); + ::Kokkos::View d_scalars("vector_ops_scalars", scalar_results); + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); + const auto c_ref = libMesh::Kokkos::make_vector_ref(d_c, 0); + + const Vec copied = libMesh::Kokkos::copy_vector(a_ref); + const Vec mix = a_ref + b_ref - c_ref; + const Vec scaled = Real(1.25) * a_ref + Real(-0.5) * b_ref + Real(0.25) * c_ref; + const Vec plus_assign = a_ref + b_ref; + const Vec minus_assign = a_ref - b_ref; + const Vec accum = Real(1.25) * a_ref + Real(-0.5) * b_ref + Real(0.25) * c_ref; + const Vec divided = a_ref / Real(5.0); + const Vec outer_right = Real(5.0) * a_ref; + const Vec outer_left = a_ref * Real(5.0); + const Vec mult_assign = a_ref * Real(5.0); + const Vec div_assign = a_ref / Real(5.0); + const Vec assign_zero = libMesh::Kokkos::zero_vector_value(); + + const Real dot = libMesh::Kokkos::vector_dot(a_ref, b_ref); + const Real 
contract = a_ref.contract(b_ref); + const Real norm = mix.norm(); + const Real norm_sq = mix.norm_sq(); + const Vec zero = libMesh::Kokkos::zero_vector_value(); + + unsigned int vector_offset = 0; + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, copied); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, mix); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, scaled); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, accum); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, plus_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, minus_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, divided); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, outer_right); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, outer_left); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, mult_assign); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, div_assign); + + unsigned int scalar_offset = 0; + d_scalars(scalar_offset++) = a_ref * b_ref; + d_scalars(scalar_offset++) = contract; + d_scalars(scalar_offset++) = norm; + d_scalars(scalar_offset++) = norm_sq; + d_scalars(scalar_offset++) = zero.is_zero() ? 1.0 : 0.0; + d_scalars(scalar_offset++) = mix.is_zero() ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref == a_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref == b_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref != a_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = (a_ref != b_ref) ? 1.0 : 0.0; + d_scalars(scalar_offset++) = assign_zero.is_zero() ? 
1.0 : 0.0; + + const Vec xvec = make_vector(1.3); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, xvec, xvec); + +#if LIBMESH_DIM > 1 + const Vec yvec = make_vector(0.0, 2.7); + const Vec xydiag = make_vector(3.1, 3.1); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, xvec, yvec); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, yvec, xydiag); +#endif + +#if LIBMESH_DIM > 2 + const Vec xypdiag = make_vector(0.8, -0.8); + const Vec zvec = make_vector(0.0, 0.0, 1.1); + const Vec xzdiag = make_vector(0.0, 0.7, 0.7); + const Vec icosa1 = make_vector(1.0, golden_ratio, 0.0); + const Vec icosa2 = make_vector(-1.0, golden_ratio, 0.0); + const Vec icosa3 = make_vector(0.0, 1.0, golden_ratio); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xydiag, yvec, zvec); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xvec, yvec, xzdiag); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(xypdiag, xydiag, zvec); + d_scalars(scalar_offset++) = libMesh::Kokkos::vector_solid_angle(icosa1, icosa2, icosa3); +#endif + +#if LIBMESH_DIM > 2 + const Vec cross = a_ref.cross(b_ref); + Vec unit_cross = cross; + if (libMesh::Kokkos::vector_norm(cross) > unit_tol) + unit_cross = libMesh::Kokkos::vector_unit(cross); + + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, cross); + libMesh::Kokkos::store_vector(d_vectors, vector_offset++, unit_cross); +#endif + + libmesh_assert_equal_to(vector_offset, vector_results); + libmesh_assert_equal_to(scalar_offset, scalar_results); + }); + ::Kokkos::fence(); + + return libMeshTest::KokkosOracle::compare_device_vectors(d_vectors, expected.vectors, tol) + + libMeshTest::KokkosOracle::compare_device_scalars(d_scalars, expected.scalars, tol); +} + +template +int +run_vector_cases(const char * suite_name) +{ + int fail = 0; + + for (const auto & info : cases) + { + const int f = test_vector_ops_case(info); + std::printf("[%s] [%s] 
[%s] %s (%d failures)\n", + suite_name, + libMesh::Kokkos::storage_policy_name(), + info.name, + f ? "FAIL" : "PASS", + f); + fail += f; + } + + return fail; +} + +inline int +test_vector_host_only_traits() +{ + int fail = 0; + +#ifdef LIBMESH_HAVE_METAPHYSICL + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedType; + constexpr bool typevector_assertion = + std::is_same>>::value; + fail += typevector_assertion ? 0 : 1; + + typedef typename MetaPhysicL::ReplaceAlgebraicType< + std::vector>, + typename libMesh::TensorTools::IncrementRank< + typename MetaPhysicL::ValueType>>::type>::type>::type + ReplacedValueType; + constexpr bool vectorvalue_assertion = + std::is_same>>::value; + fail += vectorvalue_assertion ? 0 : 1; +#endif + + return fail; +} + +template +static int +test_mixed_representation_ops() +{ + int fail = 0; + + const auto a = make_vector(2.0, 3.0, 4.0); + const auto b = make_vector(5.0, -6.0, 7.0); + const auto c = make_vector(1.25, -0.5, 2.0); + + auto d_a = libMesh::Kokkos::upload_vector_storage(std::vector{a}, "mixed_vector_a"); + auto d_b = libMesh::Kokkos::upload_vector_storage(std::vector{b}, "mixed_vector_b"); + + auto d_vectors = + libMesh::Kokkos::make_vector_storage("mixed_vector_vectors", (LIBMESH_DIM > 2) ? 5 : 3); + ::Kokkos::View d_scalars("mixed_vector_scalars", (LIBMESH_DIM > 2) ? 
7 : 5); + + const auto ref_sum = a + b; + const auto ref_diff = a - b; + const auto ref_scaled = 1.5 * a; + const auto ref_dot = a * b; + const auto ref_contract = a.contract(b); + const auto ref_solid_angle = + libMesh::solid_angle(as_type_vector(a), as_type_vector(b), as_type_vector(c)); + const auto ref_cross_norm_sq = libMesh::cross_norm_sq(as_type_vector(a), as_type_vector(b)); + +#if LIBMESH_DIM > 2 + const auto ref_cross = a.cross(b); + auto ref_unit_cross = ref_cross; + if (ref_cross.norm() > unit_tol) + ref_unit_cross = ref_cross.unit(); +#endif + + ::Kokkos::parallel_for( + 1, + KOKKOS_LAMBDA(int) { + const auto a_ref = libMesh::Kokkos::make_vector_ref(d_a, 0); + const auto b_ref = libMesh::Kokkos::make_vector_ref(d_b, 0); + + const auto sum = a_ref + b; + const auto diff = a - b_ref; + const auto scaled = Real(1.5) * a_ref; + + libMesh::Kokkos::store_vector(d_vectors, 0, sum); + libMesh::Kokkos::store_vector(d_vectors, 1, diff); + libMesh::Kokkos::store_vector(d_vectors, 2, scaled); + + d_scalars(0) = a_ref * b; + d_scalars(1) = b_ref.contract(a); + d_scalars(2) = (a_ref == a) ? 1.0 : 0.0; + d_scalars(3) = (a_ref != b) ? 1.0 : 0.0; + d_scalars(4) = libMesh::Kokkos::vector_solid_angle(a_ref, b, c); + +#if LIBMESH_DIM > 2 + const auto cross = a_ref.cross(b); + Vec unit_cross = cross; + if (libMesh::Kokkos::vector_norm(cross) > unit_tol) + unit_cross = libMesh::Kokkos::vector_unit(cross); + + libMesh::Kokkos::store_vector(d_vectors, 3, cross); + libMesh::Kokkos::store_vector(d_vectors, 4, unit_cross); + d_scalars(5) = libMesh::Kokkos::vector_cross_norm_sq(a_ref, b); + d_scalars(6) = (cross == libMesh::Kokkos::vector_cross(a, b_ref)) ? 
1.0 : 0.0; +#endif + }); + ::Kokkos::fence(); + + fail += libMeshTest::KokkosOracle::compare_device_vectors( + d_vectors, + [&]() { + std::vector ref = {ref_sum, ref_diff, ref_scaled}; +#if LIBMESH_DIM > 2 + ref.push_back(ref_cross); + ref.push_back(ref_unit_cross); +#endif + return ref; + }(), + tol); + + fail += libMeshTest::KokkosOracle::compare_device_scalars( + d_scalars, + [&]() { + std::vector ref = {ref_dot, ref_contract, 1.0, 1.0, ref_solid_angle}; +#if LIBMESH_DIM > 2 + ref.push_back(ref_cross_norm_sq); + ref.push_back(1.0); +#endif + return ref; + }(), + tol); + + return fail; +} + +inline int +run_all_oracles() +{ + int total_fail = 0; + + total_fail += run_vector_cases>( + "typevector_kernel_oracle"); + total_fail += run_vector_cases>( + "typevector_kernel_oracle"); + total_fail += run_vector_cases>( + "vectorvalue_kernel_oracle"); + total_fail += run_vector_cases>( + "vectorvalue_kernel_oracle"); + + const int mixed_typevector_left = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [typevector] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_typevector_left ? "FAIL" : "PASS", + mixed_typevector_left); + total_fail += mixed_typevector_left; + + const int mixed_typevector_right = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [typevector] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_typevector_right ? "FAIL" : "PASS", + mixed_typevector_right); + total_fail += mixed_typevector_right; + + const int mixed_vectorvalue_left = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [vectorvalue] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_vectorvalue_left ? 
"FAIL" : "PASS", + mixed_vectorvalue_left); + total_fail += mixed_vectorvalue_left; + + const int mixed_vectorvalue_right = + test_mixed_representation_ops>(); + std::printf("[vector_mixed_representation_oracle] [%s] [vectorvalue] %s (%d failures)\n", + libMesh::Kokkos::storage_policy_name(), + mixed_vectorvalue_right ? "FAIL" : "PASS", + mixed_vectorvalue_right); + total_fail += mixed_vectorvalue_right; + + const int host_fail = test_vector_host_only_traits(); + std::printf("[vector_host_traits_oracle] %s (%d failures)\n", + host_fail ? "FAIL" : "PASS", + host_fail); + total_fail += host_fail; + + return total_fail; +} + +} // namespace KokkosVectorOracle +} // namespace libMeshTest + +#endif diff --git a/tests/numerics/kokkos_vector_ops_oracle_test.K b/tests/numerics/kokkos_vector_ops_oracle_test.K new file mode 100644 index 00000000000..fedc7651ff5 --- /dev/null +++ b/tests/numerics/kokkos_vector_ops_oracle_test.K @@ -0,0 +1,20 @@ +#include "libmesh/libmesh_config.h" +#include "kokkos_vector_ops_oracle_runners.h" + +int +main(int argc, char ** argv) +{ + Kokkos::initialize(argc, argv); + libMesh::LibMeshInit init(argc, argv); + + const int total_fail = libMeshTest::KokkosVectorOracle::run_all_oracles(); + + Kokkos::finalize(); + + if (total_fail == 0) + std::printf("ALL TESTS PASSED\n"); + else + std::printf("%d TEST(S) FAILED\n", total_fail); + + return total_fail ? 1 : 0; +} diff --git a/tests/quadrature/quadrature_exactness.h b/tests/quadrature/quadrature_exactness.h new file mode 100644 index 00000000000..5db510f09ef --- /dev/null +++ b/tests/quadrature/quadrature_exactness.h @@ -0,0 +1,176 @@ +#ifndef LIBMESH_QUADRATURE_EXACTNESS_H +#define LIBMESH_QUADRATURE_EXACTNESS_H + +#include +#include +#include + +#include +#include +#include + +namespace quadrature_exactness +{ + +inline libMesh::Real +axis_integral(const unsigned int power) +{ + return (power % 2) ? 
libMesh::Real(0) : (libMesh::Real(2) / (power + 1)); +} + +inline libMesh::Real +edge_integral(const unsigned int x_power) +{ + return axis_integral(x_power); +} + +inline libMesh::Real +quad_integral(const unsigned int x_power, + const unsigned int y_power) +{ + return axis_integral(x_power) * axis_integral(y_power); +} + +inline libMesh::Real +tri_integral(const unsigned int x_power, + const unsigned int y_power) +{ + libMesh::Real analytical = 1.0; + + const unsigned int larger_power = std::max(x_power, y_power); + const unsigned int smaller_power = std::min(x_power, y_power); + + std::vector numerator(smaller_power > 1 ? smaller_power - 1 : 0); + std::vector denominator(2 + smaller_power); + + std::iota(numerator.begin(), numerator.end(), 2); + std::iota(denominator.begin(), denominator.end(), larger_power + 1); + + for (std::size_t i = 0; i < denominator.size(); ++i) + { + if (i < numerator.size()) + analytical *= numerator[i]; + + analytical /= denominator[i]; + } + + return analytical; +} + +inline libMesh::Real +hex_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + return axis_integral(x_power) * axis_integral(y_power) * axis_integral(z_power); +} + +inline libMesh::Real +tet_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + libMesh::Real analytical = 1.0; + + unsigned int sorted_powers[3] = {x_power, y_power, z_power}; + std::sort(sorted_powers, sorted_powers + 3); + + std::vector numerator_1(sorted_powers[0] > 1 ? sorted_powers[0] - 1 : 0); + std::vector numerator_2(sorted_powers[1] > 1 ? 
sorted_powers[1] - 1 : 0); + std::vector denominator(3 + sorted_powers[0] + sorted_powers[1]); + + std::iota(numerator_1.begin(), numerator_1.end(), 2); + std::iota(numerator_2.begin(), numerator_2.end(), 2); + std::iota(denominator.begin(), denominator.end(), sorted_powers[2] + 1); + + for (std::size_t i = 0; i < denominator.size(); ++i) + { + if (i < numerator_1.size()) + analytical *= numerator_1[i]; + + if (i < numerator_2.size()) + analytical *= numerator_2[i]; + + analytical /= denominator[i]; + } + + return analytical; +} + +inline libMesh::Real +prism_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + return tri_integral(x_power, y_power) * axis_integral(z_power); +} + +inline libMesh::Real +pyramid_integral(const unsigned int x_power, + const unsigned int y_power, + const unsigned int z_power) +{ + if (x_power % 2 || y_power % 2) + return libMesh::Real(0); + + const unsigned int binom = + libMesh::Utility::binomial(x_power + y_power + z_power + 3, z_power); + + return libMesh::Real(4) / + ((x_power + 1) * (y_power + 1) * binom * (x_power + y_power + z_power + 3)); +} + +inline libMesh::Real +monomial_integral(const libMesh::ElemType elem_type, + const unsigned int x_power, + const unsigned int y_power = 0, + const unsigned int z_power = 0) +{ + switch (elem_type) + { + case libMesh::EDGE2: + case libMesh::EDGE3: + case libMesh::EDGE4: + return edge_integral(x_power); + + case libMesh::TRI3: + case libMesh::TRI6: + case libMesh::TRI7: + return tri_integral(x_power, y_power); + + case libMesh::QUAD4: + case libMesh::QUAD8: + case libMesh::QUAD9: + return quad_integral(x_power, y_power); + + case libMesh::TET4: + case libMesh::TET10: + case libMesh::TET14: + return tet_integral(x_power, y_power, z_power); + + case libMesh::HEX8: + case libMesh::HEX20: + case libMesh::HEX27: + return hex_integral(x_power, y_power, z_power); + + case libMesh::PRISM6: + case libMesh::PRISM15: + case libMesh::PRISM18: + case 
libMesh::PRISM20: + case libMesh::PRISM21: + return prism_integral(x_power, y_power, z_power); + + case libMesh::PYRAMID5: + case libMesh::PYRAMID13: + case libMesh::PYRAMID14: + case libMesh::PYRAMID18: + return pyramid_integral(x_power, y_power, z_power); + + default: + return libMesh::Real(0); + } +} + +} // namespace quadrature_exactness + +#endif // LIBMESH_QUADRATURE_EXACTNESS_H diff --git a/tests/quadrature/quadrature_test.C b/tests/quadrature/quadrature_test.C index 1dd39a01832..f72440f417e 100644 --- a/tests/quadrature/quadrature_test.C +++ b/tests/quadrature/quadrature_test.C @@ -2,10 +2,10 @@ #include #include #include -#include #include -#include // std::iota + +#include "quadrature_exactness.h" #include "libmesh_cppunit.h" @@ -205,115 +205,47 @@ private: const std::function edge_integrals = [](int mode, int, int) { - return (mode % 2) ? 0 : (Real(2.0) / (mode+1)); + return quadrature_exactness::edge_integral(static_cast(mode)); }; const std::function quad_integrals = [](int modex, int modey, int) { - const Real exactx = (modex % 2) ? - 0 : (Real(2.0) / (modex+1)); - - const Real exacty = (modey % 2) ? - 0 : (Real(2.0) / (modey+1)); - - return exactx*exacty; + return quadrature_exactness::quad_integral(static_cast(modex), + static_cast(modey)); }; const std::function tri_integrals = [](int x_power, int y_power, int) { - // Compute the true integral, a! b! / (a + b + 2)! - Real analytical = 1.0; - - unsigned - larger_power = std::max(x_power, y_power), - smaller_power = std::min(x_power, y_power); - - // Cancel the larger of the two numerator terms with the - // denominator, and fill in the remaining entries. - std::vector - numerator(smaller_power > 1 ? smaller_power-1 : 0), - denominator(2+smaller_power); - - // Fill up the vectors with sequences starting at the right values. - std::iota(numerator.begin(), numerator.end(), 2); - std::iota(denominator.begin(), denominator.end(), larger_power+1); - - // The denominator is guaranteed to have more terms... 
- for (std::size_t i=0; i(x_power), + static_cast(y_power)); }; const std::function hex_integrals = [](int modex, int modey, int modez) { - const Real exactx = (modex % 2) ? - 0 : (Real(2.0) / (modex+1)); - - const Real exacty = (modey % 2) ? - 0 : (Real(2.0) / (modey+1)); - - const Real exactz = (modez % 2) ? - 0 : (Real(2.0) / (modez+1)); - - return exactx*exacty*exactz; + return quadrature_exactness::hex_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; const std::function tet_integrals = [](int x_power, int y_power, int z_power) { - // Compute the true integral, a! b! c! / (a + b + c + 3)! - Real analytical = 1.0; - - // Sort the a, b, c values - int sorted_powers[3] = {x_power, y_power, z_power}; - std::sort(sorted_powers, sorted_powers+3); - - // Cancel the largest power with the denominator, fill in the - // entries for the remaining numerator terms and the denominator. - std::vector - numerator_1(sorted_powers[0] > 1 ? sorted_powers[0]-1 : 0), - numerator_2(sorted_powers[1] > 1 ? sorted_powers[1]-1 : 0), - denominator(3 + sorted_powers[0] + sorted_powers[1]); - - // Fill up the vectors with sequences starting at the right values. - std::iota(numerator_1.begin(), numerator_1.end(), 2); - std::iota(numerator_2.begin(), numerator_2.end(), 2); - std::iota(denominator.begin(), denominator.end(), sorted_powers[2]+1); - - // The denominator is guaranteed to have the most terms... - for (std::size_t i=0; i(x_power), + static_cast(y_power), + static_cast(z_power)); }; const std::function prism_integrals = - [this](int modex, int modey, int modez) { - const Real exactz = (modez % 2) ? 
- 0 : (Real(2.0) / (modez+1)); - - return exactz * tri_integrals(modex, modey, 0); + [](int modex, int modey, int modez) { + return quadrature_exactness::prism_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; const std::function pyramid_integrals = [](int modex, int modey, int modez) { - - const int binom = Utility::binomial(modex+modey+modez+3, modez); - - if (modex%2 || modey%2) - return Real(0); - - return Real(4)/((modex+1)*(modey+1)*binom*(modex+modey+modez+3)); + return quadrature_exactness::pyramid_integral(static_cast(modex), + static_cast(modey), + static_cast(modez)); }; diff --git a/tests/systems/hilbert_system_kokkos_test.C b/tests/systems/hilbert_system_kokkos_test.C new file mode 100644 index 00000000000..e3d321b449f --- /dev/null +++ b/tests/systems/hilbert_system_kokkos_test.C @@ -0,0 +1,253 @@ +#include "test_comm.h" +#include "libmesh_cppunit.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../src/apps/L2system.C" + +using namespace libMesh; + +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && \ + defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) +namespace +{ + +constexpr Real projection_tolerance = 5.e-10; + +struct TimedProjectionResult +{ + std::vector solution; + Real elapsed_seconds = 0.; +}; + +void +configure_hilbert_system(HilbertSystem & sys, const bool use_kokkos) +{ + sys.hilbert_order() = 1; + sys.fe_family() = "LAGRANGE"; + sys.fe_order() = 1; + sys.use_kokkos_backend(use_kokkos); + sys.use_exact_parsed_fem_host_path(true); + sys.time_solver = std::make_unique(sys); +} + +void +configure_linear_solver(HilbertSystem & sys) +{ + DiffSolver & solver = *sys.time_solver->diff_solver(); + solver.quiet = true; + solver.verbose = false; + solver.relative_step_tolerance = 1.e-12; + + sys.parameters.set("linear solver maximum iterations") = 500; + 
sys.parameters.set("linear solver tolerance") = 1.e-14; + + auto * linear_solver = sys.get_linear_solver(); + linear_solver->set_solver_type(CG); + linear_solver->set_preconditioner_type(IDENTITY_PRECOND); +} + +std::vector +localize_solution(const System & sys) +{ + std::vector values; + sys.solution->localize(values); + return values; +} + +void +assert_solutions_close(const std::vector & host_solution, + const std::vector & kokkos_solution) +{ + CPPUNIT_ASSERT_EQUAL(host_solution.size(), kokkos_solution.size()); + + Real max_abs_host = 0; + Real max_abs_diff = 0; + + for (const auto i : index_range(host_solution)) + { + max_abs_host = std::max(max_abs_host, std::abs(libmesh_real(host_solution[i]))); + max_abs_diff = std::max(max_abs_diff, + std::abs(libmesh_real(host_solution[i] - kokkos_solution[i]))); + } + + const Real scaled_tol = projection_tolerance * std::max(1., max_abs_host); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0., max_abs_diff, scaled_tol); +} + +template +TimedProjectionResult +time_projection_solve(SolveFunctor && solve) +{ + const auto start = std::chrono::steady_clock::now(); + auto solution = solve(); + const auto stop = std::chrono::steady_clock::now(); + + TimedProjectionResult result; + result.solution = std::move(solution); + result.elapsed_seconds = + std::chrono::duration_cast>(stop - start).count(); + return result; +} + +void +report_projection_timing(const std::string & label, + const TimedProjectionResult & host_result, + const TimedProjectionResult & kokkos_result) +{ + libMesh::out << label + << " host_time=" << host_result.elapsed_seconds << " s" + << " kokkos_time=" << kokkos_result.elapsed_seconds << " s"; + + if (kokkos_result.elapsed_seconds > 0.) 
+ libMesh::out << " host_over_kokkos=" + << host_result.elapsed_seconds / kokkos_result.elapsed_seconds; + + libMesh::out << std::endl; +} + +std::vector +solve_analytic_projection_impl(const bool use_kokkos) +{ + ReplicatedMesh mesh(*TestCommWorld); + MeshTools::Generation::build_square(mesh, + 3, + 2, + 0., + 1., + 0., + 1., + QUAD4); + + EquationSystems es(mesh); + HilbertSystem & sys = es.add_system("projection"); + configure_hilbert_system(sys, use_kokkos); + es.init(); + + ParsedFunction goal("sin(pi*x) + 0.25*y"); + sys.set_goal_func(goal); + sys.set_fdm_eps(1.e-7); + configure_linear_solver(sys); + sys.solve(); + + return localize_solution(sys); +} + +TimedProjectionResult +solve_analytic_projection(const bool use_kokkos) +{ + return time_projection_solve([&]() { return solve_analytic_projection_impl(use_kokkos); }); +} + +std::vector +solve_parsed_fem_projection_impl(const bool use_kokkos) +{ + ReplicatedMesh mesh(*TestCommWorld); + MeshTools::Generation::build_square(mesh, + 3, + 2, + 0., + 1., + 0., + 1., + QUAD4); + + EquationSystems es(mesh); + ExplicitSystem & input = es.add_system("input"); + input.add_variable("u", FIRST, LAGRANGE); + + HilbertSystem & sys = es.add_system("projection"); + configure_hilbert_system(sys, use_kokkos); + sys.input_system = &input; + es.init(); + + ParsedFunction input_projection("sin(pi*x) + 0.5*y"); + input.project_solution(&input_projection); + + ParsedFEMFunction goal(input, "u*u + x - 0.25*y"); + sys.set_goal_func(goal); + sys.set_fdm_eps(1.e-7); + configure_linear_solver(sys); + sys.solve(); + + return localize_solution(sys); +} + +TimedProjectionResult +solve_parsed_fem_projection(const bool use_kokkos) +{ + return time_projection_solve([&]() { return solve_parsed_fem_projection_impl(use_kokkos); }); +} + +void +report_single_projection_timing(const std::string & label, + const TimedProjectionResult & result) +{ + libMesh::out << label << " time=" << result.elapsed_seconds << " s" << std::endl; +} + +} // namespace 
+#endif + +class HilbertSystemKokkosTest : public CppUnit::TestCase +{ +public: + LIBMESH_CPPUNIT_TEST_SUITE(HilbertSystemKokkosTest); + CPPUNIT_TEST(testAnalyticParsedFunctionEquivalence); + CPPUNIT_TEST(testParsedFEMFunctionEquivalence); + CPPUNIT_TEST_SUITE_END(); + + void testAnalyticParsedFunctionEquivalence() + { + LOG_UNIT_TEST; + +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && \ + defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + libMesh::out << "Starting analytic host solve" << std::endl; + const auto host_solution = solve_analytic_projection(false); + report_single_projection_timing("Hilbert analytic host projection", host_solution); + libMesh::out << "Starting analytic kokkos solve" << std::endl; + const auto kokkos_solution = solve_analytic_projection(true); + report_single_projection_timing("Hilbert analytic kokkos projection", kokkos_solution); + report_projection_timing("Hilbert analytic projection", + host_solution, + kokkos_solution); + assert_solutions_close(host_solution.solution, kokkos_solution.solution); +#endif + } + + void testParsedFEMFunctionEquivalence() + { + LOG_UNIT_TEST; + +#if defined(LIBMESH_HAVE_KOKKOS) && defined(LIBMESH_HAVE_PETSC) && \ + defined(LIBMESH_HAVE_FPARSER) && !defined(LIBMESH_USE_COMPLEX_NUMBERS) + libMesh::out << "Starting parsed FEM host solve" << std::endl; + const auto host_solution = solve_parsed_fem_projection(false); + report_single_projection_timing("Hilbert parsed FEM host projection", host_solution); + libMesh::out << "Starting parsed FEM kokkos solve" << std::endl; + const auto kokkos_solution = solve_parsed_fem_projection(true); + report_single_projection_timing("Hilbert parsed FEM kokkos projection", kokkos_solution); + report_projection_timing("Hilbert parsed FEM projection", + host_solution, + kokkos_solution); + assert_solutions_close(host_solution.solution, kokkos_solution.solution); +#endif + } +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(HilbertSystemKokkosTest);