diff --git a/.github/workflows/good_defines.txt b/.github/workflows/good_defines.txt index dfe0a1f09..b8458720d 100644 --- a/.github/workflows/good_defines.txt +++ b/.github/workflows/good_defines.txt @@ -2,9 +2,11 @@ AMREX_SPACEDIM AMREX_USE_CUDA AMREX_USE_GPU DO_PROBLEM_POST_TIMESTEP +ML NAUX_NET AUX_THERMO ROTATION SDC +_ML_LIB _OPENMP __cplusplus diff --git a/Exec/Make.Maestro b/Exec/Make.Maestro index 7afc6c60f..420450e71 100644 --- a/Exec/Make.Maestro +++ b/Exec/Make.Maestro @@ -24,6 +24,9 @@ BL_NO_FORT ?= TRUE # Require C++17 CXXSTD := c++17 +# default is to not compile with ML (PyTorch) +USE_ML ?= FALSE + # Check to make sure that Microphysics actually exists, # using an arbitrary file that must exist to check. # Throw an error if we don't have Microphysics. @@ -49,6 +52,10 @@ ifeq ($(USE_SDC), TRUE) USERSuffix = .SDC endif +ifeq ($(USE_ML), TRUE) + USERSuffix = .ML +endif + include $(AMREX_HOME)/Tools/GNUMake/Make.defs MAESTROEX_AUTO_SOURCE_DIR := $(TmpBuildDir)/maestroex_sources/$(optionsSuffix).EXE @@ -143,6 +150,37 @@ Blocs += $(foreach dir, $(EXTERN_CORE), $(dir)) # external libraries #------------------------------------------------------------------------------ +#------------------------------------------------------------------------------ +# PyTorch +#------------------------------------------------------------------------------ + +# Torch directories -- note the Make.package for these adds these +# directories into INCLUDE_LOCATIONS and LIBRARY_LOCATIONS for us, +# so we don't need to do it here +ifeq ($(USE_ML), TRUE) + PYTORCH_ROOT := $(TOP)/external/libtorch + + TORCH_LIBPATH = $(PYTORCH_ROOT)/lib + + ifeq ($(USE_CUDA),TRUE) + TORCH_LIBS = -ltorch -ltorch_cpu -lc10 -lc10_cuda -lcuda # -lcaffe2 -lcaffe2_gpu + else + TORCH_LIBS = -ltorch -ltorch_cpu -lc10 # -lcaffe2 + endif + + INCLUDE_LOCATIONS += $(PYTORCH_ROOT)/include \ + $(PYTORCH_ROOT)/include/torch/csrc/api/include + LIBRARY_LOCATIONS += $(TORCH_LIBPATH) + + DEFINES += -DML -D_GLIBCXX_USE_CXX11_ABI=1 + + ifeq ($(USE_CUDA), TRUE) + LDFLAGS += -Xlinker '--no-as-needed,-rpath $(TORCH_LIBPATH) $(TORCH_LIBS)' + else + LDFLAGS += -Wl,--no-as-needed,-rpath=$(TORCH_LIBPATH) $(TORCH_LIBS) + endif +endif + #------------------------------------------------------------------------------ # include all of the necessary directories #------------------------------------------------------------------------------ diff --git a/Exec/science/README b/Exec/science/README index 49d976d1a..0d2be7d2a 100644 --- a/Exec/science/README +++ b/Exec/science/README @@ -17,6 +17,11 @@ flame/ the SNe code show that SDC can overcome these problems. +flame_ml/ + + The same combustion-mode problem as in flame/ but using ML model to + replace the reaction kernel of the solver. + sub_chandra/ Surpise! We're modelling convection. This time in 3D spherical diff --git a/Exec/science/flame_ml/GNUmakefile b/Exec/science/flame_ml/GNUmakefile new file mode 100644 index 000000000..4c820c7ba --- /dev/null +++ b/Exec/science/flame_ml/GNUmakefile @@ -0,0 +1,29 @@ +DEBUG = FALSE +DIM = 2 +COMP = gnu +USE_MPI = TRUE +USE_OMP = FALSE +USE_REACT = TRUE +USE_ML = TRUE + +TINY_PROFILE = FALSE +PROFILE = FALSE # TRUE overrides TINY_PROFILE + + +# define the location of the MAESTROEX home directory +MAESTROEX_HOME := ../../.. + + +# Set the EOS, conductivity, and network directories +# We first check if these exist in $(MAESTROEX_HOME)/Microphysics/(EOS/conductivity/networks) +# If not we use the version in $(MICROPHYSICS_HOME)/Microphysics/(EOS/conductivity/networks) +EOS_DIR := helmholtz +CONDUCTIVITY_DIR := stellar +NETWORK_DIR := ignition_simple +INTEGRATOR_DIR := VODE + +Bpack := ./Make.package +Blocs := . +PROBIN_PARAMETER_DIRS := . + +include $(MAESTROEX_HOME)/Exec/Make.Maestro diff --git a/Exec/science/flame_ml/MaestroBCFill.cpp b/Exec/science/flame_ml/MaestroBCFill.cpp new file mode 100644 index 000000000..797c25b84 --- /dev/null +++ b/Exec/science/flame_ml/MaestroBCFill.cpp @@ -0,0 +1,259 @@ + +#include +#include + +#include + +using namespace amrex; +using namespace InletBCs; + +void Maestro::ScalarFill(const Array4& scal, const Box& bx, + const Box& domainBox, const Real* dx, const BCRec bcs, + const Real* gridlo, const int comp) { + // timer for profiling + BL_PROFILE_VAR("Maestro::ScalarFill()", ScalarFill); + + fab_filcc(bx, scal, 1, domainBox, dx, gridlo, &bcs); + + FillExtBC(scal, bx, domainBox, dx, bcs, comp, false); +} + +void Maestro::VelFill(const Array4& vel, const Box& bx, + const Box& domainBox, const Real* dx, const BCRec bcs, + const Real* gridlo, const int comp) { + // timer for profiling + BL_PROFILE_VAR("Maestro::VelFill()", VelFill); + + fab_filcc(bx, vel, 1, domainBox, dx, gridlo, &bcs); + + FillExtBC(vel, bx, domainBox, dx, bcs, comp, true); +} + +void Maestro::FillExtBC(const Array4& q, const Box& bx, + const Box& domainBox, const Real* dx, const BCRec bcs, + const int comp, const bool is_vel) { + // timer for profiling + BL_PROFILE_VAR("Maestro::FillExtBC()", FillExtBC); + + // do nothing if there are no exterior boundaries + bool found_ext_boundary = false; + for (auto i = 0; i < AMREX_SPACEDIM; ++i) { + if (bcs.lo(i) == BCType::ext_dir || bcs.hi(i) == BCType::ext_dir) { + found_ext_boundary = true; + break; + } + } + if (!found_ext_boundary) return; + + // get parameters for EXT_DIR bcs + const Real INLET_RHO_l = INLET_RHO; + const Real INLET_RHOH_l = INLET_RHOH; + const Real INLET_TEMP_l = INLET_TEMP; + RealVector INLET_RHOX_l(NumSpec); + for (int i = 0; i < NumSpec; ++i) INLET_RHOX_l[i] = INLET_RHOX[i]; + const Real INLET_VEL_l = INLET_VEL; + + const auto domlo = domainBox.loVect3d(); + const auto domhi = domainBox.hiVect3d(); + + const auto lo = bx.loVect3d(); + const auto hi = bx.hiVect3d(); + + if (lo[0] < domlo[0]) { + auto imax = domlo[0]; + + if (bcs.lo(0) == BCType::ext_dir) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (i < imax) { + q(i, j, k) = 1.0; + } + }); + Gpu::synchronize(); + } + } + + if (hi[0] > domhi[0]) { + auto imin = domhi[0]; + + if (bcs.hi(0) == BCType::ext_dir) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (i > imin) { + q(i, j, k) = 1.0; + } + }); + Gpu::synchronize(); + } + } + + if (lo[1] < domlo[1]) { + auto jmax = domlo[1]; + + if (bcs.lo(1) == BCType::ext_dir) { + if (is_vel) { + if (comp == 0) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = 0.0; + } + }); + } else if (comp == 1) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = INLET_VEL_l; + } + }); + } + Gpu::synchronize(); + } else { + if (INLET_VEL != 0.0) { + if (comp == Rho) { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = INLET_RHO_l; + } + }); + } else if (comp == RhoH) { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = INLET_RHOH_l; + } + }); + } else if (comp >= FirstSpec && + comp < FirstSpec + NumSpec) { + Real* AMREX_RESTRICT INLET_RHOX_p = + INLET_RHOX.dataPtr(); + ParallelFor( + bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = INLET_RHOX_p[comp - FirstSpec]; + } + }); + } else if (comp == Temp) { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = INLET_TEMP_l; + } + }); + } else { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = 0.0; + } + }); + } + } else { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j < jmax) { + q(i, j, k) = 0.0; + } + }); + } + Gpu::synchronize(); + } // end if is_vel + } + } + + if (hi[1] > domhi[1]) { + auto jmin = domhi[1]; + + if (bcs.hi(1) == BCType::ext_dir) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (j > jmin) { + q(i, j, k) = 1.0; + } + }); + } + Gpu::synchronize(); + } + +#if AMREX_SPACEDIM == 3 + + if (lo[2] < domlo[2]) { + auto kmax = domlo[2]; + + if (bcs.lo(2) == BCType::ext_dir) { + if (is_vel) { + if (comp == 0 || comp == 1) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = 0.0; + } + }); + } else if (comp == 2) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = INLET_VEL_l; + } + }); + } + } else { + if (INLET_VEL != 0.0) { + if (comp == Rho) { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = INLET_RHO_l; + } + }); + } else if (comp == RhoH) { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = INLET_RHOH_l; + } + }); + } else if (comp >= FirstSpec && + comp < FirstSpec + NumSpec) { + Real* AMREX_RESTRICT INLET_RHOX_p = + INLET_RHOX.dataPtr(); + ParallelFor( + bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = INLET_RHOX_p[comp - FirstSpec]; + } + }); + } else if (comp == Temp) { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = INLET_TEMP_l; + } + }); + } else { + ParallelFor(bx, + [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = 0.0; + } + }); + } + } else { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k < kmax) { + q(i, j, k) = 0.0; + } + }); + } + } // end if is_vel + Gpu::synchronize(); + } + } + + if (hi[2] > domhi[2]) { + auto kmin = domhi[2]; + + if (bcs.hi(2) == BCType::ext_dir) { + ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (k > kmin) { + q(i, j, k) = 0.0; + } + }); + } + Gpu::synchronize(); + } +#endif +} diff --git a/Exec/science/flame_ml/MaestroBaseState.cpp b/Exec/science/flame_ml/MaestroBaseState.cpp new file mode 100644 index 000000000..59aa35cdc --- /dev/null +++ b/Exec/science/flame_ml/MaestroBaseState.cpp @@ -0,0 +1,166 @@ +#include +#include + +using namespace amrex; + +void Maestro::InitBaseState(BaseState& rho0, BaseState& rhoh0, + BaseState& p0, const int lev) { + // timer for profiling + BL_PROFILE_VAR("Maestro::InitBaseState()", InitBaseState); + + // sanity check + if (dens_fuel < base_cutoff_density || + dens_fuel < anelastic_cutoff_density) { + Abort( + "ERROR: ERROR: fuel density < (base_cutoff_density or " + "anelastic_cutoff_density)"); + } + + const int n = lev; + + Print() << "cutoff densities:" << std::endl; + Print() << " low density cutoff (for mapping the model) = " + << base_cutoff_density << std::endl; + Print() << " buoyancy cutoff density " + << std::endl; + Print() << " (for zeroing rho - rho_0, centrifugal term) = " + << buoyancy_cutoff_factor * base_cutoff_density << std::endl; + Print() << " anelastic cutoff = " + << anelastic_cutoff_density << std::endl; + Print() << " " << std::endl; + + // figure out the indices for different species + const int ic12 = network_spec_index("carbon-12"); + const int io16 = network_spec_index("oxygen-16"); + const int img24 = network_spec_index("magnesium-24"); + + if (ic12 < 0 || io16 < 0 || img24 < 0) { + Abort("ERROR: species indices not defined"); + } + + // length of the domain + Real rlen = geom[lev].ProbHi(AMREX_SPACEDIM - 1) - + geom[lev].ProbLo(AMREX_SPACEDIM - 1); + + // figure out the thermodynamics of the fuel and ash state + RealVector xn_fuel(NumSpec, 0.0); + RealVector xn_ash(NumSpec, 0.0); + + eos_t eos_state; + + // fuel + xn_fuel[ic12] = xc12_fuel; + xn_fuel[io16] = 1.0 - xc12_fuel; + + eos_state.rho = dens_fuel; + eos_state.T = temp_fuel; + for (auto comp = 0; comp < NumSpec; ++comp) { + eos_state.xn[comp] = xn_fuel[comp]; + } + + eos(eos_input_rt, eos_state); + + // note: p_ambient should be = p0_init + Real p_ambient = eos_state.p; + Real rhoh_fuel = dens_fuel * eos_state.h; + + // ash + xn_ash[io16] = 1.0 - xc12_fuel; + xn_ash[img24] = xc12_fuel; + + eos_state.rho = dens_fuel; // initial guess + eos_state.T = temp_ash; + for (auto comp = 0; comp < NumSpec; ++comp) { + eos_state.xn[comp] = xn_ash[comp]; + } + eos_state.p = p_ambient; + + eos(eos_input_tp, eos_state); + + Real dens_ash = eos_state.rho; + Real rhoh_ash = dens_ash * eos_state.h; + + auto s0_init_arr = s0_init.array(); + auto rho0_arr = rho0.array(); + auto rhoh0_arr = rhoh0.array(); + auto p0_arr = p0.array(); + auto p0_init_arr = p0_init.array(); + auto tempbar_arr = tempbar.array(); + auto tempbar_init_arr = tempbar_init.array(); + + for (auto r = 0; r < base_geom.nr(n); ++r) { + // height above the bottom of the domain + Real rloc = geom[lev].ProbLo(AMREX_SPACEDIM - 1) + + (Real(r) + 0.5) * base_geom.dr(n); + + if (rloc < + geom[lev].ProbLo(AMREX_SPACEDIM - 1) + interface_pos_frac * rlen) { + // fuel + s0_init_arr(n, r, Rho) = dens_fuel; + s0_init_arr(n, r, RhoH) = rhoh_fuel; + for (auto comp = 0; comp < NumSpec; ++comp) { + s0_init_arr(n, r, FirstSpec + comp) = dens_fuel * xn_fuel[comp]; + } + + } else { + // ash + s0_init_arr(n, r, Rho) = dens_ash; + s0_init_arr(n, r, RhoH) = rhoh_ash; + for (auto comp = 0; comp < NumSpec; ++comp) { + s0_init_arr(n, r, FirstSpec + comp) = dens_fuel * xn_ash[comp]; + } + } + + // give the temperature a smooth profile + s0_init_arr(n, r, Temp) = + temp_fuel + + (temp_ash - temp_fuel) * 0.5 * + (1.0 + tanh((rloc - (geom[lev].ProbLo(AMREX_SPACEDIM - 1) + + interface_pos_frac * rlen)) / + (smooth_len_frac * rlen))); + + // give the carbon mass fraction a smooth profile too + RealVector xn_smooth(NumSpec, 0.0); + + xn_smooth[ic12] = + xn_fuel[ic12] + + (xn_ash[ic12] - xn_fuel[ic12]) * 0.5 * + (1.0 + tanh((rloc - (geom[lev].ProbLo(AMREX_SPACEDIM - 1) + + interface_pos_frac * rlen)) / + (smooth_len_frac * rlen))); + + xn_smooth[io16] = xn_fuel[io16]; + xn_smooth[img24] = 1.0 - xn_smooth[ic12] - xn_smooth[io16]; + + // get the new density and enthalpy + eos_state.rho = s0_init_arr(n, r, Rho); + eos_state.T = s0_init_arr(n, r, Temp); + eos_state.p = p_ambient; + for (auto comp = 0; comp < NumSpec; ++comp) { + eos_state.xn[comp] = xn_smooth[comp]; + } + + // (T,p) --> rho, h + eos(eos_input_tp, eos_state); + + s0_init_arr(n, r, Rho) = eos_state.rho; + s0_init_arr(n, r, RhoH) = eos_state.rho * eos_state.h; + for (auto comp = 0; comp < NumSpec; ++comp) { + s0_init_arr(n, r, FirstSpec + comp) = + eos_state.rho * xn_smooth[comp]; + } + p0_init_arr(n, r) = p_ambient; + } + + // copy s0_init and p0_init into rho0, rhoh0, p0, and tempbar + for (auto r = 0; r < base_geom.nr_fine; ++r) { + rho0_arr(lev, r) = s0_init_arr(n, r, Rho); + rhoh0_arr(lev, r) = s0_init_arr(n, r, RhoH); + tempbar_arr(lev, r) = s0_init_arr(n, r, Temp); + tempbar_init_arr(lev, r) = s0_init_arr(n, r, Temp); + p0_arr(lev, r) = p0_init_arr(lev, r); + } + + // initialize any inlet BC parameters + SetInletBCs(); +} diff --git a/Exec/science/flame_ml/MaestroInitData.cpp b/Exec/science/flame_ml/MaestroInitData.cpp new file mode 100644 index 000000000..1fca4102b --- /dev/null +++ b/Exec/science/flame_ml/MaestroInitData.cpp @@ -0,0 +1,43 @@ + +#include +using namespace amrex; + +// initializes data on a specific level +void Maestro::InitLevelData(const int lev, const Real time, const MFIter& mfi, + const Array4 scal, const Array4 vel) { + // timer for profiling + BL_PROFILE_VAR("Maestro::InitLevelData()", InitLevelData); + + const auto tileBox = mfi.tilebox(); + + const auto vel_fuel_loc = vel_fuel; + + // initialize velocity + ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + for (auto n = 0; n < AMREX_SPACEDIM - 1; ++n) { + vel(i, j, k, n) = 0.0; + } + vel(i, j, k, AMREX_SPACEDIM - 1) = vel_fuel_loc; + }); + + const auto s0_arr = s0_init.const_array(); + + ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + int r = AMREX_SPACEDIM == 2 ? j : k; + + // set the scalars using s0 + scal(i, j, k, Rho) = s0_arr(lev, r, Rho); + scal(i, j, k, RhoH) = s0_arr(lev, r, RhoH); + scal(i, j, k, Temp) = s0_arr(lev, r, Temp); + for (auto comp = 0; comp < NumSpec; ++comp) { + scal(i, j, k, FirstSpec + comp) = s0_arr(lev, r, FirstSpec + comp); + } + // initialize pi to zero for now + scal(i, j, k, Pi) = 0.0; + }); +} + +void Maestro::InitLevelDataSphr(const int lev, const Real time, MultiFab& scal, + MultiFab& vel) { + Abort("InitLevelDataSphr not implemented"); +} diff --git a/Exec/science/flame_ml/MaestroInletBCs.H b/Exec/science/flame_ml/MaestroInletBCs.H new file mode 100644 index 000000000..927424182 --- /dev/null +++ b/Exec/science/flame_ml/MaestroInletBCs.H @@ -0,0 +1,15 @@ +#ifndef _MaestroInletBCs_H_ +#define _MaestroInletBCs_H_ + +#include + +namespace InletBCs { + +extern amrex::Real INLET_RHO; +extern amrex::Real INLET_RHOH; +extern amrex::Real INLET_TEMP; +extern RealVector INLET_RHOX; +extern amrex::Real INLET_VEL; +}; // namespace InletBCs + +#endif \ No newline at end of file diff --git a/Exec/science/flame_ml/MaestroInletBCs.cpp b/Exec/science/flame_ml/MaestroInletBCs.cpp new file mode 100644 index 000000000..682d0271b --- /dev/null +++ b/Exec/science/flame_ml/MaestroInletBCs.cpp @@ -0,0 +1,58 @@ +#include +#include + +#include + +using namespace amrex; + +Real InletBCs::INLET_RHO; +Real InletBCs::INLET_RHOH; +Real InletBCs::INLET_TEMP; +RealVector InletBCs::INLET_RHOX; +Real InletBCs::INLET_VEL; + +/* +inlet_bc_module is a simple container module that holds the parameters +that are used by physbc to implement the inlet boundary conditions. +As these are problem-specific, any problem needing inlet boundary +conditions should create its own version of this module, using this +outline. +*/ +void Maestro::SetInletBCs() { + // timer for profiling + BL_PROFILE_VAR("Maestro::SetInletBCs()", SetInletBCs); + + // here we initialize the parameters that are module variables. + // this routine is called when the base state is defined initially, + // and upon restart, just after the base state is read in. + + // figure out the indices for different species + const auto ic12 = network_spec_index("carbon-12"); + const auto io16 = network_spec_index("oxygen-16"); + + if (ic12 < 0 || io16 < 0) { + Abort("ERROR: species indices undefined in inlet_bc"); + } + + eos_t eos_state; + + eos_state.rho = dens_fuel; + eos_state.T = temp_fuel; + + for (auto comp = 0; comp < NumSpec; ++comp) { + eos_state.xn[comp] = 0.0; + } + eos_state.xn[ic12] = xc12_fuel; + eos_state.xn[io16] = 1.0 - xc12_fuel; + + eos(eos_input_rt, eos_state); + + InletBCs::INLET_RHO = dens_fuel; + InletBCs::INLET_RHOH = dens_fuel * eos_state.h; + InletBCs::INLET_TEMP = temp_fuel; + InletBCs::INLET_RHOX.resize(NumSpec); + for (auto comp = 0; comp < NumSpec; ++comp) { + InletBCs::INLET_RHOX[comp] = dens_fuel * eos_state.xn[comp]; + } + InletBCs::INLET_VEL = vel_fuel; +} \ No newline at end of file diff --git a/Exec/science/flame_ml/MaestroTagging.cpp b/Exec/science/flame_ml/MaestroTagging.cpp new file mode 100644 index 000000000..963d6aa47 --- /dev/null +++ b/Exec/science/flame_ml/MaestroTagging.cpp @@ -0,0 +1,66 @@ + +#include +#include + +using namespace amrex; + +void Maestro::RetagArray(const Box& bx, const int lev) { + // timer for profiling + BL_PROFILE_VAR("Maestro::RetagArray()", RetagArray); + // re-compute tag_array since the actual grid structure changed due to buffering + // this is required in order to compute numdisjointchunks, r_start_coord, r_end_coord + + // Tag on regions including buffer cells + auto lo = bx.loVect3d()[AMREX_SPACEDIM - 1]; + auto hi = bx.hiVect3d()[AMREX_SPACEDIM - 1]; + const auto max_lev = base_geom.max_radial_level + 1; + + for (auto r = lo; r <= hi; ++r) { + tag_array[lev - 1 + max_lev * (r / 2)] = TagBox::SET; + } +} + +void Maestro::TagBoxes(TagBoxArray& tags, const MFIter& mfi, const int lev, + const Real time) { + // timer for profiling + BL_PROFILE_VAR("Maestro::TagBoxes()", TagBoxes); + + // Tag on regions of high temperature + const Array4 tag = tags.array(mfi); + const int* AMREX_RESTRICT tag_array_p = tag_array.dataPtr(); + const int max_lev = base_geom.max_radial_level + 1; + + const Box& tilebox = mfi.tilebox(); + + AMREX_PARALLEL_FOR_3D(tilebox, i, j, k, { + int r = AMREX_SPACEDIM == 2 ? j : k; + + if (tag_array_p[lev + max_lev * r] > 0) { + tag(i, j, k) = TagBox::SET; + } + }); +} + +void Maestro::StateError(TagBoxArray& tags, const MultiFab& state_mf, + const MFIter& mfi, const int lev, const Real time) { + // timer for profiling + BL_PROFILE_VAR("Maestro::StateError()", StateError); + + // Tag on regions of high temperature + const Array4 tag = tags.array(mfi); + const Array4 state = state_mf.array(mfi); + int* AMREX_RESTRICT tag_array_p = tag_array.dataPtr(); + const int max_lev = base_geom.max_radial_level + 1; + + const Box& tilebox = mfi.tilebox(); + + // Tag on regions of high temperature + AMREX_PARALLEL_FOR_3D(tilebox, i, j, k, { + if (state(i, j, k, Temp) >= 6.5e8 && state(i, j, k, Temp) <= 2.4e9) { + int r = AMREX_SPACEDIM == 2 ? j : k; + + tag(i, j, k) = TagBox::SET; + tag_array_p[lev + max_lev * r] = TagBox::SET; + } + }); +} diff --git a/Exec/science/flame_ml/Make.package b/Exec/science/flame_ml/Make.package new file mode 100644 index 000000000..2408e6a14 --- /dev/null +++ b/Exec/science/flame_ml/Make.package @@ -0,0 +1,3 @@ +ifeq ($(USE_ML), TRUE) + CEXE_headers += TorchLib.H +endif \ No newline at end of file diff --git a/Exec/science/flame_ml/README.md b/Exec/science/flame_ml/README.md new file mode 100644 index 000000000..5540e51dd --- /dev/null +++ b/Exec/science/flame_ml/README.md @@ -0,0 +1,29 @@ +To run this flame test problem, you will first need to download the appropriate Pytorch library (libtorch) in `MAESTROeX/external` directory. We can choose to use either CPU or CUDA. + +```shell +cd $MAESTROEX_HOME/external + +# CPU only +wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcpu.zip +unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cpu.zip + +# CUDA 11.1 +wget https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcu111.zip +unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cu111.zip + +# CUDA 10.2 +wget https://download.pytorch.org/libtorch/cu102/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcu102.zip +unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cu102.zip +``` + +To compile the problem with ML enabled, enter +`make -j`. + +Then you can run the problem. + +`mpiexec -n 1 ./Maestro2d.gnu.MPI.ML.ex inputs_2d_smallscale_ml` + + + diff --git a/Exec/science/flame_ml/TorchLib.H b/Exec/science/flame_ml/TorchLib.H new file mode 100644 index 000000000..6d1c41e95 --- /dev/null +++ b/Exec/science/flame_ml/TorchLib.H @@ -0,0 +1,6 @@ +// Pytorch C++ API library +#ifndef _ML_LIB +#define _ML_LIB + +#include +#endif diff --git a/Exec/science/flame_ml/_parameters b/Exec/science/flame_ml/_parameters new file mode 100644 index 000000000..a897725e4 --- /dev/null +++ b/Exec/science/flame_ml/_parameters @@ -0,0 +1,37 @@ +@namespace: problem + +# For a simple laminar flame setup we define the fuel and ash state. The +# fuel parameters will be used to define the inflow boundary condition. +# the fuel and ash will be in pressure equilibrium, so the ash density +# will be found via the EOS. The ash composition will be set by simply +# turning all the fuel into ash. + +dens_fuel real 1.d8 +temp_fuel real 1.d8 +xc12_fuel real 0.5d0 +vel_fuel real 1.d6 + +temp_ash real 3.d9 + +# fraction of the domain height to put the fuel/ash interface +interface_pos_frac real 0.5 + +# fraction of the domain height to set the smooothing length for +# the interface between the fuel and ash +smooth_len_frac real 0.025 + +# minimum X(C12) that should trigger refinement +XC12_ref_threshold real 1.d-3 + +# do the burning only once for each vertical coordinate +do_average_burn logical .false. + +# acceptable relative error for a cell compared to the average +# at that vertical coord if do_average_burn = T (should be smaller +# than the EOS tolerance). +transverse_tol real 1.d-7 + +# ML normalization constants +dens_fac real 5.000500606732890755d7 +temp_fac real 3.000899961693421364d9 +enuc_fac real 7.033368299136000000d12 diff --git a/Exec/science/flame_ml/inputs_2d_smallscale b/Exec/science/flame_ml/inputs_2d_smallscale new file mode 100644 index 000000000..fcffe9082 --- /dev/null +++ b/Exec/science/flame_ml/inputs_2d_smallscale @@ -0,0 +1,111 @@ + +# INITIAL MODEL +maestro.perturb_model = false +maestro.drdxfac = 5 + +# PROBLEM SIZE +geometry.prob_lo = 0.0 0.0 +geometry.prob_hi = 0.625e0 5.e0 + +# BOUNDARY CONDITIONS +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = Slipwall +# 2 = Outflow 5 = NoSlipWall +maestro.lo_bc = 0 1 2 +maestro.hi_bc = 0 2 2 +geometry.is_periodic = 1 0 0 + +# VERBOSITY +maestro.v = 1 # verbosity + +# DEBUG FOR NAN +amrex.fpe_trap_invalid = 1 # floating point exception + +# GRIDDING AND REFINEMENT +amr.n_cell = 128 1024 +amr.max_grid_size = 64 +amr.max_level = 0 # maximum level number allowed +maestro.regrid_int = 2 # how often to regrid +amr.ref_ratio = 2 2 2 2 2 2 # refinement ratio +amr.blocking_factor = 8 # block factor in grid generation +amr.refine_grid_layout = 0 # chop grids up into smaller grids if nprocs > ngrids + +# TAGGING +maestro.temperr = 6.5e8 6.5e8 6.5e8 + +# TIME STEPPING +maestro.max_step = 500 +maestro.stop_time = 1.e0 +maestro.cfl = 0.5 # cfl number for hyperbolic system + # In this test problem, the velocity is + # time-dependent. We could use 0.9 in + # the 3D test, but need to use 0.7 in 2D + # to satisfy CFL condition. +maestro.fixed_dt = 2.4e-8 + +# ALGORITHMIC OPTIONS +maestro.spherical = 0 +maestro.evolve_base_state = false +maestro.do_initial_projection = true +maestro.init_divu_iter = 3 +maestro.init_iter = 1 +maestro.do_smallscale = true +maestro.beta0_type = 3 + +maestro.anelastic_cutoff_density = 3.e6 +maestro.base_cutoff_density = 3.e6 + +maestro.grav_const = 0.e0 +maestro.use_thermal_diffusion = true + +maestro.init_shrink = 0.1e0 +maestro.max_dt_growth = 1.2e0 + +maestro.use_tfromp = false +maestro.dpdt_factor = 0.3 + +# PLOTFILES +maestro.plot_base_name = flame_ # root name of plot file +maestro.plot_int = 100 # number of timesteps between plot files +maestro.plot_deltat = 1.e-5 + +# CHECKPOINT +maestro.check_base_name = chk +maestro.chk_int = -1 + +# ML OUTPUT +maestro.save_react_int = 10 + +# tolerances for the initial projection +maestro.eps_init_proj_cart = 1.e-11 +maestro.eps_init_proj_sph = 1.e-10 +# tolerances for the divu iterations +maestro.eps_divu_cart = 1.e-11 +maestro.eps_divu_sph = 1.e-10 +maestro.divu_iter_factor = 100. +maestro.divu_level_factor = 10. +# tolerances for the MAC projection +maestro.eps_mac = 1.e-10 +maestro.eps_mac_max = 1.e-8 +maestro.mac_level_factor = 10. +maestro.eps_mac_bottom = 1.e-3 +# tolerances for the nodal projection +maestro.eps_hg = 1.e-10 +maestro.eps_hg_max = 1.e-10 +maestro.hg_level_factor = 10. +maestro.eps_hg_bottom = 1.e-4 + +maestro.do_burning = true + +# override the default values of the probin namelist values here + +problem.dens_fuel = 5.e7 +problem.temp_fuel = 1.e8 +problem.xc12_fuel = 0.5e0 +problem.vel_fuel = 1.e5 +problem.temp_ash = 3.e9 +problem.interface_pos_frac = 0.125e0 +problem.smooth_len_frac = 0.025e0 + +eos.use_eos_coulomb = 1 + diff --git a/Exec/science/flame_ml/inputs_2d_smallscale_ml b/Exec/science/flame_ml/inputs_2d_smallscale_ml new file mode 100644 index 000000000..a07c1c830 --- /dev/null +++ b/Exec/science/flame_ml/inputs_2d_smallscale_ml @@ -0,0 +1,118 @@ + +# INITIAL MODEL +maestro.perturb_model = false +maestro.drdxfac = 5 + +# ML MODEL +maestro.ml_model_file = "" + +# PROBLEM SIZE +geometry.prob_lo = 0.0 0.0 +geometry.prob_hi = 0.625e0 5.e0 + +# BOUNDARY CONDITIONS +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = Slipwall +# 2 = Outflow 5 = NoSlipWall +maestro.lo_bc = 0 1 2 +maestro.hi_bc = 0 2 2 +geometry.is_periodic = 1 0 0 + +# VERBOSITY +maestro.v = 1 # verbosity + +# DEBUG FOR NAN +amrex.fpe_trap_invalid = 1 # floating point exception + +# GRIDDING AND REFINEMENT +amr.n_cell = 128 1024 +amr.max_grid_size = 64 +amr.max_level = 0 # maximum level number allowed +maestro.regrid_int = 2 # how often to regrid +amr.ref_ratio = 2 2 2 2 2 2 # refinement ratio +amr.blocking_factor = 8 # block factor in grid generation +amr.refine_grid_layout = 0 # chop grids up into smaller grids if nprocs > ngrids + +# TAGGING +maestro.temperr = 6.5e8 6.5e8 6.5e8 + +# TIME STEPPING +maestro.max_step = 500 +maestro.stop_time = 1.e0 +maestro.cfl = 0.5 # cfl number for hyperbolic system + # In this test problem, the velocity is + # time-dependent. We could use 0.9 in + # the 3D test, but need to use 0.7 in 2D + # to satisfy CFL condition. +maestro.fixed_dt = 2.4e-8 + +# ALGORITHMIC OPTIONS +maestro.spherical = 0 +maestro.evolve_base_state = false +maestro.do_initial_projection = true +maestro.init_divu_iter = 3 +maestro.init_iter = 1 +maestro.do_smallscale = true +maestro.beta0_type = 3 + +maestro.anelastic_cutoff_density = 3.e6 +maestro.base_cutoff_density = 3.e6 + +maestro.grav_const = 0.e0 +maestro.use_thermal_diffusion = true + +maestro.init_shrink = 0.1e0 +maestro.max_dt_growth = 1.2e0 + +maestro.use_tfromp = false +maestro.dpdt_factor = 0.3 + +# PLOTFILES +maestro.plot_base_name = flame_ # root name of plot file +maestro.plot_int = 100 # number of timesteps between plot files +maestro.plot_deltat = 1.e-5 + +# CHECKPOINT +maestro.check_base_name = chk +maestro.chk_int = -1 + +# ML OUTPUT +maestro.save_react_int = 10 + +# tolerances for the initial projection +maestro.eps_init_proj_cart = 1.e-11 +maestro.eps_init_proj_sph = 1.e-10 +# tolerances for the divu iterations +maestro.eps_divu_cart = 1.e-11 +maestro.eps_divu_sph = 1.e-10 +maestro.divu_iter_factor = 100. +maestro.divu_level_factor = 10. +# tolerances for the MAC projection +maestro.eps_mac = 1.e-10 +maestro.eps_mac_max = 1.e-8 +maestro.mac_level_factor = 10. +maestro.eps_mac_bottom = 1.e-3 +# tolerances for the nodal projection +maestro.eps_hg = 1.e-10 +maestro.eps_hg_max = 1.e-10 +maestro.hg_level_factor = 10. +maestro.eps_hg_bottom = 1.e-4 + +maestro.do_burning = true + +# override the default values of the probin namelist values here + +problem.dens_fuel = 5.e7 +problem.temp_fuel = 1.e8 +problem.xc12_fuel = 0.5e0 +problem.vel_fuel = 1.e5 +problem.temp_ash = 3.e9 +problem.interface_pos_frac = 0.125e0 +problem.smooth_len_frac = 0.025e0 + +problem.dens_fac = 5.000500606732890755e7 +problem.temp_fac = 3.000899961693421364e9 +problem.enuc_fac = 7.033368299136000000e12 + +eos.use_eos_coulomb = 1 + diff --git a/Source/Maestro.H b/Source/Maestro.H index b5f73e168..7db624b2a 100644 --- a/Source/Maestro.H +++ b/Source/Maestro.H @@ -28,6 +28,11 @@ using namespace maestro; #include #include +/// Include PyTorch library if using ML model +#ifdef ML +#include +#endif + /// Define Real vector types for CUDA-compatability. If `AMREX_USE_CUDA`, then /// this will be stored in CUDA managed memory. #ifdef AMREX_USE_CUDA @@ -2077,12 +2082,20 @@ class Maestro : public amrex::AmrCore { Real p0bdot; Real p0b; + /// ML model +#ifdef ML + bool use_ml; + torch::jit::script::Module module; +#endif + /// flag for writing plotfiles enum plotfile_flag { plotInitData = -9999999, plotInitProj = -9999998, plotDivuIter = -9999997 }; + + std::string algo_step; }; #endif diff --git a/Source/MaestroAdvance.cpp b/Source/MaestroAdvance.cpp index a9bcbfe13..451f0316f 100644 --- a/Source/MaestroAdvance.cpp +++ b/Source/MaestroAdvance.cpp @@ -6,6 +6,8 @@ using namespace amrex; // advance a single level for a single time step, updates flux registers void Maestro::AdvanceTimeStep(bool is_initIter) { + algo_step = "0"; + // timer for profiling BL_PROFILE_VAR("Maestro::AdvanceTimeStep()", AdvanceTimeStep); @@ -228,6 +230,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { Real react_time_start = ParallelDescriptor::second(); + algo_step = "1"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 1 : react state >>>" << std::endl; } @@ -246,6 +250,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { Real advect_time_start = ParallelDescriptor::second(); + algo_step = "2"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 2 : make w0 >>>" << std::endl; } @@ -351,6 +357,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { // STEP 3 -- construct the advective velocity ////////////////////////////////////////////////////////////////////////////// + algo_step = "3"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 3 : create MAC velocities >>>" << std::endl; } @@ -394,6 +402,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { advect_time_start = ParallelDescriptor::second(); + algo_step = "4"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 4 : advect base >>>" << std::endl; } @@ -560,6 +570,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { Real thermal_time_start = ParallelDescriptor::second(); + algo_step = "4a"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 4a: thermal conduct >>>" << std::endl; } @@ -611,6 +623,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { react_time_start = ParallelDescriptor::second(); + algo_step = "5"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 5 : react state >>>" << std::endl; } @@ -659,6 +673,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { advect_time_start = ParallelDescriptor::second(); + algo_step = "6"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 6 : make new S and new w0 >>>" << std::endl; } @@ -761,6 +777,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { // STEP 7 -- redo the construction of the advective velocity using the current w0 ////////////////////////////////////////////////////////////////////////////// + algo_step = "7"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 7 : create MAC velocities >>>" << std::endl; } @@ -798,6 +816,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { advect_time_start = ParallelDescriptor::second(); + algo_step = "8"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 8 : advect base >>>" << std::endl; } @@ -952,6 +972,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { thermal_time_start = ParallelDescriptor::second(); + algo_step = "8a"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 8a: thermal conduct >>>" << std::endl; } @@ -997,6 +1019,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { react_time_start = ParallelDescriptor::second(); + algo_step = "9"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 9 : react state >>>" << std::endl; } @@ -1041,6 +1065,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { Real ndproj_time_start = ParallelDescriptor::second(); + algo_step = "10"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 10: make new S >>>" << std::endl; } @@ -1083,6 +1109,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { advect_time_start = ParallelDescriptor::second(); + algo_step = "11"; + if (maestro_verbose >= 1) { Print() << "<<< STEP 11: update and project new velocity >>>" << std::endl; @@ -1181,6 +1209,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) { } } + algo_step = "endstep"; + Print() << "\nTimestep " << istep << " ends with TIME = " << t_new << " DT = " << dt << std::endl; diff --git a/Source/MaestroBurner.cpp b/Source/MaestroBurner.cpp index ea83d32ae..2c57cd67e 100644 --- a/Source/MaestroBurner.cpp +++ b/Source/MaestroBurner.cpp @@ -5,6 +5,8 @@ using namespace amrex; #ifndef SDC + +#ifndef ML void Maestro::Burner(const Vector& s_in, Vector& s_out, const Vector& rho_Hext, Vector& rho_omegadot, Vector& rho_Hnuc, @@ -30,7 +32,20 @@ void Maestro::Burner(const Vector& s_in, Vector& s_out, const auto ispec_threshold = network_spec_index(burner_threshold_species); + Vector react_in(finest_level+1), react_out(finest_level+1); + const bool save_react_data = istep > 0 && save_react_int > 0 && istep % save_react_int == 0; + for (int lev = 0; lev <= finest_level; ++lev) { + if (save_react_data) { + if (NumSpec == 3) { + react_in[lev].define(grids[lev], dmap[lev], NumSpec+1, 0); // X(C12), X(Mg24), rho, T -> burn + react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+0), 0); // burn -> X(C12), X(Mg24), Hnuc, dXdt, enucdot + } else { + react_in[lev].define(grids[lev], dmap[lev], NumSpec+2, 0); // X, rho, T -> burn + react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+1), 0); // burn -> X, Hnuc, dXdt, enucdot + } + } + // create mask assuming refinement ratio = 2 int finelev = lev + 1; if (lev == finest_level) { @@ -57,12 +72,223 @@ void Maestro::Burner(const Vector& s_in, Vector& s_out, const Array4 rho_Hnuc_arr = rho_Hnuc[lev].array(mfi); const auto tempbar_init_arr = tempbar_init.const_array(); + Array4 react_in_arr, react_out_arr; + + if (save_react_data) { + react_in_arr = react_in[lev].array(mfi); + react_out_arr = react_out[lev].array(mfi); + } + // use a dummy value for non-spherical as tempbar_init_cart is not defined const Array4 tempbar_cart_arr = spherical ? tempbar_init_cart[lev].array(mfi) : rho_Hext[lev].array(mfi); const Array4 mask_arr = mask.array(mfi); + // If save_react_data, we add perturbations to the mass fractions to + // get more varied data for training ML model + if (save_react_data) { + ParallelForRNG(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k, + amrex::RandomEngine const& engine) noexcept + { + if (use_mask && mask_arr(i, j, k)) + return; // cell is covered by finer cells + + auto rho = s_in_arr(i, j, k, Rho); + Real x_in[NumSpec]; + for (int n = 0; n < NumSpec; ++n) { + x_in[n] = s_in_arr(i, j, k, FirstSpec + n) / rho; + } + +#if NAUX_NET > 0 + Real aux_in[NumAux]; + for (int n = 0; n < NumAux; ++n) { + aux_in[n] = s_in_arr(i, j, k, FirstAux + n) / rho; + } +#endif + + Real T_in = 0.0; + if (drive_initial_convection) { + if (!spherical) { + auto r = (AMREX_SPACEDIM == 2) ? j : k; + T_in = tempbar_init_arr(lev, r); + } else { + T_in = tempbar_cart_arr(i, j, k); + } + } else { + T_in = s_in_arr(i, j, k, Temp); + } + + Real x_test = + (ispec_threshold > 0) ? x_in[ispec_threshold] : 0.0; + + // Perturb 75 percent of the data + Real rand_perc = 0.75; + if (amrex::Random(engine) < rand_perc) { + // Loop over the species + for (int n = 0; n < NumSpec; ++n) { + // aprox13 network (NumSpec == 13) + // Three main species are C12(1), O16(2), Mg24(4) + // Do not perturb the main species + if (NumSpec == 13 && n != 1 && n != 2 && n != 4) { + // Set a random perturbation based on log(X_k) + Real X_log = std::log10(x_in[n]); + // We only want to perturb mass fractions below 10^-4 + // if (X_log <= -4.0) { + // Random number generated between [-X_log/15, X_log/15] + Real rand = (amrex::Random(engine) - 0.5) * 2*X_log/15.0; + Real perturb = pow(10.0, X_log + rand) - x_in[n]; + x_in[n] += perturb; + + // Pick the larger mass fraction of C12 and Mg24 + // and subtract perturb since sum(X_k) = 1 + int spec_rand = (x_in[1] > x_in[4]) ? 1 : 4; + x_in[spec_rand] -= perturb; + // } + } + // ignition_simple network (NumSpec == 3) + else if (NumSpec == 3 && n != 1) { + int spec_other = (n == 0) ? 2 : 0; + Real x_less = (x_in[n] < x_in[spec_other]) ? x_in[n] : x_in[spec_other]; + // Set a random perturbation within -/+ 10% of smaller mass fraction + Real perturb = (amrex::Random(engine) - 0.5) * 0.1 * x_less; + // avoid negative mass fractions + if (x_in[n] < -perturb || x_in[spec_other] < perturb) { + perturb = 0.0; + } + x_in[n] += perturb; + + // Subtract perturb from the other main species + x_in[spec_other] -= perturb; + } + } + + // Perturb density and temperature by small relative values + // since they do not change much + Real perturb_rel = (amrex::Random(engine) - 0.5) * 2.e-4; + rho *= (1.0 + perturb_rel); + perturb_rel = (amrex::Random(engine) - 0.5) * 6.e-4; + T_in *= (1.0 + perturb_rel); + } + + if (NumSpec == 3) { + react_in_arr(i, j, k, 0) = x_in[0]; + react_in_arr(i, j, k, 1) = x_in[2]; + react_in_arr(i, j, k, 2) = rho; + react_in_arr(i, j, k, 3) = T_in; + + react_out_arr(i, j, k, 2) = 0.0; // output generated energy + for (int n = NumSpec; n < 2*(NumSpec); ++n) { + react_out_arr(i, j, k, n) = 0.0; + } + } else { + for (int n = 0; n < NumSpec; ++n) { + react_in_arr(i, j, k, n) = x_in[n]; + react_out_arr(i, j, k, n) = x_in[n]; + } + react_in_arr(i, j, k, NumSpec) = rho; // input density + react_in_arr(i, j, k, NumSpec+1) = T_in; // input temperature + react_out_arr(i, j, k, NumSpec) = 0.0; // output generated energy + + for (int n = NumSpec+1; n < 2*(NumSpec+1); ++n) { + react_out_arr(i, j, k, n) = 0.0; + } + } + + + burn_t state_in; + burn_t state_out; + + Real x_out[NumSpec]; +#if NAUX_NET > 0 + Real aux_out[NumAux]; +#endif + Real rhowdot[NumSpec]; + Real rhoH = 0.0; + + // if the threshold species is not in the network, then we burn + // normally. if it is in the network, make sure the mass + // fraction is above the cutoff. + if ((rho > burning_cutoff_density_lo && + rho < burning_cutoff_density_hi) && + (ispec_threshold < 0 || + (ispec_threshold > 0 && + x_test > burner_threshold_cutoff))) { + // Initialize burn state_in and state_out + state_in.e = 0.0; + state_in.rho = rho; + state_in.T = T_in; + for (int n = 0; n < NumSpec; ++n) { + state_in.xn[n] = x_in[n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + state_in.aux[n] = aux_in[n]; + } +#endif + + // initialize state_out the same as state_in + state_out.e = 0.0; + state_out.rho = rho; + state_out.T = T_in; + for (int n = 0; n < NumSpec; ++n) { + state_out.xn[n] = x_in[n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + state_out.aux[n] = aux_in[n]; + } +#endif + + burner(state_out, dt_in); + + for (int n = 0; n < NumSpec; ++n) { + x_out[n] = state_out.xn[n]; + rhowdot[n] = state_out.rho * + (state_out.xn[n] - state_in.xn[n]) / dt_in; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + aux_out[n] = state_out.aux[n]; + } +#endif + rhoH = state_out.rho * (state_out.e - state_in.e) / dt_in; + + // use state_in and state_out to set the reaction outputs and RHS + // because the EOS calls require the actual internal energy e, + // this should come after we're otherwise finished with state in/out. + if (NumSpec == 3) { + react_out_arr(i, j, k, 0) = state_out.xn[0]; + react_out_arr(i, j, k, 1) = state_out.xn[2]; + react_out_arr(i, j, k, 2) = state_out.e - state_in.e; + } else { + for (int n = 0; n < NumSpec; ++n) { + react_out_arr(i, j, k, n) = state_out.xn[n]; + } + react_out_arr(i, j, k, NumSpec) = state_out.e - state_in.e; + } + + eos(eos_input_rt, state_in); // get initial e + state_out.e += state_in.e; + eos(eos_input_re, state_out); // using final e, get final T + Array1D ydot; // this will hold the RHS + actual_rhs(state_out, ydot); // evaluate the RHS to get dYdt + + if (NumSpec == 3) { + react_out_arr(i, j, k, NumSpec) = ydot(1) * aion[0]; // save dXdt + react_out_arr(i, j, k, NumSpec+1) = ydot(3) * aion[2]; + react_out_arr(i, j, k, 2*NumSpec-1) = ydot(net_ienuc); // save enucdot + } else { + for (int n = 1; n <= NumSpec; ++n) { + react_out_arr(i, j, k, NumSpec+n) = ydot(n) * aion[n-1]; // save dXdt + } + react_out_arr(i, j, k, 2*NumSpec+1) = ydot(net_ienuc); // save enucdot + } + } + }); + } + + // solve original input states ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) { if (use_mask && mask_arr(i, j, k) == 1) { return; // cell is covered by finer cells @@ -155,6 +381,7 @@ void Maestro::Burner(const Vector& s_in, Vector& s_out, } #endif rhoH = state_out.rho * (state_out.e - state_in.e) / dt_in; + } else { for (int n = 0; n < NumSpec; ++n) { x_out[n] = x_in[n]; @@ -174,7 +401,7 @@ void Maestro::Burner(const Vector& s_in, Vector& s_out, } if (fabs(sumX - 1.0) > reaction_sum_tol) { -#ifndef AMREX_USE_GPU +#ifndef AMREX_USE_CUDA Abort("ERROR: abundances do not sum to 1"); #endif for (int n = 0; n < NumSpec; ++n) { @@ -211,8 +438,486 @@ void Maestro::Burner(const Vector& s_in, Vector& s_out, }); } } + + if (save_react_data) { + Vector react_in_varnames, react_out_varnames; + for (int i = 0; i < NumSpec; ++i) { + std::string spec_string = "X("; + spec_string += short_spec_names_cxx[i]; + spec_string += ')'; + if (NumSpec == 3 && i != 1) { + react_in_varnames.push_back(spec_string); + react_out_varnames.push_back(spec_string); + } + } + react_in_varnames.push_back("rho"); + react_in_varnames.push_back("temp"); + react_out_varnames.push_back("enuc"); + for (int i = 0; i < NumSpec; ++i) { + std::string spec_string = "Xdot("; + spec_string += short_spec_names_cxx[i]; + spec_string += ')'; + if (NumSpec == 3 && i != 1) { + react_out_varnames.push_back(spec_string); + } + } + react_out_varnames.push_back("enucdot"); + + std::string react_in_name = "react_inputs_"; + std::string react_out_name = "react_outputs_"; + PlotFileName(istep, &react_in_name); + PlotFileName(istep, &react_out_name); + react_in_name = react_in_name + "_" + algo_step; + react_out_name = react_out_name + "_" + algo_step; + Vector step_array(finest_level+1, istep); + WriteMultiLevelPlotfile(react_in_name, finest_level + 1, GetVecOfConstPtrs(react_in), react_in_varnames, + Geom(), dt_in, step_array, refRatio()); + WriteMultiLevelPlotfile(react_out_name, finest_level + 1, GetVecOfConstPtrs(react_out), react_out_varnames, + Geom(), dt_in, step_array, refRatio()); + } } +#else +// ifdef ML +void Maestro::Burner(const Vector& s_in, Vector& s_out, + const Vector& rho_Hext, + Vector& rho_omegadot, Vector& rho_Hnuc, + const BaseState& p0, const Real dt_in, + const Real time_in) { + // timer for profiling + BL_PROFILE_VAR("Maestro::Burner()", Burner); + + // Put tempbar_init on cart + Vector tempbar_init_cart(finest_level + 1); + + if (spherical) { + for (int lev = 0; lev <= finest_level; ++lev) { + tempbar_init_cart[lev].define(grids[lev], dmap[lev], 1, 0); + tempbar_init_cart[lev].setVal(0.); + } + + if (drive_initial_convection) { + Put1dArrayOnCart(tempbar_init, tempbar_init_cart, false, false, + bcs_f, 0); + } + } + + const auto ispec_threshold = network_spec_index(burner_threshold_species); + + const bool use_ml_const = use_ml; + + // match data type of pytorch tensor to multifab data + auto dtype0 = torch::kFloat64; + + Vector react_in(finest_level+1), react_out(finest_level+1); + + const bool save_react_data = istep > 0 && save_react_int > 0 && istep % save_react_int == 0; + + for (int lev = 0; lev <= finest_level; ++lev) { + if (save_react_data) { + if (NumSpec == 3) { + react_in[lev].define(grids[lev], dmap[lev], NumSpec+1, 0); // X(C12), X(Mg24), rho, T -> burn + react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+0), 0); // burn -> X(C12), X(Mg24), Hnuc, dXdt, enucdot + } else { + react_in[lev].define(grids[lev], dmap[lev], NumSpec+2, 0); // X, rho, T -> burn + react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+1), 0); // burn -> X, Hnuc, dXdt, enucdot + } + } + + // create mask assuming refinement ratio = 2 + int finelev = lev + 1; + if (lev == finest_level) finelev = finest_level; + + const BoxArray& fba = s_in[finelev].boxArray(); + const iMultiFab& mask = makeFineMask(s_in[lev], fba, IntVect(2)); + + // loop over boxes (make sure mfi takes a cell-centered multifab as an argument) +#ifdef _OPENMP +#pragma omp parallel +#endif + for (MFIter mfi(s_in[lev], TilingIfNotGPU()); mfi.isValid(); ++mfi) { + // Get the index space of the valid region + const Box& tileBox = mfi.tilebox(); + + const bool use_mask = (lev != finest_level); + + const Array4 s_in_arr = s_in[lev].array(mfi); + const Array4 s_out_arr = s_out[lev].array(mfi); + const Array4 rho_Hext_arr = rho_Hext[lev].array(mfi); + const Array4 rho_omegadot_arr = rho_omegadot[lev].array(mfi); + const Array4 rho_Hnuc_arr = rho_Hnuc[lev].array(mfi); + const auto tempbar_init_arr = tempbar_init.const_array(); + + Array4 react_in_arr, react_out_arr; + + if (save_react_data) { + react_in_arr = react_in[lev].array(mfi); + react_out_arr = react_out[lev].array(mfi); + } + + // use a dummy value for non-spherical as tempbar_init_cart is not defined + const Array4 tempbar_cart_arr = + spherical ? tempbar_init_cart[lev].array(mfi) + : rho_Hext[lev].array(mfi); + const Array4 mask_arr = mask.array(mfi); + + // retrieve smallend and size of box + const IntVect bx_lo = tileBox.smallEnd(); + const IntVect nbox = tileBox.size(); + + // compute total cells in the box + int ncell = AMREX_SPACEDIM == 2 ? + nbox[0] * nbox[1] : nbox[0] * nbox[1] * nbox[2]; + + // copy multifabs to input array + // index ordering: (species, rho, temp) + int nextra = (NumSpec == 3) ? 1 : 2; + const int NumInput = NumSpec + nextra; + amrex::Gpu::ManagedVector state_temp(ncell*NumInput); + Real* AMREX_RESTRICT temp_ptr = state_temp.dataPtr(); + + // copy input multifab to torch tensor + amrex::ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept { + int ii = i - bx_lo[0]; + int jj = j - bx_lo[1]; + int index = jj*nbox[0] + ii; +#if AMREX_SPACEDIM == 3 + int kk = k - bx_lo[2]; + index += kk*nbox[0]*nbox[1]; +#endif + + auto rho = s_in_arr(i, j, k, Rho); + Real T_in = 0.0; + if (drive_initial_convection) { + if (!spherical) { + auto r = (AMREX_SPACEDIM == 2) ? j : k; + T_in = tempbar_init_arr(lev, r); + } else { + T_in = tempbar_cart_arr(i, j, k); + } + } else { + T_in = s_in_arr(i, j, k, Temp); + } + + // array order is row-based [index][comp] + if (NumSpec == 3) { + temp_ptr[index*NumInput] = s_in_arr(i, j, k, FirstSpec) / rho; + temp_ptr[index*NumInput + 1] = s_in_arr(i, j, k, FirstSpec + 2) / rho; + temp_ptr[index*NumInput + 2] = rho / dens_fac; + temp_ptr[index*NumInput + 3] = T_in / temp_fac; + } else { + for (int n = 0; n < NumSpec; ++n) { + temp_ptr[index*NumInput + n] = s_in_arr(i, j, k, FirstSpec + n) / rho; + } + temp_ptr[index*NumInput + NumSpec] = rho / dens_fac; + temp_ptr[index*NumInput + NumSpec + 1] = T_in / temp_fac; + } + }); + + at::Tensor outputs_torch = torch::zeros({ncell, NumSpec+1}, torch::TensorOptions().dtype(dtype0)); +#ifdef AMREX_USE_CUDA + outputs_torch.to(torch::kCUDA); +#endif + + if (use_ml) { + // create torch tensor from array +#ifndef AMREX_USE_CUDA + at::Tensor inputs_torch = torch::from_blob(temp_ptr, {ncell, NumInput}, + torch::TensorOptions().dtype(dtype0)); +#else + at::Tensor inputs_torch = torch::from_blob(temp_ptr, {ncell, NumInput}, + torch::TensorOptions().dtype(dtype0).device(torch::kCUDA)); +#endif + + // evaluate torch model + inputs_torch = inputs_torch.to(torch::kFloat32); + outputs_torch = module.forward({inputs_torch}).toTensor(); + outputs_torch = outputs_torch.to(dtype0); + + if (/*DEBUG=*/0) { + Print() << "example input: \n" + << inputs_torch.slice(/*dim=*/0, /*start=*/0, /*end=*/5) << '\n'; + Print() << "example output: \n" + << outputs_torch.slice(/*dim=*/0, /*start=*/0, /*end=*/5) << '\n'; + } + } + + // get accessor to tensor (read-only) +#ifndef AMREX_USE_CUDA + auto outputs_torch_acc = outputs_torch.accessor(); +#else + auto outputs_torch_acc = outputs_torch.packed_accessor64(); +#endif + auto use_ml_const_istep_gt_0 = use_ml_const && istep > 0; // cannot use use_ml_const and istep directly in CUDA kernel below. + + ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) { + if (use_mask && mask_arr(i, j, k)) + return; // cell is covered by finer cells + + int ii = i - bx_lo[0]; + int jj = j - bx_lo[1]; + int index = jj*nbox[0] + ii; +#if AMREX_SPACEDIM == 3 + int kk = k - bx_lo[2]; + index += kk*nbox[0]*nbox[1]; +#endif + + auto rho = s_in_arr(i, j, k, Rho); + Real x_in[NumSpec]; + for (int n = 0; n < NumSpec; ++n) { + x_in[n] = s_in_arr(i, j, k, FirstSpec + n) / rho; + } +#if NAUX_NET > 0 + Real aux_in[NumAux]; + for (int n = 0; n < NumAux; ++n) { + aux_in[n] = s_in_arr(i, j, k, FirstAux + n) / rho; + } +#endif + + Real T_in = 0.0; + if (drive_initial_convection) { + if (!spherical) { + auto r = (AMREX_SPACEDIM == 2) ? j : k; + T_in = tempbar_init_arr(lev, r); + } else { + T_in = tempbar_cart_arr(i, j, k); + } + } else { + T_in = s_in_arr(i, j, k, Temp); + } + + Real x_test = + (ispec_threshold > 0) ? x_in[ispec_threshold] : 0.0; + + Real x_out[NumSpec]; +#if NAUX_NET > 0 + Real aux_out[NumAux]; +#endif + Real rhowdot[NumSpec]; + Real rhoH = 0.0; + + if (save_react_data) { + if (NumSpec == 3) { + react_in_arr(i, j, k, 0) = x_in[0]; + react_in_arr(i, j, k, 1) = x_in[2]; + react_in_arr(i, j, k, 2) = rho; + react_in_arr(i, j, k, 3) = T_in; + + react_out_arr(i, j, k, 2) = 0.0; // output generated energy + for (int n = NumSpec; n < 2*(NumSpec); ++n) { + react_out_arr(i, j, k, n) = 0.0; + } + } else { + for (int n = 0; n < NumSpec; ++n) { + react_in_arr(i, j, k, n) = x_in[n]; + react_out_arr(i, j, k, n) = x_in[n]; + } + react_in_arr(i, j, k, NumSpec) = rho; // input density + react_in_arr(i, j, k, NumSpec+1) = T_in; // input temperature + react_out_arr(i, j, k, NumSpec) = 0.0; // output generated energy + + for (int n = NumSpec+1; n < 2*(NumSpec+1); ++n) { + react_out_arr(i, j, k, n) = 0.0; + } + } + } + + // if the threshold species is not in the network, then we burn + // normally. if it is in the network, make sure the mass + // fraction is above the cutoff. + if ((rho > burning_cutoff_density_lo && + rho < burning_cutoff_density_hi) && + (ispec_threshold < 0 || + (ispec_threshold > 0 && + x_test > burner_threshold_cutoff))) { + + if (use_ml_const_istep_gt_0) { + // copy output tensor to multifabs + // index ordering: (species, enuc) + // check if X_k >= 0 + x_out[0] = (outputs_torch_acc[index][0] >= 0.0) ? + outputs_torch_acc[index][0] : 0.0; + // X(O16) does not change + x_out[1] = x_in[1]; + // check if X_k >= 0 + x_out[2] = (outputs_torch_acc[index][1] >= 0.0) ? + outputs_torch_acc[index][1] : 0.0; + + // note enuc in output tensor is the normalized value + // of (state_out.e - state_in.e) + rhoH = rho * (outputs_torch_acc[index][2] * enuc_fac) / dt_in; + } else { + // need to use burner if no ML model was given or at step 0 + burn_t state_in; + burn_t state_out; + + // Initialize burn state_in and state_out + state_in.e = 0.0; + state_in.rho = rho; + state_in.T = T_in; + for (int n = 0; n < NumSpec; ++n) { + state_in.xn[n] = x_in[n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + state_in.aux[n] = aux_in[n]; + } +#endif + + // initialize state_out the same as state_in + state_out.e = 0.0; + state_out.rho = rho; + state_out.T = T_in; + for (int n = 0; n < NumSpec; ++n) { + state_out.xn[n] = x_in[n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + state_out.aux[n] = aux_in[n]; + } +#endif + + burner(state_out, dt_in); + + for (int n = 0; n < NumSpec; ++n) { + x_out[n] = state_out.xn[n]; + rhowdot[n] = state_out.rho * + (state_out.xn[n] - state_in.xn[n]) / dt_in; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + aux_out[n] = state_out.aux[n]; + } +#endif + rhoH = state_out.rho * (state_out.e - state_in.e) / dt_in; + } + } else { + for (int n = 0; n < NumSpec; ++n) { + x_out[n] = x_in[n]; + rhowdot[n] = 0.0; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + aux_out[n] = aux_in[n]; + } +#endif + } + + // check if sum{X_k} = 1 + Real sumX = 0.0; + for (int n = 0; n < NumSpec; ++n) { + sumX += x_out[n]; + } + + if (fabs(sumX - 1.0) > reaction_sum_tol) { + if (NumSpec == 3) { + Real sumXmO16 = sumX - x_out[1]; + x_out[0] *= (1.0 - x_out[1]) / sumXmO16; + x_out[2] *= (1.0 - x_out[1]) / sumXmO16; + } else { + for (int n = 0; n < NumSpec; ++n) { + x_out[n] /= sumX; + } + } + } + + if (use_ml_const) { + for (int n = 0; n < NumSpec; ++n) { + rhowdot[n] = rho * (x_out[n] - x_in[n]) / dt_in; + } + } + + if (save_react_data) { + if (NumSpec == 3) { + react_out_arr(i, j, k, 0) = x_out[0]; + react_out_arr(i, j, k, 1) = x_out[2]; + react_out_arr(i, j, k, 2) = rhoH * dt_in; + + react_out_arr(i, j, k, NumSpec) = rhowdot[0]; // save dXdt + react_out_arr(i, j, k, NumSpec+1) = rhowdot[2]; + react_out_arr(i, j, k, 2*NumSpec-1) = rhoH; // save enucdot + } else { + for (int n = 0; n < NumSpec; ++n) { + react_out_arr(i, j, k, n) = x_out[n]; + } + react_out_arr(i, j, k, NumSpec) = rhoH * dt_in; + + for (int n = 0; n < NumSpec; ++n) { + react_out_arr(i, j, k, NumSpec+n) = rhowdot[n]; // save dXdt + } + react_out_arr(i, j, k, 2*NumSpec+1) = rhoH; // save enucdot + } + } + + // pass the density and pi through + s_out_arr(i, j, k, Rho) = s_in_arr(i, j, k, Rho); + s_out_arr(i, j, k, Pi) = s_in_arr(i, j, k, Pi); + + // update the species + for (int n = 0; n < NumSpec; ++n) { + s_out_arr(i, j, k, FirstSpec + n) = x_out[n] * rho; + } + + // update the auxiliary variables +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; ++n) { + s_out_arr(i, j, k, FirstAux + n) = aux_out[n] * rho; + } +#endif + + // store the energy generation and species create quantities + for (int n = 0; n < NumSpec; ++n) { + rho_omegadot_arr(i, j, k, n) = rhowdot[n]; + } + rho_Hnuc_arr(i, j, k) = rhoH; + + // update the enthalpy -- include the change due to external heating + s_out_arr(i, j, k, RhoH) = s_in_arr(i, j, k, RhoH) + + dt_in * rho_Hnuc_arr(i, j, k) + + dt_in * rho_Hext_arr(i, j, k); + }); + } + } + + if (save_react_data) { + Vector react_in_varnames, react_out_varnames; + for (int i = 0; i < NumSpec; ++i) { + std::string spec_string = "X("; + spec_string += short_spec_names_cxx[i]; + spec_string += ')'; + if (NumSpec == 3 && i != 1) { + react_in_varnames.push_back(spec_string); + react_out_varnames.push_back(spec_string); + } + } + react_in_varnames.push_back("rho"); + react_in_varnames.push_back("temp"); + react_out_varnames.push_back("enuc"); + for (int i = 0; i < NumSpec; ++i) { + std::string spec_string = "Xdot("; + spec_string += short_spec_names_cxx[i]; + spec_string += ')'; + if (NumSpec == 3 && i != 1) { + react_out_varnames.push_back(spec_string); + } + } + react_out_varnames.push_back("enucdot"); + + std::string react_in_name = "react_inputs_"; + std::string react_out_name = "react_outputs_"; + PlotFileName(istep, &react_in_name); + PlotFileName(istep, &react_out_name); + react_in_name = react_in_name + "_" + algo_step; + react_out_name = react_out_name + "_" + algo_step; + Vector step_array(finest_level+1, istep); + WriteMultiLevelPlotfile(react_in_name, finest_level + 1, GetVecOfConstPtrs(react_in), react_in_varnames, + Geom(), dt_in, step_array, refRatio()); + WriteMultiLevelPlotfile(react_out_name, finest_level + 1, GetVecOfConstPtrs(react_out), react_out_varnames, + Geom(), dt_in, step_array, refRatio()); + } +} +#endif + #else // SDC burner void Maestro::Burner(const Vector& s_in, Vector& s_out, diff --git a/Source/MaestroInit.cpp b/Source/MaestroInit.cpp index b8013fc21..59308e728 100644 --- a/Source/MaestroInit.cpp +++ b/Source/MaestroInit.cpp @@ -15,6 +15,12 @@ void Maestro::Init() { Print() << "Calling Init()" << std::endl; + // initializes the seed for C++ random number calls + auto seed = 142; + InitRandom(seed + ParallelDescriptor::MyProc(), + ParallelDescriptor::NProcs(), + seed + ParallelDescriptor::MyProc()); + if (restart_file.empty()) { start_step = 1; @@ -531,9 +537,22 @@ void Maestro::DivuIter(int istep_divu_iter) { rhohalf[lev].setVal(1.); } +#ifdef ML + // save current state of use_ml + const auto use_ml_temp = use_ml; + + // do not use ML model for divu steps + use_ml = false; +#endif + React(sold, stemp, rho_Hext, rho_omegadot, rho_Hnuc, p0_old, 0.5 * dt, t_old); +#ifdef ML + // reset use_ml to previous state + use_ml = use_ml_temp; +#endif + // WriteMF(sold,"a_sold_2levs"); // Abort(); diff --git a/Source/MaestroPlot.cpp b/Source/MaestroPlot.cpp index 5dbb7ddcc..4dc701e29 100644 --- a/Source/MaestroPlot.cpp +++ b/Source/MaestroPlot.cpp @@ -1103,7 +1103,7 @@ void Maestro::WriteJobInfo(const std::string& dir) const { jobInfoFile << "\n\n"; -#ifdef AMREX_USE_GPU +#ifdef AMREX_USE_CUDA // This output assumes for simplicity that every rank uses the // same type of GPU. diff --git a/Source/MaestroSetup.cpp b/Source/MaestroSetup.cpp index 9d0c126aa..78be51838 100644 --- a/Source/MaestroSetup.cpp +++ b/Source/MaestroSetup.cpp @@ -216,6 +216,32 @@ void Maestro::Setup() { << std::endl; Error(); } + +#ifdef ML + // check to see if ml model is provided + // if model file is provided, load model + if (ml_model_file.empty()) { + use_ml = false; + Print() << "Warning: No ML model file provided! Will not use ML." << std::endl; + } else { + use_ml = true; + + // Load pytorch module via torch script + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + module = torch::jit::load(ml_model_file); + } + catch (const c10::Error& e) { + Abort("Error loading the ML model\n"); + } + Print() << "ML model loaded.\n"; + +#ifdef AMREX_USE_CUDA + module.to(torch::kCUDA); + Print() << "Copying model to GPU." << std::endl; +#endif + } +#endif } // read in some parameters from inputs file diff --git a/Source/param/_cpp_parameters b/Source/param/_cpp_parameters index 9d2ea9778..a4a1e843c 100644 --- a/Source/param/_cpp_parameters +++ b/Source/param/_cpp_parameters @@ -87,6 +87,9 @@ use_soundspeed_firstdt bool false y # Use the divu constraint when computing the first time step. use_divu_firstdt bool false y +# Save reactions input & output data every so many steps (if >0) +save_react_int int 0 + #----------------------------------------------------------------------------- # category: grid #----------------------------------------------------------------------------- @@ -621,7 +624,15 @@ sdc_couple_mac_velocity bool false # The nodal solve is non-deterministic on the GPU. Should it instead be run # on the CPU to give a deterministic solution? -deterministic_nodal_solve bool false +deterministic_nodal_solve bool false + + +#----------------------------------------------------------------------------- +# category: machine learned model +#----------------------------------------------------------------------------- + +# Where the ml model is located. Empty string means do not use model. +ml_model_file string "" #-----------------------------------------------------------------------------