diff --git a/.github/workflows/good_defines.txt b/.github/workflows/good_defines.txt
index dfe0a1f09..b8458720d 100644
--- a/.github/workflows/good_defines.txt
+++ b/.github/workflows/good_defines.txt
@@ -2,9 +2,11 @@ AMREX_SPACEDIM
 AMREX_USE_CUDA
 AMREX_USE_GPU
 DO_PROBLEM_POST_TIMESTEP
+ML
 NAUX_NET
 AUX_THERMO
 ROTATION
 SDC
+_ML_LIB
 _OPENMP
 __cplusplus
diff --git a/Exec/Make.Maestro b/Exec/Make.Maestro
index 7afc6c60f..420450e71 100644
--- a/Exec/Make.Maestro
+++ b/Exec/Make.Maestro
@@ -24,6 +24,9 @@ BL_NO_FORT ?= TRUE
 # Require C++17
 CXXSTD := c++17
 
+# default is to not compile with ML (PyTorch)
+USE_ML ?= FALSE
+
 # Check to make sure that Microphysics actually exists,
 # using an arbitrary file that must exist to check.
 # Throw an error if we don't have Microphysics.
@@ -49,6 +52,10 @@ ifeq ($(USE_SDC), TRUE)
   USERSuffix = .SDC
 endif
 
+ifeq ($(USE_ML), TRUE)
+  USERSuffix = .ML
+endif
+
 include $(AMREX_HOME)/Tools/GNUMake/Make.defs
 
 MAESTROEX_AUTO_SOURCE_DIR := $(TmpBuildDir)/maestroex_sources/$(optionsSuffix).EXE
@@ -143,6 +150,37 @@ Blocs += $(foreach dir, $(EXTERN_CORE), $(dir))
 # external libraries
 #------------------------------------------------------------------------------
 
+#------------------------------------------------------------------------------
+# PyTorch
+#------------------------------------------------------------------------------
+
+# Torch directories -- note the Make.package for these adds these
+# directories into INCLUDE_LOCATIONS and LIBRARY_LOCATIONS for us,
+# so we don't need to do it here
+ifeq ($(USE_ML), TRUE)
+  PYTORCH_ROOT := $(TOP)/external/libtorch
+
+  TORCH_LIBPATH = $(PYTORCH_ROOT)/lib
+
+  ifeq ($(USE_CUDA),TRUE)
+    TORCH_LIBS = -ltorch -ltorch_cpu -lc10 -lc10_cuda -lcuda  # -lcaffe2 -lcaffe2_gpu
+  else
+    TORCH_LIBS = -ltorch -ltorch_cpu -lc10  # -lcaffe2
+  endif
+
+  INCLUDE_LOCATIONS += $(PYTORCH_ROOT)/include \
+                       $(PYTORCH_ROOT)/include/torch/csrc/api/include
+  LIBRARY_LOCATIONS += $(TORCH_LIBPATH)
+
+  DEFINES += -DML -D_GLIBCXX_USE_CXX11_ABI=1
+
+  ifeq ($(USE_CUDA), TRUE)
+    LDFLAGS += -Xlinker '--no-as-needed,-rpath $(TORCH_LIBPATH) $(TORCH_LIBS)'
+  else
+    LDFLAGS += -Wl,--no-as-needed,-rpath=$(TORCH_LIBPATH) $(TORCH_LIBS)
+  endif
+endif
+
 #------------------------------------------------------------------------------
 # include all of the necessary directories
 #------------------------------------------------------------------------------
diff --git a/Exec/science/README b/Exec/science/README
index 49d976d1a..0d2be7d2a 100644
--- a/Exec/science/README
+++ b/Exec/science/README
@@ -17,6 +17,11 @@ flame/
    the SNe code show that SDC can overcome these problems.
 
 
+flame_ml/
+
+   The same combustion-mode problem as in flame/ but using ML model to
+   replace the reaction kernel of the solver.
+
 sub_chandra/
 
    Surpise! We're modelling convection.  This time in 3D spherical
diff --git a/Exec/science/flame_ml/GNUmakefile b/Exec/science/flame_ml/GNUmakefile
new file mode 100644
index 000000000..4c820c7ba
--- /dev/null
+++ b/Exec/science/flame_ml/GNUmakefile
@@ -0,0 +1,29 @@
+DEBUG      = FALSE
+DIM        = 2
+COMP	   = gnu
+USE_MPI    = TRUE
+USE_OMP    = FALSE
+USE_REACT  = TRUE
+USE_ML     = TRUE
+
+TINY_PROFILE = FALSE
+PROFILE      = FALSE # TRUE overrides TINY_PROFILE
+
+
+# define the location of the MAESTROEX home directory
+MAESTROEX_HOME  := ../../..
+
+
+# Set the EOS, conductivity, and network directories
+# We first check if these exist in $(MAESTROEX_HOME)/Microphysics/(EOS/conductivity/networks)
+# If not we use the version in $(MICROPHYSICS_HOME)/Microphysics/(EOS/conductivity/networks)
+EOS_DIR          := helmholtz
+CONDUCTIVITY_DIR := stellar
+NETWORK_DIR      := ignition_simple
+INTEGRATOR_DIR   := VODE
+
+Bpack   := ./Make.package
+Blocs   := .
+PROBIN_PARAMETER_DIRS := .
+
+include $(MAESTROEX_HOME)/Exec/Make.Maestro
diff --git a/Exec/science/flame_ml/MaestroBCFill.cpp b/Exec/science/flame_ml/MaestroBCFill.cpp
new file mode 100644
index 000000000..797c25b84
--- /dev/null
+++ b/Exec/science/flame_ml/MaestroBCFill.cpp
@@ -0,0 +1,259 @@
+
+#include <Maestro.H>
+#include <Maestro_F.H>
+
+#include <MaestroInletBCs.H>
+
+using namespace amrex;
+using namespace InletBCs;
+
+void Maestro::ScalarFill(const Array4<Real>& scal, const Box& bx,
+                         const Box& domainBox, const Real* dx, const BCRec bcs,
+                         const Real* gridlo, const int comp) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::ScalarFill()", ScalarFill);
+
+    fab_filcc(bx, scal, 1, domainBox, dx, gridlo, &bcs);
+
+    FillExtBC(scal, bx, domainBox, dx, bcs, comp, false);
+}
+
+void Maestro::VelFill(const Array4<Real>& vel, const Box& bx,
+                      const Box& domainBox, const Real* dx, const BCRec bcs,
+                      const Real* gridlo, const int comp) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::VelFill()", VelFill);
+
+    fab_filcc(bx, vel, 1, domainBox, dx, gridlo, &bcs);
+
+    FillExtBC(vel, bx, domainBox, dx, bcs, comp, true);
+}
+
+void Maestro::FillExtBC(const Array4<Real>& q, const Box& bx,
+                        const Box& domainBox, const Real* dx, const BCRec bcs,
+                        const int comp, const bool is_vel) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::FillExtBC()", FillExtBC);
+
+    // do nothing if there are no exterior boundaries
+    bool found_ext_boundary = false;
+    for (auto i = 0; i < AMREX_SPACEDIM; ++i) {
+        if (bcs.lo(i) == BCType::ext_dir || bcs.hi(i) == BCType::ext_dir) {
+            found_ext_boundary = true;
+            break;
+        }
+    }
+    if (!found_ext_boundary) return;
+
+    // get parameters for EXT_DIR bcs
+    const Real INLET_RHO_l = INLET_RHO;
+    const Real INLET_RHOH_l = INLET_RHOH;
+    const Real INLET_TEMP_l = INLET_TEMP;
+    RealVector INLET_RHOX_l(NumSpec);
+    for (int i = 0; i < NumSpec; ++i) INLET_RHOX_l[i] = INLET_RHOX[i];
+    const Real INLET_VEL_l = INLET_VEL;
+
+    const auto domlo = domainBox.loVect3d();
+    const auto domhi = domainBox.hiVect3d();
+
+    const auto lo = bx.loVect3d();
+    const auto hi = bx.hiVect3d();
+
+    if (lo[0] < domlo[0]) {
+        auto imax = domlo[0];
+
+        if (bcs.lo(0) == BCType::ext_dir) {
+            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                if (i < imax) {
+                    q(i, j, k) = 1.0;
+                }
+            });
+            Gpu::synchronize();
+        }
+    }
+
+    if (hi[0] > domhi[0]) {
+        auto imin = domhi[0];
+
+        if (bcs.hi(0) == BCType::ext_dir) {
+            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                if (i > imin) {
+                    q(i, j, k) = 1.0;
+                }
+            });
+            Gpu::synchronize();
+        }
+    }
+
+    if (lo[1] < domlo[1]) {
+        auto jmax = domlo[1];
+
+        if (bcs.lo(1) == BCType::ext_dir) {
+            if (is_vel) {
+                if (comp == 0) {
+                    ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        if (j < jmax) {
+                            q(i, j, k) = 0.0;
+                        }
+                    });
+                } else if (comp == 1) {
+                    ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        if (j < jmax) {
+                            q(i, j, k) = INLET_VEL_l;
+                        }
+                    });
+                }
+                Gpu::synchronize();
+            } else {
+                if (INLET_VEL != 0.0) {
+                    if (comp == Rho) {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (j < jmax) {
+                                            q(i, j, k) = INLET_RHO_l;
+                                        }
+                                    });
+                    } else if (comp == RhoH) {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (j < jmax) {
+                                            q(i, j, k) = INLET_RHOH_l;
+                                        }
+                                    });
+                    } else if (comp >= FirstSpec &&
+                               comp < FirstSpec + NumSpec) {
+                        Real* AMREX_RESTRICT INLET_RHOX_p =
+                            INLET_RHOX.dataPtr();
+                        ParallelFor(
+                            bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                if (j < jmax) {
+                                    q(i, j, k) = INLET_RHOX_p[comp - FirstSpec];
+                                }
+                            });
+                    } else if (comp == Temp) {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (j < jmax) {
+                                            q(i, j, k) = INLET_TEMP_l;
+                                        }
+                                    });
+                    } else {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (j < jmax) {
+                                            q(i, j, k) = 0.0;
+                                        }
+                                    });
+                    }
+                } else {
+                    ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        if (j < jmax) {
+                            q(i, j, k) = 0.0;
+                        }
+                    });
+                }
+                Gpu::synchronize();
+            }  // end if is_vel
+        }
+    }
+
+    if (hi[1] > domhi[1]) {
+        auto jmin = domhi[1];
+
+        if (bcs.hi(1) == BCType::ext_dir) {
+            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                if (j > jmin) {
+                    q(i, j, k) = 1.0;
+                }
+            });
+        }
+        Gpu::synchronize();
+    }
+
+#if AMREX_SPACEDIM == 3
+
+    if (lo[2] < domlo[2]) {
+        auto kmax = domlo[2];
+
+        if (bcs.lo(2) == BCType::ext_dir) {
+            if (is_vel) {
+                if (comp == 0 || comp == 1) {
+                    ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        if (k < kmax) {
+                            q(i, j, k) = 0.0;
+                        }
+                    });
+                } else if (comp == 2) {
+                    ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        if (k < kmax) {
+                            q(i, j, k) = INLET_VEL_l;
+                        }
+                    });
+                }
+            } else {
+                if (INLET_VEL != 0.0) {
+                    if (comp == Rho) {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (k < kmax) {
+                                            q(i, j, k) = INLET_RHO_l;
+                                        }
+                                    });
+                    } else if (comp == RhoH) {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (k < kmax) {
+                                            q(i, j, k) = INLET_RHOH_l;
+                                        }
+                                    });
+                    } else if (comp >= FirstSpec &&
+                               comp < FirstSpec + NumSpec) {
+                        Real* AMREX_RESTRICT INLET_RHOX_p =
+                            INLET_RHOX.dataPtr();
+                        ParallelFor(
+                            bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                if (k < kmax) {
+                                    q(i, j, k) = INLET_RHOX_p[comp - FirstSpec];
+                                }
+                            });
+                    } else if (comp == Temp) {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (k < kmax) {
+                                            q(i, j, k) = INLET_TEMP_l;
+                                        }
+                                    });
+                    } else {
+                        ParallelFor(bx,
+                                    [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                                        if (k < kmax) {
+                                            q(i, j, k) = 0.0;
+                                        }
+                                    });
+                    }
+                } else {
+                    ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                        if (k < kmax) {
+                            q(i, j, k) = 0.0;
+                        }
+                    });
+                }
+            }  // end if is_vel
+            Gpu::synchronize();
+        }
+    }
+
+    if (hi[2] > domhi[2]) {
+        auto kmin = domhi[2];
+
+        if (bcs.hi(2) == BCType::ext_dir) {
+            ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                if (k > kmin) {
+                    q(i, j, k) = 0.0;
+                }
+            });
+        }
+        Gpu::synchronize();
+    }
+#endif
+}
diff --git a/Exec/science/flame_ml/MaestroBaseState.cpp b/Exec/science/flame_ml/MaestroBaseState.cpp
new file mode 100644
index 000000000..59aa35cdc
--- /dev/null
+++ b/Exec/science/flame_ml/MaestroBaseState.cpp
@@ -0,0 +1,166 @@
+#include <Maestro.H>
+#include <Maestro_F.H>
+
+using namespace amrex;
+
+void Maestro::InitBaseState(BaseState<Real>& rho0, BaseState<Real>& rhoh0,
+                            BaseState<Real>& p0, const int lev) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::InitBaseState()", InitBaseState);
+
+    // sanity check
+    if (dens_fuel < base_cutoff_density ||
+        dens_fuel < anelastic_cutoff_density) {
+        Abort(
+            "ERROR: ERROR: fuel density < (base_cutoff_density or "
+            "anelastic_cutoff_density)");
+    }
+
+    const int n = lev;
+
+    Print() << "cutoff densities:" << std::endl;
+    Print() << "    low density cutoff (for mapping the model) =      "
+            << base_cutoff_density << std::endl;
+    Print() << "    buoyancy cutoff density                           "
+            << std::endl;
+    Print() << "        (for zeroing rho - rho_0, centrifugal term) = "
+            << buoyancy_cutoff_factor * base_cutoff_density << std::endl;
+    Print() << "    anelastic cutoff =                                "
+            << anelastic_cutoff_density << std::endl;
+    Print() << " " << std::endl;
+
+    // figure out the indices for different species
+    const int ic12 = network_spec_index("carbon-12");
+    const int io16 = network_spec_index("oxygen-16");
+    const int img24 = network_spec_index("magnesium-24");
+
+    if (ic12 < 0 || io16 < 0 || img24 < 0) {
+        Abort("ERROR: species indices not defined");
+    }
+
+    // length of the domain
+    Real rlen = geom[lev].ProbHi(AMREX_SPACEDIM - 1) -
+                geom[lev].ProbLo(AMREX_SPACEDIM - 1);
+
+    // figure out the thermodynamics of the fuel and ash state
+    RealVector xn_fuel(NumSpec, 0.0);
+    RealVector xn_ash(NumSpec, 0.0);
+
+    eos_t eos_state;
+
+    // fuel
+    xn_fuel[ic12] = xc12_fuel;
+    xn_fuel[io16] = 1.0 - xc12_fuel;
+
+    eos_state.rho = dens_fuel;
+    eos_state.T = temp_fuel;
+    for (auto comp = 0; comp < NumSpec; ++comp) {
+        eos_state.xn[comp] = xn_fuel[comp];
+    }
+
+    eos(eos_input_rt, eos_state);
+
+    // note: p_ambient should be = p0_init
+    Real p_ambient = eos_state.p;
+    Real rhoh_fuel = dens_fuel * eos_state.h;
+
+    // ash
+    xn_ash[io16] = 1.0 - xc12_fuel;
+    xn_ash[img24] = xc12_fuel;
+
+    eos_state.rho = dens_fuel;  // initial guess
+    eos_state.T = temp_ash;
+    for (auto comp = 0; comp < NumSpec; ++comp) {
+        eos_state.xn[comp] = xn_ash[comp];
+    }
+    eos_state.p = p_ambient;
+
+    eos(eos_input_tp, eos_state);
+
+    Real dens_ash = eos_state.rho;
+    Real rhoh_ash = dens_ash * eos_state.h;
+
+    auto s0_init_arr = s0_init.array();
+    auto rho0_arr = rho0.array();
+    auto rhoh0_arr = rhoh0.array();
+    auto p0_arr = p0.array();
+    auto p0_init_arr = p0_init.array();
+    auto tempbar_arr = tempbar.array();
+    auto tempbar_init_arr = tempbar_init.array();
+
+    for (auto r = 0; r < base_geom.nr(n); ++r) {
+        // height above the bottom of the domain
+        Real rloc = geom[lev].ProbLo(AMREX_SPACEDIM - 1) +
+                    (Real(r) + 0.5) * base_geom.dr(n);
+
+        if (rloc <
+            geom[lev].ProbLo(AMREX_SPACEDIM - 1) + interface_pos_frac * rlen) {
+            // fuel
+            s0_init_arr(n, r, Rho) = dens_fuel;
+            s0_init_arr(n, r, RhoH) = rhoh_fuel;
+            for (auto comp = 0; comp < NumSpec; ++comp) {
+                s0_init_arr(n, r, FirstSpec + comp) = dens_fuel * xn_fuel[comp];
+            }
+
+        } else {
+            // ash
+            s0_init_arr(n, r, Rho) = dens_ash;
+            s0_init_arr(n, r, RhoH) = rhoh_ash;
+            for (auto comp = 0; comp < NumSpec; ++comp) {
+                s0_init_arr(n, r, FirstSpec + comp) = dens_fuel * xn_ash[comp];
+            }
+        }
+
+        // give the temperature a smooth profile
+        s0_init_arr(n, r, Temp) =
+            temp_fuel +
+            (temp_ash - temp_fuel) * 0.5 *
+                (1.0 + tanh((rloc - (geom[lev].ProbLo(AMREX_SPACEDIM - 1) +
+                                     interface_pos_frac * rlen)) /
+                            (smooth_len_frac * rlen)));
+
+        // give the carbon mass fraction a smooth profile too
+        RealVector xn_smooth(NumSpec, 0.0);
+
+        xn_smooth[ic12] =
+            xn_fuel[ic12] +
+            (xn_ash[ic12] - xn_fuel[ic12]) * 0.5 *
+                (1.0 + tanh((rloc - (geom[lev].ProbLo(AMREX_SPACEDIM - 1) +
+                                     interface_pos_frac * rlen)) /
+                            (smooth_len_frac * rlen)));
+
+        xn_smooth[io16] = xn_fuel[io16];
+        xn_smooth[img24] = 1.0 - xn_smooth[ic12] - xn_smooth[io16];
+
+        // get the new density and enthalpy
+        eos_state.rho = s0_init_arr(n, r, Rho);
+        eos_state.T = s0_init_arr(n, r, Temp);
+        eos_state.p = p_ambient;
+        for (auto comp = 0; comp < NumSpec; ++comp) {
+            eos_state.xn[comp] = xn_smooth[comp];
+        }
+
+        // (T,p) --> rho, h
+        eos(eos_input_tp, eos_state);
+
+        s0_init_arr(n, r, Rho) = eos_state.rho;
+        s0_init_arr(n, r, RhoH) = eos_state.rho * eos_state.h;
+        for (auto comp = 0; comp < NumSpec; ++comp) {
+            s0_init_arr(n, r, FirstSpec + comp) =
+                eos_state.rho * xn_smooth[comp];
+        }
+        p0_init_arr(n, r) = p_ambient;
+    }
+
+    // copy s0_init and p0_init into rho0, rhoh0, p0, and tempbar
+    for (auto r = 0; r < base_geom.nr_fine; ++r) {
+        rho0_arr(lev, r) = s0_init_arr(n, r, Rho);
+        rhoh0_arr(lev, r) = s0_init_arr(n, r, RhoH);
+        tempbar_arr(lev, r) = s0_init_arr(n, r, Temp);
+        tempbar_init_arr(lev, r) = s0_init_arr(n, r, Temp);
+        p0_arr(lev, r) = p0_init_arr(lev, r);
+    }
+
+    // initialize any inlet BC parameters
+    SetInletBCs();
+}
diff --git a/Exec/science/flame_ml/MaestroInitData.cpp b/Exec/science/flame_ml/MaestroInitData.cpp
new file mode 100644
index 000000000..1fca4102b
--- /dev/null
+++ b/Exec/science/flame_ml/MaestroInitData.cpp
@@ -0,0 +1,43 @@
+
+#include <Maestro.H>
+using namespace amrex;
+
+// initializes data on a specific level
+void Maestro::InitLevelData(const int lev, const Real time, const MFIter& mfi,
+                            const Array4<Real> scal, const Array4<Real> vel) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::InitLevelData()", InitLevelData);
+
+    const auto tileBox = mfi.tilebox();
+
+    const auto vel_fuel_loc = vel_fuel;
+
+    // initialize velocity
+    ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+        for (auto n = 0; n < AMREX_SPACEDIM - 1; ++n) {
+            vel(i, j, k, n) = 0.0;
+        }
+        vel(i, j, k, AMREX_SPACEDIM - 1) = vel_fuel_loc;
+    });
+
+    const auto s0_arr = s0_init.const_array();
+
+    ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+        int r = AMREX_SPACEDIM == 2 ? j : k;
+
+        // set the scalars using s0
+        scal(i, j, k, Rho) = s0_arr(lev, r, Rho);
+        scal(i, j, k, RhoH) = s0_arr(lev, r, RhoH);
+        scal(i, j, k, Temp) = s0_arr(lev, r, Temp);
+        for (auto comp = 0; comp < NumSpec; ++comp) {
+            scal(i, j, k, FirstSpec + comp) = s0_arr(lev, r, FirstSpec + comp);
+        }
+        // initialize pi to zero for now
+        scal(i, j, k, Pi) = 0.0;
+    });
+}
+
+void Maestro::InitLevelDataSphr(const int lev, const Real time, MultiFab& scal,
+                                MultiFab& vel) {
+    Abort("InitLevelDataSphr not implemented");
+}
diff --git a/Exec/science/flame_ml/MaestroInletBCs.H b/Exec/science/flame_ml/MaestroInletBCs.H
new file mode 100644
index 000000000..927424182
--- /dev/null
+++ b/Exec/science/flame_ml/MaestroInletBCs.H
@@ -0,0 +1,15 @@
+#ifndef _MaestroInletBCs_H_
+#define _MaestroInletBCs_H_
+
+#include <AMReX_BLFort.H>
+
+namespace InletBCs {
+
+extern amrex::Real INLET_RHO;
+extern amrex::Real INLET_RHOH;
+extern amrex::Real INLET_TEMP;
+extern RealVector INLET_RHOX;
+extern amrex::Real INLET_VEL;
+};  // namespace InletBCs
+
+#endif
\ No newline at end of file
diff --git a/Exec/science/flame_ml/MaestroInletBCs.cpp b/Exec/science/flame_ml/MaestroInletBCs.cpp
new file mode 100644
index 000000000..682d0271b
--- /dev/null
+++ b/Exec/science/flame_ml/MaestroInletBCs.cpp
@@ -0,0 +1,58 @@
+#include <Maestro.H>
+#include <Maestro_F.H>
+
+#include <MaestroInletBCs.H>
+
+using namespace amrex;
+
+Real InletBCs::INLET_RHO;
+Real InletBCs::INLET_RHOH;
+Real InletBCs::INLET_TEMP;
+RealVector InletBCs::INLET_RHOX;
+Real InletBCs::INLET_VEL;
+
+/*
+inlet_bc_module is a simple container module that holds the parameters
+that are used by physbc to implement the inlet boundary conditions.
+As these are problem-specific, any problem needing inlet boundary
+conditions should create its own version of this module, using this
+outline.
+*/
+void Maestro::SetInletBCs() {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::SetInletBCs()", SetInletBCs);
+
+    // here we initialize the parameters that are module variables.
+    // this routine is called when the base state is defined initially,
+    // and upon restart, just after the base state is read in.
+
+    // figure out the indices for different species
+    const auto ic12 = network_spec_index("carbon-12");
+    const auto io16 = network_spec_index("oxygen-16");
+
+    if (ic12 < 0 || io16 < 0) {
+        Abort("ERROR: species indices undefined in inlet_bc");
+    }
+
+    eos_t eos_state;
+
+    eos_state.rho = dens_fuel;
+    eos_state.T = temp_fuel;
+
+    for (auto comp = 0; comp < NumSpec; ++comp) {
+        eos_state.xn[comp] = 0.0;
+    }
+    eos_state.xn[ic12] = xc12_fuel;
+    eos_state.xn[io16] = 1.0 - xc12_fuel;
+
+    eos(eos_input_rt, eos_state);
+
+    InletBCs::INLET_RHO = dens_fuel;
+    InletBCs::INLET_RHOH = dens_fuel * eos_state.h;
+    InletBCs::INLET_TEMP = temp_fuel;
+    InletBCs::INLET_RHOX.resize(NumSpec);
+    for (auto comp = 0; comp < NumSpec; ++comp) {
+        InletBCs::INLET_RHOX[comp] = dens_fuel * eos_state.xn[comp];
+    }
+    InletBCs::INLET_VEL = vel_fuel;
+}
\ No newline at end of file
diff --git a/Exec/science/flame_ml/MaestroTagging.cpp b/Exec/science/flame_ml/MaestroTagging.cpp
new file mode 100644
index 000000000..963d6aa47
--- /dev/null
+++ b/Exec/science/flame_ml/MaestroTagging.cpp
@@ -0,0 +1,66 @@
+
+#include <Maestro.H>
+#include <Maestro_F.H>
+
+using namespace amrex;
+
+void Maestro::RetagArray(const Box& bx, const int lev) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::RetagArray()", RetagArray);
+    // re-compute tag_array since the actual grid structure changed due to buffering
+    // this is required in order to compute numdisjointchunks, r_start_coord, r_end_coord
+
+    // Tag on regions including buffer cells
+    auto lo = bx.loVect3d()[AMREX_SPACEDIM - 1];
+    auto hi = bx.hiVect3d()[AMREX_SPACEDIM - 1];
+    const auto max_lev = base_geom.max_radial_level + 1;
+
+    for (auto r = lo; r <= hi; ++r) {
+        tag_array[lev - 1 + max_lev * (r / 2)] = TagBox::SET;
+    }
+}
+
+void Maestro::TagBoxes(TagBoxArray& tags, const MFIter& mfi, const int lev,
+                       const Real time) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::TagBoxes()", TagBoxes);
+
+    // Tag on regions of high temperature
+    const Array4<char> tag = tags.array(mfi);
+    const int* AMREX_RESTRICT tag_array_p = tag_array.dataPtr();
+    const int max_lev = base_geom.max_radial_level + 1;
+
+    const Box& tilebox = mfi.tilebox();
+
+    AMREX_PARALLEL_FOR_3D(tilebox, i, j, k, {
+        int r = AMREX_SPACEDIM == 2 ? j : k;
+
+        if (tag_array_p[lev + max_lev * r] > 0) {
+            tag(i, j, k) = TagBox::SET;
+        }
+    });
+}
+
+void Maestro::StateError(TagBoxArray& tags, const MultiFab& state_mf,
+                         const MFIter& mfi, const int lev, const Real time) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::StateError()", StateError);
+
+    // Tag on regions of high temperature
+    const Array4<char> tag = tags.array(mfi);
+    const Array4<const Real> state = state_mf.array(mfi);
+    int* AMREX_RESTRICT tag_array_p = tag_array.dataPtr();
+    const int max_lev = base_geom.max_radial_level + 1;
+
+    const Box& tilebox = mfi.tilebox();
+
+    // Tag on regions of high temperature
+    AMREX_PARALLEL_FOR_3D(tilebox, i, j, k, {
+        if (state(i, j, k, Temp) >= 6.5e8 && state(i, j, k, Temp) <= 2.4e9) {
+            int r = AMREX_SPACEDIM == 2 ? j : k;
+
+            tag(i, j, k) = TagBox::SET;
+            tag_array_p[lev + max_lev * r] = TagBox::SET;
+        }
+    });
+}
diff --git a/Exec/science/flame_ml/Make.package b/Exec/science/flame_ml/Make.package
new file mode 100644
index 000000000..2408e6a14
--- /dev/null
+++ b/Exec/science/flame_ml/Make.package
@@ -0,0 +1,3 @@
+ifeq ($(USE_ML), TRUE)
+  CEXE_headers += TorchLib.H
+endif
\ No newline at end of file
diff --git a/Exec/science/flame_ml/README.md b/Exec/science/flame_ml/README.md
new file mode 100644
index 000000000..5540e51dd
--- /dev/null
+++ b/Exec/science/flame_ml/README.md
@@ -0,0 +1,29 @@
+To run this flame test problem, you will first need to download the appropriate Pytorch library (libtorch) in `MAESTROeX/external` directory. We can choose to use either CPU or CUDA.
+
+```shell
+cd $MAESTROEX_HOME/external
+
+# CPU only
+wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcpu.zip
+unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cpu.zip
+
+# CUDA 11.1
+wget https://download.pytorch.org/libtorch/cu111/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcu111.zip
+unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cu111.zip
+
+# CUDA 10.2
+wget https://download.pytorch.org/libtorch/cu102/libtorch-cxx11-abi-shared-with-deps-1.9.0%2Bcu102.zip
+unzip libtorch-cxx11-abi-shared-with-deps-1.9.0+cu102.zip
+```
+
+To compile the problem with ML enabled, enter
+`make -j`.
+
+Then you can run the problem.
+
+`mpiexec -n 1 ./Maestro2d.gnu.MPI.ML.ex inputs_2d_smallscale_ml`
+
+
+<!---
+Torchscript code based on https://pytorch.org/tutorials/advanced/cpp_export.html 
+-->
diff --git a/Exec/science/flame_ml/TorchLib.H b/Exec/science/flame_ml/TorchLib.H
new file mode 100644
index 000000000..6d1c41e95
--- /dev/null
+++ b/Exec/science/flame_ml/TorchLib.H
@@ -0,0 +1,6 @@
+// Pytorch C++ API library
+#ifndef _ML_LIB
+#define _ML_LIB
+
+#include <torch/script.h>
+#endif
diff --git a/Exec/science/flame_ml/_parameters b/Exec/science/flame_ml/_parameters
new file mode 100644
index 000000000..a897725e4
--- /dev/null
+++ b/Exec/science/flame_ml/_parameters
@@ -0,0 +1,37 @@
+@namespace: problem
+
+# For a simple laminar flame setup we define the fuel and ash state.  The
+# fuel parameters will be used to define the inflow boundary condition.
+# the fuel and ash will be in pressure equilibrium, so the ash density
+# will be found via the EOS.  The ash composition will be set by simply
+# turning all the fuel into ash.
+
+dens_fuel               real                 1.d8
+temp_fuel               real                 1.d8
+xc12_fuel               real                 0.5d0
+vel_fuel                real                 1.d6
+
+temp_ash                real                 3.d9
+
+# fraction of the domain height to put the fuel/ash interface
+interface_pos_frac      real                 0.5
+
+# fraction of the domain height to set the smooothing length for
+# the interface between the fuel and ash
+smooth_len_frac         real                 0.025
+
+# minimum X(C12) that should trigger refinement
+XC12_ref_threshold      real                 1.d-3
+
+# do the burning only once for each vertical coordinate
+do_average_burn         logical              .false.
+
+# acceptable relative error for a cell compared to the average
+# at that vertical coord if do_average_burn = T (should be smaller
+# than the EOS tolerance).
+transverse_tol          real                 1.d-7
+
+# ML normalization constants
+dens_fac                real         5.000500606732890755d7
+temp_fac                real         3.000899961693421364d9
+enuc_fac                real         7.033368299136000000d12
diff --git a/Exec/science/flame_ml/inputs_2d_smallscale b/Exec/science/flame_ml/inputs_2d_smallscale
new file mode 100644
index 000000000..fcffe9082
--- /dev/null
+++ b/Exec/science/flame_ml/inputs_2d_smallscale
@@ -0,0 +1,111 @@
+
+# INITIAL MODEL
+maestro.perturb_model = false
+maestro.drdxfac = 5
+
+# PROBLEM SIZE
+geometry.prob_lo     =  0.0      0.0
+geometry.prob_hi     =  0.625e0  5.e0
+
+# BOUNDARY CONDITIONS
+# 0 = Interior   3 = Symmetry
+# 1 = Inflow     4 = Slipwall
+# 2 = Outflow    5 = NoSlipWall
+maestro.lo_bc = 0 1 2
+maestro.hi_bc = 0 2 2
+geometry.is_periodic =  1 0 0
+
+# VERBOSITY
+maestro.v              = 1       # verbosity
+
+# DEBUG FOR NAN
+amrex.fpe_trap_invalid = 1       # floating point exception
+
+# GRIDDING AND REFINEMENT
+amr.n_cell             = 128 1024
+amr.max_grid_size      = 64
+amr.max_level          = 0       # maximum level number allowed
+maestro.regrid_int     = 2       # how often to regrid
+amr.ref_ratio          = 2 2 2 2 2 2 # refinement ratio
+amr.blocking_factor    = 8       # block factor in grid generation
+amr.refine_grid_layout = 0       # chop grids up into smaller grids if nprocs > ngrids
+
+# TAGGING
+maestro.temperr = 6.5e8 6.5e8 6.5e8
+
+# TIME STEPPING
+maestro.max_step  = 500
+maestro.stop_time = 1.e0
+maestro.cfl       = 0.5    # cfl number for hyperbolic system
+                           # In this test problem, the velocity is
+		           # time-dependent.  We could use 0.9 in
+		           # the 3D test, but need to use 0.7 in 2D
+		           # to satisfy CFL condition.
+maestro.fixed_dt = 2.4e-8
+
+# ALGORITHMIC OPTIONS
+maestro.spherical = 0
+maestro.evolve_base_state = false
+maestro.do_initial_projection = true
+maestro.init_divu_iter        = 3
+maestro.init_iter             = 1
+maestro.do_smallscale = true
+maestro.beta0_type = 3
+
+maestro.anelastic_cutoff_density = 3.e6
+maestro.base_cutoff_density = 3.e6
+
+maestro.grav_const = 0.e0
+maestro.use_thermal_diffusion = true
+
+maestro.init_shrink = 0.1e0
+maestro.max_dt_growth = 1.2e0
+
+maestro.use_tfromp = false
+maestro.dpdt_factor = 0.3
+
+# PLOTFILES
+maestro.plot_base_name  = flame_    # root name of plot file
+maestro.plot_int   = 100      # number of timesteps between plot files
+maestro.plot_deltat = 1.e-5
+
+# CHECKPOINT
+maestro.check_base_name = chk
+maestro.chk_int         = -1
+
+# ML OUTPUT
+maestro.save_react_int = 10
+
+# tolerances for the initial projection
+maestro.eps_init_proj_cart = 1.e-11
+maestro.eps_init_proj_sph  = 1.e-10
+# tolerances for the divu iterations
+maestro.eps_divu_cart      = 1.e-11
+maestro.eps_divu_sph       = 1.e-10
+maestro.divu_iter_factor   = 100.
+maestro.divu_level_factor  = 10.
+# tolerances for the MAC projection
+maestro.eps_mac            = 1.e-10
+maestro.eps_mac_max        = 1.e-8
+maestro.mac_level_factor   = 10.
+maestro.eps_mac_bottom     = 1.e-3
+# tolerances for the nodal projection
+maestro.eps_hg             = 1.e-10
+maestro.eps_hg_max         = 1.e-10
+maestro.hg_level_factor    = 10.
+maestro.eps_hg_bottom      = 1.e-4
+
+maestro.do_burning = true
+
+# override the default values of the probin namelist values here
+
+problem.dens_fuel = 5.e7
+problem.temp_fuel = 1.e8
+problem.xc12_fuel = 0.5e0
+problem.vel_fuel = 1.e5
+problem.temp_ash = 3.e9
+problem.interface_pos_frac = 0.125e0
+problem.smooth_len_frac = 0.025e0
+
+eos.use_eos_coulomb = 1
+
diff --git a/Exec/science/flame_ml/inputs_2d_smallscale_ml b/Exec/science/flame_ml/inputs_2d_smallscale_ml
new file mode 100644
index 000000000..a07c1c830
--- /dev/null
+++ b/Exec/science/flame_ml/inputs_2d_smallscale_ml
@@ -0,0 +1,118 @@
+
+# INITIAL MODEL
+maestro.perturb_model = false
+maestro.drdxfac = 5
+
+# ML MODEL
+maestro.ml_model_file = ""
+
+# PROBLEM SIZE
+geometry.prob_lo     =  0.0      0.0
+geometry.prob_hi     =  0.625e0  5.e0
+
+# BOUNDARY CONDITIONS
+# 0 = Interior   3 = Symmetry
+# 1 = Inflow     4 = Slipwall
+# 2 = Outflow    5 = NoSlipWall
+maestro.lo_bc = 0 1 2
+maestro.hi_bc = 0 2 2
+geometry.is_periodic =  1 0 0
+
+# VERBOSITY
+maestro.v              = 1       # verbosity
+
+# DEBUG FOR NAN
+amrex.fpe_trap_invalid = 1       # floating point exception
+
+# GRIDDING AND REFINEMENT
+amr.n_cell             = 128 1024
+amr.max_grid_size      = 64
+amr.max_level          = 0       # maximum level number allowed
+maestro.regrid_int     = 2       # how often to regrid
+amr.ref_ratio          = 2 2 2 2 2 2 # refinement ratio
+amr.blocking_factor    = 8       # block factor in grid generation
+amr.refine_grid_layout = 0       # chop grids up into smaller grids if nprocs > ngrids
+
+# TAGGING
+maestro.temperr = 6.5e8 6.5e8 6.5e8
+
+# TIME STEPPING
+maestro.max_step  = 500
+maestro.stop_time = 1.e0
+maestro.cfl       = 0.5    # cfl number for hyperbolic system
+                           # In this test problem, the velocity is
+		           # time-dependent.  We could use 0.9 in
+		           # the 3D test, but need to use 0.7 in 2D
+		           # to satisfy CFL condition.
+maestro.fixed_dt = 2.4e-8
+
+# ALGORITHMIC OPTIONS
+maestro.spherical = 0
+maestro.evolve_base_state = false
+maestro.do_initial_projection = true
+maestro.init_divu_iter        = 3
+maestro.init_iter             = 1
+maestro.do_smallscale = true
+maestro.beta0_type = 3
+
+maestro.anelastic_cutoff_density = 3.e6
+maestro.base_cutoff_density = 3.e6
+
+maestro.grav_const = 0.e0
+maestro.use_thermal_diffusion = true
+
+maestro.init_shrink = 0.1e0
+maestro.max_dt_growth = 1.2e0
+
+maestro.use_tfromp = false
+maestro.dpdt_factor = 0.3
+
+# PLOTFILES
+maestro.plot_base_name  = flame_    # root name of plot file
+maestro.plot_int   = 100      # number of timesteps between plot files
+maestro.plot_deltat = 1.e-5
+
+# CHECKPOINT
+maestro.check_base_name = chk
+maestro.chk_int         = -1
+
+# ML OUTPUT
+maestro.save_react_int = 10
+
+# tolerances for the initial projection
+maestro.eps_init_proj_cart = 1.e-11
+maestro.eps_init_proj_sph  = 1.e-10
+# tolerances for the divu iterations
+maestro.eps_divu_cart      = 1.e-11
+maestro.eps_divu_sph       = 1.e-10
+maestro.divu_iter_factor   = 100.
+maestro.divu_level_factor  = 10.
+# tolerances for the MAC projection
+maestro.eps_mac            = 1.e-10
+maestro.eps_mac_max        = 1.e-8
+maestro.mac_level_factor   = 10.
+maestro.eps_mac_bottom     = 1.e-3
+# tolerances for the nodal projection
+maestro.eps_hg             = 1.e-10
+maestro.eps_hg_max         = 1.e-10
+maestro.hg_level_factor    = 10.
+maestro.eps_hg_bottom      = 1.e-4
+
+maestro.do_burning = true
+
+# override the default values of the probin namelist values here
+
+problem.dens_fuel = 5.e7
+problem.temp_fuel = 1.e8
+problem.xc12_fuel = 0.5e0
+problem.vel_fuel = 1.e5
+problem.temp_ash = 3.e9
+problem.interface_pos_frac = 0.125e0
+problem.smooth_len_frac = 0.025e0
+
+problem.dens_fac = 5.000500606732890755e7
+problem.temp_fac = 3.000899961693421364e9
+problem.enuc_fac = 7.033368299136000000e12
+
+eos.use_eos_coulomb = 1
+
diff --git a/Source/Maestro.H b/Source/Maestro.H
index b5f73e168..7db624b2a 100644
--- a/Source/Maestro.H
+++ b/Source/Maestro.H
@@ -28,6 +28,11 @@ using namespace maestro;
 #include <PhysBCFunctMaestro.H>
 #include <SimpleLog.H>
 
+/// Include PyTorch library if using ML model
+#ifdef ML
+#include <TorchLib.H>
+#endif
+
 /// Define Real vector types for CUDA-compatability. If `AMREX_USE_CUDA`, then
 /// this will be stored in CUDA managed memory.
 #ifdef AMREX_USE_CUDA
@@ -2077,12 +2082,20 @@ class Maestro : public amrex::AmrCore {
     Real p0bdot;
     Real p0b;
 
+    /// ML model
+#ifdef ML
+    bool use_ml;
+    torch::jit::script::Module module;
+#endif
+    
     /// flag for writing plotfiles
     enum plotfile_flag {
         plotInitData = -9999999,
         plotInitProj = -9999998,
         plotDivuIter = -9999997
     };
+
+    std::string algo_step;
 };
 
 #endif
diff --git a/Source/MaestroAdvance.cpp b/Source/MaestroAdvance.cpp
index a9bcbfe13..451f0316f 100644
--- a/Source/MaestroAdvance.cpp
+++ b/Source/MaestroAdvance.cpp
@@ -6,6 +6,8 @@ using namespace amrex;
 
 // advance a single level for a single time step, updates flux registers
 void Maestro::AdvanceTimeStep(bool is_initIter) {
+    algo_step = "0";
+
     // timer for profiling
     BL_PROFILE_VAR("Maestro::AdvanceTimeStep()", AdvanceTimeStep);
 
@@ -228,6 +230,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     Real react_time_start = ParallelDescriptor::second();
 
+    algo_step = "1";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 1 : react state >>>" << std::endl;
     }
@@ -246,6 +250,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     Real advect_time_start = ParallelDescriptor::second();
 
+    algo_step = "2";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 2 : make w0 >>>" << std::endl;
     }
@@ -351,6 +357,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
     // STEP 3 -- construct the advective velocity
     //////////////////////////////////////////////////////////////////////////////
 
+    algo_step = "3";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 3 : create MAC velocities >>>" << std::endl;
     }
@@ -394,6 +402,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     advect_time_start = ParallelDescriptor::second();
 
+    algo_step = "4";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 4 : advect base >>>" << std::endl;
     }
@@ -560,6 +570,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     Real thermal_time_start = ParallelDescriptor::second();
 
+    algo_step = "4a";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 4a: thermal conduct >>>" << std::endl;
     }
@@ -611,6 +623,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     react_time_start = ParallelDescriptor::second();
 
+    algo_step = "5";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 5 : react state >>>" << std::endl;
     }
@@ -659,6 +673,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     advect_time_start = ParallelDescriptor::second();
 
+    algo_step = "6";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 6 : make new S and new w0 >>>" << std::endl;
     }
@@ -761,6 +777,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
     // STEP 7 -- redo the construction of the advective velocity using the current w0
     //////////////////////////////////////////////////////////////////////////////
 
+    algo_step = "7";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 7 : create MAC velocities >>>" << std::endl;
     }
@@ -798,6 +816,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     advect_time_start = ParallelDescriptor::second();
 
+    algo_step = "8";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 8 : advect base >>>" << std::endl;
     }
@@ -952,6 +972,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     thermal_time_start = ParallelDescriptor::second();
 
+    algo_step = "8a";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 8a: thermal conduct >>>" << std::endl;
     }
@@ -997,6 +1019,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     react_time_start = ParallelDescriptor::second();
 
+    algo_step = "9";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 9 : react state >>>" << std::endl;
     }
@@ -1041,6 +1065,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     Real ndproj_time_start = ParallelDescriptor::second();
 
+    algo_step = "10";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 10: make new S >>>" << std::endl;
     }
@@ -1083,6 +1109,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
 
     advect_time_start = ParallelDescriptor::second();
 
+    algo_step = "11";
+
     if (maestro_verbose >= 1) {
         Print() << "<<< STEP 11: update and project new velocity >>>"
                 << std::endl;
@@ -1181,6 +1209,8 @@ void Maestro::AdvanceTimeStep(bool is_initIter) {
         }
     }
 
+    algo_step = "endstep";
+
     Print() << "\nTimestep " << istep << " ends with TIME = " << t_new
             << " DT = " << dt << std::endl;
 
diff --git a/Source/MaestroBurner.cpp b/Source/MaestroBurner.cpp
index ea83d32ae..2c57cd67e 100644
--- a/Source/MaestroBurner.cpp
+++ b/Source/MaestroBurner.cpp
@@ -5,6 +5,8 @@
 using namespace amrex;
 
 #ifndef SDC
+
+#ifndef ML
 void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
                      const Vector<MultiFab>& rho_Hext,
                      Vector<MultiFab>& rho_omegadot, Vector<MultiFab>& rho_Hnuc,
@@ -30,7 +32,20 @@ void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
 
     const auto ispec_threshold = network_spec_index(burner_threshold_species);
 
+    Vector<MultiFab> react_in(finest_level+1), react_out(finest_level+1);
+    const bool save_react_data = istep > 0 && save_react_int > 0 && istep % save_react_int == 0;
+
     for (int lev = 0; lev <= finest_level; ++lev) {
+        if (save_react_data) {
+	    if (NumSpec == 3) {
+		react_in[lev].define(grids[lev], dmap[lev], NumSpec+1, 0);  // X(C12), X(Mg24), rho, T -> burn
+		react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+0), 0); // burn -> X(C12), X(Mg24), Hnuc, dXdt, enucdot
+	    } else {
+		react_in[lev].define(grids[lev], dmap[lev], NumSpec+2, 0);  // X, rho, T -> burn
+		react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+1), 0); // burn -> X, Hnuc, dXdt, enucdot
+	    }
+	}
+
         // create mask assuming refinement ratio = 2
         int finelev = lev + 1;
         if (lev == finest_level) {
@@ -57,12 +72,223 @@ void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
             const Array4<Real> rho_Hnuc_arr = rho_Hnuc[lev].array(mfi);
             const auto tempbar_init_arr = tempbar_init.const_array();
 
+            Array4<Real> react_in_arr, react_out_arr;
+
+            if (save_react_data) {
+                react_in_arr = react_in[lev].array(mfi);
+                react_out_arr = react_out[lev].array(mfi);
+            }
+
             // use a dummy value for non-spherical as tempbar_init_cart is not defined
             const Array4<const Real> tempbar_cart_arr =
                 spherical ? tempbar_init_cart[lev].array(mfi)
                           : rho_Hext[lev].array(mfi);
             const Array4<const int> mask_arr = mask.array(mfi);
 
+            // If save_react_data, we add perturbations to the mass fractions to
+            // get more varied data for training ML model
+            if (save_react_data) {
+                ParallelForRNG(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k,
+                                                             amrex::RandomEngine const& engine) noexcept
+                {
+                    if (use_mask && mask_arr(i, j, k))
+                        return;  // cell is covered by finer cells
+
+                    auto rho = s_in_arr(i, j, k, Rho);
+                    Real x_in[NumSpec];
+                    for (int n = 0; n < NumSpec; ++n) {
+                        x_in[n] = s_in_arr(i, j, k, FirstSpec + n) / rho;
+                    }
+
+#if NAUX_NET > 0
+                    Real aux_in[NumAux];
+                    for (int n = 0; n < NumAux; ++n) {
+                        aux_in[n] = s_in_arr(i, j, k, FirstAux + n) / rho;
+                    }
+#endif
+
+		    Real T_in = 0.0;
+		    if (drive_initial_convection) {
+			if (!spherical) {
+			    auto r = (AMREX_SPACEDIM == 2) ? j : k;
+			    T_in = tempbar_init_arr(lev, r);
+			} else {
+			    T_in = tempbar_cart_arr(i, j, k);
+			}
+		    } else {
+			T_in = s_in_arr(i, j, k, Temp);
+		    }
+
+		    Real x_test =
+			(ispec_threshold > 0) ? x_in[ispec_threshold] : 0.0;
+
+		    // Perturb 75 percent of the data
+		    Real rand_perc = 0.75;
+		    if (amrex::Random(engine) < rand_perc) {
+			// Loop over the species
+			for (int n = 0; n < NumSpec; ++n) {
+			    // aprox13 network (NumSpec == 13)
+			    // Three main species are C12(1), O16(2), Mg24(4)
+			    // Do not perturb the main species
+			    if (NumSpec == 13 && n != 1 && n != 2 && n != 4) {
+				// Set a random perturbation based on log(X_k)
+				Real X_log = std::log10(x_in[n]);
+				// We only want to perturb mass fractions below 10^-4
+				// if (X_log <= -4.0) {
+				    // Random number generated between [-X_log/15, X_log/15]
+				    Real rand = (amrex::Random(engine) - 0.5) * 2*X_log/15.0;
+				    Real perturb = pow(10.0, X_log + rand) - x_in[n];
+				    x_in[n] += perturb;
+
+				    // Pick the larger mass fraction of C12 and Mg24
+				    // and subtract perturb since sum(X_k) = 1 
+				    int spec_rand = (x_in[1] > x_in[4]) ? 1 : 4;
+				    x_in[spec_rand] -= perturb;
+				// }
+			    }
+			    // ignition_simple network (NumSpec == 3)
+			    else if (NumSpec == 3 && n != 1) {
+				int spec_other = (n == 0) ? 2 : 0;
+				Real x_less = (x_in[n] < x_in[spec_other]) ? x_in[n] : x_in[spec_other];
+				// Set a random perturbation within -/+ 10% of smaller mass fraction
+				Real perturb = (amrex::Random(engine) - 0.5) * 0.1 * x_less;
+				// avoid negative mass fractions
+				if (x_in[n] < -perturb || x_in[spec_other] < perturb) {
+				    perturb = 0.0; 
+				}
+				x_in[n] += perturb;
+
+				// Subtract perturb from the other main species
+				x_in[spec_other] -= perturb;
+			    }
+			}
+			
+			// Perturb density and temperature by small relative values
+			// since they do not change much
+			Real perturb_rel = (amrex::Random(engine) - 0.5) * 2.e-4;
+			rho *= (1.0 + perturb_rel);
+			perturb_rel = (amrex::Random(engine) - 0.5) * 6.e-4;
+			T_in *= (1.0 + perturb_rel);
+		    }
+
+		    if (NumSpec == 3) {
+			react_in_arr(i, j, k, 0) = x_in[0];
+			react_in_arr(i, j, k, 1) = x_in[2];
+			react_in_arr(i, j, k, 2) = rho;
+			react_in_arr(i, j, k, 3) = T_in;
+
+			react_out_arr(i, j, k, 2) = 0.0; // output generated energy
+			for (int n = NumSpec; n < 2*(NumSpec); ++n) {
+			    react_out_arr(i, j, k, n) = 0.0;
+			}
+		    } else {
+			for (int n = 0; n < NumSpec; ++n) {
+			    react_in_arr(i, j, k, n) = x_in[n];
+			    react_out_arr(i, j, k, n) = x_in[n];
+			}
+			react_in_arr(i, j, k, NumSpec) = rho;    // input density
+			react_in_arr(i, j, k, NumSpec+1) = T_in; // input temperature
+			react_out_arr(i, j, k, NumSpec) = 0.0;   // output generated energy
+
+			for (int n = NumSpec+1; n < 2*(NumSpec+1); ++n) {
+			    react_out_arr(i, j, k, n) = 0.0;
+			}
+		    }
+		    
+
+                    burn_t state_in;
+                    burn_t state_out;
+
+                    Real x_out[NumSpec];
+#if NAUX_NET > 0
+                    Real aux_out[NumAux];
+#endif
+                    Real rhowdot[NumSpec];
+                    Real rhoH = 0.0;
+
+                    // if the threshold species is not in the network, then we burn
+                    // normally.  if it is in the network, make sure the mass
+                    // fraction is above the cutoff.
+                    if ((rho > burning_cutoff_density_lo &&
+                         rho < burning_cutoff_density_hi) &&
+                        (ispec_threshold < 0 ||
+                         (ispec_threshold > 0 &&
+                          x_test > burner_threshold_cutoff))) {
+                        // Initialize burn state_in and state_out
+                        state_in.e = 0.0;
+                        state_in.rho = rho;
+                        state_in.T = T_in;
+                        for (int n = 0; n < NumSpec; ++n) {
+                            state_in.xn[n] = x_in[n];
+                        }
+#if NAUX_NET > 0
+                        for (int n = 0; n < NumAux; ++n) {
+                            state_in.aux[n] = aux_in[n];
+                        }
+#endif
+
+                        // initialize state_out the same as state_in
+                        state_out.e = 0.0;
+                        state_out.rho = rho;
+                        state_out.T = T_in;
+                        for (int n = 0; n < NumSpec; ++n) {
+                            state_out.xn[n] = x_in[n];
+                        }
+#if NAUX_NET > 0
+                        for (int n = 0; n < NumAux; ++n) {
+                            state_out.aux[n] = aux_in[n];
+                        }
+#endif
+
+                        burner(state_out, dt_in);
+
+                        for (int n = 0; n < NumSpec; ++n) {
+                            x_out[n] = state_out.xn[n];
+                            rhowdot[n] = state_out.rho *
+                                (state_out.xn[n] - state_in.xn[n]) / dt_in;
+                        }
+#if NAUX_NET > 0
+                        for (int n = 0; n < NumAux; ++n) {
+                            aux_out[n] = state_out.aux[n];
+                        }
+#endif
+                        rhoH = state_out.rho * (state_out.e - state_in.e) / dt_in;
+
+                        // use state_in and state_out to set the reaction outputs and RHS
+                        // because the EOS calls require the actual internal energy e,
+                        // this should come after we're otherwise finished with state in/out.
+			if (NumSpec == 3) {
+			    react_out_arr(i, j, k, 0) = state_out.xn[0];
+			    react_out_arr(i, j, k, 1) = state_out.xn[2];
+			    react_out_arr(i, j, k, 2) = state_out.e - state_in.e;
+			} else {
+			    for (int n = 0; n < NumSpec; ++n) {
+				react_out_arr(i, j, k, n) = state_out.xn[n];
+			    }
+			    react_out_arr(i, j, k, NumSpec) = state_out.e - state_in.e;
+			}
+			
+                        eos(eos_input_rt, state_in); // get initial e
+                        state_out.e += state_in.e;
+                        eos(eos_input_re, state_out); // using final e, get final T
+                        Array1D<Real, 1, neqs> ydot; // this will hold the RHS
+                        actual_rhs(state_out, ydot); // evaluate the RHS to get dYdt
+
+			if (NumSpec == 3) {
+			    react_out_arr(i, j, k, NumSpec) = ydot(1) * aion[0]; // save dXdt
+			    react_out_arr(i, j, k, NumSpec+1) = ydot(3) * aion[2];
+			    react_out_arr(i, j, k, 2*NumSpec-1) = ydot(net_ienuc); // save enucdot
+			} else {
+			    for (int n = 1; n <= NumSpec; ++n) {
+				react_out_arr(i, j, k, NumSpec+n) = ydot(n) * aion[n-1]; // save dXdt
+			    }
+			    react_out_arr(i, j, k, 2*NumSpec+1) = ydot(net_ienuc); // save enucdot
+			}
+		    }
+                });
+            }
+
+            // solve original input states
             ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
                 if (use_mask && mask_arr(i, j, k) == 1) {
                     return;  // cell is covered by finer cells
@@ -155,6 +381,7 @@ void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
                     }
 #endif
                     rhoH = state_out.rho * (state_out.e - state_in.e) / dt_in;
+
                 } else {
                     for (int n = 0; n < NumSpec; ++n) {
                         x_out[n] = x_in[n];
@@ -174,7 +401,7 @@ void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
                 }
 
                 if (fabs(sumX - 1.0) > reaction_sum_tol) {
-#ifndef AMREX_USE_GPU
+#ifndef AMREX_USE_CUDA
                     Abort("ERROR: abundances do not sum to 1");
 #endif
                     for (int n = 0; n < NumSpec; ++n) {
@@ -211,8 +438,486 @@ void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
             });
         }
     }
+
+    if (save_react_data) {
+	Vector<std::string> react_in_varnames, react_out_varnames;
+	for (int i = 0; i < NumSpec; ++i) {
+	    std::string spec_string = "X(";
+	    spec_string += short_spec_names_cxx[i];
+	    spec_string += ')';
+	    if (NumSpec == 3 && i != 1) {
+		react_in_varnames.push_back(spec_string);
+		react_out_varnames.push_back(spec_string);
+	    }
+	}
+	react_in_varnames.push_back("rho");
+	react_in_varnames.push_back("temp");
+	react_out_varnames.push_back("enuc");
+	for (int i = 0; i < NumSpec; ++i) {
+	    std::string spec_string = "Xdot(";
+	    spec_string += short_spec_names_cxx[i];
+	    spec_string += ')';
+	    if (NumSpec == 3 && i != 1) {
+		react_out_varnames.push_back(spec_string);
+	    }
+	}
+	react_out_varnames.push_back("enucdot");
+    
+        std::string react_in_name = "react_inputs_";
+        std::string react_out_name = "react_outputs_";
+        PlotFileName(istep, &react_in_name);
+        PlotFileName(istep, &react_out_name);
+        react_in_name = react_in_name + "_" + algo_step;
+        react_out_name = react_out_name + "_" + algo_step;
+        Vector<int> step_array(finest_level+1, istep);
+        WriteMultiLevelPlotfile(react_in_name, finest_level + 1, GetVecOfConstPtrs(react_in), react_in_varnames,
+                                Geom(), dt_in, step_array, refRatio());
+        WriteMultiLevelPlotfile(react_out_name, finest_level + 1, GetVecOfConstPtrs(react_out), react_out_varnames,
+                                Geom(), dt_in, step_array, refRatio());
+    }
 }
 
+#else
+// ifdef ML
+void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
+                     const Vector<MultiFab>& rho_Hext,
+                     Vector<MultiFab>& rho_omegadot, Vector<MultiFab>& rho_Hnuc,
+                     const BaseState<Real>& p0, const Real dt_in,
+                     const Real time_in) {
+    // timer for profiling
+    BL_PROFILE_VAR("Maestro::Burner()", Burner);
+
+    // Put tempbar_init on cart
+    Vector<MultiFab> tempbar_init_cart(finest_level + 1);
+
+    if (spherical) {
+        for (int lev = 0; lev <= finest_level; ++lev) {
+            tempbar_init_cart[lev].define(grids[lev], dmap[lev], 1, 0);
+            tempbar_init_cart[lev].setVal(0.);
+        }
+
+        if (drive_initial_convection) {
+            Put1dArrayOnCart(tempbar_init, tempbar_init_cart, false, false,
+                             bcs_f, 0);
+        }
+    }
+
+    const auto ispec_threshold = network_spec_index(burner_threshold_species);
+
+    const bool use_ml_const = use_ml;
+
+    // match data type of pytorch tensor to multifab data
+    auto dtype0 = torch::kFloat64;
+
+    Vector<MultiFab> react_in(finest_level+1), react_out(finest_level+1);
+
+    const bool save_react_data = istep > 0 && save_react_int > 0 && istep % save_react_int == 0;
+
+    for (int lev = 0; lev <= finest_level; ++lev) {
+        if (save_react_data) {
+	    if (NumSpec == 3) {
+		react_in[lev].define(grids[lev], dmap[lev], NumSpec+1, 0);  // X(C12), X(Mg24), rho, T -> burn
+		react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+0), 0); // burn -> X(C12), X(Mg24), Hnuc, dXdt, enucdot
+	    } else {
+		react_in[lev].define(grids[lev], dmap[lev], NumSpec+2, 0);  // X, rho, T -> burn
+		react_out[lev].define(grids[lev], dmap[lev], 2*(NumSpec+1), 0); // burn -> X, Hnuc, dXdt, enucdot
+	    }
+	}
+
+        // create mask assuming refinement ratio = 2
+        int finelev = lev + 1;
+        if (lev == finest_level) finelev = finest_level;
+
+        const BoxArray& fba = s_in[finelev].boxArray();
+        const iMultiFab& mask = makeFineMask(s_in[lev], fba, IntVect(2));
+
+        // loop over boxes (make sure mfi takes a cell-centered multifab as an argument)
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+        for (MFIter mfi(s_in[lev], TilingIfNotGPU()); mfi.isValid(); ++mfi) {
+            // Get the index space of the valid region
+            const Box& tileBox = mfi.tilebox();
+
+            const bool use_mask = (lev != finest_level);
+
+            const Array4<const Real> s_in_arr = s_in[lev].array(mfi);
+            const Array4<Real> s_out_arr = s_out[lev].array(mfi);
+            const Array4<const Real> rho_Hext_arr = rho_Hext[lev].array(mfi);
+            const Array4<Real> rho_omegadot_arr = rho_omegadot[lev].array(mfi);
+            const Array4<Real> rho_Hnuc_arr = rho_Hnuc[lev].array(mfi);
+            const auto tempbar_init_arr = tempbar_init.const_array();
+
+            Array4<Real> react_in_arr, react_out_arr;
+
+            if (save_react_data) {
+                react_in_arr = react_in[lev].array(mfi);
+                react_out_arr = react_out[lev].array(mfi);
+            }
+
+            // use a dummy value for non-spherical as tempbar_init_cart is not defined
+            const Array4<const Real> tempbar_cart_arr =
+                spherical ? tempbar_init_cart[lev].array(mfi)
+                          : rho_Hext[lev].array(mfi);
+            const Array4<const int> mask_arr = mask.array(mfi);
+
+	    // retrieve smallend and size of box
+            const IntVect bx_lo = tileBox.smallEnd();
+            const IntVect nbox = tileBox.size();
+
+            // compute total cells in the box
+            int ncell = AMREX_SPACEDIM == 2 ?
+                nbox[0] * nbox[1] : nbox[0] * nbox[1] * nbox[2];
+
+            // copy multifabs to input array
+            // index ordering: (species, rho, temp)
+	    int nextra = (NumSpec == 3) ? 1 : 2;
+            const int NumInput = NumSpec + nextra;
+            amrex::Gpu::ManagedVector<Real> state_temp(ncell*NumInput);
+            Real* AMREX_RESTRICT temp_ptr = state_temp.dataPtr();
+
+	    // copy input multifab to torch tensor
+            amrex::ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
+                int ii = i - bx_lo[0];
+                int jj = j - bx_lo[1];
+                int index = jj*nbox[0] + ii;
+#if AMREX_SPACEDIM == 3
+                int kk = k - bx_lo[2];
+                index += kk*nbox[0]*nbox[1];
+#endif
+
+                auto rho = s_in_arr(i, j, k, Rho);
+                Real T_in = 0.0;
+                if (drive_initial_convection) {
+                    if (!spherical) {
+                        auto r = (AMREX_SPACEDIM == 2) ? j : k;
+                        T_in = tempbar_init_arr(lev, r);
+                    } else {
+                        T_in = tempbar_cart_arr(i, j, k);
+                    }
+                } else {
+                    T_in = s_in_arr(i, j, k, Temp);
+                }
+
+                // array order is row-based [index][comp]
+		if (NumSpec == 3) {
+		    temp_ptr[index*NumInput] = s_in_arr(i, j, k, FirstSpec) / rho;
+		    temp_ptr[index*NumInput + 1] = s_in_arr(i, j, k, FirstSpec + 2) / rho;
+		    temp_ptr[index*NumInput + 2] = rho / dens_fac;
+		    temp_ptr[index*NumInput + 3] = T_in / temp_fac;
+		} else {
+		    for (int n = 0; n < NumSpec; ++n) {
+			temp_ptr[index*NumInput + n] = s_in_arr(i, j, k, FirstSpec + n) / rho;
+		    }
+		    temp_ptr[index*NumInput + NumSpec] = rho / dens_fac;
+		    temp_ptr[index*NumInput + NumSpec + 1] = T_in / temp_fac;
+		}
+            });
+
+	    at::Tensor outputs_torch = torch::zeros({ncell, NumSpec+1}, torch::TensorOptions().dtype(dtype0));
+#ifdef AMREX_USE_CUDA
+            outputs_torch.to(torch::kCUDA);
+#endif
+
+            if (use_ml) {
+                // create torch tensor from array
+#ifndef AMREX_USE_CUDA
+                at::Tensor inputs_torch = torch::from_blob(temp_ptr, {ncell, NumInput},
+                                                           torch::TensorOptions().dtype(dtype0));
+#else
+                at::Tensor inputs_torch = torch::from_blob(temp_ptr, {ncell, NumInput},
+                                                           torch::TensorOptions().dtype(dtype0).device(torch::kCUDA));
+#endif
+
+                // evaluate torch model
+                inputs_torch = inputs_torch.to(torch::kFloat32);
+                outputs_torch = module.forward({inputs_torch}).toTensor();
+                outputs_torch = outputs_torch.to(dtype0);
+
+                if (/*DEBUG=*/0) {
+                    Print() << "example input: \n"
+                            << inputs_torch.slice(/*dim=*/0, /*start=*/0, /*end=*/5) << '\n';
+                    Print() << "example output: \n"
+                            << outputs_torch.slice(/*dim=*/0, /*start=*/0, /*end=*/5) << '\n';
+                }
+            }
+
+            // get accessor to tensor (read-only)
+#ifndef AMREX_USE_CUDA
+            auto outputs_torch_acc = outputs_torch.accessor<Real,2>();
+#else
+            auto outputs_torch_acc = outputs_torch.packed_accessor64<Real,2>();
+#endif
+            auto use_ml_const_istep_gt_0 = use_ml_const && istep > 0; // cannot use use_ml_const and istep directly in CUDA kernel below.
+	    
+            ParallelFor(tileBox, [=] AMREX_GPU_DEVICE(int i, int j, int k) {
+                if (use_mask && mask_arr(i, j, k))
+                    return;  // cell is covered by finer cells
+		
+		int ii = i - bx_lo[0];
+                int jj = j - bx_lo[1];
+                int index = jj*nbox[0] + ii;
+#if AMREX_SPACEDIM == 3
+                int kk = k - bx_lo[2];
+                index += kk*nbox[0]*nbox[1];
+#endif
+
+                auto rho = s_in_arr(i, j, k, Rho);
+                Real x_in[NumSpec];
+                for (int n = 0; n < NumSpec; ++n) {
+                    x_in[n] = s_in_arr(i, j, k, FirstSpec + n) / rho;
+                }
+#if NAUX_NET > 0
+                Real aux_in[NumAux];
+                for (int n = 0; n < NumAux; ++n) {
+                    aux_in[n] = s_in_arr(i, j, k, FirstAux + n) / rho;
+                }
+#endif
+
+                Real T_in = 0.0;
+                if (drive_initial_convection) {
+                    if (!spherical) {
+                        auto r = (AMREX_SPACEDIM == 2) ? j : k;
+                        T_in = tempbar_init_arr(lev, r);
+                    } else {
+                        T_in = tempbar_cart_arr(i, j, k);
+                    }
+                } else {
+                    T_in = s_in_arr(i, j, k, Temp);
+                }
+
+                Real x_test =
+                    (ispec_threshold > 0) ? x_in[ispec_threshold] : 0.0;
+
+		Real x_out[NumSpec];
+#if NAUX_NET > 0
+		Real aux_out[NumAux];
+#endif
+		Real rhowdot[NumSpec];
+		Real rhoH = 0.0;
+
+		if (save_react_data) {
+		    if (NumSpec == 3) {
+			react_in_arr(i, j, k, 0) = x_in[0];
+			react_in_arr(i, j, k, 1) = x_in[2];
+			react_in_arr(i, j, k, 2) = rho;
+			react_in_arr(i, j, k, 3) = T_in;
+
+			react_out_arr(i, j, k, 2) = 0.0; // output generated energy
+			for (int n = NumSpec; n < 2*(NumSpec); ++n) {
+			    react_out_arr(i, j, k, n) = 0.0;
+			}
+		    } else {
+			for (int n = 0; n < NumSpec; ++n) {
+			    react_in_arr(i, j, k, n) = x_in[n];
+			    react_out_arr(i, j, k, n) = x_in[n];
+			}
+			react_in_arr(i, j, k, NumSpec) = rho;    // input density
+			react_in_arr(i, j, k, NumSpec+1) = T_in; // input temperature
+			react_out_arr(i, j, k, NumSpec) = 0.0;   // output generated energy
+			
+			for (int n = NumSpec+1; n < 2*(NumSpec+1); ++n) {
+			    react_out_arr(i, j, k, n) = 0.0;
+			}
+		    }
+		}
+
+                // if the threshold species is not in the network, then we burn
+                // normally.  if it is in the network, make sure the mass
+                // fraction is above the cutoff.
+                if ((rho > burning_cutoff_density_lo &&
+                     rho < burning_cutoff_density_hi) &&
+                    (ispec_threshold < 0 ||
+                     (ispec_threshold > 0 &&
+                      x_test > burner_threshold_cutoff))) {
+
+		    if (use_ml_const_istep_gt_0) {
+                        // copy output tensor to multifabs
+                        // index ordering: (species, enuc)
+			// check if X_k >= 0
+			x_out[0] = (outputs_torch_acc[index][0] >= 0.0) ?
+				    outputs_torch_acc[index][0] : 0.0;
+			// X(O16) does not change
+			x_out[1] = x_in[1];
+			// check if X_k >= 0
+			x_out[2] = (outputs_torch_acc[index][1] >= 0.0) ?
+				    outputs_torch_acc[index][1] : 0.0;
+
+                        // note enuc in output tensor is the normalized value
+                        // of (state_out.e - state_in.e)
+                        rhoH = rho * (outputs_torch_acc[index][2] * enuc_fac) / dt_in;
+                    } else {
+                        // need to use burner if no ML model was given or at step 0
+			burn_t state_in;
+			burn_t state_out;
+
+			// Initialize burn state_in and state_out
+			state_in.e = 0.0;
+			state_in.rho = rho;
+			state_in.T = T_in;
+			for (int n = 0; n < NumSpec; ++n) {
+			    state_in.xn[n] = x_in[n];
+			}
+#if NAUX_NET > 0
+			for (int n = 0; n < NumAux; ++n) {
+			    state_in.aux[n] = aux_in[n];
+			}
+#endif
+
+			// initialize state_out the same as state_in
+			state_out.e = 0.0;
+			state_out.rho = rho;
+			state_out.T = T_in;
+			for (int n = 0; n < NumSpec; ++n) {
+			    state_out.xn[n] = x_in[n];
+			}
+#if NAUX_NET > 0
+			for (int n = 0; n < NumAux; ++n) {
+			    state_out.aux[n] = aux_in[n];
+			}
+#endif
+
+			burner(state_out, dt_in);
+
+			for (int n = 0; n < NumSpec; ++n) {
+			    x_out[n] = state_out.xn[n];
+			    rhowdot[n] = state_out.rho *
+                                         (state_out.xn[n] - state_in.xn[n]) / dt_in;
+			}
+#if NAUX_NET > 0
+			for (int n = 0; n < NumAux; ++n) {
+			    aux_out[n] = state_out.aux[n];
+			}
+#endif
+			rhoH = state_out.rho * (state_out.e - state_in.e) / dt_in;
+		    }
+                } else {
+                    for (int n = 0; n < NumSpec; ++n) {
+                        x_out[n] = x_in[n];
+                        rhowdot[n] = 0.0;
+                    }
+#if NAUX_NET > 0
+                    for (int n = 0; n < NumAux; ++n) {
+                        aux_out[n] = aux_in[n];
+                    }
+#endif
+                }
+
+                // check if sum{X_k} = 1
+                Real sumX = 0.0;
+                for (int n = 0; n < NumSpec; ++n) {
+                    sumX += x_out[n];
+                }
+
+                if (fabs(sumX - 1.0) > reaction_sum_tol) {
+		    if (NumSpec == 3) {
+			Real sumXmO16 = sumX - x_out[1];
+			x_out[0] *= (1.0 - x_out[1]) / sumXmO16;
+			x_out[2] *= (1.0 - x_out[1]) / sumXmO16;
+		    } else {
+			for (int n = 0; n < NumSpec; ++n) {
+			    x_out[n] /= sumX;
+			}
+		    }
+                }
+
+		if (use_ml_const) {
+		    for (int n = 0; n < NumSpec; ++n) {
+			rhowdot[n] = rho * (x_out[n] - x_in[n]) / dt_in;
+		    }
+		}
+
+		if (save_react_data) {
+		    if (NumSpec == 3) {
+			react_out_arr(i, j, k, 0) = x_out[0];
+			react_out_arr(i, j, k, 1) = x_out[2];
+			react_out_arr(i, j, k, 2) = rhoH * dt_in;
+			
+			react_out_arr(i, j, k, NumSpec) = rhowdot[0]; // save dXdt
+			react_out_arr(i, j, k, NumSpec+1) = rhowdot[2];
+			react_out_arr(i, j, k, 2*NumSpec-1) = rhoH; // save enucdot
+		    } else {
+			for (int n = 0; n < NumSpec; ++n) {
+			    react_out_arr(i, j, k, n) = x_out[n];
+			}
+			react_out_arr(i, j, k, NumSpec) = rhoH * dt_in;
+
+			for (int n = 0; n < NumSpec; ++n) {
+			    react_out_arr(i, j, k, NumSpec+n) = rhowdot[n]; // save dXdt
+			}
+			react_out_arr(i, j, k, 2*NumSpec+1) = rhoH; // save enucdot
+		    }
+		}
+		
+                // pass the density and pi through
+                s_out_arr(i, j, k, Rho) = s_in_arr(i, j, k, Rho);
+                s_out_arr(i, j, k, Pi) = s_in_arr(i, j, k, Pi);
+
+                // update the species
+                for (int n = 0; n < NumSpec; ++n) {
+                    s_out_arr(i, j, k, FirstSpec + n) = x_out[n] * rho;
+                }
+
+                // update the auxiliary variables
+#if NAUX_NET > 0
+                for (int n = 0; n < NumAux; ++n) {
+                    s_out_arr(i, j, k, FirstAux + n) = aux_out[n] * rho;
+                }
+#endif
+
+                // store the energy generation and species create quantities
+                for (int n = 0; n < NumSpec; ++n) {
+                    rho_omegadot_arr(i, j, k, n) = rhowdot[n];
+                }
+                rho_Hnuc_arr(i, j, k) = rhoH;
+
+                // update the enthalpy -- include the change due to external heating
+                s_out_arr(i, j, k, RhoH) = s_in_arr(i, j, k, RhoH) +
+                                           dt_in * rho_Hnuc_arr(i, j, k) +
+                                           dt_in * rho_Hext_arr(i, j, k);
+            });
+        }
+    }
+
+    if (save_react_data) {
+	Vector<std::string> react_in_varnames, react_out_varnames;
+	for (int i = 0; i < NumSpec; ++i) {
+	    std::string spec_string = "X(";
+	    spec_string += short_spec_names_cxx[i];
+	    spec_string += ')';
+	    if (NumSpec == 3 && i != 1) {
+		react_in_varnames.push_back(spec_string);
+		react_out_varnames.push_back(spec_string);
+	    }
+	}
+	react_in_varnames.push_back("rho");
+	react_in_varnames.push_back("temp");
+	react_out_varnames.push_back("enuc");
+	for (int i = 0; i < NumSpec; ++i) {
+	    std::string spec_string = "Xdot(";
+	    spec_string += short_spec_names_cxx[i];
+	    spec_string += ')';
+	    if (NumSpec == 3 && i != 1) {
+		react_out_varnames.push_back(spec_string);
+	    }
+	}
+	react_out_varnames.push_back("enucdot");
+    
+        std::string react_in_name = "react_inputs_";
+        std::string react_out_name = "react_outputs_";
+        PlotFileName(istep, &react_in_name);
+        PlotFileName(istep, &react_out_name);
+        react_in_name = react_in_name + "_" + algo_step;
+        react_out_name = react_out_name + "_" + algo_step;
+        Vector<int> step_array(finest_level+1, istep);
+        WriteMultiLevelPlotfile(react_in_name, finest_level + 1, GetVecOfConstPtrs(react_in), react_in_varnames,
+                                Geom(), dt_in, step_array, refRatio());
+        WriteMultiLevelPlotfile(react_out_name, finest_level + 1, GetVecOfConstPtrs(react_out), react_out_varnames,
+                                Geom(), dt_in, step_array, refRatio());
+    }
+}
+#endif
+
 #else
 // SDC burner
 void Maestro::Burner(const Vector<MultiFab>& s_in, Vector<MultiFab>& s_out,
diff --git a/Source/MaestroInit.cpp b/Source/MaestroInit.cpp
index b8013fc21..59308e728 100644
--- a/Source/MaestroInit.cpp
+++ b/Source/MaestroInit.cpp
@@ -15,6 +15,12 @@ void Maestro::Init() {
 
     Print() << "Calling Init()" << std::endl;
 
+    // initializes the seed for C++ random number calls
+    auto seed = 142;
+    InitRandom(seed + ParallelDescriptor::MyProc(),
+	       ParallelDescriptor::NProcs(),
+	       seed + ParallelDescriptor::MyProc());
+
     if (restart_file.empty()) {
         start_step = 1;
 
@@ -531,9 +537,22 @@ void Maestro::DivuIter(int istep_divu_iter) {
         rhohalf[lev].setVal(1.);
     }
 
+#ifdef ML
+    // save current state of use_ml
+    const auto use_ml_temp = use_ml;
+
+    // do not use ML model for divu steps
+    use_ml = false;
+#endif
+
     React(sold, stemp, rho_Hext, rho_omegadot, rho_Hnuc, p0_old, 0.5 * dt,
           t_old);
 
+#ifdef ML
+    // reset use_ml to previous state
+    use_ml = use_ml_temp;
+#endif
+
     // WriteMF(sold,"a_sold_2levs");
     // Abort();
 
diff --git a/Source/MaestroPlot.cpp b/Source/MaestroPlot.cpp
index 5dbb7ddcc..4dc701e29 100644
--- a/Source/MaestroPlot.cpp
+++ b/Source/MaestroPlot.cpp
@@ -1103,7 +1103,7 @@ void Maestro::WriteJobInfo(const std::string& dir) const {
 
         jobInfoFile << "\n\n";
 
-#ifdef AMREX_USE_GPU
+#ifdef AMREX_USE_CUDA
         // This output assumes for simplicity that every rank uses the
         // same type of GPU.
 
diff --git a/Source/MaestroSetup.cpp b/Source/MaestroSetup.cpp
index 9d0c126aa..78be51838 100644
--- a/Source/MaestroSetup.cpp
+++ b/Source/MaestroSetup.cpp
@@ -216,6 +216,32 @@ void Maestro::Setup() {
                   << std::endl;
         Error();
     }
+
+#ifdef ML
+    // check to see if ml model is provided
+    // if model file is provided, load model
+    if (ml_model_file.empty()) {
+        use_ml = false;
+        Print() << "Warning: No ML model file provided! Will not use ML." << std::endl;
+    } else {
+        use_ml = true;
+
+        // Load pytorch module via torch script
+        try {
+            // Deserialize the ScriptModule from a file using torch::jit::load().
+            module = torch::jit::load(ml_model_file);
+        }
+        catch (const c10::Error& e) {
+            Abort("Error loading the ML model\n");
+        }
+        Print() << "ML model loaded.\n";
+
+#ifdef AMREX_USE_CUDA
+        module.to(torch::kCUDA);
+        Print() << "Copying model to GPU." << std::endl;
+#endif
+    }
+#endif
 }
 
 // read in some parameters from inputs file
diff --git a/Source/param/_cpp_parameters b/Source/param/_cpp_parameters
index 9d2ea9778..a4a1e843c 100644
--- a/Source/param/_cpp_parameters
+++ b/Source/param/_cpp_parameters
@@ -87,6 +87,9 @@ use_soundspeed_firstdt              bool            false     y
 # Use the divu constraint when computing the first time step.
 use_divu_firstdt                    bool            false       y
 
+# Save reactions input & output data every so many steps (if >0)
+save_react_int                      int             0
+
 #-----------------------------------------------------------------------------
 # category: grid
 #-----------------------------------------------------------------------------
@@ -621,7 +624,15 @@ sdc_couple_mac_velocity             bool            false
 
 # The nodal solve is non-deterministic on the GPU. Should it instead be run 
 # on the CPU to give a deterministic solution?
-deterministic_nodal_solve           bool false
+deterministic_nodal_solve           bool            false
+
+
+#-----------------------------------------------------------------------------
+# category: machine learned model
+#-----------------------------------------------------------------------------
+
+# Where the ml model is located.  Empty string means do not use model.
+ml_model_file                       string          ""
 
 
 #-----------------------------------------------------------------------------