From e1e8a0677782983182bc313a07a5efb190ba19ab Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 7 Mar 2024 06:42:28 +0000 Subject: [PATCH 01/27] build: bump mtk to v9 --- Project.toml | 2 +- docs/Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index b3da28942..443279dce 100644 --- a/Project.toml +++ b/Project.toml @@ -62,7 +62,7 @@ LogDensityProblems = "2" Lux = "0.5" LuxCUDA = "0.3" MCMCChains = "6" -ModelingToolkit = "8" +ModelingToolkit = "9" MonteCarloMeasurements = "1" Optim = "1.7.8" Optimization = "3" diff --git a/docs/Project.toml b/docs/Project.toml index 2c81ad0d6..2a2c0fac5 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -32,7 +32,7 @@ DomainSets = "0.6, 0.7" Flux = "0.14" Integrals = "4" Lux = "0.5" -ModelingToolkit = "8.33" +ModelingToolkit = "9" MonteCarloMeasurements = "1" NeuralPDE = "5.3" Optimization = "3.9" From a89afeb52ca843da64af64f4d981019b5c73b1de Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 15 Mar 2024 06:11:00 +0000 Subject: [PATCH 02/27] docs: fix rendering of dgm example --- docs/src/tutorials/dgm.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/tutorials/dgm.md b/docs/src/tutorials/dgm.md index 1917df8f4..cb853d009 100644 --- a/docs/src/tutorials/dgm.md +++ b/docs/src/tutorials/dgm.md @@ -28,15 +28,15 @@ where $\vec{x}$ is the concatenated vector of $(t, x)$ and $L$ is the number of Let's try to solve the following Burger's equation using Deep Galerkin Method for $\alpha = 0.05$ and compare our solution with the finite difference method: -$$ +```math \partial_t u(t, x) + u(t, x) \partial_x u(t, x) - \alpha \partial_{xx} u(t, x) = 0 -$$ +``` defined over -$$ +```math t \in [0, 1], x \in [-1, 1] -$$ +``` with boundary conditions ```math From fe39bcc0ce754721cbb343935b2e2f02860de4e3 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 15 Mar 2024 06:11:21 +0000 Subject: [PATCH 03/27] docs: remove example block from warnonly --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 3b1340c27..dfa8087f3 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -13,7 +13,7 @@ makedocs(sitename = "NeuralPDE.jl", authors = "#", modules = [NeuralPDE], clean = true, doctest = false, linkcheck = true, - warnonly = [:missing_docs, :example_block], + warnonly = [:missing_docs], format = Documenter.HTML(assets = ["assets/favicon.ico"], canonical = "https://docs.sciml.ai/NeuralPDE/stable/"), pages = pages) From bfb159c19077536324ca720e6a4b84eb7af1ad2c Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 15 Mar 2024 06:26:14 +0000 Subject: [PATCH 04/27] docs: fix rendering of Bayesian PINN docstrings --- src/BPINN_ode.jl | 34 ++++++++++++++++++---------------- src/advancedHMC_MCMC.jl | 4 ++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl index 4493a0732..3bbf1afea 100644 --- a/src/BPINN_ode.jl +++ b/src/BPINN_ode.jl @@ -123,18 +123,19 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, end """ -Contains ahmc_bayesian_pinn_ode() function output: -1> a MCMCChains.jl chain object for sampled parameters -2> The set of all sampled parameters -3> statistics like: - > n_steps - > acceptance_rate - > log_density - > hamiltonian_energy - > hamiltonian_energy_error - > numerical_error - > step_size - > nom_step_size +Contains `ahmc_bayesian_pinn_ode()` function output: + +1. 
A MCMCChains.jl chain object for sampled parameters. +2. The set of all sampled parameters. +3. Statistics like: + - n_steps + - acceptance_rate + - log_density + - hamiltonian_energy + - hamiltonian_energy_error + - numerical_error + - step_size + - nom_step_size """ struct BPINNstats{MC, S, ST} mcmc_chain::MC @@ -143,10 +144,11 @@ struct BPINNstats{MC, S, ST} end """ -BPINN Solution contains the original solution from AdvancedHMC.jl sampling(BPINNstats contains fields related to that) -> ensemblesol is the Probabilistic Estimate(MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's(made using all sampled parameters) output's. -> estimated_nn_params - Probabilistic Estimate of NN params from sampled weights,biases -> estimated_de_params - Probabilistic Estimate of DE params from sampled unknown DE parameters +BPINN Solution contains the original solution from AdvancedHMC.jl sampling (BPINNstats contains fields related to that). + +1. `ensemblesol` is the Probabilistic Estimate (MonteCarloMeasurements.jl Particles type) of Ensemble solution from All Neural Network's (made using all sampled parameters) output's. +2. `estimated_nn_params` - Probabilistic Estimate of NN params from sampled weights, biases. +3. `estimated_de_params` - Probabilistic Estimate of DE params from sampled unknown DE parameters. """ struct BPINNsolution{O <: BPINNstats, E, NP, OP, P} original::O diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index c051d208c..6f3014925 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -344,8 +344,8 @@ end !!! warn - Note that ahmc_bayesian_pinn_ode() only supports ODEs which are written in the out-of-place form, i.e. - `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the ahmc_bayesian_pinn_ode() + Note that `ahmc_bayesian_pinn_ode()` only supports ODEs which are written in the out-of-place form, i.e. + `du = f(u,p,t)`, and not `f(du,u,p,t)`. If not declared out-of-place, then the `ahmc_bayesian_pinn_ode()` will exit with an error. ## Example From 2a72fa8c58ac8736559cfcda739738bd357b0df1 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 21 Mar 2024 06:56:28 +0000 Subject: [PATCH 05/27] refactor: correctly lower quadrature training strategy in NNODE --- src/ode_solve.jl | 35 +++++++++++------------------------ src/training_strategies.jl | 6 +----- 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/src/ode_solve.jl b/src/ode_solve.jl index f93183d76..b9c46d346 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -27,11 +27,12 @@ of the physics-informed neural network which is used as a solver for a standard the PDE operators. The reverse mode of the loss function is always automatic differentiation (via Zygote), this is only for the derivative in the loss function (the derivative with respect to time). -* `batch`: The batch size to use for the internal quadrature. Defaults to `0`, which +* `batch`: The batch size for the loss computation. Defaults to `false`, which means the application of the neural network is done at individual time points one - at a time. `batch>0` means the neural network is applied at a row vector of values + at a time. `true` means the neural network is applied at a row vector of values `t` simultaneously, i.e. it's the batch size for the neural network evaluations. This requires a neural network compatible with batched data. 
+ This is not applicable to `QuadratureTraining` where `batch` is passed in the `strategy` which is the number of points it can parallelly compute the integrand. * `param_estim`: Boolean to indicate whether parameters of the differential equations are learnt along with parameters of the neural network. * `strategy`: The training strategy used to choose the points for the evaluations. Default of `nothing` means that `QuadratureTraining` with QuadGK is used if no @@ -88,7 +89,7 @@ struct NNODE{C, O, P, B, PE, K, AL <: Union{Nothing, Function}, end function NNODE(chain, opt, init_params = nothing; strategy = nothing, - autodiff = false, batch = nothing, param_estim = false, additional_loss = nothing, kwargs...) + autodiff = false, batch = false, param_estim = false, additional_loss = nothing, kwargs...) !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) NNODE(chain, opt, init_params, autodiff, batch, strategy, param_estim, additional_loss, kwargs) end @@ -111,11 +112,7 @@ end function generate_phi_θ(chain::Lux.AbstractExplicitLayer, t, u0, init_params) θ, st = Lux.setup(Random.default_rng(), chain) - if init_params === nothing - init_params = ComponentArrays.ComponentArray(θ) - else - init_params = ComponentArrays.ComponentArray(init_params) - end + isnothing(init_params) && (init_params = θ) ODEPhi(chain, t, u0, st), init_params end @@ -182,7 +179,7 @@ function ode_dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool) end """ - inner_loss(phi, f, autodiff, t, θ, p) + inner_loss(phi, f, autodiff, t, θ, p, param_estim) Simple L2 inner loss at a time `t` with parameters `θ` of the neural network. """ @@ -220,7 +217,7 @@ function inner_loss(phi::ODEPhi{C, T, U}, f, autodiff::Bool, t::AbstractVector, end """ - generate_loss(strategy, phi, f, autodiff, tspan, p, batch) + generate_loss(strategy, phi, f, autodiff, tspan, p, batch, param_estim) Representation of the loss function, parametric on the training strategy `strategy`. 
""" @@ -229,14 +226,13 @@ function generate_loss(strategy::QuadratureTraining, phi, f, autodiff::Bool, tsp integrand(t::Number, θ) = abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) integrand(ts, θ) = [abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) for t in ts] - @assert batch == 0 # not implemented function loss(θ, _) - intprob = IntegralProblem(integrand, (tspan[1], tspan[2]), θ) - sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol) + intf = BatchIntegralFunction(integrand, max_batch = strategy.batch) + intprob = IntegralProblem(intf, (tspan[1], tspan[2]), θ) + sol = solve(intprob, strategy.quadrature_alg; abstol = strategy.abstol, reltol = strategy.reltol, maxiters = strategy.maxiters) sol.u end - return loss end @@ -395,16 +391,7 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, alg.strategy end - batch = if alg.batch === nothing - if strategy isa QuadratureTraining - strategy.batch - else - true - end - else - alg.batch - end - + batch = alg.batch inner_f = generate_loss(strategy, phi, f, autodiff, tspan, p, batch, param_estim) additional_loss = alg.additional_loss (param_estim && isnothing(additional_loss)) && throw(ArgumentError("Please provide `additional_loss` in `NNODE` for parameter estimation (`param_estim` is true).")) diff --git a/src/training_strategies.jl b/src/training_strategies.jl index 1db878094..c997e6c4c 100644 --- a/src/training_strategies.jl +++ b/src/training_strategies.jl @@ -272,7 +272,7 @@ struct QuadratureTraining{Q <: SciMLBase.AbstractIntegralAlgorithm, T} <: batch::Int64 end -function QuadratureTraining(; quadrature_alg = CubatureJLh(), reltol = 1e-6, abstol = 1e-3, +function QuadratureTraining(; quadrature_alg = CubatureJLh(), reltol = 1e-3, abstol = 1e-6, maxiters = 1_000, batch = 100) QuadratureTraining(quadrature_alg, reltol, abstol, maxiters, batch) end @@ -306,11 +306,7 @@ function get_loss_function(loss_function, lb, ub, eltypeθ, strategy::Quadrature end area = eltypeθ(prod(abs.(ub .- lb))) f_ = (lb, ub, loss_, θ) -> begin - # last_x = 1 function integrand(x, θ) - # last_x = x - # mean(abs2,loss_(x,θ), dims=2) - # size_x = fill(size(x)[2],(1,1)) x = adapt(parameterless_type(ComponentArrays.getdata(θ)), x) sum(abs2, view(loss_(x, θ), 1, :), dims = 2) #./ size_x end From a80cd034e68e3482ff525c82cf1303023cacda59 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 21 Mar 2024 08:36:15 +0000 Subject: [PATCH 06/27] test: remove tests for assertion error with batch to be true for QuadratureTraining --- test/NNODE_tests.jl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index 122adcceb..b3731f059 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -69,9 +69,6 @@ end sol = solve(prob, NNODE(luxchain, opt), verbose = true, maxiters = 400) @test sol.errors[:l2] < 0.5 - @test_throws AssertionError solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, - maxiters = 400) - sol = solve(prob, NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)), @@ -105,10 +102,6 @@ end abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 - @test_throws AssertionError solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, - maxiters = 400, - abstol = 1.0f-8) - sol = solve(prob, NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)), From 118102989f25117c6aa919480578526d5422db91 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 21 Mar 2024 12:19:04 +0000 Subject: [PATCH 07/27] test: make verbose = 
false for NNODE tests --- test/NNODE_tests.jl | 61 +++++++++++++++++++++++---------------- test/NNODE_tstops_test.jl | 21 ++++++++++---- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index b3731f059..8475737e0 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -9,6 +9,7 @@ Random.seed!(100) @testset "Scalar" begin # Run a solve on scalars + println("Scalar") linear = (u, p, t) -> cos(2pi * t) tspan = (0.0f0, 1.0f0) u0 = 0.0f0 @@ -16,26 +17,27 @@ Random.seed!(100) luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) - sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, + sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false, abstol = 1.0f-10, maxiters = 200) @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true), dt = 1 / 20.0f0, - verbose = true, abstol = 1.0f-10, maxiters = 200) + verbose = false, abstol = 1.0f-10, maxiters = 200) - sol = solve(prob, NNODE(luxchain, opt), verbose = true, + sol = solve(prob, NNODE(luxchain, opt), verbose = false, abstol = 1.0f-6, maxiters = 200) opt = OptimizationOptimJL.BFGS() - sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = true, + sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false, abstol = 1.0f-10, maxiters = 200) - sol = solve(prob, NNODE(luxchain, opt), verbose = true, + sol = solve(prob, NNODE(luxchain, opt), verbose = false, abstol = 1.0f-6, maxiters = 200) end @testset "Vector" begin # Run a solve on vectors + println("Vector") linear = (u, p, t) -> [cos(2pi * t)] tspan = (0.0f0, 1.0f0) u0 = [0.0f0] @@ -44,14 +46,14 @@ end opt = OptimizationOptimJL.BFGS() sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10, - verbose = true, maxiters = 200) + verbose = false, maxiters = 200) @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true), dt = 1 / 20.0f0, - abstol = 1e-10, verbose = true, maxiters = 200) + abstol = 1e-10, verbose = false, maxiters = 200) sol = solve(prob, NNODE(luxchain, opt), abstol = 1.0f-6, - verbose = true, maxiters = 200) + verbose = false, maxiters = 200) @test sol(0.5) isa Vector @test sol(0.5; idxs = 1) isa Number @@ -59,6 +61,7 @@ end end @testset "Example 1" begin + println("Example 1") linear = (u, p, t) -> @. 
t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) - u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2] @@ -66,68 +69,70 @@ end luxchain = Lux.Chain(Lux.Dense(1, 128, Lux.σ), Lux.Dense(128, 1)) opt = OptimizationOptimisers.Adam(0.01) - sol = solve(prob, NNODE(luxchain, opt), verbose = true, maxiters = 400) + sol = solve(prob, NNODE(luxchain, opt), verbose = false, maxiters = 400) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400) + verbose = false, maxiters = 400) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = true, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400) + verbose = false, maxiters = 400) @test sol.errors[:l2] < 0.5 - sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = true, + sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = false, maxiters = 400, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 - sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, + sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = false, maxiters = 400, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 end @testset "Example 2" begin + println("Example 2") linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t) linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) opt = OptimizationOptimisers.Adam(0.1) - sol = solve(prob, NNODE(luxchain, opt), verbose = true, maxiters = 400, + sol = solve(prob, NNODE(luxchain, opt), verbose = false, maxiters = 400, abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = false, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400, + verbose = false, maxiters = 400, abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = true, strategy = StochasticTraining(100)), - verbose = true, maxiters = 400, + verbose = false, maxiters = 400, abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 - sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = true, + sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = false, maxiters = 400, abstol = 1.0f-8, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 - sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = true, + sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = false, maxiters = 400, abstol = 1.0f-8, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 end @testset "Example 3" begin + println("Example 3") linear = (u, p, t) -> [cos(2pi * t), sin(2pi * t)] tspan = (0.0f0, 1.0f0) u0 = [0.0f0, -1.0f0 / 2pi] @@ -139,13 +144,14 @@ end alg = NNODE(luxchain, opt; autodiff = false) sol = solve(prob, - alg, verbose = true, dt = 1 / 40.0f0, + alg, verbose = false, dt = 1 / 40.0f0, maxiters = 2000, abstol = 1.0f-7) @test sol.errors[:l2] < 0.5 end @testset "Training Strategies" begin @testset "WeightedIntervalTraining" begin + println("WeightedIntervalTraining") function f(u, p, t) [p[1] * u[1] - p[2] * u[1] * u[2], -p[3] * u[2] + p[4] * u[1] * u[2]] end @@ -162,7 +168,7 @@ end points = 200 alg = NNODE(chain, opt, autodiff = false, strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) - sol = solve(prob_oop, alg, verbose = true, maxiters = 100000, saveat = 0.01) + sol = solve(prob_oop, alg, verbose = false, 
maxiters = 5000, saveat = 0.01) @test abs(mean(sol) - mean(true_sol)) < 0.2 end @@ -176,6 +182,7 @@ end u_analytical(x) = (1 / (2pi)) .* sin.(2pi .* x) @testset "GridTraining" begin + println("GridTraining") luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) (u_, t_) = (u_analytical(ts), ts) function additional_loss(phi, θ) @@ -183,22 +190,24 @@ end end alg1 = NNODE(luxchain, opt, strategy = GridTraining(0.01), additional_loss = additional_loss) - sol1 = solve(prob, alg1, verbose = true, abstol = 1e-8, maxiters = 500) + sol1 = solve(prob, alg1, verbose = false, abstol = 1e-8, maxiters = 500) @test sol1.errors[:l2] < 0.5 end @testset "QuadratureTraining" begin + println("QuadratureTraining") luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) (u_, t_) = (u_analytical(ts), ts) function additional_loss(phi, θ) return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) end alg1 = NNODE(luxchain, opt, additional_loss = additional_loss) - sol1 = solve(prob, alg1, verbose = true, abstol = 1e-10, maxiters = 200) + sol1 = solve(prob, alg1, verbose = false, abstol = 1e-10, maxiters = 200) @test sol1.errors[:l2] < 0.5 end @testset "StochasticTraining" begin + println("StochasticTraining") luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) (u_, t_) = (u_analytical(ts), ts) function additional_loss(phi, θ) @@ -206,12 +215,13 @@ end end alg1 = NNODE(luxchain, opt, strategy = StochasticTraining(1000), additional_loss = additional_loss) - sol1 = solve(prob, alg1, verbose = true, abstol = 1e-8, maxiters = 500) + sol1 = solve(prob, alg1, verbose = false, abstol = 1e-8, maxiters = 500) @test sol1.errors[:l2] < 0.5 end end @testset "Parameter Estimation" begin + println("Parameter Estimation") function lorenz(u, p, t) return [p[1]*(u[2]-u[1]), u[1]*(p[2]-u[3])-u[2], @@ -235,12 +245,13 @@ end ) opt = OptimizationOptimJL.LBFGS(linesearch = BackTracking()) alg = NNODE(luxchain, opt, strategy = GridTraining(0.01), param_estim = true, additional_loss = additional_loss) - sol = solve(prob, alg, verbose = true, abstol = 1e-8, maxiters = 5000, saveat = t_) + sol = solve(prob, alg, verbose = false, abstol = 1e-8, maxiters = 5000, saveat = t_) @test sol.k.u.p≈true_p atol=1e-2 @test reduce(hcat, sol.u)≈u_ atol=1e-2 end @testset "Translating from Flux" begin + println("Translating from Flux") linear = (u, p, t) -> cos(2pi * t) linear_analytic = (u, p, t) -> (1 / (2pi)) * sin(2pi * t) tspan = (0.0, 1.0) @@ -252,6 +263,6 @@ end fluxchain = Flux.Chain(Flux.Dense(1, 5, Flux.σ), Flux.Dense(5, 1)) alg1 = NNODE(fluxchain, opt) @test alg1.chain isa Lux.AbstractExplicitLayer - sol1 = solve(prob, alg1, verbose = true, abstol = 1e-10, maxiters = 200) + sol1 = solve(prob, alg1, verbose = false, abstol = 1e-10, maxiters = 200) @test sol1.errors[:l2] < 0.5 end diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl index c0f8422a0..bc4a4b08d 100644 --- a/test/NNODE_tstops_test.jl +++ b/test/NNODE_tstops_test.jl @@ -31,46 +31,55 @@ points = 3 dx = 1.0 @testset "GridTraining" begin + println("GridTraining") @testset "Without added points" begin + println("Without added points") # (difference between solutions should be high) alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx)) - sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat) @test abs(mean(sol) - mean(true_sol)) > threshold end @testset "With added points" begin + println("With added points") # (difference 
between solutions should be low) alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx)) - sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat, tstops = addedPoints) @test abs(mean(sol) - mean(true_sol)) < threshold end end @testset "WeightedIntervalTraining" begin + println("WeightedIntervalTraining") @testset "Without added points" begin + println("Without added points") # (difference between solutions should be high) alg = NNODE(chain, opt, autodiff = false, strategy = WeightedIntervalTraining(weights, points)) - sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat) @test abs(mean(sol) - mean(true_sol)) > threshold end @testset "With added points" begin + println("With added points") # (difference between solutions should be low) alg = NNODE(chain, opt, autodiff = false, strategy = WeightedIntervalTraining(weights, points)) - sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat, tstops = addedPoints) @test abs(mean(sol) - mean(true_sol)) < threshold end end @testset "StochasticTraining" begin + println("StochasticTraining") @testset "Without added points" begin + println("Without added points") # (difference between solutions should be high) alg = NNODE(chain, opt, autodiff = false, strategy = StochasticTraining(points)) - sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat) @test abs(mean(sol) - mean(true_sol)) > threshold end @testset "With added points" begin + println("With added points") # (difference between solutions should be low) alg = NNODE(chain, opt, autodiff = false, strategy = StochasticTraining(points)) - sol = solve(prob_oop, alg, verbose=true, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat, tstops = addedPoints) @test abs(mean(sol) - mean(true_sol)) < threshold end end From c74384caa5fb7a5e360d5c3a98b85a6fa469328c Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 22 Mar 2024 07:11:24 +0000 Subject: [PATCH 08/27] test: use BFGS instead of LBFGS for parameter estimation in NNODE --- test/NNODE_tests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index 8475737e0..1e2ba3c05 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -243,9 +243,9 @@ end Lux.Dense(n, n, Lux.σ), Lux.Dense(n, 3) ) - opt = OptimizationOptimJL.LBFGS(linesearch = BackTracking()) + opt = OptimizationOptimJL.BFGS(linesearch = BackTracking()) alg = NNODE(luxchain, opt, strategy = GridTraining(0.01), param_estim = true, additional_loss = additional_loss) - sol = solve(prob, alg, verbose = false, abstol = 1e-8, maxiters = 5000, saveat = t_) + sol = solve(prob, alg, verbose = false, abstol = 1e-8, maxiters = 1000, saveat = t_) @test sol.k.u.p≈true_p atol=1e-2 @test reduce(hcat, sol.u)≈u_ atol=1e-2 end From 4ab36749592e9b32c79d89940ff1457f0880b94f Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 22 Mar 2024 09:10:30 +0000 Subject: [PATCH 09/27] refactor: use FromFluxAdaptor for converting Flux to Lux as Lux.transform is deprecated --- 
src/BPINN_ode.jl | 2 +- src/NeuralPDE.jl | 1 + src/advancedHMC_MCMC.jl | 2 +- src/dae_solve.jl | 2 +- src/ode_solve.jl | 4 ++-- src/pinn_types.jl | 6 +++--- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl index 3bbf1afea..087b9d41c 100644 --- a/src/BPINN_ode.jl +++ b/src/BPINN_ode.jl @@ -113,7 +113,7 @@ function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, targetacceptancerate = 0.8), Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, progress = false, verbose = false) - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) BNNODE(chain, Kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd, dataset, physdt, MCMCkwargs, diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index d367bf8b6..2ba1de25b 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -30,6 +30,7 @@ using DomainSets: Domain, ClosedInterval, AbstractInterval, leftendpoint, righte using SciMLBase: @add_kwonly, parameterless_type using UnPack: @unpack import ChainRulesCore, Lux, ComponentArrays +using Lux: FromFluxAdaptor using ChainRulesCore: @non_differentiable RuntimeGeneratedFunctions.init(@__MODULE__) diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index 6f3014925..c86c87599 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -439,7 +439,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; MCMCkwargs = (n_leapfrog = 30,), progress = false, verbose = false) - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) # NN parameter prior mean and variance(PriorsNN must be a tuple) if isinplace(prob) throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) diff --git a/src/dae_solve.jl b/src/dae_solve.jl index 3f6bf8f0f..0c9d1323d 100644 --- a/src/dae_solve.jl +++ b/src/dae_solve.jl @@ -42,7 +42,7 @@ end function NNDAE(chain, opt, init_params = nothing; strategy = nothing, autodiff = false, kwargs...) - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) NNDAE(chain, opt, init_params, autodiff, strategy, kwargs) end diff --git a/src/ode_solve.jl b/src/ode_solve.jl index b9c46d346..ef57beabe 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -15,7 +15,7 @@ of the physics-informed neural network which is used as a solver for a standard ## Positional Arguments * `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer` or `Flux.Chain`. - `Flux.Chain` will be converted to `Lux` using `Lux.transform`. + `Flux.Chain` will be converted to `Lux` using `adapt(FromFluxAdaptor(false, false), chain)`. * `opt`: The optimizer to train the neural network. * `init_params`: The initial parameter of the neural network. By default, this is `nothing` which thus uses the random initialization provided by the neural network library. @@ -90,7 +90,7 @@ end function NNODE(chain, opt, init_params = nothing; strategy = nothing, autodiff = false, batch = false, param_estim = false, additional_loss = nothing, kwargs...) 
- !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) NNODE(chain, opt, init_params, autodiff, batch, strategy, param_estim, additional_loss, kwargs) end diff --git a/src/pinn_types.jl b/src/pinn_types.jl index 69116c4da..50f7649dc 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -48,7 +48,7 @@ methodology. * `chain`: a vector of Lux/Flux chains with a d-dimensional input and a 1-dimensional output corresponding to each of the dependent variables. Note that this specification respects the order of the dependent variables as specified in the PDESystem. - Flux chains will be converted to Lux internally using `Lux.transform`. + Flux chains will be converted to Lux internally using `adapt(FromFluxAdaptor(false, false), chain)`. * `strategy`: determines which training strategy will be used. See the Training Strategy documentation for more details. @@ -107,7 +107,7 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN if multioutput !all(i -> i isa Lux.AbstractExplicitLayer, chain) && (chain = Lux.transform.(chain)) else - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) end if phi === nothing if multioutput @@ -243,7 +243,7 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN if multioutput !all(i -> i isa Lux.AbstractExplicitLayer, chain) && (chain = Lux.transform.(chain)) else - !(chain isa Lux.AbstractExplicitLayer) && (chain = Lux.transform(chain)) + !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) end if phi === nothing if multioutput From 6c94adff6203504f8d7d5c63724ae31645cda660 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 22 Mar 2024 09:28:12 +0000 Subject: [PATCH 10/27] docs: adjust tolerance in QuadratureTraining in neural adapter example --- docs/src/tutorials/neural_adapter.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/tutorials/neural_adapter.md b/docs/src/tutorials/neural_adapter.md index a56e30a26..93f0dd036 100644 --- a/docs/src/tutorials/neural_adapter.md +++ b/docs/src/tutorials/neural_adapter.md @@ -69,7 +69,7 @@ function loss(cord, θ) ch2 .- phi(cord, res.u) end -strategy = NeuralPDE.QuadratureTraining() +strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6) prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 10000) @@ -173,7 +173,7 @@ for i in 1:count_decomp bcs_ = create_bcs(domains_[1].domain, phi_bound) @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)]) push!(pde_system_map, pde_system_) - strategy = NeuralPDE.QuadratureTraining() + strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6) discretization = NeuralPDE.PhysicsInformedNN(chains[i], strategy; init_params = init_params[i]) @@ -243,10 +243,10 @@ callback = function (p, l) end prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map, - NeuralPDE.QuadratureTraining()) + NeuralPDE.QuadratureTraining(; reltol = 1e-6)) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) prob_ = NeuralPDE.neural_adapter(losses, res_.u, pde_system_map, - NeuralPDE.QuadratureTraining()) + NeuralPDE.QuadratureTraining(; reltol = 1e-6)) res_ = Optimization.solve(prob_, 
OptimizationOptimisers.Adam(5e-3); maxiters = 5000) phi_ = PhysicsInformedNN(chain2, strategy; init_params = res_.u).phi From 7f3cc74adfa811f1ff9a1d30bd816d92b9ab732c Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 22 Mar 2024 10:30:19 +0000 Subject: [PATCH 11/27] build: bump lower bound of Lux --- Project.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index d76565ff2..45468985c 100644 --- a/Project.toml +++ b/Project.toml @@ -44,7 +44,7 @@ AdvancedHMC = "0.6.1" Aqua = "0.8" ArrayInterface = "7.7" CUDA = "5.2" -ChainRulesCore = "1.18" +ChainRulesCore = "1.21" ComponentArrays = "0.15.8" Cubature = "1.5" DiffEqBase = "6.144" @@ -59,7 +59,7 @@ Integrals = "4" LineSearches = "7.2" LinearAlgebra = "1" LogDensityProblems = "2" -Lux = "0.5.14" +Lux = "0.5.22" LuxCUDA = "0.3.2" MCMCChains = "6" MethodOfLines = "0.10.7" @@ -82,7 +82,7 @@ SymbolicUtils = "1.4" Symbolics = "5.17" Test = "1" UnPack = "1" -Zygote = "0.6.68" +Zygote = "0.6.69" julia = "1.10" [extras] @@ -91,12 +91,12 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" +MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -MethodOfLines = "94925ecb-adb7-4558-8ed8-f975c56a0bf4" [targets] test = ["Aqua", "Test", "CUDA", "SafeTestsets", "OptimizationOptimJL", "Pkg", "OrdinaryDiffEq", "LineSearches", "LuxCUDA", "Flux", "MethodOfLines"] From 7e3de9879de54292ac4a0ec8f41375104303120b Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 22 Mar 2024 11:15:10 +0000 Subject: [PATCH 12/27] refactor: make batch to be true by default for NNODE --- src/ode_solve.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/ode_solve.jl b/src/ode_solve.jl index ef57beabe..8af6a708d 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -27,11 +27,9 @@ of the physics-informed neural network which is used as a solver for a standard the PDE operators. The reverse mode of the loss function is always automatic differentiation (via Zygote), this is only for the derivative in the loss function (the derivative with respect to time). -* `batch`: The batch size for the loss computation. Defaults to `false`, which - means the application of the neural network is done at individual time points one - at a time. `true` means the neural network is applied at a row vector of values - `t` simultaneously, i.e. it's the batch size for the neural network evaluations. - This requires a neural network compatible with batched data. +* `batch`: The batch size for the loss computation. Defaults to `true`, means the neural network is applied at a row vector of values + `t` simultaneously, i.e. it's the batch size for the neural network evaluations. This requires a neural network compatible with batched data. + `false` means which means the application of the neural network is done at individual time points one at a time. This is not applicable to `QuadratureTraining` where `batch` is passed in the `strategy` which is the number of points it can parallelly compute the integrand. 
* `param_estim`: Boolean to indicate whether parameters of the differential equations are learnt along with parameters of the neural network. * `strategy`: The training strategy used to choose the points for the evaluations. @@ -89,7 +87,7 @@ struct NNODE{C, O, P, B, PE, K, AL <: Union{Nothing, Function}, end function NNODE(chain, opt, init_params = nothing; strategy = nothing, - autodiff = false, batch = false, param_estim = false, additional_loss = nothing, kwargs...) + autodiff = false, batch = true, param_estim = false, additional_loss = nothing, kwargs...) !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) NNODE(chain, opt, init_params, autodiff, batch, strategy, param_estim, additional_loss, kwargs) end From 24a65de27f732238c7568b5846476d766a255294 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 27 Mar 2024 03:33:10 +0000 Subject: [PATCH 13/27] feat: allow complex for NNODE --- src/ode_solve.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ode_solve.jl b/src/ode_solve.jl index 8af6a708d..bb417b649 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -326,6 +326,7 @@ function (f::NNODEInterpolation)(t::Vector, idxs, ::Type{Val{0}}, p, continuity) end SciMLBase.interp_summary(::NNODEInterpolation) = "Trained neural network interpolation" +SciMLBase.allowscomplex(::NNODE) = true function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, alg::NNODE, From 88c41160760efbb01d01eb301ab42694de6e6413 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 27 Mar 2024 03:33:31 +0000 Subject: [PATCH 14/27] test: add tests for complex equations --- test/NNODE_tests.jl | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index 1e2ba3c05..72bb47f81 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -5,6 +5,7 @@ import Lux, OptimizationOptimisers, OptimizationOptimJL using Flux using LineSearches +rng = Random.default_rng() Random.seed!(100) @testset "Scalar" begin @@ -250,6 +251,45 @@ end @test reduce(hcat, sol.u)≈u_ atol=1e-2 end +@testset "Complex Numbers" begin + function bloch_equations(u, p, t) + Ω, Δ, Γ = p + γ = Γ / 2 + ρ₁₁, ρ₂₂, ρ₁₂, ρ₂₁ = u + d̢ρ = [im * Ω * (ρ₁₂ - ρ₂₁) + Γ * ρ₂₂; + -im * Ω * (ρ₁₂ - ρ₂₁) - Γ * ρ₂₂; + -(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁); + conj(-(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁))] + return d̢ρ + end + + u0 = zeros(ComplexF64, 4) + u0[1] = 1 + time_span = (0.0, 2.0) + parameters = [2.0, 0.0, 1.0] + + problem = ODEProblem(bloch_equations, u0, time_span, parameters) + + chain = Lux.Chain( + Lux.Dense(1, 16, tanh; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) , + Lux.Dense(16, 4; init_weight = (rng, a...) 
-> Lux.kaiming_normal(rng, ComplexF64, a...)) + ) + ps, st = Lux.setup(rng, chain) + + opt = OptimizationOptimisers.Adam(0.01) + ground_truth = solve(problem, Tsit5(), saveat = 0.01) + strategies = [StochasticTraining(500), GridTraining(0.01), WeightedIntervalTraining([0.1, 0.4, 0.4, 0.1], 500)] + + @testset "$(nameof(typeof(strategy)))" for strategy in strategies + alg = NNODE(chain, opt, ps; strategy) + sol = solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) + @test sol.u ≈ ground_truth.u rtol=1e-1 + end + + alg = NNODE(chain, opt, ps; strategy = QuadratureTraining()) + @test_throws ErrorException solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) +end + @testset "Translating from Flux" begin println("Translating from Flux") linear = (u, p, t) -> cos(2pi * t) From 7938adf14dea20f399e02523cccaf11a1e21556f Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 27 Mar 2024 03:34:32 +0000 Subject: [PATCH 15/27] docs: add example for complex number support --- docs/pages.jl | 3 +- docs/src/examples/complex.md | 95 ++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 docs/src/examples/complex.md diff --git a/docs/pages.jl b/docs/pages.jl index 054b01f9e..31b18ec31 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -22,7 +22,8 @@ pages = ["index.md", "examples/heterogeneous.md", "examples/linear_parabolic.md", "examples/nonlinear_elliptic.md", - "examples/nonlinear_hyperbolic.md"], + "examples/nonlinear_hyperbolic.md", + "examples/complex.md"], "Manual" => Any["manual/ode.md", "manual/dae.md", "manual/pinns.md", diff --git a/docs/src/examples/complex.md b/docs/src/examples/complex.md new file mode 100644 index 000000000..0e1456e41 --- /dev/null +++ b/docs/src/examples/complex.md @@ -0,0 +1,95 @@ +# Complex Equations with PINNs + +NeuralPDE supports training PINNs with complex differential equations. This example will demonstrate how to use it for [`NNODE`](@ref). Let us consider a system of [bloch equations](https://en.wikipedia.org/wiki/Bloch_equations). Note [`QuadratureTraining`](@ref) cannot be used with complex equations due to current limitations of computing quadratures. + +As the input to this neural network is time which is real, we need to initialize the parameters of the neural network with complex values for it to output and train with complex values. + +```@example complex +using Random, NeuralPDE +using OrdinaryDiffEq +using Lux, OptimizationOptimisers +using Plots +rng = Random.default_rng() +Random.seed!(100) + +function bloch_equations(u, p, t) + Ω, Δ, Γ = p + γ = Γ / 2 + ρ₁₁, ρ₂₂, ρ₁₂, ρ₂₁ = u + d̢ρ = [im * Ω * (ρ₁₂ - ρ₂₁) + Γ * ρ₂₂; + -im * Ω * (ρ₁₂ - ρ₂₁) - Γ * ρ₂₂; + -(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁); + conj(-(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁))] + return d̢ρ +end + +u0 = zeros(ComplexF64, 4) +u0[1] = 1.0 +time_span = (0.0, 2.0) +parameters = [2.0, 0.0, 1.0] + +problem = ODEProblem(bloch_equations, u0, time_span, parameters) + +chain = Lux.Chain( + Lux.Dense(1, 16, tanh; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) , + Lux.Dense(16, 4; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) + ) +ps, st = Lux.setup(rng, chain) + +opt = OptimizationOptimisers.Adam(0.01) +ground_truth = solve(problem, Tsit5(), saveat = 0.01) +alg = NNODE(chain, opt, ps; strategy = StochasticTraining(500)) +sol = solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) +``` + +Now, lets plot the predictions. 
+ +`u1`: + +```@example complex +plot(sol.t, real.(reduce(hcat, sol.u)[1, :])); +plot!(ground_truth.t, real.(reduce(hcat, ground_truth.u)[1, :])) +``` + +```@example complex +plot(sol.t, imag.(reduce(hcat, sol.u)[1, :])); +plot!(ground_truth.t, imag.(reduce(hcat, ground_truth.u)[1, :])) +``` + +`u2`: + +```@example complex +plot(sol.t, real.(reduce(hcat, sol.u)[2, :])); +plot!(ground_truth.t, real.(reduce(hcat, ground_truth.u)[2, :])) +``` + +```@example complex +plot(sol.t, imag.(reduce(hcat, sol.u)[2, :])); +plot!(ground_truth.t, imag.(reduce(hcat, ground_truth.u)[2, :])) +``` + +`u3`: + +```@example complex +plot(sol.t, real.(reduce(hcat, sol.u)[3, :])); +plot!(ground_truth.t, real.(reduce(hcat, ground_truth.u)[3, :])) +``` + +```@example complex +plot(sol.t, imag.(reduce(hcat, sol.u)[3, :])); +plot!(ground_truth.t, imag.(reduce(hcat, ground_truth.u)[3, :])) +``` + +`u4`: + +```@example complex +plot(sol.t, real.(reduce(hcat, sol.u)[4, :])); +plot!(ground_truth.t, real.(reduce(hcat, ground_truth.u)[4, :])) +``` + +```@example complex +plot(sol.t, imag.(reduce(hcat, sol.u)[4, :])); +plot!(ground_truth.t, imag.(reduce(hcat, ground_truth.u)[4, :])) +``` + +We can see it is able to learn the real parts of `u1`, `u2` and imaginary parts of `u3`, `u4`. From fd9afba302daab0ab621226c912904a6a0dd254b Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Wed, 27 Mar 2024 03:34:56 +0000 Subject: [PATCH 16/27] docs: relax abstol for neural adapter example --- docs/src/tutorials/neural_adapter.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/src/tutorials/neural_adapter.md b/docs/src/tutorials/neural_adapter.md index 93f0dd036..56d3a0679 100644 --- a/docs/src/tutorials/neural_adapter.md +++ b/docs/src/tutorials/neural_adapter.md @@ -69,7 +69,7 @@ function loss(cord, θ) ch2 .- phi(cord, res.u) end -strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6) +strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3) prob_ = NeuralPDE.neural_adapter(loss, init_params2, pde_system, strategy) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 10000) @@ -173,7 +173,7 @@ for i in 1:count_decomp bcs_ = create_bcs(domains_[1].domain, phi_bound) @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)]) push!(pde_system_map, pde_system_) - strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6) + strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3) discretization = NeuralPDE.PhysicsInformedNN(chains[i], strategy; init_params = init_params[i]) @@ -243,10 +243,10 @@ callback = function (p, l) end prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map, - NeuralPDE.QuadratureTraining(; reltol = 1e-6)) + NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) prob_ = NeuralPDE.neural_adapter(losses, res_.u, pde_system_map, - NeuralPDE.QuadratureTraining(; reltol = 1e-6)) + NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) phi_ = PhysicsInformedNN(chain2, strategy; init_params = res_.u).phi From dad881531fffe25705c35b3258c4c3f183904636 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 28 Mar 2024 04:14:26 +0000 Subject: [PATCH 17/27] refactor: error out if QuadratureTraining is used with complex parameters for NNODE --- src/ode_solve.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ode_solve.jl 
b/src/ode_solve.jl index bb417b649..eb5ae942e 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -358,6 +358,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, !(chain isa Lux.AbstractExplicitLayer) && error("Only Lux.AbstractExplicitLayer neural networks are supported") phi, init_params = generate_phi_θ(chain, t0, u0, init_params) + ((eltype(eltype(init_params).types[1]) <: Complex || eltype(eltype(init_params).types[2]) <: Complex) && alg.strategy isa QuadratureTraining) && + error("QuadratureTraining cannot be used with complex parameters. Use other strategies.") init_params = if alg.param_estim ComponentArrays.ComponentArray(; depvar = ComponentArrays.ComponentArray(init_params), p = prob.p) From 0556f6ead2ec90b57da2f1e7e936fb13b2e3f62f Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Thu, 28 Mar 2024 17:19:49 -0400 Subject: [PATCH 18/27] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 443279dce..c5d536e4f 100644 --- a/Project.toml +++ b/Project.toml @@ -82,7 +82,7 @@ Symbolics = "5" Test = "1" UnPack = "1" Zygote = "0.6" -MethodOfLines = "0.10.7" +MethodOfLines = "0.11" julia = "1.6" [extras] From 9f353a636352a3ef69369bb3af517d60561b4b2f Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 29 Mar 2024 03:53:16 +0000 Subject: [PATCH 19/27] build: bump lower bound of packages --- Project.toml | 16 ++++++++-------- docs/Project.toml | 28 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Project.toml b/Project.toml index 5ad71dc39..44acd7b95 100644 --- a/Project.toml +++ b/Project.toml @@ -42,12 +42,12 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" Adapt = "4" AdvancedHMC = "0.6.1" Aqua = "0.8" -ArrayInterface = "7.7" +ArrayInterface = "7.8" CUDA = "5.2" ChainRulesCore = "1.21" ComponentArrays = "0.15.8" Cubature = "1.5" -DiffEqBase = "6.144" +DiffEqBase = "6.148" DiffEqNoiseProcess = "5.20" Distributions = "0.25.107" DocStringExtensions = "0.9" @@ -63,23 +63,23 @@ Lux = "0.5.22" LuxCUDA = "0.3.2" MCMCChains = "6" MethodOfLines = "0.11" -ModelingToolkit = "9" +ModelingToolkit = "9.7" MonteCarloMeasurements = "1.1" Optim = "1.7.8" -Optimization = "3.22" +Optimization = "3.24" OptimizationOptimJL = "0.2.1" OptimizationOptimisers = "0.2.1" -OrdinaryDiffEq = "6.70" +OrdinaryDiffEq = "6.74" Pkg = "1" QuasiMonteCarlo = "0.3.2" Random = "1" Reexport = "1.2" -RuntimeGeneratedFunctions = "0.5.11" +RuntimeGeneratedFunctions = "0.5.12" SafeTestsets = "0.1" -SciMLBase = "2.24" +SciMLBase = "2.28" Statistics = "1.10" SymbolicUtils = "1.4" -Symbolics = "5.17" +Symbolics = "5.24" Test = "1" UnPack = "1" Zygote = "0.6.69" diff --git a/docs/Project.toml b/docs/Project.toml index 1313e4dad..4d82d9741 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -28,29 +28,29 @@ Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [compat] -AdvancedHMC = "0.6" -ComponentArrays = "0.15" +AdvancedHMC = "0.6.1" +ComponentArrays = "0.15.8" Cubature = "1.5" -DiffEqBase = "6.106" -Distributions = "0.25" +DiffEqBase = "6.148" +Distributions = "0.25.107" Documenter = "1" DomainSets = "0.6, 0.7" -Flux = "0.14" +Flux = "0.14.11" Integrals = "4" LineSearches = "7.2" -Lux = "0.5" -LuxCUDA = "0.3" +Lux = "0.5.22" +LuxCUDA = "0.3.2" MethodOfLines = "0.11" -ModelingToolkit = "9" +ModelingToolkit = "9.7" MonteCarloMeasurements = "1" -NeuralPDE = "5.3" -Optimization = "3.9" -OptimizationOptimJL = "0.2" 
-OptimizationOptimisers = "0.2" +NeuralPDE = "5.14" +Optimization = "3.24" +OptimizationOptimJL = "0.2.1" +OptimizationOptimisers = "0.2.1" OptimizationPolyalgorithms = "0.2" -OrdinaryDiffEq = "6.31" +OrdinaryDiffEq = "6.74" Plots = "1.36" -QuasiMonteCarlo = "0.3" +QuasiMonteCarlo = "0.3.2" Random = "1" Roots = "2.0" SpecialFunctions = "2.1" From 86ab97c70db483b9ce7bfaa9097c39e63ee592fb Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Fri, 29 Mar 2024 09:26:17 -0400 Subject: [PATCH 20/27] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 44acd7b95..d1508cbd1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NeuralPDE" uuid = "315f7962-48a3-4962-8226-d0f33b1235f0" authors = ["Chris Rackauckas "] -version = "5.14.0" +version = "5.15.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From b0daed7f09bc7aa59574d96c618a58563e8a1c07 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Fri, 29 Mar 2024 14:20:26 +0000 Subject: [PATCH 21/27] chore: format the whole repo --- README.md | 4 +- docs/make.jl | 16 +- docs/pages.jl | 62 +++--- docs/src/developer/debugging.md | 17 +- docs/src/examples/complex.md | 18 +- docs/src/examples/heterogeneous.md | 4 +- docs/src/examples/linear_parabolic.md | 3 +- docs/src/examples/nonlinear_hyperbolic.md | 4 +- docs/src/examples/wave.md | 21 ++- docs/src/index.md | 4 +- docs/src/manual/bpinns.md | 1 - docs/src/manual/ode.md | 2 +- docs/src/tutorials/constraints.md | 13 +- docs/src/tutorials/dae.md | 8 +- .../tutorials/derivative_neural_network.md | 3 +- docs/src/tutorials/dgm.md | 23 +-- docs/src/tutorials/gpu.md | 28 +-- docs/src/tutorials/integro_diff.md | 2 +- docs/src/tutorials/low_level.md | 2 +- docs/src/tutorials/low_level_2.md | 11 +- docs/src/tutorials/neural_adapter.md | 39 ++-- .../src/tutorials/ode_parameter_estimation.md | 19 +- docs/src/tutorials/param_estim.md | 12 +- docs/src/tutorials/pdesystem.md | 14 +- docs/src/tutorials/systems.md | 3 +- lib/NeuralPDELogging/src/NeuralPDELogging.jl | 4 +- .../test/adaptive_loss_log_tests.jl | 32 ++-- lib/NeuralPDELogging/test/runtests.jl | 64 ++++--- src/BPINN_ode.jl | 49 ++--- src/NeuralPDE.jl | 28 +-- src/PDE_BPINN.jl | 35 ++-- src/adaptive_losses.jl | 137 +++++++------- src/advancedHMC_MCMC.jl | 111 +++++------ src/dae_solve.jl | 18 +- src/dgm.jl | 94 +++++----- src/discretize.jl | 176 +++++++++--------- src/neural_adapter.jl | 12 +- src/ode_solve.jl | 59 ++++-- src/pinn_types.jl | 27 ++- src/rode_solve.jl | 24 +-- src/symbolic_utilities.jl | 97 +++++----- src/training_strategies.jl | 96 +++++----- src/transform_inf_integral.jl | 28 +-- test/BPINN_PDE_tests.jl | 9 +- test/BPINN_PDEinvsol_tests.jl | 22 +-- test/BPINN_Tests.jl | 18 +- test/IDE_tests.jl | 14 +- test/NNDAE_tests.jl | 4 +- test/NNODE_tests.jl | 142 +++++++------- test/NNODE_tstops_test.jl | 23 ++- test/NNPDE_tests.jl | 78 ++++---- test/NNPDE_tests_gpu_Lux.jl | 53 +++--- test/NNRODE_tests.jl | 4 +- test/adaptive_loss_tests.jl | 23 +-- test/additional_loss_tests.jl | 54 +++--- test/dgm_test.jl | 126 ++++++------- test/direct_function_tests.jl | 18 +- test/forward_tests.jl | 15 +- test/neural_adapter_tests.jl | 60 +++--- test/runtests.jl | 64 +++++-- 60 files changed, 1150 insertions(+), 1001 deletions(-) diff --git a/README.md b/README.md index 928ff3871..ecd865834 100644 --- a/README.md +++ b/README.md @@ -93,9 +93,9 @@ xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] analytic_sol_func(x, y) = 
(sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) using Plots diff --git a/docs/make.jl b/docs/make.jl index dfa8087f3..32f56b622 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,13 +10,13 @@ using Plots include("pages.jl") makedocs(sitename = "NeuralPDE.jl", - authors = "#", - modules = [NeuralPDE], - clean = true, doctest = false, linkcheck = true, - warnonly = [:missing_docs], - format = Documenter.HTML(assets = ["assets/favicon.ico"], - canonical = "https://docs.sciml.ai/NeuralPDE/stable/"), - pages = pages) + authors = "#", + modules = [NeuralPDE], + clean = true, doctest = false, linkcheck = true, + warnonly = [:missing_docs], + format = Documenter.HTML(assets = ["assets/favicon.ico"], + canonical = "https://docs.sciml.ai/NeuralPDE/stable/"), + pages = pages) deploydocs(repo = "github.com/SciML/NeuralPDE.jl.git"; - push_preview = true) + push_preview = true) diff --git a/docs/pages.jl b/docs/pages.jl index 31b18ec31..e0a2741a2 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -1,36 +1,36 @@ pages = ["index.md", "ODE PINN Tutorials" => Any["Introduction to NeuralPDE for ODEs" => "tutorials/ode.md", - "Bayesian PINNs for Coupled ODEs" => "tutorials/Lotka_Volterra_BPINNs.md", - "PINNs DAEs" => "tutorials/dae.md", - "Parameter Estimation with PINNs for ODEs" => "tutorials/ode_parameter_estimation.md", - "Deep Galerkin Method" => "tutorials/dgm.md" - #"examples/nnrode_example.md", # currently incorrect - ], - "PDE PINN Tutorials" => Any["Introduction to NeuralPDE for PDEs" => "tutorials/pdesystem.md", - "Bayesian PINNs for PDEs" => "tutorials/low_level_2.md", - "Using GPUs" => "tutorials/gpu.md", - "Defining Systems of PDEs" => "tutorials/systems.md", - "Imposing Constraints" => "tutorials/constraints.md", - "The symbolic_discretize Interface" => "tutorials/low_level.md", - "Optimising Parameters (Solving Inverse Problems)" => "tutorials/param_estim.md", - "Solving Integro Differential Equations" => "tutorials/integro_diff.md", - "Transfer Learning with Neural Adapter" => "tutorials/neural_adapter.md", - "The Derivative Neural Network Approximation" => "tutorials/derivative_neural_network.md"], + "Bayesian PINNs for Coupled ODEs" => "tutorials/Lotka_Volterra_BPINNs.md", + "PINNs DAEs" => "tutorials/dae.md", + "Parameter Estimation with PINNs for ODEs" => "tutorials/ode_parameter_estimation.md", + "Deep Galerkin Method" => "tutorials/dgm.md" #"examples/nnrode_example.md", # currently incorrect + ], + "PDE PINN Tutorials" => Any[ + "Introduction to NeuralPDE for PDEs" => "tutorials/pdesystem.md", + "Bayesian PINNs for PDEs" => "tutorials/low_level_2.md", + "Using GPUs" => "tutorials/gpu.md", + "Defining Systems of PDEs" => "tutorials/systems.md", + "Imposing Constraints" => "tutorials/constraints.md", + "The symbolic_discretize Interface" => "tutorials/low_level.md", + "Optimising Parameters (Solving Inverse Problems)" => "tutorials/param_estim.md", + "Solving Integro Differential Equations" => "tutorials/integro_diff.md", + "Transfer Learning with Neural Adapter" => "tutorials/neural_adapter.md", + "The Derivative Neural Network Approximation" => "tutorials/derivative_neural_network.md"], "Extended Examples" => Any["examples/wave.md", - "examples/3rd.md", - "examples/ks.md", - "examples/heterogeneous.md", - 
"examples/linear_parabolic.md", - "examples/nonlinear_elliptic.md", - "examples/nonlinear_hyperbolic.md", - "examples/complex.md"], + "examples/3rd.md", + "examples/ks.md", + "examples/heterogeneous.md", + "examples/linear_parabolic.md", + "examples/nonlinear_elliptic.md", + "examples/nonlinear_hyperbolic.md", + "examples/complex.md"], "Manual" => Any["manual/ode.md", - "manual/dae.md", - "manual/pinns.md", - "manual/bpinns.md", - "manual/training_strategies.md", - "manual/adaptive_losses.md", - "manual/logging.md", - "manual/neural_adapters.md"], - "Developer Documentation" => Any["developer/debugging.md"], + "manual/dae.md", + "manual/pinns.md", + "manual/bpinns.md", + "manual/training_strategies.md", + "manual/adaptive_losses.md", + "manual/logging.md", + "manual/neural_adapters.md"], + "Developer Documentation" => Any["developer/debugging.md"] ] diff --git a/docs/src/developer/debugging.md b/docs/src/developer/debugging.md index 27ab263ab..4f0ff5afe 100644 --- a/docs/src/developer/debugging.md +++ b/docs/src/developer/debugging.md @@ -58,8 +58,8 @@ strategy = NeuralPDE.GridTraining(dx) integral = NeuralPDE.get_numeric_integral(strategy, indvars, multioutput, chain, derivative) _pde_loss_function = NeuralPDE.build_loss_function(eq, indvars, depvars, phi, derivative, - integral, multioutput, init_params, - strategy) + integral, multioutput, init_params, + strategy) ``` ``` @@ -83,9 +83,9 @@ julia> bc_indvars = NeuralPDE.get_variables(bcs,indvars,depvars) ```julia _bc_loss_functions = [NeuralPDE.build_loss_function(bc, indvars, depvars, - phi, derivative, integral, multioutput, - init_params, strategy, - bc_indvars = bc_indvar) + phi, derivative, integral, multioutput, + init_params, strategy, + bc_indvars = bc_indvar) for (bc, bc_indvar) in zip(bcs, bc_indvars)] ``` @@ -126,7 +126,7 @@ julia> expr_bc_loss_functions = [NeuralPDE.build_symbolic_loss_function(bc,indva ```julia train_sets = NeuralPDE.generate_training_sets(domains, dx, [eq], bcs, eltypeθ, indvars, - depvars) + depvars) pde_train_set, bcs_train_set = train_sets ``` @@ -145,8 +145,9 @@ julia> bcs_train_set ``` ```julia -pde_bounds, bcs_bounds = NeuralPDE.get_bounds(domains, [eq], bcs, eltypeθ, indvars, depvars, - NeuralPDE.StochasticTraining(100)) +pde_bounds, bcs_bounds = NeuralPDE.get_bounds( + domains, [eq], bcs, eltypeθ, indvars, depvars, + NeuralPDE.StochasticTraining(100)) ``` ``` diff --git a/docs/src/examples/complex.md b/docs/src/examples/complex.md index 0e1456e41..a44e31e79 100644 --- a/docs/src/examples/complex.md +++ b/docs/src/examples/complex.md @@ -17,27 +17,29 @@ function bloch_equations(u, p, t) γ = Γ / 2 ρ₁₁, ρ₂₂, ρ₁₂, ρ₂₁ = u d̢ρ = [im * Ω * (ρ₁₂ - ρ₂₁) + Γ * ρ₂₂; - -im * Ω * (ρ₁₂ - ρ₂₁) - Γ * ρ₂₂; - -(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁); - conj(-(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁))] + -im * Ω * (ρ₁₂ - ρ₂₁) - Γ * ρ₂₂; + -(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁); + conj(-(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁))] return d̢ρ end u0 = zeros(ComplexF64, 4) u0[1] = 1.0 -time_span = (0.0, 2.0) +time_span = (0.0, 2.0) parameters = [2.0, 0.0, 1.0] problem = ODEProblem(bloch_equations, u0, time_span, parameters) chain = Lux.Chain( - Lux.Dense(1, 16, tanh; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) , - Lux.Dense(16, 4; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) - ) + Lux.Dense(1, 16, tanh; + init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)), + Lux.Dense( + 16, 4; init_weight = (rng, a...) 
-> Lux.kaiming_normal(rng, ComplexF64, a...)) +) ps, st = Lux.setup(rng, chain) opt = OptimizationOptimisers.Adam(0.01) -ground_truth = solve(problem, Tsit5(), saveat = 0.01) +ground_truth = solve(problem, Tsit5(), saveat = 0.01) alg = NNODE(chain, opt, ps; strategy = StochasticTraining(500)) sol = solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) ``` diff --git a/docs/src/examples/heterogeneous.md b/docs/src/examples/heterogeneous.md index 286e085be..069116ded 100644 --- a/docs/src/examples/heterogeneous.md +++ b/docs/src/examples/heterogeneous.md @@ -32,9 +32,9 @@ domains = [x ∈ Interval(0.0, 1.0), numhid = 3 chains = [[Lux.Chain(Dense(1, numhid, Lux.σ), Dense(numhid, numhid, Lux.σ), - Dense(numhid, 1)) for i in 1:2] + Dense(numhid, 1)) for i in 1:2] [Lux.Chain(Dense(2, numhid, Lux.σ), Dense(numhid, numhid, Lux.σ), - Dense(numhid, 1)) for i in 1:2]] + Dense(numhid, 1)) for i in 1:2]] discretization = NeuralPDE.PhysicsInformedNN(chains, QuadratureTraining()) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [p(x), q(y), r(x, y), s(y, x)]) diff --git a/docs/src/examples/linear_parabolic.md b/docs/src/examples/linear_parabolic.md index 60494a5c9..c481114a2 100644 --- a/docs/src/examples/linear_parabolic.md +++ b/docs/src/examples/linear_parabolic.md @@ -24,7 +24,8 @@ w(t, 1) = \frac{e^{\lambda_1} cos(\frac{x}{a})-e^{\lambda_2}cos(\frac{x}{a})}{\l with a physics-informed neural network. ```@example linear_parabolic -using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers, OptimizationOptimJL, LineSearches +using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers, + OptimizationOptimJL, LineSearches using Plots using ModelingToolkit: Interval, infimum, supremum diff --git a/docs/src/examples/nonlinear_hyperbolic.md b/docs/src/examples/nonlinear_hyperbolic.md index 60203b161..08e2552c7 100644 --- a/docs/src/examples/nonlinear_hyperbolic.md +++ b/docs/src/examples/nonlinear_hyperbolic.md @@ -33,7 +33,8 @@ where k is a root of the algebraic (transcendental) equation f(k) = g(k), j0 and We solve this with Neural: ```@example nonlinear_hyperbolic -using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, Roots, LineSearches +using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimJL, Roots, + LineSearches using SpecialFunctions using Plots using ModelingToolkit: Interval, infimum, supremum @@ -121,7 +122,6 @@ for i in 1:2 end ``` - ```@example nonlinear_hyperbolic ps[1] ``` diff --git a/docs/src/examples/wave.md b/docs/src/examples/wave.md index 954b15cc9..d53e4df65 100644 --- a/docs/src/examples/wave.md +++ b/docs/src/examples/wave.md @@ -70,9 +70,9 @@ function analytic_sol_func(t, x) end u_predict = reshape([first(phi([t, x], res.u)) for t in ts for x in xs], - (length(ts), length(xs))) + (length(ts), length(xs))) u_real = reshape([analytic_sol_func(t, x) for t in ts for x in xs], - (length(ts), length(xs))) + (length(ts), length(xs))) diff_u = abs.(u_predict .- u_real) p1 = plot(ts, xs, u_real, linetype = :contourf, title = "analytic"); @@ -121,7 +121,7 @@ eq = Dx(Dxu(t, x)) ~ 1 / v^2 * Dt(Dtu(t, x)) + b * Dtu(t, x) bcs_ = [u(t, 0) ~ 0.0,# for all t > 0 u(t, L) ~ 0.0,# for all t > 0 u(0, x) ~ x * (1.0 - x), # for all 0 < x < 1 - Dtu(0, x) ~ 1 - 2x, # for all 0 < x < 1 + Dtu(0, x) ~ 1 - 2x # for all 0 < x < 1 ] ep = (cbrt(eps(eltype(Float64))))^2 / 6 @@ -139,16 +139,16 @@ domains = [t ∈ Interval(0.0, L), inn = 25 innd = 4 chain = [[Lux.Chain(Dense(2, inn, Lux.tanh), - Dense(inn, inn, Lux.tanh), - 
Dense(inn, inn, Lux.tanh), - Dense(inn, 1)) for _ in 1:3] + Dense(inn, inn, Lux.tanh), + Dense(inn, inn, Lux.tanh), + Dense(inn, 1)) for _ in 1:3] [Lux.Chain(Dense(2, innd, Lux.tanh), Dense(innd, 1)) for _ in 1:2]] strategy = GridTraining(0.02) discretization = PhysicsInformedNN(chain, strategy;) @named pde_system = PDESystem(eq, bcs, domains, [t, x], - [u(t, x), Dxu(t, x), Dtu(t, x), O1(t, x), O2(t, x)]) + [u(t, x), Dxu(t, x), Dtu(t, x), O1(t, x), O2(t, x)]) prob = discretize(pde_system, discretization) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) @@ -201,10 +201,11 @@ gif(anim, "1Dwave_damped_adaptive.gif", fps = 200) # Surface plot ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] -u_predict = reshape([first(phi([t, x], res.u.depvar.u)) for - t in ts for x in xs], (length(ts), length(xs))) +u_predict = reshape( + [first(phi([t, x], res.u.depvar.u)) for + t in ts for x in xs], (length(ts), length(xs))) u_real = reshape([analytic_sol_func(t, x) for t in ts for x in xs], - (length(ts), length(xs))) + (length(ts), length(xs))) diff_u = abs.(u_predict .- u_real) p1 = plot(ts, xs, u_real, linetype = :contourf, title = "analytic"); diff --git a/docs/src/index.md b/docs/src/index.md index 2b03424c1..8a6334fc6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,7 +1,7 @@ # NeuralPDE.jl: Automatic Physics-Informed Neural Networks (PINNs) -[NeuralPDE.jl](https://github.com/SciML/NeuralPDE.jl) is a solver package which -consists of neural network solvers for partial differential equations using +[NeuralPDE.jl](https://github.com/SciML/NeuralPDE.jl) is a solver package which +consists of neural network solvers for partial differential equations using physics-informed neural networks (PINNs) and the ability to generate neural networks which both approximate physical laws and real data simultaneously. 
diff --git a/docs/src/manual/bpinns.md b/docs/src/manual/bpinns.md index 1b2990168..c067dc717 100644 --- a/docs/src/manual/bpinns.md +++ b/docs/src/manual/bpinns.md @@ -22,4 +22,3 @@ SciMLBase.symbolic_discretize(::PDESystem, ::NeuralPDE.AbstractPINN) NeuralPDE.BPINNstats NeuralPDE.BPINNsolution ``` - diff --git a/docs/src/manual/ode.md b/docs/src/manual/ode.md index 353974536..c25328943 100644 --- a/docs/src/manual/ode.md +++ b/docs/src/manual/ode.md @@ -8,4 +8,4 @@ NNODE ```@docs BNNODE -``` \ No newline at end of file +``` diff --git a/docs/src/tutorials/constraints.md b/docs/src/tutorials/constraints.md index 59f389b17..0898fab11 100644 --- a/docs/src/tutorials/constraints.md +++ b/docs/src/tutorials/constraints.md @@ -47,9 +47,9 @@ domains = [x ∈ Interval(x_0, x_end)] # Neural network inn = 18 chain = Lux.Chain(Dense(1, inn, Lux.σ), - Dense(inn, inn, Lux.σ), - Dense(inn, inn, Lux.σ), - Dense(inn, 1)) + Dense(inn, inn, Lux.σ), + Dense(inn, inn, Lux.σ), + Dense(inn, 1)) lb = [x_0] ub = [x_end] @@ -63,8 +63,8 @@ function norm_loss_function(phi, θ, p) end discretization = PhysicsInformedNN(chain, - QuadratureTraining(), - additional_loss = norm_loss_function) + QuadratureTraining(), + additional_loss = norm_loss_function) @named pdesystem = PDESystem(eq, bcs, domains, [x], [p(x)]) prob = discretize(pdesystem, discretization) @@ -84,7 +84,8 @@ cb_ = function (p, l) return false end -res = Optimization.solve(prob, BFGS(linesearch = BackTracking()), callback = cb_, maxiters = 600) +res = Optimization.solve( + prob, BFGS(linesearch = BackTracking()), callback = cb_, maxiters = 600) ``` And some analysis: diff --git a/docs/src/tutorials/dae.md b/docs/src/tutorials/dae.md index de83542c6..a40d9c58d 100644 --- a/docs/src/tutorials/dae.md +++ b/docs/src/tutorials/dae.md @@ -5,16 +5,14 @@ It is highly recommended you first read the [solving ordinary differential equations with DifferentialEquations.jl tutorial](https://docs.sciml.ai/DiffEqDocs/stable/tutorials/dae_example/) before reading this tutorial. - -This tutorial is an introduction to using physics-informed neural networks (PINNs) for solving differential algebraic equations (DAEs). - +This tutorial is an introduction to using physics-informed neural networks (PINNs) for solving differential algebraic equations (DAEs). 
## Solving an DAE with PINNs Let's solve a simple DAE system: ```@example dae -using NeuralPDE +using NeuralPDE using Random using OrdinaryDiffEq, Statistics using Lux, OptimizationOptimisers @@ -57,4 +55,4 @@ ground_sol = solve(prob_mm, Rodas5(), reltol = 1e-8, abstol = 1e-8) using Plots plot(ground_sol, tspan = tspan, layout = (2, 1), label = "ground truth") plot!(sol, tspan = tspan, layout = (2, 1), label = "dae with pinns") -``` \ No newline at end of file +``` diff --git a/docs/src/tutorials/derivative_neural_network.md b/docs/src/tutorials/derivative_neural_network.md index 82ad99b98..d7ccec27a 100644 --- a/docs/src/tutorials/derivative_neural_network.md +++ b/docs/src/tutorials/derivative_neural_network.md @@ -91,7 +91,8 @@ input_ = length(domains) n = 15 chain = [Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, 1)) for _ in 1:7] -training_strategy = NeuralPDE.QuadratureTraining(; batch = 200, reltol = 1e-6, abstol = 1e-6) +training_strategy = NeuralPDE.QuadratureTraining(; + batch = 200, reltol = 1e-6, abstol = 1e-6) discretization = NeuralPDE.PhysicsInformedNN(chain, training_strategy) vars = [u1(t, x), u2(t, x), u3(t, x), Dxu1(t, x), Dtu1(t, x), Dxu2(t, x), Dtu2(t, x)] diff --git a/docs/src/tutorials/dgm.md b/docs/src/tutorials/dgm.md index cb853d009..a769795ef 100644 --- a/docs/src/tutorials/dgm.md +++ b/docs/src/tutorials/dgm.md @@ -1,6 +1,6 @@ ## Solving PDEs using Deep Galerkin Method -### Overview +### Overview Deep Galerkin Method is a meshless deep learning algorithm to solve high dimensional PDEs. The algorithm does so by approximating the solution of a PDE with a neural network. The loss function of the network is defined in the similar spirit as PINNs, composed of PDE loss and boundary condition loss. @@ -39,6 +39,7 @@ t \in [0, 1], x \in [-1, 1] ``` with boundary conditions + ```math \begin{align*} u(t, x) & = - sin(πx), \\ @@ -66,11 +67,11 @@ Dx = Differential(x) Dxx = Dx^2 α = 0.05 # Burger's equation -eq = Dt(u(t,x)) + u(t,x) * Dx(u(t,x)) - α * Dxx(u(t,x)) ~ 0 +eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - α * Dxx(u(t, x)) ~ 0 # boundary conditions bcs = [ - u(0.0, x) ~ - sin(π*x), + u(0.0, x) ~ -sin(π * x), u(t, -1.0) ~ 0.0, u(t, 1.0) ~ 0.0 ] @@ -81,23 +82,23 @@ domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)] dx = 0.01 order = 2 discretization = MOLFiniteDifference([x => dx], t, saveat = 0.01) -@named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t,x)]) +@named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) prob = discretize(pde_system, discretization) -sol= solve(prob, Tsit5()) +sol = solve(prob, Tsit5()) ts = sol[t] -xs = sol[x] +xs = sol[x] -u_MOL = sol[u(t,x)] +u_MOL = sol[u(t, x)] # NeuralPDE, using Deep Galerkin Method -strategy = QuasiRandomTraining(256, minibatch= 32) +strategy = QuasiRandomTraining(256, minibatch = 32) discretization = DeepGalerkin(2, 1, 50, 5, tanh, tanh, identity, strategy) -@named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t,x)]) +@named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) prob = discretize(pde_system, discretization) global iter = 0 callback = function (p, l) global iter += 1 - if iter%20 == 0 + if iter % 20 == 0 println("$iter => $l") end return false @@ -108,7 +109,7 @@ prob = remake(prob, u0 = res.u) res = Optimization.solve(prob, Adam(0.01); maxiters = 500) phi = discretization.phi -u_predict= [first(phi([t, x], res.minimizer)) for t in ts, x in xs] +u_predict = [first(phi([t, x], res.minimizer)) for t in ts, x in xs] diff_u = abs.(u_predict .- u_MOL) tgrid 
= 0.0:0.01:1.0 diff --git a/docs/src/tutorials/gpu.md b/docs/src/tutorials/gpu.md index feea05c9e..82a07dceb 100644 --- a/docs/src/tutorials/gpu.md +++ b/docs/src/tutorials/gpu.md @@ -34,10 +34,10 @@ using Lux, LuxCUDA, ComponentArrays, Random const gpud = gpu_device() inner = 25 chain = Chain(Dense(3, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, 1)) + Dense(inner, inner, Lux.σ), + Dense(inner, inner, Lux.σ), + Dense(inner, inner, Lux.σ), + Dense(inner, 1)) ps = Lux.setup(Random.default_rng(), chain)[1] ps = ps |> ComponentArray |> gpud .|> Float64 ``` @@ -83,17 +83,17 @@ domains = [t ∈ Interval(t_min, t_max), # Neural network inner = 25 chain = Chain(Dense(3, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, inner, Lux.σ), - Dense(inner, 1)) + Dense(inner, inner, Lux.σ), + Dense(inner, inner, Lux.σ), + Dense(inner, inner, Lux.σ), + Dense(inner, 1)) strategy = QuasiRandomTraining(100) ps = Lux.setup(Random.default_rng(), chain)[1] ps = ps |> ComponentArray |> gpud .|> Float64 discretization = PhysicsInformedNN(chain, - strategy, - init_params = ps) + strategy, + init_params = ps) @named pde_system = PDESystem(eq, bcs, domains, [t, x, y], [u(t, x, y)]) prob = discretize(pde_system, discretization) @@ -111,7 +111,8 @@ We then use the `remake` function to rebuild the PDE problem to start a new opti ```@example gpu prob = remake(prob, u0 = res.u) -res = Optimization.solve(prob, OptimizationOptimisers.Adam(1e-3); callback = callback, maxiters = 2500) +res = Optimization.solve( + prob, OptimizationOptimisers.Adam(1e-3); callback = callback, maxiters = 2500) ``` Finally, we inspect the solution: @@ -127,9 +128,9 @@ function plot_(res) anim = @animate for (i, t) in enumerate(0:0.05:t_max) @info "Animating frame $i..." u_real = reshape([analytic_sol_func(t, x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_predict = reshape([Array(phi([t, x, y], res.u))[1] for x in xs for y in ys], - length(xs), length(ys)) + length(xs), length(ys)) u_error = abs.(u_predict .- u_real) title = @sprintf("predict, t = %.3f", t) p1 = plot(xs, ys, u_predict, st = :surface, label = "", title = title) @@ -153,6 +154,7 @@ runtime with GPU and CPU. 
```julia julia> CUDA.device() + ``` ![image](https://user-images.githubusercontent.com/12683885/110297207-49202500-8004-11eb-9e45-d4cb28045d87.png) diff --git a/docs/src/tutorials/integro_diff.md b/docs/src/tutorials/integro_diff.md index e977b87af..f578ed9c8 100644 --- a/docs/src/tutorials/integro_diff.md +++ b/docs/src/tutorials/integro_diff.md @@ -60,7 +60,7 @@ chain = Lux.Chain(Lux.Dense(1, 15, Lux.σ), Lux.Dense(15, 1)) strategy_ = QuadratureTraining() discretization = PhysicsInformedNN(chain, - strategy_) + strategy_) @named pde_system = PDESystem(eq, bcs, domains, [t], [i(t)]) prob = NeuralPDE.discretize(pde_system, discretization) callback = function (p, l) diff --git a/docs/src/tutorials/low_level.md b/docs/src/tutorials/low_level.md index 33b605e77..90c75de30 100644 --- a/docs/src/tutorials/low_level.md +++ b/docs/src/tutorials/low_level.md @@ -77,7 +77,7 @@ using Plots ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_predict_contourf = reshape([first(phi([t, x], res.u)) for t in ts for x in xs], - length(xs), length(ts)) + length(xs), length(ts)) plot(ts, xs, u_predict_contourf, linetype = :contourf, title = "predict") u_predict = [[first(phi([t, x], res.u)) for x in xs] for t in ts] diff --git a/docs/src/tutorials/low_level_2.md b/docs/src/tutorials/low_level_2.md index 00929ec72..381026ab6 100644 --- a/docs/src/tutorials/low_level_2.md +++ b/docs/src/tutorials/low_level_2.md @@ -88,13 +88,15 @@ datasetpde = [generate_dataset_matrix(domains, dx, dt)] # noise to dataset noisydataset = deepcopy(datasetpde) -noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ randn(size(noisydataset[1][:, 1])) .* 5 / 100 .* - noisydataset[1][:, 1] +noisydataset[1][:, 1] = noisydataset[1][:, 1] .+ + randn(size(noisydataset[1][:, 1])) .* 5 / 100 .* + noisydataset[1][:, 1] ``` Plotting dataset, added noise is set at 5%. 
+ ```@example low_level_2 -plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title="Dataset from Analytical Solution") +plot(datasetpde[1][:, 2], datasetpde[1][:, 1], title = "Dataset from Analytical Solution") plot!(noisydataset[1][:, 2], noisydataset[1][:, 1]) ``` @@ -128,7 +130,8 @@ And some analysis: ```@example low_level_2 phi = discretization.phi[1] -xs, ts = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, [dx / 10, dt])] +xs, ts = [infimum(d.domain):dx:supremum(d.domain) + for (d, dx) in zip(domains, [dx / 10, dt])] u_predict = [[first(pmean(phi([x, t], sol1.estimated_nn_params[1]))) for x in xs] for t in ts] u_real = [[u_analytic(x, t) for x in xs] for t in ts] diff --git a/docs/src/tutorials/neural_adapter.md b/docs/src/tutorials/neural_adapter.md index 56d3a0679..a2399c786 100644 --- a/docs/src/tutorials/neural_adapter.md +++ b/docs/src/tutorials/neural_adapter.md @@ -32,12 +32,12 @@ bcs = [u(0, y) ~ 0.0, u(1, y) ~ -sin(pi * 1) * sin(pi * y), # Space and time domains domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)] quadrature_strategy = NeuralPDE.QuadratureTraining(reltol = 1e-3, abstol = 1e-6, - maxiters = 50, batch = 100) + maxiters = 50, batch = 100) inner = 8 af = Lux.tanh chain1 = Lux.Chain(Lux.Dense(2, inner, af), - Lux.Dense(inner, inner, af), - Lux.Dense(inner, 1)) + Lux.Dense(inner, inner, af), + Lux.Dense(inner, 1)) discretization = NeuralPDE.PhysicsInformedNN(chain1, quadrature_strategy) @@ -56,9 +56,9 @@ phi = discretization.phi inner_ = 8 af = Lux.tanh chain2 = Lux.Chain(Dense(2, inner_, af), - Dense(inner_, inner_, af), - Dense(inner_, inner_, af), - Dense(inner_, 1)) + Dense(inner_, inner_, af), + Dense(inner_, inner_, af), + Dense(inner_, 1)) initp, st = Lux.setup(Random.default_rng(), chain2) init_params2 = Float64.(ComponentArrays.ComponentArray(initp)) @@ -80,11 +80,11 @@ xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_predict_ = reshape([first(phi_([x, y], res_.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = u_predict .- u_real diff_u_ = u_predict_ .- u_real @@ -132,7 +132,8 @@ af = Lux.tanh inner = 10 chains = [Lux.Chain(Dense(2, inner, af), Dense(inner, inner, af), Dense(inner, 1)) for _ in 1:count_decomp] -init_params = map(c -> Float64.(ComponentArray(Lux.setup(Random.default_rng(), c)[1])), chains) +init_params = map( + c -> Float64.(ComponentArray(Lux.setup(Random.default_rng(), c)[1])), chains) xs_ = infimum(x_domain):(1 / count_decomp):supremum(x_domain) xs_domain = [(xs_[i], xs_[i + 1]) for i in 1:(length(xs_) - 1)] @@ -176,7 +177,7 @@ for i in 1:count_decomp strategy = NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3) discretization = NeuralPDE.PhysicsInformedNN(chains[i], strategy; - init_params = init_params[i]) + init_params = init_params[i]) prob = NeuralPDE.discretize(pde_system_, discretization) symprob = NeuralPDE.symbolic_discretize(pde_system_, discretization) @@ -219,10 +220,10 @@ u_predict, diff_u = compose_result(dx) inner_ = 18 af = Lux.tanh chain2 = Lux.Chain(Dense(2, inner_, af), - Dense(inner_, inner_, af), - Dense(inner_, inner_, af), - Dense(inner_, inner_, af), - Dense(inner_, 1)) + Dense(inner_, inner_, af), + 
Dense(inner_, inner_, af), + Dense(inner_, inner_, af), + Dense(inner_, 1)) initp, st = Lux.setup(Random.default_rng(), chain2) init_params2 = Float64.(ComponentArrays.ComponentArray(initp)) @@ -243,19 +244,19 @@ callback = function (p, l) end prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map, - NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)) + NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) prob_ = NeuralPDE.neural_adapter(losses, res_.u, pde_system_map, - NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)) + NeuralPDE.QuadratureTraining(; reltol = 1e-6, abstol = 1e-3)) res_ = Optimization.solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) phi_ = PhysicsInformedNN(chain2, strategy; init_params = res_.u).phi xs, ys = [infimum(d.domain):dx:supremum(d.domain) for d in domains] u_predict_ = reshape([first(phi_([x, y], res_.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u_ = u_predict_ .- u_real using Plots diff --git a/docs/src/tutorials/ode_parameter_estimation.md b/docs/src/tutorials/ode_parameter_estimation.md index 39bd4ef22..93b1c99ec 100644 --- a/docs/src/tutorials/ode_parameter_estimation.md +++ b/docs/src/tutorials/ode_parameter_estimation.md @@ -43,19 +43,19 @@ rng = Random.default_rng() Random.seed!(rng, 0) n = 15 chain = Lux.Chain( - Lux.Dense(1, n, Lux.σ), - Lux.Dense(n, n, Lux.σ), - Lux.Dense(n, n, Lux.σ), - Lux.Dense(n, 2) - ) + Lux.Dense(1, n, Lux.σ), + Lux.Dense(n, n, Lux.σ), + Lux.Dense(n, n, Lux.σ), + Lux.Dense(n, 2) +) ps, st = Lux.setup(rng, chain) |> Lux.f64 ``` -Next we define an additional loss term to in the total loss which measures how the neural network's predictions is fitting the data. +Next we define an additional loss term to in the total loss which measures how the neural network's predictions is fitting the data. ```@example param_estim_lv function additional_loss(phi, θ) - return sum(abs2, phi(t_, θ) .- u_)/size(u_, 2) + return sum(abs2, phi(t_, θ) .- u_) / size(u_, 2) end ``` @@ -63,14 +63,15 @@ Next we define the optimizer and [`NNODE`](@ref) which is then plugged into the ```@example param_estim_lv opt = LBFGS(linesearch = BackTracking()) -alg = NNODE(chain, opt, ps; strategy = WeightedIntervalTraining([0.7, 0.2, 0.1], 500), param_estim = true, additional_loss = additional_loss) +alg = NNODE(chain, opt, ps; strategy = WeightedIntervalTraining([0.7, 0.2, 0.1], 500), + param_estim = true, additional_loss = additional_loss) ``` Now we have all the pieces to solve the optimization problem. ```@example param_estim_lv sol = solve(prob, alg, verbose = true, abstol = 1e-8, maxiters = 5000, saveat = t_) -@test sol.k.u.p ≈ true_p rtol=1e-2 # hide +@test sol.k.u.p≈true_p rtol=1e-2 # hide ``` Let's plot the predictions from the PINN and compare it to the data. 
diff --git a/docs/src/tutorials/param_estim.md b/docs/src/tutorials/param_estim.md index e696c7670..db369ba93 100644 --- a/docs/src/tutorials/param_estim.md +++ b/docs/src/tutorials/param_estim.md @@ -36,11 +36,11 @@ And the neural networks as, input_ = length(domains) n = 8 chain1 = Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, n, Lux.σ), - Dense(n, 1)) + Dense(n, 1)) chain2 = Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, n, Lux.σ), - Dense(n, 1)) + Dense(n, 1)) chain3 = Lux.Chain(Dense(input_, n, Lux.σ), Dense(n, n, Lux.σ), Dense(n, n, Lux.σ), - Dense(n, 1)) + Dense(n, 1)) ``` We will add another loss term based on the data that we have to optimize the parameters. @@ -94,10 +94,10 @@ Then finally defining and optimizing using the `PhysicsInformedNN` interface. ```@example param_estim discretization = NeuralPDE.PhysicsInformedNN([chain1, chain2, chain3], - NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200), param_estim = true, - additional_loss = additional_loss) + NeuralPDE.QuadratureTraining(; abstol = 1e-6, reltol = 1e-6, batch = 200), param_estim = true, + additional_loss = additional_loss) @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_, ρ, β], - defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]])) + defaults = Dict([p .=> 1.0 for p in [σ_, ρ, β]])) prob = NeuralPDE.discretize(pde_system, discretization) callback = function (p, l) println("Current loss is: $l") diff --git a/docs/src/tutorials/pdesystem.md b/docs/src/tutorials/pdesystem.md index d1c438b30..3fbfd55d9 100644 --- a/docs/src/tutorials/pdesystem.md +++ b/docs/src/tutorials/pdesystem.md @@ -53,7 +53,8 @@ dim = 2 # number of dimensions chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) # Discretization -discretization = PhysicsInformedNN(chain, QuadratureTraining(; batch = 200, abstol = 1e-6, reltol = 1e-6)) +discretization = PhysicsInformedNN( + chain, QuadratureTraining(; batch = 200, abstol = 1e-6, reltol = 1e-6)) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = discretize(pde_system, discretization) @@ -74,9 +75,9 @@ xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) p1 = plot(xs, ys, u_real, linetype = :contourf, title = "analytic"); @@ -122,7 +123,8 @@ Here, we build PhysicsInformedNN algorithm where `dx` is the step of discretizat `strategy` stores information for choosing a training strategy. 
```@example poisson -discretization = PhysicsInformedNN(chain, QuadratureTraining(; batch = 200, abstol = 1e-6, reltol = 1e-6)) +discretization = PhysicsInformedNN( + chain, QuadratureTraining(; batch = 200, abstol = 1e-6, reltol = 1e-6)) ``` As described in the API docs, we now need to define the `PDESystem` and create PINNs @@ -158,9 +160,9 @@ xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) p1 = plot(xs, ys, u_real, linetype = :contourf, title = "analytic"); diff --git a/docs/src/tutorials/systems.md b/docs/src/tutorials/systems.md index 80e3fbb62..fceaa6898 100644 --- a/docs/src/tutorials/systems.md +++ b/docs/src/tutorials/systems.md @@ -151,7 +151,8 @@ end f_ = OptimizationFunction(loss_function, Optimization.AutoZygote()) prob = Optimization.OptimizationProblem(f_, sym_prob.flat_init_params) -res = Optimization.solve(prob, OptimizationOptimJL.LBFGS(linesearch = BackTracking()); maxiters = 1000) +res = Optimization.solve( + prob, OptimizationOptimJL.LBFGS(linesearch = BackTracking()); maxiters = 1000) ``` ## Solution Representation diff --git a/lib/NeuralPDELogging/src/NeuralPDELogging.jl b/lib/NeuralPDELogging/src/NeuralPDELogging.jl index 24a4cda4e..940dbe51a 100644 --- a/lib/NeuralPDELogging/src/NeuralPDELogging.jl +++ b/lib/NeuralPDELogging/src/NeuralPDELogging.jl @@ -6,7 +6,7 @@ using TensorBoardLogger """This function overrides the empty function in NeuralPDE in order to use TensorBoardLogger in that package This is light type piracy but it should be alright since this is a subpackage of NeuralPDE""" function NeuralPDE.logvector(logger::TBLogger, vector::AbstractVector{R}, - name::AbstractString, step::Integer) where {R <: Real} + name::AbstractString, step::Integer) where {R <: Real} for j in 1:length(vector) log_value(logger, "$(name)/$(j)", vector[j], step = step) end @@ -16,7 +16,7 @@ end """This function overrides the empty function in NeuralPDE in order to use TensorBoardLogger in that package. 
This is light type piracy but it should be alright since this is a subpackage of NeuralPDE""" function NeuralPDE.logscalar(logger::TBLogger, scalar::R, name::AbstractString, - step::Integer) where {R <: Real} + step::Integer) where {R <: Real} log_value(logger, "$(name)", scalar, step = step) nothing end diff --git a/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl b/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl index 1facaf0a7..b037381af 100644 --- a/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl +++ b/lib/NeuralPDELogging/test/adaptive_loss_log_tests.jl @@ -7,16 +7,16 @@ using Random, Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3, - bc_loss_weights = 1) + bc_loss_weights = 1) adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, - bc_loss_weights = 1) + bc_loss_weights = 1) adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss] maxiters = 800 seed = 60 ## 2D Poisson equation function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, haslogger; - seed = 60, maxiters = 800) + seed = 60, maxiters = 800) logdir = joinpath(outdir, string(run)) if haslogger logger = TBLogger(logdir) @@ -26,7 +26,7 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, hasl Random.seed!(seed) hid = 40 chain_ = Lux.Chain(Dense(2, hid, Lux.σ), Dense(hid, hid, Lux.σ), - Dense(hid, 1)) + Dense(hid, 1)) strategy_ = NeuralPDE.StochasticTraining(256) @info "adaptive reweighting test logdir: $(logdir), maxiters: $(maxiters), 2D Poisson equation, adaptive_loss: $(nameof(typeof(adaptive_loss))) " @parameters x y @@ -46,10 +46,10 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, hasl iteration = [0] discretization = NeuralPDE.PhysicsInformedNN(chain_, - strategy_; - adaptive_loss = adaptive_loss, - logger = logger, - iteration = iteration) + strategy_; + adaptive_loss = adaptive_loss, + logger = logger, + iteration = iteration) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = NeuralPDE.discretize(pde_system, discretization) @@ -59,7 +59,7 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, hasl xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) callback = function (p, l) iteration[1] += 1 @@ -70,26 +70,26 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, outdir, hasl log_value(logger, "outer_error/loss", l, step = iteration[1]) if iteration[1] % 30 == 0 u_predict = reshape([first(phi([x, y], p.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) total_diff = sum(diff_u) log_value(logger, "outer_error/total_diff", total_diff, step = iteration[1]) total_u = sum(abs.(u_real)) total_diff_rel = total_diff / total_u log_value(logger, "outer_error/total_diff_rel", total_diff_rel, - step = iteration[1]) + step = iteration[1]) total_diff_sq = sum(diff_u .^ 2) log_value(logger, "outer_error/total_diff_sq", total_diff_sq, - step = iteration[1]) + step = iteration[1]) end end return false end res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.03); maxiters = maxiters, - callback = callback) + callback = callback) u_predict = 
reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) total_diff = sum(diff_u) total_u = sum(abs.(u_real)) @@ -121,10 +121,10 @@ end function test_2d_poisson_equation_adaptive_loss_run_seediters(adaptive_loss, run) test_2d_poisson_equation_adaptive_loss(adaptive_loss, run, possible_logger_dir, - haslogger; seed = seed, maxiters = maxiters) + haslogger; seed = seed, maxiters = maxiters) end error_results = map(test_2d_poisson_equation_adaptive_loss_run_seediters, adaptive_losses, - 1:length(adaptive_losses)) + 1:length(adaptive_losses)) @test length(readdir(possible_logger_dir)) == expected_log_folders if expected_log_folders > 0 diff --git a/lib/NeuralPDELogging/test/runtests.jl b/lib/NeuralPDELogging/test/runtests.jl index f00b4eb82..2f4d45864 100644 --- a/lib/NeuralPDELogging/test/runtests.jl +++ b/lib/NeuralPDELogging/test/runtests.jl @@ -9,35 +9,37 @@ const is_TRAVIS = haskey(ENV, "TRAVIS") is_CI = haskey(ENV, "CI") -@time begin if GROUP == "All" || GROUP == "Logging" - @time @safetestset "AdaptiveLossLogNoImport" begin - using Pkg - neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", ".."))) - @info "loading neuralpde package at : $(neuralpde_dir)" - neuralpde = Pkg.PackageSpec(path = neuralpde_dir) - Pkg.develop(neuralpde) - @info "making sure that there are no logs without having imported NeuralPDELogging" - ENV["LOG_SETTING"] = "NoImport" - include("adaptive_loss_log_tests.jl") +@time begin + if GROUP == "All" || GROUP == "Logging" + @time @safetestset "AdaptiveLossLogNoImport" begin + using Pkg + neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", ".."))) + @info "loading neuralpde package at : $(neuralpde_dir)" + neuralpde = Pkg.PackageSpec(path = neuralpde_dir) + Pkg.develop(neuralpde) + @info "making sure that there are no logs without having imported NeuralPDELogging" + ENV["LOG_SETTING"] = "NoImport" + include("adaptive_loss_log_tests.jl") + end + @time @safetestset "AdaptiveLossLogImportNoUse" begin + using Pkg + neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", ".."))) + @info "loading neuralpde package at : $(neuralpde_dir)" + neuralpde = Pkg.PackageSpec(path = neuralpde_dir) + Pkg.develop(neuralpde) + @info "making sure that there are still no logs now that we have imported NeuralPDELogging" + ENV["LOG_SETTING"] = "ImportNoUse" + include("adaptive_loss_log_tests.jl") + end + @time @safetestset "AdaptiveLossLogImportUse" begin + using Pkg + neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", ".."))) + @info "loading neuralpde package at : $(neuralpde_dir)" + neuralpde = Pkg.PackageSpec(path = neuralpde_dir) + Pkg.develop(neuralpde) + ENV["LOG_SETTING"] = "ImportUse" + @info "making sure that logs are generated now if we use a logger" + include("adaptive_loss_log_tests.jl") + end end - @time @safetestset "AdaptiveLossLogImportNoUse" begin - using Pkg - neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", ".."))) - @info "loading neuralpde package at : $(neuralpde_dir)" - neuralpde = Pkg.PackageSpec(path = neuralpde_dir) - Pkg.develop(neuralpde) - @info "making sure that there are still no logs now that we have imported NeuralPDELogging" - ENV["LOG_SETTING"] = "ImportNoUse" - include("adaptive_loss_log_tests.jl") - end - @time @safetestset "AdaptiveLossLogImportUse" begin - using Pkg - neuralpde_dir = dirname(abspath(joinpath(@__DIR__, "..", "..", ".."))) - @info "loading neuralpde package at : $(neuralpde_dir)" - neuralpde 
= Pkg.PackageSpec(path = neuralpde_dir) - Pkg.develop(neuralpde) - ENV["LOG_SETTING"] = "ImportUse" - @info "making sure that logs are generated now if we use a logger" - include("adaptive_loss_log_tests.jl") - end -end end +end diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl index 087b9d41c..e92d1b11c 100644 --- a/src/BPINN_ode.jl +++ b/src/BPINN_ode.jl @@ -105,15 +105,16 @@ struct BNNODE{C, K, IT <: NamedTuple, verbose::Bool end function BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, - priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], - dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1, - init_params = nothing, - Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, - targetacceptancerate = 0.8), - Integratorkwargs = (Integrator = Leapfrog,), - autodiff = false, progress = false, verbose = false) - !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) + priorsNNw = (0.0, 2.0), param = nothing, l2std = [0.05], phystd = [0.05], + dataset = [nothing], physdt = 1 / 20.0, MCMCkwargs = (n_leapfrog = 30,), nchains = 1, + init_params = nothing, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, + targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), + autodiff = false, progress = false, verbose = false) + !(chain isa Lux.AbstractExplicitLayer) && + (chain = adapt(FromFluxAdaptor(false, false), chain)) BNNODE(chain, Kernel, strategy, draw_samples, priorsNNw, param, l2std, phystd, dataset, physdt, MCMCkwargs, @@ -163,24 +164,25 @@ struct BPINNsolution{O <: BPINNstats, E, NP, OP, P} estimated_de_params, timepoints) new{typeof(original), typeof(ensemblesol), typeof(estimated_nn_params), - typeof(estimated_de_params), typeof(timepoints)}(original, ensemblesol, estimated_nn_params, + typeof(estimated_de_params), typeof(timepoints)}( + original, ensemblesol, estimated_nn_params, estimated_de_params, timepoints) end end function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem, - alg::BNNODE, - args...; - dt = nothing, - timeseries_errors = true, - save_everystep = true, - adaptive = false, - abstol = 1.0f-6, - reltol = 1.0f-3, - verbose = false, - saveat = 1 / 50.0, - maxiters = nothing, - numensemble = floor(Int, alg.draw_samples / 3)) + alg::BNNODE, + args...; + dt = nothing, + timeseries_errors = true, + save_everystep = true, + adaptive = false, + abstol = 1.0f-6, + reltol = 1.0f-3, + verbose = false, + saveat = 1 / 50.0, + maxiters = nothing, + numensemble = floor(Int, alg.draw_samples / 3)) @unpack chain, l2std, phystd, param, priorsNNw, Kernel, strategy, draw_samples, dataset, init_params, nchains, physdt, Adaptorkwargs, Integratorkwargs, @@ -257,7 +259,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem, end nnparams = length(θinit) - estimnnparams = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :]) for i in 1:nnparams] + estimnnparams = [Particles(reduce(hcat, samples[(end - numensemble):end])[i, :]) + for i in 1:nnparams] if ninv == 0 estimated_params = [nothing] diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 2ba1de25b..cfc3b9367 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -24,9 +24,11 @@ using Symbolics: wrap, unwrap, arguments, operation using SymbolicUtils using AdvancedHMC, LogDensityProblems, LinearAlgebra, Functors, MCMCChains using MonteCarloMeasurements: Particles -using ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives, Interval, 
infimum, supremum +using ModelingToolkit: value, nameof, toexpr, build_expr, expand_derivatives, Interval, + infimum, supremum import DomainSets -using DomainSets: Domain, ClosedInterval, AbstractInterval, leftendpoint, rightendpoint, ProductDomain +using DomainSets: Domain, ClosedInterval, AbstractInterval, leftendpoint, rightendpoint, + ProductDomain using SciMLBase: @add_kwonly, parameterless_type using UnPack: @unpack import ChainRulesCore, Lux, ComponentArrays @@ -55,16 +57,16 @@ include("PDE_BPINN.jl") include("dgm.jl") export NNODE, NNDAE, - PhysicsInformedNN, discretize, - GridTraining, StochasticTraining, QuadratureTraining, QuasiRandomTraining, - WeightedIntervalTraining, - build_loss_function, get_loss_function, - generate_training_sets, get_variables, get_argument, get_bounds, - get_numeric_integral, symbolic_discretize, - AbstractAdaptiveLoss, NonAdaptiveLoss, GradientScaleAdaptiveLoss, - MiniMaxAdaptiveLoss, LogOptions, - ahmc_bayesian_pinn_ode, BNNODE, ahmc_bayesian_pinn_pde, vector_to_parameters, - BPINNsolution, BayesianPINN, - DeepGalerkin + PhysicsInformedNN, discretize, + GridTraining, StochasticTraining, QuadratureTraining, QuasiRandomTraining, + WeightedIntervalTraining, + build_loss_function, get_loss_function, + generate_training_sets, get_variables, get_argument, get_bounds, + get_numeric_integral, symbolic_discretize, + AbstractAdaptiveLoss, NonAdaptiveLoss, GradientScaleAdaptiveLoss, + MiniMaxAdaptiveLoss, LogOptions, + ahmc_bayesian_pinn_ode, BNNODE, ahmc_bayesian_pinn_pde, vector_to_parameters, + BPINNsolution, BayesianPINN, + DeepGalerkin end # module diff --git a/src/PDE_BPINN.jl b/src/PDE_BPINN.jl index 02fc3d30c..1c37bfdaa 100644 --- a/src/PDE_BPINN.jl +++ b/src/PDE_BPINN.jl @@ -4,7 +4,7 @@ mutable struct PDELogTargetDensity{ P <: Vector{<:Distribution}, I, F, - PH, + PH } dim::Int64 strategy::ST @@ -26,7 +26,7 @@ mutable struct PDELogTargetDensity{ typeof(priors), typeof(init_params), typeof(full_loglikelihood), - typeof(Φ), + typeof(Φ) }(dim, strategy, dataset, @@ -48,7 +48,7 @@ mutable struct PDELogTargetDensity{ typeof(priors), typeof(init_params), typeof(full_loglikelihood), - typeof(Φ), + typeof(Φ) }(dim, strategy, dataset, @@ -83,7 +83,7 @@ function setparameters(Tar::PDELogTargetDensity, θ) # which we use for mapping current ahmc sampled vector of parameters onto NNs i = 0 Luxparams = [vector_to_parameters(ps_new[((i += length(ps[x])) - length(ps[x]) + 1):i], - ps[x]) for x in names] + ps[x]) for x in names] a = ComponentArrays.ComponentArray(NamedTuple{Tar.names}(i for i in Luxparams)) @@ -125,7 +125,9 @@ function L2LossData(Tar::PDELogTargetDensity, θ) if Tar.extraparams > 0 for i in eachindex(Φ) - sumt += logpdf(MvNormal(Φ[i](dataset[i][:, 2:end]', + sumt += logpdf( + MvNormal( + Φ[i](dataset[i][:, 2:end]', vector_to_parameters(θ[1:(end - Tar.extraparams)], init_params)[Tar.names[i]])[1, :], @@ -148,12 +150,14 @@ function priorlogpdf(Tar::PDELogTargetDensity, θ) nnwparams = allparams[1] if Tar.extraparams > 0 - invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i]) - for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0) + invlogpdf = sum( + logpdf(invpriors[length(θ) - i + 1], θ[i]) + for i in (length(θ) - Tar.extraparams + 1):length(θ); + init = 0.0) return (invlogpdf + - logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)])) + logpdf(nnwparams, θ[1:(length(θ) - Tar.extraparams)])) end return logpdf(nnwparams, θ) end @@ -195,7 +199,7 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ) span = [[ranges[indvar] for 
indvar in input] for input in inputs] timepoints = [hcat(vec(map(points -> collect(points), - Iterators.product(span[i]...)))...) + Iterators.product(span[i]...)))...) for i in eachindex(phi)] # order of range's domains must match chain's inputs and dep_vars @@ -224,7 +228,7 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ) # convert to format directly usable by lux estimatedLuxparams = [vector_to_parameters(estimnnparams[Luxparams[i]], - initial_nnθ[names[i]]) for i in eachindex(phi)] + initial_nnθ[names[i]]) for i in eachindex(phi)] # infer predictions(preds) each row - NN, each col - ith sample samplesn = reduce(hcat, samples) @@ -232,8 +236,8 @@ function inference(samples, pinnrep, saveats, numensemble, ℓπ) for j in eachindex(phi) push!(preds, [phi[j](timepoints[j], - vector_to_parameters(samplesn[:, i][Luxparams[j]], - initial_nnθ[names[j]])) for i in 1:numensemble]) + vector_to_parameters(samplesn[:, i][Luxparams[j]], + initial_nnθ[names[j]])) for i in 1:numensemble]) end # note here no of samples referse to numensemble and points is the no of points in each dep_vars discretization @@ -351,7 +355,7 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization; # add init_params for NN params priors = [ MvNormal(priorsNNw[1] * ones(nparameters), - LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))), + LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))) ] # append Ode params to all paramvector - initial_θ @@ -414,7 +418,8 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization; mcmc_chain = MCMCChains.Chains(matrix_samples') fullsolution = BPINNstats(mcmc_chain, samples, stats) - ensemblecurves, estimnnparams, estimated_params, timepoints = inference(samples, + ensemblecurves, estimnnparams, estimated_params, timepoints = inference( + samples, pinnrep, saveat, numensemble, @@ -462,4 +467,4 @@ function ahmc_bayesian_pinn_pde(pde_system, discretization; estimated_params, timepoints) end -end \ No newline at end of file +end diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 3a1c4a79d..ca949ec45 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -24,27 +24,28 @@ mutable struct NonAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} SciMLBase.@add_kwonly function NonAdaptiveLoss{T}(; pde_loss_weights = 1.0, - bc_loss_weights = 1.0, - additional_loss_weights = 1.0) where { - T <: - Real - } + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) where { + T <: + Real + } new(vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end # default to Float64 -SciMLBase.@add_kwonly function NonAdaptiveLoss(; pde_loss_weights = 1.0, bc_loss_weights = 1.0, - additional_loss_weights = 1.0) +SciMLBase.@add_kwonly function NonAdaptiveLoss(; + pde_loss_weights = 1.0, bc_loss_weights = 1.0, + additional_loss_weights = 1.0) NonAdaptiveLoss{Float64}(; pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::NonAdaptiveLoss, - pde_loss_functions, bc_loss_functions) + adaloss::NonAdaptiveLoss, + pde_loss_functions, bc_loss_functions) function null_nonadaptive_loss(θ, pde_losses, bc_losses) nothing end @@ -87,13 +88,13 @@ mutable struct GradientScaleAdaptiveLoss{T <: Real} <: 
AbstractAdaptiveLoss bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} SciMLBase.@add_kwonly function GradientScaleAdaptiveLoss{T}(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1.0, - bc_loss_weights = 1.0, - additional_loss_weights = 1.0) where { - T <: - Real - } + weight_change_inertia = 0.9, + pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) where { + T <: + Real + } new(convert(Int64, reweight_every), convert(T, weight_change_inertia), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) @@ -101,20 +102,20 @@ mutable struct GradientScaleAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss end # default to Float64 SciMLBase.@add_kwonly function GradientScaleAdaptiveLoss(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1.0, - bc_loss_weights = 1.0, - additional_loss_weights = 1.0) + weight_change_inertia = 0.9, + pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) GradientScaleAdaptiveLoss{Float64}(reweight_every; - weight_change_inertia = weight_change_inertia, - pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::GradientScaleAdaptiveLoss, - pde_loss_functions, bc_loss_functions) + adaloss::GradientScaleAdaptiveLoss, + pde_loss_functions, bc_loss_functions) weight_change_inertia = adaloss.weight_change_inertia iteration = pinnrep.iteration adaloss_T = eltype(adaloss.pde_loss_weights) @@ -137,14 +138,14 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, (1 .- weight_change_inertia) .* bc_loss_weights_proposed logscalar(pinnrep.logger, pde_grads_max, "adaptive_loss/pde_grad_max", - iteration[1]) + iteration[1]) logvector(pinnrep.logger, pde_grads_maxes, "adaptive_loss/pde_grad_maxes", - iteration[1]) + iteration[1]) logvector(pinnrep.logger, bc_grads_mean, "adaptive_loss/bc_grad_mean", - iteration[1]) + iteration[1]) logvector(pinnrep.logger, adaloss.bc_loss_weights, - "adaptive_loss/bc_loss_weights", - iteration[1]) + "adaptive_loss/bc_loss_weights", + iteration[1]) end nothing end @@ -182,8 +183,8 @@ Levi McClenny, Ulisses Braga-Neto https://arxiv.org/abs/2009.04544 """ mutable struct MiniMaxAdaptiveLoss{T <: Real, - PDE_OPT, - BC_OPT} <: + PDE_OPT, + BC_OPT} <: AbstractAdaptiveLoss reweight_every::Int64 pde_max_optimiser::PDE_OPT @@ -192,17 +193,17 @@ mutable struct MiniMaxAdaptiveLoss{T <: Real, bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss{T, - PDE_OPT, BC_OPT}(reweight_every; - pde_max_optimiser = OptimizationOptimisers.Adam(1e-4), - bc_max_optimiser = OptimizationOptimisers.Adam(0.5), - pde_loss_weights = 1.0, - bc_loss_weights = 1.0, - additional_loss_weights = 1.0) where { - T <: - Real, - PDE_OPT, - BC_OPT - } + PDE_OPT, BC_OPT}(reweight_every; + pde_max_optimiser = OptimizationOptimisers.Adam(1e-4), + bc_max_optimiser = OptimizationOptimisers.Adam(0.5), + pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) where { + T <: + Real, + PDE_OPT, + BC_OPT + } new(convert(Int64, reweight_every), convert(PDE_OPT, pde_max_optimiser), convert(BC_OPT, bc_max_optimiser), 
vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), @@ -212,38 +213,42 @@ end # default to Float64, ADAM, ADAM SciMLBase.@add_kwonly function MiniMaxAdaptiveLoss(reweight_every; - pde_max_optimiser = OptimizationOptimisers.Adam(1e-4), - bc_max_optimiser = OptimizationOptimisers.Adam(0.5), - pde_loss_weights = 1.0, - bc_loss_weights = 1.0, - additional_loss_weights = 1.0) + pde_max_optimiser = OptimizationOptimisers.Adam(1e-4), + bc_max_optimiser = OptimizationOptimisers.Adam(0.5), + pde_loss_weights = 1.0, + bc_loss_weights = 1.0, + additional_loss_weights = 1.0) MiniMaxAdaptiveLoss{Float64, typeof(pde_max_optimiser), - typeof(bc_max_optimiser)}(reweight_every; - pde_max_optimiser = pde_max_optimiser, - bc_max_optimiser = bc_max_optimiser, - pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) + typeof(bc_max_optimiser)}(reweight_every; + pde_max_optimiser = pde_max_optimiser, + bc_max_optimiser = bc_max_optimiser, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::MiniMaxAdaptiveLoss, - pde_loss_functions, bc_loss_functions) + adaloss::MiniMaxAdaptiveLoss, + pde_loss_functions, bc_loss_functions) pde_max_optimiser = adaloss.pde_max_optimiser - pde_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup(pde_max_optimiser, adaloss.pde_loss_weights) + pde_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup( + pde_max_optimiser, adaloss.pde_loss_weights) bc_max_optimiser = adaloss.bc_max_optimiser - bc_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup(bc_max_optimiser, adaloss.bc_loss_weights) + bc_max_optimiser_setup = OptimizationOptimisers.Optimisers.setup( + bc_max_optimiser, adaloss.bc_loss_weights) iteration = pinnrep.iteration function run_minimax_adaptive_loss(θ, pde_losses, bc_losses) if iteration[1] % adaloss.reweight_every == 0 - OptimizationOptimisers.Optimisers.update!(pde_max_optimiser_setup, adaloss.pde_loss_weights, -pde_losses) - OptimizationOptimisers.Optimisers.update!(bc_max_optimiser_setup, adaloss.bc_loss_weights, -bc_losses) + OptimizationOptimisers.Optimisers.update!( + pde_max_optimiser_setup, adaloss.pde_loss_weights, -pde_losses) + OptimizationOptimisers.Optimisers.update!( + bc_max_optimiser_setup, adaloss.bc_loss_weights, -bc_losses) logvector(pinnrep.logger, adaloss.pde_loss_weights, - "adaptive_loss/pde_loss_weights", iteration[1]) + "adaptive_loss/pde_loss_weights", iteration[1]) logvector(pinnrep.logger, adaloss.bc_loss_weights, - "adaptive_loss/bc_loss_weights", - iteration[1]) + "adaptive_loss/bc_loss_weights", + iteration[1]) end nothing end diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index c86c87599..22292462d 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -1,7 +1,7 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I, P <: Vector{<:Distribution}, D <: - Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}}, + Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}} } dim::Int prob::DiffEqBase.ODEProblem @@ -18,16 +18,16 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I, init_params::I function LogTargetDensity(dim, prob, chain::Optimisers.Restructure, st, strategy, - dataset, - priors, phystd, l2std, autodiff, physdt, extraparams, - init_params::AbstractVector) + dataset, + priors, phystd, 
l2std, autodiff, physdt, extraparams, + init_params::AbstractVector) new{ typeof(chain), Nothing, typeof(strategy), typeof(init_params), typeof(priors), - typeof(dataset), + typeof(dataset) }(dim, prob, chain, @@ -42,16 +42,16 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I, init_params) end function LogTargetDensity(dim, prob, chain::Lux.AbstractExplicitLayer, st, strategy, - dataset, - priors, phystd, l2std, autodiff, physdt, extraparams, - init_params::NamedTuple) + dataset, + priors, phystd, l2std, autodiff, physdt, extraparams, + init_params::NamedTuple) new{ typeof(chain), typeof(st), typeof(strategy), typeof(init_params), typeof(priors), - typeof(dataset), + typeof(dataset) }(dim, prob, chain, st, strategy, @@ -106,7 +106,8 @@ function L2LossData(Tar::LogTargetDensity, θ) L2logprob = 0 for i in 1:length(Tar.prob.u0) # for u[i] ith vector must be added to dataset,nn[1,:] is the dx in lotka_volterra - L2logprob += logpdf(MvNormal(nn[i, :], + L2logprob += logpdf( + MvNormal(nn[i, :], LinearAlgebra.Diagonal(abs2.(Tar.l2std[i] .* ones(length(Tar.dataset[i]))))), Tar.dataset[i]) @@ -138,8 +139,8 @@ function physloglikelihood(Tar::LogTargetDensity, θ) end function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::Bool, - tspan, - ode_params, θ) + tspan, + ode_params, θ) if Tar.dataset isa Vector{Nothing} t = collect(eltype(strategy.dx), tspan[1]:(strategy.dx):tspan[2]) else @@ -152,12 +153,12 @@ function getlogpdf(strategy::GridTraining, Tar::LogTargetDensity, f, autodiff::B end function getlogpdf(strategy::StochasticTraining, - Tar::LogTargetDensity, - f, - autodiff::Bool, - tspan, - ode_params, - θ) + Tar::LogTargetDensity, + f, + autodiff::Bool, + tspan, + ode_params, + θ) if Tar.dataset isa Vector{Nothing} t = [(tspan[2] - tspan[1]) * rand() + tspan[1] for i in 1:(strategy.points)] else @@ -170,21 +171,22 @@ function getlogpdf(strategy::StochasticTraining, end function getlogpdf(strategy::QuadratureTraining, Tar::LogTargetDensity, f, - autodiff::Bool, - tspan, - ode_params, θ) + autodiff::Bool, + tspan, + ode_params, θ) function integrand(t::Number, θ) innerdiff(Tar, f, autodiff, [t], θ, ode_params) end - intprob = IntegralProblem(integrand, (tspan[1], tspan[2]), θ; nout = length(Tar.prob.u0)) + intprob = IntegralProblem( + integrand, (tspan[1], tspan[2]), θ; nout = length(Tar.prob.u0)) sol = solve(intprob, QuadGKJL(); abstol = strategy.abstol, reltol = strategy.reltol) sum(sol.u) end function getlogpdf(strategy::WeightedIntervalTraining, Tar::LogTargetDensity, f, - autodiff::Bool, - tspan, - ode_params, θ) + autodiff::Bool, + tspan, + ode_params, θ) minT = tspan[1] maxT = tspan[2] @@ -217,7 +219,7 @@ end MvNormal likelihood at each `ti` in time `t` for ODE collocation residue with NN with parameters θ. 
""" function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, θ, - ode_params) + ode_params) # Tar used for phi and LogTargetDensity object attributes access out = Tar(t, θ[1:(length(θ) - Tar.extraparams)]) @@ -230,13 +232,13 @@ function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, # this is a vector{vector{dx,dy}}(handle case single u(float passed)) if length(out[:, 1]) == 1 physsol = [f(out[:, i][1], - ode_params, - t[i]) + ode_params, + t[i]) for i in 1:length(out[1, :])] else physsol = [f(out[:, i], - ode_params, - t[i]) + ode_params, + t[i]) for i in 1:length(out[1, :])] end physsol = reduce(hcat, physsol) @@ -246,10 +248,11 @@ function innerdiff(Tar::LogTargetDensity, f, autodiff::Bool, t::AbstractVector, vals = nnsol .- physsol # N dimensional vector if N outputs for NN(each row has logpdf of i[i] where u is vector of dependant variables) - return [logpdf(MvNormal(vals[i, :], - LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .* - ones(length(vals[i, :]))))), - zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)] + return [logpdf( + MvNormal(vals[i, :], + LinearAlgebra.Diagonal(abs2.(Tar.phystd[i] .* + ones(length(vals[i, :]))))), + zeros(length(vals[i, :]))) for i in 1:length(Tar.prob.u0)] end """ @@ -264,8 +267,10 @@ function priorweights(Tar::LogTargetDensity, θ) # Vector of ode parameters priors invpriors = allparams[2:end] - invlogpdf = sum(logpdf(invpriors[length(θ) - i + 1], θ[i]) - for i in (length(θ) - Tar.extraparams + 1):length(θ); init = 0.0) + invlogpdf = sum( + logpdf(invpriors[length(θ) - i + 1], θ[i]) + for i in (length(θ) - Tar.extraparams + 1):length(θ); + init = 0.0) return (invlogpdf + @@ -289,7 +294,7 @@ end NN OUTPUT AT t,θ ~ phi(t,θ). """ function (f::LogTargetDensity{C, S})(t::AbstractVector, - θ) where {C <: Lux.AbstractExplicitLayer, S} + θ) where {C <: Lux.AbstractExplicitLayer, S} θ = vector_to_parameters(θ, f.init_params) y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), t'), θ, f.st) ChainRulesCore.@ignore_derivatives f.st = st @@ -297,7 +302,7 @@ function (f::LogTargetDensity{C, S})(t::AbstractVector, end function (f::LogTargetDensity{C, S})(t::Number, - θ) where {C <: Lux.AbstractExplicitLayer, S} + θ) where {C <: Lux.AbstractExplicitLayer, S} θ = vector_to_parameters(θ, f.init_params) y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ)), [t]), θ, f.st) ChainRulesCore.@ignore_derivatives f.st = st @@ -427,19 +432,19 @@ Incase you are only solving the Equations for solution, do not provide dataset * AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. 
""" function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; - strategy = GridTraining, dataset = [nothing], - init_params = nothing, draw_samples = 1000, - physdt = 1 / 20.0, l2std = [0.05], - phystd = [0.05], priorsNNw = (0.0, 2.0), - param = [], nchains = 1, autodiff = false, - Kernel = HMC, - Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), - Integratorkwargs = (Integrator = Leapfrog,), - MCMCkwargs = (n_leapfrog = 30,), - progress = false, verbose = false) - - !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) + strategy = GridTraining, dataset = [nothing], + init_params = nothing, draw_samples = 1000, + physdt = 1 / 20.0, l2std = [0.05], + phystd = [0.05], priorsNNw = (0.0, 2.0), + param = [], nchains = 1, autodiff = false, + Kernel = HMC, + Adaptorkwargs = (Adaptor = StanHMCAdaptor, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), + Integratorkwargs = (Integrator = Leapfrog,), + MCMCkwargs = (n_leapfrog = 30,), + progress = false, verbose = false) + !(chain isa Lux.AbstractExplicitLayer) && + (chain = adapt(FromFluxAdaptor(false, false), chain)) # NN parameter prior mean and variance(PriorsNN must be a tuple) if isinplace(prob) throw(error("The BPINN ODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) @@ -481,7 +486,7 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; ninv = length(param) priors = [ MvNormal(priorsNNw[1] * ones(nparameters), - LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))), + LinearAlgebra.Diagonal(abs2.(priorsNNw[2] .* ones(nparameters)))) ] # append Ode params to all paramvector @@ -568,4 +573,4 @@ function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; mcmc_chain = MCMCChains.Chains(matrix_samples') return mcmc_chain, samples, stats end -end \ No newline at end of file +end diff --git a/src/dae_solve.jl b/src/dae_solve.jl index 0c9d1323d..f9edbb948 100644 --- a/src/dae_solve.jl +++ b/src/dae_solve.jl @@ -42,23 +42,20 @@ end function NNDAE(chain, opt, init_params = nothing; strategy = nothing, autodiff = false, kwargs...) - !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) + !(chain isa Lux.AbstractExplicitLayer) && + (chain = adapt(FromFluxAdaptor(false, false), chain)) NNDAE(chain, opt, init_params, autodiff, strategy, kwargs) end -function dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool, differential_vars::AbstractVector) +function dfdx(phi::ODEPhi, t::AbstractVector, θ, autodiff::Bool, + differential_vars::AbstractVector) if autodiff autodiff && throw(ArgumentError("autodiff not supported for DAE problem.")) else dphi = (phi(t .+ sqrt(eps(eltype(t))), θ) - phi(t, θ)) ./ sqrt(eps(eltype(t))) batch_size = size(t)[1] - reduce(vcat, - [if dv == true - dphi[[i], :] - else - zeros(1, batch_size) - end + [dv ? 
dphi[[i], :] : zeros(1, batch_size) for (i, dv) in enumerate(differential_vars)]) end end @@ -115,7 +112,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractDAEProblem, if chain isa Lux.AbstractExplicitLayer || chain isa Flux.Chain phi, init_params = generate_phi_θ(chain, t0, u0, init_params) - init_params = ComponentArrays.ComponentArray(; depvar = ComponentArrays.ComponentArray(init_params)) + init_params = ComponentArrays.ComponentArray(; + depvar = ComponentArrays.ComponentArray(init_params)) else error("Only Lux.AbstractExplicitLayer and Flux.Chain neural networks are supported") end @@ -188,4 +186,4 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractDAEProblem, DiffEqBase.calculate_solution_errors!(sol; timeseries_errors = true, dense_errors = false) sol -end #solve +end diff --git a/src/dgm.jl b/src/dgm.jl index 886deef09..40fe88134 100644 --- a/src/dgm.jl +++ b/src/dgm.jl @@ -1,4 +1,4 @@ -struct dgm_lstm_layer{F1, F2} <:Lux.AbstractExplicitLayer +struct dgm_lstm_layer{F1, F2} <: Lux.AbstractExplicitLayer activation1::Function activation2::Function in_dims::Int @@ -8,11 +8,12 @@ struct dgm_lstm_layer{F1, F2} <:Lux.AbstractExplicitLayer end function dgm_lstm_layer(in_dims::Int, out_dims::Int, activation1, activation2; - init_weight = Lux.glorot_uniform, init_bias = Lux.zeros32) - return dgm_lstm_layer{typeof(init_weight), typeof(init_bias)}(activation1, activation2, in_dims, out_dims, init_weight, init_bias); + init_weight = Lux.glorot_uniform, init_bias = Lux.zeros32) + return dgm_lstm_layer{typeof(init_weight), typeof(init_bias)}( + activation1, activation2, in_dims, out_dims, init_weight, init_bias) end -import Lux:initialparameters, initialstates, parameterlength, statelength +import Lux: initialparameters, initialstates, parameterlength, statelength function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer) return ( @@ -24,75 +25,79 @@ function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer) Wg = l.init_weight(rng, l.out_dims, l.out_dims), Wr = l.init_weight(rng, l.out_dims, l.out_dims), Wh = l.init_weight(rng, l.out_dims, l.out_dims), - bz = l.init_bias(rng, l.out_dims) , - bg = l.init_bias(rng, l.out_dims) , - br = l.init_bias(rng, l.out_dims) , - bh = l.init_bias(rng, l.out_dims) + bz = l.init_bias(rng, l.out_dims), + bg = l.init_bias(rng, l.out_dims), + br = l.init_bias(rng, l.out_dims), + bh = l.init_bias(rng, l.out_dims) ) end Lux.initialstates(::AbstractRNG, ::dgm_lstm_layer) = NamedTuple() -Lux.parameterlength(l::dgm_lstm_layer) = 4* (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims) +function Lux.parameterlength(l::dgm_lstm_layer) + 4 * (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims) +end Lux.statelength(l::dgm_lstm_layer) = 0 -function (layer::dgm_lstm_layer)(S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where T +function (layer::dgm_lstm_layer)( + S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T} @unpack Uz, Ug, Ur, Uh, Wz, Wg, Wr, Wh, bz, bg, br, bh = ps - Z = layer.activation1.(Uz*x+ Wz*S .+ bz); - G = layer.activation1.(Ug*x+ Wg*S .+ bg); - R = layer.activation1.(Ur*x+ Wr*S .+ br); - H = layer.activation2.(Uh*x+ Wh*(S.*R) .+ bh); - S_new = (1. 
.- G) .* H .+ Z .* S; - return S_new, st; + Z = layer.activation1.(Uz * x + Wz * S .+ bz) + G = layer.activation1.(Ug * x + Wg * S .+ bg) + R = layer.activation1.(Ur * x + Wr * S .+ br) + H = layer.activation2.(Uh * x + Wh * (S .* R) .+ bh) + S_new = (1.0 .- G) .* H .+ Z .* S + return S_new, st end -struct dgm_lstm_block{L <:NamedTuple} <: Lux.AbstractExplicitContainerLayer{(:layers,)} +struct dgm_lstm_block{L <: NamedTuple} <: Lux.AbstractExplicitContainerLayer{(:layers,)} layers::L end function dgm_lstm_block(l...) - names = ntuple(i-> Symbol("dgm_lstm_$i"), length(l)); - layers = NamedTuple{names}(l); - return dgm_lstm_block(layers); + names = ntuple(i -> Symbol("dgm_lstm_$i"), length(l)) + layers = NamedTuple{names}(l) + return dgm_lstm_block(layers) end dgm_lstm_block(xs::AbstractVector) = dgm_lstm_block(xs...) -@generated function apply_dgm_lstm_block(layers::NamedTuple{fields}, S::AbstractVecOrMat, x::AbstractVecOrMat, ps, st::NamedTuple) where fields - N = length(fields); +@generated function apply_dgm_lstm_block(layers::NamedTuple{fields}, S::AbstractVecOrMat, + x::AbstractVecOrMat, ps, st::NamedTuple) where {fields} + N = length(fields) S_symbols = vcat([:S], [gensym() for _ in 1:N]) - x_symbol = :x; + x_symbol = :x st_symbols = [gensym() for _ in 1:N] calls = [:(($(S_symbols[i + 1]), $(st_symbols[i])) = layers.$(fields[i])( - $(S_symbols[i]), $(x_symbol), ps.$(fields[i]), st.$(fields[i]))) for i in 1:N] + $(S_symbols[i]), $(x_symbol), ps.$(fields[i]), st.$(fields[i]))) + for i in 1:N] push!(calls, :(st = NamedTuple{$fields}((($(Tuple(st_symbols)...),))))) push!(calls, :(return $(S_symbols[N + 1]), st)) return Expr(:block, calls...) end -function (L::dgm_lstm_block)(S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where T +function (L::dgm_lstm_block)( + S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T} return apply_dgm_lstm_block(L.layers, S, x, ps, st) end struct dgm{S, L, E} <: Lux.AbstractExplicitContainerLayer{(:d_start, :lstm, :d_end)} d_start::S - lstm:: L - d_end:: E + lstm::L + d_end::E end -function (l::dgm)(x::AbstractVecOrMat{T}, ps, st::NamedTuple) where T +function (l::dgm)(x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T} + S, st_start = l.d_start(x, ps.d_start, st.d_start) + S, st_lstm = l.lstm(S, x, ps.lstm, st.lstm) + y, st_end = l.d_end(S, ps.d_end, st.d_end) - S, st_start = l.d_start(x, ps.d_start, st.d_start); - S, st_lstm = l.lstm(S, x, ps.lstm, st.lstm); - y, st_end = l.d_end(S, ps.d_end, st.d_end); - st_new = ( - d_start= st_start, - lstm= st_lstm, - d_end= st_end + d_start = st_start, + lstm = st_lstm, + d_end = st_end ) - return y, st_new; - -end + return y, st_new +end """ dgm(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1, activation2, out_activation= Lux.identity) @@ -122,10 +127,12 @@ f(t, x, \\theta) &= \\sigma_{out}(W S^{L+1} + b). - `out_activation`: activation fn used for the output of the network. - `kwargs`: additional arguments to be splatted into [`PhysicsInformedNN`](@ref). 
""" -function dgm(in_dims::Int, out_dims::Int, modes::Int, layers::Int, activation1, activation2, out_activation) +function dgm(in_dims::Int, out_dims::Int, modes::Int, layers::Int, + activation1, activation2, out_activation) dgm( Lux.Dense(in_dims, modes, activation1), - dgm_lstm_block([dgm_lstm_layer(in_dims, modes, activation1, activation2) for i in 1:layers]), + dgm_lstm_block([dgm_lstm_layer(in_dims, modes, activation1, activation2) + for i in 1:layers]), Lux.Dense(modes, out_dims, out_activation) ) end @@ -157,9 +164,12 @@ discretization = DeepGalerkin(2, 1, 30, 3, tanh, tanh, identity, QuasiRandomTrai Sirignano, Justin and Spiliopoulos, Konstantinos, "DGM: A deep learning algorithm for solving partial differential equations", Journal of Computational Physics, Volume 375, 2018, Pages 1339-1364, doi: https://doi.org/10.1016/j.jcp.2018.08.029 """ -function DeepGalerkin(in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1::Function, activation2::Function, out_activation::Function, strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...) +function DeepGalerkin( + in_dims::Int, out_dims::Int, modes::Int, L::Int, activation1::Function, + activation2::Function, out_activation::Function, + strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...) PhysicsInformedNN( dgm(in_dims, out_dims, modes, L, activation1, activation2, out_activation), strategy; kwargs... ) -end \ No newline at end of file +end diff --git a/src/discretize.jl b/src/discretize.jl index 9d42c7d1d..9a40e0fe8 100644 --- a/src/discretize.jl +++ b/src/discretize.jl @@ -26,14 +26,14 @@ to for Lux.AbstractExplicitLayer. """ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; - eq_params = SciMLBase.NullParameters(), - param_estim = false, - default_p = nothing, - bc_indvars = pinnrep.indvars, - integrand = nothing, - dict_transformation_vars = nothing, - transformation_vars = nothing, - integrating_depvars = pinnrep.depvars) + eq_params = SciMLBase.NullParameters(), + param_estim = false, + default_p = nothing, + bc_indvars = pinnrep.indvars, + integrand = nothing, + dict_transformation_vars = nothing, + transformation_vars = nothing, + integrating_depvars = pinnrep.depvars) @unpack indvars, depvars, dict_indvars, dict_depvars, dict_depvar_input, phi, derivative, integral, multioutput, init_params, strategy, eq_params, @@ -46,8 +46,9 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; this_eq_pair = pair(eqs, depvars, dict_depvars, dict_depvar_input) this_eq_indvars = unique(vcat(values(this_eq_pair)...)) else - this_eq_pair = Dict(map(intvars -> dict_depvars[intvars] => dict_depvar_input[intvars], - integrating_depvars)) + this_eq_pair = Dict(map( + intvars -> dict_depvars[intvars] => dict_depvar_input[intvars], + integrating_depvars)) this_eq_indvars = transformation_vars isa Nothing ? 
unique(vcat(values(this_eq_pair)...)) : transformation_vars loss_function = integrand @@ -91,7 +92,7 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; push!(params_symbols, Symbol(:($eq_param))) end params_eq = Expr(:(=), build_expr(:tuple, params_symbols), - build_expr(:tuple, expr_params)) + build_expr(:tuple, expr_params)) push!(ex.args, params_eq) end @@ -103,7 +104,7 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; push!(params_symbols, Symbol(:($eq_param))) end params_eq = Expr(:(=), build_expr(:tuple, params_symbols), - build_expr(:tuple, expr_params)) + build_expr(:tuple, expr_params)) push!(ex.args, params_eq) end @@ -118,12 +119,12 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; indvars_ex = get_indvars_ex(bc_indvars) left_arg_pairs, right_arg_pairs = this_eq_indvars, indvars_ex vars_eq = Expr(:(=), build_expr(:tuple, left_arg_pairs), - build_expr(:tuple, right_arg_pairs)) + build_expr(:tuple, right_arg_pairs)) else indvars_ex = [:($:cord[[$i], :]) for (i, x) in enumerate(this_eq_indvars)] left_arg_pairs, right_arg_pairs = this_eq_indvars, indvars_ex vars_eq = Expr(:(=), build_expr(:tuple, left_arg_pairs), - build_expr(:tuple, right_arg_pairs)) + build_expr(:tuple, right_arg_pairs)) end if !(dict_transformation_vars isa Nothing) @@ -133,11 +134,13 @@ function build_symbolic_loss_function(pinnrep::PINNRepresentation, eqs; end transformation_expr = Expr(:block, :($(transformation_expr_...))) vcat_expr_loss_functions = Expr(:block, transformation_expr, vcat_expr, - loss_function) + loss_function) end let_ex = Expr(:let, vars_eq, vcat_expr_loss_functions) push!(ex.args, let_ex) - expr_loss_function = :(($vars) -> begin $ex end) + expr_loss_function = :(($vars) -> begin + $ex + end) end """ @@ -152,14 +155,16 @@ function build_loss_function(pinnrep::PINNRepresentation, eqs, bc_indvars) bc_indvars = bc_indvars === nothing ? pinnrep.indvars : bc_indvars expr_loss_function = build_symbolic_loss_function(pinnrep, eqs; - bc_indvars = bc_indvars, - eq_params = eq_params, - param_estim = param_estim, - default_p = default_p) + bc_indvars = bc_indvars, + eq_params = eq_params, + param_estim = param_estim, + default_p = default_p) u = get_u() _loss_function = @RuntimeGeneratedFunction(expr_loss_function) - loss_function = (cord, θ) -> begin _loss_function(cord, θ, phi, derivative, integral, u, - default_p) end + loss_function = (cord, θ) -> begin + _loss_function(cord, θ, phi, derivative, integral, u, + default_p) + end return loss_function end @@ -172,16 +177,16 @@ strategy. 
function generate_training_sets end function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, _indvars::Array, - _depvars::Array) + _depvars::Array) depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars, - _depvars) + _depvars) return generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars, - dict_depvars) + dict_depvars) end # Generate training set in the domain and on the boundary function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::Dict, - dict_depvars::Dict) + dict_depvars::Dict) if dx isa Array dxs = dx else @@ -213,20 +218,20 @@ function generate_training_sets(domains, dx, eqs, bcs, eltypeθ, dict_indvars::D bcs_train_sets = map(bound_args) do bt span = map(b -> get(dict_var_span, b, b), bt) _set = adapt(eltypeθ, - hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)) + hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)) end pde_vars = get_variables(eqs, dict_indvars, dict_depvars) pde_args = get_argument(eqs, dict_indvars, dict_depvars) pde_train_set = adapt(eltypeθ, - hcat(vec(map(points -> collect(points), - Iterators.product(bc_data...)))...)) + hcat(vec(map(points -> collect(points), + Iterators.product(bc_data...)))...)) pde_train_sets = map(pde_args) do bt span = map(b -> get(dict_var_span_, b, b), bt) _set = adapt(eltypeθ, - hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)) + hcat(vec(map(points -> collect(points), Iterators.product(span...)))...)) end [pde_train_sets, bcs_train_sets] end @@ -241,19 +246,19 @@ function get_bounds end function get_bounds(domains, eqs, bcs, eltypeθ, _indvars::Array, _depvars::Array, strategy) depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars, - _depvars) + _depvars) return get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy) end function get_bounds(domains, eqs, bcs, eltypeθ, _indvars::Array, _depvars::Array, - strategy::QuadratureTraining) + strategy::QuadratureTraining) depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars, - _depvars) + _depvars) return get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, strategy) end function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, - strategy::QuadratureTraining) + strategy::QuadratureTraining) dict_lower_bound = Dict([Symbol(d.variables) => infimum(d.domain) for d in domains]) dict_upper_bound = Dict([Symbol(d.variables) => supremum(d.domain) for d in domains]) @@ -286,7 +291,7 @@ function get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, str dx = 1 / strategy.points dict_span = Dict([Symbol(d.variables) => [ infimum(d.domain) + dx, - supremum(d.domain) - dx, + supremum(d.domain) - dx ] for d in domains]) # pde_bounds = [[infimum(d.domain),supremum(d.domain)] for d in domains] @@ -330,7 +335,7 @@ function get_numeric_integral(pinnrep::PINNRepresentation) ChainRulesCore.@ignore_derivatives lb_[i, :] = fill(l, 1, size(cord)[2]) else ChainRulesCore.@ignore_derivatives lb_[i, :] = l(cord, θ, phi, derivative, - nothing, u, nothing) + nothing, u, nothing) end end for (i, u_) in enumerate(ub) @@ -338,7 +343,7 @@ function get_numeric_integral(pinnrep::PINNRepresentation) ChainRulesCore.@ignore_derivatives ub_[i, :] = fill(u_, 1, size(cord)[2]) else ChainRulesCore.@ignore_derivatives ub_[i, :] = u_(cord, θ, phi, derivative, - nothing, u, nothing) + nothing, u, nothing) end end integration_arr = Matrix{Float64}(undef, 1, 0) @@ -346,7 
+351,7 @@ function get_numeric_integral(pinnrep::PINNRepresentation) # ub__ = @Zygote.ignore getindex(ub_, :, i) # lb__ = @Zygote.ignore getindex(lb_, :, i) integration_arr = hcat(integration_arr, - integration_(cord[:, i], lb_[:, i], ub_[:, i], θ)) + integration_(cord[:, i], lb_[:, i], ub_[:, i], θ)) end return integration_arr end @@ -365,7 +370,7 @@ which is later optimized upon to give Solution or the Solution Distribution of t For more information, see `discretize` and `PINNRepresentation`. """ function SciMLBase.symbolic_discretize(pde_system::PDESystem, - discretization::AbstractPINN) + discretization::AbstractPINN) eqs = pde_system.eqs bcs = pde_system.bcs chain = discretization.chain @@ -380,8 +385,9 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, additional_loss = discretization.additional_loss adaloss = discretization.adaptive_loss - depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(pde_system.indvars, - pde_system.depvars) + depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars( + pde_system.indvars, + pde_system.depvars) multioutput = discretization.multioutput init_params = discretization.init_params @@ -392,16 +398,18 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, # This is done because Float64 is almost always better for these applications if chain isa AbstractArray x = map(chain) do x - _x = ComponentArrays.ComponentArray(Lux.initialparameters(Random.default_rng(), - x)) + _x = ComponentArrays.ComponentArray(Lux.initialparameters( + Random.default_rng(), + x)) Float64.(_x) # No ComponentArray GPU support end names = ntuple(i -> depvars[i], length(chain)) init_params = ComponentArrays.ComponentArray(NamedTuple{names}(i - for i in x)) + for i in x)) else - init_params = Float64.(ComponentArrays.ComponentArray(Lux.initialparameters(Random.default_rng(), - chain))) + init_params = Float64.(ComponentArrays.ComponentArray(Lux.initialparameters( + Random.default_rng(), + chain))) end else init_params = init_params @@ -436,11 +444,11 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, if (phi isa Vector && phi[1].f isa Lux.AbstractExplicitLayer) for ϕ in phi ϕ.st = adapt(parameterless_type(ComponentArrays.getdata(flat_init_params)), - ϕ.st) + ϕ.st) end elseif (!(phi isa Vector) && phi.f isa Lux.AbstractExplicitLayer) phi.st = adapt(parameterless_type(ComponentArrays.getdata(flat_init_params)), - phi.st) + phi.st) end derivative = discretization.derivative @@ -471,24 +479,24 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, bc_integration_vars = get_integration_variables(bcs, dict_indvars, dict_depvars) pinnrep = PINNRepresentation(eqs, bcs, domains, eq_params, defaults, default_p, - param_estim, additional_loss, adaloss, depvars, indvars, - dict_indvars, dict_depvars, dict_depvar_input, logger, - multioutput, iteration, init_params, flat_init_params, phi, - derivative, - strategy, pde_indvars, bc_indvars, pde_integration_vars, - bc_integration_vars, nothing, nothing, nothing, nothing) + param_estim, additional_loss, adaloss, depvars, indvars, + dict_indvars, dict_depvars, dict_depvar_input, logger, + multioutput, iteration, init_params, flat_init_params, phi, + derivative, + strategy, pde_indvars, bc_indvars, pde_integration_vars, + bc_integration_vars, nothing, nothing, nothing, nothing) integral = get_numeric_integral(pinnrep) symbolic_pde_loss_functions = [build_symbolic_loss_function(pinnrep, eq; - bc_indvars = pde_indvar) + bc_indvars = pde_indvar) for (eq, pde_indvar) 
in zip(eqs, pde_indvars, - pde_integration_vars)] + pde_integration_vars)] symbolic_bc_loss_functions = [build_symbolic_loss_function(pinnrep, bc; - bc_indvars = bc_indvar) + bc_indvars = bc_indvar) for (bc, bc_indvar) in zip(bcs, bc_indvars, - bc_integration_vars)] + bc_integration_vars)] pinnrep.integral = integral pinnrep.symbolic_pde_loss_functions = symbolic_pde_loss_functions @@ -496,18 +504,18 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, datafree_pde_loss_functions = [build_loss_function(pinnrep, eq, pde_indvar) for (eq, pde_indvar, integration_indvar) in zip(eqs, - pde_indvars, - pde_integration_vars)] + pde_indvars, + pde_integration_vars)] datafree_bc_loss_functions = [build_loss_function(pinnrep, bc, bc_indvar) for (bc, bc_indvar, integration_indvar) in zip(bcs, - bc_indvars, - bc_integration_vars)] + bc_indvars, + bc_integration_vars)] pde_loss_functions, bc_loss_functions = merge_strategy_with_loss_function(pinnrep, - strategy, - datafree_pde_loss_functions, - datafree_bc_loss_functions) + strategy, + datafree_pde_loss_functions, + datafree_bc_loss_functions) # setup for all adaptive losses num_pde_losses = length(pde_loss_functions) num_bc_losses = length(bc_loss_functions) @@ -523,8 +531,8 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, adaloss.additional_loss_weights reweight_losses_func = generate_adaptive_loss_function(pinnrep, adaloss, - pde_loss_functions, - bc_loss_functions) + pde_loss_functions, + bc_loss_functions) function get_likelihood_estimate_function(discretization::PhysicsInformedNN) function full_loss_function(θ, p) @@ -548,7 +556,8 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, sum_weighted_pde_losses = sum(weighted_pde_losses) sum_weighted_bc_losses = sum(weighted_bc_losses) - weighted_loss_before_additional = sum_weighted_pde_losses + sum_weighted_bc_losses + weighted_loss_before_additional = sum_weighted_pde_losses + + sum_weighted_bc_losses full_weighted_loss = if additional_loss isa Nothing weighted_loss_before_additional @@ -562,7 +571,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, return additional_loss(phi, θ_, p_) end weighted_additional_loss_val = adaloss.additional_loss_weights[1] * - _additional_loss(phi, θ) + _additional_loss(phi, θ) weighted_loss_before_additional + weighted_additional_loss_val end @@ -608,11 +617,12 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, function get_likelihood_estimate_function(discretization::BayesianPINN) dataset_pde, dataset_bc = discretization.dataset - + # required as Physics loss also needed on the discrete dataset domain points # data points are discrete and so by default GridTraining loss applies # passing placeholder dx with GridTraining, it uses data points irl - datapde_loss_functions, databc_loss_functions = if (!(dataset_bc isa Nothing)||!(dataset_pde isa Nothing)) + datapde_loss_functions, databc_loss_functions = if (!(dataset_bc isa Nothing) || + !(dataset_pde isa Nothing)) merge_strategy_with_loglikelihood_function(pinnrep, GridTraining(0.1), datafree_pde_loss_functions, @@ -625,20 +635,19 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, stdpdes, stdbcs, stdextra = allstd # the aggregation happens on cpu even if the losses are gpu, probably fine since it's only a few of them pde_loglikelihoods = [logpdf(Normal(0, stdpdes[i]), pde_loss_function(θ)) - for (i, pde_loss_function) in enumerate(pde_loss_functions)] + for (i, pde_loss_function) in enumerate(pde_loss_functions)] bc_loglikelihoods = 
[logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ)) - for (j, bc_loss_function) in enumerate(bc_loss_functions)] + for (j, bc_loss_function) in enumerate(bc_loss_functions)] if !(datapde_loss_functions isa Nothing) pde_loglikelihoods += [logpdf(Normal(0, stdpdes[j]), pde_loss_function(θ)) - for (j, pde_loss_function) in enumerate(datapde_loss_functions)] - + for (j, pde_loss_function) in enumerate(datapde_loss_functions)] end if !(databc_loss_functions isa Nothing) - bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ)) - for (j, bc_loss_function) in enumerate(databc_loss_functions)] + bc_loglikelihoods += [logpdf(Normal(0, stdbcs[j]), bc_loss_function(θ)) + for (j, bc_loss_function) in enumerate(databc_loss_functions)] end # this is kind of a hack, and means that whenever the outer function is evaluated the increment goes up, even if it's not being optimized @@ -658,7 +667,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, sum_weighted_pde_loglikelihood = sum(weighted_pde_loglikelihood) sum_weighted_bc_loglikelihood = sum(weighted_bc_loglikelihood) weighted_loglikelihood_before_additional = sum_weighted_pde_loglikelihood + - sum_weighted_bc_loglikelihood + sum_weighted_bc_loglikelihood full_weighted_loglikelihood = if additional_loss isa Nothing weighted_loglikelihood_before_additional @@ -678,7 +687,7 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, weighted_additional_loglikelihood = adaloss.additional_loss_weights[1] * _additional_loglikelihood - weighted_loglikelihood_before_additional + weighted_additional_loglikelihood + weighted_loglikelihood_before_additional + weighted_additional_loglikelihood end return full_weighted_loglikelihood @@ -689,12 +698,11 @@ function SciMLBase.symbolic_discretize(pde_system::PDESystem, full_loss_function = get_likelihood_estimate_function(discretization) pinnrep.loss_functions = PINNLossFunctions(bc_loss_functions, pde_loss_functions, - full_loss_function, additional_loss, - datafree_pde_loss_functions, - datafree_bc_loss_functions) + full_loss_function, additional_loss, + datafree_pde_loss_functions, + datafree_bc_loss_functions) return pinnrep - end """ @@ -707,6 +715,6 @@ solution is the solution to the PDE. 
function SciMLBase.discretize(pde_system::PDESystem, discretization::PhysicsInformedNN) pinnrep = symbolic_discretize(pde_system, discretization) f = OptimizationFunction(pinnrep.loss_functions.full_loss_function, - Optimization.AutoZygote()) + Optimization.AutoZygote()) Optimization.OptimizationProblem(f, pinnrep.flat_init_params) end diff --git a/src/neural_adapter.jl b/src/neural_adapter.jl index 4e467edef..e54c6e818 100644 --- a/src/neural_adapter.jl +++ b/src/neural_adapter.jl @@ -6,7 +6,7 @@ function generate_training_sets(domains, dx, eqs, eltypeθ) end spans = [infimum(d.domain):dx:supremum(d.domain) for (d, dx) in zip(domains, dxs)] train_set = adapt(eltypeθ, - hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...)) + hcat(vec(map(points -> collect(points), Iterators.product(spans...)))...)) end function get_loss_function_(loss, init_params, pde_system, strategy::GridTraining) @@ -16,7 +16,7 @@ function get_loss_function_(loss, init_params, pde_system, strategy::GridTrainin end domains = pde_system.domain depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars, - pde_system.depvars) + pde_system.depvars) eltypeθ = eltype(init_params) dx = strategy.dx train_set = generate_training_sets(domains, dx, eqs, eltypeθ) @@ -44,7 +44,7 @@ function get_loss_function_(loss, init_params, pde_system, strategy::StochasticT domains = pde_system.domain depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars, - pde_system.depvars) + pde_system.depvars) eltypeθ = eltype(init_params) bound = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy) @@ -59,7 +59,7 @@ function get_loss_function_(loss, init_params, pde_system, strategy::QuasiRandom domains = pde_system.domain depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars, - pde_system.depvars) + pde_system.depvars) eltypeθ = eltype(init_params) bound = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy) @@ -67,7 +67,7 @@ function get_loss_function_(loss, init_params, pde_system, strategy::QuasiRandom end function get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, - strategy::QuadratureTraining) + strategy::QuadratureTraining) dict_lower_bound = Dict([Symbol(d.variables) => infimum(d.domain) for d in domains]) dict_upper_bound = Dict([Symbol(d.variables) => supremum(d.domain) for d in domains]) @@ -92,7 +92,7 @@ function get_loss_function_(loss, init_params, pde_system, strategy::QuadratureT domains = pde_system.domain depvars, indvars, dict_indvars, dict_depvars = get_vars(pde_system.indvars, - pde_system.depvars) + pde_system.depvars) eltypeθ = eltype(init_params) bound = get_bounds_(domains, eqs, eltypeθ, dict_indvars, dict_depvars, strategy) diff --git a/src/ode_solve.jl b/src/ode_solve.jl index eb5ae942e..e10793647 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -72,7 +72,7 @@ Lagaris, Isaac E., Aristidis Likas, and Dimitrios I. Fotiadis. "Artificial neura ordinary and partial differential equations." IEEE Transactions on Neural Networks 9, no. 5 (1998): 987-1000. 
""" struct NNODE{C, O, P, B, PE, K, AL <: Union{Nothing, Function}, - S <: Union{Nothing, AbstractTrainingStrategy}, + S <: Union{Nothing, AbstractTrainingStrategy} } <: NeuralPDEAlgorithm chain::C @@ -86,10 +86,12 @@ struct NNODE{C, O, P, B, PE, K, AL <: Union{Nothing, Function}, kwargs::K end function NNODE(chain, opt, init_params = nothing; - strategy = nothing, - autodiff = false, batch = true, param_estim = false, additional_loss = nothing, kwargs...) - !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) - NNODE(chain, opt, init_params, autodiff, batch, strategy, param_estim, additional_loss, kwargs) + strategy = nothing, + autodiff = false, batch = true, param_estim = false, additional_loss = nothing, kwargs...) + !(chain isa Lux.AbstractExplicitLayer) && + (chain = adapt(FromFluxAdaptor(false, false), chain)) + NNODE(chain, opt, init_params, autodiff, batch, + strategy, param_estim, additional_loss, kwargs) end """ @@ -116,7 +118,8 @@ end function (f::ODEPhi{C, T, U})(t::Number, θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number} - y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st) + y, st = f.chain( + adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st) ChainRulesCore.@ignore_derivatives f.st = st f.u0 + (t - f.t0) * first(y) end @@ -124,13 +127,15 @@ end function (f::ODEPhi{C, T, U})(t::AbstractVector, θ) where {C <: Lux.AbstractExplicitLayer, T, U <: Number} # Batch via data as row vectors - y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st) + y, st = f.chain( + adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st) ChainRulesCore.@ignore_derivatives f.st = st f.u0 .+ (t' .- f.t0) .* y end function (f::ODEPhi{C, T, U})(t::Number, θ) where {C <: Lux.AbstractExplicitLayer, T, U} - y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st) + y, st = f.chain( + adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), [t]), θ.depvar, f.st) ChainRulesCore.@ignore_derivatives f.st = st f.u0 .+ (t .- f.t0) .* y end @@ -138,7 +143,8 @@ end function (f::ODEPhi{C, T, U})(t::AbstractVector, θ) where {C <: Lux.AbstractExplicitLayer, T, U} # Batch via data as row vectors - y, st = f.chain(adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st) + y, st = f.chain( + adapt(parameterless_type(ComponentArrays.getdata(θ.depvar)), t'), θ.depvar, f.st) ChainRulesCore.@ignore_derivatives f.st = st f.u0 .+ (t' .- f.t0) .* y end @@ -223,18 +229,22 @@ function generate_loss(strategy::QuadratureTraining, phi, f, autodiff::Bool, tsp batch, param_estim::Bool) integrand(t::Number, θ) = abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) - integrand(ts, θ) = [abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) for t in ts] + function integrand(ts, θ) + [abs2(inner_loss(phi, f, autodiff, t, θ, p, param_estim)) for t in ts] + end function loss(θ, _) intf = BatchIntegralFunction(integrand, max_batch = strategy.batch) intprob = IntegralProblem(intf, (tspan[1], tspan[2]), θ) - sol = solve(intprob, strategy.quadrature_alg; abstol = strategy.abstol, reltol = strategy.reltol, maxiters = strategy.maxiters) + sol = solve(intprob, strategy.quadrature_alg; abstol = strategy.abstol, + reltol = strategy.reltol, maxiters = strategy.maxiters) sol.u end return loss end -function generate_loss(strategy::GridTraining, phi, f, autodiff::Bool, tspan, p, 
batch, param_estim::Bool) +function generate_loss( + strategy::GridTraining, phi, f, autodiff::Bool, tspan, p, batch, param_estim::Bool) ts = tspan[1]:(strategy.dx):tspan[2] autodiff && throw(ArgumentError("autodiff not supported for GridTraining.")) function loss(θ, _) @@ -262,7 +272,8 @@ function generate_loss(strategy::StochasticTraining, phi, f, autodiff::Bool, tsp return loss end -function generate_loss(strategy::WeightedIntervalTraining, phi, f, autodiff::Bool, tspan, p, +function generate_loss( + strategy::WeightedIntervalTraining, phi, f, autodiff::Bool, tspan, p, batch, param_estim::Bool) autodiff && throw(ArgumentError("autodiff not supported for WeightedIntervalTraining.")) minT = tspan[1] @@ -356,18 +367,24 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, #train points generation init_params = alg.init_params - !(chain isa Lux.AbstractExplicitLayer) && error("Only Lux.AbstractExplicitLayer neural networks are supported") + !(chain isa Lux.AbstractExplicitLayer) && + error("Only Lux.AbstractExplicitLayer neural networks are supported") phi, init_params = generate_phi_θ(chain, t0, u0, init_params) - ((eltype(eltype(init_params).types[1]) <: Complex || eltype(eltype(init_params).types[2]) <: Complex) && alg.strategy isa QuadratureTraining) && + ((eltype(eltype(init_params).types[1]) <: Complex || + eltype(eltype(init_params).types[2]) <: Complex) && + alg.strategy isa QuadratureTraining) && error("QuadratureTraining cannot be used with complex parameters. Use other strategies.") init_params = if alg.param_estim - ComponentArrays.ComponentArray(; depvar = ComponentArrays.ComponentArray(init_params), p = prob.p) + ComponentArrays.ComponentArray(; + depvar = ComponentArrays.ComponentArray(init_params), p = prob.p) else - ComponentArrays.ComponentArray(; depvar = ComponentArrays.ComponentArray(init_params)) + ComponentArrays.ComponentArray(; + depvar = ComponentArrays.ComponentArray(init_params)) end - isinplace(prob) && throw(error("The NNODE solver only supports out-of-place ODE definitions, i.e. du=f(u,p,t).")) + isinplace(prob) && + throw(error("The NNODE solver only supports out-of-place ODE definitions, i.e. 
du=f(u,p,t).")) try phi(t0, init_params) @@ -395,7 +412,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, batch = alg.batch inner_f = generate_loss(strategy, phi, f, autodiff, tspan, p, batch, param_estim) additional_loss = alg.additional_loss - (param_estim && isnothing(additional_loss)) && throw(ArgumentError("Please provide `additional_loss` in `NNODE` for parameter estimation (`param_estim` is true).")) + (param_estim && isnothing(additional_loss)) && + throw(ArgumentError("Please provide `additional_loss` in `NNODE` for parameter estimation (`param_estim` is true).")) # Creates OptimizationFunction Object from total_loss function total_loss(θ, _) @@ -405,7 +423,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, end if !(tstops isa Nothing) num_tstops_points = length(tstops) - tstops_loss_func = evaluate_tstops_loss(phi, f, autodiff, tstops, p, batch, param_estim) + tstops_loss_func = evaluate_tstops_loss( + phi, f, autodiff, tstops, p, batch, param_estim) tstops_loss = tstops_loss_func(θ, phi) if strategy isa GridTraining num_original_points = length(tspan[1]:(strategy.dx):tspan[2]) diff --git a/src/pinn_types.jl b/src/pinn_types.jl index 50f7649dc..59480d8a6 100644 --- a/src/pinn_types.jl +++ b/src/pinn_types.jl @@ -105,9 +105,11 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN kwargs...) multioutput = chain isa AbstractArray if multioutput - !all(i -> i isa Lux.AbstractExplicitLayer, chain) && (chain = Lux.transform.(chain)) + !all(i -> i isa Lux.AbstractExplicitLayer, chain) && + (chain = Lux.transform.(chain)) else - !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) + !(chain isa Lux.AbstractExplicitLayer) && + (chain = adapt(FromFluxAdaptor(false, false), chain)) end if phi === nothing if multioutput @@ -117,9 +119,11 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN end else if multioutput - all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) || throw(ArgumentError("Only Lux Chains are supported")) + all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) || + throw(ArgumentError("Only Lux Chains are supported")) else - (phi.f isa Lux.AbstractExplicitLayer) || throw(ArgumentError("Only Lux Chains are supported")) + (phi.f isa Lux.AbstractExplicitLayer) || + throw(ArgumentError("Only Lux Chains are supported")) end _phi = phi end @@ -139,7 +143,8 @@ struct PhysicsInformedNN{T, P, PH, DER, PE, AL, ADA, LOG, K} <: AbstractPINN new{typeof(strategy), typeof(init_params), typeof(_phi), typeof(_derivative), typeof(param_estim), - typeof(additional_loss), typeof(adaptive_loss), typeof(logger), typeof(kwargs)}(chain, + typeof(additional_loss), typeof(adaptive_loss), typeof(logger), typeof(kwargs)}( + chain, strategy, init_params, _phi, @@ -241,9 +246,11 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN kwargs...) 
multioutput = chain isa AbstractArray if multioutput - !all(i -> i isa Lux.AbstractExplicitLayer, chain) && (chain = Lux.transform.(chain)) + !all(i -> i isa Lux.AbstractExplicitLayer, chain) && + (chain = Lux.transform.(chain)) else - !(chain isa Lux.AbstractExplicitLayer) && (chain = adapt(FromFluxAdaptor(false, false), chain)) + !(chain isa Lux.AbstractExplicitLayer) && + (chain = adapt(FromFluxAdaptor(false, false), chain)) end if phi === nothing if multioutput @@ -253,9 +260,11 @@ struct BayesianPINN{T, P, PH, DER, PE, AL, ADA, LOG, D, K} <: AbstractPINN end else if multioutput - all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) || throw(ArgumentError("Only Lux Chains are supported")) + all([phi.f[i] isa Lux.AbstractExplicitLayer for i in eachindex(phi.f)]) || + throw(ArgumentError("Only Lux Chains are supported")) else - (phi.f isa Lux.AbstractExplicitLayer) || throw(ArgumentError("Only Lux Chains are supported")) + (phi.f isa Lux.AbstractExplicitLayer) || + throw(ArgumentError("Only Lux Chains are supported")) end _phi = phi end diff --git a/src/rode_solve.jl b/src/rode_solve.jl index 16b4637bc..85b496880 100644 --- a/src/rode_solve.jl +++ b/src/rode_solve.jl @@ -7,7 +7,7 @@ struct NNRODE{C, W, O, P, K} <: NeuralPDEAlgorithm kwargs::K end function NNRODE(chain, W, opt = Optim.BFGS(), init_params = nothing; autodiff = false, - kwargs...) + kwargs...) if init_params === nothing if chain isa Flux.Chain init_params, re = Flux.destructure(chain) @@ -21,15 +21,15 @@ function NNRODE(chain, W, opt = Optim.BFGS(), init_params = nothing; autodiff = end function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, - alg::NeuralPDEAlgorithm, - args...; - dt, - timeseries_errors = true, - save_everystep = true, - adaptive = false, - abstol = 1.0f-6, - verbose = false, - maxiters = 100) + alg::NeuralPDEAlgorithm, + args...; + dt, + timeseries_errors = true, + save_everystep = true, + adaptive = false, + abstol = 1.0f-6, + verbose = false, + maxiters = 100) DiffEqBase.isinplace(prob) && error("Only out-of-place methods are allowed!") u0 = prob.u0 @@ -53,7 +53,7 @@ function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, phi = (t, W, θ) -> u0 + (t - tspan[1]) * first(chain(adapt(DiffEqBase.parameterless_type(θ), [t, W]), - θ)) + θ)) else phi = (t, W, θ) -> u0 + (t - tspan[1]) * @@ -111,6 +111,6 @@ function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, sol = DiffEqBase.build_solution(prob, alg, ts, u, W = W, calculate_error = false) DiffEqBase.has_analytic(prob.f) && DiffEqBase.calculate_solution_errors!(sol; timeseries_errors = true, - dense_errors = false) + dense_errors = false) sol end #solve diff --git a/src/symbolic_utilities.jl b/src/symbolic_utilities.jl index b4d4c97f3..c78ddeff8 100644 --- a/src/symbolic_utilities.jl +++ b/src/symbolic_utilities.jl @@ -26,8 +26,8 @@ function _dot_(x::Expr) Expr(:., dotargs[1], Expr(:tuple, dotargs[2:end]...)) elseif x.head === :comparison Expr(:comparison, - (iseven(i) && dottable_(arg) && arg isa Symbol && isoperator(arg) ? - Symbol('.', arg) : arg for (i, arg) in pairs(dotargs))...) + (iseven(i) && dottable_(arg) && arg isa Symbol && isoperator(arg) ? + Symbol('.', arg) : arg for (i, arg) in pairs(dotargs))...) 
elseif x.head === :$ x.args[1] elseif x.head === :let # don't add dots to `let x=...` assignments @@ -70,12 +70,12 @@ get_dict_vars(vars) = Dict([Symbol(v) .=> i for (i, v) in enumerate(vars)]) # Wrapper for _transform_expression function transform_expression(pinnrep::PINNRepresentation, ex; is_integral = false, - dict_transformation_vars = nothing, - transformation_vars = nothing) + dict_transformation_vars = nothing, + transformation_vars = nothing) if ex isa Expr ex = _transform_expression(pinnrep, ex; is_integral = is_integral, - dict_transformation_vars = dict_transformation_vars, - transformation_vars = transformation_vars) + dict_transformation_vars = dict_transformation_vars, + transformation_vars = transformation_vars) end return ex end @@ -92,7 +92,7 @@ function get_limits(domain) return [leftendpoint(domain)], [rightendpoint(domain)] elseif domain isa ProductDomain return collect(map(leftendpoint, DomainSets.components(domain))), - collect(map(rightendpoint, DomainSets.components(domain))) + collect(map(rightendpoint, DomainSets.components(domain))) end end @@ -115,8 +115,8 @@ where - θ - weights in neural network. """ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = false, - dict_transformation_vars = nothing, - transformation_vars = nothing) + dict_transformation_vars = nothing, + transformation_vars = nothing) @unpack indvars, depvars, dict_indvars, dict_depvars, dict_depvar_input, multioutput, strategy, phi, derivative, integral, flat_init_params, init_params = pinnrep @@ -137,7 +137,7 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa var_, Symbol(:cord, num_depvar), Symbol(:($θ), num_depvar), - Symbol(:phi, num_depvar), + Symbol(:phi, num_depvar) ] end break @@ -171,7 +171,7 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa Symbol(:cord, num_depvar), εs_dnv, order, - Symbol(:($θ), num_depvar), + Symbol(:($θ), num_depvar) ] end break @@ -195,31 +195,32 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa end lb, ub = get_limits(_args[1].domain.domain) - lb, ub, _args[2], dict_transformation_vars, transformation_vars = transform_inf_integral(lb, - ub, - _args[2], - integrating_depvars, - dict_depvar_input, - dict_depvars, - integrating_variable, - eltypeθ) + lb, ub, _args[2], dict_transformation_vars, transformation_vars = transform_inf_integral( + lb, + ub, + _args[2], + integrating_depvars, + dict_depvar_input, + dict_depvars, + integrating_variable, + eltypeθ) num_depvar = map(int_depvar -> dict_depvars[int_depvar], - integrating_depvars) + integrating_depvars) integrand_ = transform_expression(pinnrep, _args[2]; - is_integral = false, - dict_transformation_vars = dict_transformation_vars, - transformation_vars = transformation_vars) + is_integral = false, + dict_transformation_vars = dict_transformation_vars, + transformation_vars = transformation_vars) integrand__ = _dot_(integrand_) integrand = build_symbolic_loss_function(pinnrep, nothing; - integrand = integrand__, - integrating_depvars = integrating_depvars, - eq_params = SciMLBase.NullParameters(), - dict_transformation_vars = dict_transformation_vars, - transformation_vars = transformation_vars, - param_estim = false, - default_p = nothing) + integrand = integrand__, + integrating_depvars = integrating_depvars, + eq_params = SciMLBase.NullParameters(), + dict_transformation_vars = dict_transformation_vars, + transformation_vars = transformation_vars, + param_estim = false, + default_p = 
nothing) # integrand = repr(integrand) lb = toexpr.(lb) ub = toexpr.(ub) @@ -230,10 +231,10 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa push!(lb_, l) else l_expr = NeuralPDE.build_symbolic_loss_function(pinnrep, nothing; - integrand = _dot_(l), - integrating_depvars = integrating_depvars, - param_estim = false, - default_p = nothing) + integrand = _dot_(l), + integrating_depvars = integrating_depvars, + param_estim = false, + default_p = nothing) l_f = @RuntimeGeneratedFunction(l_expr) push!(lb_, l_f) end @@ -243,10 +244,10 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa push!(ub_, u_) else u_expr = NeuralPDE.build_symbolic_loss_function(pinnrep, nothing; - integrand = _dot_(u_), - integrating_depvars = integrating_depvars, - param_estim = false, - default_p = nothing) + integrand = _dot_(u_), + integrating_depvars = integrating_depvars, + param_estim = false, + default_p = nothing) u_f = @RuntimeGeneratedFunction(u_expr) push!(ub_, u_f) end @@ -262,15 +263,15 @@ function _transform_expression(pinnrep::PINNRepresentation, ex; is_integral = fa integrand_func, lb_, ub_, - :($θ), + :($θ) ] break end else ex.args[i] = _transform_expression(pinnrep, ex.args[i]; - is_integral = is_integral, - dict_transformation_vars = dict_transformation_vars, - transformation_vars = transformation_vars) + is_integral = is_integral, + dict_transformation_vars = dict_transformation_vars, + transformation_vars = transformation_vars) end end return ex @@ -351,8 +352,8 @@ function get_vars(indvars_, depvars_) dname = ModelingToolkit.getname(d) push!(depvars, dname) push!(dict_depvar_input, - dname => [nameof(unwrap(argument)) - for argument in arguments(unwrap(d))]) + dname => [nameof(unwrap(argument)) + for argument in arguments(unwrap(d))]) else dname = ModelingToolkit.getname(d) push!(depvars, dname) @@ -367,7 +368,7 @@ end function get_integration_variables(eqs, _indvars::Array, _depvars::Array) depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars, - _depvars) + _depvars) get_integration_variables(eqs, dict_indvars, dict_depvars) end @@ -375,7 +376,7 @@ function get_integration_variables(eqs, dict_indvars, dict_depvars) exprs = toexpr.(eqs) vars = map(exprs) do expr _vars = Symbol.(filter(indvar -> length(find_thing_in_expr(expr, indvar)) > 0, - sort(collect(keys(dict_indvars))))) + sort(collect(keys(dict_indvars))))) end end @@ -388,7 +389,7 @@ function get_variables end function get_variables(eqs, _indvars::Array, _depvars::Array) depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars, - _depvars) + _depvars) return get_variables(eqs, dict_indvars, dict_depvars) end @@ -427,7 +428,7 @@ function get_argument end # Get arguments from boundary condition functions function get_argument(eqs, _indvars::Array, _depvars::Array) depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = get_vars(_indvars, - _depvars) + _depvars) get_argument(eqs, dict_indvars, dict_depvars) end function get_argument(eqs, dict_indvars, dict_depvars) diff --git a/src/training_strategies.jl b/src/training_strategies.jl index c997e6c4c..68aa9cd71 100644 --- a/src/training_strategies.jl +++ b/src/training_strategies.jl @@ -18,7 +18,7 @@ end function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation, strategy::GridTraining, datafree_pde_loss_function, - datafree_bc_loss_function; train_sets_pde = nothing,train_sets_bc=nothing) + datafree_bc_loss_function; train_sets_pde = 
nothing, train_sets_bc = nothing) @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep eltypeθ = eltype(pinnrep.flat_init_params) @@ -26,21 +26,23 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation, # is vec as later each _set in pde_train_sets are columns as points transformed to vector of points (pde_train_sets must be rowwise) pde_loss_functions = if !(train_sets_pde isa Nothing) pde_train_sets = [train_set[:, 2:end] for train_set in train_sets_pde] - pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)), + pde_train_sets = adapt.( + parameterless_type(ComponentArrays.getdata(flat_init_params)), pde_train_sets) [get_loss_function(_loss, _set, eltypeθ, strategy) - for (_loss, _set) in zip(datafree_pde_loss_function, + for (_loss, _set) in zip(datafree_pde_loss_function, pde_train_sets)] else nothing end - + bc_loss_functions = if !(train_sets_bc isa Nothing) bcs_train_sets = [train_set[:, 2:end] for train_set in train_sets_bc] - bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)), - bcs_train_sets) + bcs_train_sets = adapt.( + parameterless_type(ComponentArrays.getdata(flat_init_params)), + bcs_train_sets) [get_loss_function(_loss, _set, eltypeθ, strategy) - for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)] + for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)] else nothing end @@ -49,25 +51,25 @@ function merge_strategy_with_loglikelihood_function(pinnrep::PINNRepresentation, end function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, - strategy::GridTraining, - datafree_pde_loss_function, - datafree_bc_loss_function) + strategy::GridTraining, + datafree_pde_loss_function, + datafree_bc_loss_function) @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep dx = strategy.dx eltypeθ = eltype(pinnrep.flat_init_params) train_sets = generate_training_sets(domains, dx, eqs, bcs, eltypeθ, - dict_indvars, dict_depvars) + dict_indvars, dict_depvars) # the points in the domain and on the boundary pde_train_sets, bcs_train_sets = train_sets pde_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)), - pde_train_sets) + pde_train_sets) bcs_train_sets = adapt.(parameterless_type(ComponentArrays.getdata(flat_init_params)), - bcs_train_sets) + bcs_train_sets) pde_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy) for (_loss, _set) in zip(datafree_pde_loss_function, - pde_train_sets)] + pde_train_sets)] bc_loss_functions = [get_loss_function(_loss, _set, eltypeθ, strategy) for (_loss, _set) in zip(datafree_bc_loss_function, bcs_train_sets)] @@ -76,7 +78,7 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, end function get_loss_function(loss_function, train_set, eltypeθ, strategy::GridTraining; - τ = nothing) + τ = nothing) loss = (θ) -> mean(abs2, loss_function(train_set, θ)) end @@ -107,15 +109,15 @@ function generate_random_points(points, bound, eltypeθ) end function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, - strategy::StochasticTraining, - datafree_pde_loss_function, - datafree_bc_loss_function) + strategy::StochasticTraining, + datafree_pde_loss_function, + datafree_bc_loss_function) @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep eltypeθ = eltype(pinnrep.flat_init_params) bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, - strategy) + strategy) 
pde_bounds, bcs_bounds = bounds pde_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy) @@ -128,7 +130,7 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, end function get_loss_function(loss_function, bound, eltypeθ, strategy::StochasticTraining; - τ = nothing) + τ = nothing) points = strategy.points loss = (θ) -> begin sets = generate_random_points(points, bound, eltypeθ) @@ -173,13 +175,13 @@ struct QuasiRandomTraining <: AbstractTrainingStrategy end function QuasiRandomTraining(points; bcs_points = points, - sampling_alg = LatinHypercubeSample(), resampling = true, - minibatch = 0) + sampling_alg = LatinHypercubeSample(), resampling = true, + minibatch = 0) QuasiRandomTraining(points, bcs_points, sampling_alg, resampling, minibatch) end function generate_quasi_random_points_batch(points, bound, eltypeθ, sampling_alg, - minibatch) + minibatch) lb, ub = bound set = QuasiMonteCarlo.generate_design_matrices(points, lb, ub, sampling_alg, minibatch) set = map(s -> adapt(parameterless_type(eltypeθ), s), set) @@ -187,24 +189,24 @@ function generate_quasi_random_points_batch(points, bound, eltypeθ, sampling_al end function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, - strategy::QuasiRandomTraining, - datafree_pde_loss_function, - datafree_bc_loss_function) + strategy::QuasiRandomTraining, + datafree_pde_loss_function, + datafree_bc_loss_function) @unpack domains, eqs, bcs, dict_indvars, dict_depvars, flat_init_params = pinnrep eltypeθ = eltype(pinnrep.flat_init_params) bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, - strategy) + strategy) pde_bounds, bcs_bounds = bounds pde_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy) for (_loss, bound) in zip(datafree_pde_loss_function, pde_bounds)] strategy_ = QuasiRandomTraining(strategy.bcs_points; - sampling_alg = strategy.sampling_alg, - resampling = strategy.resampling, - minibatch = strategy.minibatch) + sampling_alg = strategy.sampling_alg, + resampling = strategy.resampling, + minibatch = strategy.minibatch) bc_loss_functions = [get_loss_function(_loss, bound, eltypeθ, strategy_) for (_loss, bound) in zip(datafree_bc_loss_function, bcs_bounds)] @@ -212,7 +214,7 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, end function get_loss_function(loss_function, bound, eltypeθ, strategy::QuasiRandomTraining; - τ = nothing) + τ = nothing) sampling_alg = strategy.sampling_alg points = strategy.points resampling = strategy.resampling @@ -225,9 +227,9 @@ function get_loss_function(loss_function, bound, eltypeθ, strategy::QuasiRandom loss = if resampling == true θ -> begin sets = ChainRulesCore.@ignore_derivatives QuasiMonteCarlo.sample(points, - bound[1], - bound[2], - sampling_alg) + bound[1], + bound[2], + sampling_alg) sets_ = adapt(parameterless_type(ComponentArrays.getdata(θ)), sets) mean(abs2, loss_function(sets_, θ)) end @@ -273,19 +275,19 @@ struct QuadratureTraining{Q <: SciMLBase.AbstractIntegralAlgorithm, T} <: end function QuadratureTraining(; quadrature_alg = CubatureJLh(), reltol = 1e-3, abstol = 1e-6, - maxiters = 1_000, batch = 100) + maxiters = 1_000, batch = 100) QuadratureTraining(quadrature_alg, reltol, abstol, maxiters, batch) end function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, - strategy::QuadratureTraining, - datafree_pde_loss_function, - datafree_bc_loss_function) + strategy::QuadratureTraining, + datafree_pde_loss_function, + datafree_bc_loss_function) @unpack domains, eqs, bcs, 
dict_indvars, dict_depvars, flat_init_params = pinnrep eltypeθ = eltype(pinnrep.flat_init_params) bounds = get_bounds(domains, eqs, bcs, eltypeθ, dict_indvars, dict_depvars, - strategy) + strategy) pde_bounds, bcs_bounds = bounds lbs, ubs = pde_bounds @@ -299,7 +301,7 @@ function merge_strategy_with_loss_function(pinnrep::PINNRepresentation, end function get_loss_function(loss_function, lb, ub, eltypeθ, strategy::QuadratureTraining; - τ = nothing) + τ = nothing) if length(lb) == 0 loss = (θ) -> mean(abs2, loss_function(rand(eltypeθ, 1, 10), θ)) return loss @@ -313,10 +315,10 @@ function get_loss_function(loss_function, lb, ub, eltypeθ, strategy::Quadrature integral_function = BatchIntegralFunction(integrand, max_batch = strategy.batch) prob = IntegralProblem(integral_function, (lb, ub), θ) solve(prob, - strategy.quadrature_alg, - reltol = strategy.reltol, - abstol = strategy.abstol, - maxiters = strategy.maxiters)[1] + strategy.quadrature_alg, + reltol = strategy.reltol, + abstol = strategy.abstol, + maxiters = strategy.maxiters)[1] end loss = (θ) -> 1 / area * f_(lb, ub, loss_function, θ) return loss @@ -349,8 +351,8 @@ function WeightedIntervalTraining(weights, points) end function get_loss_function(loss_function, train_set, eltypeθ, - strategy::WeightedIntervalTraining; - τ = nothing) + strategy::WeightedIntervalTraining; + τ = nothing) loss = (θ) -> mean(abs2, loss_function(train_set, θ)) return loss -end \ No newline at end of file +end diff --git a/src/transform_inf_integral.jl b/src/transform_inf_integral.jl index d0afb3c91..75bc605f1 100644 --- a/src/transform_inf_integral.jl +++ b/src/transform_inf_integral.jl @@ -1,6 +1,6 @@ function transform_inf_expr(integrating_depvars, dict_depvar_input, dict_depvars, - integrating_variables, transform) + integrating_variables, transform) τs = Symbolics.variables(:τ, 1:length(integrating_variables)) τs = Symbol.(τs) dict_transformation_vars = Dict() @@ -26,8 +26,9 @@ function transform_inf_expr(integrating_depvars, dict_depvar_input, dict_depvars dict_depvar_input_[depvar] = ans end - this_eq_pair = Dict(map(intvars -> dict_depvars[intvars] => dict_depvar_input_[intvars], - integrating_depvars)) + this_eq_pair = Dict(map( + intvars -> dict_depvars[intvars] => dict_depvar_input_[intvars], + integrating_depvars)) this_eq_indvars = unique(vcat(values(this_eq_pair)...)) return dict_transformation_vars, this_eq_indvars, integrating_var_transformation @@ -54,7 +55,7 @@ function v_semiinf(t, a, upto_inf) end function get_inf_transformation_jacobian(integrating_variable, _inf, _semiup, _semilw, - _num_semiup, _num_semilw) + _num_semiup, _num_semilw) j = [] for var in integrating_variable if _inf[1] @@ -70,9 +71,9 @@ function get_inf_transformation_jacobian(integrating_variable, _inf, _semiup, _s end function transform_inf_integral(lb, ub, integrating_ex, integrating_depvars, - dict_depvar_input, dict_depvars, integrating_variable, - eltypeθ; dict_transformation_vars = nothing, - transformation_vars = nothing) + dict_depvar_input, dict_depvars, integrating_variable, + eltypeθ; dict_transformation_vars = nothing, + transformation_vars = nothing) lb_ = Symbolics.tosymbol.(lb) ub_ = Symbolics.tosymbol.(ub) @@ -102,11 +103,12 @@ function transform_inf_integral(lb, ub, integrating_ex, integrating_depvars, end end - dict_transformation_vars, transformation_vars, integrating_var_transformation = transform_inf_expr(integrating_depvars, - dict_depvar_input, - dict_depvars, - integrating_variable, - transform_indvars) + dict_transformation_vars, 
transformation_vars, integrating_var_transformation = transform_inf_expr( + integrating_depvars, + dict_depvar_input, + dict_depvars, + integrating_variable, + transform_indvars) ϵ = 1 / 20 #cbrt(eps(eltypeθ)) @@ -116,7 +118,7 @@ function transform_inf_integral(lb, ub, integrating_ex, integrating_depvars, (1.00 - ϵ) .* _num_semiup + ub ./ (1 .+ ub) .* _num_semilw j = get_inf_transformation_jacobian(integrating_var_transformation, _inf, _semiup, - _semilw, _num_semiup, _num_semilw) + _semilw, _num_semiup, _num_semilw) integrating_ex = Expr(:call, :*, integrating_ex, j...) end diff --git a/test/BPINN_PDE_tests.jl b/test/BPINN_PDE_tests.jl index 6dd3637f5..98cacb748 100644 --- a/test/BPINN_PDE_tests.jl +++ b/test/BPINN_PDE_tests.jl @@ -100,9 +100,12 @@ end # Neural network chain = [ Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), - Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), - Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), - Lux.Dense(10, 1)), Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)), + Lux.Dense(10, 1)), Lux.Chain( + Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), + Lux.Dense(10, 1)), + Lux.Chain(Lux.Dense(1, 10, Lux.tanh), Lux.Dense(10, 10, Lux.tanh), + Lux.Dense(10, 1)), + Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)), Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1))] discretization = BayesianPINN(chain, GridTraining(0.01)) diff --git a/test/BPINN_PDEinvsol_tests.jl b/test/BPINN_PDEinvsol_tests.jl index 61c64de27..b2d27c53a 100644 --- a/test/BPINN_PDEinvsol_tests.jl +++ b/test/BPINN_PDEinvsol_tests.jl @@ -35,8 +35,8 @@ Random.seed!(100) dataset = [hcat(u, timepoints)] # checking all training strategies - discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true, - dataset = [dataset, nothing]) + discretization = BayesianPINN([chainl], StochasticTraining(200), param_estim = true, + dataset = [dataset, nothing]) ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -47,8 +47,8 @@ Random.seed!(100) saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) - discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true, - dataset = [dataset, nothing]) + discretization = BayesianPINN([chainl], QuasiRandomTraining(200), param_estim = true, + dataset = [dataset, nothing]) ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -59,8 +59,8 @@ Random.seed!(100) saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) - discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true, - dataset = [dataset, nothing]) + discretization = BayesianPINN([chainl], QuadratureTraining(), param_estim = true, + dataset = [dataset, nothing]) ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -71,8 +71,8 @@ Random.seed!(100) saveats = [1 / 50.0], param = [LogNormal(6.0, 0.5)]) - discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true, - dataset = [dataset, nothing]) + discretization = BayesianPINN([chainl], GridTraining([0.02]), param_estim = true, + dataset = [dataset, nothing]) sol1 = ahmc_bayesian_pinn_pde(pde_system, discretization; @@ -112,7 +112,7 @@ end Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), Lux.Dense(n, 1)), Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.tanh), - Lux.Dense(n, 1)), + Lux.Dense(n, 1)) ] #Generate Data @@ -132,8 +132,8 @@ end ts_ = hcat(sol(ts).t...)[1, :] dataset = [hcat(us[i, :], ts_) for i in 1:3] - discretization = 
BayesianPINN(chain, GridTraining([0.01]); param_estim = true, - dataset = [dataset, nothing]) + discretization = BayesianPINN(chain, GridTraining([0.01]); param_estim = true, + dataset = [dataset, nothing]) @named pde_system = PDESystem(eqs, bcs, domains, [t], [x(t), y(t), z(t)], [σ_], defaults = Dict([p => 1.0 for p in [σ_]])) diff --git a/test/BPINN_Tests.jl b/test/BPINN_Tests.jl index 6821a8d35..a0c1eee9e 100644 --- a/test/BPINN_Tests.jl +++ b/test/BPINN_Tests.jl @@ -35,7 +35,8 @@ Random.seed!(100) chainlux = Lux.Chain(Lux.Dense(1, 7, tanh), Lux.Dense(7, 1)) θinit, st = Lux.setup(Random.default_rng(), chainlux) - fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainlux, draw_samples = 2500) + fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode( + prob, chainlux, draw_samples = 2500) alg = BNNODE(chainlux, draw_samples = 2500) sol1lux = solve(prob, alg) @@ -102,7 +103,7 @@ end 3.0), param = [ LogNormal(9, - 0.5), + 0.5) ]) sol2lux = solve(prob, alg) @@ -153,14 +154,16 @@ end chainlux12 = Lux.Chain(Lux.Dense(1, 6, tanh), Lux.Dense(6, 6, tanh), Lux.Dense(6, 1)) θinit, st = Lux.setup(Random.default_rng(), chainlux12) - fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode(prob, chainlux12, + fh_mcmc_chainlux12, fhsampleslux12, fhstatslux12 = ahmc_bayesian_pinn_ode( + prob, chainlux12, draw_samples = 1500, l2std = [0.03], phystd = [0.03], priorsNNw = (0.0, 10.0)) - fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode(prob, chainlux12, + fh_mcmc_chainlux22, fhsampleslux22, fhstatslux22 = ahmc_bayesian_pinn_ode( + prob, chainlux12, dataset = dataset, draw_samples = 1500, l2std = [0.03], @@ -169,7 +172,7 @@ end 10.0), param = [ Normal(-7, - 4), + 4) ]) alg = BNNODE(chainlux12, @@ -181,7 +184,7 @@ end 10.0), param = [ Normal(-7, - 4), + 4) ]) sol3lux_pestim = solve(prob, alg) @@ -239,7 +242,8 @@ end time1 = vec(collect(Float64, ta0)) physsol0_1 = [linear_analytic(prob.u0, p, time1[i]) for i in eachindex(time1)] chainflux = Flux.Chain(Flux.Dense(1, 7, tanh), Flux.Dense(7, 1)) |> Flux.f64 - fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode(prob, chainflux, draw_samples = 2500) + fh_mcmc_chain, fhsamples, fhstats = ahmc_bayesian_pinn_ode( + prob, chainflux, draw_samples = 2500) alg = BNNODE(chainflux, draw_samples = 2500) @test alg.chain isa Lux.AbstractExplicitLayer end diff --git a/test/IDE_tests.jl b/test/IDE_tests.jl index cb3f8a302..eda5d7f38 100644 --- a/test/IDE_tests.jl +++ b/test/IDE_tests.jl @@ -38,8 +38,8 @@ end @parameters x @variables u(..) 
Ix = Integral(x in DomainSets.ClosedInterval(0, x)) -eq = Ix(u(x) * cos(x)) ~ (x^3) / 3 -eq = Ix(u(x) * cos(x)) ~ (x^3) / 3 + eq = Ix(u(x) * cos(x)) ~ (x^3) / 3 + eq = Ix(u(x) * cos(x)) ~ (x^3) / 3 eq = Ix(u(x) * cos(x)) ~ (x^3) / 3 @@ -51,7 +51,7 @@ eq = Ix(u(x) * cos(x)) ~ (x^3) / 3 @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimJL.BFGS(); callback = callback, - maxiters = 200) + maxiters = 200) xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains][1] phi = discretization.phi u_predict = [first(phi([x], res.u)) for x in xs] @@ -77,8 +77,8 @@ end xs = 0.00:0.01:1.00 ys = 0.00:0.01:1.00 phi = discretization.phi - u_real = collect(1 - x^2 - y^2 for y in ys, x in xs); - u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs); + u_real = collect(1 - x^2 - y^2 for y in ys, x in xs) + u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs) @test Flux.mse(u_real, u_predict) < 0.001 end @@ -100,8 +100,8 @@ end xs = 0.00:0.01:1.00 ys = 0.00:0.01:1.00 phi = discretization.phi - u_real = collect(x + y^2 for y in ys, x in xs); - u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs); + u_real = collect(x + y^2 for y in ys, x in xs) + u_predict = collect(Array(phi([x, y], res.u))[1] for y in ys, x in xs) @test Flux.mse(u_real, u_predict) < 0.01 end diff --git a/test/NNDAE_tests.jl b/test/NNDAE_tests.jl index f0f269912..bbcf12dd6 100644 --- a/test/NNDAE_tests.jl +++ b/test/NNDAE_tests.jl @@ -14,7 +14,7 @@ Random.seed!(100) u₀ = [1.0, -1.0] du₀ = [0.0, 0.0] M = [1.0 0 - 0 0] + 0 0] f = ODEFunction(example1, mass_matrix = M) tspan = (0.0f0, 1.0f0) @@ -41,7 +41,7 @@ end nothing end M = [0.0 0 - 0 1] + 0 1] u₀ = [0.0, 0.0] du₀ = [0.0, 0.0] tspan = (0.0f0, pi / 2.0f0) diff --git a/test/NNODE_tests.jl b/test/NNODE_tests.jl index 72bb47f81..0cd688e31 100644 --- a/test/NNODE_tests.jl +++ b/test/NNODE_tests.jl @@ -19,21 +19,21 @@ Random.seed!(100) opt = OptimizationOptimisers.Adam(0.1, (0.9, 0.95)) sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false, - abstol = 1.0f-10, maxiters = 200) + abstol = 1.0f-10, maxiters = 200) @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true), - dt = 1 / 20.0f0, - verbose = false, abstol = 1.0f-10, maxiters = 200) + dt = 1 / 20.0f0, + verbose = false, abstol = 1.0f-10, maxiters = 200) sol = solve(prob, NNODE(luxchain, opt), verbose = false, - abstol = 1.0f-6, maxiters = 200) + abstol = 1.0f-6, maxiters = 200) opt = OptimizationOptimJL.BFGS() sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, verbose = false, - abstol = 1.0f-10, maxiters = 200) + abstol = 1.0f-10, maxiters = 200) sol = solve(prob, NNODE(luxchain, opt), verbose = false, - abstol = 1.0f-6, maxiters = 200) + abstol = 1.0f-6, maxiters = 200) end @testset "Vector" begin @@ -47,14 +47,14 @@ end opt = OptimizationOptimJL.BFGS() sol = solve(prob, NNODE(luxchain, opt), dt = 1 / 20.0f0, abstol = 1e-10, - verbose = false, maxiters = 200) + verbose = false, maxiters = 200) @test_throws ArgumentError solve(prob, NNODE(luxchain, opt; autodiff = true), - dt = 1 / 20.0f0, - abstol = 1e-10, verbose = false, maxiters = 200) + dt = 1 / 20.0f0, + abstol = 1e-10, verbose = false, maxiters = 200) sol = solve(prob, NNODE(luxchain, opt), abstol = 1.0f-6, - verbose = false, maxiters = 200) + verbose = false, maxiters = 200) @test sol(0.5) isa Vector @test sol(0.5; idxs = 1) isa Number @@ -64,9 +64,10 @@ end @testset "Example 
1" begin println("Example 1") linear = (u, p, t) -> @. t^3 + 2 * t + (t^2) * ((1 + 3 * (t^2)) / (1 + t + (t^3))) - - u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) + u * (t + ((1 + 3 * (t^2)) / (1 + t + t^3))) linear_analytic = (u0, p, t) -> [exp(-(t^2) / 2) / (1 + t + t^3) + t^2] - prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0)) + prob = ODEProblem( + ODEFunction(linear, analytic = linear_analytic), [1.0f0], (0.0f0, 1.0f0)) luxchain = Lux.Chain(Lux.Dense(1, 128, Lux.σ), Lux.Dense(128, 1)) opt = OptimizationOptimisers.Adam(0.01) @@ -74,24 +75,24 @@ end @test sol.errors[:l2] < 0.5 sol = solve(prob, - NNODE(luxchain, opt; batch = false, - strategy = StochasticTraining(100)), - verbose = false, maxiters = 400) + NNODE(luxchain, opt; batch = false, + strategy = StochasticTraining(100)), + verbose = false, maxiters = 400) @test sol.errors[:l2] < 0.5 sol = solve(prob, - NNODE(luxchain, opt; batch = true, - strategy = StochasticTraining(100)), - verbose = false, maxiters = 400) + NNODE(luxchain, opt; batch = true, + strategy = StochasticTraining(100)), + verbose = false, maxiters = 400) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = false, - maxiters = 400, dt = 1 / 5.0f0) + maxiters = 400, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = false, - maxiters = 400, - dt = 1 / 5.0f0) + maxiters = 400, + dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 end @@ -99,36 +100,37 @@ end println("Example 2") linear = (u, p, t) -> -u / 5 + exp(-t / 5) .* cos(t) linear_analytic = (u0, p, t) -> exp(-t / 5) * (u0 + sin(t)) - prob = ODEProblem(ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) + prob = ODEProblem( + ODEFunction(linear, analytic = linear_analytic), 0.0f0, (0.0f0, 1.0f0)) luxchain = Lux.Chain(Lux.Dense(1, 5, Lux.σ), Lux.Dense(5, 1)) opt = OptimizationOptimisers.Adam(0.1) sol = solve(prob, NNODE(luxchain, opt), verbose = false, maxiters = 400, - abstol = 1.0f-8) + abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 sol = solve(prob, - NNODE(luxchain, opt; batch = false, - strategy = StochasticTraining(100)), - verbose = false, maxiters = 400, - abstol = 1.0f-8) + NNODE(luxchain, opt; batch = false, + strategy = StochasticTraining(100)), + verbose = false, maxiters = 400, + abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 sol = solve(prob, - NNODE(luxchain, opt; batch = true, - strategy = StochasticTraining(100)), - verbose = false, maxiters = 400, - abstol = 1.0f-8) + NNODE(luxchain, opt; batch = true, + strategy = StochasticTraining(100)), + verbose = false, maxiters = 400, + abstol = 1.0f-8) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = false), verbose = false, - maxiters = 400, - abstol = 1.0f-8, dt = 1 / 5.0f0) + maxiters = 400, + abstol = 1.0f-8, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 sol = solve(prob, NNODE(luxchain, opt; batch = true), verbose = false, - maxiters = 400, - abstol = 1.0f-8, dt = 1 / 5.0f0) + maxiters = 400, + abstol = 1.0f-8, dt = 1 / 5.0f0) @test sol.errors[:l2] < 0.5 end @@ -145,8 +147,8 @@ end alg = NNODE(luxchain, opt; autodiff = false) sol = solve(prob, - alg, verbose = false, dt = 1 / 40.0f0, - maxiters = 2000, abstol = 1.0f-7) + alg, verbose = false, dt = 1 / 40.0f0, + maxiters = 2000, abstol = 1.0f-7) @test sol.errors[:l2] < 0.5 end @@ -162,13 +164,14 @@ end true_sol = solve(prob_oop, Tsit5(), saveat = 0.01) func = Lux.σ N = 12 - chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, 
func), Lux.Dense(N, N, func), - Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) + chain = Lux.Chain( + Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), + Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) opt = OptimizationOptimisers.Adam(0.01) weights = [0.7, 0.2, 0.1] points = 200 alg = NNODE(chain, opt, autodiff = false, - strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) + strategy = NeuralPDE.WeightedIntervalTraining(weights, points)) sol = solve(prob_oop, alg, verbose = false, maxiters = 5000, saveat = 0.01) @test abs(mean(sol) - mean(true_sol)) < 0.2 end @@ -190,7 +193,7 @@ end return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) end alg1 = NNODE(luxchain, opt, strategy = GridTraining(0.01), - additional_loss = additional_loss) + additional_loss = additional_loss) sol1 = solve(prob, alg1, verbose = false, abstol = 1e-8, maxiters = 500) @test sol1.errors[:l2] < 0.5 end @@ -215,7 +218,7 @@ end return sum(sum(abs2, [phi(t, θ) for t in t_] .- u_)) / length(u_) end alg1 = NNODE(luxchain, opt, strategy = StochasticTraining(1000), - additional_loss = additional_loss) + additional_loss = additional_loss) sol1 = solve(prob, alg1, verbose = false, abstol = 1e-8, maxiters = 500) @test sol1.errors[:l2] < 0.5 end @@ -224,9 +227,9 @@ end @testset "Parameter Estimation" begin println("Parameter Estimation") function lorenz(u, p, t) - return [p[1]*(u[2]-u[1]), - u[1]*(p[2]-u[3])-u[2], - u[1]*u[2]-p[3]*u[3]] + return [p[1] * (u[2] - u[1]), + u[1] * (p[2] - u[3]) - u[2], + u[1] * u[2] - p[3] * u[3]] end prob = ODEProblem(lorenz, [1.0, 0.0, 0.0], (0.0, 1.0), [1.0, 1.0, 1.0]) true_p = [2.0, 3.0, 2.0] @@ -235,17 +238,18 @@ end t_ = sol.t u_ = reduce(hcat, sol.u) function additional_loss(phi, θ) - return sum(abs2, phi(t_, θ) .- u_)/100 + return sum(abs2, phi(t_, θ) .- u_) / 100 end n = 8 luxchain = Lux.Chain( - Lux.Dense(1, n, Lux.σ), - Lux.Dense(n, n, Lux.σ), - Lux.Dense(n, n, Lux.σ), - Lux.Dense(n, 3) - ) + Lux.Dense(1, n, Lux.σ), + Lux.Dense(n, n, Lux.σ), + Lux.Dense(n, n, Lux.σ), + Lux.Dense(n, 3) + ) opt = OptimizationOptimJL.BFGS(linesearch = BackTracking()) - alg = NNODE(luxchain, opt, strategy = GridTraining(0.01), param_estim = true, additional_loss = additional_loss) + alg = NNODE(luxchain, opt, strategy = GridTraining(0.01), + param_estim = true, additional_loss = additional_loss) sol = solve(prob, alg, verbose = false, abstol = 1e-8, maxiters = 1000, saveat = t_) @test sol.k.u.p≈true_p atol=1e-2 @test reduce(hcat, sol.u)≈u_ atol=1e-2 @@ -257,37 +261,41 @@ end γ = Γ / 2 ρ₁₁, ρ₂₂, ρ₁₂, ρ₂₁ = u d̢ρ = [im * Ω * (ρ₁₂ - ρ₂₁) + Γ * ρ₂₂; - -im * Ω * (ρ₁₂ - ρ₂₁) - Γ * ρ₂₂; - -(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁); - conj(-(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁))] + -im * Ω * (ρ₁₂ - ρ₂₁) - Γ * ρ₂₂; + -(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁); + conj(-(γ + im * Δ) * ρ₁₂ - im * Ω * (ρ₂₂ - ρ₁₁))] return d̢ρ end - + u0 = zeros(ComplexF64, 4) u0[1] = 1 - time_span = (0.0, 2.0) + time_span = (0.0, 2.0) parameters = [2.0, 0.0, 1.0] - + problem = ODEProblem(bloch_equations, u0, time_span, parameters) - + chain = Lux.Chain( - Lux.Dense(1, 16, tanh; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) , - Lux.Dense(16, 4; init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)) - ) + Lux.Dense(1, 16, tanh; + init_weight = (rng, a...) -> Lux.kaiming_normal(rng, ComplexF64, a...)), + Lux.Dense( + 16, 4; init_weight = (rng, a...) 
-> Lux.kaiming_normal(rng, ComplexF64, a...)) + ) ps, st = Lux.setup(rng, chain) - + opt = OptimizationOptimisers.Adam(0.01) - ground_truth = solve(problem, Tsit5(), saveat = 0.01) - strategies = [StochasticTraining(500), GridTraining(0.01), WeightedIntervalTraining([0.1, 0.4, 0.4, 0.1], 500)] + ground_truth = solve(problem, Tsit5(), saveat = 0.01) + strategies = [StochasticTraining(500), GridTraining(0.01), + WeightedIntervalTraining([0.1, 0.4, 0.4, 0.1], 500)] @testset "$(nameof(typeof(strategy)))" for strategy in strategies alg = NNODE(chain, opt, ps; strategy) sol = solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) - @test sol.u ≈ ground_truth.u rtol=1e-1 + @test sol.u≈ground_truth.u rtol=1e-1 end alg = NNODE(chain, opt, ps; strategy = QuadratureTraining()) - @test_throws ErrorException solve(problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) + @test_throws ErrorException solve( + problem, alg, verbose = false, maxiters = 5000, saveat = 0.01) end @testset "Translating from Flux" begin diff --git a/test/NNODE_tstops_test.jl b/test/NNODE_tstops_test.jl index bc4a4b08d..edcf0916a 100644 --- a/test/NNODE_tstops_test.jl +++ b/test/NNODE_tstops_test.jl @@ -7,9 +7,9 @@ end p = [1.5, 1.0, 3.0, 1.0] u0 = [1.0, 1.0] tspan = (0.0, 3.0) -points1 = [rand() for i=1:280] -points2 = [rand() + 1 for i=1:80] -points3 = [rand() + 2 for i=1:40] +points1 = [rand() for i in 1:280] +points2 = [rand() + 1 for i in 1:80] +points3 = [rand() + 2 for i in 1:40] addedPoints = vcat(points1, points2, points3) saveat = 0.01 @@ -20,7 +20,7 @@ true_sol = solve(prob_oop, Tsit5(), saveat = saveat) func = Lux.σ N = 12 chain = Lux.Chain(Lux.Dense(1, N, func), Lux.Dense(N, N, func), Lux.Dense(N, N, func), - Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) + Lux.Dense(N, N, func), Lux.Dense(N, length(u0))) opt = OptimizationOptimisers.Adam(0.01) threshold = 0.2 @@ -43,7 +43,8 @@ dx = 1.0 println("With added points") # (difference between solutions should be low) alg = NNODE(chain, opt, autodiff = false, strategy = GridTraining(dx)) - sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, + saveat = saveat, tstops = addedPoints) @test abs(mean(sol) - mean(true_sol)) < threshold end end @@ -53,15 +54,18 @@ end @testset "Without added points" begin println("Without added points") # (difference between solutions should be high) - alg = NNODE(chain, opt, autodiff = false, strategy = WeightedIntervalTraining(weights, points)) + alg = NNODE(chain, opt, autodiff = false, + strategy = WeightedIntervalTraining(weights, points)) sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat) @test abs(mean(sol) - mean(true_sol)) > threshold end @testset "With added points" begin println("With added points") # (difference between solutions should be low) - alg = NNODE(chain, opt, autodiff = false, strategy = WeightedIntervalTraining(weights, points)) - sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + alg = NNODE(chain, opt, autodiff = false, + strategy = WeightedIntervalTraining(weights, points)) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, + saveat = saveat, tstops = addedPoints) @test abs(mean(sol) - mean(true_sol)) < threshold end end @@ -79,7 +83,8 @@ end println("With added points") # (difference between solutions should be low) alg = NNODE(chain, opt, autodiff = false, strategy = 
StochasticTraining(points)) - sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, saveat = saveat, tstops = addedPoints) + sol = solve(prob_oop, alg, verbose = false, maxiters = maxiters, + saveat = saveat, tstops = addedPoints) @test abs(mean(sol) - mean(true_sol)) < threshold end end diff --git a/test/NNPDE_tests.jl b/test/NNPDE_tests.jl index be4b3152d..7236ac041 100644 --- a/test/NNPDE_tests.jl +++ b/test/NNPDE_tests.jl @@ -54,31 +54,31 @@ end grid_strategy = GridTraining(0.1) quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e3, abstol = 1e-3, - maxiters = 50, batch = 100) + reltol = 1e3, abstol = 1e-3, + maxiters = 50, batch = 100) stochastic_strategy = StochasticTraining(100; bcs_points = 50) quasirandom_strategy = QuasiRandomTraining(100; - sampling_alg = LatinHypercubeSample(), - resampling = false, - minibatch = 100) + sampling_alg = LatinHypercubeSample(), + resampling = false, + minibatch = 100) quasirandom_strategy_resampling = QuasiRandomTraining(100; - bcs_points = 50, - sampling_alg = LatticeRuleSample(), - resampling = true, - minibatch = 0) + bcs_points = 50, + sampling_alg = LatticeRuleSample(), + resampling = true, + minibatch = 0) strategies = [ grid_strategy, stochastic_strategy, quasirandom_strategy, quasirandom_strategy_resampling, - quadrature_strategy, + quadrature_strategy ] @testset "Test ODE/Heterogeneous" begin map(strategies) do strategy_ test_ode(strategy_) - end + end end @testset "Example 1: Heterogeneous system" begin @@ -91,7 +91,7 @@ end h(z) ~ cos(z), p(x, z) ~ exp(x) * exp(z), u(x, y, z) + v(y, x) * Dz(h(z)) - p(x, z) ~ x + y + z - (x^2 + y^2) * sin(z) - - exp(x) * exp(z), + exp(x) * exp(z) ] bcs = [u(0, 0, 0) ~ 0.0] @@ -102,23 +102,23 @@ end chain = [ Lux.Chain(Lux.Dense(3, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh), - Lux.Dense(12, 1)), + Lux.Dense(12, 1)), Lux.Chain(Lux.Dense(2, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh), - Lux.Dense(12, 1)), + Lux.Dense(12, 1)), Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh), - Lux.Dense(12, 1)), + Lux.Dense(12, 1)), Lux.Chain(Lux.Dense(2, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh), - Lux.Dense(12, 1))] + Lux.Dense(12, 1))] grid_strategy = NeuralPDE.GridTraining(0.1) quadrature_strategy = NeuralPDE.QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e-3, abstol = 1e-3, - maxiters = 50, batch = 100) + reltol = 1e-3, abstol = 1e-3, + maxiters = 50, batch = 100) discretization = NeuralPDE.PhysicsInformedNN(chain, grid_strategy) @named pde_system = PDESystem(eqs, bcs, domains, [x, y, z], - [u(x, y, z), v(y, x), h(z), p(x, z)]) + [u(x, y, z), v(y, x), h(z), p(x, z)]) prob = NeuralPDE.discretize(pde_system, discretization) @@ -135,7 +135,7 @@ end (x, y, z) -> x + y + z, (x, y) -> x^2 + y^2, (z) -> cos(z), - (x, z) -> exp(x) * exp(z), + (x, z) -> exp(x) * exp(z) ] xs, ys, zs = [infimum(d.domain):0.1:supremum(d.domain) for d in domains] @@ -184,9 +184,9 @@ function test_2d_poisson_equation(chain_, strategy_) analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) @test u_predict≈u_real atol=2.0 end @@ -197,15 +197,15 @@ end for strategy_ in strategies chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), - Lux.Dense(12, 1)) + Lux.Dense(12, 1)) 
test_2d_poisson_equation(chain_, strategy_) end algs = [CubatureJLp()] #CubatureJLh(), for alg in algs chain_ = Lux.Chain(Lux.Dense(2, 12, Lux.σ), Lux.Dense(12, 12, Lux.σ), - Lux.Dense(12, 1)) + Lux.Dense(12, 1)) strategy_ = NeuralPDE.QuadratureTraining(quadrature_alg = alg, reltol = 1e-4, - abstol = 1e-3, maxiters = 30, batch = 10) + abstol = 1e-3, maxiters = 30, batch = 10) test_2d_poisson_equation(chain_, strategy_) end end @@ -234,14 +234,14 @@ end # Neural network chain = [[Lux.Chain(Lux.Dense(1, 12, Lux.tanh), Lux.Dense(12, 12, Lux.tanh), - Lux.Dense(12, 1)) for _ in 1:3] + Lux.Dense(12, 1)) for _ in 1:3] [Lux.Chain(Lux.Dense(1, 4, Lux.tanh), Lux.Dense(4, 1)) for _ in 1:2]] quasirandom_strategy = QuasiRandomTraining(100; sampling_alg = LatinHypercubeSample()) discretization = PhysicsInformedNN(chain, quasirandom_strategy) @named pde_system = PDESystem(eq, bcs, domains, [x], - [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)]) + [u(x), Dxu(x), Dxxu(x), O1(x), O2(x)]) prob = discretize(pde_system, discretization) sym_prob = symbolic_discretize(pde_system, discretization) @@ -290,8 +290,8 @@ end chain2 = Lux.Chain(Lux.Dense(2, 15, Lux.tanh), Lux.Dense(15, 1)) quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e-3, abstol = 1e-3, - maxiters = 50, batch = 100) + reltol = 1e-3, abstol = 1e-3, + maxiters = 50, batch = 100) chain = [chain1, chain2] discretization = PhysicsInformedNN(chain, quadrature_strategy) @@ -344,8 +344,8 @@ end derivative = NeuralPDE.numeric_derivative quadrature_strategy = QuadratureTraining(quadrature_alg = CubatureJLh(), - reltol = 1e-3, abstol = 1e-3, - maxiters = 50, batch = 100) + reltol = 1e-3, abstol = 1e-3, + maxiters = 50, batch = 100) discretization = PhysicsInformedNN(chain, quadrature_strategy) prob = discretize(pde_system, discretization) @@ -365,9 +365,9 @@ end end u_predict = reshape([first(phi([x, t], res.u)) for x in xs for t in ts], - (length(xs), length(ts))) + (length(xs), length(ts))) u_real = reshape([analytic_sol_func(x, t) for x in xs for t in ts], - (length(xs), length(ts))) + (length(xs), length(ts))) @test u_predict≈u_real atol=0.1 end @@ -393,7 +393,7 @@ end # Neural network inner = 20 chain = Lux.Chain(Lux.Dense(2, inner, Lux.tanh), Lux.Dense(inner, inner, Lux.tanh), - Lux.Dense(inner, 1)) + Lux.Dense(inner, 1)) discretization = PhysicsInformedNN(chain, quadrature_strategy) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) @@ -409,9 +409,9 @@ end xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) @test u_predict≈u_real rtol=0.1 end @@ -423,11 +423,11 @@ end u(θ) * (θ + ((1 + 3 * (θ^2)) / (1 + θ + θ^3))) bcs = [u(0.0) ~ 1.0] domains = [θ ∈ Interval(0.0, 1.0)] - + chain = Flux.Chain(Flux.Dense(1, 12, Flux.σ), Flux.Dense(12, 1)) discretization = PhysicsInformedNN(chain, QuadratureTraining()) @test discretization.chain isa Lux.AbstractExplicitLayer - + @named pde_system = PDESystem(eq, bcs, domains, [θ], [u]) prob = discretize(pde_system, discretization) res = Optimization.solve(prob, OptimizationOptimisers.Adam(0.1); maxiters = 1000) @@ -441,4 +441,4 @@ end u_real = [analytic_sol_func(t) for t in ts] u_predict = [first(phi(t, res.u)) for t in ts] @test u_predict≈u_real atol=0.1 -end \ No newline at end of file +end diff --git 
a/test/NNPDE_tests_gpu_Lux.jl b/test/NNPDE_tests_gpu_Lux.jl index 01e63a1aa..378c24016 100644 --- a/test/NNPDE_tests_gpu_Lux.jl +++ b/test/NNPDE_tests_gpu_Lux.jl @@ -33,17 +33,17 @@ const gpud = gpu_device() # Neural network inner = 20 chain = Lux.Chain(Lux.Dense(1, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) strategy = GridTraining(dt) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud discretization = PhysicsInformedNN(chain, - strategy; - init_params = ps) + strategy; + init_params = ps) @named pde_system = PDESystem(eq, bcs, domains, [θ], [u(θ)]) prob = discretize(pde_system, discretization) @@ -74,12 +74,12 @@ end inner = 30 chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) strategy = StochasticTraining(500) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 @@ -92,7 +92,7 @@ end u_exact = (t, x) -> exp.(-t) * cos.(x) ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_predict = reshape([first(Array(phi([t, x], res.u))) for t in ts for x in xs], - (length(ts), length(xs))) + (length(ts), length(xs))) u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) diff_u = abs.(u_predict .- u_real) @test u_predict≈u_real atol=1.0 @@ -120,12 +120,13 @@ end inner = 20 chain = Lux.Chain(Lux.Dense(2, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) - strategy = QuasiRandomTraining(500; sampling_alg = SobolSample(), resampling = false, minibatch = 30) + strategy = QuasiRandomTraining( + 500; sampling_alg = SobolSample(), resampling = false, minibatch = 30) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 discretization = PhysicsInformedNN(chain, strategy; init_params = ps) prob = discretize(pdesys, discretization) @@ -136,7 +137,7 @@ end u_exact = (t, x) -> exp(-t) * cos(x) ts, xs = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] u_predict = reshape([first(Array(phi([t, x], res.u))) for t in ts for x in xs], - (length(ts), length(xs))) + (length(ts), length(xs))) u_real = reshape([u_exact(t, x) for t in ts for x in xs], (length(ts), length(xs))) diff_u = abs.(u_predict .- u_real) @test u_predict≈u_real atol=1.0 @@ -173,10 +174,10 @@ end # Neural network inner = 25 chain = Lux.Chain(Lux.Dense(3, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, inner, Lux.σ), - Lux.Dense(inner, 1)) + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, inner, Lux.σ), + Lux.Dense(inner, 1)) strategy = GridTraining(0.05) ps = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray |> gpud .|> Float64 @@ -190,7 
+191,7 @@ end ts, xs, ys = [infimum(d.domain):0.1:supremum(d.domain) for d in domains] u_real = [analytic_sol_func(t, x, y) for t in ts for x in xs for y in ys] u_predict = [first(Array(phi([t, x, y], res.u))) for t in ts for x in xs - for y in ys] + for y in ys] @test u_predict≈u_real rtol=0.2 -end \ No newline at end of file +end diff --git a/test/NNRODE_tests.jl b/test/NNRODE_tests.jl index da8f87ec7..59b890b4f 100644 --- a/test/NNRODE_tests.jl +++ b/test/NNRODE_tests.jl @@ -14,7 +14,7 @@ prob = RODEProblem(linear, u0, tspan, noise = W) chain = Flux.Chain(Dense(2, 8, relu), Dense(8, 16, relu), Dense(16, 1)) opt = OptimizationOptimisers.Adam(1e-4) sol = solve(prob, NeuralPDE.NNRODE(chain, W, opt), dt = dt, verbose = true, - abstol = 1e-10, maxiters = 3000) + abstol = 1e-10, maxiters = 3000) W2 = NoiseWrapper(sol.W) prob1 = RODEProblem(linear, u0, tspan, noise = W2) sol2 = solve(prob1, RandomEM(), dt = dt) @@ -32,7 +32,7 @@ prob = RODEProblem(linear, u0, tspan, noise = W) chain = Flux.Chain(Dense(2, 32, sigmoid), Dense(32, 32, sigmoid), Dense(32, 1)) opt = OptimizationOptimisers.Adam(1e-3) sol = solve(prob, NeuralPDE.NNRODE(chain, W, opt), dt = dt, verbose = true, - abstol = 1e-10, maxiters = 2000) + abstol = 1e-10, maxiters = 2000) W2 = NoiseWrapper(sol.W) prob1 = RODEProblem(linear, u0, tspan, noise = W2) sol2 = solve(prob1, RandomEM(), dt = dt) diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 8180a6895..5259a019f 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -7,9 +7,9 @@ import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3, - bc_loss_weights = 1) + bc_loss_weights = 1) adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, - bc_loss_weights = 1) + bc_loss_weights = 1) adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss] maxiters = 4000 seed = 60 @@ -19,7 +19,7 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite Random.seed!(seed) hid = 40 chain_ = Lux.Chain(Lux.Dense(2, hid, Lux.σ), Lux.Dense(hid, hid, Lux.σ), - Lux.Dense(hid, 1)) + Lux.Dense(hid, 1)) strategy_ = NeuralPDE.StochasticTraining(256) @info "adaptive reweighting test outdir:, maxiters: $(maxiters), 2D Poisson equation, adaptive_loss: $(nameof(typeof(adaptive_loss))) " @parameters x y @@ -39,10 +39,10 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite iteration = [0] discretization = PhysicsInformedNN(chain_, - strategy_; - adaptive_loss = adaptive_loss, - logger = nothing, - iteration = iteration) + strategy_; + adaptive_loss = adaptive_loss, + logger = nothing, + iteration = iteration) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = discretize(pde_system, discretization) @@ -50,7 +50,7 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite xs, ys = [infimum(d.domain):0.01:supremum(d.domain) for d in domains] analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) callback = function (p, l) iteration[1] += 1 @@ -59,9 +59,10 @@ function test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = 60, maxite end return false end - res = solve(prob, OptimizationOptimisers.Adam(0.03); maxiters = maxiters, callback = callback) + res = solve( + prob, 
OptimizationOptimisers.Adam(0.03); maxiters = maxiters, callback = callback) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) total_diff = sum(diff_u) total_u = sum(abs.(u_real)) @@ -74,7 +75,7 @@ function test_2d_poisson_equation_adaptive_loss_no_logs_run_seediters(adaptive_l test_2d_poisson_equation_adaptive_loss(adaptive_loss; seed = seed, maxiters = maxiters) end error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_seediters, - adaptive_losses) + adaptive_losses) # accuracy tests @show error_results_no_logs[1][:total_diff_rel] diff --git a/test/additional_loss_tests.jl b/test/additional_loss_tests.jl index 4fdc0bc7d..3223c6662 100644 --- a/test/additional_loss_tests.jl +++ b/test/additional_loss_tests.jl @@ -33,9 +33,9 @@ using ComponentArrays # Neural network inn = 18 chain = Lux.Chain(Lux.Dense(1, inn, Lux.σ), - Lux.Dense(inn, inn, Lux.σ), - Lux.Dense(inn, inn, Lux.σ), - Lux.Dense(inn, 1)) + Lux.Dense(inn, inn, Lux.σ), + Lux.Dense(inn, inn, Lux.σ), + Lux.Dense(inn, 1)) init_params = Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain)[1])) lb = [x_0] ub = [x_end] @@ -48,7 +48,7 @@ using ComponentArrays abs(norm2[1]) end discretization = PhysicsInformedNN(chain, GridTraining(dx); init_params = init_params, - additional_loss = norm_loss_function) + additional_loss = norm_loss_function) @named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)]) prob = discretize(pde_system, discretization) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) @@ -73,7 +73,8 @@ using ComponentArrays @test u_predict≈u_real rtol=1e-3 ### No init_params - discretization = PhysicsInformedNN(chain, GridTraining(dx); additional_loss = norm_loss_function) + discretization = PhysicsInformedNN( + chain, GridTraining(dx); additional_loss = norm_loss_function) @named pde_system = PDESystem(eq, bcs, domains, [x], [p(x)]) prob = discretize(pde_system, discretization) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) @@ -113,7 +114,7 @@ end input_ = length(domains) n = 12 chain = [Lux.Chain(Lux.Dense(input_, n, Lux.tanh), Lux.Dense(n, n, Lux.σ), - Lux.Dense(n, 1)) for _ in 1:3] + Lux.Dense(n, 1)) for _ in 1:3] #Generate Data function lorenz!(du, u, p, t) du[1] = 10.0 * (u[2] - u[1]) @@ -138,7 +139,7 @@ end #Additional Loss Function init_params = [Float64.(ComponentArray(Lux.setup(Random.default_rng(), chain[i])[1])) - for i in 1:3] + for i in 1:3] names = (:x, :y, :z) flat_init_params = ComponentArray(NamedTuple{names}(i for i in init_params)) @@ -149,25 +150,26 @@ end function additional_loss(phi, θ, p) return sum(sum(abs2, phi[i](t_, getproperty(θ, names[i])) .- u_[[i], :]) / - len - for i in 1:1:3) + len + for i in 1:1:3) end discretization = PhysicsInformedNN(chain, - GridTraining(dt); - init_params = flat_init_params, - param_estim = true, - additional_loss = additional_loss) + GridTraining(dt); + init_params = flat_init_params, + param_estim = true, + additional_loss = additional_loss) additional_loss(discretization.phi, flat_init_params, nothing) @named pde_system = PDESystem(eqs, bcs, domains, - [t], [x(t), y(t), z(t)], [σ_, ρ, β], - defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) + [t], [x(t), y(t), z(t)], [σ_, ρ, β], + defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) prob = discretize(pde_system, discretization) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) - 
sym_prob.loss_functions.full_loss_function(ComponentArray(depvar = flat_init_params, - p = ones(3)), - Float64[]) + sym_prob.loss_functions.full_loss_function( + ComponentArray(depvar = flat_init_params, + p = ones(3)), + Float64[]) res = solve(prob, OptimizationOptimJL.BFGS(); maxiters = 6000) p_ = res.u[(end - 2):end] @@ -177,14 +179,14 @@ end ### No init_params discretization = PhysicsInformedNN(chain, - GridTraining(dt); - param_estim = true, - additional_loss = additional_loss) + GridTraining(dt); + param_estim = true, + additional_loss = additional_loss) additional_loss(discretization.phi, flat_init_params, nothing) @named pde_system = PDESystem(eqs, bcs, domains, - [t], [x(t), y(t), z(t)], [σ_, ρ, β], - defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) + [t], [x(t), y(t), z(t)], [σ_, ρ, β], + defaults = Dict([p => 1.0 for p in [σ_, ρ, β]])) prob = discretize(pde_system, discretization) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) sym_prob.loss_functions.full_loss_function(sym_prob.flat_init_params, nothing) @@ -206,9 +208,9 @@ end domain = [x ∈ Interval(x0, x_end)] hidden = 10 chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.sin), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, 1)) + Lux.Dense(hidden, hidden, Lux.sin), + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, 1)) strategy = GridTraining(dx) xs = collect(x0:dx:x_end)' aproxf_(x) = @. cos(pi * x) diff --git a/test/dgm_test.jl b/test/dgm_test.jl index ba40a7a6f..de29888f9 100644 --- a/test/dgm_test.jl +++ b/test/dgm_test.jl @@ -1,6 +1,7 @@ using NeuralPDE, Test -using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions, MethodOfLines, OrdinaryDiffEq +using ModelingToolkit, Optimization, OptimizationOptimisers, Distributions, MethodOfLines, + OrdinaryDiffEq import ModelingToolkit: Interval, infimum, supremum import Lux: tanh, identity @@ -19,16 +20,16 @@ import Lux: tanh, identity # Space and time domains domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)] - strategy = QuasiRandomTraining(256, minibatch= 32); - discretization= DeepGalerkin(2, 1, 20, 3, tanh, tanh, identity, strategy); + strategy = QuasiRandomTraining(256, minibatch = 32) + discretization = DeepGalerkin(2, 1, 20, 3, tanh, tanh, identity, strategy) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = discretize(pde_system, discretization) - global iter = 0; + global iter = 0 callback = function (p, l) - global iter += 1; - if iter%50 == 0 + global iter += 1 + if iter % 50 == 0 println("$iter => $l") end return false @@ -43,44 +44,44 @@ import Lux: tanh, identity analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) @test u_predict≈u_real atol=0.1 end @testset "Black-Scholes PDE: European Call Option" begin - K = 50.0; - T = 1.0; - r = 0.05; - σ = 0.25; - S = 130.0; - S_multiplier = 1.3; + K = 50.0 + T = 1.0 + r = 0.05 + σ = 0.25 + S = 130.0 + S_multiplier = 1.3 @parameters x t @variables g(..) 
- G(x)= max(x - K , 0.0) + G(x) = max(x - K, 0.0) - Dt= Differential(t) - Dx= Differential(x) - Dxx= Dx^2 + Dt = Differential(t) + Dx = Differential(x) + Dxx = Dx^2 - eq= Dt(g(t,x)) + r * x * Dx(g(t,x)) + 0.5 * σ^2 * Dxx(g(t,x)) ~ r * g(t,x) + eq = Dt(g(t, x)) + r * x * Dx(g(t, x)) + 0.5 * σ^2 * Dxx(g(t, x)) ~ r * g(t, x) - bcs= [g(T,x) ~ G(x)] # terminal condition + bcs = [g(T, x) ~ G(x)] # terminal condition domains = [t ∈ Interval(0.0, T), x ∈ Interval(0.0, S * S_multiplier)] - strategy = QuasiRandomTraining(128, minibatch= 32); - discretization= DeepGalerkin(2, 1, 40, 3, tanh, tanh, identity, strategy); + strategy = QuasiRandomTraining(128, minibatch = 32) + discretization = DeepGalerkin(2, 1, 40, 3, tanh, tanh, identity, strategy) - @named pde_system = PDESystem(eq, bcs, domains, [t, x], [g(t,x)]) + @named pde_system = PDESystem(eq, bcs, domains, [t, x], [g(t, x)]) prob = discretize(pde_system, discretization) - global iter = 0; + global iter = 0 callback = function (p, l) - global iter += 1; - if iter%50 == 0 + global iter += 1 + if iter % 50 == 0 println("$iter => $l") end return false @@ -92,33 +93,33 @@ end phi = discretization.phi function analytical_soln(t, x, K, σ, T) - d₊ = (log(x/K) + (r + 0.5 * σ^2) * (T - t)) / (σ * sqrt(T - t)) + d₊ = (log(x / K) + (r + 0.5 * σ^2) * (T - t)) / (σ * sqrt(T - t)) d₋ = d₊ - (σ * sqrt(T - t)) - return x * cdf(Normal(0,1), d₊) .- K*exp(-r * (T - t))*cdf(Normal(0,1), d₋) + return x * cdf(Normal(0, 1), d₊) .- K * exp(-r * (T - t)) * cdf(Normal(0, 1), d₋) end analytic_sol_func(t, x) = analytical_soln(t, x, K, σ, T) - - domains2 = [t ∈ Interval(0.0, T - 0.001), x ∈ Interval(0.0, S)] - ts = collect(infimum(domains2[1].domain):0.01:supremum(domains2[1].domain)) - xs = collect(infimum(domains2[2].domain):1.0:supremum(domains2[2].domain)) - - u_real= [analytic_sol_func(t,x) for t in ts, x in xs] - u_predict= [first(phi([t, x], res.u)) for t in ts, x in xs] - @test u_predict ≈ u_real rtol= 0.05 + + domains2 = [t ∈ Interval(0.0, T - 0.001), x ∈ Interval(0.0, S)] + ts = collect(infimum(domains2[1].domain):0.01:supremum(domains2[1].domain)) + xs = collect(infimum(domains2[2].domain):1.0:supremum(domains2[2].domain)) + + u_real = [analytic_sol_func(t, x) for t in ts, x in xs] + u_predict = [first(phi([t, x], res.u)) for t in ts, x in xs] + @test u_predict≈u_real rtol=0.05 end @testset "Burger's equation" begin @parameters x t @variables u(..) 
- Dt= Differential(t) - Dx= Differential(x) - Dxx= Dx^2 - α = 0.05; - eq= Dt(u(t,x)) + u(t,x) * Dx(u(t,x)) - α * Dxx(u(t,x)) ~ 0 # Burger's equation + Dt = Differential(t) + Dx = Differential(x) + Dxx = Dx^2 + α = 0.05 + eq = Dt(u(t, x)) + u(t, x) * Dx(u(t, x)) - α * Dxx(u(t, x)) ~ 0 # Burger's equation - bcs= [ - u(0.0, x) ~ - sin(π*x), + bcs = [ + u(0.0, x) ~ -sin(π * x), u(t, -1.0) ~ 0.0, u(t, 1.0) ~ 0.0 ] @@ -126,38 +127,37 @@ end domains = [t ∈ Interval(0.0, 1.0), x ∈ Interval(-1.0, 1.0)] # MethodOfLines - dx= 0.01 + dx = 0.01 order = 2 discretization = MOLFiniteDifference([x => dx], t, saveat = 0.01) - @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t,x)]) + @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) prob = discretize(pde_system, discretization) - sol= solve(prob, Tsit5()) + sol = solve(prob, Tsit5()) ts = sol[t] - xs = sol[x] + xs = sol[x] - u_MOL = sol[u(t,x)] + u_MOL = sol[u(t, x)] # NeuralPDE - strategy = QuasiRandomTraining(256, minibatch= 32); - discretization= DeepGalerkin(2, 1, 50, 5, tanh, tanh, identity, strategy); - @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t,x)]); - prob = discretize(pde_system, discretization); - global iter = 0; + strategy = QuasiRandomTraining(256, minibatch = 32) + discretization = DeepGalerkin(2, 1, 50, 5, tanh, tanh, identity, strategy) + @named pde_system = PDESystem(eq, bcs, domains, [t, x], [u(t, x)]) + prob = discretize(pde_system, discretization) + global iter = 0 callback = function (p, l) - global iter += 1; - if iter%20 == 0 + global iter += 1 + if iter % 20 == 0 println("$iter => $l") end return false end - res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 200); - prob = remake(prob, u0 = res.u); - res = Optimization.solve(prob, Adam(0.001); callback = callback, maxiters = 100); - phi = discretization.phi; - - u_predict= [first(phi([t, x], res.u)) for t in ts, x in xs] + res = Optimization.solve(prob, Adam(0.01); callback = callback, maxiters = 200) + prob = remake(prob, u0 = res.u) + res = Optimization.solve(prob, Adam(0.001); callback = callback, maxiters = 100) + phi = discretization.phi - @test u_predict ≈ u_MOL rtol= 0.025 + u_predict = [first(phi([t, x], res.u)) for t in ts, x in xs] -end \ No newline at end of file + @test u_predict≈u_MOL rtol=0.025 +end diff --git a/test/direct_function_tests.jl b/test/direct_function_tests.jl index 497583954..529c0fe64 100644 --- a/test/direct_function_tests.jl +++ b/test/direct_function_tests.jl @@ -27,8 +27,8 @@ Random.seed!(110) hidden = 10 chain = Lux.Chain(Lux.Dense(1, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, 1)) + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, 1)) strategy = GridTraining(0.01) discretization = PhysicsInformedNN(chain, strategy) @@ -53,9 +53,9 @@ end hidden = 20 chain = Lux.Chain(Lux.Dense(1, hidden, Lux.sin), - Lux.Dense(hidden, hidden, Lux.sin), - Lux.Dense(hidden, hidden, Lux.sin), - Lux.Dense(hidden, 1)) + Lux.Dense(hidden, hidden, Lux.sin), + Lux.Dense(hidden, hidden, Lux.sin), + Lux.Dense(hidden, 1)) strategy = GridTraining(0.01) discretization = PhysicsInformedNN(chain, strategy) @@ -84,9 +84,9 @@ end domain = [x ∈ Interval(x0, x_end), y ∈ Interval(y0, y_end)] hidden = 25 chain = Lux.Chain(Lux.Dense(2, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, hidden, Lux.tanh), - Lux.Dense(hidden, 1)) + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, hidden, Lux.tanh), + Lux.Dense(hidden, 1)) strategy = GridTraining(d) 
discretization = PhysicsInformedNN(chain, strategy) @@ -103,7 +103,7 @@ end xs = collect(x0:0.1:x_end) ys = collect(y0:0.1:y_end) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([func(x, y) for x in xs for y in ys], (length(xs), length(ys))) diff_u = abs.(u_predict .- u_real) @test u_predict≈u_real rtol=0.05 diff --git a/test/forward_tests.jl b/test/forward_tests.jl index 48d18b918..95d061c05 100644 --- a/test/forward_tests.jl +++ b/test/forward_tests.jl @@ -29,11 +29,12 @@ using ComponentArrays domains = pde_system.domain dx = strategy_.dx eltypeθ = eltype(sym_prob.flat_init_params) - depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = NeuralPDE.get_vars(pde_system.ivs, - pde_system.dvs) + depvars, indvars, dict_indvars, dict_depvars, dict_depvar_input = NeuralPDE.get_vars( + pde_system.ivs, + pde_system.dvs) train_sets = generate_training_sets(domains, dx, eqs, bcs, eltypeθ, - dict_indvars, dict_depvars) + dict_indvars, dict_depvars) pde_train_sets, bcs_train_sets = train_sets pde_train_sets = NeuralPDE.adapt(eltypeθ, pde_train_sets)[1] @@ -47,8 +48,8 @@ end @testset "derivatives" begin chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 16, Lux.σ), - Lux.Dense(16, 1)) - init_params = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray .|> Float64 + Lux.Dense(16, 1)) + init_params = Lux.setup(Random.default_rng(), chain)[1] |> ComponentArray .|> Float64 eltypeθ = eltype(init_params) phi = NeuralPDE.Phi(chain) @@ -99,7 +100,7 @@ end chain([1], init_params, st) strategy_ = GridTraining(0.1) discretization = PhysicsInformedNN(chain, strategy_; - init_params = init_params) + init_params = init_params) @named pde_system = PDESystem(eq, bcs, domains, [x], [u(x)]) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) prob = discretize(pde_system, discretization) @@ -118,7 +119,7 @@ end chain([1], init_params, st) discretization = PhysicsInformedNN(chain, strategy_; - init_params = init_params) + init_params = init_params) @named pde_system = PDESystem(eqs, bcs, domains, [x], [u(x)]) sym_prob = NeuralPDE.symbolic_discretize(pde_system, discretization) prob = discretize(pde_system, discretization) diff --git a/test/neural_adapter_tests.jl b/test/neural_adapter_tests.jl index cc977210d..bf7316fe9 100644 --- a/test/neural_adapter_tests.jl +++ b/test/neural_adapter_tests.jl @@ -29,16 +29,16 @@ end domains = [x ∈ Interval(0.0, 1.0), y ∈ Interval(0.0, 1.0)] quadrature_strategy = NeuralPDE.QuadratureTraining(reltol = 1e-3, abstol = 1e-6, - maxiters = 50, batch = 100) + maxiters = 50, batch = 100) inner = 8 af = Lux.tanh chain1 = Lux.Chain(Lux.Dense(2, inner, af), - Lux.Dense(inner, inner, af), - Lux.Dense(inner, 1)) - init_params = Lux.setup(Random.default_rng(), chain1)[1] |> ComponentArray .|> Float64 + Lux.Dense(inner, inner, af), + Lux.Dense(inner, 1)) + init_params = Lux.setup(Random.default_rng(), chain1)[1] |> ComponentArray .|> Float64 discretization = NeuralPDE.PhysicsInformedNN(chain1, - quadrature_strategy; - init_params = init_params) + quadrature_strategy; + init_params = init_params) @named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) prob = NeuralPDE.discretize(pde_system, discretization) @@ -49,9 +49,9 @@ end inner_ = 8 af = Lux.tanh chain2 = Lux.Chain(Lux.Dense(2, inner_, af), - Lux.Dense(inner_, inner_, af), - Lux.Dense(inner_, inner_, af), - Lux.Dense(inner_, 1)) + Lux.Dense(inner_, inner_, af), + Lux.Dense(inner_, inner_, af), + 
Lux.Dense(inner_, 1)) initp, st = Lux.setup(Random.default_rng(), chain2) init_params2 = Float64.(ComponentArrays.ComponentArray(initp)) @@ -61,7 +61,8 @@ end end grid_strategy = GridTraining(0.05) - quadrature_strategy = QuadratureTraining(reltol = 1e-3, abstol = 1e-6, maxiters = 50, batch = 100) + quadrature_strategy = QuadratureTraining( + reltol = 1e-3, abstol = 1e-6, maxiters = 50, batch = 100) stochastic_strategy = StochasticTraining(1000) quasirandom_strategy = QuasiRandomTraining(1000, minibatch = 200, resampling = true) @@ -80,7 +81,8 @@ end end reses_ = [reses_1; reses_2] - discretizations = map(res_ -> PhysicsInformedNN(chain2, grid_strategy; init_params = res_.u), reses_) + discretizations = map( + res_ -> PhysicsInformedNN(chain2, grid_strategy; init_params = res_.u), reses_) probs = map(discret -> discretize(pde_system, discret), discretizations) phis = map(discret -> discret.phi, discretizations) @@ -88,15 +90,15 @@ end analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) u_predict = reshape([first(phi([x, y], res.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_predicts = map(zip(phis, reses_)) do (phi_, res_) reshape([first(phi_([x, y], res_.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) end u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) @test u_predict≈u_real rtol=1e-1 @test u_predicts[1]≈u_real rtol=1e-1 @@ -128,9 +130,11 @@ end af = Lux.tanh inner = 12 chains = [Lux.Chain(Lux.Dense(2, inner, af), Lux.Dense(inner, inner, af), - Lux.Dense(inner, 1)) for _ in 1:count_decomp] - init_params = map(c -> Float64.(ComponentArrays.ComponentArray(Lux.setup(Random.default_rng(), - c)[1])), chains) + Lux.Dense(inner, 1)) for _ in 1:count_decomp] + init_params = map( + c -> Float64.(ComponentArrays.ComponentArray(Lux.setup(Random.default_rng(), + c)[1])), + chains) xs_ = infimum(x_domain):(1 / count_decomp):supremum(x_domain) xs_domain = [(xs_[i], xs_[i + 1]) for i in 1:(length(xs_) - 1)] @@ -172,9 +176,11 @@ end @named pde_system_ = PDESystem(eq, bcs_, domains_, [x, y], [u(x, y)]) push!(pde_system_map, pde_system_) strategy = GridTraining([0.1 / count_decomp, 0.1]) - discretization = PhysicsInformedNN(chains[i], strategy; init_params = init_params[i]) + discretization = PhysicsInformedNN( + chains[i], strategy; init_params = init_params[i]) prob = discretize(pde_system_, discretization) - @time res_ = Optimization.solve(prob, OptimizationOptimisers.Adam(5e-3), maxiters = 10000) + @time res_ = Optimization.solve( + prob, OptimizationOptimisers.Adam(5e-3), maxiters = 10000) @show res_.objective phi = discretization.phi push!(reses, res_) @@ -213,10 +219,10 @@ end inner_ = 18 af = Lux.tanh chain2 = Lux.Chain(Lux.Dense(2, inner_, af), - Lux.Dense(inner_, inner_, af), - Lux.Dense(inner_, inner_, af), - Lux.Dense(inner_, inner_, af), - Lux.Dense(inner_, 1)) + Lux.Dense(inner_, inner_, af), + Lux.Dense(inner_, inner_, af), + Lux.Dense(inner_, inner_, af), + Lux.Dense(inner_, 1)) initp, st = Lux.setup(Random.default_rng(), chain2) init_params2 = Float64.(ComponentArrays.ComponentArray(initp)) @@ -231,20 +237,20 @@ end end prob_ = NeuralPDE.neural_adapter(losses, init_params2, pde_system_map, - GridTraining([0.1 / count_decomp, 0.1])) + GridTraining([0.1 / count_decomp, 0.1])) @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) @show res_.objective prob_ = NeuralPDE.neural_adapter(losses, res_.u, 
pde_system_map, - GridTraining(0.01)) + GridTraining(0.01)) @time res_ = solve(prob_, OptimizationOptimisers.Adam(5e-3); maxiters = 5000) @show res_.objective phi_ = NeuralPDE.Phi(chain2) xs, ys = [infimum(d.domain):dx:supremum(d.domain) for d in domains] u_predict_ = reshape([first(phi_([x, y], res_.u)) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) + (length(xs), length(ys))) diff_u_ = u_predict_ .- u_real @test u_predict≈u_real rtol=1e-1 diff --git a/test/runtests.jl b/test/runtests.jl index 34bd50ae8..e6248eae6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,38 +12,64 @@ end @time begin if GROUP == "All" || GROUP == "QA" - @time @safetestset "Quality Assurance" begin include("qa.jl") end + @time @safetestset "Quality Assurance" begin + include("qa.jl") + end end if GROUP == "All" || GROUP == "ODEBPINN" - @time @safetestset "Bpinn ODE solver" begin include("BPINN_Tests.jl") end + @time @safetestset "Bpinn ODE solver" begin + include("BPINN_Tests.jl") + end end if GROUP == "All" || GROUP == "PDEBPINN" - @time @safetestset "Bpinn PDE solver" begin include("BPINN_PDE_tests.jl") end - @time @safetestset "Bpinn PDE invaddloss solver" begin include("BPINN_PDEinvsol_tests.jl") end + @time @safetestset "Bpinn PDE solver" begin + include("BPINN_PDE_tests.jl") + end + @time @safetestset "Bpinn PDE invaddloss solver" begin + include("BPINN_PDEinvsol_tests.jl") + end end if GROUP == "All" || GROUP == "NNPDE1" - @time @safetestset "NNPDE" begin include("NNPDE_tests.jl") end + @time @safetestset "NNPDE" begin + include("NNPDE_tests.jl") + end end if GROUP == "All" || GROUP == "NNODE" - @time @safetestset "NNODE" begin include("NNODE_tests.jl") end - @time @safetestset "NNODE_tstops" begin include("NNODE_tstops_test.jl") end - @time @safetestset "NNDAE" begin include("NNDAE_tests.jl") end + @time @safetestset "NNODE" begin + include("NNODE_tests.jl") + end + @time @safetestset "NNODE_tstops" begin + include("NNODE_tstops_test.jl") + end + @time @safetestset "NNDAE" begin + include("NNDAE_tests.jl") + end end if GROUP == "All" || GROUP == "NNPDE2" - @time @safetestset "Additional Loss" begin include("additional_loss_tests.jl") end - @time @safetestset "Direction Function Approximation" begin include("direct_function_tests.jl") end + @time @safetestset "Additional Loss" begin + include("additional_loss_tests.jl") + end + @time @safetestset "Direction Function Approximation" begin + include("direct_function_tests.jl") + end end if GROUP == "All" || GROUP == "NeuralAdapter" - @time @safetestset "NeuralAdapter" begin include("neural_adapter_tests.jl") end + @time @safetestset "NeuralAdapter" begin + include("neural_adapter_tests.jl") + end end if GROUP == "All" || GROUP == "IntegroDiff" - @time @safetestset "IntegroDiff" begin include("IDE_tests.jl") end + @time @safetestset "IntegroDiff" begin + include("IDE_tests.jl") + end end if GROUP == "All" || GROUP == "AdaptiveLoss" - @time @safetestset "AdaptiveLoss" begin include("adaptive_loss_tests.jl") end + @time @safetestset "AdaptiveLoss" begin + include("adaptive_loss_tests.jl") + end end #= @@ -54,7 +80,9 @@ end =# if GROUP == "All" || GROUP == "Forward" - @time @safetestset "Forward" begin include("forward_tests.jl") end + @time @safetestset "Forward" begin + include("forward_tests.jl") + end end if GROUP == "All" || GROUP == "Logging" dev_subpkg("NeuralPDELogging") @@ -62,10 +90,14 @@ end 
Pkg.test(PackageSpec(name = "NeuralPDELogging", path = subpkg_path)) end if !is_APPVEYOR && GROUP == "GPU" - @safetestset "NNPDE_gpu_Lux" begin include("NNPDE_tests_gpu_Lux.jl") end + @safetestset "NNPDE_gpu_Lux" begin + include("NNPDE_tests_gpu_Lux.jl") + end end if GROUP == "All" || GROUP == "DGM" - @time @safetestset "Deep Galerkin solver" begin include("dgm_test.jl") end + @time @safetestset "Deep Galerkin solver" begin + include("dgm_test.jl") + end end end From 8ca79bcc4068b938e11ad56ed0fd7ba335cb22be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 15:36:42 +0000 Subject: [PATCH 22/27] build(deps): bump julia-actions/setup-julia from 1 to 2 Bumps [julia-actions/setup-julia](https://github.com/julia-actions/setup-julia) from 1 to 2. - [Release notes](https://github.com/julia-actions/setup-julia/releases) - [Commits](https://github.com/julia-actions/setup-julia/compare/v1...v2) --- updated-dependencies: - dependency-name: julia-actions/setup-julia dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/CI.yml | 2 +- .github/workflows/Downgrade.yml | 2 +- .github/workflows/Downstream.yml | 2 +- .github/workflows/Invalidations.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 97b6f7f8f..f32163dfb 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -30,7 +30,7 @@ jobs: - "1" steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} - uses: actions/cache@v4 diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml index 190d11e1f..9261e7719 100644 --- a/.github/workflows/Downgrade.yml +++ b/.github/workflows/Downgrade.yml @@ -30,7 +30,7 @@ jobs: - "1" steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} - uses: actions/cache@v4 diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index 1048e3f39..ce29fdd10 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -20,7 +20,7 @@ jobs: - {user: SciML, repo: PDESystemLibrary.jl, group: NeuralPDE} steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.julia-version }} arch: x64 diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml index 6a0a747c7..7ed999080 100644 --- a/.github/workflows/Invalidations.yml +++ b/.github/workflows/Invalidations.yml @@ -16,7 +16,7 @@ jobs: if: github.base_ref == github.event.repository.default_branch runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: "1" - uses: actions/checkout@v4 From 1c110c5f8a0386fc6a0e66002fcd19a397b9ef4a Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 4 Apr 2024 04:16:02 +0000 Subject: [PATCH 23/27] ci: add NNODE, NeuralAdapter, IntegroDiff in GHA from buildkite to have them in Downgrade CI --- .buildkite/pipeline.yml | 25 ------------------------- .github/workflows/CI.yml | 3 +++ .github/workflows/Downgrade.yml | 3 +++ 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 7a4e4387a..a83997c38 100644 --- 
a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -16,31 +16,6 @@ steps: # Don't run Buildkite if the commit message includes the text [skip tests] if: build.message !~ /\[skip tests\]/ - - label: "{{matrix.group}} v{{matrix.version}}" - matrix: - setup: - version: - - "1" - group: - - "NNODE" - - "NeuralAdapter" - - "IntegroDiff" - env: - BUILDKITE_PLUGIN_JULIA_VERSION: "{{matrix.version}}" - GROUP: "{{matrix.group}}" - plugins: - - JuliaCI/julia#v1 - - JuliaCI/julia-test#v1: - coverage: false - julia_args: "--threads=auto" - agents: - os: "linux" - queue: "juliaecosystem" - arch: "x86_64" - timeout_in_minutes: 680 - # Don't run Buildkite if the commit message includes the text [skip tests] - if: build.message !~ /\[skip tests\]/ - - label: "Documentation" plugins: - JuliaCI/julia#v1: diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 97b6f7f8f..c11720597 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,6 +26,9 @@ jobs: - Logging - Forward - DGM + - NNODE + - NeuralAdapter + - IntegroDiff version: - "1" steps: diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml index 190d11e1f..631e42015 100644 --- a/.github/workflows/Downgrade.yml +++ b/.github/workflows/Downgrade.yml @@ -26,6 +26,9 @@ jobs: - Logging - Forward - DGM + - NNODE + - NeuralAdapter + - IntegroDiff version: - "1" steps: From 4fe0529576d4c2df5eda9900a8a1ea0345965f5c Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 4 Apr 2024 04:17:04 +0000 Subject: [PATCH 24/27] build: bump versions to get QA passing in Downgrade --- Project.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index d1508cbd1..3541fae00 100644 --- a/Project.toml +++ b/Project.toml @@ -42,7 +42,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" Adapt = "4" AdvancedHMC = "0.6.1" Aqua = "0.8" -ArrayInterface = "7.8" +ArrayInterface = "7.9" CUDA = "5.2" ChainRulesCore = "1.21" ComponentArrays = "0.15.8" @@ -55,7 +55,7 @@ DomainSets = "0.6, 0.7" Flux = "0.14.11" ForwardDiff = "0.10.36" Functors = "0.4.4" -Integrals = "4" +Integrals = "4.4" LineSearches = "7.2" LinearAlgebra = "1" LogDensityProblems = "2" @@ -63,7 +63,7 @@ Lux = "0.5.22" LuxCUDA = "0.3.2" MCMCChains = "6" MethodOfLines = "0.11" -ModelingToolkit = "9.7" +ModelingToolkit = "9.9" MonteCarloMeasurements = "1.1" Optim = "1.7.8" Optimization = "3.24" @@ -78,8 +78,8 @@ RuntimeGeneratedFunctions = "0.5.12" SafeTestsets = "0.1" SciMLBase = "2.28" Statistics = "1.10" -SymbolicUtils = "1.4" -Symbolics = "5.24" +SymbolicUtils = "1.5" +Symbolics = "5.27.1" Test = "1" UnPack = "1" Zygote = "0.6.69" From 41fa43401c9dcc77b2c41450d2be118ea6fa7fe2 Mon Sep 17 00:00:00 2001 From: Sathvik Bhagavan Date: Thu, 4 Apr 2024 04:17:51 +0000 Subject: [PATCH 25/27] docs: add reference for the bloch equations in the complex example --- docs/src/examples/complex.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/src/examples/complex.md b/docs/src/examples/complex.md index a44e31e79..a11a4d4d0 100644 --- a/docs/src/examples/complex.md +++ b/docs/src/examples/complex.md @@ -1,6 +1,6 @@ # Complex Equations with PINNs -NeuralPDE supports training PINNs with complex differential equations. This example will demonstrate how to use it for [`NNODE`](@ref). Let us consider a system of [bloch equations](https://en.wikipedia.org/wiki/Bloch_equations). 
Note [`QuadratureTraining`](@ref) cannot be used with complex equations due to current limitations of computing quadratures. +NeuralPDE supports training PINNs with complex differential equations. This example will demonstrate how to use it for [`NNODE`](@ref). Let us consider a system of [bloch equations](https://en.wikipedia.org/wiki/Bloch_equations) [^1]. Note [`QuadratureTraining`](@ref) cannot be used with complex equations due to current limitations of computing quadratures. As the input to this neural network is time which is real, we need to initialize the parameters of the neural network with complex values for it to output and train with complex values. @@ -95,3 +95,5 @@ plot!(ground_truth.t, imag.(reduce(hcat, ground_truth.u)[4, :])) ``` We can see it is able to learn the real parts of `u1`, `u2` and imaginary parts of `u3`, `u4`. + +[^1]: https://steck.us/alkalidata/ From 81792ea77d1afb5568667413ecbc7b00f89bf399 Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sun, 7 Apr 2024 08:38:18 -0400 Subject: [PATCH 26/27] Tag the original solution to sol.original and simplify dependencies Gives a nice solution to https://discourse.julialang.org/t/continue-solving-ode-problem-when-using-neuralpde-nnode/101580 --- Project.toml | 2 -- src/BPINN_ode.jl | 6 +++--- src/NeuralPDE.jl | 3 +-- src/advancedHMC_MCMC.jl | 8 ++++---- src/dae_solve.jl | 14 ++++++++------ src/ode_solve.jl | 22 ++++++++++++---------- src/rode_solve.jl | 18 +++++++++--------- 7 files changed, 37 insertions(+), 36 deletions(-) diff --git a/Project.toml b/Project.toml index 3541fae00..80e56de80 100644 --- a/Project.toml +++ b/Project.toml @@ -10,7 +10,6 @@ ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" Cubature = "667455a9-e2ce-5579-9412-b964f529a492" -DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqNoiseProcess = "77a26b50-5914-5dd7-bc55-306e6241c503" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" @@ -47,7 +46,6 @@ CUDA = "5.2" ChainRulesCore = "1.21" ComponentArrays = "0.15.8" Cubature = "1.5" -DiffEqBase = "6.148" DiffEqNoiseProcess = "5.20" Distributions = "0.25.107" DocStringExtensions = "0.9" diff --git a/src/BPINN_ode.jl b/src/BPINN_ode.jl index e92d1b11c..226c3f329 100644 --- a/src/BPINN_ode.jl +++ b/src/BPINN_ode.jl @@ -4,7 +4,7 @@ BNNODE(chain, Kernel = HMC; strategy = nothing, draw_samples = 2000, priorsNNw = (0.0, 2.0), param = [nothing], l2std = [0.05], phystd = [0.05], dataset = [nothing], physdt = 1 / 20.0, - MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing, + MCMCargs = (n_leapfrog=30), nchains = 1, init_params = nothing, Adaptorkwargs = (Adaptor = StanHMCAdaptor, targetacceptancerate = 0.8, Metric = DiagEuclideanMetric), Integratorkwargs = (Integrator = Leapfrog,), autodiff = false, progress = false, verbose = false) @@ -64,7 +64,7 @@ sol_lux_pestim = solve(prob, alg) Note that the solution is evaluated at fixed time points according to the strategy chosen. ensemble solution is evaluated and given at steps of `saveat`. -Dataset should only be provided when ODE parameter Estimation is being done. +Dataset should only be provided when ODE parameter Estimation is being done. The neural network is a fully continuous solution so `BPINNsolution` is an accurate interpolation (up to the neural network training result). 
In addition, the `BPINNstats` is returned as `sol.fullsolution` for further analysis. @@ -170,7 +170,7 @@ struct BPINNsolution{O <: BPINNstats, E, NP, OP, P} end end -function DiffEqBase.__solve(prob::DiffEqBase.ODEProblem, +function SciMLBase.__solve(prob::SciMLBase.ODEProblem, alg::BNNODE, args...; dt = nothing, diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index cfc3b9367..1122afc83 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -5,7 +5,7 @@ module NeuralPDE using DocStringExtensions using Reexport, Statistics -@reexport using DiffEqBase +@reexport using SciMLBase @reexport using ModelingToolkit using Zygote, ForwardDiff, Random, Distributions @@ -16,7 +16,6 @@ using Integrals, Cubature using QuasiMonteCarlo: LatinHypercubeSample import QuasiMonteCarlo using RuntimeGeneratedFunctions -using SciMLBase using Statistics using ArrayInterface import Optim diff --git a/src/advancedHMC_MCMC.jl b/src/advancedHMC_MCMC.jl index 22292462d..252ca2f41 100644 --- a/src/advancedHMC_MCMC.jl +++ b/src/advancedHMC_MCMC.jl @@ -4,7 +4,7 @@ mutable struct LogTargetDensity{C, S, ST <: AbstractTrainingStrategy, I, Union{Vector{Nothing}, Vector{<:Vector{<:AbstractFloat}}} } dim::Int - prob::DiffEqBase.ODEProblem + prob::SciMLBase.ODEProblem chain::C st::S strategy::ST @@ -336,12 +336,12 @@ end """ ahmc_bayesian_pinn_ode(prob, chain; strategy = GridTraining, - dataset = [nothing],init_params = nothing, + dataset = [nothing],init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0f0,l2std = [0.05], phystd = [0.05], priorsNNw = (0.0, 2.0), param = [], nchains = 1, autodiff = false, Kernel = HMC, Adaptorkwargs = (Adaptor = StanHMCAdaptor, - Metric = DiagEuclideanMetric, + Metric = DiagEuclideanMetric, targetacceptancerate = 0.8), Integratorkwargs = (Integrator = Leapfrog,), MCMCkwargs = (n_leapfrog = 30,), @@ -431,7 +431,7 @@ Incase you are only solving the Equations for solution, do not provide dataset * AdvancedHMC.jl is still developing convenience structs so might need changes on new releases. """ -function ahmc_bayesian_pinn_ode(prob::DiffEqBase.ODEProblem, chain; +function ahmc_bayesian_pinn_ode(prob::SciMLBase.ODEProblem, chain; strategy = GridTraining, dataset = [nothing], init_params = nothing, draw_samples = 1000, physdt = 1 / 20.0, l2std = [0.05], diff --git a/src/dae_solve.jl b/src/dae_solve.jl index f9edbb948..5a5ee83be 100644 --- a/src/dae_solve.jl +++ b/src/dae_solve.jl @@ -31,7 +31,7 @@ of the physics-informed neural network which is used as a solver for a standard By default, `GridTraining` is used with `dt` if given. 
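Since this hunk shows only a fragment of the `src/dae_solve.jl` docstring, a compact usage sketch of the `NNDAE` solver it documents may help orient readers. The residual function, network sizes, optimizer, and tolerances below are illustrative assumptions, not code from this changeset:

```julia
using NeuralPDE, Lux, OptimizationOptimisers

# Hypothetical two-component DAE written as an out-of-place residual f(du, u, p, t).
f = (du, u, p, t) -> [cos(2pi * t) - du[1], u[2] + cos(2pi * t) - du[2]]
u0 = [1.0, -1.0]
du0 = [0.0, 0.0]
tspan = (0.0, 1.0)
prob = DAEProblem(f, du0, u0, tspan; differential_vars = [true, false])

# A small network mapping time to both state components.
chain = Lux.Chain(Lux.Dense(1, 15, cos), Lux.Dense(15, 15, sin), Lux.Dense(15, 2))
alg = NNDAE(chain, OptimizationOptimisers.Adam(0.1); autodiff = false)

# `dt` is supplied because the default strategy here is `GridTraining`.
sol = solve(prob, alg; dt = 1 / 100.0, maxiters = 3000)
```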
""" struct NNDAE{C, O, P, K, S <: Union{Nothing, AbstractTrainingStrategy} -} <: DiffEqBase.AbstractDAEAlgorithm +} <: SciMLBase.AbstractDAEAlgorithm chain::C opt::O init_params::P @@ -79,7 +79,7 @@ function generate_loss(strategy::GridTraining, phi, f, autodiff::Bool, tspan, p, return loss end -function DiffEqBase.__solve(prob::DiffEqBase.AbstractDAEProblem, +function SciMLBase.__solve(prob::SciMLBase.AbstractDAEProblem, alg::NNDAE, args...; dt = nothing, @@ -178,12 +178,14 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractDAEProblem, u = [phi(t, res.u) for t in ts] end - sol = DiffEqBase.build_solution(prob, alg, ts, u; + sol = SciMLBase.build_solution(prob, alg, ts, u; k = res, dense = true, calculate_error = false, - retcode = ReturnCode.Success) - DiffEqBase.has_analytic(prob.f) && - DiffEqBase.calculate_solution_errors!(sol; timeseries_errors = true, + retcode = ReturnCode.Success, + original = res, + resid = res.objective) + SciMLBase.has_analytic(prob.f) && + SciMLBase.calculate_solution_errors!(sol; timeseries_errors = true, dense_errors = false) sol end diff --git a/src/ode_solve.jl b/src/ode_solve.jl index e10793647..64d7b3ac6 100644 --- a/src/ode_solve.jl +++ b/src/ode_solve.jl @@ -1,4 +1,4 @@ -abstract type NeuralPDEAlgorithm <: DiffEqBase.AbstractODEAlgorithm end +abstract type NeuralPDEAlgorithm <: SciMLBase.AbstractODEAlgorithm end """ NNODE(chain, opt, init_params = nothing; autodiff = false, batch = 0, additional_loss = nothing, kwargs...) @@ -14,10 +14,10 @@ of the physics-informed neural network which is used as a solver for a standard ## Positional Arguments -* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer` or `Flux.Chain`. +* `chain`: A neural network architecture, defined as a `Lux.AbstractExplicitLayer` or `Flux.Chain`. `Flux.Chain` will be converted to `Lux` using `adapt(FromFluxAdaptor(false, false), chain)`. * `opt`: The optimizer to train the neural network. -* `init_params`: The initial parameter of the neural network. By default, this is `nothing` +* `init_params`: The initial parameter of the neural network. By default, this is `nothing` which thus uses the random initialization provided by the neural network library. ## Keyword Arguments @@ -28,8 +28,8 @@ of the physics-informed neural network which is used as a solver for a standard automatic differentiation (via Zygote), this is only for the derivative in the loss function (the derivative with respect to time). * `batch`: The batch size for the loss computation. Defaults to `true`, means the neural network is applied at a row vector of values - `t` simultaneously, i.e. it's the batch size for the neural network evaluations. This requires a neural network compatible with batched data. - `false` means which means the application of the neural network is done at individual time points one at a time. + `t` simultaneously, i.e. it's the batch size for the neural network evaluations. This requires a neural network compatible with batched data. + `false` means which means the application of the neural network is done at individual time points one at a time. This is not applicable to `QuadratureTraining` where `batch` is passed in the `strategy` which is the number of points it can parallelly compute the integrand. * `param_estim`: Boolean to indicate whether parameters of the differential equations are learnt along with parameters of the neural network. * `strategy`: The training strategy used to choose the points for the evaluations. 
@@ -339,7 +339,7 @@ end SciMLBase.interp_summary(::NNODEInterpolation) = "Trained neural network interpolation" SciMLBase.allowscomplex(::NNODE) = true -function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, +function SciMLBase.__solve(prob::SciMLBase.AbstractODEProblem, alg::NNODE, args...; dt = nothing, @@ -479,13 +479,15 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem, u = [phi(t, res.u) for t in ts] end - sol = DiffEqBase.build_solution(prob, alg, ts, u; + sol = SciMLBase.build_solution(prob, alg, ts, u; k = res, dense = true, interp = NNODEInterpolation(phi, res.u), calculate_error = false, - retcode = ReturnCode.Success) - DiffEqBase.has_analytic(prob.f) && - DiffEqBase.calculate_solution_errors!(sol; timeseries_errors = true, + retcode = ReturnCode.Success, + original = res, + resid = res.objective) + SciMLBase.has_analytic(prob.f) && + SciMLBase.calculate_solution_errors!(sol; timeseries_errors = true, dense_errors = false) sol end #solve diff --git a/src/rode_solve.jl b/src/rode_solve.jl index 85b496880..863a0d1be 100644 --- a/src/rode_solve.jl +++ b/src/rode_solve.jl @@ -20,7 +20,7 @@ function NNRODE(chain, W, opt = Optim.BFGS(), init_params = nothing; autodiff = NNRODE(chain, W, opt, init_params, autodiff, kwargs) end -function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, +function SciMLBase.solve(prob::SciMLBase.AbstractRODEProblem, alg::NeuralPDEAlgorithm, args...; dt, @@ -30,7 +30,7 @@ function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, abstol = 1.0f-6, verbose = false, maxiters = 100) - DiffEqBase.isinplace(prob) && error("Only out-of-place methods are allowed!") + SciMLBase.isinplace(prob) && error("Only out-of-place methods are allowed!") u0 = prob.u0 tspan = prob.tspan @@ -52,12 +52,12 @@ function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, if u0 isa Number phi = (t, W, θ) -> u0 + (t - tspan[1]) * - first(chain(adapt(DiffEqBase.parameterless_type(θ), [t, W]), + first(chain(adapt(SciMLBase.parameterless_type(θ), [t, W]), θ)) else phi = (t, W, θ) -> u0 + (t - tspan[1]) * - chain(adapt(DiffEqBase.parameterless_type(θ), [t, W]), θ) + chain(adapt(SciMLBase.parameterless_type(θ), [t, W]), θ) end else _, re = Flux.destructure(chain) @@ -65,11 +65,11 @@ function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, if u0 isa Number phi = (t, W, θ) -> u0 + (t - t0) * - first(re(θ)(adapt(DiffEqBase.parameterless_type(θ), [t, W]))) + first(re(θ)(adapt(SciMLBase.parameterless_type(θ), [t, W]))) else phi = (t, W, θ) -> u0 + (t - t0) * - re(θ)(adapt(DiffEqBase.parameterless_type(θ), [t, W])) + re(θ)(adapt(SciMLBase.parameterless_type(θ), [t, W])) end end @@ -108,9 +108,9 @@ function DiffEqBase.solve(prob::DiffEqBase.AbstractRODEProblem, u = [(phi(ts[i], W.W[i], res.minimizer)) for i in 1:length(ts)] end - sol = DiffEqBase.build_solution(prob, alg, ts, u, W = W, calculate_error = false) - DiffEqBase.has_analytic(prob.f) && - DiffEqBase.calculate_solution_errors!(sol; timeseries_errors = true, + sol = SciMLBase.build_solution(prob, alg, ts, u, W = W, calculate_error = false) + SciMLBase.has_analytic(prob.f) && + SciMLBase.calculate_solution_errors!(sol; timeseries_errors = true, dense_errors = false) sol end #solve From 6a555d583a38cc15cc6f7adbb89e17d2d56d19ab Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Sun, 7 Apr 2024 09:05:08 -0400 Subject: [PATCH 27/27] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 
80e56de80..f2fa480a9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NeuralPDE" uuid = "315f7962-48a3-4962-8226-d0f33b1235f0" authors = ["Chris Rackauckas "] -version = "5.15.0" +version = "5.16.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
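For context on the "Tag the original solution to `sol.original`" commit that this version bump releases: the sketch below shows one way the attached Optimization.jl result might be reused to warm-start further training, e.g. on an extended time span, in the spirit of the Discourse thread linked in that commit message. The problem, network, and optimizer are placeholders; only `sol.original` and the `init_params` positional argument are taken from the patch and docstrings above.

```julia
using NeuralPDE, Lux, OptimizationOptimisers

f(u, p, t) = -u                      # placeholder ODE: exponential decay
prob = ODEProblem(f, 1.0, (0.0, 1.0))
chain = Lux.Chain(Lux.Dense(1, 16, Lux.tanh), Lux.Dense(16, 1))
opt = OptimizationOptimisers.Adam(0.01)

sol = solve(prob, NNODE(chain, opt); dt = 1 / 20.0, maxiters = 1000)

# After this change the optimization result rides along on the solution, so its
# minimizer can seed a second round of training, here on a longer tspan.
θ_trained = sol.original.u
prob_long = remake(prob; tspan = (0.0, 2.0))
sol_long = solve(prob_long, NNODE(chain, opt, θ_trained); dt = 1 / 20.0, maxiters = 1000)
```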