diff --git a/man-roxygen/rmdhunks/speed.Rmdh b/man-roxygen/rmdhunks/speed.Rmdh
index 9fa84489b..c51742540 100644
--- a/man-roxygen/rmdhunks/speed.Rmdh
+++ b/man-roxygen/rmdhunks/speed.Rmdh
@@ -29,7 +29,7 @@ mod1 <- function() {
     d/dt(peri) = Q*C2 - Q*C3
     d/dt(eff) = Kin - Kout*(1-C2/(EC50+C2))*eff
     eff(0) = 1
-  }) 
+  })
 }
 ```
 
@@ -37,7 +37,7 @@ Or you can also specify the end-points for simulation/estimation just
 like `nlmixr2`:
 
 ```{r mod2speed}
-mod2 <- function() {
+mod2f <- function() {
   ini({
     TKA   <- 0.3
     TCL   <- 7
@@ -53,7 +53,7 @@ mod2 <- function() {
     eff.add.sd <- 0.1
   })
   model({
-    KA <- TKA   
+    KA <- TKA
     CL <- TCL*exp(eta.cl)
     V2  <- TV2*exp(eta.v)
     Q   <- TQ
@@ -70,7 +70,7 @@ mod2 <- function() {
     eff(0) = 1
     C2 ~ prop(c2.prop.sd)
     eff ~ add(eff.add.sd)
-  }) 
+  })
 }
 ```
 
@@ -86,7 +86,7 @@ The first step can be done by `rxode2(mod1)` or `mod1()` (or for the second mode
 
 ```{r mod3eval}
 mod1 <- mod1()
-mod2 <- rxode2(mod2)
+mod2f <- rxode2(mod2f)
 ```
 
 The second step is to create the underlying "classic" `rxode2` model,
@@ -100,8 +100,8 @@ You can see the differences below:
 ```{r modSimModel}
 summary(mod1$simulationModel)
 summary(mod1$simulationIniModel)
-summary(mod2$simulationModel)
-summary(mod2$simulationIniModel)
+summary(mod2f$simulationModel)
+summary(mod2f$simulationIniModel)
 ```
 
 If you wish to speed up multiple simualtions from the `rxode2`
@@ -110,7 +110,7 @@ functions, you need to pre-calculate care of the steps above:
 ```{r simModel2}
 mod1 <- mod1$simulationModel
 
-mod2 <- mod2$simulationModel
+mod2 <- mod2f$simulationModel
 ```
 
 These functions then can act like a normal ui model to be solved.  You
@@ -180,7 +180,7 @@ runFor <- function(){
     return(res)
 }
 ```
-## Running with apply 
+## Running with apply
 
 In general for R, the `apply` types of functions perform better than a
 `for` loop, so the tutorial also suggests this speed enhancement
@@ -209,7 +209,7 @@ runSingleThread <- function(){
 
 rxode2 supports multi-threaded solves, so another option is to have `2`
 threads (called `cores` in the solve options, you can see the options
-in `rxControl()` or `rxSolve()`). 
+in `rxControl()` or `rxSolve()`).
 
 ```{r}
 run2Thread <- function(){
@@ -260,9 +260,97 @@ threads. 4 threads is a good number to use without any prior knowledge
 because most systems these days have at least 4 threads (or 2
 processors with 4 threads).
 
+# Increasing speed with compiler options
+
+One of the way that allows faster ODE solving is to make some
+approximations that make some math operators like `exp()` faster but
+not technically accurate enough to follow the IEEE standard for the
+math functions values (there are other implications that I will not
+cover here).
+
+While these are optimizations are [opt-in for
+Julia](https://github.com/JuliaLang/julia/blob/master/base/fastmath.jl)
+since they compile everything each session, CRAN has a more
+conservative approach since individuals do not compile each R function
+before running it.
+
+Still, `rxode2` models can be compiled with this option without
+disturbing CRAN policies.  The key is to set an option.  Here is an
+example:
+
+```{r}
+# Using the first example subset to PK
+mod2f <- function() {
+  ini({
+    TKA   <- 0.3
+    TCL   <- 7
+    TV2   <- 40
+    TQ    <- 10
+    TV3   <- 300
+    TKin  <- 0.2
+    TKout <- 0.2
+    TEC50 <- 8
+    eta.cl + eta.v ~ c(0.09,
+                       0.08, 0.25)
+    c2.prop.sd <- 0.1
+  })
+  model({
+    KA <- TKA
+    CL <- TCL*exp(eta.cl)
+    V2  <- TV2*exp(eta.v)
+    Q   <- TQ
+    V3  <- TV3
+    Kin  <- TKin
+    Kout <- TKout
+    EC50 <- TEC50
+    C2 = centr/V2
+    C3 = peri/V3
+    d/dt(depot) = -KA*depot
+    d/dt(centr) = KA*depot - CL*C2 - Q*C2 + Q*C3
+    d/dt(peri) = Q*C2 - Q*C3
+    C2 ~ prop(c2.prop.sd)
+  })
+}
+
+mod2f <- mod2f()
+
+mod2s <- mod2f$simulationIniModel
+
+ev  <- et(amountUnits="mg", timeUnits="hours") %>%
+  et(amt=10000, addl=9,ii=12,cmt="depot") %>%
+  et(time=120, amt=2000, addl=4, ii=14, cmt="depot") %>%
+  et(0:240) # Add sampling
+
+bench1 <- microbenchmark(standardCompile=rxSolve(mod2s, ev, nSub=1000))
+
+# Now clear the cache of models so we can change the compile options for the same model
+rxClean()
+
+# Use withr to preserve the options
+withr::with_options(list(rxode2.compile.O="fast"), {
+  mod2s <- mod2f$simulationIniModel
+})
+
+bench2 <- microbenchmark(fastCompile=rxSolve(mod2s, ev, nSub=1000))
+
+bench <- rbind(bench1, bench2)
+
+print(bench)
+
+autoplot(bench)
+```
+
+Note compiler settings can be tricky and if you setup your system wide
+`Makevars` it may interact with this setting. For example if you use
+`ccache` the compile may not be produced with the same options since
+it was cached with the other options.
+
+Anyhow, there is some minimal speed improvement by adding this compile
+option.
+
 # A real life example
 
-Before some of the parallel solving was implemented, the fastest way
+cBefore some of the parallel solving was implemented, the fastest way
 to run `rxode2` was with `lapply`.  This is how Rik Schoemaker created
 the data-set for `nlmixr` comparisons, but reduced to run faster
 automatic building of the pkgdown website.
@@ -277,7 +365,7 @@ library(data.table)
   d/dt(centr)  =  KA*abs-(CL/V)*centr;
   C2=centr/V;
   "
-  
+
 #Create the rxode2 simulation object
 mod1 <- rxode2(model = ode1)
 
@@ -310,7 +398,7 @@ mod1 <- rxode2(model = ode1)
 params.all[, AMT := rep(100 * doses,nsubg)]
 
 Startlapply <- Sys.time()
-  
+
 #Run the simulations using lapply for speed
   s = lapply(1:nsub, function(i) {
 #selects the parameters associated with the subject to be simulated
@@ -331,12 +419,12 @@ Startlapply <- Sys.time()
 #merges the parameters and ID number to the simulation output
     x[, names(params) := params]
   })
-  
+
 #runs the entire sequence of 100 subjects and binds the results to the object res
   res = as.data.table(do.call("rbind", s))
-  
+
 Stoplapply <- Sys.time()
-  
+
 print(Stoplapply - Startlapply)
 ```
 
@@ -374,7 +462,7 @@ ev <- do.call("rbind",
                 et(id=seq(1, nsubg) + (i - 1) * nsubg) %>%
                 ## Convert to data frame to skip sorting the data
                 ## When binding the data together
-                as.data.frame 
+                as.data.frame
         }))
 ## To better compare, use the same output, that is data.table
 res <- rxSolve(rx, ev, omega=omega, returnType="data.table")
@@ -388,12 +476,12 @@ things to keep in mind:
  - `rxode2` use the thread-safe sitmo `threefry` routines for simulation of `eta`
    values. Therefore the results are expected to be different (also the random
    samples are taken in a different order which would be different)
-   
+
  - This prior simulation was run in R 3.5, which has a different
    random number generator so the results in this simulation will be
    different from the actual nlmixr comparison when using the slower
    simulation.
-   
+
  - This speed comparison used `data.table`.  `rxode2` uses `data.table`
    internally (when available) try to speed up sorting, so this would
    be different than installations where `data.table` is not