Merge pull request #13 from gaelforget/v0p1p6b

consolidate examples and interface
gaelforget · Apr 20, 2021 · c439210 · c439210
2 parents f04725d + a13ef5c
commit c439210
Show file tree

Hide file tree

Showing 9 changed files with 273 additions and 136 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "ClimateModels"
 uuid = "f6adb021-9183-4f40-84dc-8cea6f651bb0"
 authors = ["gaelforget <gforget@mit.edu>"]
-version = "0.1.5"
+version = "0.1.6"
 
 [deps]
 AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
@@ -11,9 +11,12 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"
+NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
@@ -24,6 +27,8 @@ CFTime = "0.1"
 CSV = "0.6, 0.7, 0.8"
 DataFrames = "0.21, 0.22"
 Git = "1.2"
+NetCDF = "0.11"
+OrderedCollections = "1.4"
 Suppressor = "0.2"
 Zarr = "0.6"
 julia = "^1.6"
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -51,13 +51,14 @@ monitor
 
 ## Git Support
 
-The `setup` method normally calls `init_git_log` to set up a temporary run folder with a `git` enabled subfolder called `log`. This allows for recording a workflow steps e.g. through the other functions listed here.
+The `setup` method normally calls `git_log_init` to set up a temporary run folder with a `git` enabled subfolder called `log`. This allows for recording workflow steps, e.g. through the other functions listed here.
 
 ```@docs
-init_git_log
+git_log_init
 git_log_msg
 git_log_fil
 git_log_prm
+git_log_show
 ```
 
 ## Cloud Support

diff --git a/examples/CMIP6.jl b/examples/CMIP6.jl
@@ -1,39 +1,102 @@
-# # Retrieve CMIP6 model output
+# # Cloud Computing Workflow
 #
-# - Access Climate Model Output Using `AWS.jl` and `Zarr.jl`
-# - Choose institution_id, source_id, variable_id
-# - Compute and plot (1) time mean global map and (2) time evolving global mean
+# This example relies on model output that has already been computed and made available over the internet. 
+# It accesses model output via the `AWS.jl` and `Zarr.jl` packages as the starting point for further modeling / computation. 
+# Workflow summary:
+#
+# - Access climate model output in cloud storage
+# - Choose model (`institution_id`, `source_id`, `variable_id`)
+# - Compute, save, and plot (_1._ global mean over time; _2._ time mean global map)
+
+using ClimateModels, Plots, Statistics, TOML, CSV, DataFrames, NetCDF
+
+# ## Model Configuration
+#
+# Here we choose to access temperature (`tas`) from a model run by `IPSL` as part of [CMIP6](https://www.wcrp-climate.org/wgcm-cmip/wgcm-cmip6) (Coupled Model Intercomparison Project Phase 6).
+
+parameters=Dict("institution_id" => "IPSL", "source_id" => "IPSL-CM6A-LR", "variable_id" => "tas")
+
+function GlobalAverage(x)
+    # Main computation = model run = access cloud storage + compute averages.
+    # `cmip` returns the map data (mm), the global-mean series (gm), and meta-data (meta).
+    varname = x.inputs["variable_id"]
+    (mm,gm,meta)=cmip(x.inputs["institution_id"],x.inputs["source_id"],varname)
+
+    # All output files are written to this configuration's run folder.
+    rundir=joinpath(x.folder,string(x.ID))
+
+    # Global averages -> CSV file (columns `time` and `tas`)
+    fil=joinpath(rundir,"GlobalAverages.csv")
+    df = DataFrame(time = gm["t"], tas = gm["y"])
+    CSV.write(fil, df)
+
+    # Meta-data -> TOML file
+    fil=joinpath(rundir,"Details.toml")
+    open(fil, "w") do io
+        TOML.print(io, meta)
+    end
+
+    # Maps + meta-data -> NetCDF file. The variable is created under `varname`,
+    # i.e. the same name `ncwrite` targets below; creating it as a hard-coded
+    # "tas" would make the write fail for any other `variable_id`.
+    filename = joinpath(rundir,"MeanMaps.nc")
+    nccreate(filename, varname, "lon", collect(Float32.(mm["lon"][:])), "lat", collect(Float32.(mm["lat"][:])), atts=meta)
+    ncwrite(Float32.(mm["m"]), filename, varname)
+
+    return x
+end
+
+MC=ModelConfig(model="GlobalAverage",configuration=GlobalAverage,inputs=parameters)
 
-using ClimateModels, Plots, Statistics
+# ## Setup and Launch
+#
+# _Note: this step may take the most time, since `launch` is where data is accessed over the internet and computation takes place._
+
+setup(MC)
+launch(MC)
+
+# ## Read Output Files
+#
+# The `GlobalAverage` function, called via `launch`, should now have generated the following output:
+#
+# - Global averages in a `CSV` file
+# - Meta-data in a `TOML` file
+# - Maps + meta-data in a `NetCDF` file
+
+fil=joinpath(MC.folder,string(MC.ID),"MeanMaps.nc")
+lon = NetCDF.open(fil, "lon")
+lat = NetCDF.open(fil, "lat")
+tas = NetCDF.open(fil, "tas")
 
-# ## Access Model Ouput
 #
-# Here we select that we want to access temperate `tas` from a model by `IPSL`.
 
-(mm,gm,meta)=cmip("IPSL","IPSL-CM6A-LR","tas")
+fil=joinpath(MC.folder,string(MC.ID),"Details.toml")
+meta=TOML.parsefile(fil)
+
+#
+
+fil=joinpath(MC.folder,string(MC.ID),"GlobalAverages.csv")
+GA=CSV.read(fil,DataFrame)
+show(GA,truncate=8)
 
 # ## Plot Results
 #
-# Afterwards, one often uses model output for further analysis. Here we 
-# compute and plot (1) time mean global map and (2) time evolving global mean.
+# Plots below are based on results from the output file(s) shown above.
+#
+# #### 1. Time Mean Seasonal Cycle
 
 nm=meta["long_name"]*" in "*meta["units"]
-m=heatmap(mm["lon"], mm["lat"], transpose(mm["m"]), title=nm*" (time mean)")
-
-# ### Time Mean Seasonal Cycle
 
-t=gm["t"]; y=gm["y"]
-ylab=meta["long_name"]*" in "*meta["units"]
-ny=Int(length(t)/12)
-a_y=fill(0.0,(ny,12))
-[a_y[:,i].=y[i:12:end] for i in 1:12]
+ny=Int(length(GA.time)/12)
+y=fill(0.0,(ny,12))
+[y[:,i].=GA.tas[i:12:end] for i in 1:12]
 
-s=plot([0.5:1:11.5],vec(mean(a_y,dims=1)), xlabel="month",ylabel=ylab, 
+s=plot([0.5:1:11.5],vec(mean(y,dims=1)), xlabel="month",ylabel=nm,
 leg = false, title=meta["institution_id"]*" (global mean, seasonal cycle)",frmt=:png)
 
-# ### Month By Month Time Series
+# #### 2. Month By Month Time Series
 
-p=plot(gm["t"][1:12:end],gm["y"][1:12:end],xlabel="time",ylabel=nm,
+p=plot(GA.time[1:12:end],GA.tas[1:12:end],xlabel="time",ylabel=nm,
 title=meta["institution_id"]*" (global mean, Month By Month)",frmt=:png)
-[plot!(gm["t"][i:12:end],gm["y"][i:12:end], leg = false) for i in 2:12];
+[plot!(GA.time[i:12:end],GA.tas[i:12:end], leg = false) for i in 2:12];
+p
+
+# #### 3. Time Mean Global Map
 
+m=heatmap(lon[:], lat[:], permutedims(tas[:,:]), title=nm*" (time mean)")
diff --git a/examples/MITgcm.jl b/examples/MITgcm.jl
@@ -11,46 +11,41 @@ using ClimateModels, MITgcmTools, MeshArrays, Plots, Suppressor
 
 # ## Setup Model
 #
-# The most standard MITgcm configurations (_verification experiments_) are all available via the `MITgcmTools.jl` package.
+# The most standard MITgcm configurations (_verification experiments_) are all readily available via `MITgcmTools.jl`'s `MITgcm_config` function.
 #
 
-exps=verification_experiments() 
-myexp="global_with_exf"
-tmp=[exps[i].configuration==myexp for i in 1:length(exps)]
-iexp=findall(tmp)[1];
+MC=MITgcm_config(configuration="global_with_exf")
 
-# User can inspect model parameters (e.g. in _data_) via functions also provided by `MITgcmTools.jl` (e.g. `MITgcm_namelist`)
+# The `setup` function links input files to the `run/` folder (see below) including model parameters that are then accessed via `MC.inputs`.
 
-fil=joinpath(MITgcm_path,"verification",exps[iexp].configuration,"input","data")
-nml=read(fil,MITgcm_namelist())
+setup(MC)
+MC.inputs
 
-# ### Where Is `mitgcmuv` located?
+# ## Build `mitgcmuv`
 #
 # The model executable `mitgcmuv` is normally found in the `build/` subfolder of the selected experiment.
-# If `mitcmuv` is not found at this stage then it is assumed that the chosen model configuration has never been compiled. 
-# Thus we need to compile and run the model a first time via the `build` function. 
+# If `mitgcmuv` is not found at this stage then it is assumed that the chosen model configuration still needs to be compiled (once, via the `build` function).
 # This might take a lot longer than a normal model run due to the one-time cost of compiling the model.
 
-filexe=joinpath(MITgcm_path,"verification",exps[iexp].configuration,"build","mitgcmuv")
-!isfile(filexe) ? build(exps[iexp]) : nothing
-pp=joinpath(exps[iexp].folder,string(exps[iexp].ID),"run")
-filout=joinpath(pp,"output.txt")
-filstat=joinpath(pp,"onestat.txt");
+filexe=joinpath(MITgcm_path,"verification",MC.configuration,"build","mitgcmuv")
+@suppress !isfile(filexe) ? build(MC) : nothing
 
 # ## Run Model
 #
-# The main model computation takes place here.
+# The main model computation takes place via the `launch` function. This will output files in the `rundir` folder, incl. the MITgcm standard `output.txt` file.
 
-@suppress setup(exps[iexp])
+@suppress launch(MC)
 
-@suppress launch(exps[iexp])
+rundir=joinpath(MC.folder,string(MC.ID),"run")
+fileout=joinpath(rundir,"output.txt")
+readlines(fileout)
 
-# ### Plot Monitor
-#
-# Often, _monitor_ denotes a statement / counter printed to standard model output (text file) at regular intervals. 
-# In the example below, we use global mean temperature which is reported every time step as `dynstat_theta_mean`.
+# ## Monitor Model
+# 
+# Often, the term _monitor_ in climate modeling denotes a statement / counter printed to standard model output (text file) at regular intervals to monitor the model's integration through time. In the example below, we use global mean temperature which is reported every time step as `dynstat_theta_mean` in the MITgcm `output.txt` file.
 
-run(pipeline(`grep dynstat_theta_mean $(filout)`,filstat))
+filstat=joinpath(rundir,"onestat.txt")
+run(pipeline(`grep dynstat_theta_mean $(fileout)`,filstat))
 
 tmp0 = read(filstat,String)
 tmp0 = split(tmp0,"\n")
@@ -59,12 +54,9 @@ p=plot(Tmean,frmt=:png)
 
 # ## Plot Results
 #
-# While such models run, they typically output snapshots and/or time-averages of state variables 
-# in e.g. `binary` or `netcdf` format. Aftewards, e.g. once the model run has completed, 
-# one often wants to reread this output for further analysis. Here, for example, we
-# reread and plot a temperature field saved at the last time step (`T.0000000020`).
+# As models run through time, they typically output snapshots and/or time-averages of state variables in `binary` or `netcdf` format for example. Afterwards, or even while the model runs, one can reread this output. Here, for example, we plot the temperature map after 20 time steps (`T.0000000020`) this way by using the convenient [MITgcmTools.jl](https://gaelforget.github.io/MITgcmTools.jl/dev/) and [MeshArrays.jl](https://juliaclimate.github.io/MeshArrays.jl/dev/) packages which simplify the handling of files and data.
 
-XC=read_mdsio(pp,"XC"); siz=size(XC)
+XC=read_mdsio(rundir,"XC"); siz=size(XC)
 
 mread(xx::Array,x::MeshArray) = read(xx,x) 
 function mread(fil::String,x::MeshArray)
@@ -73,7 +65,13 @@ function mread(fil::String,x::MeshArray)
  read(read_mdsio(d,b),x)
 end
 
-γ=gcmgrid(pp,"PeriodicChannel",1,fill(siz,1), [siz[1] siz[2]], eltype(XC), mread, write)
+γ=gcmgrid(rundir,"PeriodicChannel",1,fill(siz,1), [siz[1] siz[2]], eltype(XC), mread, write)
 Γ=GridLoad(γ)
-T=read_mdsio(pp,"T.0000000020")
+T=read_mdsio(rundir,"T.0000000020")
 h=heatmap(T[:,:,1]',frmt=:png)
+
+# ## Workflow Outline
+# 
+# _ClimateModels.jl_ additionally supports workflow documentation using `git`. Here we summarize this workflow's record.
+
+git_log_show(MC)
diff --git a/examples/RandomWalker.jl b/examples/RandomWalker.jl
@@ -10,9 +10,9 @@ using ClimateModels, Pkg, Plots, CSV, DataFrames
 
 function RandomWalker(x)
  #model run
- N=10000
- m=zeros(N,2)
- [m[i,j]=m[i-1,j]+rand((-1,1)) for j in 1:2, i in 2:N]
+ nSteps=x.inputs["nSteps"]
+ m=zeros(nSteps,2)
+ [m[i,j]=m[i-1,j]+rand((-1,1)) for j in 1:2, i in 2:nSteps]
 
  #output to file
  df = DataFrame(x = m[:,1], y = m[:,2])
@@ -30,14 +30,30 @@ end
 #
 # _Note: `RandomWalker` returns results also directly as an Array, but this is generally not an option for most, larger, models_
 
-m=ModelConfig(model=RandomWalker)
+m=ModelConfig(model=RandomWalker,inputs=Dict("nSteps" => 1000))
 setup(m)
-xy=launch(m);
+launch(m)
+m
+
+# ## Exercise 
+#
+# Change the duration parameter (`nSteps`) and update the following cells.
+
+m.inputs["nSteps"]=10000
+setup(m)
+launch(m)
 
 # ## Plot Results
 #
 # Afterwards, one often uses model output for further analysis. Here we plot the random walker path from the `csv` output file.
 
 fil=joinpath(m.folder,string(m.ID),"RandomWalker.csv")
 output = CSV.File(fil) |> DataFrame
-img=plot(output.x,output.y,frmt=:png)
+img=plot(output.x,output.y,frmt=:png,leg=:none)
+
+# ## Workflow Outline
+#
+# Workflow steps are documented using `git`.
+# Here we show the git record for this workflow (in timeline order).
+
+git_log_show(m)