From 120da8db7fdceab794b7c51d8dd574b6f7451583 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 8 Oct 2024 20:22:51 -0700 Subject: [PATCH] Add a tutorial framework + more tutorials --- .gitignore | 4 +- docs/CondaPkg.toml | 8 +++ docs/Project.toml | 3 ++ docs/make.jl | 15 ++++++ docs/src/assets/favicon.png | Bin 0 -> 15075 bytes docs/src/index.md | 47 +++++++++++++++-- docs/src/tutorials/creating_kerchunks.jl | 61 +++++++++++++++++++++++ docs/src/tutorials/mur_sst.jl | 0 8 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 docs/CondaPkg.toml create mode 100644 docs/src/assets/favicon.png create mode 100644 docs/src/tutorials/creating_kerchunks.jl create mode 100644 docs/src/tutorials/mur_sst.jl diff --git a/.gitignore b/.gitignore index 7bd8831..1452b2a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ /docs/build/ /test/ref.parquet/ -/test/real_zarray.zarr/ \ No newline at end of file +/test/real_zarray.zarr/ + +*/.CondaPkg/ \ No newline at end of file diff --git a/docs/CondaPkg.toml b/docs/CondaPkg.toml new file mode 100644 index 0000000..53768d8 --- /dev/null +++ b/docs/CondaPkg.toml @@ -0,0 +1,8 @@ +[deps] +# netcdf4 = "" +virtualizarr = "" +xarray = "" +zarr = "" +certifi = "" +s3fs = "" +kerchunk = "" diff --git a/docs/Project.toml b/docs/Project.toml index b28f405..189c68c 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,7 +1,9 @@ [deps] +AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" +CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" @@ -9,6 +11,7 @@ JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" Kerchunk = "12c09fd5-fe6a-4e79-8f42-b31f49215243" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" 
NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" RasterDataSources = "3cb90ccd-e1b6-4867-9617-4276c8b2ca36" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c" diff --git a/docs/make.jl b/docs/make.jl index 66db018..61a198f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,6 +1,12 @@ +# The first thing to do is to make sure Python's dynamic libraries +# are loaded first. +using CondaPkg, PythonCall +PythonCall.pyimport("aiohttp") + using Kerchunk using Documenter, DocumenterVitepress + DocMeta.setdocmeta!(Kerchunk, :DocTestSetup, :(using Kerchunk); recursive=true) using Literate @@ -69,6 +75,12 @@ withenv("JULIA_DEBUG" => "Literate") do # allow Literate debug output to escape # TODO: We should probably fix the above in `process_literate_recursive!`. end +# Now, process the tutorials +Literate.markdown( + joinpath("tutorials", "solar_dynamics_observatory.jl"), "tutorials"; + flavor = Literate.DocumenterFlavor(), +) + makedocs(; modules=[Kerchunk], authors="Anshul Singhvi and contributors", @@ -77,6 +89,9 @@ makedocs(; pages=[ "Home" => "index.md", "What is Kerchunk?" => "what_the_heck.md", + "Tutorials" => [ + "tutorials/solar_dynamics_observatory.md", + ], "API" => "api.md", "Source code" => literate_pages, ], diff --git a/docs/src/assets/favicon.png b/docs/src/assets/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..1dca6c57c87e69c89f7cf08bb1bc44bd29ff7d1f GIT binary patch literal 15075 zcmeHOeNL`xZVyhCD)WMG@-;iiNE)LaH z(WDhEK4FM01*)y2iik+U1yQ0RL_|e_B)}yk2qA<(Zocn3ftJqL!S_$sdaG+Cf8E^d zyZ1i(?0s_1@3-&qO&h}}Pw<^UAP^?6{qTd$1cE1-KzJ?ibuVxy^Dy2_AcS06`@zbO zSW!B~fd?APCn`)uenV;O=$KPbl{Rt4yHlZ`{4&=}_}k~fzQ^Z}YxeRSAD#7V{&(*k zJH4WA!R$uwuUISA1<${=!EgMZ@65KW{P5zpAJ5%)X8A+kk{P>$s`xwE(%QcIK1Eq- zV3a=P)DJuL%ZySdh7G+P_sQbFr}nXxM#qCY(Cq}^kNfjGN1$?x`q->EiKYX|`Kqw1 zE=a{uc@PL`neDxTP(4TMqcjvvM2t@1Kfh{p++j+3dY`t$vgbF~E%77}b}fpD!=I?) 
zIsNZRILVyaQl$Ma*RrEQ-c;_WSK6w)Nf1e&pNE=c0zs)=E8pQ{l)VOiJl_ATz1`gQ znX#e09}I^QM&q$4G)jZNpWhTBFy4s1!RMptnfBXwwjhEULIU|nC=3s#E`lIKbmx9? zax#SqZfz7MChu6z>t#r=yw3OYsyOi@w=)yL1?A4(#Sm#k-*+T%v2ls-18;sc@VhN= z0k}xta?;CKHR8dB`{1JOEw>6V8~+cnh=0@?kDkIvS!+&G7sN>n1wvAU@YhM)$(KzI zYVB`FC%^@FyOqg}bx;@~CoqQDdPuVhF$);*AXAB7S(CUF;f>IdqG01ITop z6$lDE{xq7W2_?-;PW<`EkdX7u-anli$XX=xE;Qd87Se?dXL}I5tOdxN5cO*J?uROO z-zV%Oe`8*fSmJuf>-%K=aE7JmE zfC!Xj5>PV*h*@e(9)B*FB=qP2N~zRSo*$iWqPctpYW!Scr*xj%{yLNFc^fD_#Paq2 zK2Xec`3ls8y2NI+fLOgx_4>k9>-~XKHtmbDxqQuTqA?c(v7qb3T}$?`CcWtnfI9=+ zCE#8I+>4<5U%>r0=-vmoH$v{6pnEIqJ`1=HLx1ekpkuu+)Lhfq5wV z%U7NL0%kJsJNULARq3xf#KQN?R)|jcxEq`OsFi*a>aILh8aJaXW2^&I;|#!$16 zrXP>Dd#dniddk*jd5us_=XaI_`aaNeUq(_6|7cXQNwTN^c`h6mRprifaG zJvFc*Xb229gnDc5Km=mniXqyGNM}W$geeh%9riQtCMpZd}!vYk8JMouOSfAxR$*7CEF=#Vho3EELUGUOx zJh|zmz1btiA0~8b+NRg>D@&H+hgr{i#E7$6)|mIJ%_d+oY!oh9vZCqp3{?x~^&JNs z&ZyP**?NTw{IVeG{GYSz^=z-1TYXOUuko*Mor7B}NU z#PJ>hT7aaI5R*VWKy8=S+F!WvA79wU-zYV$`6 zMsq{2@UAER%OoF^^i%qrDgmk{=F}HL`m%VE$cXQdEOS23XV7V5&-*5SV05R7Nh;k* zrFF1Fh8l(D6zLEiCYVJ}HlN8$9{`!++F~IfsoU(Ee!)S>p2=HT?FrN#m@bH<2I?Dy z81*nIz*;IxsK!~{O~<{)o{FvLO_Zfng$P=3R!t#fkmJXRkgN^^bD=F5GY592M6nD6 zivndmIhrEk-KJw1PDMiNAbqEG=)(Vu?dFCXeH0qT7ohe0BAv^cr5posR)`-QML|pjXZmQ9@$N9VWhVAR6on0uW=+y*+EvOO>g%qyBzSL#$wCW#+dMTS9Vc zs?ATyT@>UidDbx}GKBW+Fkf`5p{jv*6`kYafLNO;A(ZA?nJfry>A@d@MLHmtmE78; zZKH~lIUke<|D6yqA>3QW&L5GdEF}3+-!K}c0*1+hx`0e~d9o6$@#lrYy+N``ZP((H z`#`H{8lu=K<{wfbiZ?-Z3g ztwg^p=j_aR>Y*kEBsxrBo!G%tq(Ic7{+JUveM3R5@p3@+{vMwjms*pJbx@f(Ag1%; zp~ekCDkgPutfaeuE%dibI+NonEL}psh;%?T8*uzF|7le0He%1k)ue#6h1sB0&Aoe(aMd3J}N^|afXgQW0<4MYR1npYXrn`*A%GeeknV2%0VPo@bq?HxGpy8yW&vEE%y5$#e{3?t z-P-2v?Q=Iby1P5w?X9n3Q{DZ&?gnRfhxZ??#XZ7Y&%~dq>1~~x0eG+4UQ@U)Z5`IE zd9OsiQ+i^!&0nOl>fj_3#OW#~TPh&yaTu%B+v}Dlo{h6+(-ZN9rAkx(FlS#w;0~B@ zlMo5QKfQaPJ6kpI)H|+B{ghv{$0oA;7(=%@VB0)iz{sy~n`{vk{dzkjSqJPqm{y}< zNHyFTcp!;}wIfz*5O+4+t8)ULsb`^{OkT}W+{}jC!ppt2>vRnB52~R8OvCUjyM+&i z;?1j=3daNQqSZE$@s^z3*%E~p(C;E%iAVA0OsVCo44n#|?s|bkD3r66O!@Pyia|B8 
zqN@b%&b7b(rN868*jD2$vV9V8LvMr4g(2SBH*`p+TFf#pW8kNz#0m!d5obV{y;Xy> zDA zfai*$!1J7Lf4p~<2p`Yj$hRx4*NZ%?EE~hJOw85p5!(mYCGvf1G6V3XaNjKrT$Mpj zUe*lPHC32y)iNw<=n<{Zvdt!TCJA{;cyKlR*!409b@>XFRfM!xWK^3f<#--5i*D~6 zmsgj;mmSTj7K>HqKV1tE!4yH#$5b@DOj<0Fj&s6%7dN$hzSF){1~@%_VQtSA;d8yiSRTaA!~+t z*YFg?XM|pHy)J|u9d0O~_jZ1e_aszrJsCdy|3m-) literal 0 HcmV?d00001 diff --git a/docs/src/index.md b/docs/src/index.md index 3ad69e5..bcd2c78 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,11 +4,48 @@ CurrentModule = Kerchunk # Kerchunk -Documentation for [Kerchunk](https://github.com/JuliaIO/Kerchunk.jl). +Kerchunk.jl is a Julia package that enables loading [Kerchunk reference catalogs](https://fsspec.github.io/kerchunk/) as [Zarr.jl](https://github.com/JuliaIO/Zarr.jl) arrays. -```@index -``` -```@docs -ReferenceStore +## Quick start + +Kerchunk.jl is simply a storage backend to [`Zarr.jl`](https://github.com/JuliaIO/Zarr.jl). Zarr does integrate with the more fully featured packages [`Rasters.jl`](https://github.com/rafaqz/Rasters.jl) and [`YAXArrays.jl`](https://github.com/JuliaDataCubes/YAXArrays.jl), which are the packages you will want to use to interact with Kerchunk data. + +```julia +using Kerchunk, Zarr + +za = Zarr.zopen("reference://path/to/kerchunk/catalog.json") +# and treat it like any other Zarr array! +# You can even wrap it in YAXArrays.jl to get DimensionalData.jl accessors: +using YAXArrays +YAXArrays.open_dataset(za) +# or open it as a Rasters.RasterStack: +using Rasters +Rasters.RasterStack( + "reference://catalog.json", + source = Rasters.Zarrsource(), + lazy = true, # need to include this +) # source must be explicit ``` + +It's most useful to open Kerchunk datasets as either RasterStacks or YAXArrays datasets, since both of those packages have great dimensionality support. + +## Background + +[`kerchunk`](https://fsspec.github.io/kerchunk/) is a Python package that generates the reference catalogs. 
+ +## Limitations +- No support for `gen` references with templates. +- No support for complex Jinja2 templates in `refs`. (Although Kerchunk hardly supports this either...) + +## Acknowledgements + +This effort was funded by the NASA MEaSUREs program in contribution to the Inter-mission Time Series of Land Ice Velocity and Elevation (ITS_LIVE) project (https://its-live.jpl.nasa.gov/). + +## Alternatives and related packages + +- You can always use Python's `xarray` directly via PythonCall.jl +- [FSSpec.jl](https://github.com/asinghvi17/FSSpec.jl) is an alternative storage backend for Zarr.jl that wraps the same [`fsspec`](https://github.com/fsspec/filesystem_spec) that `xarray` uses under the hood. + +This package is of course built on top of [Zarr.jl](https://github.com/JuliaIO/Zarr.jl), which is a pure-Julia Zarr array library. +[YAXArrays.jl](https://github.com/JuliaDataCubes/YAXArrays.jl) is a Julia package that can wrap Zarr arrays in a DimensionalData-compatible interface. diff --git a/docs/src/tutorials/creating_kerchunks.jl b/docs/src/tutorials/creating_kerchunks.jl new file mode 100644 index 0000000..40bb75a --- /dev/null +++ b/docs/src/tutorials/creating_kerchunks.jl @@ -0,0 +1,61 @@ +#= +# Creating Kerchunk catalogs + +Kerchunk.jl is only a Kerchunk reader, meaning that if you want to create Kerchunk catalogs, you need to use the Python `kerchunk` package. + +The easiest way to do this in Julia is to use the [CondaPkg.jl](https://github.com/JuliaPy/CondaPkg.jl) package to install the `kerchunk` package into a Conda environment and then use PythonCall.jl to call the `kerchunk` package. This ensures reproducibility, since you can pin the versions in the generated CondaPkg.toml as well, and package management via CondaPkg has a very similar interface to `Pkg.jl`. 
+=# + + +#= + +## Setting up the Conda environment + +```julia +using CondaPkg +CondaPkg.add("python") +CondaPkg.add("kerchunk") +``` + + +=# + +#= + +## Creating a Kerchunk catalog + +=# + +using CondaPkg, PythonCall + +# There are two approaches to this - either call Python explicitly via the command line, or call Python via PythonCall.jl. +# Calling Python via Julia is a nicer interface, but you will very quickly run into binary incompatibility issues. Below we take the command-line route: the Conda environment's `python` is run with the script passed inline via `-c`. + +CondaPkg.withenv() do +run(``` +$(CondaPkg.which("python")) -c " +import kerchunk +import kerchunk.hdf5 as hdf + +# do something +" +```) +end + +#= + +## Using PythonCall.jl + +```julia + +``` + +=# + +# Let's load this using Kerchunk.jl, and see what we get! + +using Zarr, Kerchunk +z = Zarr.zopen("reference://catalog.json") + +using Rasters, ZarrDatasets +r = Raster(z) \ No newline at end of file diff --git a/docs/src/tutorials/mur_sst.jl b/docs/src/tutorials/mur_sst.jl new file mode 100644 index 0000000..e69de29