Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement basic functionality of the Zarr Version 3 specification. #1

Merged
merged 1 commit into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD 3-Clause License

Copyright (c) 2024, zoj613
Copyright (c) 2024, Zolisa Bleki

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Expand Down
112 changes: 111 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,112 @@
# zarr-ml
An Ocaml implementation of the Zarr version 3 specification.
An implementation of the Zarr version 3 specification.


## Example
```ocaml
open Zarr
open Zarr.Codecs
open Zarr.Storage
open Zarr.Metadata
open Zarr.Extensions
module Ndarray = Owl.Dense.Ndarray.Generic

let store =
Result.get_ok @@
FilesystemStore.open_or_create ~file_perm:0o777 "testdata.zarr";;

let group_node =
Result.get_ok @@ Node.of_path "/some/group";;

FilesystemStore.create_group store group_node;;

let array_node =
Result.get_ok @@ Node.(group_node / "name");;

let shard_config = {
chunk_shape = [|5; 5; 10|];
codecs = Chain.create [] (Bytes Little) [Gzip L5];
index_codecs = Chain.create [] (Bytes Big) [Crc32c];
index_location = Start
};;
let codec_chain =
Chain.create [Transpose [|0; 1; 2|]] (ShardingIndexed shard_config) [];;

FilesystemStore.create_array
~codecs:codec_chain
~shape:[|100; 100; 50|]
~chunks:[|15; 15; 20|]
(FillValue.Float Float.neg_infinity)
Datatype.Float32
array_node
store;;

FilesystemStore.find_all_nodes store |> List.map Node.to_path;;
(* - : string list = ["/"; "/some"; "/some/group/name"; "/some/group"] *)

let slice = Owl_types.[|R [0; 20]; I 10; R []|];;
let x =
Result.get_ok @@
FilesystemStore.get_array array_node slice Bigarray.Float32 store;;
(*
C0 C1 C2 C3 C4 C45 C46 C47 C48 C49
R[0,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[1,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[2,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[3,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[4,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
... ... ... ... ... ... ... ... ... ... ...
R[16,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[17,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[18,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[19,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF
R[20,0] -INF -INF -INF -INF -INF ... -INF -INF -INF -INF -INF *)

(* Do some computation on the array slice *)
let x' = Ndarray.map (fun _ -> Owl_stats_dist.uniform_rvs 0. 100.) x;;
FilesystemStore.set_array array_node slice x' store;;

FilesystemStore.get_array
array_node
Owl_types.[|R [0; 73]; L [10; 16]; R[0; 5]|]
Bigarray.Float32
store;;
(*
C0 C1 C2 C3 C4 C5
R[0,0] 68.0272 44.914 85.2431 39.0772 26.582 16.577
R[0,1] -INF -INF -INF -INF -INF -INF
R[1,0] 88.418 77.0368 43.4968 45.1263 8.95641 76.9155
R[1,1] -INF -INF -INF -INF -INF -INF
R[2,0] 98.4036 77.8744 67.6689 56.8803 37.0718 97.042
... ... ... ... ... ...
R[71,1] -INF -INF -INF -INF -INF -INF
R[72,0] -INF -INF -INF -INF -INF -INF
R[72,1] -INF -INF -INF -INF -INF -INF
R[73,0] -INF -INF -INF -INF -INF -INF
R[73,1] -INF -INF -INF -INF -INF -INF *)

FilesystemStore.reshape store array_node [|25; 32; 10|];;
FilesystemStore.get_array
array_node
Owl_types.[|R []; I 10; R[0; 5]|]
Bigarray.Float32
store;;
(*
C0 C1 C2 C3 C4 C5
R[0,0] 68.0272 44.914 85.2431 39.0772 26.582 16.577
R[1,0] 88.418 77.0368 43.4968 45.1263 8.95641 76.9155
R[2,0] 98.4036 77.8744 67.6689 56.8803 37.0718 97.042
R[3,0] 22.8653 20.1767 88.9549 22.1052 9.86822 10.8826
R[4,0] 55.6043 93.8599 60.3723 40.543 46.8199 97.282
... ... ... ... ... ...
R[20,0] 61.2473 78.8035 52.3056 59.5631 78.2462 52.4205
R[21,0] -INF -INF -INF -INF -INF -INF
R[22,0] -INF -INF -INF -INF -INF -INF
R[23,0] -INF -INF -INF -INF -INF -INF
R[24,0] -INF -INF -INF -INF -INF -INF *)

FilesystemStore.array_metadata array_node store
|> Result.get_ok
|> ArrayMetadata.shape;;
(* - : int array = [|25; 32; 10|] *)
```
34 changes: 34 additions & 0 deletions dune-project
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
(lang dune 3.15)

(name zarr)

(generate_opam_files true)

(source
(github zoj613/zarr-ml))

(authors "Author Name")

(maintainers "Maintainer Name")

(license BSD-3-Clause)

(documentation https://url/to/documentation)

(package
 (name zarr)
 (synopsis "A short synopsis")
 (description "A longer description")
 (depends
  dune
  (ocaml (>= 4.14.2))
  yojson
  ; Provides the [@@deriving yojson] ppx used by the codec modules.
  ppx_deriving_yojson
  ezgzip
  checkseum
  stdint
  owl)
 (tags
  (topics "to describe" your project)))

; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project
66 changes: 66 additions & 0 deletions lib/codecs/array_to_array.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
module Ndarray = Owl.Dense.Ndarray.Generic

(** Axis order handed to the transpose codec: entry [i] names the source
    axis placed at position [i]. A valid order is a permutation of
    [0 .. n-1]. *)
type dimension_order = int array

(** The array->array codecs implemented in this file. *)
type array_to_array =
| Transpose of dimension_order

(** Failure of a transpose: carries the offending order together with a
    message describing the failure. *)
type error =
[ `Invalid_transpose_order of dimension_order * string ]

(* https://zarr-specs.readthedocs.io/en/latest/v3/codecs/transpose/v1.0.html *)
module TransposeCodec = struct
  (* Contents of the codec's "configuration" member in the JSON metadata. *)
  type config = {order : int array} [@@deriving yojson]
  type transpose = config Util.ext_point [@@deriving yojson]

  (* The encoded size equals the input size for this codec. *)
  let compute_encoded_size input_size = input_size

  (* [is_permutation o] holds iff [o] contains every index in
     [0 .. Array.length o - 1] exactly once. Runs in a single pass using a
     "seen" table instead of sorting a copy. *)
  let is_permutation o =
    let n = Array.length o in
    let seen = Array.make n false in
    Array.for_all
      (fun axis ->
        if axis < 0 || axis >= n || seen.(axis) then false
        else begin
          seen.(axis) <- true;
          true
        end)
      o

  (* [encode o x] transposes [x] using [o] as the axis order. A [Failure]
     raised by [Ndarray.transpose] is reported as
     [Error (`Invalid_transpose_order (o, msg))]. *)
  let encode o x =
    try Ok (Ndarray.transpose ~axis:o x) with
    | Failure s -> Error (`Invalid_transpose_order (o, s))

  (* [decode o x] undoes [encode o] by transposing [x] with the inverse of
     [o]. The order is validated first: without the check an out-of-range
     entry makes the inverse computation raise [Invalid_argument] (outside
     any handler), and a repeated entry leaves stale slots in the inverse,
     which can silently produce a wrong result instead of an error. *)
  let decode o x =
    if not (is_permutation o) then
      Error
        (`Invalid_transpose_order
          (o, "order must be a permutation of 0..n-1"))
    else begin
      let inv_order = Array.make (Array.length o) 0 in
      Array.iteri (fun i axis -> inv_order.(axis) <- i) o;
      try Ok (Ndarray.transpose ~axis:inv_order x) with
      | Failure s -> Error (`Invalid_transpose_order (o, s))
    end

  (* Serializes the codec as an extension point named "transpose" whose
     configuration holds [order]. *)
  let to_yojson order =
    transpose_to_yojson
      {name = "transpose"; configuration = {order}}

  (* [parse_order o] wraps [o] in [Transpose] when it is a permutation of
     [0 .. n-1], and returns [`Invalid_transpose_order] otherwise. *)
  let parse_order o =
    if is_permutation o then Ok (Transpose o)
    else
      Error
        (`Invalid_transpose_order
          (o, "order must be a permutation of 0..n-1"))

  (* Parses the JSON form of the codec and validates its order; any
     invalid order is reported with the fixed message below. *)
  let of_yojson x =
    let open Util.Result_syntax in
    transpose_of_yojson x >>= fun trans ->
    parse_order trans.configuration.order
    >>? fun (`Invalid_transpose_order _) -> "Invalid transpose order"
end

module ArrayToArray = struct
  (* Encoded size for an input of [input_size], as reported by the
     selected codec. *)
  let compute_encoded_size input_size codec =
    match codec with
    | Transpose _ -> TransposeCodec.compute_encoded_size input_size

  (* Applies the selected codec to an array. *)
  let encode = function
    | Transpose order -> TransposeCodec.encode order

  (* Reverses the selected codec on an encoded array. *)
  let decode = function
    | Transpose order -> TransposeCodec.decode order

  (* JSON representation of the selected codec. *)
  let to_yojson codec =
    match codec with
    | Transpose order -> TransposeCodec.to_yojson order

  (* Picks the codec parser from the name stored in the JSON value; names
     other than "transpose" are rejected with an error string. *)
  let of_yojson json =
    let name = Util.get_name json in
    match name with
    | "transpose" -> TransposeCodec.of_yojson json
    | _ -> Error ("array->array codec not supported: " ^ name)
end
23 changes: 23 additions & 0 deletions lib/codecs/array_to_array.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
module Ndarray = Owl.Dense.Ndarray.Generic

(** Axis order used by the transpose codec; a valid order is a permutation
    of [0 .. n-1]. *)
type dimension_order = int array

(** The supported array->array codecs. *)
type array_to_array =
| Transpose of dimension_order

(** Failure of a transpose: the offending order paired with a message
    describing the failure. *)
type error =
[ `Invalid_transpose_order of dimension_order * string ]

(** Operations over the supported array->array codecs. *)
module ArrayToArray : sig

(** [compute_encoded_size input_size codec] is the size produced by encoding
    an input of size [input_size] with [codec]. For [Transpose] the size is
    returned unchanged. *)
val compute_encoded_size : int -> array_to_array -> int

(** [encode codec x] applies [codec] to [x]. For [Transpose order] the axes
    of [x] are permuted according to [order]; the result is
    [Error (`Invalid_transpose_order _)] when the transposition rejects the
    order. *)
val encode
: array_to_array ->
('a, 'b) Ndarray.t ->
(('a, 'b) Ndarray.t, [> error]) result

(** [decode codec x] reverses [encode codec]. For [Transpose order] the
    inverse of [order] is applied to [x]; the result is
    [Error (`Invalid_transpose_order _)] when the transposition rejects the
    order. *)
val decode
: array_to_array ->
('a, 'b) Ndarray.t ->
(('a, 'b) Ndarray.t, [> error]) result

(** [of_yojson json] reads a codec from its JSON form. Returns [Error msg]
    when the codec name is not supported or the transpose order is
    invalid. *)
val of_yojson : Yojson.Safe.t -> (array_to_array, string) result

(** [to_yojson codec] is the JSON form of [codec]; a [Transpose] is written
    with name ["transpose"] and its order as configuration. *)
val to_yojson : array_to_array -> Yojson.Safe.t
end
Loading