diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 998e3c8..9b99345 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,7 @@ jobs: dfx start --background dfx deploy --no-wallet dfx canister call Calc test + dfx canister call Sort test - name: Build docs run: /home/runner/.cache/dfinity/versions/$DFX_VERSION/mo-doc - name: Upload docs diff --git a/dfx.json b/dfx.json index 61f7ee6..507d6f8 100644 --- a/dfx.json +++ b/dfx.json @@ -2,6 +2,9 @@ "canisters": { "Calc": { "main": "examples/calc/Main.mo" + }, + "Sort": { + "main": "examples/sort/Main.mo" } }, "defaults": { diff --git a/examples/sort/Main.mo b/examples/sort/Main.mo new file mode 100644 index 0000000..4a5c6ce --- /dev/null +++ b/examples/sort/Main.mo @@ -0,0 +1,34 @@ +import Meta "../../src/data/Meta"; +import Sort "Sort"; + +actor { + var sort = Sort.Sort(); + let meta = Meta.Counter(); + public query func test() : async () { + let r = + sort.eval( + #seq(#treeOfStream(#streamOfArray( + #array( + [ + (#val(#num 04), meta.next()), + (#val(#num 16), meta.next()), + (#val(#num 02), meta.next()), + (#val(#num 07), meta.next()), + (#val(#num 11), meta.next()), + (#val(#num 09), meta.next()), + (#val(#num 06), meta.next()), + + (#val(#num 23), meta.next()), + (#val(#num 08), meta.next()), + (#val(#num 13), meta.next()), + (#val(#num 06), meta.next()), + (#val(#num 14), meta.next()), + (#val(#num 17), meta.next()), + (#val(#num 16), meta.next()), + ] + ))))); + + ignore sort.takeLog(); + + }; +} diff --git a/examples/sort/Sort.mo b/examples/sort/Sort.mo new file mode 100644 index 0000000..af4da3a --- /dev/null +++ b/examples/sort/Sort.mo @@ -0,0 +1,100 @@ +import Engine "../../src/Engine"; +import Seq "../../src/data/Sequence"; +import Meta "../../src/data/Meta"; +import R "mo:base/Result"; +import L "mo:base/List"; +import Text "mo:base/Text"; +import Debug "mo:base/Debug"; + +/** + Incremental sorting example. + Uses adapton for representing stream thunks and caching them as data changes. + */ +module { + +public type Name = Meta.Name; + +public type Val = { + #unit; + #num : Nat; + #seq : Seq.Val; +}; + +public type Error = { + #typeMismatch; + #seq : Seq.Error; +}; + +public type Exp = { + #unit; + #num : Nat; + #seq : Seq.Exp; +}; + +public class Sort() { + /* -- cache implementation, via adapton package -- */ + public var engine : Engine.Engine = + Engine.Engine( + { + nameEq=func (x:Name, y:Name) : Bool { x == y }; + valEq=func (x:Val, y:Val) : Bool { x == y }; + closureEq=func (x:Exp, y:Exp) : Bool { x == y }; + errorEq=func (x:Error, y:Error) : Bool { x == y }; + nameHash=Meta.Name.hash; + cyclicDependency=func (stack:L.List, name:Name) : Error { + Debug.print(debug_show {stack; name}); + assert false; loop { } + } + }, + true // logging + ); + public var engineIsInit = false; + + public var seq = Seq.Sequence( + engine, + { + valMax = func(v1 : Val, v2 : Val) : R.Result { + switch (v1, v2) { + case (#num n1, #num n2) + #ok(#num(if (n1 > n2) n1 else n2)); + case _ #err(#typeMismatch); + } + }; + getVal = func(v : Val) : ?Seq.Val = + switch v { case (#seq(s)) ?s; case _ null }; + putVal = func(v : Seq.Val) : Val = #seq(v); + putExp = func(e : Seq.Exp) : Exp = #seq(e); + getExp = func(e : Exp) : ?Seq.Exp = + switch e { case (#seq(e)) ?e; case _ null }; + putError = func(e : Seq.Error) : Error = #seq(e); + getError = func(e : Error) : ?Seq.Error = + switch e { case (#seq(e)) ?e; case _ null }; + } + ); + + func evalRec(exp : Exp) : R.Result { + switch exp { + case (#unit) #ok(#unit); + case (#num n) #ok(#num n); + case (#seq e) seq.eval(e); + } + }; + + public func eval(exp : Exp) : R.Result { + if (not engineIsInit) { + engine.init({eval=evalRec}); + engineIsInit := true + }; + evalRec(exp) + }; + + public func takeLog() : Engine.Log { + let log = engine.takeLog(); + debug { Debug.print (debug_show log) }; + log + }; + +}; + +} + diff --git a/src/Engine.mo b/src/Engine.mo index 126a71d..8cd677a 100644 --- a/src/Engine.mo +++ b/src/Engine.mo @@ -62,6 +62,12 @@ module { context.logOps.end(tag) }; + public func nest(name : Name, body : () -> X) : X { + // to do -- enter nested namespace using name + body() + // to do -- resume original namespace + }; + public func put(name:Name, val:Val) : R.Result { logBegin(); let newRefNode : G.Ref = { diff --git a/src/data/Meta.mo b/src/data/Meta.mo new file mode 100644 index 0000000..97a3c44 --- /dev/null +++ b/src/data/Meta.mo @@ -0,0 +1,64 @@ +import Engine "../Engine"; + +import H "mo:base/Hash"; +import L "mo:base/List"; +import R "mo:base/Result"; +import P "mo:base/Prelude"; +import Int "mo:base/Int"; +import Debug "mo:base/Debug"; +import Text "mo:base/Text"; +import Nat "mo:base/Nat"; +import Nat32 "mo:base/Nat32"; + +module { + + /// Names as untyped symbol trees. + /// Names serve as locally-unique dynamic identifiers. + public type Name = { + #none; + #int : Int; + #nat : Nat; + #text : Text; + #bin : (Name, Name); + #tri : (Name, Name, Name); + #cons : (Name, [ Name ]); + #record : [(Name, Name)]; + }; + + // Levels define [Cartesian trees](https://en.wikipedia.org/wiki/Cartesian_tree). + public type Level = Nat; + + // Meta data within inductive components of incremental data structures. + public type Meta = { + name : Name; + level : Level; + }; + + public module Level { + public func ofNat(n : Nat) : Level { + Nat32.toNat(Nat32.bitcountLeadingZero(H.hash(n))) + } + }; + + public module Name { + public func hash (n : Name) : H.Hash { + switch n { + case (#none) Text.hash "none"; + case (#text(t)) Text.hash t; + case _ Text.hash "?"; // to do + } + }; + }; + + public class Counter() { + var counter : Nat = 0; + public func next () : Meta { + let level = Level.ofNat(counter); + let name = #nat counter; + let meta = {level; name}; + counter += 1; + meta + }; + }; + +} diff --git a/src/data/README.md b/src/data/README.md new file mode 100644 index 0000000..673c2ca --- /dev/null +++ b/src/data/README.md @@ -0,0 +1,13 @@ +# Incremental data (structures) + +Following ideas from earlier [(nominal) Adapton implementations and papers](http://adapton.org). + +- [ ] Stacks -- as singly linked lists. +- [ ] Sequences -- as binary [Cartesian trees](https://en.wikipedia.org/wiki/Cartesian_tree). +- [ ] Maps -- as [binary tries](https://en.wikipedia.org/wiki/Trie). + +Each structure and its algorithms use names in its inductive definition. + +During change propagation, these names distinguish independent components +of each structure as it undergoes incremental change. + diff --git a/src/data/Sequence.mo b/src/data/Sequence.mo new file mode 100644 index 0000000..a719a84 --- /dev/null +++ b/src/data/Sequence.mo @@ -0,0 +1,417 @@ +import Engine "../Engine"; +import Meta "Meta"; + +import H "mo:base/Hash"; +import L "mo:base/List"; +import R "mo:base/Result"; +import P "mo:base/Prelude"; +import Int "mo:base/Int"; +import Debug "mo:base/Debug"; +import Text "mo:base/Text"; +import Buffer "mo:base/Buffer"; + +module { + +public type Name = Meta.Name; + +public type Meta = Meta.Meta; + +public type TreeMeta = { + name : Name; + level : Meta.Level; + size : Nat +}; + +// Expresssions serve as "spreadsheet formula" for sequences. +// Read "Val_" as "Any", but for our DSL system, not for Motoko. +public type Exp = { + // arrays for small test inputs, and little else. + #array: [ (Exp, Meta.Meta) ]; + // each `#put` case transforms into an #at case *before any evaluation*. + #put: (Name, Exp); + // `#at` case permits fine-grained re-use / re-evaluation via Adapton names. + #at: Name; + // Stream-literal definition/construction + #cons: { head: Exp; meta: Exp; tail: Exp }; + #nil; + #val: Val_; + // Sequence operations + #streamOfArray: Exp; + #treeOfStream: Exp; + #treeOfStreamRec: TreeOfStreamRec; + #maxOfTree: Exp; + //#sort: Exp; + //#median: Exp; +}; + +public type TreeOfStreamRec = { + parentLevel : ?Nat; + stream : Stream; + subTree : Tree +}; + +public type Array = [ (Val, Meta.Meta) ]; + +public type ArrayStream = { + array : Array; + offset : Nat; +}; + +public type Cons = (Val, Meta, Val); + +public type Bin = { + left : Val; + meta : TreeMeta; + right : Val; +}; + +public type Val = { + // value allocated at a name, stored by an adapton thunk or ref. + #at: Name; + // arrays for small test inputs, and little else. + #array: Array; + // array streams: Special stream where source is a fixed array. + #arrayStream: ArrayStream; + // empty list; empty tree. + #nil; + // lazy list / stream cell: left value is stream "now"; right is stream "later". + #cons: Cons; + // binary tree: binary case. + #bin: Bin; + // binary tree: leaf case. *Any* value, from any language module. + #leaf: Val_; + // pair of our values. + #pair: (Val, Val); +}; + +/// Cartesian trees as balanced representations for sequences. +public type Tree = { + #nil; + #bin: Bin; + #leaf: Val_; + // value allocated at a name, stored by an adapton thunk or ref. + #at: Name; +}; + +public type Stream = { + // empty list; empty tree. + #nil; + // lazy list / stream cell: left value is stream "now"; right is stream "later". + #cons: Cons; + // array streams: Special stream where source is a fixed array. + #arrayStream: ArrayStream; +}; + +/// Each sequence representation has a different run-time type, +/// with associated checks for its operations. +public type SeqType = { + #array; + #stream; + #tree; + #pair; +}; + +/// Result type for all "meta level" operations returning an `X`. +public type Result = R.Result>; + +/// Evaluation results in a `Val` on success. +public type EvalResult = Result, Val_>; + +public type Error = { + /// Wrong value form: Not from this language module. + #notOurVal : Val_; + /// Wrong value form: Type mismatch. + #doNotHave : (SeqType, Val); + // no max/min/median defined when sequence is empty + #emptySequence; + // to do -- improve with separate PR + #engineError; +}; + +public type Ops = { + valMax : (Val_, Val_) -> R.Result; + getVal : Val_ -> ?Val; + putExp : Exp -> Exp_; + getExp : Exp_ -> ?Exp; + putVal : Val -> Val_; + putError : Error -> Error_; + getError : Error_ -> ?Error; +}; + +public class Sequence( + engine: Engine.Engine, + ops: Ops +) { + + /// Evaluate expression into a result. + public func eval(e : Exp) : R.Result { + switch (evalRec(alloc(e))) { + case (#ok(v)) #ok(ops.putVal(v)); + case (#err(e)) #err(ops.putError(e)); + } + }; + + func alloc(e : Exp) : Exp { + switch e { + case (#array(a)) { + let elms = Buffer.Buffer<(Exp, Meta)>(a.size()); + for ((e, meta) in a.vals()) { + elms.add((alloc e, meta)) + }; + #array(elms.toArray()) + }; + case (#at(n)) #at((n)); + case (#put(n, e)) { + switch (engine.putThunk(n, ops.putExp(alloc(e)))) { + case (#err(err)) { loop { assert false } }; + case (#ok(n)) { #at(n) }; + }}; + case (#val(v)) #val(v); + case (#streamOfArray(e)) #streamOfArray(alloc e); + case (#treeOfStream(e)) #treeOfStream(alloc e); + case (#maxOfTree(e)) #maxOfTree(alloc e); + case (#nil) #nil; + case (#cons(c)) { + #cons({head=alloc(c.head); meta=alloc(c.meta); tail=alloc(c.tail)}) + }; + case (#treeOfStreamRec(args)) { #treeOfStreamRec(args) }; + } + }; + + /// Check canonical array forms. + public func haveArray(v : Val) : Result<[(Val, Meta)], Val_> { + switch v { + case (#array(a)) { #ok(a) }; + case _ { #err(#doNotHave(#array, v)) }; + }; + }; + + /// Check canonical stream head form. + public func haveStream(v : Val) : Result, Val_> { + switch v { + case (#arrayStream(s)) { #ok(#arrayStream(s)) }; + case (#cons(c)) { #ok(#cons(c)) }; + case (#nil) { #ok(#nil) }; + case _ { #err(#doNotHave(#stream, v)) }; + } + }; + + /// Check canonical tree head form. + public func haveTree(v : Val) : Result, Val_> { + switch v { + case (#at n) { #ok(#at(n)) }; + case (#nil) { #ok(#nil) }; + case (#bin(b)) { #ok(#bin(b)) }; + case (#leaf v) { #ok(#leaf(v)) }; + case _ { #err(#doNotHave(#tree, v)) }; + } + }; + + /// Transforms an array into a stream. + public func streamOfArray(v : Val) : EvalResult { + switch(haveArray(v)) { + case (#ok(array)) { #ok(#arrayStream({array; offset = 0})) }; + case (#err(err)) { #err(err) }; + } + }; + + // Returns the thunk, and its (eagerly-computed) value, as a pair. + func getPutThunk(name : Name, exp : Exp) : EvalResult { + let thunk = + engine.putThunk( + name, ops.putExp(exp) + ); + switch thunk { + case (#ok(putName)) { + switch(engine.get(putName)) { + case (#err(err)) { #err(#engineError) }; + case (#ok(v)) { + switch(v) { + case (#ok(v)) { + switch (ops.getVal(v)) { + case null { #err(#engineError) }; + case (?gotValue) { resultPair(#at(putName), gotValue) }; + } + }; + case (#err(err)) { + switch (ops.getError(err)) { + case null { #err(#engineError) }; + case (?e) { #err(e) }; + } + }; + } + }; + } + }; + case (#err(err)) { #err(#engineError) }; + } + }; + + // Does getPutThunk, but only returns the result of the thunk. + func memo(name : Name, exp : Exp) : EvalResult { + switch (getPutThunk(name, exp)) { + case (#err(e)) #err(e); + case (#ok(#pair((_, v)))) #ok(v); + case (#ok(_)) { assert false; loop {}}; + } + }; + + // Does getPutThunk, but returns the thunk and value as a Motoko pair. + func memo_(name : Name, exp : Exp) : Result<(Name, Val), Val_> { + switch (getPutThunk(name, exp)) { + case (#err(e)) #err(e); + case (#ok(#pair(#at(n), v))) #ok((n, v)); + case (#ok(_)) { assert false; loop {}}; + } + }; + + /// number of elms; ignore internal nodes + public func treeSize (t : Tree) : Nat { + switch t { + case (#at(n)) { assert false; loop {}}; // query engine here? + case (#nil) 0; + case (#bin(b)) b.meta.size; + case (#leaf _) 1; + } + }; + + public func treeLevel (t : Tree) : Nat { + switch t { + case (#at(n)) { assert false; loop {}}; // to do -- query engine here? + case (#nil) 0; + case (#bin(b)) b.meta.level; + case (#leaf _) 0; + } + }; + + public func streamNext (s : Stream) : ?Cons { + switch s { + case (#nil) null; + case (#arrayStream(a)) { + if(a.offset < a.array.size()) { + let (elm, meta) = a.array[a.offset]; + ?(elm, meta, #arrayStream{ offset = a.offset + 1; + array = a.array }) + } else null; + }; + case (#cons(c)) { ?c } + } + }; + + public func resultPairSplit(r : EvalResult) : Result<(Val, Val), Val_> { + switch r { + case (#ok(#pair(v1, v2))) { #ok((v1, v2)) }; + case (#ok(v)) { #err(#doNotHave(#pair, v)) }; + case (#err(e)) { #err(e) }; + } + }; + + public func resultPair(v1 : Val, v2 : Val) : EvalResult { + #ok(#pair(v1, v2)) + }; + + /// Transforms a stream into a tree. + public func treeOfStreamRec(parentLevel : ?Nat, s : Stream, tree : Tree) + : EvalResult // (Tree, Stream), for the result and remaining stream. + { + switch (streamNext(s)) { + case null (resultPair(tree, #nil)); + case (?cons) { + let (head, meta, tail) = cons; + switch parentLevel { + case (?pl) { + if (meta.level > pl) { + return resultPair(tree, s) + } }; + case _ { }; + }; + if (meta.level < treeLevel(tree)) { + return resultPair(tree, s) + }; + let tailAsStream = switch (haveStream(tail)) { + case (#ok(s)) s; + case (#err(e)) { return #err(e) } + }; + let (tree2, s2) = + switch (resultPairSplit(memo( + #bin(meta.name, #text("right")), + #treeOfStreamRec({ + parentLevel = ?meta.level; + stream = tailAsStream; + subTree = #leaf(ops.putVal(head)); + })))) { + case (#err(e)) { return #err(e) }; + case (#ok(v1, v2)) (v1, v2); + }; + let (tree2_, s2_) = switch (haveTree(tree2), haveStream(s2)) { + case (#ok(t), #ok(s)) (t, s); + case (#err(e1), _) { return #err(e1) }; + case (_, #err(e2)) { return #err(e2) }; + }; + let size = treeSize(tree) + treeSize(tree2_); + let tree3 = #bin({ left = tree; + meta = { level = meta.level; name = meta.name; size}; + right = tree2_ }); + memo( + #bin(meta.name, #text("root")), + #treeOfStreamRec({ + parentLevel; + stream = s2_; + subTree = tree3; + })); + } + } + }; + + /// Transforms a stream into a tree. + public func treeOfStream(s : Val) : EvalResult { + engine.nest(#text("treeOfStream"), func () : EvalResult { + switch(haveStream(s)) { + case (#ok(s)) { treeOfStreamRec(null, s, #nil) }; + case (#err(err)) { #err(err) }; + }}); + }; + + func evalRec(exp : Exp) : EvalResult { + switch exp { + case (#val(v)) { #ok(#leaf(v)) }; + case (#put(n, e)) loop { assert false }; + case (#array(arr)) { + let vals = Buffer.Buffer<(Val, Meta)>(arr.size()); + for ((e, meta) in arr.vals()) { + switch (evalRec(e)) { + case (#ok(v)) { vals.add((v, meta)) }; + case (#err(e)) { return #err(e) }; + }; + }; + #ok(#array(vals.toArray())) + }; + case (#at(n)) + switch (engine.get(n)) { + case (#err(_) or #ok(#err _)) #err(#engineError); + case (#ok(#ok(res))) switch (ops.getVal(res)) { + case null { #err(#notOurVal(res)) }; + case (?v) { #ok(v) }; + }; + }; + case (#streamOfArray(a)) { + switch (evalRec(a)) { + case (#err(err)) { #err(err) }; + case (#ok(array)) { streamOfArray(array) }; + } + }; + case (#treeOfStreamRec(args)) { + treeOfStreamRec(args.parentLevel, args.stream, args.subTree) + }; + case (#treeOfStream(e)) { + switch (evalRec(e)) { + case (#err(err)) { #err(err) }; + case (#ok(list)) { treeOfStream(list) }; + } + }; + } + }; + +}; + +}