Skip to content

Commit

Permalink
feat: add UnixFSPathSelectorBuilder
Browse files Browse the repository at this point in the history
Refactored and extracted out of Lassie

Ref: filecoin-project/lassie#142
  • Loading branch information
rvagg committed Mar 9, 2023
1 parent ca00f89 commit 51109f5
Show file tree
Hide file tree
Showing 2 changed files with 295 additions and 13 deletions.
105 changes: 92 additions & 13 deletions signaling.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,45 @@
package unixfsnode

import (
"strings"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/datamodel"
"github.com/ipld/go-ipld-prime/linking"
"github.com/ipld/go-ipld-prime/node/basicnode"
"github.com/ipld/go-ipld-prime/traversal/selector"
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
)

// ExploreAllRecursivelySelector explores every node reachable from its
// starting point, with no recursion limit. It is the same selector as
// selectorparse.CommonSelector_ExploreAllRecursively, provided here as a
// ready-made SelectorSpec that can be handed to UnixFSPathSelectorBuilder().
var ExploreAllRecursivelySelector = specBuilder(func(sb builder.SelectorSpecBuilder) builder.SelectorSpec {
	// Explore all children, recursing again at each of them.
	everyChild := sb.ExploreAll(sb.ExploreRecursiveEdge())
	return sb.ExploreRecursive(selector.RecursionLimitNone(), everyChild)
})

// MatchUnixFSPreloadSelector matches a single node, similar to
// selectorparse.CommonSelector_MatchPoint, but interprets that node through
// the "unixfs-preload" ADL so that sharded files and directories are loaded
// as a single node. It can be used to shallow load an entire UnixFS directory
// listing, sharded or not, without loading the directory's contents.
//
// MatchUnixFSPreloadSelector is a ready-made SelectorSpec for use with
// UnixFSPathSelectorBuilder().
var MatchUnixFSPreloadSelector = specBuilder(func(sb builder.SelectorSpecBuilder) builder.SelectorSpec {
	matchHere := sb.Matcher()
	return sb.ExploreInterpretAs("unixfs-preload", matchHere)
})

// MatchUnixFSSelector matches a single node, similar to
// selectorparse.CommonSelector_MatchPoint, but interprets that node through
// the "unixfs" ADL so it is loaded as UnixFS data. Unlike
// MatchUnixFSPreloadSelector, it does not preload all of the blocks of a
// sharded directory or file; prefer MatchUnixFSPreloadSelector when loading
// every block that makes up the selected UnixFS resource matters.
var MatchUnixFSSelector = specBuilder(func(sb builder.SelectorSpecBuilder) builder.SelectorSpec {
	matchHere := sb.Matcher()
	return sb.ExploreInterpretAs("unixfs", matchHere)
})

func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
if lsys.KnownReifiers == nil {
lsys.KnownReifiers = make(map[string]linking.NodeReifier)
Expand All @@ -18,18 +48,67 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
lsys.KnownReifiers["unixfs-preload"] = nonLazyReify
}

// UnixFSPathSelector creates a selector for an IPLD path to a UnixFS resource,
// for use when UnixFS reification is set up on the LinkSystem being used for
// traversal.
//
// Use UnixFSPathSelectorBuilder for more control over the selector; this
// function is the same as calling
//
//	UnixFSPathSelectorBuilder(path, MatchUnixFSSelector, false)
func UnixFSPathSelector(path string) datamodel.Node {
	// Path parsing is delegated entirely to UnixFSPathSelectorBuilder; no
	// manual splitting of the path happens here.
	return UnixFSPathSelectorBuilder(path, MatchUnixFSSelector, false)
}

// UnixFSPathSelectorBuilder creates a selector for IPLD path to a UnixFS
// resource if UnixFS reification is setup on a LinkSystem being used for
// traversal.
//
// The path is interpreted according to
// github.com/ipld/go-ipld-prime/datamodel/Path rules,
// i.e.
// - leading and trailing slashes are ignored
// - redundant slashes are ignored
// - the segment `..` is a field named `..`, same with `.`
//
// targetSelector is the selector to apply to the final node in the path.
// Use ExploreAllRecursivelySelector to explore (i.e. load the blocks) all of
// the content from the terminus of the path. Use MatchUnixFSPreloadSelector to
// match the terminus of the path, but preload all blocks in sharded files and
// directories. Use MatchUnixFSSelector to match the terminus of the path, but
// not preload any blocks if the terminus is sharded. Or any other custom
// SelectorSpec can be supplied.
//
// If matchPath is false, the selector will explore, not match, so it's useful
// for traversals where block loads are important, not where the matcher visitor
// callback is important. if matchPath is true, the selector will match the
// nodes along the path while exploring them.
func UnixFSPathSelectorBuilder(path string, targetSelector builder.SelectorSpec, matchPath bool) ipld.Node {
segments := ipld.ParsePath(path)

ss := targetSelector
ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
selectorSoFar := ssb.ExploreInterpretAs("unixfs", ssb.Matcher())
for i := len(segments) - 1; i >= 0; i-- {
selectorSoFar = ssb.ExploreInterpretAs("unixfs",
ssb.ExploreFields(func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert(segments[i], selectorSoFar)
}),
)

for segments.Len() > 0 {
// Wrap selector in ExploreFields as we walk back up through the path.
// We can assume each segment to be a unixfs path section, so we
// InterpretAs to make sure the node is reified through go-unixfsnode
// (if possible) and we can traverse through according to unixfs pathing
// rather than bare IPLD pathing - which also gives us the ability to
// traverse through HAMT shards.
ss = ssb.ExploreInterpretAs("unixfs", ssb.ExploreFields(
func(efsb builder.ExploreFieldsSpecBuilder) {
efsb.Insert(segments.Last().String(), ss)
},
))
if matchPath {
ss = ssb.ExploreUnion(ssb.Matcher(), ss)
}
segments = segments.Pop()
}
return selectorSoFar.Node()

return ss.Node()
}

// specBuilder runs b against a fresh SelectorSpecBuilder backed by
// basicnode.Prototype.Any and returns the SelectorSpec that b produces.
func specBuilder(b func(ssb builder.SelectorSpecBuilder) builder.SelectorSpec) builder.SelectorSpec {
	ssb := builder.NewSelectorSpecBuilder(basicnode.Prototype.Any)
	return b(ssb)
}
203 changes: 203 additions & 0 deletions signalling_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package unixfsnode_test

import (
"fmt"
"strings"
"testing"

"github.com/ipfs/go-unixfsnode"
"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec/dagjson"
"github.com/ipld/go-ipld-prime/traversal/selector/builder"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
"github.com/stretchr/testify/require"
)

// Selectors are tested against JSON expected forms; this doesn't necessarily
// validate that they work as advertised. It's just a sanity check that the
// selectors are being built as expected.

// Expected dag-json forms of the target selectors used by the tests.
var (
	// exploreAllJson is the dag-json encoding of
	// selectorparse.CommonSelector_ExploreAllRecursively.
	exploreAllJson = mustDagJson(selectorparse.CommonSelector_ExploreAllRecursively)

	// explore interpret-as (~), next (>), match (.), interpreted as unixfs-preload
	matchUnixfsPreloadJson = `{"~":{">":{".":{}},"as":"unixfs-preload"}}`

	// explore interpret-as (~), next (>), match (.), interpreted as unixfs
	matchUnixfsJson = `{"~":{">":{".":{}},"as":"unixfs"}}`
)

// TestUnixFSPathSelector checks the dag-json form of the selector returned by
// UnixFSPathSelector for a range of path spellings.
func TestUnixFSPathSelector(t *testing.T) {
	type pathCase struct {
		name string
		path string
		want string // expected dag-json encoding of the selector
	}

	cases := []pathCase{
		{name: "empty path", path: "", want: matchUnixfsJson},
		{name: "single field", path: "/foo", want: jsonFields(matchUnixfsJson, "foo")},
		{name: "multiple fields", path: "/foo/bar", want: jsonFields(matchUnixfsJson, "foo", "bar")},
		{name: "leading slash optional", path: "foo/bar", want: jsonFields(matchUnixfsJson, "foo", "bar")},
		{name: "trailing slash optional", path: "/foo/bar/", want: jsonFields(matchUnixfsJson, "foo", "bar")},
		// a go-ipld-prime specific thing, not clearly specified by path spec (?)
		{name: ".. is a field named ..", path: "/foo/../bar/", want: jsonFields(matchUnixfsJson, "foo", "..", "bar")},
		// a go-ipld-prime specific thing, not clearly specified by path spec
		{name: "redundant slashes ignored", path: "foo///bar", want: jsonFields(matchUnixfsJson, "foo", "bar")},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			got := mustDagJson(unixfsnode.UnixFSPathSelector(c.path))
			require.Equal(t, c.want, got)
		})
	}
}

// TestUnixFSPathSelectorBuilder checks the dag-json form of the selector
// returned by UnixFSPathSelectorBuilder for combinations of path, target
// selector and matchPath.
func TestUnixFSPathSelectorBuilder(t *testing.T) {
	type builderCase struct {
		name      string
		path      string
		target    builder.SelectorSpec
		matchPath bool
		want      string // expected dag-json encoding of the selector
	}

	// Short aliases for the two target selectors exercised below.
	all := unixfsnode.ExploreAllRecursivelySelector
	preload := unixfsnode.MatchUnixFSPreloadSelector

	cases := []builderCase{
		{name: "empty path", path: "", target: all, want: exploreAllJson},
		{name: "empty path shallow", path: "", target: preload, want: matchUnixfsPreloadJson},
		{name: "single field", path: "/foo", target: all, want: jsonFields(exploreAllJson, "foo")},
		{name: "single field, match path", path: "/foo", target: all, matchPath: true, want: jsonFieldsMatchPoint(exploreAllJson, "foo")},
		{name: "single field shallow", path: "/foo", target: preload, want: jsonFields(matchUnixfsPreloadJson, "foo")},
		{name: "multiple fields", path: "/foo/bar", target: all, want: jsonFields(exploreAllJson, "foo", "bar")},
		{name: "multiple fields, match path", path: "/foo/bar", target: all, matchPath: true, want: jsonFieldsMatchPoint(exploreAllJson, "foo", "bar")},
		{name: "multiple fields shallow", path: "/foo/bar", target: preload, want: jsonFields(matchUnixfsPreloadJson, "foo", "bar")},
		{name: "leading slash optional", path: "foo/bar", target: all, want: jsonFields(exploreAllJson, "foo", "bar")},
		{name: "trailing slash optional", path: "/foo/bar/", target: all, want: jsonFields(exploreAllJson, "foo", "bar")},
		// a go-ipld-prime specific thing, not clearly specified by path spec (?)
		{name: ".. is a field named ..", path: "/foo/../bar/", target: all, want: jsonFields(exploreAllJson, "foo", "..", "bar")},
		// a go-ipld-prime specific thing, not clearly specified by path spec
		{name: "redundant slashes ignored", path: "foo///bar", target: all, want: jsonFields(exploreAllJson, "foo", "bar")},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			got := mustDagJson(unixfsnode.UnixFSPathSelectorBuilder(c.path, c.target, c.matchPath))
			require.Equal(t, c.want, got)
		})
	}
}

// jsonFields returns the expected dag-json text for a selector that walks the
// given fields in order, each one wrapped in an interpret-as "unixfs" step,
// with target as the innermost selector. With no fields it returns target
// unchanged. Field names are inserted verbatim, without JSON escaping.
func jsonFields(target string, fields ...string) string {
	// Closes one level opened in the loop below.
	const closing = `}}},"as":"unixfs"}}`

	var out strings.Builder
	for _, name := range fields {
		// explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name
		fmt.Fprintf(&out, `{"~":{">":{"f":{"f>":{"%s":`, name)
	}
	out.WriteString(target)
	for range fields {
		out.WriteString(closing)
	}
	return out.String()
}

// jsonFieldsMatchPoint is like jsonFields, but every level is a union of a
// matcher and the interpret-as "unixfs" field step, giving the expected
// dag-json text for a selector that also matches each node along the path.
// With no fields it returns target unchanged. Field names are inserted
// verbatim, without JSON escaping.
func jsonFieldsMatchPoint(target string, fields ...string) string {
	// Closes one level opened in the loop below, including the union's array.
	const closing = `}}},"as":"unixfs"}}]}`

	var out strings.Builder
	for _, name := range fields {
		// union (|) of match (.) and explore interpret-as (~) next (>), explore field (f) + specific field (f>), with field name
		fmt.Fprintf(&out, `{"|":[{".":{}},{"~":{">":{"f":{"f>":{"%s":`, name)
	}
	out.WriteString(target)
	for range fields {
		out.WriteString(closing)
	}
	return out.String()
}

// mustDagJson encodes n as dag-json and returns the result as a string. It
// panics if encoding fails, so it is only suitable for test setup.
func mustDagJson(n ipld.Node) string {
	encoded, err := ipld.Encode(n, dagjson.Encode)
	if err == nil {
		return string(encoded)
	}
	panic(err)
}

0 comments on commit 51109f5

Please sign in to comment.