From 444036b48b766fa068666039cc247f62b27416ea Mon Sep 17 00:00:00 2001
From: Rod Vagg
Date: Mon, 18 Sep 2023 21:05:15 +1000
Subject: [PATCH] fix: test and handle identity CIDs, expose more testutils

---
 .../correctedmemstore.go                      |  33 ++++
 testutil/identity.go                          | 145 ++++++++++++++++++
 .../testutil => testutil}/zeroreader.go       |   0
 traversal/internal/testutil/toblocks.go       |   7 +
 traversal/traversal.go                        |  19 +++
 traversal/traversal_test.go                   |  32 +++-
 6 files changed, 230 insertions(+), 6 deletions(-)
 rename {traversal/internal/testutil => testutil}/correctedmemstore.go (55%)
 create mode 100644 testutil/identity.go
 rename {traversal/internal/testutil => testutil}/zeroreader.go (100%)

diff --git a/traversal/internal/testutil/correctedmemstore.go b/testutil/correctedmemstore.go
similarity index 55%
rename from traversal/internal/testutil/correctedmemstore.go
rename to testutil/correctedmemstore.go
index e05374f..791c570 100644
--- a/traversal/internal/testutil/correctedmemstore.go
+++ b/testutil/correctedmemstore.go
@@ -1,11 +1,14 @@
 package testutil
 
 import (
+	"bytes"
 	"context"
 	"io"
 
+	"github.com/ipfs/go-cid"
 	format "github.com/ipfs/go-ipld-format"
 	"github.com/ipld/go-ipld-prime/storage"
+	"github.com/multiformats/go-multihash"
 )
 
 type ParentStore interface {
@@ -19,7 +22,31 @@ type CorrectedMemStore struct {
 	ParentStore
 }
 
+// AsIdentity casts key to a CID and reports, via ok, whether that CID uses
+// the identity multihash code. digest is the decoded multihash digest, which
+// for an identity CID is the data that Get and GetStream serve in place of a
+// store lookup; it should only be relied upon when ok is true. A key that
+// cannot be cast to a CID, or whose multihash cannot be decoded, is an error.
+func AsIdentity(key string) (digest []byte, ok bool, err error) {
+	keyCid, err := cid.Cast([]byte(key))
+	if err != nil {
+		return nil, false, err
+	}
+	dmh, err := multihash.Decode(keyCid.Hash())
+	if err != nil {
+		return nil, false, err
+	}
+	return dmh.Digest, dmh.Code == multihash.IDENTITY, nil
+}
+
 func (cms *CorrectedMemStore) Get(ctx context.Context, key string) ([]byte, error) {
+	// An identity CID carries its data in its digest; serve that directly.
+	if digest, ok, err := AsIdentity(key); ok {
+		return digest, nil
+	} else if err != nil {
+		return nil, err
+	}
+
 	data, err := cms.ParentStore.Get(ctx, key)
 	if err != nil && err.Error() == "404" {
 		err = format.ErrNotFound{}
@@ -28,6 +55,12 @@ func (cms *CorrectedMemStore) Get(ctx context.Context, key string) ([]byte, erro
 }
 
 func (cms *CorrectedMemStore) GetStream(ctx context.Context, key string) (io.ReadCloser, error) {
+	// An identity CID carries its data in its digest; serve that directly.
+	if digest, ok, err := AsIdentity(key); ok {
+		return io.NopCloser(bytes.NewReader(digest)), nil
+	} else if err != nil {
+		return nil, err
+	}
 	rc, err := cms.ParentStore.GetStream(ctx, key)
 	if err != nil && err.Error() == "404" {
 		err = format.ErrNotFound{}
diff --git a/testutil/identity.go b/testutil/identity.go
new file mode 100644
index 0000000..9c0fb55
--- /dev/null
+++ b/testutil/identity.go
@@ -0,0 +1,145 @@
+package testutil
+
+import (
+	"testing"
+
+	cid "github.com/ipfs/go-cid"
+	unixfs "github.com/ipfs/go-unixfsnode/testutil"
+	"github.com/ipld/go-ipld-prime"
+	"github.com/ipld/go-ipld-prime/codec/dagjson"
+	"github.com/ipld/go-ipld-prime/datamodel"
+	"github.com/ipld/go-ipld-prime/fluent/qp"
+	"github.com/ipld/go-ipld-prime/linking"
+	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+	"github.com/ipld/go-ipld-prime/node/basicnode"
+	"github.com/multiformats/go-multihash"
+	"github.com/stretchr/testify/require"
+)
+
+var djlp = cidlink.LinkPrototype{
+	Prefix: cid.Prefix{
+		Version:  1,
+		Codec:    cid.DagJSON,
+		MhType:   multihash.SHA2_256,
+		MhLength: 32,
+	},
+}
+
+var rawlp = cidlink.LinkPrototype{
+	Prefix: cid.Prefix{
+		Version:  1,
+		Codec:    cid.Raw,
+		MhType:   multihash.SHA2_256,
+		MhLength: 32,
+	},
+}
+
+// MakeDagWithIdentity makes a non-unixfs DAG, wrapped in the
+// go-unixfsnode/testutil/DirEntry struct (so it can be used in all the places
+// DirEntry is) that has an identity CID in the middle linking a section that
+// the identity CID must be traversed through to find.
+func MakeDagWithIdentity(t *testing.T, lsys linking.LinkSystem) unixfs.DirEntry {
+	/* ugly, but it makes a DAG with paths that look like this but doesn't involve dag-pb or unixfs
+	> [/]
+	> [/a/!foo]
+	> [/a/b/!bar]
+	> [/a/b/c/!baz/identity jump]
+	> [/a/b/c/!baz/identity jump/these are my children/blip]
+	> [/a/b/c/!baz/identity jump/these are my children/bloop]
+	> [/a/b/c/!baz/identity jump/these are my children/bloop/ leaf ]
+	> [/a/b/c/!baz/identity jump/these are my children/blop]
+	> [/a/b/c/!baz/identity jump/these are my children/leaf]
+	> [/a/b/c/d/!leaf]
+	*/
+	store := func(path string, children []unixfs.DirEntry, lp cidlink.LinkPrototype, n datamodel.Node) unixfs.DirEntry {
+		l, err := lsys.Store(linking.LinkContext{}, lp, n)
+		require.NoError(t, err)
+		var content []byte
+		if n.Kind() == datamodel.Kind_Bytes {
+			content, err = n.AsBytes()
+			require.NoError(t, err)
+		}
+		return unixfs.DirEntry{
+			Path:     path,
+			Root:     l.(cidlink.Link).Cid,
+			SelfCids: []cid.Cid{l.(cidlink.Link).Cid},
+			Children: children,
+			Content:  content,
+		}
+	}
+
+	bazpath := "/a/b/c/!baz/identity jump"
+	bazchildpath := "/these are my children"
+	blooppath := "/bloop"
+
+	children := []unixfs.DirEntry{store(bazpath+bazchildpath+blooppath+"/ leaf ", nil, rawlp, basicnode.NewBytes([]byte("leaf node in bloop")))}
+	bloop := store(bazpath+bazchildpath+blooppath, children, djlp, must(qp.BuildMap(basicnode.Prototype.Any, -1, func(ma datamodel.MapAssembler) {
+		qp.MapEntry(ma, "desc", qp.List(-1, func(la datamodel.ListAssembler) {
+			qp.ListEntry(la, qp.String("this"))
+			qp.ListEntry(la, qp.String("is"))
+			qp.ListEntry(la, qp.String("bloop"))
+		}))
+		qp.MapEntry(ma, " leaf ", qp.Link(cidlink.Link{Cid: children[0].Root}))
+	}))(t))
+	leaf := store(bazpath+bazchildpath+"/leaf", nil, rawlp, basicnode.NewBytes([]byte("leaf node in baz")))
+	blop := store(bazpath+bazchildpath+"/blop", nil, djlp, must(qp.BuildList(basicnode.Prototype.Any, -1, func(la datamodel.ListAssembler) {
+		qp.ListEntry(la, qp.Int(100))
+		qp.ListEntry(la, qp.Int(200))
+		qp.ListEntry(la, qp.Int(300))
+	}))(t))
+	blip := store(bazpath+bazchildpath+"/blip", nil, djlp, basicnode.NewString("blip!"))
+	baz := store(bazpath, []unixfs.DirEntry{blip, bloop, blop, leaf}, djlp, must(qp.BuildMap(basicnode.Prototype.Any, -1, func(ma datamodel.MapAssembler) {
+		qp.MapEntry(ma, "desc", qp.List(-1, func(la datamodel.ListAssembler) {
+			qp.ListEntry(la, qp.String("this"))
+			qp.ListEntry(la, qp.String("is"))
+			qp.ListEntry(la, qp.String("baz"))
+		}))
+		qp.MapEntry(ma, "these are my children", qp.Map(-1, func(ma datamodel.MapAssembler) {
+			qp.MapEntry(ma, "blip", qp.Link(cidlink.Link{Cid: blip.Root}))
+			qp.MapEntry(ma, "bloop", qp.Link(cidlink.Link{Cid: bloop.Root}))
+			qp.MapEntry(ma, "blop", qp.Link(cidlink.Link{Cid: blop.Root}))
+			qp.MapEntry(ma, "leaf", qp.Link(cidlink.Link{Cid: leaf.Root}))
+		}))
+	}))(t))
+	leaf = store("/a/b/c/d/!leaf", nil, rawlp, basicnode.NewBytes([]byte("leaf node in the root")))
+	foo := store("/a/!foo", nil, djlp, basicnode.NewInt(1010101010101010))
+	bar := store("/a/b/!bar", nil, djlp, basicnode.NewInt(2020202020202020))
+	ident := must(qp.BuildMap(basicnode.Prototype.Any, -1, func(ma datamodel.MapAssembler) {
+		qp.MapEntry(ma, "identity jump", qp.Link(cidlink.Link{Cid: baz.Root}))
+	}))(t)
+	identBytes := must(ipld.Encode(ident, dagjson.Encode))(t)
+	mh := must(multihash.Sum(identBytes, multihash.IDENTITY, len(identBytes)))(t)
+	bazident := cid.NewCidV1(cid.DagJSON, mh)
+	bazidentChild := unixfs.DirEntry{
+		Root:     bazident,
+		Path:     "/a/b/c/!baz",
+		Children: []unixfs.DirEntry{baz},
+	}
+	root := store("", []unixfs.DirEntry{foo, bar, bazidentChild, leaf}, djlp, must(qp.BuildMap(basicnode.Prototype.Any, -1, func(ma datamodel.MapAssembler) {
+		qp.MapEntry(ma, "a", qp.Map(-1, func(ma datamodel.MapAssembler) {
+			qp.MapEntry(ma, "b", qp.Map(-1, func(ma datamodel.MapAssembler) {
+				qp.MapEntry(ma, "c", qp.Map(-1, func(ma datamodel.MapAssembler) {
+					qp.MapEntry(ma, "d", qp.Map(-1, func(ma datamodel.MapAssembler) {
+						qp.MapEntry(ma, "!leaf", qp.Link(cidlink.Link{Cid: leaf.Root}))
+					}))
+					qp.MapEntry(ma, "!baz", qp.Link(cidlink.Link{Cid: bazident}))
+				}))
+				qp.MapEntry(ma, "!bar", qp.Link(cidlink.Link{Cid: bar.Root}))
+			}))
+			qp.MapEntry(ma, "!foo", qp.Link(cidlink.Link{Cid: foo.Root}))
+		}))
+	}))(t))
+	return root
+}
+
+// must pairs a (value, error) result with a function that, given the running
+// test, fails it immediately when err is non-nil and otherwise returns v.
+func must[T any](v T, err error) func(t *testing.T) T {
+	return func(t *testing.T) T {
+		t.Helper()
+		if err != nil {
+			t.Fatal(err)
+		}
+		return v
+	}
+}
diff --git a/traversal/internal/testutil/zeroreader.go b/testutil/zeroreader.go
similarity index 100%
rename from traversal/internal/testutil/zeroreader.go
rename to testutil/zeroreader.go
diff --git a/traversal/internal/testutil/toblocks.go b/traversal/internal/testutil/toblocks.go
index 81061f8..79ad296 100644
--- a/traversal/internal/testutil/toblocks.go
+++ b/traversal/internal/testutil/toblocks.go
@@ -15,6 +15,7 @@ import (
 	"github.com/ipld/go-ipld-prime/node/basicnode"
 	"github.com/ipld/go-ipld-prime/traversal"
 	"github.com/ipld/go-ipld-prime/traversal/selector"
+	trustlesstestutil "github.com/ipld/go-trustless-utils/testutil"
 	"github.com/stretchr/testify/require"
 )
 
@@ -26,6 +27,12 @@ func ToBlocks(t *testing.T, lsys linking.LinkSystem, root cid.Cid, selNode datam
 	unixfsnode.AddUnixFSReificationToLinkSystem(&lsys)
 	osro := lsys.StorageReadOpener
 	lsys.StorageReadOpener = func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
+		if digest, ok, err := trustlesstestutil.AsIdentity(l.(cidlink.Link).Cid.KeyString()); ok {
+			return bytes.NewReader(digest), nil
+		} else if err != nil {
+			return nil, err
+		}
+
 		r, err := osro(lc, l)
 		if err != nil {
 			return nil, err
diff --git a/traversal/traversal.go b/traversal/traversal.go
index 707d501..dba486f 100644
--- a/traversal/traversal.go
+++ b/traversal/traversal.go
@@ -15,6 +15,7 @@ import (
 	_ "github.com/ipld/go-ipld-prime/codec/dagjson"
 	_ "github.com/ipld/go-ipld-prime/codec/json"
 	_ "github.com/ipld/go-ipld-prime/codec/raw"
+	"github.com/multiformats/go-multihash"
 
 	blocks "github.com/ipfs/go-block-format"
 	"github.com/ipfs/go-cid"
@@ -256,6 +257,17 @@ func loadNode(ctx context.Context, rootCid cid.Cid, lsys linking.LinkSystem) (da
 	return rootNode, nil
 }
 
+// asIdentity reports, via ok, whether c uses the identity multihash code.
+// digest is the decoded multihash digest and should only be relied upon when
+// ok is true; err is non-nil if the multihash of c cannot be decoded.
+func asIdentity(c cid.Cid) (digest []byte, ok bool, err error) {
+	dmh, err := multihash.Decode(c.Hash())
+	if err != nil {
+		return nil, false, err
+	}
+	return dmh.Digest, dmh.Code == multihash.IDENTITY, nil
+}
+
 // nextBlockReadOpener is a linking.BlockReadOpener that, for each call, will
 // read the next block from the provided BlockStream, verify it matches the
 // expected CID, and write it to the provided LinkSystem. It will then return
@@ -275,6 +287,13 @@ func (cfg *Config) nextBlockReadOpener(
 	seen := make(map[cid.Cid]struct{})
 	return func(lc linking.LinkContext, l datamodel.Link) (io.Reader, error) {
 		cid := l.(cidlink.Link).Cid
+
+		if digest, ok, err := asIdentity(cid); ok {
+			return bytes.NewReader(digest), nil
+		} else if err != nil {
+			return nil, err
+		}
+
 		var data []byte
 		var err error
 		if _, ok := seen[cid]; ok {
diff --git a/traversal/traversal_test.go b/traversal/traversal_test.go
index 9090abd..27401ea 100644
--- a/traversal/traversal_test.go
+++ b/traversal/traversal_test.go
@@ -12,6 +12,7 @@ import (
 	"time"
 
 	trustlessutils "github.com/ipld/go-trustless-utils"
+	trustlesstestutil "github.com/ipld/go-trustless-utils/testutil"
 	"github.com/ipld/go-trustless-utils/traversal"
 	"github.com/ipld/go-trustless-utils/traversal/internal/testutil"
 
@@ -32,9 +33,9 @@ import (
 	"github.com/ipld/go-ipld-prime/traversal/selector"
 	"github.com/ipld/go-ipld-prime/traversal/selector/builder"
 	selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
-	gstestutil "github.com/ipld/go-trustless-utils/testutil"
 	trustlesspathing "github.com/ipld/ipld/specs/pkg-go/trustless-pathing"
 	mh "github.com/multiformats/go-multihash"
+	multihash "github.com/multiformats/go-multihash/core"
 	"github.com/stretchr/testify/require"
 )
 
@@ -140,7 +141,7 @@ func TestVerifyCar(t *testing.T) {
 	t.Logf("random seed: %d", rndSeed)
 	var rndReader io.Reader = rand.New(rand.NewSource(rndSeed))
 
-	store := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{
+	store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{
 		Bag: make(map[string][]byte),
 	}}
 	lsys := cidlink.DefaultLinkSystem()
@@ -148,7 +149,7 @@ func TestVerifyCar(t *testing.T) {
 	lsys.SetReadStorage(store)
 	lsys.SetWriteStorage(store)
 
-	tbc1 := gstestutil.SetupBlockChain(ctx, t, lsys, 1000, 100)
+	tbc1 := trustlesstestutil.SetupBlockChain(ctx, t, lsys, 1000, 100)
 	root1 := tbc1.TipLink.(cidlink.Link).Cid
 	allBlocks := tbc1.AllBlocks()
 	extraneousLnk, err := lsys.Store(linking.LinkContext{}, cidlink.LinkPrototype{Prefix: cid.Prefix{Version: 1, Codec: 0x71, MhType: 0x12, MhLength: 32}}, basicnode.NewString("borp"))
@@ -178,7 +179,7 @@ func TestVerifyCar(t *testing.T) {
 	ss = ssb.ExploreInterpretAs("unixfs", ssb.MatcherSubset(1<<20, 2<<20))
 	unixfsFileRange1048576_2097152Selector := ss.Node()
 
-	unixfsFileWithDups := unixfs.GenerateFile(t, &lsys, testutil.ZeroReader{}, 4<<20)
+	unixfsFileWithDups := unixfs.GenerateFile(t, &lsys, trustlesstestutil.ZeroReader{}, 4<<20)
 	unixfsFileWithDupsBlocks := testutil.ToBlocks(t, lsys, unixfsFileWithDups.Root, allSelector)
 	var unixfsDir unixfs.DirEntry
 	var unixfsDirBlocks []blocks.Block
@@ -238,6 +239,10 @@ func TestVerifyCar(t *testing.T) {
 	unixfsExclusiveWrappedShardedDirOnlyBlocks := testutil.ToBlocks(t, lsys, unixfsExclusiveWrappedShardedDir.Root, unixfsWrappedPreloadPathSelector)
 
 	mismatchedCidBlk, _ := blocks.NewBlockWithCid(extraneousByts, allBlocks[99].Cid())
+
+	identityDag := trustlesstestutil.MakeDagWithIdentity(t, lsys)
+	identityBlocks := testutil.ToBlocks(t, lsys, identityDag.Root, allSelector)
+
 	testCases := []struct {
 		name string
 		skip bool
@@ -741,6 +746,15 @@ func TestVerifyCar(t *testing.T) {
 			checkPathWith:      datamodel.ParsePath(wrapPathPlusMore),
 			expectCheckPathErr: "failed to traverse full path",
 		},
+		{
+			name:   "identity dag",
+			blocks: consumedBlocks(identityBlocks),
+			roots:  []cid.Cid{identityDag.Root},
+			cfg: traversal.Config{
+				Root:     identityDag.Root,
+				Selector: allSelector,
+			},
+		},
 	}
 
 	for _, testCase := range testCases {
@@ -756,7 +770,7 @@ func TestVerifyCar(t *testing.T) {
 
 			req := require.New(t)
 
-			store := &testutil.CorrectedMemStore{ParentStore: &memstore.Store{
+			store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{
 				Bag: make(map[string][]byte),
 			}}
 			lsys := cidlink.DefaultLinkSystem()
@@ -768,13 +782,19 @@ func TestVerifyCar(t *testing.T) {
 			lsys.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) {
 				var buf bytes.Buffer
 				return &buf, func(l datamodel.Link) error {
+					c := l.(cidlink.Link).Cid
+					if c.Prefix().MhType == multihash.IDENTITY {
+						// identity links are not written to the store
+						return nil
+					}
+
 					if testCase.blockWriteErr != nil && writeCounter+skipped == len(testCase.blocks)/2 {
 						return testCase.blockWriteErr
 					}
 					for testCase.blocks[writeCounter+skipped].skipped {
 						skipped++
 					}
-					req.Equal(testCase.blocks[writeCounter+skipped].Cid().String(), l.(cidlink.Link).Cid.String(), "block %d", writeCounter)
+					req.Equal(testCase.blocks[writeCounter+skipped].Cid().String(), c.String(), "block %d", writeCounter)
 					req.Equal(testCase.blocks[writeCounter+skipped].RawData(), buf.Bytes(), "block %d", writeCounter)
 					writeCounter++
 					w, wc, err := bwo(lc)