diff --git a/ipld/car/v2/blockstore/readonly.go b/ipld/car/v2/blockstore/readonly.go
index c59bdcf68..a102b20c4 100644
--- a/ipld/car/v2/blockstore/readonly.go
+++ b/ipld/car/v2/blockstore/readonly.go
@@ -179,6 +179,12 @@ func OpenReadOnly(path string, opts ...carv2.Option) (*ReadOnly, error) {
 	return robs, nil
 }
 
+// Index gives direct access to the index held by this blockstore.
+// Callers must treat it as read-only and never add records to it themselves.
+func (b *ReadOnly) Index() index.Index {
+	return b.idx
+}
+
 // DeleteBlock is unsupported and always errors.
 func (b *ReadOnly) DeleteBlock(_ context.Context, _ cid.Cid) error {
 	return errReadOnly
diff --git a/ipld/car/v2/blockstore/readonly_test.go b/ipld/car/v2/blockstore/readonly_test.go
index 0bb4df42e..f2bc7f105 100644
--- a/ipld/car/v2/blockstore/readonly_test.go
+++ b/ipld/car/v2/blockstore/readonly_test.go
@@ -12,10 +12,14 @@ import (
 	format "github.com/ipfs/go-ipld-format"
 	blocks "github.com/ipfs/go-libipfs/blocks"
 	"github.com/ipfs/go-merkledag"
-	carv2 "github.com/ipld/go-car/v2"
-	"github.com/ipld/go-car/v2/internal/carv1"
 	"github.com/multiformats/go-multicodec"
+	"github.com/multiformats/go-multihash"
 	"github.com/stretchr/testify/require"
+
+	carv2 "github.com/ipld/go-car/v2"
+	"github.com/ipld/go-car/v2/index"
+	"github.com/ipld/go-car/v2/internal/carv1"
+	"github.com/ipld/go-car/v2/internal/store"
 )
 
 func TestReadOnlyGetReturnsBlockstoreNotFoundWhenCidDoesNotExist(t *testing.T) {
@@ -331,3 +335,67 @@ func TestNewReadOnly_CarV1WithoutIndexWorksAsExpected(t *testing.T) {
 	require.NoError(t, err)
 	require.Equal(t, wantBlock, gotBlock)
 }
+
+// TestReadOnlyIndex checks that the index exposed by ReadOnly.Index contains
+// every non-identity CID of the CAR payload, for both CARv1 and CARv2 inputs.
+func TestReadOnlyIndex(t *testing.T) {
+	tests := []struct {
+		name     string
+		path     string
+		wantCIDs []cid.Cid
+	}{
+		{
+			"IndexCarV1",
+			"../testdata/sample-v1.car",
+			listCids(t, newV1ReaderFromV1File(t, "../testdata/sample-v1.car", false)),
+		},
+		{
+			"IndexCarV2",
+			"../testdata/sample-wrapped-v2.car",
+			listCids(t, newV1ReaderFromV2File(t, "../testdata/sample-wrapped-v2.car", false)),
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			subject, err := OpenReadOnly(tt.path, UseWholeCIDs(true))
+			require.NoError(t, err)
+			// Release the underlying file handle once the subtest is done.
+			t.Cleanup(func() { require.NoError(t, subject.Close()) })
+
+			idx := subject.Index()
+
+			// The index doesn't hold identity CIDs, so filter them out once
+			// and reuse the result for both the lookup and iteration checks.
+			indexed := make([]cid.Cid, 0, len(tt.wantCIDs))
+			for _, c := range tt.wantCIDs {
+				_, isIdentity, err := store.IsIdentity(c)
+				require.NoError(t, err)
+				if !isIdentity {
+					indexed = append(indexed, c)
+				}
+			}
+
+			// Every indexed CID must be resolvable through a direct lookup.
+			wantHashes := make([]multihash.Multihash, 0, len(indexed))
+			for _, c := range indexed {
+				_, err := index.GetFirst(idx, c)
+				require.NoError(t, err)
+				wantHashes = append(wantHashes, c.Hash())
+			}
+
+			// Iteration is optional: only index types implementing
+			// IterableIndex can enumerate their multihashes.
+			iterable, ok := idx.(index.IterableIndex)
+			if !ok {
+				return
+			}
+			var got []multihash.Multihash
+			err = iterable.ForEach(func(m multihash.Multihash, _ uint64) error {
+				got = append(got, m)
+				return nil
+			})
+			require.NoError(t, err)
+			require.ElementsMatch(t, wantHashes, got)
+		})
+	}
+}
diff --git a/ipld/car/v2/blockstore/readwrite.go b/ipld/car/v2/blockstore/readwrite.go
index 0f9cd4cae..e33f13104 100644
--- a/ipld/car/v2/blockstore/readwrite.go
+++ b/ipld/car/v2/blockstore/readwrite.go
@@ -10,6 +10,7 @@ import (
 	blocks "github.com/ipfs/go-libipfs/blocks"
 
 	carv2 "github.com/ipld/go-car/v2"
+	"github.com/ipld/go-car/v2/index"
 	"github.com/ipld/go-car/v2/internal/carv1"
 	"github.com/ipld/go-car/v2/internal/carv1/util"
 	internalio "github.com/ipld/go-car/v2/internal/io"
@@ -178,6 +179,12 @@ func (b *ReadWrite) initWithRoots(v2 bool, roots []cid.Cid) error {
 	return carv1.WriteHeader(&carv1.CarHeader{Roots: roots, Version: 1}, b.dataWriter)
 }
 
+// Index gives direct access to the index held by this blockstore.
+// Callers must treat it as read-only and never add records to it themselves.
+func (b *ReadWrite) Index() index.Index {
+	return b.idx
+}
+
 // Put puts a given block to the underlying datastore
 func (b *ReadWrite) Put(ctx context.Context, blk blocks.Block) error {
 	// PutMany already checks b.ronly.closed.
diff --git a/ipld/car/v2/blockstore/readwrite_test.go b/ipld/car/v2/blockstore/readwrite_test.go
index 5766c2d3b..40731e548 100644
--- a/ipld/car/v2/blockstore/readwrite_test.go
+++ b/ipld/car/v2/blockstore/readwrite_test.go
@@ -18,14 +18,15 @@ import (
 	format "github.com/ipfs/go-ipld-format"
 	blocks "github.com/ipfs/go-libipfs/blocks"
 	"github.com/ipfs/go-merkledag"
-	carv2 "github.com/ipld/go-car/v2"
-	"github.com/ipld/go-car/v2/blockstore"
-	"github.com/ipld/go-car/v2/index"
-	"github.com/ipld/go-car/v2/internal/carv1"
 	"github.com/multiformats/go-multicodec"
 	"github.com/multiformats/go-multihash"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+
+	carv2 "github.com/ipld/go-car/v2"
+	"github.com/ipld/go-car/v2/blockstore"
+	"github.com/ipld/go-car/v2/index"
+	"github.com/ipld/go-car/v2/internal/carv1"
 )
 
 var (
@@ -1135,3 +1136,47 @@ func TestWholeCID(t *testing.T) {
 		})
 	}
 }
+
+// TestReadWriteIndex verifies that the index exposed by ReadWrite.Index
+// resolves every key held by the blockstore and, when the index is iterable,
+// enumerates exactly the multihashes of those keys.
+func TestReadWriteIndex(t *testing.T) {
+	carPath := requireTmpCopy(t, "../testdata/sample-wrapped-v2.car")
+	roots := []cid.Cid{cid.MustParse("bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy")}
+
+	subject, err := blockstore.OpenReadWrite(carPath, roots)
+	require.NoError(t, err)
+	// Finalize on the way out, whatever the outcome of the checks below.
+	defer func() { require.NoError(t, subject.Finalize()) }()
+
+	keys, err := subject.AllKeysChan(context.Background())
+	require.NoError(t, err)
+	var wantCids []cid.Cid
+	for k := range keys {
+		wantCids = append(wantCids, k)
+	}
+
+	idx := subject.Index()
+
+	// Each key must be found by a direct lookup; its multihash is recorded
+	// for the comparison against the iteration results.
+	var wantMh []multihash.Multihash
+	for _, k := range wantCids {
+		_, err = index.GetFirst(idx, k)
+		require.NoError(t, err)
+		wantMh = append(wantMh, k.Hash())
+	}
+
+	// Only index types implementing IterableIndex can be enumerated.
+	iterable, ok := idx.(index.IterableIndex)
+	if !ok {
+		return
+	}
+	var got []multihash.Multihash
+	err = iterable.ForEach(func(m multihash.Multihash, _ uint64) error {
+		got = append(got, m)
+		return nil
+	})
+	require.NoError(t, err)
+	require.ElementsMatch(t, wantMh, got)
+}
diff --git a/ipld/car/v2/index/index.go b/ipld/car/v2/index/index.go
index 1634dda5d..af16fcc1e 100644
--- a/ipld/car/v2/index/index.go
+++ b/ipld/car/v2/index/index.go
@@ -134,7 +134,7 @@ func WriteTo(idx Index, w io.Writer) (uint64, error) {
 // Returns error if the encoding is not known.
 //
 // Attempting to read index data from untrusted sources is not recommended.
-// Instead the index should be regenerated from the CARv2 data payload.
+// Instead, the index should be regenerated from the CARv2 data payload.
 func ReadFrom(r io.Reader) (Index, error) {
 	codec, err := ReadCodec(r)
 	if err != nil {