From 9d6bafd8c938304e3fdf267b72e623264b53bcf8 Mon Sep 17 00:00:00 2001 From: Andrew Gillis Date: Thu, 13 Jan 2022 04:35:43 -0800 Subject: [PATCH] Update config defaults (#132) * Update defaults and comments * Update dependencies * Populate defaults if config missing values --- cmd/go.mod | 4 ++-- cmd/go.sum | 11 ++++++--- config/adminserver.go | 13 +++++++++++ config/config.go | 9 +++++++- config/datastore.go | 10 ++++++++ config/ingest.go | 36 ++++++++++++++++------------- config/providerserver.go | 9 ++++++++ engine/engine.go | 2 +- engine/linksystem.go | 49 ++++++++++++++++------------------------ go.mod | 4 ++-- go.sum | 7 +++--- version.json | 2 +- 12 files changed, 97 insertions(+), 59 deletions(-) diff --git a/cmd/go.mod b/cmd/go.mod index dc5c0539..4155d362 100644 --- a/cmd/go.mod +++ b/cmd/go.mod @@ -5,12 +5,12 @@ go 1.16 require ( github.com/filecoin-project/go-data-transfer v1.12.1 github.com/filecoin-project/index-provider v0.0.0-20211115210313-7957526f5b07 - github.com/filecoin-project/storetheindex v0.2.1 + github.com/filecoin-project/storetheindex v0.2.2 github.com/ipfs/go-cid v0.1.0 github.com/ipfs/go-ds-leveldb v0.5.0 github.com/ipfs/go-graphsync v0.11.5 github.com/ipfs/go-ipfs v0.11.0 - github.com/ipfs/go-log/v2 v2.4.0 + github.com/ipfs/go-log/v2 v2.5.0 github.com/ipld/go-car/v2 v2.1.1 github.com/ipld/go-ipld-prime v0.14.3 github.com/libp2p/go-libp2p v0.17.0 diff --git a/cmd/go.sum b/cmd/go.sum index ea7d3654..dbcc79fd 100644 --- a/cmd/go.sum +++ b/cmd/go.sum @@ -213,8 +213,8 @@ github.com/filecoin-project/go-statemachine v0.0.0-20200925024713-05bd7c71fbfe/g github.com/filecoin-project/go-statestore v0.1.0/go.mod h1:LFc9hD+fRxPqiHiaqUEZOinUJB4WARkRfNl10O7kTnI= github.com/filecoin-project/go-statestore v0.2.0 h1:cRRO0aPLrxKQCZ2UOQbzFGn4WDNdofHZoGPjfNaAo5Q= github.com/filecoin-project/go-statestore v0.2.0/go.mod h1:8sjBYbS35HwPzct7iT4lIXjLlYyPor80aU7t7a/Kspo= -github.com/filecoin-project/storetheindex v0.2.1 
h1:jhsMh5O52bBU/NrnjJs2iLPI92T38kjlU9bk/dP0sts= -github.com/filecoin-project/storetheindex v0.2.1/go.mod h1:Tc5mYdAnGUzly40cuo35yITDD3XhF9EX5BO7evGsJ5M= +github.com/filecoin-project/storetheindex v0.2.2 h1:2zQlAtHKOVAfBpqIuQ8Njas3kM0VwgpicUCKC+epPG4= +github.com/filecoin-project/storetheindex v0.2.2/go.mod h1:05vxs5u3vTQFAwGW9+pgWSMc3BgFOK513nPyIQyLwyQ= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/flynn/noise v0.0.0-20180327030543-2492fe189ae6/go.mod h1:1i71OnUq3iUe1ma7Lr6yG6/rjvM3emb6yoL7xLFzcVQ= github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= @@ -475,8 +475,11 @@ github.com/ipfs/go-ds-leveldb v0.4.2/go.mod h1:jpbku/YqBSsBc1qgME8BkWS4AxzF2cEu1 github.com/ipfs/go-ds-leveldb v0.5.0 h1:s++MEBbD3ZKc9/8/njrn4flZLnCuY9I79v94gBUNumo= github.com/ipfs/go-ds-leveldb v0.5.0/go.mod h1:d3XG9RUDzQ6V4SHi8+Xgj9j1XuEk1z82lquxrVbml/Q= github.com/ipfs/go-ds-measure v0.2.0/go.mod h1:SEUD/rE2PwRa4IQEC5FuNAmjJCyYObZr9UvVh8V3JxE= +github.com/ipfs/go-ds-measure v0.2.0/go.mod h1:SEUD/rE2PwRa4IQEC5FuNAmjJCyYObZr9UvVh8V3JxE= +github.com/ipfs/go-fetcher v1.5.0/go.mod h1:5pDZ0393oRF/fHiLmtFZtpMNBQfHOYNPtryWedVuSWE= github.com/ipfs/go-fetcher v1.5.0/go.mod h1:5pDZ0393oRF/fHiLmtFZtpMNBQfHOYNPtryWedVuSWE= github.com/ipfs/go-fetcher v1.6.1/go.mod h1:27d/xMV8bodjVs9pugh/RCjjK2OZ68UgAMspMdingNo= +github.com/ipfs/go-fetcher v1.6.1/go.mod h1:27d/xMV8bodjVs9pugh/RCjjK2OZ68UgAMspMdingNo= github.com/ipfs/go-filestore v0.1.0/go.mod h1:0KTrzoJnJ3sJDEDM09Vq8nz8H475rRyeq4i0n/bpF00= github.com/ipfs/go-fs-lock v0.0.7/go.mod h1:Js8ka+FNYmgQRLrRXzU3CB/+Csr1BwrRilEcvYrHhhc= github.com/ipfs/go-graphsync v0.10.0/go.mod h1:cKIshzTaa5rCZjryH5xmSKZVGX9uk1wvwGvz2WEha5Y= @@ -565,8 +568,9 @@ github.com/ipfs/go-log/v2 v2.0.5/go.mod h1:eZs4Xt4ZUJQFM3DlanGhy7TkwwawCZcSByscw github.com/ipfs/go-log/v2 v2.1.1/go.mod h1:2v2nsGfZsvvAJz13SyFzf9ObaqwHiHxsPLEHntrv9KM= github.com/ipfs/go-log/v2 v2.1.3/go.mod 
h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g= github.com/ipfs/go-log/v2 v2.3.0/go.mod h1:QqGoj30OTpnKaG/LKTGTxoP2mmQtjVMEnK72gynbe/g= -github.com/ipfs/go-log/v2 v2.4.0 h1:iR/2o9PGWanVJrBgIH5Ff8mPGOwpqLaPIAFqSnsdlzk= github.com/ipfs/go-log/v2 v2.4.0/go.mod h1:nPZnh7Cj7lwS3LpRU5Mwr2ol1c2gXIEXuF6aywqrtmo= +github.com/ipfs/go-log/v2 v2.5.0 h1:+MhAooFd9XZNvR0i9FriKW6HB0ql7HNXUuflWtc0dd4= +github.com/ipfs/go-log/v2 v2.5.0/go.mod h1:prSpmC1Gpllc9UYWxDiZDreBYw7zp4Iqp1kOLU9U5UI= github.com/ipfs/go-merkledag v0.0.6/go.mod h1:QYPdnlvkOg7GnQRofu9XZimC5ZW5Wi3bKys/4GQQfto= github.com/ipfs/go-merkledag v0.2.3/go.mod h1:SQiXrtSts3KGNmgOzMICy5c0POOpUNQLvB3ClKnBAlk= github.com/ipfs/go-merkledag v0.2.4/go.mod h1:SQiXrtSts3KGNmgOzMICy5c0POOpUNQLvB3ClKnBAlk= @@ -603,6 +607,7 @@ github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZ github.com/ipfs/interface-go-ipfs-core v0.4.0/go.mod h1:UJBcU6iNennuI05amq3FQ7g0JHUkibHFAfhfUIy927o= github.com/ipfs/interface-go-ipfs-core v0.5.2/go.mod h1:lNBJrdXHtWS46evMPBdWtDQMDsrKcGbxCOGoKLkztOE= github.com/ipfs/tar-utils v0.0.2/go.mod h1:4qlnRWgTVljIMhSG2SqRYn66NT+3wrv/kZt9V+eqxDM= +github.com/ipfs/tar-utils v0.0.2/go.mod h1:4qlnRWgTVljIMhSG2SqRYn66NT+3wrv/kZt9V+eqxDM= github.com/ipld/go-car v0.1.0/go.mod h1:RCWzaUh2i4mOEkB3W45Vc+9jnS/M6Qay5ooytiBHl3g= github.com/ipld/go-car v0.3.2 h1:V9wt/80FNfbMRWSD98W5br6fyjUAyVgI2lDOTZX16Lg= github.com/ipld/go-car v0.3.2/go.mod h1:WEjynkVt04dr0GwJhry0KlaTeSDEiEYyMPOxDBQ17KE= diff --git a/config/adminserver.go b/config/adminserver.go index 753813df..471605a2 100644 --- a/config/adminserver.go +++ b/config/adminserver.go @@ -41,3 +41,16 @@ func (as *AdminServer) ListenNetAddr() (string, error) { } return netAddr.String(), nil } + +// PopulateDefaults replaces zero-values in the config with default values. 
+func (c *AdminServer) PopulateDefaults() { + if c.ListenMultiaddr == "" { + c.ListenMultiaddr = defaultAdminServerAddr + } + if c.ReadTimeout == 0 { + c.ReadTimeout = defaultReadTimeout + } + if c.WriteTimeout == 0 { + c.WriteTimeout = defaultWriteTimeout + } +} diff --git a/config/config.go b/config/config.go index fcd25adf..9c83e9ad 100644 --- a/config/config.go +++ b/config/config.go @@ -98,7 +98,7 @@ func Load(filePath string) (*Config, error) { } // Replace any zero-values with defaults. - cfg.Ingest.OverrideUnsetToDefaults() + cfg.PopulateDefaults() return &cfg, nil } @@ -140,3 +140,10 @@ func (c *Config) String() string { } return string(b) } + +func (c *Config) PopulateDefaults() { + c.AdminServer.PopulateDefaults() + c.Datastore.PopulateDefaults() + c.Ingest.PopulateDefaults() + c.ProviderServer.PopulateDefaults() +} diff --git a/config/datastore.go b/config/datastore.go index fda8fc41..203fb554 100644 --- a/config/datastore.go +++ b/config/datastore.go @@ -20,3 +20,13 @@ func NewDatastore() Datastore { Dir: defaultDatastoreDir, } } + +// PopulateDefaults replaces zero-values in the config with default values. +func (c *Datastore) PopulateDefaults() { + if c.Type == "" { + c.Type = defaultDatastoreType + } + if c.Dir == "" { + c.Dir = defaultDatastoreDir + } +} diff --git a/config/ingest.go b/config/ingest.go index 6a49ee68..1ba2ea8f 100644 --- a/config/ingest.go +++ b/config/ingest.go @@ -1,19 +1,23 @@ package config const ( - defaultLinkCacheSize = 1024 - defaultLinkedChunkSize = 100 + // Keep 1024 chunks in cache; keeps 2G if chunks are 2MB. + defaultLinkCacheSize = 1024 + // Multihashes are 128 bytes so 16384 results in a 2MB chunk when full. + defaultLinkedChunkSize = 16384 defaultPubSubTopic = "indexer/ingest" ) -// Ingest tracks the configuration related to the ingestion protocol +// Ingest configures settings related to the ingestion protocol. 
type Ingest struct { - // LinkCacheSize is the maximum number of links that cash can store before LRU eviction. If a - // single linked list has more links than the cache can hold, the cache is - // resized to be able to hold all links. + // LinkCacheSize is the maximum number of links that the cache can store before + // LRU eviction. If a single linked list has more links than the cache can + // hold, the cache is resized to be able to hold all links. LinkCacheSize int - // LinkedChunkSize is the number of hashes in each chunk of ingestion - // linked list. + // LinkedChunkSize is the number of multihashes in each chunk in the + // advertised entries linked list. If multihashes are 128 bytes, then + // setting LinkedChunkSize = 16384 will result in blocks of about 2MB when + // full. LinkedChunkSize int // PubSubTopic used to advertise ingestion announcements. PubSubTopic string @@ -30,15 +34,15 @@ func NewIngest() Ingest { } } -// OverrideUnsetToDefaults replaces zero-values in the config with default values. -func (cfg *Ingest) OverrideUnsetToDefaults() { - if cfg.LinkCacheSize == 0 { - cfg.LinkCacheSize = defaultLinkCacheSize +// PopulateDefaults replaces zero-values in the config with default values. +func (c *Ingest) PopulateDefaults() { + if c.LinkCacheSize == 0 { + c.LinkCacheSize = defaultLinkCacheSize } - if cfg.LinkedChunkSize == 0 { - cfg.LinkedChunkSize = defaultLinkedChunkSize + if c.LinkedChunkSize == 0 { + c.LinkedChunkSize = defaultLinkedChunkSize } - if cfg.PubSubTopic == "" { - cfg.PubSubTopic = defaultPubSubTopic + if c.PubSubTopic == "" { + c.PubSubTopic = defaultPubSubTopic } } diff --git a/config/providerserver.go b/config/providerserver.go index 3ef6a402..3fab3669 100644 --- a/config/providerserver.go +++ b/config/providerserver.go @@ -14,3 +14,12 @@ func NewProviderServer() ProviderServer { ListenMultiaddr: "/ip4/0.0.0.0/tcp/3103", } } + +// PopulateDefaults replaces zero-values in the config with default values. 
+func (c *ProviderServer) PopulateDefaults() { + def := NewProviderServer() + + if c.ListenMultiaddr == "" { + c.ListenMultiaddr = def.ListenMultiaddr + } +} diff --git a/engine/engine.go b/engine/engine.go index 97e94693..a1c7035b 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -99,7 +99,7 @@ func New(ingestCfg config.Ingest, privKey crypto.PrivKey, dt dt.Manager, h host. log.Infof("Retrieval address not configured, using %s", retAddrs[0]) } - ingestCfg.OverrideUnsetToDefaults() + ingestCfg.PopulateDefaults() // TODO(security): We should not keep the privkey decoded here. // We should probably unlock it and lock it every time we need it. diff --git a/engine/linksystem.go b/engine/linksystem.go index b6672fdd..5ebdd79a 100644 --- a/engine/linksystem.go +++ b/engine/linksystem.go @@ -65,22 +65,14 @@ func (e *Engine) mkLinkSystem() ipld.LinkSystem { return nil, err } - // If we don't have the link, generate the linked list in cache so it's - // ready to be served for this (and future) ingestions. + // If we don't have the link, generate the linked list of entries in + // cache so it is ready to serve for this and future ingestions. // - // TODO: This process may take a lot of time, we should do it - // asynchronously to parallelize it. We could implement a cache manager - // that keeps the state of what has been generated, what has been - // requested but not available and requires reading from a CAR, and - // what is ready for ingestion. This manager will also have to handle - // garbage collecting the cache. - // - // The reason for caching this? When we build the ingestion linked - // lists and we are serving back the structure to an indexer, we will - // be receiving requests for a chunkEntry, as we can't read a specific - // subset of CIDs from the CAR index, we need some intermediate storage - // to map link of the chunk in the linked list with the list of CIDs it - // corresponds to. 
+ // The reason for caching this is because the indexer requests each + // chunk entry, and a specific subset of entries cannot be read from a + // car. So all entry chunks are kept in cache to serve to the indexer. + // The cache uses the entry chunk CID as a key that maps to the entry + // chunk data. if b == nil { log.Infow("Entry for CID is not cached, generating chunks", "cid", c) // If the link is not found, it means that the root link of the list has @@ -93,14 +85,11 @@ func (e *Engine) mkLinkSystem() ipld.LinkSystem { return nil, err } - // TODO: For removals we may not have the list of CIDs, let's see - // what selector we end up using, but we may need additional - // validation here in order not to follow the link. If we do - // step-by-step syncs, this would mean that when the subscribers - // sees an advertisement of remove type, it doesn't follow the - // Entries link, if just gets the cid, and uses its local map cid - // to contextID to trigger the removal of all entries for that - // contextID in its index. + // Get the car iterator needed to create the entry chunks. + // Normally for removal this is not needed since the indexer + // deletes all indexes for the contextID in the removal + // advertisement. Only if the removal had no contextID would the + // indexer ask for entry chunks to remove. mhIter, err := e.cb(lctx.Ctx, key) if err != nil { return nil, err @@ -150,7 +139,7 @@ func (e *Engine) generateChunks(mhIter provider.MultihashIterator) (ipld.Link, e chunkSize := e.linkedChunkSize mhs := make([]multihash.Multihash, 0, chunkSize) - dsc, isDsc := e.cache.(*lrustore.LRUStore) + ls, lsOK := e.cache.(*lrustore.LRUStore) var resized bool var chunkLnk ipld.Link @@ -168,8 +157,8 @@ func (e *Engine) generateChunks(mhIter provider.MultihashIterator) (ipld.Link, e if len(mhs) >= chunkSize { // Cache needs to be large enough to store all links in a list. 
- if isDsc && dsc.Len() == dsc.Cap() { - dsc.Resize(context.Background(), dsc.Cap()*2) + if lsOK && ls.Len() == ls.Cap() { + ls.Resize(context.Background(), ls.Cap()*2) } chunkLnk, _, err = schema.NewLinkedListOfMhs(e.cachelsys, mhs, chunkLnk) if err != nil { @@ -183,8 +172,8 @@ func (e *Engine) generateChunks(mhIter provider.MultihashIterator) (ipld.Link, e // Chunk remaining multihashes. if len(mhs) != 0 { - if isDsc && dsc.Len() == dsc.Cap() { - dsc.Resize(context.Background(), dsc.Cap()*2) + if lsOK && ls.Len() == ls.Cap() { + ls.Resize(context.Background(), ls.Cap()*2) } var err error chunkLnk, _, err = schema.NewLinkedListOfMhs(e.cachelsys, mhs, chunkLnk) @@ -197,8 +186,8 @@ func (e *Engine) generateChunks(mhIter provider.MultihashIterator) (ipld.Link, e // If the cache was resized to expand beyond its original capacity, then // set its size to only as big as the number of links in this list. if resized { - dsc.Resize(context.Background(), dsc.Len()) - log.Infow("Link cache expanded to hold links", "new_size", dsc.Cap()) + ls.Resize(context.Background(), ls.Len()) + log.Infow("Link cache expanded to hold links", "new_size", ls.Cap()) } log.Infow("Generated linked chunks of multihashes", "totalMhCount", totalMhCount, "chunkCount", chunkCount) diff --git a/go.mod b/go.mod index 23289a0b..fde7152d 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/filecoin-project/go-data-transfer v1.12.1 github.com/filecoin-project/go-legs v0.2.1 github.com/filecoin-project/go-state-types v0.1.0 - github.com/filecoin-project/storetheindex v0.2.1 + github.com/filecoin-project/storetheindex v0.2.2 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da github.com/golang/mock v1.6.0 github.com/gorilla/mux v1.7.4 @@ -15,7 +15,7 @@ require ( github.com/ipfs/go-graphsync v0.11.5 github.com/ipfs/go-ipfs-blockstore v1.1.2 github.com/ipfs/go-ipfs-blocksutil v0.0.1 - github.com/ipfs/go-log/v2 v2.4.0 + github.com/ipfs/go-log/v2 v2.5.0 github.com/ipld/go-car/v2 
v2.1.1 github.com/ipld/go-codec-dagpb v1.3.0 github.com/ipld/go-ipld-prime v0.14.3 diff --git a/go.sum b/go.sum index 9845eaa3..08851b50 100644 --- a/go.sum +++ b/go.sum @@ -213,8 +213,8 @@ github.com/filecoin-project/go-statemachine v0.0.0-20200925024713-05bd7c71fbfe/g github.com/filecoin-project/go-statestore v0.1.0/go.mod h1:LFc9hD+fRxPqiHiaqUEZOinUJB4WARkRfNl10O7kTnI= github.com/filecoin-project/go-statestore v0.2.0 h1:cRRO0aPLrxKQCZ2UOQbzFGn4WDNdofHZoGPjfNaAo5Q= github.com/filecoin-project/go-statestore v0.2.0/go.mod h1:8sjBYbS35HwPzct7iT4lIXjLlYyPor80aU7t7a/Kspo= -github.com/filecoin-project/storetheindex v0.2.1 h1:jhsMh5O52bBU/NrnjJs2iLPI92T38kjlU9bk/dP0sts= -github.com/filecoin-project/storetheindex v0.2.1/go.mod h1:Tc5mYdAnGUzly40cuo35yITDD3XhF9EX5BO7evGsJ5M= +github.com/filecoin-project/storetheindex v0.2.2 h1:2zQlAtHKOVAfBpqIuQ8Njas3kM0VwgpicUCKC+epPG4= +github.com/filecoin-project/storetheindex v0.2.2/go.mod h1:05vxs5u3vTQFAwGW9+pgWSMc3BgFOK513nPyIQyLwyQ= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/flynn/noise v0.0.0-20180327030543-2492fe189ae6/go.mod h1:1i71OnUq3iUe1ma7Lr6yG6/rjvM3emb6yoL7xLFzcVQ= github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= @@ -562,8 +562,9 @@ github.com/ipfs/go-log/v2 v2.0.5/go.mod h1:eZs4Xt4ZUJQFM3DlanGhy7TkwwawCZcSByscw github.com/ipfs/go-log/v2 v2.1.1/go.mod h1:2v2nsGfZsvvAJz13SyFzf9ObaqwHiHxsPLEHntrv9KM= github.com/ipfs/go-log/v2 v2.1.3/go.mod h1:/8d0SH3Su5Ooc31QlL1WysJhvyOTDCjcCZ9Axpmri6g= github.com/ipfs/go-log/v2 v2.3.0/go.mod h1:QqGoj30OTpnKaG/LKTGTxoP2mmQtjVMEnK72gynbe/g= -github.com/ipfs/go-log/v2 v2.4.0 h1:iR/2o9PGWanVJrBgIH5Ff8mPGOwpqLaPIAFqSnsdlzk= github.com/ipfs/go-log/v2 v2.4.0/go.mod h1:nPZnh7Cj7lwS3LpRU5Mwr2ol1c2gXIEXuF6aywqrtmo= +github.com/ipfs/go-log/v2 v2.5.0 h1:+MhAooFd9XZNvR0i9FriKW6HB0ql7HNXUuflWtc0dd4= +github.com/ipfs/go-log/v2 v2.5.0/go.mod h1:prSpmC1Gpllc9UYWxDiZDreBYw7zp4Iqp1kOLU9U5UI= 
github.com/ipfs/go-merkledag v0.0.6/go.mod h1:QYPdnlvkOg7GnQRofu9XZimC5ZW5Wi3bKys/4GQQfto= github.com/ipfs/go-merkledag v0.2.3/go.mod h1:SQiXrtSts3KGNmgOzMICy5c0POOpUNQLvB3ClKnBAlk= github.com/ipfs/go-merkledag v0.2.4/go.mod h1:SQiXrtSts3KGNmgOzMICy5c0POOpUNQLvB3ClKnBAlk= diff --git a/version.json b/version.json index 1437d5b7..002fae3b 100644 --- a/version.json +++ b/version.json @@ -1,3 +1,3 @@ { - "version": "v0.2.0" + "version": "v0.2.1" }