diff --git a/cmd/geth/snapshot.go b/cmd/geth/snapshot.go
index 7d713ad1109a..13852ac6d102 100644
--- a/cmd/geth/snapshot.go
+++ b/cmd/geth/snapshot.go
@@ -220,6 +220,25 @@ func verifyState(ctx *cli.Context) error {
 	triedb := utils.MakeTrieDatabase(ctx, chaindb, false, true, false)
 	defer triedb.Close()
 
+	var (
+		err  error
+		root = headBlock.Root()
+	)
+	if ctx.NArg() == 1 {
+		root, err = parseRoot(ctx.Args().First())
+		if err != nil {
+			log.Error("Failed to resolve state root", "err", err)
+			return err
+		}
+	}
+	if triedb.Scheme() == rawdb.PathScheme {
+		if err := triedb.VerifyState(root); err != nil {
+			log.Error("Failed to verify state", "root", root, "err", err)
+			return err
+		}
+		log.Info("Verified the state", "root", root)
+		return snapshot.CheckDanglingStorage(chaindb)
+	}
 	snapConfig := snapshot.Config{
 		CacheSize: 256,
 		Recovery:  false,
@@ -235,14 +254,6 @@ func verifyState(ctx *cli.Context) error {
 		log.Error("Too many arguments given")
 		return errors.New("too many arguments")
 	}
-	var root = headBlock.Root()
-	if ctx.NArg() == 1 {
-		root, err = parseRoot(ctx.Args().First())
-		if err != nil {
-			log.Error("Failed to resolve state root", "err", err)
-			return err
-		}
-	}
 	if err := snaptree.Verify(root); err != nil {
 		log.Error("Failed to verify state", "root", root, "err", err)
 		return err
@@ -428,7 +439,7 @@ func traverseRawState(ctx *cli.Context) error {
 		log.Error("Failed to open iterator", "root", root, "err", err)
 		return err
 	}
-	reader, err := triedb.Reader(root)
+	reader, err := triedb.NodeReader(root)
 	if err != nil {
 		log.Error("State is non-existent", "root", root)
 		return nil
diff --git a/core/blockchain.go b/core/blockchain.go
index f7c921fe64fe..8317c4c3de2a 100644
--- a/core/blockchain.go
+++ b/core/blockchain.go
@@ -159,9 +159,10 @@ func (c *CacheConfig) triedbConfig(isVerkle bool) *triedb.Config {
 	}
 	if c.StateScheme == rawdb.PathScheme {
 		config.PathDB = &pathdb.Config{
-			StateHistory:   c.StateHistory,
-			CleanCacheSize: c.TrieCleanLimit * 1024 * 1024,
-			DirtyCacheSize: c.TrieDirtyLimit * 1024 * 1024,
+			StateHistory:    c.StateHistory,
+			TrieCleanSize:   c.TrieCleanLimit * 1024 * 1024,
+			StateCleanSize:  c.SnapshotLimit * 1024 * 1024,
+			WriteBufferSize: c.TrieDirtyLimit * 1024 * 1024,
 		}
 	}
 	return config
@@ -349,11 +350,14 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
 			// Do nothing here until the state syncer picks it up.
 			log.Info("Genesis state is missing, wait state sync")
 		} else {
-			// Head state is missing, before the state recovery, find out the
-			// disk layer point of snapshot(if it's enabled). Make sure the
-			// rewound point is lower than disk layer.
+			// Head state is missing, before the state recovery, find out the disk
+			// layer point of snapshot(if it's enabled). Make sure the rewound point
+			// is lower than disk layer.
+			//
+			// Note this is unnecessary in path mode, which always keeps trie data
+			// and state data consistent.
 			var diskRoot common.Hash
-			if bc.cacheConfig.SnapshotLimit > 0 {
+			if bc.cacheConfig.SnapshotLimit > 0 && bc.cacheConfig.StateScheme == rawdb.HashScheme {
 				diskRoot = rawdb.ReadSnapshotRoot(bc.db)
 			}
 			if diskRoot != (common.Hash{}) {
@@ -426,7 +430,32 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
 			bc.logger.OnGenesisBlock(bc.genesisBlock, alloc)
 		}
 	}
+	bc.setupSnapshot()
+
+	// Rewind the chain in case of an incompatible config upgrade.
+	if compat, ok := genesisErr.(*params.ConfigCompatError); ok {
+		log.Warn("Rewinding chain to upgrade configuration", "err", compat)
+		if compat.RewindToTime > 0 {
+			bc.SetHeadWithTimestamp(compat.RewindToTime)
+		} else {
+			bc.SetHead(compat.RewindToBlock)
+		}
+		rawdb.WriteChainConfig(db, genesisHash, chainConfig)
+	}
+
+	// Start tx indexer if it's enabled.
+	if txLookupLimit != nil {
+		bc.txIndexer = newTxIndexer(*txLookupLimit, bc)
+	}
+	return bc, nil
+}
 
+func (bc *BlockChain) setupSnapshot() {
+	// Short circuit if the chain is established with path scheme, as the
+	// state snapshot has been integrated into path database natively.
+	if bc.cacheConfig.StateScheme == rawdb.PathScheme {
+		return
+	}
 	// Load any existing snapshot, regenerating it if loading failed
 	if bc.cacheConfig.SnapshotLimit > 0 {
 		// If the chain was rewound past the snapshot persistent layer (causing
@@ -434,7 +463,6 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
 		// in recovery mode and in that case, don't invalidate the snapshot on a
 		// head mismatch.
 		var recover bool
-
 		head := bc.CurrentBlock()
 		if layer := rawdb.ReadSnapshotRecoveryNumber(bc.db); layer != nil && *layer >= head.Number.Uint64() {
 			log.Warn("Enabling snapshot recovery", "chainhead", head.Number, "diskbase", *layer)
@@ -451,23 +479,6 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
 		// Re-initialize the state database with snapshot
 		bc.statedb = state.NewDatabase(bc.triedb, bc.snaps)
 	}
-
-	// Rewind the chain in case of an incompatible config upgrade.
-	if compat, ok := genesisErr.(*params.ConfigCompatError); ok {
-		log.Warn("Rewinding chain to upgrade configuration", "err", compat)
-		if compat.RewindToTime > 0 {
-			bc.SetHeadWithTimestamp(compat.RewindToTime)
-		} else {
-			bc.SetHead(compat.RewindToBlock)
-		}
-		rawdb.WriteChainConfig(db, genesisHash, chainConfig)
-	}
-
-	// Start tx indexer if it's enabled.
-	if txLookupLimit != nil {
-		bc.txIndexer = newTxIndexer(*txLookupLimit, bc)
-	}
-	return bc, nil
 }
 
 // empty returns an indicator whether the blockchain is empty.
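For context, the dispatch introduced in verifyState above boils down to the following. This is a minimal sketch, not part of the change; verifyByScheme is a hypothetical helper, and it assumes a *snapshot.Tree is at hand for the hash-scheme branch:

func verifyByScheme(tdb *triedb.Database, snaps *snapshot.Tree, root common.Hash) error {
	// Path scheme: the flat state lives in the trie database itself.
	if tdb.Scheme() == rawdb.PathScheme {
		return tdb.VerifyState(root)
	}
	// Hash scheme: verification still goes through the legacy snapshot tree.
	return snaps.Verify(root)
}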
diff --git a/core/blockchain_repair_test.go b/core/blockchain_repair_test.go
index aeeb9095d87d..4fbd89d20d35 100644
--- a/core/blockchain_repair_test.go
+++ b/core/blockchain_repair_test.go
@@ -1790,7 +1790,7 @@ func testRepairWithScheme(t *testing.T, tt *rewindTest, snapshots bool, scheme s
 		}
 	)
 	defer engine.Close()
-	if snapshots {
+	if snapshots && scheme == rawdb.HashScheme {
 		config.SnapshotLimit = 256
 		config.SnapshotWait = true
 	}
@@ -1819,7 +1819,7 @@ func testRepairWithScheme(t *testing.T, tt *rewindTest, snapshots bool, scheme s
 		if err := chain.triedb.Commit(canonblocks[tt.commitBlock-1].Root(), false); err != nil {
 			t.Fatalf("Failed to flush trie state: %v", err)
 		}
-		if snapshots {
+		if snapshots && scheme == rawdb.HashScheme {
 			if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil {
 				t.Fatalf("Failed to flatten snapshots: %v", err)
 			}
@@ -1950,8 +1950,10 @@ func testIssue23496(t *testing.T, scheme string) {
 	if _, err := chain.InsertChain(blocks[1:2]); err != nil {
 		t.Fatalf("Failed to import canonical chain start: %v", err)
 	}
-	if err := chain.snaps.Cap(blocks[1].Root(), 0); err != nil {
-		t.Fatalf("Failed to flatten snapshots: %v", err)
+	if scheme == rawdb.HashScheme {
+		if err := chain.snaps.Cap(blocks[1].Root(), 0); err != nil {
+			t.Fatalf("Failed to flatten snapshots: %v", err)
+		}
 	}
 
 	// Insert block B3 and commit the state into disk
@@ -1995,15 +1997,21 @@ func testIssue23496(t *testing.T, scheme string) {
 	}
 	expHead := uint64(1)
 	if scheme == rawdb.PathScheme {
-		expHead = uint64(2)
+		expHead = uint64(3)
 	}
 	if head := chain.CurrentBlock(); head.Number.Uint64() != expHead {
 		t.Errorf("Head block mismatch: have %d, want %d", head.Number, expHead)
 	}
-
-	// Reinsert B2-B4
-	if _, err := chain.InsertChain(blocks[1:]); err != nil {
-		t.Fatalf("Failed to import canonical chain tail: %v", err)
+	if scheme == rawdb.PathScheme {
+		// Reinsert B3-B4
+		if _, err := chain.InsertChain(blocks[2:]); err != nil {
+			t.Fatalf("Failed to import canonical chain tail: %v", err)
+		}
+	} else {
+		// Reinsert B2-B4
+		if _, err := chain.InsertChain(blocks[1:]); err != nil {
+			t.Fatalf("Failed to import canonical chain tail: %v", err)
+		}
 	}
 	if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
 		t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
@@ -2014,7 +2022,9 @@ func testIssue23496(t *testing.T, scheme string) {
 	if head := chain.CurrentBlock(); head.Number.Uint64() != uint64(4) {
 		t.Errorf("Head block mismatch: have %d, want %d", head.Number, uint64(4))
 	}
-	if layer := chain.Snapshots().Snapshot(blocks[2].Root()); layer == nil {
-		t.Error("Failed to regenerate the snapshot of known state")
+	if scheme == rawdb.HashScheme {
+		if layer := chain.Snapshots().Snapshot(blocks[2].Root()); layer == nil {
+			t.Error("Failed to regenerate the snapshot of known state")
+		}
 	}
 }
diff --git a/core/blockchain_sethead_test.go b/core/blockchain_sethead_test.go
index 123c2c9af16e..192429c99961 100644
--- a/core/blockchain_sethead_test.go
+++ b/core/blockchain_sethead_test.go
@@ -2022,7 +2022,7 @@ func testSetHeadWithScheme(t *testing.T, tt *rewindTest, snapshots bool, scheme
 	}
 	if tt.commitBlock > 0 {
 		chain.triedb.Commit(canonblocks[tt.commitBlock-1].Root(), false)
-		if snapshots {
+		if snapshots && scheme == rawdb.HashScheme {
 			if err := chain.snaps.Cap(canonblocks[tt.commitBlock-1].Root(), 0); err != nil {
 				t.Fatalf("Failed to flatten snapshots: %v", err)
 			}
diff --git a/core/blockchain_snapshot_test.go b/core/blockchain_snapshot_test.go
index 3803c153e700..ed300de1ef33 100644
--- a/core/blockchain_snapshot_test.go
+++ b/core/blockchain_snapshot_test.go
@@ -104,7 +104,7 @@ func (basic *snapshotTestBasic) prepare(t *testing.T) (*BlockChain, []*types.Blo
 		if basic.commitBlock > 0 && basic.commitBlock == point {
 			chain.TrieDB().Commit(blocks[point-1].Root(), false)
 		}
-		if basic.snapshotBlock > 0 && basic.snapshotBlock == point {
+		if basic.snapshotBlock > 0 && basic.snapshotBlock == point && basic.scheme == rawdb.HashScheme {
 			// Flushing the entire snap tree into the disk, the
 			// relevant (a) snapshot root and (b) snapshot generator
 			// will be persisted atomically.
@@ -148,13 +148,17 @@ func (basic *snapshotTestBasic) verify(t *testing.T, chain *BlockChain, blocks [
 	block := chain.GetBlockByNumber(basic.expSnapshotBottom)
 	if block == nil {
 		t.Errorf("The corresponding block[%d] of snapshot disk layer is missing", basic.expSnapshotBottom)
-	} else if !bytes.Equal(chain.snaps.DiskRoot().Bytes(), block.Root().Bytes()) {
-		t.Errorf("The snapshot disk layer root is incorrect, want %x, get %x", block.Root(), chain.snaps.DiskRoot())
+	} else if basic.scheme == rawdb.HashScheme {
+		if !bytes.Equal(chain.snaps.DiskRoot().Bytes(), block.Root().Bytes()) {
+			t.Errorf("The snapshot disk layer root is incorrect, want %x, get %x", block.Root(), chain.snaps.DiskRoot())
+		}
 	}
 
 	// Check the snapshot, ensure it's integrated
-	if err := chain.snaps.Verify(block.Root()); err != nil {
-		t.Errorf("The disk layer is not integrated %v", err)
+	if basic.scheme == rawdb.HashScheme {
+		if err := chain.snaps.Verify(block.Root()); err != nil {
+			t.Errorf("The disk layer is not integrated %v", err)
+		}
 	}
 }
 
@@ -569,7 +573,7 @@ func TestHighCommitCrashWithNewSnapshot(t *testing.T) {
 	for _, scheme := range []string{rawdb.HashScheme, rawdb.PathScheme} {
 		expHead := uint64(0)
 		if scheme == rawdb.PathScheme {
-			expHead = uint64(4)
+			expHead = uint64(6)
 		}
 		test := &crashSnapshotTest{
 			snapshotTestBasic{
diff --git a/core/state/database.go b/core/state/database.go
index de61dee036eb..d8b87f2a944e 100644
--- a/core/state/database.go
+++ b/core/state/database.go
@@ -186,9 +186,19 @@ func (db *CachingDB) Reader(stateRoot common.Hash) (Reader, error) {
 	// is optional and may be partially useful if it's not fully
 	// generated.
 	if db.snap != nil {
-		sr, err := newStateReader(stateRoot, db.snap)
+		// If standalone state snapshot is available (hash scheme),
+		// then construct the legacy snap reader.
+		snap := db.snap.Snapshot(stateRoot)
+		if snap != nil {
+			readers = append(readers, newStateReader(snap)) // snap reader is optional
+		}
+	} else {
+		// If standalone state snapshot is not available (path scheme
+		// or the state snapshot is explicitly disabled in hash mode),
+		// try to construct the state reader with database.
+		reader, err := db.triedb.StateReader(stateRoot)
 		if err == nil {
-			readers = append(readers, sr) // snap reader is optional
+			readers = append(readers, newStateReader(reader)) // state reader is optional
 		}
 	}
 	// Set up the trie reader, which is expected to always be available
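A minimal sketch of the fallback order implemented above; buildStateReaders is a hypothetical free function standing in for the same logic on CachingDB's fields (db.snap, db.triedb):

func buildStateReaders(snaps *snapshot.Tree, tdb *triedb.Database, root common.Hash) []database.StateReader {
	var readers []database.StateReader
	if snaps != nil {
		// Hash scheme: the standalone snapshot tree, if present, wins.
		if snap := snaps.Snapshot(root); snap != nil {
			readers = append(readers, snap)
		}
	} else if r, err := tdb.StateReader(root); err == nil {
		// Path scheme (or snapshot disabled): ask the trie database directly.
		readers = append(readers, r)
	}
	return readers
}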
diff --git a/core/state/reader.go b/core/state/reader.go
index 6bddefc2a7dd..85842adde85f 100644
--- a/core/state/reader.go
+++ b/core/state/reader.go
@@ -21,13 +21,13 @@ import (
 	"maps"
 
 	"github.com/ethereum/go-ethereum/common"
-	"github.com/ethereum/go-ethereum/core/state/snapshot"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
 	"github.com/ethereum/go-ethereum/trie/utils"
 	"github.com/ethereum/go-ethereum/triedb"
+	"github.com/ethereum/go-ethereum/triedb/database"
 )
 
 // Reader defines the interface for accessing accounts and storage slots
@@ -52,23 +52,18 @@ type Reader interface {
 	Copy() Reader
 }
 
-// stateReader is a wrapper over the state snapshot and implements the Reader
-// interface. It provides an efficient way to access flat state.
+// stateReader wraps a database state reader.
type stateReader struct {
-	snap snapshot.Snapshot
-	buff crypto.KeccakState
+	reader database.StateReader
+	buff   crypto.KeccakState
 }
 
-// newStateReader constructs a flat state reader with on the specified state root.
-func newStateReader(root common.Hash, snaps *snapshot.Tree) (*stateReader, error) {
-	snap := snaps.Snapshot(root)
-	if snap == nil {
-		return nil, errors.New("snapshot is not available")
-	}
+// newStateReader constructs a state reader with the given database state reader.
+func newStateReader(reader database.StateReader) *stateReader {
 	return &stateReader{
-		snap: snap,
-		buff: crypto.NewKeccakState(),
-	}, nil
+		reader: reader,
+		buff:   crypto.NewKeccakState(),
+	}
 }
 
 // Account implements Reader, retrieving the account specified by the address.
@@ -78,18 +73,18 @@ func newStateReader(root common.Hash, snaps *snapshot.Tree) (*stateReader, error
 //
 // The returned account might be nil if it's not existent.
 func (r *stateReader) Account(addr common.Address) (*types.StateAccount, error) {
-	ret, err := r.snap.Account(crypto.HashData(r.buff, addr.Bytes()))
+	account, err := r.reader.Account(crypto.HashData(r.buff, addr.Bytes()))
 	if err != nil {
 		return nil, err
 	}
-	if ret == nil {
+	if account == nil {
 		return nil, nil
 	}
 	acct := &types.StateAccount{
-		Nonce:    ret.Nonce,
-		Balance:  ret.Balance,
-		CodeHash: ret.CodeHash,
-		Root:     common.BytesToHash(ret.Root),
+		Nonce:    account.Nonce,
+		Balance:  account.Balance,
+		CodeHash: account.CodeHash,
+		Root:     common.BytesToHash(account.Root),
 	}
 	if len(acct.CodeHash) == 0 {
 		acct.CodeHash = types.EmptyCodeHash.Bytes()
@@ -110,7 +105,7 @@ func (r *stateReader) Account(addr common.Address) (*types.StateAccount, error)
 func (r *stateReader) Storage(addr common.Address, key common.Hash) (common.Hash, error) {
 	addrHash := crypto.HashData(r.buff, addr.Bytes())
 	slotHash := crypto.HashData(r.buff, key.Bytes())
-	ret, err := r.snap.Storage(addrHash, slotHash)
+	ret, err := r.reader.Storage(addrHash, slotHash)
 	if err != nil {
 		return common.Hash{}, err
 	}
@@ -131,8 +126,8 @@ func (r *stateReader) Storage(addr common.Address, key common.Hash) (common.Hash
 // Copy implements Reader, returning a deep-copied snap reader.
 func (r *stateReader) Copy() Reader {
 	return &stateReader{
-		snap: r.snap,
-		buff: crypto.NewKeccakState(),
+		reader: r.reader,
+		buff:   crypto.NewKeccakState(),
 	}
 }
diff --git a/core/state/snapshot/generate.go b/core/state/snapshot/generate.go
index 6d9e16307516..01fb55ea4cd6 100644
--- a/core/state/snapshot/generate.go
+++ b/core/state/snapshot/generate.go
@@ -31,7 +31,6 @@ import (
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
-	"github.com/ethereum/go-ethereum/trie/trienode"
 	"github.com/ethereum/go-ethereum/triedb"
 )
 
@@ -353,20 +352,14 @@ func (dl *diskLayer) generateRange(ctx *generatorContext, trieId *trie.ID, prefi
 	// main account trie as a primary lookup when resolving hashes
 	var resolver trie.NodeResolver
 	if len(result.keys) > 0 {
-		mdb := rawdb.NewMemoryDatabase()
-		tdb := triedb.NewDatabase(mdb, triedb.HashDefaults)
-		defer tdb.Close()
-		snapTrie := trie.NewEmpty(tdb)
+		tr := trie.NewEmpty(nil)
 		for i, key := range result.keys {
-			snapTrie.Update(key, result.vals[i])
-		}
-		root, nodes := snapTrie.Commit(false)
-		if nodes != nil {
-			tdb.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil)
-			tdb.Commit(root, false)
+			tr.Update(key, result.vals[i])
 		}
+		_, nodes := tr.Commit(false)
+		hashSet := nodes.HashSet()
 		resolver = func(owner common.Hash, path []byte, hash common.Hash) []byte {
-			return rawdb.ReadTrieNode(mdb, owner, path, hash, tdb.Scheme())
+			return hashSet[hash]
 		}
 	}
 	// Construct the trie for state iteration, reuse the trie
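The new resolver construction above no longer round-trips through a throwaway in-memory database; the committed node set is served straight from a hash-keyed map. A minimal standalone sketch (makeResolver is hypothetical, and a non-empty key set is assumed, mirroring the len(result.keys) > 0 guard):

func makeResolver(keys, vals [][]byte) trie.NodeResolver {
	tr := trie.NewEmpty(nil) // a fresh trie needs no backing database
	for i, key := range keys {
		tr.Update(key, vals[i])
	}
	_, nodes := tr.Commit(false)
	hashSet := nodes.HashSet() // map[common.Hash][]byte, added in trie/trienode below
	return func(owner common.Hash, path []byte, hash common.Hash) []byte {
		return hashSet[hash]
	}
}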
diff --git a/core/state/snapshot/generate_test.go b/core/state/snapshot/generate_test.go
index 891111973a5e..4946dd1fd704 100644
--- a/core/state/snapshot/generate_test.go
+++ b/core/state/snapshot/generate_test.go
@@ -57,14 +57,14 @@ func testGeneration(t *testing.T, scheme string) {
 	// a fake one manually. We're going with a small account trie of 3 accounts,
 	// two of which also has the same 3-slot storage trie attached.
 	var helper = newHelper(scheme)
-	stRoot := helper.makeStorageTrie(common.Hash{}, []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, false)
+	stRoot := helper.makeStorageTrie("", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, false)
 
 	helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 
-	helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
-	helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 
 	root, snap := helper.CommitAndGenerate()
 	if have, want := root, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"); have != want {
@@ -97,7 +97,7 @@ func testGenerateExistentState(t *testing.T, scheme string) {
 	// two of which also has the same 3-slot storage trie attached.
 	var helper = newHelper(scheme)
 
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addSnapAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
@@ -105,7 +105,7 @@ func testGenerateExistentState(t *testing.T, scheme string) {
 	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addSnapAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})
 
-	stRoot = helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addSnapAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addSnapStorage("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
@@ -159,29 +159,37 @@ type testHelper struct {
 	triedb  *triedb.Database
 	accTrie *trie.StateTrie
 	nodes   *trienode.MergedNodeSet
+	states  *triedb.StateSet
 }
 
 func newHelper(scheme string) *testHelper {
 	diskdb := rawdb.NewMemoryDatabase()
 	config := &triedb.Config{}
 	if scheme == rawdb.PathScheme {
-		config.PathDB = &pathdb.Config{} // disable caching
+		config.PathDB = &pathdb.Config{
+			SnapshotNoBuild: true,
+		} // disable caching
 	} else {
 		config.HashDB = &hashdb.Config{} // disable caching
 	}
-	triedb := triedb.NewDatabase(diskdb, config)
-	accTrie, _ := trie.NewStateTrie(trie.StateTrieID(types.EmptyRootHash), triedb)
+	db := triedb.NewDatabase(diskdb, config)
+	accTrie, _ := trie.NewStateTrie(trie.StateTrieID(types.EmptyRootHash), db)
 	return &testHelper{
 		diskdb:  diskdb,
-		triedb:  triedb,
+		triedb:  db,
 		accTrie: accTrie,
 		nodes:   trienode.NewMergedNodeSet(),
+		states:  triedb.NewStateSet(),
 	}
 }
 
 func (t *testHelper) addTrieAccount(acckey string, acc *types.StateAccount) {
 	val, _ := rlp.EncodeToBytes(acc)
 	t.accTrie.MustUpdate([]byte(acckey), val)
+
+	accHash := hashData([]byte(acckey))
+	t.states.Accounts[accHash] = val
+	t.states.AccountsOrigin[common.BytesToAddress([]byte(acckey))] = nil
 }
 
 func (t *testHelper) addSnapAccount(acckey string, acc *types.StateAccount) {
@@ -201,11 +209,21 @@ func (t *testHelper) addSnapStorage(accKey string, keys []string, vals []string)
 	}
 }
 
-func (t *testHelper) makeStorageTrie(owner common.Hash, keys []string, vals []string, commit bool) common.Hash {
+func (t *testHelper) makeStorageTrie(accKey string, keys []string, vals []string, commit bool) common.Hash {
+	owner := hashData([]byte(accKey))
+	addr := common.BytesToAddress([]byte(accKey))
 	id := trie.StorageTrieID(types.EmptyRootHash, owner, types.EmptyRootHash)
 	stTrie, _ := trie.NewStateTrie(id, t.triedb)
 	for i, k := range keys {
 		stTrie.MustUpdate([]byte(k), []byte(vals[i]))
+		if t.states.Storages[owner] == nil {
+			t.states.Storages[owner] = make(map[common.Hash][]byte)
+		}
+		if t.states.StoragesOrigin[addr] == nil {
+			t.states.StoragesOrigin[addr] = make(map[common.Hash][]byte)
+		}
+		t.states.Storages[owner][hashData([]byte(k))] = []byte(vals[i])
+		t.states.StoragesOrigin[addr][hashData([]byte(k))] = nil
 	}
 	if !commit {
 		return stTrie.Hash()
@@ -222,7 +240,7 @@ func (t *testHelper) Commit() common.Hash {
 	if nodes != nil {
 		t.nodes.Merge(nodes)
 	}
-	t.triedb.Update(root, types.EmptyRootHash, 0, t.nodes, nil)
+	t.triedb.Update(root, types.EmptyRootHash, 0, t.nodes, t.states)
 	t.triedb.Commit(root, false)
 	return root
 }
@@ -264,23 +282,23 @@ func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) {
 	helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
 
 	// Account two, non empty root but empty database
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-2")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot := helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 
 	// Miss slots
 	{
 		// Account three, non empty root but misses slots in the beginning
-		helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-3", []string{"key-2", "key-3"}, []string{"val-2", "val-3"})
 
 		// Account four, non empty root but misses slots in the middle
-		helper.makeStorageTrie(hashData([]byte("acc-4")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-4", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-4", []string{"key-1", "key-3"}, []string{"val-1", "val-3"})
 
 		// Account five, non empty root but misses slots in the end
-		helper.makeStorageTrie(hashData([]byte("acc-5")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-5", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-5", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-5", []string{"key-1", "key-2"}, []string{"val-1", "val-2"})
 	}
 
@@ -288,22 +306,22 @@ func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) {
 	// Wrong storage slots
 	{
 		// Account six, non empty root but wrong slots in the beginning
-		helper.makeStorageTrie(hashData([]byte("acc-6")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-6", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"badval-1", "val-2", "val-3"})
 
 		// Account seven, non empty root but wrong slots in the middle
-		helper.makeStorageTrie(hashData([]byte("acc-7")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-7", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "badval-2", "val-3"})
 
 		// Account eight, non empty root but wrong slots in the end
-		helper.makeStorageTrie(hashData([]byte("acc-8")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-8", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "badval-3"})
 
 		// Account 9, non empty root but rotated slots
-		helper.makeStorageTrie(hashData([]byte("acc-9")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-9", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-3", "val-2"})
 	}
 
@@ -311,17 +329,17 @@ func testGenerateExistentStateWithWrongStorage(t *testing.T, scheme string) {
 	// Extra storage slots
 	{
 		// Account 10, non empty root but extra slots in the beginning
-		helper.makeStorageTrie(hashData([]byte("acc-10")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-10", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-10", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-10", []string{"key-0", "key-1", "key-2", "key-3"}, []string{"val-0", "val-1", "val-2", "val-3"})
 
 		// Account 11, non empty root but extra slots in the middle
-		helper.makeStorageTrie(hashData([]byte("acc-11")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-11", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-11", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-11", []string{"key-1", "key-2", "key-2-1", "key-3"}, []string{"val-1", "val-2", "val-2-1", "val-3"})
 
 		// Account 12, non empty root but extra slots in the end
-		helper.makeStorageTrie(hashData([]byte("acc-12")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		helper.makeStorageTrie("acc-12", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addAccount("acc-12", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		helper.addSnapStorage("acc-12", []string{"key-1", "key-2", "key-3", "key-4"}, []string{"val-1", "val-2", "val-3", "val-4"})
 	}
@@ -356,11 +374,11 @@ func TestGenerateExistentStateWithWrongAccounts(t *testing.T) {
 func testGenerateExistentStateWithWrongAccounts(t *testing.T, scheme string) {
 	helper := newHelper(scheme)
 
-	helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
-	helper.makeStorageTrie(hashData([]byte("acc-2")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
-	helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
-	helper.makeStorageTrie(hashData([]byte("acc-4")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-6")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot := helper.makeStorageTrie("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 
 	// Trie accounts [acc-1, acc-2, acc-3, acc-4, acc-6]
 	// Extra accounts [acc-0, acc-5, acc-7]
@@ -463,10 +481,10 @@ func testGenerateMissingStorageTrie(t *testing.T, scheme string) {
 		acc3   = hashData([]byte("acc-3"))
 		helper = newHelper(scheme)
 	)
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
+	stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
 	helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})                     // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
 	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})        // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7
-	stRoot = helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2
 
 	root := helper.Commit()
@@ -503,10 +521,10 @@ func testGenerateCorruptStorageTrie(t *testing.T, scheme string) {
 	// two of which also has the same 3-slot storage trie attached.
 	helper := newHelper(scheme)
 
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
+	stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
 	helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})                     // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
 	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})        // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7
-	stRoot = helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2
 
 	root := helper.Commit()
@@ -542,7 +560,7 @@ func testGenerateWithExtraAccounts(t *testing.T, scheme string) {
 	helper := newHelper(scheme)
 	{
 		// Account one in the trie
-		stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")),
+		stRoot := helper.makeStorageTrie("acc-1",
 			[]string{"key-1", "key-2", "key-3", "key-4", "key-5"},
 			[]string{"val-1", "val-2", "val-3", "val-4", "val-5"},
 			true,
@@ -562,7 +580,7 @@ func testGenerateWithExtraAccounts(t *testing.T, scheme string) {
 	}
 	{
 		// Account two exists only in the snapshot
-		stRoot := helper.makeStorageTrie(hashData([]byte("acc-2")),
+		stRoot := helper.makeStorageTrie("acc-2",
 			[]string{"key-1", "key-2", "key-3", "key-4", "key-5"},
 			[]string{"val-1", "val-2", "val-3", "val-4", "val-5"},
 			true,
@@ -618,7 +636,7 @@ func testGenerateWithManyExtraAccounts(t *testing.T, scheme string) {
 	helper := newHelper(scheme)
 	{
 		// Account one in the trie
-		stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")),
+		stRoot := helper.makeStorageTrie("acc-1",
 			[]string{"key-1", "key-2", "key-3"},
 			[]string{"val-1", "val-2", "val-3"},
 			true,
@@ -763,7 +781,7 @@ func testGenerateFromEmptySnap(t *testing.T, scheme string) {
 	helper := newHelper(scheme)
 	// Add 1K accounts to the trie
 	for i := 0; i < 400; i++ {
-		stRoot := helper.makeStorageTrie(hashData([]byte(fmt.Sprintf("acc-%d", i))), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+		stRoot := helper.makeStorageTrie(fmt.Sprintf("acc-%d", i), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 		helper.addTrieAccount(fmt.Sprintf("acc-%d", i), &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	}
 
@@ -806,7 +824,7 @@ func testGenerateWithIncompleteStorage(t *testing.T, scheme string) {
 	// on the sensitive spots at the boundaries
 	for i := 0; i < 8; i++ {
 		accKey := fmt.Sprintf("acc-%d", i)
-		stRoot := helper.makeStorageTrie(hashData([]byte(accKey)), stKeys, stVals, true)
+		stRoot := helper.makeStorageTrie(accKey, stKeys, stVals, true)
 		helper.addAccount(accKey, &types.StateAccount{Balance: uint256.NewInt(uint64(i)), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 		var moddedKeys []string
 		var moddedVals []string
@@ -903,11 +921,11 @@ func TestGenerateCompleteSnapshotWithDanglingStorage(t *testing.T) {
 func testGenerateCompleteSnapshotWithDanglingStorage(t *testing.T, scheme string) {
 	var helper = newHelper(scheme)
 
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})
 
-	helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 
 	helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"})
@@ -943,11 +961,11 @@ func TestGenerateBrokenSnapshotWithDanglingStorage(t *testing.T) {
 func testGenerateBrokenSnapshotWithDanglingStorage(t *testing.T, scheme string) {
 	var helper = newHelper(scheme)
 
-	stRoot := helper.makeStorageTrie(hashData([]byte("acc-1")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()})
 
-	helper.makeStorageTrie(hashData([]byte("acc-3")), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
 	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()})
 
 	populateDangling(helper.diskdb)
diff --git a/core/state/statedb.go b/core/state/statedb.go
index b2b4f8fb97b1..527d9bc08d04 100644
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -38,7 +38,6 @@ import (
 	"github.com/ethereum/go-ethereum/params"
 	"github.com/ethereum/go-ethereum/trie"
 	"github.com/ethereum/go-ethereum/trie/trienode"
-	"github.com/ethereum/go-ethereum/trie/triestate"
 	"github.com/ethereum/go-ethereum/trie/utils"
 	"github.com/holiman/uint256"
 	"golang.org/x/sync/errgroup"
@@ -1282,8 +1281,7 @@ func (s *StateDB) commitAndFlush(block uint64, deleteEmptyObjects bool) (*stateU
 	// If trie database is enabled, commit the state update as a new layer
 	if db := s.db.TrieDB(); db != nil {
 		start := time.Now()
-		set := triestate.New(ret.accountsOrigin, ret.storagesOrigin)
-		if err := db.Update(ret.root, ret.originRoot, block, ret.nodes, set); err != nil {
+		if err := db.Update(ret.root, ret.originRoot, block, ret.nodes, ret.stateSet()); err != nil {
 			return nil, err
 		}
 		s.TrieDBCommits += time.Since(start)
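The commit path now hands the flat-state mutations to the trie database as a *triedb.StateSet (see core/state/stateupdate.go below) instead of a triestate.Set. A hedged sketch of the calling convention, with all free identifiers (accountHash, address, slimAccountRLP, roots, mergedNodes) assumed to be prepared by the caller:

// states mirrors what stateUpdate.stateSet() produces.
states := triedb.NewStateSet()
states.Accounts[accountHash] = slimAccountRLP // updated account, slim-RLP encoded
states.AccountsOrigin[address] = nil          // nil origin: account was non-existent before
if err := tdb.Update(newRoot, parentRoot, blockNumber, mergedNodes, states); err != nil {
	return err
}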
diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go
index 9441834c6a24..203b454ed5d5 100644
--- a/core/state/statedb_test.go
+++ b/core/state/statedb_test.go
@@ -981,8 +981,9 @@ func testMissingTrieNodes(t *testing.T, scheme string) {
 	)
 	if scheme == rawdb.PathScheme {
 		tdb = triedb.NewDatabase(memDb, &triedb.Config{PathDB: &pathdb.Config{
-			CleanCacheSize: 0,
-			DirtyCacheSize: 0,
+			TrieCleanSize:   0,
+			StateCleanSize:  0,
+			WriteBufferSize: 0,
 		}}) // disable caching
 	} else {
 		tdb = triedb.NewDatabase(memDb, &triedb.Config{HashDB: &hashdb.Config{
diff --git a/core/state/stateupdate.go b/core/state/stateupdate.go
index f3e6af997e44..c9231f0526b3 100644
--- a/core/state/stateupdate.go
+++ b/core/state/stateupdate.go
@@ -20,6 +20,7 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/trie/trienode"
+	"github.com/ethereum/go-ethereum/triedb"
 )
 
 // contractCode represents a contract code with associated metadata.
@@ -131,3 +132,17 @@ func newStateUpdate(originRoot common.Hash, root common.Hash, deletes map[common
 		nodes:   nodes,
 	}
 }
+
+// stateSet converts the current stateUpdate object into a triedb.StateSet
+// object. This function extracts the necessary data from the stateUpdate
+// struct and formats it into the StateSet structure consumed by the triedb
+// package.
+func (sc *stateUpdate) stateSet() *triedb.StateSet {
+	return &triedb.StateSet{
+		Destructs:      sc.destructs,
+		Accounts:       sc.accounts,
+		AccountsOrigin: sc.accountsOrigin,
+		Storages:       sc.storages,
+		StoragesOrigin: sc.storagesOrigin,
+	}
+}
diff --git a/core/state/sync_test.go b/core/state/sync_test.go
index cc15422c0cf8..2416cda873db 100644
--- a/core/state/sync_test.go
+++ b/core/state/sync_test.go
@@ -207,7 +207,7 @@ func testIterativeStateSync(t *testing.T, count int, commit bool, bypath bool, s
 	for i := 0; i < len(codes); i++ {
 		codeElements = append(codeElements, stateElement{code: codes[i]})
 	}
-	reader, err := ndb.Reader(srcRoot)
+	reader, err := ndb.NodeReader(srcRoot)
 	if err != nil {
 		t.Fatalf("state is not existent, %#x", srcRoot)
 	}
@@ -326,7 +326,7 @@ func testIterativeDelayedStateSync(t *testing.T, scheme string) {
 	for i := 0; i < len(codes); i++ {
 		codeElements = append(codeElements, stateElement{code: codes[i]})
 	}
-	reader, err := ndb.Reader(srcRoot)
+	reader, err := ndb.NodeReader(srcRoot)
 	if err != nil {
 		t.Fatalf("state is not existent, %#x", srcRoot)
 	}
@@ -430,7 +430,7 @@ func testIterativeRandomStateSync(t *testing.T, count int, scheme string) {
 	for _, hash := range codes {
 		codeQueue[hash] = struct{}{}
 	}
-	reader, err := ndb.Reader(srcRoot)
+	reader, err := ndb.NodeReader(srcRoot)
 	if err != nil {
 		t.Fatalf("state is not existent, %#x", srcRoot)
 	}
@@ -523,7 +523,7 @@ func testIterativeRandomDelayedStateSync(t *testing.T, scheme string) {
 	for _, hash := range codes {
 		codeQueue[hash] = struct{}{}
 	}
-	reader, err := ndb.Reader(srcRoot)
+	reader, err := ndb.NodeReader(srcRoot)
 	if err != nil {
 		t.Fatalf("state is not existent, %#x", srcRoot)
 	}
@@ -628,7 +628,7 @@ func testIncompleteStateSync(t *testing.T, scheme string) {
 		addedPaths  []string
 		addedHashes []common.Hash
 	)
-	reader, err := ndb.Reader(srcRoot)
+	reader, err := ndb.NodeReader(srcRoot)
 	if err != nil {
 		t.Fatalf("state is not available %x", srcRoot)
 	}
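All call sites above now obtain trie nodes through the renamed NodeReader entry point. A minimal sketch of reading a root node through it (readAccountTrieRoot is hypothetical):

func readAccountTrieRoot(ndb *triedb.Database, root common.Hash) ([]byte, error) {
	reader, err := ndb.NodeReader(root)
	if err != nil {
		return nil, err // the requested state is not available
	}
	// The account-trie root node sits at the empty path under the zero owner hash.
	return reader.Node(common.Hash{}, nil, root)
}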
diff --git a/eth/handler.go b/eth/handler.go
index d5117584c001..ca0e8837049a 100644
--- a/eth/handler.go
+++ b/eth/handler.go
@@ -41,7 +41,6 @@ import (
 	"github.com/ethereum/go-ethereum/metrics"
 	"github.com/ethereum/go-ethereum/p2p"
 	"github.com/ethereum/go-ethereum/p2p/enode"
-	"github.com/ethereum/go-ethereum/triedb/pathdb"
 )
 
 const (
@@ -176,7 +175,7 @@ func newHandler(config *handlerConfig) (*handler, error) {
 		}
 	}
 	// If snap sync is requested but snapshots are disabled, fail loudly
-	if h.snapSync.Load() && config.Chain.Snapshots() == nil {
+	if h.snapSync.Load() && (config.Chain.Snapshots() == nil && config.Chain.TrieDB().Scheme() == rawdb.HashScheme) {
 		return nil, errors.New("snap sync not supported with snapshots disabled")
 	}
 	// Construct the downloader (long sync)
@@ -558,7 +557,4 @@ func (h *handler) enableSyncedFeatures() {
 		log.Info("Snap sync complete, auto disabling")
 		h.snapSync.Store(false)
 	}
-	if h.chain.TrieDB().Scheme() == rawdb.PathScheme {
-		h.chain.TrieDB().SetBufferSize(pathdb.DefaultBufferSize)
-	}
 }
diff --git a/eth/protocols/snap/handler.go b/eth/protocols/snap/handler.go
index a6c60bc0757f..1a74d5b9f845 100644
--- a/eth/protocols/snap/handler.go
+++ b/eth/protocols/snap/handler.go
@@ -23,6 +23,8 @@ import (
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/core/state/snapshot"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/metrics"
@@ -31,6 +33,7 @@ import (
 	"github.com/ethereum/go-ethereum/p2p/enr"
 	"github.com/ethereum/go-ethereum/trie"
 	"github.com/ethereum/go-ethereum/trie/trienode"
+	"github.com/ethereum/go-ethereum/triedb/database"
 )
 
 const (
@@ -281,7 +284,12 @@ func ServiceGetAccountRangeQuery(chain *core.BlockChain, req *GetAccountRangePac
 	if err != nil {
 		return nil, nil
 	}
-	it, err := chain.Snapshots().AccountIterator(req.Root, req.Origin)
+	var it snapshot.AccountIterator // ugly hack
+	if chain.TrieDB().Scheme() == rawdb.HashScheme {
+		it, err = chain.Snapshots().AccountIterator(req.Root, req.Origin)
+	} else {
+		it, err = chain.TrieDB().AccountIterator(req.Root, req.Origin)
+	}
 	if err != nil {
 		return nil, nil
 	}
@@ -361,7 +369,15 @@ func ServiceGetStorageRangesQuery(chain *core.BlockChain, req *GetStorageRangesP
 		limit, req.Limit = common.BytesToHash(req.Limit), nil
 	}
 	// Retrieve the requested state and bail out if non existent
-	it, err := chain.Snapshots().StorageIterator(req.Root, account, origin)
+	var (
+		err error
+		it  snapshot.StorageIterator // ugly hack
+	)
+	if chain.TrieDB().Scheme() == rawdb.HashScheme {
+		it, err = chain.Snapshots().StorageIterator(req.Root, account, origin)
+	} else {
+		it, err = chain.TrieDB().StorageIterator(req.Root, account, origin)
+	}
 	if err != nil {
 		return nil, nil
 	}
@@ -481,8 +497,15 @@ func ServiceGetTrieNodesQuery(chain *core.BlockChain, req *GetTrieNodesPacket, s
 		// We don't have the requested state available, bail out
 		return nil, nil
 	}
-	// The 'snap' might be nil, in which case we cannot serve storage slots.
-	snap := chain.Snapshots().Snapshot(req.Root)
+	// The 'reader' might be nil, in which case we cannot serve storage slots
+	// via snapshot.
+	var reader database.StateReader
+	if chain.Snapshots() != nil {
+		reader = chain.Snapshots().Snapshot(req.Root)
+	}
+	if reader == nil {
+		reader, _ = triedb.StateReader(req.Root)
+	}
 	// Retrieve trie nodes until the packet size limit is reached
 	var (
 		nodes [][]byte
@@ -507,8 +530,9 @@ func ServiceGetTrieNodesQuery(chain *core.BlockChain, req *GetTrieNodesPacket, s
 		default:
 			var stRoot common.Hash
+
 			// Storage slots requested, open the storage trie and retrieve from there
-			if snap == nil {
+			if reader == nil {
 				// We don't have the requested state snapshotted yet (or it is stale),
 				// but can look up the account via the trie instead.
 				account, err := accTrie.GetAccountByHash(common.BytesToHash(pathset[0]))
@@ -518,7 +542,7 @@ func ServiceGetTrieNodesQuery(chain *core.BlockChain, req *GetTrieNodesPacket, s
 				}
 				stRoot = account.Root
 			} else {
-				account, err := snap.Account(common.BytesToHash(pathset[0]))
+				account, err := reader.Account(common.BytesToHash(pathset[0]))
 				loads++ // always account database reads, even for failures
 				if err != nil || account == nil {
 					break
diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go
index c97c3b99b353..d599e7ecc325 100644
--- a/eth/protocols/snap/sync_test.go
+++ b/eth/protocols/snap/sync_test.go
@@ -1515,7 +1515,7 @@ func makeAccountTrieNoStorage(n int, scheme string) (string, *trie.Trie, []*kv)
 	// Commit the state changes into db and re-create the trie
 	// for accessing later.
 	root, nodes := accTrie.Commit(false)
-	db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil)
+	db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet())
 
 	accTrie, _ = trie.New(trie.StateTrieID(root), db)
 	return db.Scheme(), accTrie, entries
@@ -1577,7 +1577,7 @@ func makeBoundaryAccountTrie(scheme string, n int) (string, *trie.Trie, []*kv) {
 	// Commit the state changes into db and re-create the trie
 	// for accessing later.
 	root, nodes := accTrie.Commit(false)
-	db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), nil)
+	db.Update(root, types.EmptyRootHash, 0, trienode.NewWithNodeSet(nodes), triedb.NewStateSet())
 
 	accTrie, _ = trie.New(trie.StateTrieID(root), db)
 	return db.Scheme(), accTrie, entries
@@ -1626,7 +1626,7 @@ func makeAccountTrieWithStorageWithUniqueStorage(scheme string, accounts, slots
 	nodes.Merge(set)
 
 	// Commit gathered dirty nodes into database
-	db.Update(root, types.EmptyRootHash, 0, nodes, nil)
+	db.Update(root, types.EmptyRootHash, 0, nodes, triedb.NewStateSet())
 
 	// Re-create tries with new root
 	accTrie, _ = trie.New(trie.StateTrieID(root), db)
@@ -1693,7 +1693,7 @@ func makeAccountTrieWithStorage(scheme string, accounts, slots int, code, bounda
 	nodes.Merge(set)
 
 	// Commit gathered dirty nodes into database
-	db.Update(root, types.EmptyRootHash, 0, nodes, nil)
+	db.Update(root, types.EmptyRootHash, 0, nodes, triedb.NewStateSet())
 
 	// Re-create tries with new root
 	accTrie, err := trie.New(trie.StateTrieID(root), db)
@@ -1962,5 +1962,5 @@ func newDbConfig(scheme string) *triedb.Config {
 	if scheme == rawdb.HashScheme {
 		return &triedb.Config{}
 	}
-	return &triedb.Config{PathDB: pathdb.Defaults}
+	return &triedb.Config{PathDB: &pathdb.Config{SnapshotNoBuild: true}}
 }
diff --git a/tests/block_test_util.go b/tests/block_test_util.go
index b0a31a69720b..0f79933d40cc 100644
--- a/tests/block_test_util.go
+++ b/tests/block_test_util.go
@@ -181,7 +181,17 @@ func (t *BlockTest) Run(snapshotter bool, scheme string, witness bool, tracer *t
 	}
 	// Cross-check the snapshot-to-hash against the trie hash
 	if snapshotter {
-		if err := chain.Snapshots().Verify(chain.CurrentBlock().Root); err != nil {
+		if chain.Snapshots() != nil {
+			if err := chain.Snapshots().Verify(chain.CurrentBlock().Root); err != nil {
+				return err
+			}
+		}
+	}
+	if scheme == rawdb.PathScheme {
+		if err := chain.TrieDB().WaitGeneration(); err != nil {
+			return err
+		}
+		if err := chain.TrieDB().VerifyState(chain.CurrentBlock().Root); err != nil {
 			return err
 		}
 	}
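The handler changes above share one pattern: pick the iterator or reader source by scheme. A hedged sketch of that dispatch (accountIt is a hypothetical helper):

func accountIt(chain *core.BlockChain, root, seek common.Hash) (snapshot.AccountIterator, error) {
	if chain.TrieDB().Scheme() == rawdb.HashScheme {
		return chain.Snapshots().AccountIterator(root, seek) // legacy standalone snapshot tree
	}
	return chain.TrieDB().AccountIterator(root, seek) // snapshot integrated into pathdb
}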
"github.com/ethereum/go-ethereum/triedb/database" ) -// testReader implements database.Reader interface, providing function to +// testReader implements database.NodeReader interface, providing function to // access trie nodes. type testReader struct { db ethdb.Database @@ -33,7 +33,7 @@ type testReader struct { nodes []*trienode.MergedNodeSet // sorted from new to old } -// Node implements database.Reader interface, retrieving trie node with +// Node implements database.NodeReader interface, retrieving trie node with // all available cached layers. func (r *testReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { // Check the node presence with the cached layer, from latest to oldest. @@ -54,7 +54,7 @@ func (r *testReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]b return rawdb.ReadTrieNode(r.db, owner, path, hash, r.scheme), nil } -// testDb implements database.Database interface, using for testing purpose. +// testDb implements database.NodeDatabase interface, using for testing purpose. type testDb struct { disk ethdb.Database root common.Hash @@ -73,7 +73,7 @@ func newTestDatabase(diskdb ethdb.Database, scheme string) *testDb { } } -func (db *testDb) Reader(stateRoot common.Hash) (database.Reader, error) { +func (db *testDb) NodeReader(stateRoot common.Hash) (database.NodeReader, error) { nodes, _ := db.dirties(stateRoot, true) return &testReader{db: db.disk, scheme: db.scheme, nodes: nodes}, nil } diff --git a/trie/iterator_test.go b/trie/iterator_test.go index b463294b09dd..74a1aa378c65 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -146,7 +146,7 @@ func testNodeIteratorCoverage(t *testing.T, scheme string) { } } // Cross check the hashes and the database itself - reader, err := nodeDb.Reader(trie.Hash()) + reader, err := nodeDb.NodeReader(trie.Hash()) if err != nil { t.Fatalf("state is not available %x", trie.Hash()) } diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 91fd38269f0f..f53b10758f31 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -40,7 +40,7 @@ type SecureTrie = StateTrie // NewSecure creates a new StateTrie. // Deprecated: use NewStateTrie. -func NewSecure(stateRoot common.Hash, owner common.Hash, root common.Hash, db database.Database) (*SecureTrie, error) { +func NewSecure(stateRoot common.Hash, owner common.Hash, root common.Hash, db database.NodeDatabase) (*SecureTrie, error) { id := &ID{ StateRoot: stateRoot, Owner: owner, @@ -61,7 +61,7 @@ func NewSecure(stateRoot common.Hash, owner common.Hash, root common.Hash, db da // StateTrie is not safe for concurrent use. type StateTrie struct { trie Trie - db database.Database + db database.NodeDatabase preimages preimageStore hashKeyBuf [common.HashLength]byte secKeyCache map[string][]byte @@ -73,7 +73,7 @@ type StateTrie struct { // If root is the zero hash or the sha3 hash of an empty string, the // trie is initially empty. Otherwise, New will panic if db is nil // and returns MissingNodeError if the root node cannot be found. 
-func NewStateTrie(id *ID, db database.Database) (*StateTrie, error) { +func NewStateTrie(id *ID, db database.NodeDatabase) (*StateTrie, error) { if db == nil { panic("trie.NewStateTrie called without a database") } diff --git a/trie/sync_test.go b/trie/sync_test.go index ccdee7d01400..2ff02576d4dd 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -183,7 +183,7 @@ func testIterativeSync(t *testing.T, count int, bypath bool, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -258,7 +258,7 @@ func testIterativeDelayedSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -327,7 +327,7 @@ func testIterativeRandomSync(t *testing.T, count int, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), } } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -394,7 +394,7 @@ func testIterativeRandomDelayedSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(path)), } } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -466,7 +466,7 @@ func testDuplicateAvoidanceSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -542,7 +542,7 @@ func testIncompleteSync(t *testing.T, scheme string) { syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -634,7 +634,7 @@ func testSyncOrdering(t *testing.T, scheme string) { }) reqs = append(reqs, NewSyncPath([]byte(paths[i]))) } - reader, err := srcDb.Reader(srcTrie.Hash()) + reader, err := srcDb.NodeReader(srcTrie.Hash()) if err != nil { t.Fatalf("State is not available %x", srcTrie.Hash()) } @@ -704,7 +704,7 @@ func syncWithHookWriter(t *testing.T, root common.Hash, db ethdb.Database, srcDb syncPath: NewSyncPath([]byte(paths[i])), }) } - reader, err := srcDb.Reader(root) + reader, err := srcDb.NodeReader(root) if err != nil { t.Fatalf("State is not available %x", root) } diff --git a/trie/trie.go b/trie/trie.go index 885b6b79628c..5b40e06e1751 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -79,7 +79,7 @@ func (t *Trie) Copy() *Trie { // zero hash or the sha3 hash of an empty string, then trie is initially // empty, otherwise, the root node must be present in database or returns // a MissingNodeError if not. -func New(id *ID, db database.Database) (*Trie, error) { +func New(id *ID, db database.NodeDatabase) (*Trie, error) { reader, err := newTrieReader(id.StateRoot, id.Owner, db) if err != nil { return nil, err @@ -100,7 +100,7 @@ func New(id *ID, db database.Database) (*Trie, error) { } // NewEmpty is a shortcut to create empty tree. It's mostly used in tests. 
-func NewEmpty(db database.Database) *Trie { +func NewEmpty(db database.NodeDatabase) *Trie { tr, _ := New(TrieID(types.EmptyRootHash), db) return tr } diff --git a/trie/trie_reader.go b/trie/trie_reader.go index adbf43d287ca..ff2db1d67499 100644 --- a/trie/trie_reader.go +++ b/trie/trie_reader.go @@ -27,19 +27,19 @@ import ( // for concurrent usage. type trieReader struct { owner common.Hash - reader database.Reader + reader database.NodeReader banned map[string]struct{} // Marker to prevent node from being accessed, for tests } // newTrieReader initializes the trie reader with the given node reader. -func newTrieReader(stateRoot, owner common.Hash, db database.Database) (*trieReader, error) { +func newTrieReader(stateRoot, owner common.Hash, db database.NodeDatabase) (*trieReader, error) { if stateRoot == (common.Hash{}) || stateRoot == types.EmptyRootHash { if stateRoot == (common.Hash{}) { log.Error("Zero state root hash!") } return &trieReader{owner: owner}, nil } - reader, err := db.Reader(stateRoot) + reader, err := db.NodeReader(stateRoot) if err != nil { return nil, &MissingNodeError{Owner: owner, NodeHash: stateRoot, err: err} } diff --git a/trie/trienode/node.go b/trie/trienode/node.go index 09f355f3b590..37eff1136914 100644 --- a/trie/trienode/node.go +++ b/trie/trienode/node.go @@ -135,6 +135,15 @@ func (set *NodeSet) Size() (int, int) { return set.updates, set.deletes } +// HashSet returns a set of trie nodes keyed by node hash. +func (set *NodeSet) HashSet() map[common.Hash][]byte { + ret := make(map[common.Hash][]byte) + for _, n := range set.Nodes { + ret[n.Hash] = n.Blob + } + return ret +} + // Summary returns a string-representation of the NodeSet. func (set *NodeSet) Summary() string { var out = new(strings.Builder) diff --git a/trie/triestate/state.go b/trie/triestate/state.go deleted file mode 100644 index 62a904387353..000000000000 --- a/trie/triestate/state.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2023 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see - -package triestate - -import "github.com/ethereum/go-ethereum/common" - -// Set represents a collection of mutated states during a state transition. -// The value refers to the original content of state before the transition -// is made. Nil means that the state was not present previously. -type Set struct { - Accounts map[common.Address][]byte // Mutated account set, nil means the account was not present - Storages map[common.Address]map[common.Hash][]byte // Mutated storage set, nil means the slot was not present - size common.StorageSize // Approximate size of set -} - -// New constructs the state set with provided data. 
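With the rename, trie constructors only demand the narrow database.NodeDatabase interface, so any backend that can hand out a NodeReader (a *triedb.Database, the testDb above, etc.) qualifies. A minimal sketch (openStateTrie is hypothetical):

func openStateTrie(db database.NodeDatabase, root common.Hash) (*trie.StateTrie, error) {
	// db may be the production trie database or any test double
	// implementing NodeReader(root) (database.NodeReader, error).
	return trie.NewStateTrie(trie.StateTrieID(root), db)
}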
-func New(accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) *Set { - return &Set{ - Accounts: accounts, - Storages: storages, - } -} - -// Size returns the approximate memory size occupied by the set. -func (s *Set) Size() common.StorageSize { - if s.size != 0 { - return s.size - } - for _, account := range s.Accounts { - s.size += common.StorageSize(common.AddressLength + len(account)) - } - for _, slots := range s.Storages { - for _, val := range slots { - s.size += common.StorageSize(common.HashLength + len(val)) - } - s.size += common.StorageSize(common.AddressLength) - } - return s.size -} diff --git a/trie/verkle.go b/trie/verkle.go index 6bd9d3d1af5a..a4c60e42c43f 100644 --- a/trie/verkle.go +++ b/trie/verkle.go @@ -45,7 +45,7 @@ type VerkleTrie struct { } // NewVerkleTrie constructs a verkle tree based on the specified root hash. -func NewVerkleTrie(root common.Hash, db database.Database, cache *utils.PointCache) (*VerkleTrie, error) { +func NewVerkleTrie(root common.Hash, db database.NodeDatabase, cache *utils.PointCache) (*VerkleTrie, error) { reader, err := newTrieReader(root, common.Hash{}, db) if err != nil { return nil, err diff --git a/triedb/database.go b/triedb/database.go index c1e6f9af4e69..a5a9e23c20b1 100644 --- a/triedb/database.go +++ b/triedb/database.go @@ -24,7 +24,6 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" "github.com/ethereum/go-ethereum/triedb/database" "github.com/ethereum/go-ethereum/triedb/hashdb" "github.com/ethereum/go-ethereum/triedb/pathdb" @@ -57,6 +56,14 @@ var VerkleDefaults = &Config{ // backend defines the methods needed to access/update trie nodes in different // state scheme. type backend interface { + // NodeReader returns a reader for accessing trie nodes within the specified state. + // An error will be returned if the specified state is not available. + NodeReader(root common.Hash) (database.NodeReader, error) + + // StateReader returns a reader for accessing flat states within the specified + // state. An error will be returned if the specified state is not available. + StateReader(root common.Hash) (database.StateReader, error) + // Initialized returns an indicator if the state data is already initialized // according to the state scheme. Initialized(genesisRoot common.Hash) bool @@ -68,24 +75,12 @@ type backend interface { // and dirty disk layer nodes, so both are merged into the second return. Size() (common.StorageSize, common.StorageSize) - // Update performs a state transition by committing dirty nodes contained - // in the given set in order to update state from the specified parent to - // the specified root. - // - // The passed in maps(nodes, states) will be retained to avoid copying - // everything. Therefore, these maps must not be changed afterwards. - Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error - // Commit writes all relevant trie nodes belonging to the specified state // to disk. Report specifies whether logs will be displayed in info level. Commit(root common.Hash, report bool) error // Close closes the trie database backend and releases all held resources. Close() error - - // Reader returns a reader for accessing all trie nodes with provided state - // root. An error will be returned if the requested state is not available. 
- Reader(root common.Hash) (database.Reader, error) } // Database is the wrapper of the underlying backend which is shared by different @@ -125,10 +120,17 @@ func NewDatabase(diskdb ethdb.Database, config *Config) *Database { return db } -// Reader returns a reader for accessing all trie nodes with provided state root. -// An error will be returned if the requested state is not available. -func (db *Database) Reader(blockRoot common.Hash) (database.Reader, error) { - return db.backend.Reader(blockRoot) +// NodeReader returns a reader for accessing trie nodes within the specified state. +// An error will be returned if the specified state is not available. +func (db *Database) NodeReader(blockRoot common.Hash) (database.NodeReader, error) { + return db.backend.NodeReader(blockRoot) +} + +// StateReader returns a reader that allows access to the state data associated +// with the specified state. An error will be returned if the specified state is +// not available. +func (db *Database) StateReader(blockRoot common.Hash) (database.StateReader, error) { + return db.backend.StateReader(blockRoot) } // Update performs a state transition by committing dirty nodes contained in the @@ -138,11 +140,17 @@ func (db *Database) Reader(blockRoot common.Hash) (database.Reader, error) { // // The passed in maps(nodes, states) will be retained to avoid copying everything. // Therefore, these maps must not be changed afterwards. -func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *StateSet) error { if db.preimages != nil { db.preimages.commit(false) } - return db.backend.Update(root, parent, block, nodes, states) + switch b := db.backend.(type) { + case *hashdb.Database: + return b.Update(root, parent, block, nodes) + case *pathdb.Database: + return b.Update(root, parent, block, nodes, states.internal()) + } + return errors.New("unknown backend") } // Commit iterates over all the children of a particular node, writes them out @@ -314,15 +322,45 @@ func (db *Database) Journal(root common.Hash) error { return pdb.Journal(root) } -// SetBufferSize sets the node buffer size to the provided value(in bytes). -// It's only supported by path-based database and will return an error for -// others. -func (db *Database) SetBufferSize(size int) error { +// VerifyState traverses the flat states specified by the given state root and +// ensures they are matched with each other. +func (db *Database) VerifyState(root common.Hash) error { pdb, ok := db.backend.(*pathdb.Database) if !ok { return errors.New("not supported") } - return pdb.SetBufferSize(size) + return pdb.VerifyState(root) +} + +// WaitGeneration waits until the background generation is finished. It assumes +// that the generation is permitted; otherwise, it will block indefinitely. +func (db *Database) WaitGeneration() error { + pdb, ok := db.backend.(*pathdb.Database) + if !ok { + return errors.New("not supported") + } + pdb.WaitGeneration() + return nil +} + +// AccountIterator creates a new account iterator for the specified root hash and +// seeks to a starting account hash. 
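As an orientation aid, a minimal sketch of the split read paths from a caller's point of view. readState is a hypothetical helper; root is assumed to be an available state root, and StateReader only succeeds on the path backend as wired below.

package example

import (
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/triedb"
)

func readState(db *triedb.Database, root, addrHash common.Hash) error {
	// Trie nodes are resolved through the node reader...
	nr, err := db.NodeReader(root)
	if err != nil {
		return err
	}
	rootNode, err := nr.Node(common.Hash{}, nil, root) // root node of the account trie
	if err != nil {
		return err
	}
	// ...while flat state entries go through the state reader.
	sr, err := db.StateReader(root)
	if err != nil {
		return err // the hashdb backend reports "not implemented"
	}
	account, err := sr.Account(addrHash)
	if err != nil {
		return err
	}
	fmt.Printf("root node: %d bytes, account: %+v\n", len(rootNode), account)
	return nil
}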
+func (db *Database) AccountIterator(root common.Hash, seek common.Hash) (pathdb.AccountIterator, error) {
+	pdb, ok := db.backend.(*pathdb.Database)
+	if !ok {
+		return nil, errors.New("not supported")
+	}
+	return pdb.AccountIterator(root, seek)
+}
+
+// StorageIterator creates a new storage iterator for the specified root hash and
+// account. The iterator will be moved to the specified start position.
+func (db *Database) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (pathdb.StorageIterator, error) {
+	pdb, ok := db.backend.(*pathdb.Database)
+	if !ok {
+		return nil, errors.New("not supported")
+	}
+	return pdb.StorageIterator(root, account, seek)
 }
 
 // IsVerkle returns the indicator if the database is holding a verkle tree.
diff --git a/triedb/database/database.go b/triedb/database/database.go
index 9bd5da08d109..6157dd08c035 100644
--- a/triedb/database/database.go
+++ b/triedb/database/database.go
@@ -16,10 +16,13 @@
 package database
 
-import "github.com/ethereum/go-ethereum/common"
+import (
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/types"
+)
 
-// Reader wraps the Node method of a backing trie reader.
-type Reader interface {
+// NodeReader wraps the Node method of a backing trie reader.
+type NodeReader interface {
 	// Node retrieves the trie node blob with the provided trie identifier,
 	// node path and the corresponding node hash. No error will be returned
 	// if the node is not found.
@@ -29,9 +32,35 @@ type Reader interface {
 	Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error)
 }
 
-// Database wraps the methods of a backing trie store.
-type Database interface {
-	// Reader returns a node reader associated with the specific state.
+// NodeDatabase wraps the methods of a backing trie store.
+type NodeDatabase interface {
+	// NodeReader returns a node reader associated with the specific state.
 	// An error will be returned if the specified state is not available.
-	Reader(stateRoot common.Hash) (Reader, error)
+	NodeReader(stateRoot common.Hash) (NodeReader, error)
+}
+
+// StateReader wraps the Account and Storage methods of a backing state reader.
+type StateReader interface {
+	// Account directly retrieves the account associated with a particular hash in
+	// the slim data format. An error will be returned if the read operation exits
+	// abnormally. Specifically, if the layer is already stale.
+	//
+	// No error will be returned if the requested account is not found in database.
+	Account(hash common.Hash) (*types.SlimAccount, error)
+
+	// Storage directly retrieves the storage data associated with a particular hash,
+	// within a particular account. An error will be returned if the read operation
+	// exits abnormally.
+	//
+	// Note:
+	// - the returned storage data is not a copy, please don't modify it
+	// - no error will be returned if the requested slot is not found in database
+	Storage(accountHash, storageHash common.Hash) ([]byte, error)
+}
+
+// StateDatabase wraps the methods of a backing state store.
+type StateDatabase interface {
+	// StateReader returns a state reader associated with the specific state.
+	// An error will be returned if the specified state is not available.
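A small usage sketch of the StateReader contract defined here. accountBalance is a hypothetical helper; note that, per the contract above, absence is reported as a nil account rather than an error.

package example

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/triedb/database"
	"github.com/holiman/uint256"
)

func accountBalance(sr database.StateReader, addrHash common.Hash) (*uint256.Int, error) {
	slim, err := sr.Account(addrHash) // addrHash = keccak256(address)
	if err != nil {
		return nil, err // e.g. the backing layer became stale
	}
	if slim == nil {
		return nil, nil // account not present: no error by contract
	}
	return slim.Balance, nil
}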
+ StateReader(stateRoot common.Hash) (StateReader, error) } diff --git a/triedb/hashdb/database.go b/triedb/hashdb/database.go index 4def10e338b1..fb718f4e7426 100644 --- a/triedb/hashdb/database.go +++ b/triedb/hashdb/database.go @@ -33,7 +33,6 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" "github.com/ethereum/go-ethereum/triedb/database" ) @@ -541,7 +540,7 @@ func (db *Database) Initialized(genesisRoot common.Hash) bool { // Update inserts the dirty nodes in provided nodeset into database and link the // account trie with multiple storage tries if necessary. -func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (db *Database) Update(root common.Hash, parent common.Hash, block uint64, nodes *trienode.MergedNodeSet) error { // Ensure the parent state is present and signal a warning if not. if parent != types.EmptyRootHash { if blob, _ := db.node(parent); len(blob) == 0 { @@ -616,9 +615,9 @@ func (db *Database) Close() error { return nil } -// Reader retrieves a node reader belonging to the given state root. -// An error will be returned if the requested state is not available. -func (db *Database) Reader(root common.Hash) (database.Reader, error) { +// NodeReader returns a reader for accessing trie nodes within the specified state. +// An error will be returned if the specified state is not available. +func (db *Database) NodeReader(root common.Hash) (database.NodeReader, error) { if _, err := db.node(root); err != nil { return nil, fmt.Errorf("state %#x is not available, %v", root, err) } @@ -636,3 +635,9 @@ func (reader *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([] blob, _ := reader.db.node(hash) return blob, nil } + +// StateReader returns a reader that allows access to the state data associated +// with the specified state. +func (db *Database) StateReader(root common.Hash) (database.StateReader, error) { + return nil, errors.New("not implemented") +} diff --git a/triedb/pathdb/buffer.go b/triedb/pathdb/buffer.go new file mode 100644 index 000000000000..9a49a7926ae9 --- /dev/null +++ b/triedb/pathdb/buffer.go @@ -0,0 +1,165 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "fmt" + "time" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// buffer is a collection of modified states along with the modified trie nodes. +// They are cached here to aggregate the disk write. 
The content of the buffer
+// must be checked before diving into disk (since it basically is not yet written
+// data).
+type buffer struct {
+	layers uint64    // The number of diff layers aggregated inside
+	limit  uint64    // The maximum memory allowance in bytes
+	nodes  *nodeSet  // Aggregated trie node set
+	states *stateSet // Aggregated state set
+}
+
+// newBuffer initializes the buffer with the provided states and trie nodes.
+func newBuffer(limit int, nodes *nodeSet, states *stateSet, layers uint64) *buffer {
+	// Don't panic for lazy users if any provided set is nil
+	if nodes == nil {
+		nodes = newNodeSet(nil)
+	}
+	if states == nil {
+		states = newStates(nil, nil, nil)
+	}
+	return &buffer{
+		layers: layers,
+		limit:  uint64(limit),
+		nodes:  nodes,
+		states: states,
+	}
+}
+
+// account retrieves the account blob with account address hash.
+func (b *buffer) account(hash common.Hash) ([]byte, bool) {
+	return b.states.account(hash)
+}
+
+// storage retrieves the storage slot with account address hash and slot key.
+func (b *buffer) storage(addrHash common.Hash, storageHash common.Hash) ([]byte, bool) {
+	return b.states.storage(addrHash, storageHash)
+}
+
+// node retrieves the trie node with node path and its trie identifier.
+func (b *buffer) node(owner common.Hash, path []byte) (*trienode.Node, bool) {
+	return b.nodes.node(owner, path)
+}
+
+// commit merges the provided states and trie nodes into the buffer.
+//
+// This operation does not take ownership of the passed maps, which belong to
+// the bottom-most diff layer. Instead, it holds references to the given maps,
+// which are safe to copy.
+func (b *buffer) commit(nodes *nodeSet, states *stateSet) *buffer {
+	b.layers++
+	b.nodes.merge(nodes)
+	b.states.merge(states)
+	return b
+}
+
+// revert is the reverse operation of commit. It also merges the provided states
+// and trie nodes into the buffer. The key difference is that the provided state
+// set should reverse the changes made by the most recent state transition.
+func (b *buffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node, accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte) error {
+	// Short circuit if no embedded state transition to revert
+	if b.layers == 0 {
+		return errStateUnrecoverable
+	}
+	b.layers--
+
+	// Reset the entire buffer if only a single transition left
+	if b.layers == 0 {
+		b.reset()
+		return nil
+	}
+	b.nodes.revert(db, nodes)
+	b.states.revert(accounts, storages)
+	return nil
+}
+
+// reset cleans up the buffer content.
+func (b *buffer) reset() {
+	b.layers = 0
+	b.nodes.reset()
+	b.states.reset()
+}
+
+// empty returns an indicator if buffer is empty.
+func (b *buffer) empty() bool {
+	return b.layers == 0
+}
+
+// full returns an indicator if the size of accumulated content exceeds the
+// configured threshold.
+func (b *buffer) full() bool {
+	return b.size() > b.limit
+}
+
+// size returns the approximate memory size of the held content.
+func (b *buffer) size() uint64 {
+	return b.states.size + b.nodes.size
+}
+
+// allocBatch returns a database batch with pre-allocated buffer.
+func (b *buffer) allocBatch(db ethdb.KeyValueStore) ethdb.Batch {
+	return db.NewBatchWithSize((b.nodes.dbsize() + b.states.dbsize()) * 11 / 10) // extra 10% for potential pebble internal stuff
+}
+
+// flush persists the in-memory dirty trie nodes and states into the disk if
+// the configured memory threshold is reached. Note, all data must be written
+// atomically.
+func (b *buffer) flush(root common.Hash, db ethdb.KeyValueStore, progress []byte, nodesCache, statesCache *fastcache.Cache, id uint64) error { + // Ensure the target state id is aligned with the internal counter. + head := rawdb.ReadPersistentStateID(db) + if head+b.layers != id { + return fmt.Errorf("buffer layers (%d) cannot be applied on top of persisted state id (%d) to reach requested state id (%d)", b.layers, head, id) + } + // Terminate the state snapshot generation if it's active + var ( + start = time.Now() + batch = b.allocBatch(db) + ) + nodes := b.nodes.write(batch, b.nodes.nodes, nodesCache) + accounts, slots := b.states.write(db, batch, progress, statesCache) + rawdb.WritePersistentStateID(batch, id) + rawdb.WriteSnapshotRoot(batch, root) + + // Flush all mutations in a single batch + size := batch.ValueSize() + if err := batch.Write(); err != nil { + return err + } + commitBytesMeter.Mark(int64(size)) + commitNodesMeter.Mark(int64(nodes)) + commitAccountsMeter.Mark(int64(accounts)) + commitStoragesMeter.Mark(int64(slots)) + commitTimeTimer.UpdateSince(start) + b.reset() + log.Info("Persisted buffer content", "nodes", nodes, "accounts", accounts, "slots", slots, "bytes", common.StorageSize(size), "elapsed", common.PrettyDuration(time.Since(start))) + return nil +} diff --git a/triedb/pathdb/context.go b/triedb/pathdb/context.go new file mode 100644 index 000000000000..6b0486fb4b82 --- /dev/null +++ b/triedb/pathdb/context.go @@ -0,0 +1,245 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "encoding/binary" + "errors" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/ethdb/memorydb" + "github.com/ethereum/go-ethereum/log" +) + +const ( + snapAccount = "account" // Identifier of account snapshot generation + snapStorage = "storage" // Identifier of storage snapshot generation +) + +// generatorStats is a collection of statistics gathered by the snapshot generator +// for logging purposes. This data structure is used throughout the entire +// lifecycle of the snapshot generation process and is shared across multiple +// generation cycles. 
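The id-alignment check at the top of flush encodes one invariant worth spelling out: the buffer sits exactly `layers` transitions above the persisted state, so the only legal flush target is their sum. A toy illustration with invented numbers; mayFlush is hypothetical.

// With the disk at persistent state id 100 and 3 aggregated diff layers,
// the buffer may only flush to id 103; anything else means the journal
// and the key-value store have drifted apart.
func mayFlush(head, layers, target uint64) bool {
	return head+layers == target // mayFlush(100, 3, 103) == true
}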
+type generatorStats struct { + origin uint64 // Origin prefix where generation started + start time.Time // Timestamp when generation started + accounts uint64 // Number of accounts indexed(generated or recovered) + slots uint64 // Number of storage slots indexed(generated or recovered) + dangling uint64 // Number of dangling storage slots + storage common.StorageSize // Total account and storage slot size(generation or recovery) +} + +// log creates a contextual log with the given message and the context pulled +// from the internally maintained statistics. +func (gs *generatorStats) log(msg string, root common.Hash, marker []byte) { + var ctx []interface{} + if root != (common.Hash{}) { + ctx = append(ctx, []interface{}{"root", root}...) + } + // Figure out whether we're after or within an account + switch len(marker) { + case common.HashLength: + ctx = append(ctx, []interface{}{"at", common.BytesToHash(marker)}...) + case 2 * common.HashLength: + ctx = append(ctx, []interface{}{ + "in", common.BytesToHash(marker[:common.HashLength]), + "at", common.BytesToHash(marker[common.HashLength:]), + }...) + } + // Add the usual measurements + ctx = append(ctx, []interface{}{ + "accounts", gs.accounts, + "slots", gs.slots, + "storage", gs.storage, + "dangling", gs.dangling, + "elapsed", common.PrettyDuration(time.Since(gs.start)), + }...) + // Calculate the estimated indexing time based on current stats + if len(marker) > 0 { + if done := binary.BigEndian.Uint64(marker[:8]) - gs.origin; done > 0 { + left := math.MaxUint64 - binary.BigEndian.Uint64(marker[:8]) + + speed := done/uint64(time.Since(gs.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero + ctx = append(ctx, []interface{}{ + "eta", common.PrettyDuration(time.Duration(left/speed) * time.Millisecond), + }...) + } + } + log.Info(msg, ctx...) +} + +// generatorContext holds several global fields that are used throughout the +// current generation cycle. +type generatorContext struct { + root common.Hash // State root of the generation target + account *holdableIterator // Iterator of account snapshot data + storage *holdableIterator // Iterator of storage snapshot data + db ethdb.KeyValueStore // Key-value store containing the snapshot data + batch ethdb.Batch // Database batch for writing data atomically + logged time.Time // The timestamp when last generation progress was displayed +} + +// newGeneratorContext initializes the context for generation. +func newGeneratorContext(root common.Hash, marker []byte, db ethdb.KeyValueStore) *generatorContext { + ctx := &generatorContext{ + root: root, + db: db, + batch: db.NewBatch(), + logged: time.Now(), + } + accMarker, storageMarker := splitMarker(marker) + ctx.openIterator(snapAccount, accMarker) + ctx.openIterator(snapStorage, storageMarker) + return ctx +} + +// openIterator constructs global account and storage snapshot iterators +// at the interrupted position. These iterators should be reopened from time +// to time to avoid blocking leveldb compaction for a long time. 
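The ETA printed by generatorStats.log is plain linear extrapolation over the 64-bit prefix of the marker; a worked example with invented figures:

package example

import (
	"fmt"
	"time"
)

func Example() {
	// Generation started at origin 0; after 10 minutes the marker sits at
	// one quarter of the keyspace, so roughly 30 more minutes are expected.
	done := uint64(1) << 62                                   // keyspace already swept
	left := 3 * (uint64(1) << 62)                             // keyspace remaining
	elapsed := uint64((10 * time.Minute) / time.Millisecond)  // elapsed milliseconds
	speed := done/(elapsed+1) + 1                             // +1 avoids division by zero
	fmt.Println(time.Duration(left/speed) * time.Millisecond) // ~30m
}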
+func (ctx *generatorContext) openIterator(kind string, start []byte) {
+	if kind == snapAccount {
+		iter := ctx.db.NewIterator(rawdb.SnapshotAccountPrefix, start)
+		ctx.account = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+common.HashLength))
+		return
+	}
+	iter := ctx.db.NewIterator(rawdb.SnapshotStoragePrefix, start)
+	ctx.storage = newHoldableIterator(rawdb.NewKeyLengthIterator(iter, 1+2*common.HashLength))
+}
+
+// reopenIterator releases the specified snapshot iterator and re-opens it at
+// the next position, so that long-running iteration does not block leveldb
+// compaction.
+func (ctx *generatorContext) reopenIterator(kind string) {
+	// Shift iterator one more step, so that we can reopen
+	// the iterator at the right position.
+	var iter = ctx.account
+	if kind == snapStorage {
+		iter = ctx.storage
+	}
+	hasNext := iter.Next()
+	if !hasNext {
+		// Iterator exhausted, release forever and create an already exhausted virtual iterator
+		iter.Release()
+		if kind == snapAccount {
+			ctx.account = newHoldableIterator(memorydb.New().NewIterator(nil, nil))
+			return
+		}
+		ctx.storage = newHoldableIterator(memorydb.New().NewIterator(nil, nil))
+		return
+	}
+	next := iter.Key()
+	iter.Release()
+	ctx.openIterator(kind, next[1:])
+}
+
+// close releases all the held resources.
+func (ctx *generatorContext) close() {
+	ctx.account.Release()
+	ctx.storage.Release()
+}
+
+// iterator returns the corresponding iterator specified by the kind.
+func (ctx *generatorContext) iterator(kind string) *holdableIterator {
+	if kind == snapAccount {
+		return ctx.account
+	}
+	return ctx.storage
+}
+
+// removeStorageBefore deletes all storage entries which are located before
+// the specified account. When the iterator touches the storage entry which
+// is located in or outside the given account, it stops and holds the current
+// iterated element locally.
+func (ctx *generatorContext) removeStorageBefore(account common.Hash) uint64 {
+	var (
+		count uint64
+		start = time.Now()
+		iter  = ctx.storage
+	)
+	for iter.Next() {
+		key := iter.Key()
+		if bytes.Compare(key[1:1+common.HashLength], account.Bytes()) >= 0 {
+			iter.Hold()
+			break
+		}
+		count++
+		ctx.batch.Delete(key)
+		if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
+			ctx.batch.Write()
+			ctx.batch.Reset()
+		}
+	}
+	storageCleanCounter.Inc(time.Since(start).Nanoseconds())
+	return count
+}
+
+// removeStorageAt deletes all storage entries which are located in the specified
+// account. When the iterator touches the storage entry which is outside the given
+// account, it stops and holds the current iterated element locally. An error will
+// be returned if the initial position of iterator is not in the given account.
+func (ctx *generatorContext) removeStorageAt(account common.Hash) error {
+	var (
+		count int64
+		start = time.Now()
+		iter  = ctx.storage
+	)
+	for iter.Next() {
+		key := iter.Key()
+		cmp := bytes.Compare(key[len(rawdb.SnapshotStoragePrefix):len(rawdb.SnapshotStoragePrefix)+common.HashLength], account.Bytes())
+		if cmp < 0 {
+			return errors.New("invalid iterator position")
+		}
+		if cmp > 0 {
+			iter.Hold()
+			break
+		}
+		count++
+		ctx.batch.Delete(key)
+		if ctx.batch.ValueSize() > ethdb.IdealBatchSize {
+			ctx.batch.Write()
+			ctx.batch.Reset()
+		}
+	}
+	wipedStorageMeter.Mark(count)
+	storageCleanCounter.Inc(time.Since(start).Nanoseconds())
+	return nil
+}
+
+// removeStorageLeft deletes all storage entries which are located after
+// the current iterator position.
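All three removal helpers slice raw snapshot keys, so the layout they assume is worth keeping in mind (prefixes as defined in rawdb; storageKeyOwner is a hypothetical helper):

// Account entry: 'a' + account hash             => 33-byte key
// Storage entry: 'o' + account hash + slot hash => 65-byte key
func storageKeyOwner(key []byte) common.Hash {
	prefix := len(rawdb.SnapshotStoragePrefix) // 1 byte
	return common.BytesToHash(key[prefix : prefix+common.HashLength])
}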
+func (ctx *generatorContext) removeStorageLeft() uint64 { + var ( + count uint64 + start = time.Now() + iter = ctx.storage + ) + for iter.Next() { + count++ + ctx.batch.Delete(iter.Key()) + if ctx.batch.ValueSize() > ethdb.IdealBatchSize { + ctx.batch.Write() + ctx.batch.Reset() + } + } + danglingStorageMeter.Mark(int64(count)) + storageCleanCounter.Inc(time.Since(start).Nanoseconds()) + return count +} diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index 31e478117cd5..94e6ab6dc2bb 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -17,6 +17,7 @@ package pathdb import ( + "encoding/binary" "errors" "fmt" "io" @@ -31,25 +32,27 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) const ( - // defaultCleanSize is the default memory allowance of clean cache. - defaultCleanSize = 16 * 1024 * 1024 + // defaultTrieCleanSize is the default memory allowance of clean trie cache. + defaultTrieCleanSize = 16 * 1024 * 1024 + + // defaultStateCleanSize is the default memory allowance of clean state cache. + defaultStateCleanSize = 16 * 1024 * 1024 // maxBufferSize is the maximum memory allowance of node buffer. - // Too large nodebuffer will cause the system to pause for a long + // Too large buffer will cause the system to pause for a long // time when write happens. Also, the largest batch that pebble can // support is 4GB, node will panic if batch size exceeds this limit. maxBufferSize = 256 * 1024 * 1024 - // DefaultBufferSize is the default memory allowance of node buffer + // defaultBufferSize is the default memory allowance of node buffer // that aggregates the writes from above until it's flushed into the // disk. It's meant to be used once the initial sync is finished. // Do not increase the buffer size arbitrarily, otherwise the system // pause time will increase when the database writes happen. - DefaultBufferSize = 64 * 1024 * 1024 + defaultBufferSize = 64 * 1024 * 1024 ) var ( @@ -64,9 +67,29 @@ type layer interface { // if the read operation exits abnormally. Specifically, if the layer is // already stale. // - // Note, no error will be returned if the requested node is not found in database. + // Note: + // - the returned node is not a copy, please don't modify it. + // - no error will be returned if the requested node is not found in database. node(owner common.Hash, path []byte, depth int) ([]byte, common.Hash, *nodeLoc, error) + // account directly retrieves the account RLP associated with a particular + // hash in the slim data format. An error will be returned if the read + // operation exits abnormally. Specifically, if the layer is already stale. + // + // Note: + // - the returned account is not a copy, please don't modify it. + // - no error will be returned if the requested account is not found in database. + account(hash common.Hash, depth int) ([]byte, error) + + // storage directly retrieves the storage data associated with a particular hash, + // within a particular account. An error will be returned if the read operation + // exits abnormally. Specifically, if the layer is already stale. + // + // Note: + // - the returned storage data is not a copy, please don't modify it. + // - no error will be returned if the requested slot is not found in database. + storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) + // rootHash returns the root hash for which this layer was made. 
rootHash() common.Hash
 
@@ -80,7 +103,7 @@ type layer interface {
 	// the provided dirty trie nodes along with the state change set.
 	//
 	// Note, the maps are retained by the method to avoid copying everything.
-	update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer
+	update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer
 
 	// journal commits an entire diff hierarchy to disk into a single journal entry.
 	// This is meant to be used during shutdown to persist the layer without
@@ -90,57 +113,77 @@
 
 // Config contains the settings for database.
 type Config struct {
-	StateHistory   uint64 // Number of recent blocks to maintain state history for
-	CleanCacheSize int    // Maximum memory allowance (in bytes) for caching clean nodes
-	DirtyCacheSize int    // Maximum memory allowance (in bytes) for caching dirty nodes
-	ReadOnly       bool   // Flag whether the database is opened in read only mode.
+	StateHistory    uint64 // Number of recent blocks to maintain state history for
+	TrieCleanSize   int    // Maximum memory allowance (in bytes) for caching clean **trie nodes**
+	StateCleanSize  int    // Maximum memory allowance (in bytes) for caching clean **states**
+	WriteBufferSize int    // Maximum memory allowance (in bytes) for write buffer
+	ReadOnly        bool   // Flag whether the database is opened in read only mode
+	SnapshotNoBuild bool   // Flag whether the background snapshot generation is disallowed
 }
 
 // sanitize checks the provided user configurations and changes anything that's
 // unreasonable or unworkable.
 func (c *Config) sanitize() *Config {
 	conf := *c
-	if conf.DirtyCacheSize > maxBufferSize {
-		log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.DirtyCacheSize), "updated", common.StorageSize(maxBufferSize))
-		conf.DirtyCacheSize = maxBufferSize
+	if conf.WriteBufferSize > maxBufferSize {
+		log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.WriteBufferSize), "updated", common.StorageSize(maxBufferSize))
+		conf.WriteBufferSize = maxBufferSize
 	}
 	return &conf
 }
 
+// fields returns a list of attributes of config for printing.
+func (c *Config) fields() []interface{} {
+	var list []interface{}
+	if c.ReadOnly {
+		list = append(list, "readonly", true)
+	}
+	if c.SnapshotNoBuild {
+		list = append(list, "snapshot", false)
+	}
+	list = append(list, "triecache", common.StorageSize(c.TrieCleanSize))
+	list = append(list, "statecache", common.StorageSize(c.StateCleanSize))
+	list = append(list, "buffer", common.StorageSize(c.WriteBufferSize))
+	list = append(list, "history", c.StateHistory)
+	return list
+}
+
 // Defaults contains default settings for Ethereum mainnet.
 var Defaults = &Config{
-	StateHistory:   params.FullImmutabilityThreshold,
-	CleanCacheSize: defaultCleanSize,
-	DirtyCacheSize: DefaultBufferSize,
+	StateHistory:    params.FullImmutabilityThreshold,
+	TrieCleanSize:   defaultTrieCleanSize,
+	StateCleanSize:  defaultStateCleanSize,
+	WriteBufferSize: defaultBufferSize,
 }
 
 // ReadOnly is the config in order to open database in read only mode.
 var ReadOnly = &Config{ReadOnly: true}
 
-// Database is a multiple-layered structure for maintaining in-memory trie nodes.
-// It consists of one persistent base layer backed by a key-value store, on top
-// of which arbitrarily many in-memory diff layers are stacked. The memory diffs
-// can form a tree with branching, but the disk layer is singleton and common to
-// all. 
If a reorg goes deeper than the disk layer, a batch of reverse diffs can -// be applied to rollback. The deepest reorg that can be handled depends on the -// amount of state histories tracked in the disk. +// Database is a multiple-layered structure for maintaining in-memory states +// along with its dirty trie nodes. It consists of one persistent base layer +// backed by a key-value store, on top of which arbitrarily many in-memory diff +// layers are stacked. The memory diffs can form a tree with branching, but the +// disk layer is singleton and common to all. If a reorg goes deeper than the +// disk layer, a batch of reverse diffs can be applied to rollback. The deepest +// reorg that can be handled depends on the amount of state histories tracked +// in the disk. // // At most one readable and writable database can be opened at the same time in -// the whole system which ensures that only one database writer can operate disk -// state. Unexpected open operations can cause the system to panic. +// the whole system which ensures that only one database writer can operate the +// persistent state. Unexpected open operations can cause the system to panic. type Database struct { // readOnly is the flag whether the mutation is allowed to be applied. // It will be set automatically when the database is journaled during // the shutdown to reject all following unexpected mutations. - readOnly bool // Flag if database is opened in read only mode - waitSync bool // Flag if database is deactivated due to initial state sync - isVerkle bool // Flag if database is used for verkle tree - bufferSize int // Memory allowance (in bytes) for caching dirty nodes - config *Config // Configuration for database - diskdb ethdb.Database // Persistent storage for matured trie nodes - tree *layerTree // The group for all known layers - freezer ethdb.ResettableAncientStore // Freezer for storing trie histories, nil possible in tests - lock sync.RWMutex // Lock to prevent mutations from happening at the same time + readOnly bool // Flag if database is opened in read only mode + waitSync bool // Flag if database is deactivated due to initial state sync + isVerkle bool // Flag if database is used for verkle tree + + config *Config // Configuration for database + diskdb ethdb.Database // Persistent storage for matured trie nodes + tree *layerTree // The group for all known layers + freezer ethdb.ResettableAncientStore // Freezer for storing trie histories, nil possible in tests + lock sync.RWMutex // Lock to prevent mutations from happening at the same time } // New attempts to load an already existing layer from a persistent key-value @@ -161,11 +204,10 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { diskdb = rawdb.NewTable(diskdb, string(rawdb.VerklePrefix)) } db := &Database{ - readOnly: config.ReadOnly, - isVerkle: isVerkle, - bufferSize: config.DirtyCacheSize, - config: config, - diskdb: diskdb, + readOnly: config.ReadOnly, + isVerkle: isVerkle, + config: config, + diskdb: diskdb, } // Construct the layer tree by resolving the in-disk singleton state // and in-memory layer journal. @@ -174,7 +216,7 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { // Repair the state history, which might not be aligned with the state // in the key-value store due to an unclean shutdown. 
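A configuration sketch with the renamed knobs (values invented; per sanitize above, WriteBufferSize is capped at 256MiB):

package example

import (
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/triedb/pathdb"
)

func openPathDB(diskdb ethdb.Database) *pathdb.Database {
	return pathdb.New(diskdb, &pathdb.Config{
		StateHistory:    90_000,            // reverse diffs retained for rollback
		TrieCleanSize:   256 * 1024 * 1024, // clean cache for trie nodes
		StateCleanSize:  256 * 1024 * 1024, // clean cache for flat states
		WriteBufferSize: 64 * 1024 * 1024,  // dirty write buffer
	}, false) // isVerkle = false
}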
if err := db.repairHistory(); err != nil {
-		log.Crit("Failed to repair pathdb", "err", err)
+		log.Crit("Failed to repair state history", "err", err)
 	}
 	// Disable database in case node is still in the initial state sync stage.
 	if rawdb.ReadSnapSyncStatusFlag(diskdb) == rawdb.StateSyncRunning && !db.readOnly {
 		if err := db.Disable(); err != nil {
 			log.Crit("Failed to disable database", "err", err) // impossible to happen
 		}
 	}
+	// Resolving the state snapshot generation progress from the database is
+	// mandatory. This ensures that uncovered flat states are not accessed,
+	// even if background generation is not allowed. If permitted, the generation
+	// might be scheduled.
+	db.setStateGenerator()
+
+	fields := config.fields()
+	if db.isVerkle {
+		fields = append(fields, "verkle", true)
+	}
+	log.Info("Initialized path database", fields...)
 	return db
 }
@@ -234,6 +287,52 @@ func (db *Database) repairHistory() error {
 	return nil
 }
 
+// setStateGenerator loads the state generation progress marker and potentially
+// resumes the state generation if it's permitted.
+func (db *Database) setStateGenerator() {
+	// Load the state snapshot generation progress marker to prevent access
+	// to uncovered states.
+	generator, root := loadGenerator(db.diskdb)
+	if generator == nil {
+		// Initialize an empty generator to rebuild the state snapshot
+		// from scratch
+		generator = &journalGenerator{
+			Marker: []byte{},
+		}
+	}
+	// Short circuit if the whole state snapshot has already been fully generated
+	if generator.Done {
+		return
+	}
+	var origin uint64
+	if len(generator.Marker) >= 8 {
+		origin = binary.BigEndian.Uint64(generator.Marker)
+	}
+	stats := &generatorStats{
+		origin:   origin,
+		start:    time.Now(),
+		accounts: generator.Accounts,
+		slots:    generator.Slots,
+		dangling: generator.DanglingSlots,
+		storage:  common.StorageSize(generator.Storage),
+	}
+	dl := db.tree.bottom()
+
+	// Construct the generator and link it to the disk layer, ensuring that the
+	// generation progress is resolved regardless of whether background state
+	// snapshot generation is allowed.
+	noBuild := db.readOnly || db.config.SnapshotNoBuild
+	dl.generator = newGenerator(db.diskdb, noBuild, generator.Marker, stats)
+
+	// Short circuit if the background generation is not permitted. Notably,
+	// snapshot generation is not yet supported for verkle trees.
+	if noBuild || db.isVerkle || db.waitSync {
+		return
+	}
+	stats.log("Starting snapshot generation", root, generator.Marker)
+	dl.generator.run(root)
+}
+
 // Update adds a new layer into the tree, if that can be linked to an existing
 // old parent. It is disallowed to insert a disk layer (the origin of all). Apart
 // from that this function will flatten the extra diff layers at bottom into disk
@@ -241,7 +340,7 @@ func (db *Database) repairHistory() error {
 //
 // The passed in maps(nodes, states) will be retained to avoid copying everything.
 // Therefore, these maps must not be changed afterwards.
-func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error {
+func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *StateSetWithOrigin) error {
 	// Hold the lock to prevent concurrent mutations. 
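For reference, the marker decoded in setStateGenerator is either empty (generate from scratch), a 32-byte account position, or a 64-byte account+slot position; only its first 8 bytes feed the origin used for progress estimation. A sketch (markerOrigin is hypothetical):

func markerOrigin(marker []byte) uint64 {
	if len(marker) < 8 {
		return 0 // fresh or barely-started generation
	}
	return binary.BigEndian.Uint64(marker[:8]) // requires "encoding/binary"
}

// A marker starting 0x40 00 00 00 00 00 00 00 yields 1<<62: generation
// resumed a quarter of the way through the account keyspace.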
db.lock.Lock() defer db.lock.Unlock() @@ -294,13 +393,20 @@ func (db *Database) Disable() error { } db.waitSync = true - // Mark the disk layer as stale to prevent access to persistent state. - db.tree.bottom().markStale() + // Terminate the state generator if it's active and mark the disk layer + // as stale to prevent access to persistent state. + disk := db.tree.bottom() + if disk.generator != nil { + disk.generator.stop() + } + disk.markStale() // Write the initial sync flag to persist it across restarts. - rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncRunning) + batch := db.diskdb.NewBatch() + rawdb.WriteSnapSyncStatusFlag(batch, rawdb.StateSyncRunning) + rawdb.DeleteSnapshotRoot(batch) log.Info("Disabled trie database due to state sync") - return nil + return batch.Write() } // Enable activates database and resets the state tree with the provided persistent @@ -314,11 +420,11 @@ func (db *Database) Enable(root common.Hash) error { return errDatabaseReadOnly } // Ensure the provided state root matches the stored one. - root = types.TrieRootHash(root) stored := types.EmptyRootHash if blob := rawdb.ReadAccountTrieNode(db.diskdb, nil); len(blob) > 0 { stored = crypto.Keccak256Hash(blob) } + root = types.TrieRootHash(root) if stored != root { return fmt.Errorf("state root mismatch: stored %x, synced %x", stored, root) } @@ -339,13 +445,13 @@ func (db *Database) Enable(root common.Hash) error { return err } } - // Re-construct a new disk layer backed by persistent state - // with **empty clean cache and node buffer**. - db.tree.reset(newDiskLayer(root, 0, db, nil, newNodeBuffer(db.bufferSize, nil, 0))) - // Re-enable the database as the final step. db.waitSync = false rawdb.WriteSnapSyncStatusFlag(db.diskdb, rawdb.StateSyncFinished) + + // Re-construct a new disk layer backed by persistent state + // with **empty clean cache and node buffer**. + db.tree.reset(generateSnapshot(db, root)) log.Info("Rebuilt trie database", "root", root) return nil } @@ -357,19 +463,19 @@ func (db *Database) Recover(root common.Hash) error { db.lock.Lock() defer db.lock.Unlock() - // Short circuit if rollback operation is not supported. + // Short circuit if rollback operation is not supported if err := db.modifyAllowed(); err != nil { return err } if db.freezer == nil { return errors.New("state rollback is non-supported") } - // Short circuit if the target state is not recoverable. + // Short circuit if the target state is not recoverable root = types.TrieRootHash(root) if !db.Recoverable(root) { return errStateUnrecoverable } - // Apply the state histories upon the disk layer in order. + // Apply the state histories upon the disk layer in order var ( start = time.Now() dl = db.tree.bottom() @@ -454,7 +560,7 @@ func (db *Database) Close() error { func (db *Database) Size() (diffs common.StorageSize, nodes common.StorageSize) { db.tree.forEach(func(layer layer) { if diff, ok := layer.(*diffLayer); ok { - diffs += common.StorageSize(diff.memory) + diffs += common.StorageSize(diff.size()) } if disk, ok := layer.(*diskLayer); ok { nodes += disk.size() @@ -478,19 +584,6 @@ func (db *Database) Initialized(genesisRoot common.Hash) bool { return inited } -// SetBufferSize sets the node buffer size to the provided value(in bytes). 
-func (db *Database) SetBufferSize(size int) error {
-	db.lock.Lock()
-	defer db.lock.Unlock()
-
-	if size > maxBufferSize {
-		log.Info("Capped node buffer size", "provided", common.StorageSize(size), "adjusted", common.StorageSize(maxBufferSize))
-		size = maxBufferSize
-	}
-	db.bufferSize = size
-	return db.tree.bottom().setBufferSize(db.bufferSize)
-}
-
 // modifyAllowed returns the indicator if mutation is allowed. This function
 // assumes the db.lock is already held.
 func (db *Database) modifyAllowed() error {
@@ -532,3 +625,31 @@ func (db *Database) StorageHistory(address common.Address, slot common.Hash, sta
 func (db *Database) HistoryRange() (uint64, uint64, error) {
 	return historyRange(db.freezer)
 }
+
+// WaitGeneration waits until the background generation is finished. It assumes
+// that the generation is permitted; otherwise, it will block indefinitely.
+func (db *Database) WaitGeneration() {
+	gen := db.tree.bottom().generator
+	if gen == nil || gen.completed() {
+		return
+	}
+	<-gen.done
+}
+
+// AccountIterator creates a new account iterator for the specified root hash and
+// seeks to a starting account hash.
+func (db *Database) AccountIterator(root common.Hash, seek common.Hash) (AccountIterator, error) {
+	if gen := db.tree.bottom().generator; gen != nil && !gen.completed() {
+		return nil, errNotConstructed
+	}
+	return newFastAccountIterator(db, root, seek)
+}
+
+// StorageIterator creates a new storage iterator for the specified root hash and
+// account. The iterator will be moved to the specified start position.
+func (db *Database) StorageIterator(root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) {
+	if gen := db.tree.bottom().generator; gen != nil && !gen.completed() {
+		return nil, errNotConstructed
+	}
+	return newFastStorageIterator(db, root, account, seek)
+}
diff --git a/triedb/pathdb/database_test.go b/triedb/pathdb/database_test.go
index f667944784bf..642b47bd6c01 100644
--- a/triedb/pathdb/database_test.go
+++ b/triedb/pathdb/database_test.go
@@ -31,7 +30,6 @@ import (
 	"github.com/ethereum/go-ethereum/rlp"
 	"github.com/ethereum/go-ethereum/trie"
 	"github.com/ethereum/go-ethereum/trie/trienode"
-	"github.com/ethereum/go-ethereum/trie/triestate"
 	"github.com/holiman/uint256"
 )
@@ -108,9 +107,10 @@ func newTester(t *testing.T, historyLimit uint64) *tester {
 	var (
 		disk, _ = rawdb.NewDatabaseWithFreezer(rawdb.NewMemoryDatabase(), t.TempDir(), "", false)
 		db      = New(disk, &Config{
-			StateHistory:   historyLimit,
-			CleanCacheSize: 16 * 1024,
-			DirtyCacheSize: 16 * 1024,
+			StateHistory:    historyLimit,
+			TrieCleanSize:   16 * 1024,
+			StateCleanSize:  16 * 1024,
+			WriteBufferSize: 16 * 1024,
 		}, false)
 		obj = &tester{
 			db: db,
@@ -217,7 +217,7 @@ func (t *tester) clearStorage(ctx *genctx, addr common.Address, root common.Hash
 	return root
 }
 
-func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNodeSet, *triestate.Set) {
+func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNodeSet, *StateSetWithOrigin) {
 	var (
 		ctx     = newCtx(parent)
 		dirties = make(map[common.Hash]struct{})
@@ -310,7 +310,18 @@ func (t *tester) generate(parent common.Hash) (common.Hash, *trienode.MergedNode
 			delete(t.storages, addrHash)
 		}
 	}
-	return root, ctx.nodes, triestate.New(ctx.accountOrigin, ctx.storageOrigin)
+	var (
+		accounts  = make(map[common.Hash][]byte)
+		destructs = make(map[common.Hash]struct{})
+	)
+	for addrHash, data := range ctx.accounts {
+		if len(data) == 0 {
+			destructs[addrHash] = struct{}{}
+		} else {
+			accounts[addrHash] = 
data + } + } + return root, ctx.nodes, NewStateSetWithOrigin(destructs, accounts, ctx.storages, ctx.accountOrigin, ctx.storageOrigin) } // lastHash returns the latest root hash, or empty if nothing is cached. diff --git a/triedb/pathdb/difflayer.go b/triedb/pathdb/difflayer.go index 6b87883482c9..b36f506eeaa5 100644 --- a/triedb/pathdb/difflayer.go +++ b/triedb/pathdb/difflayer.go @@ -22,8 +22,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) // diffLayer represents a collection of modifications made to the in-memory tries @@ -33,45 +31,29 @@ import ( // made to the state, that have not yet graduated into a semi-immutable state. type diffLayer struct { // Immutables - root common.Hash // Root hash to which this layer diff belongs to - id uint64 // Corresponding state id - block uint64 // Associated block number - nodes map[common.Hash]map[string]*trienode.Node // Cached trie nodes indexed by owner and path - states *triestate.Set // Associated state change set for building history - memory uint64 // Approximate guess as to how much memory we use + root common.Hash // Root hash to which this layer diff belongs to + id uint64 // Corresponding state id + block uint64 // Associated block number + nodes *nodeSet // Cached trie nodes indexed by owner and path + states *StateSetWithOrigin // Associated state changes along with origin value parent layer // Parent layer modified by this one, never nil, **can be changed** lock sync.RWMutex // Lock used to protect parent } // newDiffLayer creates a new diff layer on top of an existing layer. -func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer { - var ( - size int64 - count int - ) +func newDiffLayer(parent layer, root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer { dl := &diffLayer{ root: root, id: id, block: block, + parent: parent, nodes: nodes, states: states, - parent: parent, - } - for _, subset := range nodes { - for path, n := range subset { - dl.memory += uint64(n.Size() + len(path)) - size += int64(len(n.Blob) + len(path)) - } - count += len(subset) } - if states != nil { - dl.memory += uint64(states.Size()) - } - dirtyWriteMeter.Mark(size) - diffLayerNodesMeter.Mark(int64(count)) - diffLayerBytesMeter.Mark(int64(dl.memory)) - log.Debug("Created new diff layer", "id", id, "block", block, "nodes", count, "size", common.StorageSize(dl.memory)) + dirtyNodeWriteMeter.Mark(int64(nodes.size)) + dirtyStateWriteMeter.Mark(int64(states.size)) + log.Debug("Created new diff layer", "id", id, "block", block, "nodesize", common.StorageSize(nodes.size), "statesize", common.StorageSize(states.size)) return dl } @@ -104,23 +86,72 @@ func (dl *diffLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co defer dl.lock.RUnlock() // If the trie node is known locally, return it - subset, ok := dl.nodes[owner] + n, ok := dl.nodes.node(owner, path) if ok { - n, ok := subset[string(path)] - if ok { - dirtyHitMeter.Mark(1) - dirtyNodeHitDepthHist.Update(int64(depth)) - dirtyReadMeter.Mark(int64(len(n.Blob))) - return n.Blob, n.Hash, &nodeLoc{loc: locDiffLayer, depth: depth}, nil - } + dirtyNodeHitMeter.Mark(1) + dirtyNodeHitDepthHist.Update(int64(depth)) + dirtyNodeReadMeter.Mark(int64(len(n.Blob))) + return n.Blob, n.Hash, &nodeLoc{loc: locDiffLayer, depth: 
depth}, nil
 	}
 	// Trie node unknown to this layer, resolve from parent
 	return dl.parent.node(owner, path, depth+1)
 }
 
+// account directly retrieves the account RLP associated with a particular
+// hash in the slim data format.
+//
+// Note the returned account is not a copy, please don't modify it.
+func (dl *diffLayer) account(hash common.Hash, depth int) ([]byte, error) {
+	// Hold the lock, ensure the parent won't be changed during the
+	// state accessing.
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	if blob, found := dl.states.account(hash); found {
+		dirtyStateHitMeter.Mark(1)
+		dirtyStateHitDepthHist.Update(int64(depth))
+		dirtyStateReadMeter.Mark(int64(len(blob)))
+
+		if len(blob) == 0 {
+			stateAccountMissMeter.Mark(1)
+		} else {
+			stateAccountHitMeter.Mark(1)
+		}
+		return blob, nil
+	}
+	// Account is unknown to this layer, resolve from parent
+	return dl.parent.account(hash, depth+1)
+}
+
+// storage directly retrieves the storage data associated with a particular hash,
+// within a particular account.
+//
+// Note the returned storage data is not a copy, please don't modify it.
+func (dl *diffLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) {
+	// Hold the lock, ensure the parent won't be changed during the
+	// state accessing.
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	if blob, found := dl.states.storage(accountHash, storageHash); found {
+		dirtyStateHitMeter.Mark(1)
+		dirtyStateHitDepthHist.Update(int64(depth))
+		dirtyStateReadMeter.Mark(int64(len(blob)))
+
+		if len(blob) == 0 {
+			stateStorageMissMeter.Mark(1)
+		} else {
+			stateStorageHitMeter.Mark(1)
+		}
+		return blob, nil
+	}
+	// Storage slot is unknown to this layer, resolve from parent
+	return dl.parent.storage(accountHash, storageHash, depth+1)
+}
+
 // update implements the layer interface, creating a new layer on top of the
 // existing layer tree with the specified data items.
-func (dl *diffLayer) update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer {
+func (dl *diffLayer) update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer {
 	return newDiffLayer(dl, root, id, block, nodes, states)
 }
 
@@ -145,6 +176,11 @@ func (dl *diffLayer) persist(force bool) (layer, error) {
 	return diffToDisk(dl, force)
 }
 
+// size returns the approximate memory size occupied by this diff layer.
+func (dl *diffLayer) size() uint64 {
+	return dl.nodes.size + dl.states.size
+}
+
 // diffToDisk merges a bottom-most diff into the persistent disk layer underneath
 // it. The method will panic if called onto a non-bottom-most diff layer. 
func diffToDisk(layer *diffLayer, force bool) (layer, error) { diff --git a/triedb/pathdb/difflayer_test.go b/triedb/pathdb/difflayer_test.go index 1e93a3f89214..532027b31a06 100644 --- a/triedb/pathdb/difflayer_test.go +++ b/triedb/pathdb/difflayer_test.go @@ -30,7 +30,7 @@ import ( func emptyLayer() *diskLayer { return &diskLayer{ db: New(rawdb.NewMemoryDatabase(), nil, false), - buffer: newNodeBuffer(DefaultBufferSize, nil, 0), + buffer: newBuffer(defaultBufferSize, nil, nil, 0), } } @@ -76,7 +76,7 @@ func benchmarkSearch(b *testing.B, depth int, total int) { nblob = common.CopyBytes(blob) } } - return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + return newDiffLayer(parent, common.Hash{}, 0, 0, newNodeSet(nodes), nil) } var layer layer layer = emptyLayer() @@ -118,7 +118,7 @@ func BenchmarkPersist(b *testing.B) { ) nodes[common.Hash{}][string(path)] = node } - return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + return newDiffLayer(parent, common.Hash{}, 0, 0, newNodeSet(nodes), nil) } for i := 0; i < b.N; i++ { b.StopTimer() @@ -157,7 +157,7 @@ func BenchmarkJournal(b *testing.B) { nodes[common.Hash{}][string(path)] = node } // TODO(rjl493456442) a non-nil state set is expected. - return newDiffLayer(parent, common.Hash{}, 0, 0, nodes, nil) + return newDiffLayer(parent, common.Hash{}, 0, 0, newNodeSet(nodes), nil) } var layer layer layer = emptyLayer() diff --git a/triedb/pathdb/disklayer.go b/triedb/pathdb/disklayer.go index b6ae39446cf0..fc609331293f 100644 --- a/triedb/pathdb/disklayer.go +++ b/triedb/pathdb/disklayer.go @@ -17,6 +17,7 @@ package pathdb import ( + "bytes" "fmt" "sync" @@ -25,8 +26,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) // diskLayer is a low level persistent layer built on top of a key-value store. @@ -34,25 +33,33 @@ type diskLayer struct { root common.Hash // Immutable, root hash to which this layer was made for id uint64 // Immutable, corresponding state id db *Database // Path-based trie database - cleans *fastcache.Cache // GC friendly memory cache of clean node RLPs - buffer *nodebuffer // Node buffer to aggregate writes + nodes *fastcache.Cache // GC friendly memory cache of clean nodes + states *fastcache.Cache // GC friendly memory cache of clean states + buffer *buffer // Dirty buffer to aggregate writes of nodes and states stale bool // Signals that the layer became stale (state progressed) - lock sync.RWMutex // Lock used to protect stale flag + lock sync.RWMutex // Lock used to protect stale flag and genMarker + + // The generator is set if the state snapshot was not fully completed + generator *generator } // newDiskLayer creates a new disk layer based on the passing arguments. -func newDiskLayer(root common.Hash, id uint64, db *Database, cleans *fastcache.Cache, buffer *nodebuffer) *diskLayer { - // Initialize a clean cache if the memory allowance is not zero - // or reuse the provided cache if it is not nil (inherited from +func newDiskLayer(root common.Hash, id uint64, db *Database, nodes *fastcache.Cache, states *fastcache.Cache, buffer *buffer) *diskLayer { + // Initialize the clean caches if the memory allowance is not zero + // or reuse the provided caches if they are not nil (inherited from // the original disk layer). 
- if cleans == nil && db.config.CleanCacheSize != 0 { - cleans = fastcache.New(db.config.CleanCacheSize) + if nodes == nil && db.config.TrieCleanSize != 0 { + nodes = fastcache.New(db.config.TrieCleanSize) + } + if states == nil && db.config.StateCleanSize != 0 { + states = fastcache.New(db.config.StateCleanSize) } return &diskLayer{ root: root, id: id, db: db, - cleans: cleans, + nodes: nodes, + states: states, buffer: buffer, } } @@ -73,6 +80,13 @@ func (dl *diskLayer) parentLayer() layer { return nil } +// setGenerator links the given generator to disk layer, representing the +// associated state snapshot is not fully completed yet and the generation +// is potentially running in the background. +func (dl *diskLayer) setGenerator(generator *generator) { + dl.generator = generator +} + // isStale return whether this layer has become stale (was flattened across) or if // it's still live. func (dl *diskLayer) isStale() bool { @@ -108,25 +122,25 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co // layer as stale. n, found := dl.buffer.node(owner, path) if found { - dirtyHitMeter.Mark(1) - dirtyReadMeter.Mark(int64(len(n.Blob))) + dirtyNodeHitMeter.Mark(1) + dirtyNodeReadMeter.Mark(int64(len(n.Blob))) dirtyNodeHitDepthHist.Update(int64(depth)) return n.Blob, n.Hash, &nodeLoc{loc: locDirtyCache, depth: depth}, nil } - dirtyMissMeter.Mark(1) + dirtyNodeMissMeter.Mark(1) // Try to retrieve the trie node from the clean memory cache h := newHasher() defer h.release() - key := cacheKey(owner, path) - if dl.cleans != nil { - if blob := dl.cleans.Get(nil, key); len(blob) > 0 { - cleanHitMeter.Mark(1) - cleanReadMeter.Mark(int64(len(blob))) + key := nodeCacheKey(owner, path) + if dl.nodes != nil { + if blob := dl.nodes.Get(nil, key); len(blob) > 0 { + cleanNodeHitMeter.Mark(1) + cleanNodeReadMeter.Mark(int64(len(blob))) return blob, h.hash(blob), &nodeLoc{loc: locCleanCache, depth: depth}, nil } - cleanMissMeter.Mark(1) + cleanNodeMissMeter.Mark(1) } // Try to retrieve the trie node from the disk. var blob []byte @@ -135,16 +149,144 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co } else { blob = rawdb.ReadStorageTrieNode(dl.db.diskdb, owner, path) } - if dl.cleans != nil && len(blob) > 0 { - dl.cleans.Set(key, blob) - cleanWriteMeter.Mark(int64(len(blob))) + if dl.nodes != nil && len(blob) > 0 { + dl.nodes.Set(key, blob) + cleanNodeWriteMeter.Mark(int64(len(blob))) } return blob, h.hash(blob), &nodeLoc{loc: locDiskLayer, depth: depth}, nil } +// account directly retrieves the account RLP associated with a particular +// hash in the slim data format. +// +// Note the returned account is not a copy, please don't modify it. +func (dl *diskLayer) account(hash common.Hash, depth int) ([]byte, error) { + dl.lock.RLock() + defer dl.lock.RUnlock() + + if dl.stale { + return nil, errSnapshotStale + } + // Try to retrieve the trie node from the not-yet-written + // node buffer first. Note the buffer is lock free since + // it's impossible to mutate the buffer before tagging the + // layer as stale. + blob, found := dl.buffer.account(hash) + if found { + dirtyStateHitMeter.Mark(1) + dirtyStateReadMeter.Mark(int64(len(blob))) + dirtyStateHitDepthHist.Update(int64(depth)) + + if len(blob) == 0 { + stateAccountMissMeter.Mark(1) + } else { + stateAccountHitMeter.Mark(1) + } + return blob, nil + } + dirtyStateMissMeter.Mark(1) + + // If the layer is being generated, ensure the requested account has + // already been covered by the generator. 
+	marker := dl.genMarker()
+	if marker != nil && bytes.Compare(hash.Bytes(), marker) > 0 {
+		return nil, errNotCoveredYet
+	}
+	// Try to retrieve the account from the memory cache
+	if dl.states != nil {
+		if blob, found := dl.states.HasGet(nil, hash[:]); found {
+			cleanStateHitMeter.Mark(1)
+			cleanStateReadMeter.Mark(int64(len(blob)))
+
+			if len(blob) == 0 {
+				stateAccountMissMeter.Mark(1)
+			} else {
+				stateAccountHitMeter.Mark(1)
+			}
+			return blob, nil
+		}
+		cleanStateMissMeter.Mark(1)
+	}
+	// Try to retrieve the account from the disk.
+	blob = rawdb.ReadAccountSnapshot(dl.db.diskdb, hash)
+	if dl.states != nil {
+		dl.states.Set(hash[:], blob)
+		cleanStateWriteMeter.Mark(int64(len(blob)))
+	}
+	if len(blob) == 0 {
+		stateAccountMissMeter.Mark(1)
+	} else {
+		stateAccountHitMeter.Mark(1)
+	}
+	return blob, nil
+}
+
+// storage directly retrieves the storage data associated with a particular hash,
+// within a particular account.
+//
+// Note the returned storage data is not a copy, please don't modify it.
+func (dl *diskLayer) storage(accountHash, storageHash common.Hash, depth int) ([]byte, error) {
+	// Hold the lock to ensure the parent won't be changed during
+	// state access.
+	dl.lock.RLock()
+	defer dl.lock.RUnlock()
+
+	if dl.stale {
+		return nil, errSnapshotStale
+	}
+	if blob, found := dl.buffer.storage(accountHash, storageHash); found {
+		dirtyStateHitMeter.Mark(1)
+		dirtyStateReadMeter.Mark(int64(len(blob)))
+		dirtyStateHitDepthHist.Update(int64(depth))
+
+		if len(blob) == 0 {
+			stateStorageMissMeter.Mark(1)
+		} else {
+			stateStorageHitMeter.Mark(1)
+		}
+		return blob, nil
+	}
+	dirtyStateMissMeter.Mark(1)
+
+	// If the layer is being generated, ensure the requested storage slot
+	// has already been covered by the generator.
+	key := append(accountHash[:], storageHash[:]...)
+	marker := dl.genMarker()
+	if marker != nil && bytes.Compare(key, marker) > 0 {
+		return nil, errNotCoveredYet
+	}
+	// Try to retrieve the storage slot from the memory cache
+	if dl.states != nil {
+		if blob, found := dl.states.HasGet(nil, key); found {
+			cleanStateHitMeter.Mark(1)
+			cleanStateReadMeter.Mark(int64(len(blob)))
+
+			if len(blob) == 0 {
+				stateStorageMissMeter.Mark(1)
+			} else {
+				stateStorageHitMeter.Mark(1)
+			}
+			return blob, nil
+		}
+		cleanStateMissMeter.Mark(1)
+	}
+	// Try to retrieve the storage slot from the disk
+	blob := rawdb.ReadStorageSnapshot(dl.db.diskdb, accountHash, storageHash)
+	if dl.states != nil {
+		dl.states.Set(key, blob)
+		cleanStateWriteMeter.Mark(int64(len(blob)))
+	}
+	if len(blob) == 0 {
+		stateStorageMissMeter.Mark(1)
+	} else {
+		stateStorageHitMeter.Mark(1)
+	}
+	return blob, nil
+}
+
 // update implements the layer interface, returning a new diff layer on top
 // with the given state set.
-func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes map[common.Hash]map[string]*trienode.Node, states *triestate.Set) *diffLayer {
+func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer {
 	return newDiffLayer(dl, root, id, block, nodes, states)
 }

@@ -152,9 +294,6 @@ func (dl *diskLayer) update(root common.Hash, id uint64, block uint64, nodes map
 // and returns a newly constructed disk layer. Note the current disk
 // layer must be tagged as stale first to prevent re-access.
 func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) {
-	dl.lock.Lock()
-	defer dl.lock.Unlock()
-
 	// Construct and store the state history first.
If crash happens after storing // the state history but without flushing the corresponding states(journal), // the stored state history will be truncated from head in the next restart. @@ -180,7 +319,7 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) { } } // Mark the diskLayer as stale before applying any mutations on top. - dl.stale = true + dl.markStale() // Store the root->id lookup afterwards. All stored lookups are identified // by the **unique** state root. It's impossible that in the same chain @@ -190,20 +329,43 @@ func (dl *diskLayer) commit(bottom *diffLayer, force bool) (*diskLayer, error) { } rawdb.WriteStateID(dl.db.diskdb, bottom.rootHash(), bottom.stateID()) - // Construct a new disk layer by merging the nodes from the provided diff - // layer, and flush the content in disk layer if there are too many nodes - // cached. The clean cache is inherited from the original disk layer. - ndl := newDiskLayer(bottom.root, bottom.stateID(), dl.db, dl.cleans, dl.buffer.commit(bottom.nodes)) - // In a unique scenario where the ID of the oldest history object (after tail // truncation) surpasses the persisted state ID, we take the necessary action - // of forcibly committing the cached dirty nodes to ensure that the persisted + // of forcibly committing the cached dirty states to ensure that the persisted // state ID remains higher. if !force && rawdb.ReadPersistentStateID(dl.db.diskdb) < oldest { force = true } - if err := ndl.buffer.flush(ndl.db.diskdb, ndl.cleans, ndl.id, force); err != nil { - return nil, err + // Merge the trie nodes and flat states of the bottom-most diff layer into the + // buffer as the combined layer. + combined := dl.buffer.commit(bottom.nodes, bottom.states.stateSet) + + // Terminate the background state snapshot generation before mutating the + // persistent state. + if combined.full() || force { + // Terminate the background state snapshot generator to prevent data race + var progress []byte + if dl.generator != nil { + dl.generator.stop() + progress = dl.generator.progressMarker() + log.Info("Terminated state snapshot generation") + } + // Flush the content in combined buffer. Any state data after the progress + // marker will be ignored, as the generator will pick it up later. + if err := combined.flush(bottom.root, dl.db.diskdb, progress, dl.nodes, dl.states, bottom.stateID()); err != nil { + return nil, err + } + // Relaunch the state snapshot generation if it's not done yet + if progress != nil { + dl.generator.run(bottom.root) + log.Info("Resumed state snapshot generation", "root", bottom.root) + } + } + ndl := newDiskLayer(bottom.root, bottom.stateID(), dl.db, dl.nodes, dl.states, combined) + + // Link the generator if snapshot is not yet completed + if dl.generator != nil && !dl.generator.completed() { + ndl.setGenerator(dl.generator) } // To remove outdated history objects from the end, we set the 'tail' parameter // to 'oldest-1' due to the offset between the freezer index and the history ID. 
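The commit path above establishes an ordering that is easy to miss in the full diff: stop the generator, flush the combined buffer with the progress marker so not-yet-generated ranges are skipped, then resume generation at the new root if it had not finished. A condensed, runnable sketch of that choreography, with simplified signatures (the real flush also receives the clean caches and the state ID):

```go
package main

import "fmt"

// generator stands in for the pathdb snapshot generator: stoppable,
// resumable, with a progress marker (nil once generation has completed).
type generator struct{ progress []byte }

func (g *generator) stop()                  { fmt.Println("generator stopped") }
func (g *generator) run(root string)        { fmt.Println("generator resumed at", root) }
func (g *generator) progressMarker() []byte { return g.progress }

// flushWithGenerator mirrors the ordering in diskLayer.commit above:
// stop the generator, flush the buffer (ignoring anything beyond the
// progress marker), then relaunch generation if it was unfinished.
func flushWithGenerator(gen *generator, root string, flush func(progress []byte) error) error {
	var progress []byte
	if gen != nil {
		gen.stop() // no concurrent snapshot writes past this point
		progress = gen.progressMarker()
	}
	if err := flush(progress); err != nil {
		return err
	}
	if progress != nil {
		gen.run(root) // the generator picks up from the journalled marker
	}
	return nil
}

func main() {
	gen := &generator{progress: []byte{0xaa}} // unfinished generation
	_ = flushWithGenerator(gen, "0xroot", func(progress []byte) error {
		fmt.Printf("flushing, skipping keys beyond %x\n", progress)
		return nil
	})
}
```

In the real commit this whole branch only runs when the combined buffer is full or a flush is forced; otherwise the buffer simply keeps accumulating and the generator is left running.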
@@ -225,6 +387,17 @@ func (dl *diskLayer) revert(h *history) (*diskLayer, error) { if dl.id == 0 { return nil, fmt.Errorf("%w: zero state id", errStateUnrecoverable) } + var ( + buff = crypto.NewKeccakState() + accounts = make(map[common.Hash][]byte) + storages = make(map[common.Hash]map[common.Hash][]byte) + ) + for addr, blob := range h.accounts { + accounts[crypto.HashData(buff, addr.Bytes())] = blob + } + for addr, storage := range h.storages { + storages[crypto.HashData(buff, addr.Bytes())] = storage + } // Apply the reverse state changes upon the current state. This must // be done before holding the lock in order to access state in "this" // layer. @@ -233,10 +406,7 @@ func (dl *diskLayer) revert(h *history) (*diskLayer, error) { return nil, err } // Mark the diskLayer as stale before applying any mutations on top. - dl.lock.Lock() - defer dl.lock.Unlock() - - dl.stale = true + dl.markStale() // State change may be applied to node buffer, or the persistent // state, depends on if node buffer is empty or not. If the node @@ -244,30 +414,45 @@ func (dl *diskLayer) revert(h *history) (*diskLayer, error) { // needs to be reverted is not yet flushed and cached in node // buffer, otherwise, manipulate persistent state directly. if !dl.buffer.empty() { - err := dl.buffer.revert(dl.db.diskdb, nodes) + err := dl.buffer.revert(dl.db.diskdb, nodes, accounts, storages) if err != nil { return nil, err } - } else { - batch := dl.db.diskdb.NewBatch() - writeNodes(batch, nodes, dl.cleans) - rawdb.WritePersistentStateID(batch, dl.id-1) - if err := batch.Write(); err != nil { - log.Crit("Failed to write states", "err", err) + ndl := newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.nodes, dl.states, dl.buffer) + + // Link the generator if it exists + if dl.generator != nil { + ndl.setGenerator(dl.generator) } + return ndl, nil } - return newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.cleans, dl.buffer), nil -} - -// setBufferSize sets the node buffer size to the provided value. -func (dl *diskLayer) setBufferSize(size int) error { - dl.lock.RLock() - defer dl.lock.RUnlock() - - if dl.stale { - return errSnapshotStale + // Terminate the generation before writing any data into database + var progress []byte + if dl.generator != nil { + dl.generator.stop() + progress = dl.generator.progressMarker() + } + batch := dl.db.diskdb.NewBatch() + writeNodes(batch, nodes, dl.nodes) + + // Provide the original values of modified accounts and storages for revert. + // Note the account deletions are included in accounts map (with value as nil), + // rather than the destruction list (nil list). + writeStates(dl.db.diskdb, batch, progress, nil, accounts, storages, dl.states) + rawdb.WritePersistentStateID(batch, dl.id-1) + rawdb.WriteSnapshotRoot(batch, h.meta.parent) + if err := batch.Write(); err != nil { + log.Crit("Failed to write states", "err", err) } - return dl.buffer.setSize(size, dl.db.diskdb, dl.cleans, dl.id) + // Link the generator and resume generation if the snapshot is not yet + // fully completed. + ndl := newDiskLayer(h.meta.parent, dl.id-1, dl.db, dl.nodes, dl.states, dl.buffer) + if dl.generator != nil && !dl.generator.completed() { + ndl.generator = dl.generator + ndl.generator.run(h.meta.parent) + log.Info("Resumed state snapshot generation", "root", h.meta.parent) + } + return ndl, nil } // size returns the approximate size of cached nodes in the disk layer. 
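Both the revert path above and the disk-layer read paths gate flat-state access on the generator's progress marker. The rule, assuming the marker layout used in this diff (a 32-byte account hash, optionally followed by a 32-byte storage hash): a key is only trusted in the snapshot if it sorts at or below the marker; anything beyond it yields errNotCoveredYet and the caller falls back to the trie. A standalone sketch of the predicate:

```go
package main

import (
	"bytes"
	"fmt"
)

// covered reports whether a flat-state key has already been processed by
// the snapshot generator. A nil marker means generation has completed and
// everything is covered; otherwise keys are compared lexicographically
// against the marker (account hash, optionally + storage hash).
func covered(marker, key []byte) bool {
	if marker == nil {
		return true
	}
	return bytes.Compare(key, marker) <= 0
}

func main() {
	// Marker parked at account 0x55...: accounts sorting below it are
	// readable, those above would get errNotCoveredYet in pathdb.
	marker := bytes.Repeat([]byte{0x55}, 32)
	low := bytes.Repeat([]byte{0x11}, 32)
	high := bytes.Repeat([]byte{0x99}, 32)
	fmt.Println(covered(marker, low))  // true: already generated
	fmt.Println(covered(marker, high)) // false: not covered yet
	fmt.Println(covered(nil, high))    // true: generation done
}
```

writeStates below applies the same comparison per account, with an extra storage-suffix check for the one account the marker is parked inside.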
@@ -278,7 +463,7 @@ func (dl *diskLayer) size() common.StorageSize { if dl.stale { return 0 } - return common.StorageSize(dl.buffer.size) + return common.StorageSize(dl.buffer.size()) } // resetCache releases the memory held by clean cache to prevent memory leak. @@ -286,13 +471,25 @@ func (dl *diskLayer) resetCache() { dl.lock.RLock() defer dl.lock.RUnlock() - // Stale disk layer loses the ownership of clean cache. + // Stale disk layer loses the ownership of clean caches. if dl.stale { return } - if dl.cleans != nil { - dl.cleans.Reset() + if dl.nodes != nil { + dl.nodes.Reset() + } + if dl.states != nil { + dl.states.Reset() + } +} + +// genMarker returns the current state snapshot generation progress marker. If +// the state snapshot has already been fully generated, nil is returned. +func (dl *diskLayer) genMarker() []byte { + if dl.generator == nil { + return nil } + return dl.generator.progressMarker() } // hasher is used to compute the sha256 hash of the provided data. diff --git a/triedb/pathdb/errors.go b/triedb/pathdb/errors.go index 498bc9ec8107..b656a6f3dcab 100644 --- a/triedb/pathdb/errors.go +++ b/triedb/pathdb/errors.go @@ -39,4 +39,13 @@ var ( // errStateUnrecoverable is returned if state is required to be reverted to // a destination without associated state history available. errStateUnrecoverable = errors.New("state is unrecoverable") + + // errNotCoveredYet is returned from data accessors if the underlying snapshot + // is being generated currently and the requested data item is not yet in the + // range of accounts covered. + errNotCoveredYet = errors.New("not covered yet") + + // errNotConstructed is returned if the callers want to iterate the snapshot + // while the generation is not finished yet. + errNotConstructed = errors.New("state snapshot is not constructed") ) diff --git a/triedb/pathdb/execute.go b/triedb/pathdb/execute.go index 9074e4debf26..e24d0710f3da 100644 --- a/triedb/pathdb/execute.go +++ b/triedb/pathdb/execute.go @@ -43,7 +43,7 @@ type context struct { // apply processes the given state diffs, updates the corresponding post-state // and returns the trie nodes that have been modified. -func apply(db database.Database, prevRoot common.Hash, postRoot common.Hash, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) (map[common.Hash]map[string]*trienode.Node, error) { +func apply(db database.NodeDatabase, prevRoot common.Hash, postRoot common.Hash, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) (map[common.Hash]map[string]*trienode.Node, error) { tr, err := trie.New(trie.TrieID(postRoot), db) if err != nil { return nil, err @@ -80,7 +80,7 @@ func apply(db database.Database, prevRoot common.Hash, postRoot common.Hash, acc // updateAccount the account was present in prev-state, and may or may not // existent in post-state. Apply the reverse diff and verify if the storage // root matches the one in prev-state account. -func updateAccount(ctx *context, db database.Database, addr common.Address) error { +func updateAccount(ctx *context, db database.NodeDatabase, addr common.Address) error { // The account was present in prev-state, decode it from the // 'slim-rlp' format bytes. h := newHasher() @@ -141,7 +141,7 @@ func updateAccount(ctx *context, db database.Database, addr common.Address) erro // deleteAccount the account was not present in prev-state, and is expected // to be existent in post-state. 
Apply the reverse diff and verify if the // account and storage is wiped out correctly. -func deleteAccount(ctx *context, db database.Database, addr common.Address) error { +func deleteAccount(ctx *context, db database.NodeDatabase, addr common.Address) error { // The account must be existent in post-state, load the account. h := newHasher() defer h.release() diff --git a/triedb/pathdb/flush.go b/triedb/pathdb/flush.go new file mode 100644 index 000000000000..cd9eeeb3c590 --- /dev/null +++ b/triedb/pathdb/flush.go @@ -0,0 +1,140 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// nodeCacheKey constructs the unique key of clean cache. +func nodeCacheKey(owner common.Hash, path []byte) []byte { + if owner == (common.Hash{}) { + return path + } + return append(owner.Bytes(), path...) +} + +// writeNodes writes the trie nodes into the provided database batch. +// Note this function will also inject all the newly written nodes +// into clean cache. +func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.Node, clean *fastcache.Cache) (total int) { + for owner, subset := range nodes { + for path, n := range subset { + if n.IsDeleted() { + if owner == (common.Hash{}) { + rawdb.DeleteAccountTrieNode(batch, []byte(path)) + } else { + rawdb.DeleteStorageTrieNode(batch, owner, []byte(path)) + } + if clean != nil { + clean.Del(nodeCacheKey(owner, []byte(path))) + } + } else { + if owner == (common.Hash{}) { + rawdb.WriteAccountTrieNode(batch, []byte(path), n.Blob) + } else { + rawdb.WriteStorageTrieNode(batch, owner, []byte(path), n.Blob) + } + if clean != nil { + clean.Set(nodeCacheKey(owner, []byte(path)), n.Blob) + } + } + } + total += len(subset) + } + return total +} + +// writeStates flushes state mutations into the provided database batch as a whole. 
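One detail in the cache handling deserves a callout before the writeStates body below: writeNodes above evicts deleted trie nodes from the clean cache with Del, whereas writeStates records deletions as empty values with Set(key, nil). The empty entry is deliberate negative caching, letting later reads answer "known absent" without a disk lookup. A small standalone illustration of the difference using fastcache, the cache library this diff already depends on:

```go
package main

import (
	"fmt"

	"github.com/VictoriaMetrics/fastcache"
)

func main() {
	cache := fastcache.New(32 * 1024 * 1024)
	key := []byte("acct-hash")

	// Negative caching, as done for flat states: an empty value is a
	// cache *hit* meaning "this account/slot does not exist".
	cache.Set(key, nil)
	if blob, found := cache.HasGet(nil, key); found {
		fmt.Printf("hit, %d bytes => known absent\n", len(blob))
	}

	// Eviction, as done for trie nodes: after Del the next lookup is a
	// cache *miss* and must fall through to disk.
	cache.Del(key)
	if _, found := cache.HasGet(nil, key); !found {
		fmt.Println("miss => must re-read from disk")
	}
}
```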
+func writeStates(db ethdb.KeyValueStore, batch ethdb.Batch, genMarker []byte, destructSet map[common.Hash]struct{}, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) { + var ( + accounts int + slots int + ) + for addrHash := range destructSet { + // Skip any account not covered yet by the snapshot + if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { + continue + } + rawdb.DeleteAccountSnapshot(batch, addrHash) + accounts += 1 + if clean != nil { + clean.Set(addrHash[:], nil) + } + // Safe to traverse the account storage for Ethereum mainnet (no OOM issue) + it := rawdb.IterateStorageSnapshots(db, addrHash) + for it.Next() { + batch.Delete(it.Key()) + slots += 1 + if clean != nil { + clean.Del(it.Key()[len(rawdb.SnapshotStoragePrefix):]) + } + } + it.Release() + } + for addrHash, blob := range accountData { + // Skip any account not covered yet by the snapshot + if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { + continue + } + accounts += 1 + if len(blob) == 0 { + rawdb.DeleteAccountSnapshot(batch, addrHash) + if clean != nil { + clean.Set(addrHash[:], nil) + } + } else { + rawdb.WriteAccountSnapshot(batch, addrHash, blob) + if clean != nil { + clean.Set(addrHash[:], blob) + } + } + } + for addrHash, storages := range storageData { + // Skip any account not covered yet by the snapshot + if genMarker != nil && bytes.Compare(addrHash[:], genMarker) > 0 { + continue + } + midAccount := genMarker != nil && bytes.Equal(addrHash[:], genMarker[:common.HashLength]) + + for storageHash, blob := range storages { + // Skip any slot not covered yet by the snapshot + if midAccount && bytes.Compare(storageHash[:], genMarker[common.HashLength:]) > 0 { + continue + } + slots += 1 + if len(blob) == 0 { + rawdb.DeleteStorageSnapshot(batch, addrHash, storageHash) + if clean != nil { + clean.Set(append(addrHash[:], storageHash[:]...), nil) + } + } else { + rawdb.WriteStorageSnapshot(batch, addrHash, storageHash, blob) + if clean != nil { + clean.Set(append(addrHash[:], storageHash[:]...), blob) + } + } + } + } + return accounts, slots +} diff --git a/triedb/pathdb/generate.go b/triedb/pathdb/generate.go new file mode 100644 index 000000000000..85778f3d7add --- /dev/null +++ b/triedb/pathdb/generate.go @@ -0,0 +1,843 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ +package pathdb + +import ( + "bytes" + "errors" + "fmt" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/triedb/database" +) + +var ( + // accountCheckRange is the upper limit of the number of accounts involved in + // each range check. This is a value estimated based on experience. If this + // range is too large, the failure rate of range proof will increase. Otherwise, + // if the range is too small, the efficiency of the state recovery will decrease. + accountCheckRange = 128 + + // storageCheckRange is the upper limit of the number of storage slots involved + // in each range check. This is a value estimated based on experience. If this + // range is too large, the failure rate of range proof will increase. Otherwise, + // if the range is too small, the efficiency of the state recovery will decrease. + storageCheckRange = 1024 + + // errMissingTrie is returned if the target trie is missing while the generation + // is running. In this case the generation is aborted and wait the new signal. + errMissingTrie = errors.New("missing trie") +) + +// diskReader is a wrapper of key-value store and implements database.NodeReader, +// providing a function for accessing persistent trie nodes in the disk +type diskReader struct{ db ethdb.KeyValueStore } + +// Node retrieves the trie node blob with the provided trie identifier, +// node path and the corresponding node hash. No error will be returned +// if the node is not found. +func (r *diskReader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { + if owner == (common.Hash{}) { + return rawdb.ReadAccountTrieNode(r.db, path), nil + } + return rawdb.ReadStorageTrieNode(r.db, owner, path), nil +} + +// diskStore is a wrapper of key-value store and implements database.NodeDatabase. +// It's meant to be used for generating state snapshot from the trie data. +type diskStore struct { + db ethdb.KeyValueStore +} + +// NodeReader returns a node reader associated with the specific state. +// An error will be returned if the specified state is not available. +func (s *diskStore) NodeReader(stateRoot common.Hash) (database.NodeReader, error) { + root := types.EmptyRootHash + if blob := rawdb.ReadAccountTrieNode(s.db, nil); len(blob) > 0 { + root = crypto.Keccak256Hash(blob) + } + if root != stateRoot { + return nil, fmt.Errorf("state %x is not available", stateRoot) + } + return &diskReader{s.db}, nil +} + +// Generator is the struct for initial state snapshot generation. It is not thread-safe; +// the caller must manage concurrency issues themselves. 
+type generator struct { + noBuild bool // Flag indicating whether snapshot generation is permitted + running bool // Flag indicating whether the background generation is running + + db ethdb.KeyValueStore // Key-value store containing the snapshot data + stats *generatorStats // Generation statistics used throughout the entire life cycle + abort chan chan struct{} // Notification channel to abort generating the snapshot in this layer + done chan struct{} // Notification channel when generation is done (test synchronicity) + + progress []byte // Progress marker of the state generation, nil means it's completed + lock sync.RWMutex // Lock which protects the progress +} + +// newGenerator constructs the state snapshot generator. +func newGenerator(db ethdb.KeyValueStore, noBuild bool, progress []byte, stats *generatorStats) *generator { + if stats == nil { + stats = &generatorStats{start: time.Now()} + } + return &generator{ + noBuild: noBuild, + progress: progress, + db: db, + stats: stats, + abort: make(chan chan struct{}), + done: make(chan struct{}), + } +} + +// run starts the state snapshot generation in the background. +func (g *generator) run(root common.Hash) { + if g.noBuild { + log.Warn("Snapshot generation is not permitted") + return + } + if g.running { + g.stop() + log.Warn("Terminated the leftover generation cycle") + } + g.running = true + go g.generate(newGeneratorContext(root, g.progress, g.db)) +} + +// stop terminates the background generation if it's actively running. +func (g *generator) stop() { + if !g.running { + log.Warn("Snapshot generation is not running") + return + } + ch := make(chan struct{}) + g.abort <- ch + <-ch + g.running = false +} + +// completed returns the flag indicating if the whole generation is done. +func (g *generator) completed() bool { + progress := g.progressMarker() + return progress == nil +} + +// progressMarker returns the current generation progress marker. +func (g *generator) progressMarker() []byte { + g.lock.RLock() + defer g.lock.RUnlock() + + return g.progress +} + +// splitMarker is an internal helper which splits the generation progress marker +// into two parts. +func splitMarker(marker []byte) ([]byte, []byte) { + var accMarker []byte + if len(marker) > 0 { // []byte{} is the start, use nil for that + accMarker = marker[:common.HashLength] + } + return accMarker, marker +} + +// generateSnapshot regenerates a brand-new snapshot based on an existing state +// database and head block asynchronously. The snapshot is returned immediately +// and generation is continued in the background until done. +func generateSnapshot(triedb *Database, root common.Hash) *diskLayer { + // Create a new disk layer with an initialized state marker at zero + var ( + stats = &generatorStats{start: time.Now()} + genMarker = []byte{} // Initialized but empty! + ) + dl := newDiskLayer(root, 0, triedb, nil, nil, newBuffer(triedb.config.WriteBufferSize, nil, nil, 0)) + dl.generator = newGenerator(triedb.diskdb, false, genMarker, stats) + dl.generator.run(root) + log.Info("Started snapshot generation", "root", root) + return dl +} + +// journalProgress persists the generator stats into the database to resume later. +func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) { + // Write out the generator marker. Note it's a standalone disk layer generator + // which is not mixed with journal. It's ok if the generator is persisted while + // journal is not. 
+ entry := journalGenerator{ + Done: marker == nil, + Marker: marker, + } + if stats != nil { + entry.Accounts = stats.accounts + entry.Slots = stats.slots + entry.DanglingSlots = stats.dangling + entry.Storage = uint64(stats.storage) + } + blob, err := rlp.EncodeToBytes(entry) + if err != nil { + panic(err) // Cannot happen, here to catch dev errors + } + var logstr string + switch { + case marker == nil: + logstr = "done" + case bytes.Equal(marker, []byte{}): + logstr = "empty" + case len(marker) == common.HashLength: + logstr = fmt.Sprintf("%#x", marker) + default: + logstr = fmt.Sprintf("%#x:%#x", marker[:common.HashLength], marker[common.HashLength:]) + } + log.Debug("Journalled generator progress", "progress", logstr) + rawdb.WriteSnapshotGenerator(db, blob) +} + +// proofResult contains the output of range proving which can be used +// for further processing regardless if it is successful or not. +type proofResult struct { + keys [][]byte // The key set of all elements being iterated, even proving is failed + vals [][]byte // The val set of all elements being iterated, even proving is failed + diskMore bool // Set when the database has extra snapshot states since last iteration + trieMore bool // Set when the trie has extra snapshot states(only meaningful for successful proving) + proofErr error // Indicator whether the given state range is valid or not + tr *trie.Trie // The trie, in case the trie was resolved by the prover (may be nil) +} + +// valid returns the indicator that range proof is successful or not. +func (result *proofResult) valid() bool { + return result.proofErr == nil +} + +// last returns the last verified element key regardless of whether the range proof is +// successful or not. Nil is returned if nothing involved in the proving. +func (result *proofResult) last() []byte { + var last []byte + if len(result.keys) > 0 { + last = result.keys[len(result.keys)-1] + } + return last +} + +// forEach iterates all the visited elements and applies the given callback on them. +// The iteration is aborted if the callback returns non-nil error. +func (result *proofResult) forEach(callback func(key []byte, val []byte) error) error { + for i := 0; i < len(result.keys); i++ { + key, val := result.keys[i], result.vals[i] + if err := callback(key, val); err != nil { + return err + } + } + return nil +} + +// proveRange proves the snapshot segment with particular prefix is "valid". +// The iteration start point will be assigned if the iterator is restored from +// the last interruption. Max will be assigned in order to limit the maximum +// amount of data involved in each iteration. +// +// The proof result will be returned if the range proving is finished, otherwise +// the error will be returned to abort the entire procedure. +func (g *generator) proveRange(ctx *generatorContext, trieId *trie.ID, prefix []byte, kind string, origin []byte, max int, valueConvertFn func([]byte) ([]byte, error)) (*proofResult, error) { + var ( + keys [][]byte + vals [][]byte + proof = rawdb.NewMemoryDatabase() + diskMore = false + iter = ctx.iterator(kind) + start = time.Now() + min = append(prefix, origin...) + ) + for iter.Next() { + // Ensure the iterated item is always equal or larger than the given origin. + key := iter.Key() + if bytes.Compare(key, min) < 0 { + return nil, errors.New("invalid iteration position") + } + // Ensure the iterated item still fall in the specified prefix. If + // not which means the items in the specified area are all visited. 
+ // Move the iterator a step back since we iterate one extra element + // out. + if !bytes.Equal(key[:len(prefix)], prefix) { + iter.Hold() + break + } + // Break if we've reached the max size, and signal that we're not + // done yet. Move the iterator a step back since we iterate one + // extra element out. + if len(keys) == max { + iter.Hold() + diskMore = true + break + } + keys = append(keys, common.CopyBytes(key[len(prefix):])) + + if valueConvertFn == nil { + vals = append(vals, common.CopyBytes(iter.Value())) + } else { + val, err := valueConvertFn(iter.Value()) + if err != nil { + // Special case, the state data is corrupted (invalid slim-format account), + // don't abort the entire procedure directly. Instead, let the fallback + // generation to heal the invalid data. + // + // Here append the original value to ensure that the number of key and + // value are aligned. + vals = append(vals, common.CopyBytes(iter.Value())) + log.Error("Failed to convert account state data", "err", err) + } else { + vals = append(vals, val) + } + } + } + // Update metrics for database iteration and merkle proving + if kind == snapStorage { + storageSnapReadCounter.Inc(time.Since(start).Nanoseconds()) + } else { + accountSnapReadCounter.Inc(time.Since(start).Nanoseconds()) + } + defer func(start time.Time) { + if kind == snapStorage { + storageProveCounter.Inc(time.Since(start).Nanoseconds()) + } else { + accountProveCounter.Inc(time.Since(start).Nanoseconds()) + } + }(time.Now()) + + // The snap state is exhausted, pass the entire key/val set for verification + root := trieId.Root + if origin == nil && !diskMore { + stackTr := trie.NewStackTrie(nil) + for i, key := range keys { + if err := stackTr.Update(key, vals[i]); err != nil { + return nil, err + } + } + if gotRoot := stackTr.Hash(); gotRoot != root { + return &proofResult{ + keys: keys, + vals: vals, + proofErr: fmt.Errorf("wrong root: have %#x want %#x", gotRoot, root), + }, nil + } + return &proofResult{keys: keys, vals: vals}, nil + } + // Snap state is chunked, generate edge proofs for verification. + tr, err := trie.New(trieId, &diskStore{db: g.db}) + if err != nil { + log.Info("Trie missing, state snapshotting paused", "state", ctx.root, "kind", kind, "root", trieId.Root) + return nil, errMissingTrie + } + // Generate the Merkle proofs for the first and last element + if origin == nil { + origin = common.Hash{}.Bytes() + } + if err := tr.Prove(origin, proof); err != nil { + log.Debug("Failed to prove range", "kind", kind, "origin", origin, "err", err) + return &proofResult{ + keys: keys, + vals: vals, + diskMore: diskMore, + proofErr: err, + tr: tr, + }, nil + } + if len(keys) > 0 { + if err := tr.Prove(keys[len(keys)-1], proof); err != nil { + log.Debug("Failed to prove range", "kind", kind, "last", keys[len(keys)-1], "err", err) + return &proofResult{ + keys: keys, + vals: vals, + diskMore: diskMore, + proofErr: err, + tr: tr, + }, nil + } + } + // Verify the snapshot segment with range prover, ensure that all flat states + // in this range correspond to merkle trie. + cont, err := trie.VerifyRangeProof(root, origin, keys, vals, proof) + return &proofResult{ + keys: keys, + vals: vals, + diskMore: diskMore, + trieMore: cont, + proofErr: err, + tr: tr}, + nil +} + +// onStateCallback is a function that is called by generateRange, when processing a range of +// accounts or storage slots. For each element, the callback is invoked. +// +// - If 'delete' is true, then this element (and potential slots) needs to be deleted from the snapshot. 
+// - If 'write' is true, then this element needs to be updated with the 'val'.
+// - If 'write' is false, then this element is already correct, and needs no update.
+// The 'val' is the canonical encoding of the value (not the slim format for accounts)
+//
+// However, for accounts, the storage trie of the account needs to be checked. Also,
+// dangling storages (storage exists but the corresponding account is missing) need to
+// be cleaned up.
+type onStateCallback func(key []byte, val []byte, write bool, delete bool) error
+
+// generateRange generates the state segment with a particular prefix. Generation can
+// either verify the correctness of existing state through range-proof and skip
+// generation, or iterate the trie to regenerate the state on demand.
+func (g *generator) generateRange(ctx *generatorContext, trieId *trie.ID, prefix []byte, kind string, origin []byte, max int, onState onStateCallback, valueConvertFn func([]byte) ([]byte, error)) (bool, []byte, error) {
+	// Use the range prover to check the validity of the flat state in the range
+	result, err := g.proveRange(ctx, trieId, prefix, kind, origin, max, valueConvertFn)
+	if err != nil {
+		return false, nil, err
+	}
+	last := result.last()
+
+	// Construct contextual logger
+	logCtx := []interface{}{"kind", kind, "prefix", hexutil.Encode(prefix)}
+	if len(origin) > 0 {
+		logCtx = append(logCtx, "origin", hexutil.Encode(origin))
+	}
+	logger := log.New(logCtx...)
+
+	// The range prover says the range is correct, skip trie iteration
+	if result.valid() {
+		successfulRangeProofMeter.Mark(1)
+		logger.Trace("Proved state range", "last", hexutil.Encode(last))
+
+		// The verification passed, process each state with the given
+		// callback function. If this state represents a contract, the
+		// corresponding storage check will be performed in the callback
+		if err := result.forEach(func(key []byte, val []byte) error { return onState(key, val, false, false) }); err != nil {
+			return false, nil, err
+		}
+		// Only abort the iteration when both database and trie are exhausted
+		return !result.diskMore && !result.trieMore, last, nil
+	}
+	logger.Trace("Detected outdated state range", "last", hexutil.Encode(last), "err", result.proofErr)
+	failedRangeProofMeter.Mark(1)
+
+	// Special case: the entire trie is missing. In the original trie scheme,
+	// all the duplicated subtries are filtered out (only one copy of the data
+	// is stored). In the snapshot model, however, the storage tries belonging
+	// to different contracts are all kept, even if they are duplicated. Track
+	// this to some extent to remove the noise from the statistics.
+	if origin == nil && last == nil {
+		meter := missallAccountMeter
+		if kind == snapStorage {
+			meter = missallStorageMeter
+		}
+		meter.Mark(1)
+	}
+	// We use the snap data to build up a cache which can be used by the
+	// main account trie as a primary lookup when resolving hashes
+	var resolver trie.NodeResolver
+	if len(result.keys) > 0 {
+		tr := trie.NewEmpty(nil)
+		for i, key := range result.keys {
+			tr.Update(key, result.vals[i])
+		}
+		_, nodes := tr.Commit(false)
+		hashSet := nodes.HashSet()
+		resolver = func(owner common.Hash, path []byte, hash common.Hash) []byte {
+			return hashSet[hash]
+		}
+	}
+	// Construct the trie for state iteration, reusing the trie
+	// if it's already opened with some nodes resolved.
+ tr := result.tr + if tr == nil { + tr, err = trie.New(trieId, &diskStore{db: g.db}) + if err != nil { + log.Info("Trie missing, state snapshotting paused", "state", ctx.root, "kind", kind, "root", trieId.Root) + return false, nil, errMissingTrie + } + } + var ( + trieMore bool + kvkeys, kvvals = result.keys, result.vals + + // counters + count = 0 // number of states delivered by iterator + created = 0 // states created from the trie + updated = 0 // states updated from the trie + deleted = 0 // states not in trie, but were in snapshot + untouched = 0 // states already correct + + // timers + start = time.Now() + internal time.Duration + ) + nodeIt, err := tr.NodeIterator(origin) + if err != nil { + return false, nil, err + } + nodeIt.AddResolver(resolver) + iter := trie.NewIterator(nodeIt) + + for iter.Next() { + if last != nil && bytes.Compare(iter.Key, last) > 0 { + trieMore = true + break + } + count++ + write := true + created++ + for len(kvkeys) > 0 { + if cmp := bytes.Compare(kvkeys[0], iter.Key); cmp < 0 { + // delete the key + istart := time.Now() + if err := onState(kvkeys[0], nil, false, true); err != nil { + return false, nil, err + } + kvkeys = kvkeys[1:] + kvvals = kvvals[1:] + deleted++ + internal += time.Since(istart) + continue + } else if cmp == 0 { + // the snapshot key can be overwritten + created-- + if write = !bytes.Equal(kvvals[0], iter.Value); write { + updated++ + } else { + untouched++ + } + kvkeys = kvkeys[1:] + kvvals = kvvals[1:] + } + break + } + istart := time.Now() + if err := onState(iter.Key, iter.Value, write, false); err != nil { + return false, nil, err + } + internal += time.Since(istart) + } + if iter.Err != nil { + // Trie errors should never happen. Still, in case of a bug, expose the + // error here, as the outer code will presume errors are interrupts, not + // some deeper issues. + log.Error("State snapshotter failed to iterate trie", "err", iter.Err) + return false, nil, iter.Err + } + // Delete all stale snapshot states remaining + istart := time.Now() + for _, key := range kvkeys { + if err := onState(key, nil, false, true); err != nil { + return false, nil, err + } + deleted += 1 + } + internal += time.Since(istart) + + // Update metrics for counting trie iteration + if kind == snapStorage { + storageTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds()) + } else { + accountTrieReadCounter.Inc((time.Since(start) - internal).Nanoseconds()) + } + logger.Debug("Regenerated state range", "root", trieId.Root, "last", hexutil.Encode(last), + "count", count, "created", created, "updated", updated, "untouched", untouched, "deleted", deleted) + + // If there are either more trie items, or there are more snap items + // (in the next segment), then we need to keep working + return !trieMore && !result.diskMore, last, nil +} + +// checkAndFlush checks if an interruption signal is received or the +// batch size has exceeded the allowance. +func (g *generator) checkAndFlush(ctx *generatorContext, current []byte) error { + var abort chan struct{} + select { + case abort = <-g.abort: + default: + } + if ctx.batch.ValueSize() > ethdb.IdealBatchSize || abort != nil { + if bytes.Compare(current, g.progress) < 0 { + log.Error("Snapshot generator went backwards", "current", fmt.Sprintf("%x", current), "genMarker", fmt.Sprintf("%x", g.progress)) + } + // Persist the progress marker regardless of whether the batch is empty or not. 
+	// It may happen that all the flat states in the database are correct, so the
+	// generator indeed makes progress even if there is nothing to commit.
+		journalProgress(ctx.batch, current, g.stats)
+
+		// Flush out the database writes atomically
+		if err := ctx.batch.Write(); err != nil {
+			return err
+		}
+		ctx.batch.Reset()
+
+		// Update the generation progress marker
+		g.lock.Lock()
+		g.progress = current
+		g.lock.Unlock()
+
+		// Abort the generation if it's required
+		if abort != nil {
+			g.stats.log("Aborting state snapshot generation", ctx.root, g.progress)
+			return newAbortErr(abort) // bubble up an error for interruption
+		}
+		// Don't hold the iterators for too long, release them to let the compactor work
+		ctx.reopenIterator(snapAccount)
+		ctx.reopenIterator(snapStorage)
+	}
+	if time.Since(ctx.logged) > 8*time.Second {
+		g.stats.log("Generating state snapshot", ctx.root, g.progress)
+		ctx.logged = time.Now()
+	}
+	return nil
+}
+
+// generateStorages generates the missing storage slots of the specified contract.
+// It's supposed to restart the generation from the given origin position.
+func (g *generator) generateStorages(ctx *generatorContext, account common.Hash, storageRoot common.Hash, storeMarker []byte) error {
+	onStorage := func(key []byte, val []byte, write bool, delete bool) error {
+		defer func(start time.Time) {
+			storageWriteCounter.Inc(time.Since(start).Nanoseconds())
+		}(time.Now())
+
+		if delete {
+			rawdb.DeleteStorageSnapshot(ctx.batch, account, common.BytesToHash(key))
+			wipedStorageMeter.Mark(1)
+			return nil
+		}
+		if write {
+			rawdb.WriteStorageSnapshot(ctx.batch, account, common.BytesToHash(key), val)
+			generatedStorageMeter.Mark(1)
+		} else {
+			recoveredStorageMeter.Mark(1)
+		}
+		g.stats.storage += common.StorageSize(1 + 2*common.HashLength + len(val))
+		g.stats.slots++
+
+		// If we've exceeded our batch allowance or termination was requested, flush to disk
+		if err := g.checkAndFlush(ctx, append(account[:], key...)); err != nil {
+			return err
+		}
+		return nil
+	}
+	// Loop for re-generating the missing storage slots.
+	var origin = common.CopyBytes(storeMarker)
+	for {
+		id := trie.StorageTrieID(ctx.root, account, storageRoot)
+		exhausted, last, err := g.generateRange(ctx, id, append(rawdb.SnapshotStoragePrefix, account.Bytes()...), snapStorage, origin, storageCheckRange, onStorage, nil)
+		if err != nil {
+			return err // The procedure is aborted, either by an external signal or an internal error.
+		}
+		// Finish the procedure if the entire contract storage has been generated
+		if exhausted {
+			break
+		}
+		if origin = increaseKey(last); origin == nil {
+			break // special case, the last is 0xffffffff...fff
+		}
+	}
+	return nil
+}
+
+// generateAccounts generates the missing snapshot accounts as well as their
+// storage slots in the main trie. It's supposed to restart the generation
+// from the given origin position.
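generateStorages above and generateAccounts below share the same chunked driver loop: verify or regenerate one bounded range, then restart just past the last verified key, with increaseKey (defined near the end of generate.go) returning nil on overflow to mark the 0xff…ff end of the keyspace. A toy version of the loop over a one-byte keyspace, with the actual range work stubbed out:

```go
package main

import "fmt"

// increaseKey mirrors the helper in generate.go: step the key forward by
// one, returning nil when the increment overflows past 0xff...ff.
func increaseKey(key []byte) []byte {
	for i := len(key) - 1; i >= 0; i-- {
		key[i]++
		if key[i] != 0x0 {
			return key
		}
	}
	return nil
}

func main() {
	// Walk a 1-byte keyspace in chunks of 64, the way generateAccounts
	// walks accounts in chunks of accountCheckRange.
	origin := []byte{0x00}
	for chunk := 1; ; chunk++ {
		last := []byte{origin[0] + 63} // pretend 64 keys were verified
		exhausted := last[0] >= 0xc0   // pretend the trie ends at 0xc0
		fmt.Printf("chunk %d: %x..%x\n", chunk, origin, last)
		if exhausted {
			break
		}
		if origin = increaseKey(last); origin == nil {
			break // wrapped past 0xff: keyspace fully covered
		}
	}
}
```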
+func (g *generator) generateAccounts(ctx *generatorContext, accMarker []byte) error {
+	onAccount := func(key []byte, val []byte, write bool, delete bool) error {
+		// Make sure to clear all dangling storages before this account
+		account := common.BytesToHash(key)
+		g.stats.dangling += ctx.removeStorageBefore(account)
+
+		start := time.Now()
+		if delete {
+			rawdb.DeleteAccountSnapshot(ctx.batch, account)
+			wipedAccountMeter.Mark(1)
+			accountWriteCounter.Inc(time.Since(start).Nanoseconds())
+
+			ctx.removeStorageAt(account)
+			return nil
+		}
+		// Retrieve the current account and flatten it into the internal format
+		var acc types.StateAccount
+		if err := rlp.DecodeBytes(val, &acc); err != nil {
+			log.Crit("Invalid account encountered during snapshot creation", "err", err)
+		}
+		// If the account is not yet in-progress, write it out
+		if accMarker == nil || !bytes.Equal(account[:], accMarker) {
+			dataLen := len(val) // Approximate size, saves us a round of RLP-encoding
+			if !write {
+				if bytes.Equal(acc.CodeHash, types.EmptyCodeHash[:]) {
+					dataLen -= 32
+				}
+				if acc.Root == types.EmptyRootHash {
+					dataLen -= 32
+				}
+				recoveredAccountMeter.Mark(1)
+			} else {
+				data := types.SlimAccountRLP(acc)
+				dataLen = len(data)
+				rawdb.WriteAccountSnapshot(ctx.batch, account, data)
+				generatedAccountMeter.Mark(1)
+			}
+			g.stats.storage += common.StorageSize(1 + common.HashLength + dataLen)
+			g.stats.accounts++
+		}
+		// If snap generation reaches this point after an interruption, genMarker may
+		// go backward when the last genMarker consists of both accountHash and storageHash
+		marker := account[:]
+		if accMarker != nil && bytes.Equal(marker, accMarker) && len(g.progress) > common.HashLength {
+			marker = g.progress
+		}
+		// If we've exceeded our batch allowance or termination was requested, flush to disk
+		if err := g.checkAndFlush(ctx, marker); err != nil {
+			return err
+		}
+		accountWriteCounter.Inc(time.Since(start).Nanoseconds()) // let's count flush time as well
+
+		// If the iterated account is a contract, create a further loop to
+		// verify or regenerate the contract storage.
+		if acc.Root == types.EmptyRootHash {
+			ctx.removeStorageAt(account)
+		} else {
+			var storeMarker []byte
+			if accMarker != nil && bytes.Equal(account[:], accMarker) && len(g.progress) > common.HashLength {
+				storeMarker = g.progress[common.HashLength:]
+			}
+			if err := g.generateStorages(ctx, account, acc.Root, storeMarker); err != nil {
+				return err
+			}
+		}
+		// Some account was processed, unmark the marker
+		accMarker = nil
+		return nil
+	}
+	origin := common.CopyBytes(accMarker)
+	for {
+		id := trie.StateTrieID(ctx.root)
+		exhausted, last, err := g.generateRange(ctx, id, rawdb.SnapshotAccountPrefix, snapAccount, origin, accountCheckRange, onAccount, types.FullAccountRLP)
+		if err != nil {
+			return err // The procedure is aborted, either by an external signal or an internal error.
+		}
+		origin = increaseKey(last)
+
+		// Last step: clean up the storages after the last account.
+		// All remaining storages should be treated as dangling.
+		if origin == nil || exhausted {
+			g.stats.dangling += ctx.removeStorageLeft()
+			break
+		}
+	}
+	return nil
+}
+
+// generate is a background thread that iterates over the state and storage tries,
+// constructing the state snapshot. All the arguments are purely for statistics
+// gathering and logging, since the method surfs the blocks as they arrive, often
+// being restarted.
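The generate loop below coordinates shutdown with stop() through a channel-of-channels handshake: stop() sends a fresh acknowledgement channel into g.abort and blocks until the generator closes it, which guarantees the goroutine has fully quiesced before commit or revert mutate the persistent state. A minimal reproduction of the idiom:

```go
package main

import "fmt"

type worker struct {
	abort chan chan struct{}
}

// run processes items until an abort request arrives. On abort it closes
// the acknowledgement channel, proving it will touch nothing further.
func (w *worker) run() {
	for i := 0; ; i++ {
		select {
		case ack := <-w.abort:
			fmt.Println("worker: acknowledged abort at item", i)
			close(ack)
			return
		default:
			// ... process item i, flush batches, journal progress ...
		}
	}
}

// stop blocks until the worker has acknowledged the abort, mirroring
// generator.stop earlier in this file.
func (w *worker) stop() {
	ack := make(chan struct{})
	w.abort <- ack
	<-ack
}

func main() {
	w := &worker{abort: make(chan chan struct{})}
	go w.run()
	w.stop() // returns only after the goroutine has quiesced
	fmt.Println("safe to mutate persistent state")
}
```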
+func (g *generator) generate(ctx *generatorContext) { + g.stats.log("Resuming state snapshot generation", ctx.root, g.progress) + defer ctx.close() + + // Persist the initial marker and state snapshot root if progress is none + if len(g.progress) == 0 { + batch := g.db.NewBatch() + rawdb.WriteSnapshotRoot(batch, ctx.root) + journalProgress(batch, g.progress, g.stats) + if err := batch.Write(); err != nil { + log.Crit("Failed to write initialized state marker", "err", err) + } + } + // Initialize the global generator context. The snapshot iterators are + // opened at the interrupted position because the assumption is held + // that all the snapshot data are generated correctly before the marker. + // Even if the snapshot data is updated during the interruption (before + // or at the marker), the assumption is still held. + // For the account or storage slot at the interruption, they will be + // processed twice by the generator(they are already processed in the + // last run) but it's fine. + var ( + accMarker, _ = splitMarker(g.progress) + abort chan struct{} + ) + if err := g.generateAccounts(ctx, accMarker); err != nil { + // Extract the received interruption signal if exists + var aerr *abortErr + if errors.As(err, &aerr) { + abort = aerr.abort + } + // Aborted by internal error, wait the signal + if abort == nil { + abort = <-g.abort + } + close(abort) + return + } + // Snapshot fully generated, set the marker to nil. + // Note even there is nothing to commit, persist the + // generator anyway to mark the snapshot is complete. + journalProgress(ctx.batch, nil, g.stats) + if err := ctx.batch.Write(); err != nil { + log.Error("Failed to flush batch", "err", err) + abort = <-g.abort + close(abort) + return + } + ctx.batch.Reset() + + log.Info("Generated state snapshot", "accounts", g.stats.accounts, "slots", g.stats.slots, + "storage", g.stats.storage, "dangling", g.stats.dangling, "elapsed", common.PrettyDuration(time.Since(g.stats.start))) + + // Update the generation progress marker + g.lock.Lock() + g.progress = nil + g.lock.Unlock() + close(g.done) + + // Someone will be looking for us, wait it out + abort = <-g.abort + close(abort) +} + +// increaseKey increase the input key by one bit. Return nil if the entire +// addition operation overflows. +func increaseKey(key []byte) []byte { + for i := len(key) - 1; i >= 0; i-- { + key[i]++ + if key[i] != 0x0 { + return key + } + } + return nil +} + +// abortErr wraps an interruption signal received to represent the +// generation is aborted by external processes. +type abortErr struct { + abort chan struct{} +} + +func newAbortErr(abort chan struct{}) error { + return &abortErr{abort: abort} +} + +func (err *abortErr) Error() string { + return "aborted" +} diff --git a/triedb/pathdb/generate_test.go b/triedb/pathdb/generate_test.go new file mode 100644 index 000000000000..bbc7da3c9b13 --- /dev/null +++ b/triedb/pathdb/generate_test.go @@ -0,0 +1,740 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "fmt" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/internal/testrand" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/holiman/uint256" +) + +func hashData(input []byte) common.Hash { + return crypto.Keccak256Hash(input) +} + +type genTester struct { + diskdb ethdb.Database + db *Database + acctTrie *trie.Trie + nodes *trienode.MergedNodeSet + states *StateSetWithOrigin +} + +func newGenTester() *genTester { + disk := rawdb.NewMemoryDatabase() + config := *Defaults + config.SnapshotNoBuild = true // no background generation + db := New(disk, &config, false) + tr, _ := trie.New(trie.StateTrieID(types.EmptyRootHash), db) + return &genTester{ + diskdb: disk, + db: db, + acctTrie: tr, + nodes: trienode.NewMergedNodeSet(), + states: NewStateSetWithOrigin(nil, nil, nil, nil, nil), + } +} + +func (t *genTester) addTrieAccount(acckey string, acc *types.StateAccount) { + var ( + addr = common.BytesToAddress([]byte(acckey)) + key = hashData([]byte(acckey)) + val, _ = rlp.EncodeToBytes(acc) + ) + t.acctTrie.MustUpdate(key.Bytes(), val) + + t.states.accountData[key] = val + t.states.accountOrigin[addr] = nil +} + +func (t *genTester) addSnapAccount(acckey string, acc *types.StateAccount) { + key := hashData([]byte(acckey)) + rawdb.WriteAccountSnapshot(t.diskdb, key, types.SlimAccountRLP(*acc)) +} + +func (t *genTester) addAccount(acckey string, acc *types.StateAccount) { + t.addTrieAccount(acckey, acc) + t.addSnapAccount(acckey, acc) +} + +func (t *genTester) addSnapStorage(accKey string, keys []string, vals []string) { + accHash := hashData([]byte(accKey)) + for i, key := range keys { + rawdb.WriteStorageSnapshot(t.diskdb, accHash, hashData([]byte(key)), []byte(vals[i])) + } +} + +func (t *genTester) makeStorageTrie(accKey string, keys []string, vals []string, commit bool) common.Hash { + var ( + owner = hashData([]byte(accKey)) + addr = common.BytesToAddress([]byte(accKey)) + id = trie.StorageTrieID(types.EmptyRootHash, owner, types.EmptyRootHash) + tr, _ = trie.New(id, t.db) + + storages = make(map[common.Hash][]byte) + storageOrigins = make(map[common.Hash][]byte) + ) + for i, k := range keys { + key := hashData([]byte(k)) + tr.MustUpdate(key.Bytes(), []byte(vals[i])) + storages[key] = []byte(vals[i]) + storageOrigins[key] = nil + } + if !commit { + return tr.Hash() + } + root, nodes := tr.Commit(false) + if nodes != nil { + t.nodes.Merge(nodes) + } + t.states.storageData[owner] = storages + t.states.storageOrigin[addr] = storageOrigins + return root +} + +func (t *genTester) Commit() common.Hash { + root, nodes := t.acctTrie.Commit(true) + if nodes != nil { + t.nodes.Merge(nodes) + } + t.db.Update(root, types.EmptyRootHash, 0, t.nodes, t.states) + t.db.Commit(root, false) + return root +} + +func (t *genTester) CommitAndGenerate() (common.Hash, *diskLayer) { + root := t.Commit() + dl := generateSnapshot(t.db, root) + return root, dl +} + +// Tests that snapshot generation from an empty database. 
+func TestGeneration(t *testing.T) { + helper := newGenTester() + stRoot := helper.makeStorageTrie("", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, false) + + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + + helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + + root, dl := helper.CommitAndGenerate() + if have, want := root, common.HexToHash("0xe3712f1a226f3782caca78ca770ccc19ee000552813a9f59d479f8611db9b1fd"); have != want { + t.Fatalf("have %#x want %#x", have, want) + } + select { + case <-dl.generator.done: + // Snapshot generation succeeded + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +// Tests that snapshot generation with existent flat state, where the flat state +// contains some errors: +// - the contract with empty storage root but has storage entries in the disk +// - the contract with non empty storage root but empty storage slots +// - the contract(non-empty storage) misses some storage slots +// - miss in the beginning +// - miss in the middle +// - miss in the end +// +// - the contract(non-empty storage) has wrong storage slots +// - wrong slots in the beginning +// - wrong slots in the middle +// - wrong slots in the end +// +// - the contract(non-empty storage) has extra storage slots +// - extra slots in the beginning +// - extra slots in the middle +// - extra slots in the end +func TestGenerateExistentStateWithWrongStorage(t *testing.T) { + helper := newGenTester() + + // Account one, empty storage trie root but non-empty flat states + helper.addAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Account two, non-empty storage trie root but empty flat states + stRoot := helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + + // Miss slots + { + // Account three, non-empty root but misses slots in the beginning + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-3", []string{"key-2", "key-3"}, []string{"val-2", "val-3"}) + + // Account four, non-empty root but misses slots in the middle + helper.makeStorageTrie("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-4", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + 
helper.addSnapStorage("acc-4", []string{"key-1", "key-3"}, []string{"val-1", "val-3"}) + + // Account five, non-empty root but misses slots in the end + helper.makeStorageTrie("acc-5", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-5", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-5", []string{"key-1", "key-2"}, []string{"val-1", "val-2"}) + } + + // Wrong storage slots + { + // Account six, non-empty root but wrong slots in the beginning + helper.makeStorageTrie("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-6", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"badval-1", "val-2", "val-3"}) + + // Account seven, non-empty root but wrong slots in the middle + helper.makeStorageTrie("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-7", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-7", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "badval-2", "val-3"}) + + // Account eight, non-empty root but wrong slots in the end + helper.makeStorageTrie("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-8", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-8", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "badval-3"}) + + // Account 9, non-empty root but rotated slots + helper.makeStorageTrie("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-9", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-9", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-3", "val-2"}) + } + + // Extra storage slots + { + // Account 10, non-empty root but extra slots in the beginning + helper.makeStorageTrie("acc-10", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-10", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-10", []string{"key-0", "key-1", "key-2", "key-3"}, []string{"val-0", "val-1", "val-2", "val-3"}) + + // Account 11, non-empty root but extra slots in the middle + helper.makeStorageTrie("acc-11", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-11", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-11", []string{"key-1", "key-2", "key-2-1", "key-3"}, []string{"val-1", "val-2", "val-2-1", "val-3"}) + + // Account 12, non-empty root but extra slots in the end + helper.makeStorageTrie("acc-12", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-12", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapStorage("acc-12", []string{"key-1", "key-2", "key-3", "key-4"}, []string{"val-1", "val-2", "val-3", "val-4"}) + } + + root, dl := 
helper.CommitAndGenerate() + t.Logf("Root: %#x\n", root) // Root = 0x8746cce9fd9c658b2cfd639878ed6584b7a2b3e73bb40f607fcfa156002429a0 + + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +// Tests that snapshot generation with existent flat state, where the flat state +// contains some errors: +// - miss accounts +// - wrong accounts +// - extra accounts +func TestGenerateExistentStateWithWrongAccounts(t *testing.T) { + helper := newGenTester() + + // Trie accounts [acc-1, acc-2, acc-3, acc-4, acc-6] + helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-2", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.makeStorageTrie("acc-4", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + stRoot := helper.makeStorageTrie("acc-6", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + + // Missing accounts, only in the trie + { + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // Beginning + helper.addTrieAccount("acc-4", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // Middle + helper.addTrieAccount("acc-6", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // End + } + + // Wrong accounts + { + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: common.Hex2Bytes("0x1234")}) + + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addSnapAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) + } + + // Extra accounts, only in the snap + { + helper.addSnapAccount("acc-0", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // before the beginning + helper.addSnapAccount("acc-5", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: common.Hex2Bytes("0x1234")}) // Middle + helper.addSnapAccount("acc-7", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // after the end + } + + root, dl := helper.CommitAndGenerate() + t.Logf("Root: %#x\n", root) // Root = 0x825891472281463511e7ebcc7f109e4f9200c20fa384754e11fd605cd98464e8 + + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateCorruptAccountTrie(t *testing.T) { + helper := newGenTester() + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, 
CodeHash: types.EmptyCodeHash.Bytes()}) // 0xc7a30f39aff471c95d8a837497ad0e49b65be475cc0953540f80cfcdbdcd9074
+	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7
+	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x19ead688e907b0fab07176120dceec244a72aff2f0aa51e8b827584e378772f4
+
+	root := helper.Commit() // Root: 0xa04693ea110a31037fb5ee814308a6f1d76bdab0b11676bdf4541d2de55ba978
+
+	// Delete an account trie node and ensure the generator chokes
+	path := []byte{0xc}
+	rawdb.DeleteAccountTrieNode(helper.diskdb, path)
+	helper.db.tree.bottom().resetCache()
+
+	dl := generateSnapshot(helper.db, root)
+	select {
+	case <-dl.generator.done:
+		// Snapshot generation succeeded
+		t.Errorf("Snapshot generated against corrupt account trie")
+
+	case <-time.After(time.Second):
+		// Not generated fast enough, hopefully blocked inside on missing trie node fail
+	}
+	// Signal abortion to the generator and wait for it to tear down
+	dl.generator.stop()
+}
+
+func TestGenerateMissingStorageTrie(t *testing.T) {
+	var (
+		acc1   = hashData([]byte("acc-1"))
+		acc3   = hashData([]byte("acc-3"))
+		helper = newGenTester()
+	)
+	stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67
+	helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e
+	helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7
+	stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true)
+	helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2
+
+	root := helper.Commit()
+
+	// Delete storage trie root of account one and three.
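+	// A nil node path addresses the root node of a storage trie, so the two
+	// deletions below make both storage tries unreachable in their entirety.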
+ rawdb.DeleteStorageTrieNode(helper.diskdb, acc1, nil) + rawdb.DeleteStorageTrieNode(helper.diskdb, acc3, nil) + helper.db.tree.bottom().resetCache() + + dl := generateSnapshot(helper.db, root) + select { + case <-dl.generator.done: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt storage trie") + + case <-time.After(time.Second): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateCorruptStorageTrie(t *testing.T) { + helper := newGenTester() + + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) // 0xddefcd9376dd029653ef384bd2f0a126bb755fe84fdcc9e7cf421ba454f2bc67 + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x65145f923027566669a1ae5ccac66f945b55ff6eaeb17d2ea8e048b7d381f2d7 + stRoot = helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) // 0x50815097425d000edfc8b3a4a13e175fc2bdcfee8bdfbf2d1ff61041d3c235b2 + + root := helper.Commit() + + // Delete a node in the storage trie. + rawdb.DeleteStorageTrieNode(helper.diskdb, hashData([]byte("acc-1")), []byte{0x4}) + rawdb.DeleteStorageTrieNode(helper.diskdb, hashData([]byte("acc-3")), []byte{0x4}) + helper.db.tree.bottom().resetCache() + + dl := generateSnapshot(helper.db, root) + select { + case <-dl.generator.done: + // Snapshot generation succeeded + t.Errorf("Snapshot generated against corrupt storage trie") + + case <-time.After(time.Second): + // Not generated fast enough, hopefully blocked inside on missing trie node fail + } + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateWithExtraAccounts(t *testing.T) { + helper := newGenTester() + + // Account one in the trie + stRoot := helper.makeStorageTrie("acc-1", + []string{"key-1", "key-2", "key-3", "key-4", "key-5"}, + []string{"val-1", "val-2", "val-3", "val-4", "val-5"}, + true, + ) + acc := &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + helper.acctTrie.MustUpdate(hashData([]byte("acc-1")).Bytes(), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + // Identical in the snap + key := hashData([]byte("acc-1")) + rawdb.WriteAccountSnapshot(helper.diskdb, key, val) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-3")), []byte("val-3")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-4")), []byte("val-4")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-5")), []byte("val-5")) + + // Account two exists only in the snapshot + stRoot = helper.makeStorageTrie("acc-2", + []string{"key-1", "key-2", "key-3", "key-4", "key-5"}, + 
[]string{"val-1", "val-2", "val-3", "val-4", "val-5"}, + true, + ) + acc = &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()} + val, _ = rlp.EncodeToBytes(acc) + key = hashData([]byte("acc-2")) + rawdb.WriteAccountSnapshot(helper.diskdb, key, val) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("b-key-1")), []byte("b-val-1")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("b-key-2")), []byte("b-val-2")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("b-key-3")), []byte("b-val-3")) + + root := helper.Commit() + + // To verify the test: If we now inspect the snap db, there should exist extraneous storage items + if data := rawdb.ReadStorageSnapshot(helper.diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data == nil { + t.Fatalf("expected snap storage to exist") + } + dl := generateSnapshot(helper.db, root) + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() + + // If we now inspect the snap db, there should exist no extraneous storage items + if data := rawdb.ReadStorageSnapshot(helper.diskdb, hashData([]byte("acc-2")), hashData([]byte("b-key-1"))); data != nil { + t.Fatalf("expected slot to be removed, got %v", string(data)) + } +} + +func TestGenerateWithManyExtraAccounts(t *testing.T) { + helper := newGenTester() + + // Account one in the trie + stRoot := helper.makeStorageTrie("acc-1", + []string{"key-1", "key-2", "key-3"}, + []string{"val-1", "val-2", "val-3"}, + true, + ) + acc := &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + helper.acctTrie.MustUpdate(hashData([]byte("acc-1")).Bytes(), val) // 0x9250573b9c18c664139f3b6a7a8081b7d8f8916a8fcc5d94feec6c29f5fd4e9e + + // Identical in the snap + key := hashData([]byte("acc-1")) + rawdb.WriteAccountSnapshot(helper.diskdb, key, val) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-1")), []byte("val-1")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-2")), []byte("val-2")) + rawdb.WriteStorageSnapshot(helper.diskdb, key, hashData([]byte("key-3")), []byte("val-3")) + + // 100 accounts exist only in snapshot + for i := 0; i < 1000; i++ { + acc := &types.StateAccount{Balance: uint256.NewInt(uint64(i)), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + key := hashData([]byte(fmt.Sprintf("acc-%d", i))) + rawdb.WriteAccountSnapshot(helper.diskdb, key, val) + } + + _, dl := helper.CommitAndGenerate() + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateWithExtraBeforeAndAfter(t *testing.T) { + helper := newGenTester() + + acc := &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + + acctHashA := hashData([]byte("acc-1")) + acctHashB := hashData([]byte("acc-2")) + + helper.acctTrie.MustUpdate(acctHashA.Bytes(), val) + helper.acctTrie.MustUpdate(acctHashB.Bytes(), val) + + 
rawdb.WriteAccountSnapshot(helper.diskdb, acctHashA, val) + rawdb.WriteAccountSnapshot(helper.diskdb, acctHashB, val) + + for i := 0; i < 16; i++ { + rawdb.WriteAccountSnapshot(helper.diskdb, common.Hash{byte(i)}, val) + } + _, dl := helper.CommitAndGenerate() + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateWithMalformedStateData(t *testing.T) { + helper := newGenTester() + + acctHash := hashData([]byte("acc")) + acc := &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()} + val, _ := rlp.EncodeToBytes(acc) + helper.acctTrie.MustUpdate(acctHash.Bytes(), val) + + junk := make([]byte, 100) + copy(junk, []byte{0xde, 0xad}) + rawdb.WriteAccountSnapshot(helper.diskdb, acctHash, junk) + for i := 0; i < 16; i++ { + rawdb.WriteAccountSnapshot(helper.diskdb, common.Hash{byte(i)}, junk) + } + + _, dl := helper.CommitAndGenerate() + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateFromEmptySnap(t *testing.T) { + helper := newGenTester() + + for i := 0; i < 400; i++ { + stRoot := helper.makeStorageTrie(fmt.Sprintf("acc-%d", i), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount(fmt.Sprintf("acc-%d", i), &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + } + root, snap := helper.CommitAndGenerate() + t.Logf("Root: %#x\n", root) // Root: 0x6f7af6d2e1a1bf2b84a3beb3f8b64388465fbc1e274ca5d5d3fc787ca78f59e4 + + select { + case <-snap.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + snap.generator.stop() +} + +func TestGenerateWithIncompleteStorage(t *testing.T) { + helper := newGenTester() + stKeys := []string{"1", "2", "3", "4", "5", "6", "7", "8"} + stVals := []string{"v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8"} + + // We add 8 accounts, each one is missing exactly one of the storage slots. 
This means + // we don't have to order the keys and figure out exactly which hash-key winds up + // on the sensitive spots at the boundaries + for i := 0; i < 8; i++ { + accKey := fmt.Sprintf("acc-%d", i) + stRoot := helper.makeStorageTrie(accKey, stKeys, stVals, true) + helper.addAccount(accKey, &types.StateAccount{Balance: uint256.NewInt(uint64(i)), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + var moddedKeys []string + var moddedVals []string + for ii := 0; ii < 8; ii++ { + if ii != i { + moddedKeys = append(moddedKeys, stKeys[ii]) + moddedVals = append(moddedVals, stVals[ii]) + } + } + helper.addSnapStorage(accKey, moddedKeys, moddedVals) + } + root, dl := helper.CommitAndGenerate() + t.Logf("Root: %#x\n", root) // Root: 0xca73f6f05ba4ca3024ef340ef3dfca8fdabc1b677ff13f5a9571fd49c16e67ff + + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func incKey(key []byte) []byte { + for i := len(key) - 1; i >= 0; i-- { + key[i]++ + if key[i] != 0x0 { + break + } + } + return key +} + +func decKey(key []byte) []byte { + for i := len(key) - 1; i >= 0; i-- { + key[i]-- + if key[i] != 0xff { + break + } + } + return key +} + +func populateDangling(disk ethdb.KeyValueStore) { + populate := func(accountHash common.Hash, keys []string, vals []string) { + for i, key := range keys { + rawdb.WriteStorageSnapshot(disk, accountHash, hashData([]byte(key)), []byte(vals[i])) + } + } + // Dangling storages of the "first" account + populate(common.Hash{}, []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Dangling storages of the "last" account + populate(common.HexToHash("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Dangling storages around the account 1 + hash := decKey(hashData([]byte("acc-1")).Bytes()) + populate(common.BytesToHash(hash), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + hash = incKey(hashData([]byte("acc-1")).Bytes()) + populate(common.BytesToHash(hash), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Dangling storages around the account 2 + hash = decKey(hashData([]byte("acc-2")).Bytes()) + populate(common.BytesToHash(hash), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + hash = incKey(hashData([]byte("acc-2")).Bytes()) + populate(common.BytesToHash(hash), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Dangling storages around the account 3 + hash = decKey(hashData([]byte("acc-3")).Bytes()) + populate(common.BytesToHash(hash), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + hash = incKey(hashData([]byte("acc-3")).Bytes()) + populate(common.BytesToHash(hash), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + // Dangling storages of the random account + populate(testrand.Hash(), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + populate(testrand.Hash(), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + populate(testrand.Hash(), []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) +} + +func TestGenerateCompleteSnapshotWithDanglingStorage(t *testing.T) { + var helper = 
newGenTester() + + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(1), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) + + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + + helper.addSnapStorage("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + helper.addSnapStorage("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}) + + populateDangling(helper.diskdb) + + _, dl := helper.CommitAndGenerate() + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} + +func TestGenerateBrokenSnapshotWithDanglingStorage(t *testing.T) { + var helper = newGenTester() + + stRoot := helper.makeStorageTrie("acc-1", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-1", &types.StateAccount{Balance: uint256.NewInt(1), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + helper.addTrieAccount("acc-2", &types.StateAccount{Balance: uint256.NewInt(2), Root: types.EmptyRootHash, CodeHash: types.EmptyCodeHash.Bytes()}) + + helper.makeStorageTrie("acc-3", []string{"key-1", "key-2", "key-3"}, []string{"val-1", "val-2", "val-3"}, true) + helper.addTrieAccount("acc-3", &types.StateAccount{Balance: uint256.NewInt(3), Root: stRoot, CodeHash: types.EmptyCodeHash.Bytes()}) + + populateDangling(helper.diskdb) + + _, dl := helper.CommitAndGenerate() + select { + case <-dl.generator.done: + // Snapshot generation succeeded + + case <-time.After(3 * time.Second): + t.Errorf("Snapshot generation failed") + } + //checkSnapRoot(t, snap, root) + + // Signal abortion to the generator and wait for it to tear down + dl.generator.stop() +} diff --git a/triedb/pathdb/history.go b/triedb/pathdb/history.go index d77f7aa04d07..e1cd98115374 100644 --- a/triedb/pathdb/history.go +++ b/triedb/pathdb/history.go @@ -28,7 +28,6 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" - "github.com/ethereum/go-ethereum/trie/triestate" "golang.org/x/exp/maps" ) @@ -243,14 +242,14 @@ type history struct { } // newHistory constructs the state history object with provided state change set. 
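The hunk below swaps newHistory's *triestate.Set parameter for two explicit maps. A hedged sketch of the new call shape (root, parent, blockNum, addr, slotHash and prevSlotValue are placeholders; judging by writeHistory further down, the maps carry the pre-state origin values, and a nil account value denotes an account that did not previously exist):

accounts := map[common.Address][]byte{
	addr: nil, // account did not exist before this transition
}
storages := map[common.Address]map[common.Hash][]byte{
	addr: {slotHash: prevSlotValue},
}
h := newHistory(root, parent, blockNum, accounts, storages)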
-func newHistory(root common.Hash, parent common.Hash, block uint64, states *triestate.Set) *history { +func newHistory(root common.Hash, parent common.Hash, block uint64, accounts map[common.Address][]byte, storages map[common.Address]map[common.Hash][]byte) *history { var ( - accountList = maps.Keys(states.Accounts) + accountList = maps.Keys(accounts) storageList = make(map[common.Address][]common.Hash) ) slices.SortFunc(accountList, common.Address.Cmp) - for addr, slots := range states.Storages { + for addr, slots := range storages { slist := maps.Keys(slots) slices.SortFunc(slist, common.Hash.Cmp) storageList[addr] = slist @@ -262,9 +261,9 @@ func newHistory(root common.Hash, parent common.Hash, block uint64, states *trie root: root, block: block, }, - accounts: states.Accounts, + accounts: accounts, accountList: accountList, - storages: states.Storages, + storages: storages, storageList: storageList, } } @@ -499,7 +498,7 @@ func writeHistory(writer ethdb.AncientWriter, dl *diffLayer) error { } var ( start = time.Now() - history = newHistory(dl.rootHash(), dl.parentLayer().rootHash(), dl.block, dl.states) + history = newHistory(dl.rootHash(), dl.parentLayer().rootHash(), dl.block, dl.states.accountOrigin, dl.states.storageOrigin) ) accountData, storageData, accountIndex, storageIndex := history.encode() dataSize := common.StorageSize(len(accountData) + len(storageData)) diff --git a/triedb/pathdb/history_test.go b/triedb/pathdb/history_test.go index 586f907fe4e0..d430706dee8a 100644 --- a/triedb/pathdb/history_test.go +++ b/triedb/pathdb/history_test.go @@ -28,11 +28,10 @@ import ( "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/testrand" "github.com/ethereum/go-ethereum/rlp" - "github.com/ethereum/go-ethereum/trie/triestate" ) // randomStateSet generates a random state change set. -func randomStateSet(n int) *triestate.Set { +func randomStateSet(n int) (map[common.Address][]byte, map[common.Address]map[common.Hash][]byte) { var ( accounts = make(map[common.Address][]byte) storages = make(map[common.Address]map[common.Hash][]byte) @@ -47,11 +46,12 @@ func randomStateSet(n int) *triestate.Set { account := generateAccount(types.EmptyRootHash) accounts[addr] = types.SlimAccountRLP(account) } - return triestate.New(accounts, storages) + return accounts, storages } func makeHistory() *history { - return newHistory(testrand.Hash(), types.EmptyRootHash, 0, randomStateSet(3)) + accounts, storages := randomStateSet(3) + return newHistory(testrand.Hash(), types.EmptyRootHash, 0, accounts, storages) } func makeHistories(n int) []*history { @@ -61,7 +61,8 @@ func makeHistories(n int) []*history { ) for i := 0; i < n; i++ { root := testrand.Hash() - h := newHistory(root, parent, uint64(i), randomStateSet(3)) + accounts, storages := randomStateSet(3) + h := newHistory(root, parent, uint64(i), accounts, storages) parent = root result = append(result, h) } diff --git a/triedb/pathdb/holdable_iterator.go b/triedb/pathdb/holdable_iterator.go new file mode 100644 index 000000000000..4b852e65abbc --- /dev/null +++ b/triedb/pathdb/holdable_iterator.go @@ -0,0 +1,97 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" +) + +// holdableIterator is a wrapper of underlying database iterator. It extends +// the basic iterator interface by adding Hold which can hold the element +// locally where the iterator is currently located and serve it up next time. +type holdableIterator struct { + it ethdb.Iterator + key []byte + val []byte + atHeld bool +} + +// newHoldableIterator initializes the holdableIterator with the given iterator. +func newHoldableIterator(it ethdb.Iterator) *holdableIterator { + return &holdableIterator{it: it} +} + +// Hold holds the element locally where the iterator is currently located which +// can be served up next time. +func (it *holdableIterator) Hold() { + if it.it.Key() == nil { + return // nothing to hold + } + it.key = common.CopyBytes(it.it.Key()) + it.val = common.CopyBytes(it.it.Value()) + it.atHeld = false +} + +// Next moves the iterator to the next key/value pair. It returns whether the +// iterator is exhausted. +func (it *holdableIterator) Next() bool { + if !it.atHeld && it.key != nil { + it.atHeld = true + } else if it.atHeld { + it.atHeld = false + it.key = nil + it.val = nil + } + if it.key != nil { + return true // shifted to locally held value + } + return it.it.Next() +} + +// Error returns any accumulated error. Exhausting all the key/value pairs +// is not considered to be an error. +func (it *holdableIterator) Error() error { return it.it.Error() } + +// Release releases associated resources. Release should always succeed and can +// be called multiple times without causing error. +func (it *holdableIterator) Release() { + it.atHeld = false + it.key = nil + it.val = nil + it.it.Release() +} + +// Key returns the key of the current key/value pair, or nil if done. The caller +// should not modify the contents of the returned slice, and its contents may +// change on the next call to Next. +func (it *holdableIterator) Key() []byte { + if it.key != nil { + return it.key + } + return it.it.Key() +} + +// Value returns the value of the current key/value pair, or nil if done. The +// caller should not modify the contents of the returned slice, and its contents +// may change on the next call to Next. +func (it *holdableIterator) Value() []byte { + if it.val != nil { + return it.val + } + return it.it.Value() +} diff --git a/triedb/pathdb/holdable_iterator_test.go b/triedb/pathdb/holdable_iterator_test.go new file mode 100644 index 000000000000..07a432b33f2f --- /dev/null +++ b/triedb/pathdb/holdable_iterator_test.go @@ -0,0 +1,163 @@ +// Copyright 2022 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package pathdb
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+)
+
+func TestIteratorHold(t *testing.T) {
+	// Create the key-value data store
+	var (
+		content = map[string]string{"k1": "v1", "k2": "v2", "k3": "v3"}
+		order   = []string{"k1", "k2", "k3"}
+		db      = rawdb.NewMemoryDatabase()
+	)
+	for key, val := range content {
+		if err := db.Put([]byte(key), []byte(val)); err != nil {
+			t.Fatalf("failed to insert item %s:%s into database: %v", key, val, err)
+		}
+	}
+	// Iterate over the database with the given configs and verify the results
+	it, idx := newHoldableIterator(db.NewIterator(nil, nil)), 0
+
+	// Nothing should be affected by calling Hold on a non-initialized iterator
+	it.Hold()
+
+	for it.Next() {
+		if len(content) <= idx {
+			t.Errorf("more items than expected: checking idx=%d (key %q), expecting len=%d", idx, it.Key(), len(order))
+			break
+		}
+		if !bytes.Equal(it.Key(), []byte(order[idx])) {
+			t.Errorf("item %d: key mismatch: have %s, want %s", idx, string(it.Key()), order[idx])
+		}
+		if !bytes.Equal(it.Value(), []byte(content[order[idx]])) {
+			t.Errorf("item %d: value mismatch: have %s, want %s", idx, string(it.Value()), content[order[idx]])
+		}
+		// Should be safe to call Hold multiple times
+		it.Hold()
+		it.Hold()
+
+		// Shift iterator to the held element
+		it.Next()
+		if !bytes.Equal(it.Key(), []byte(order[idx])) {
+			t.Errorf("item %d: key mismatch: have %s, want %s", idx, string(it.Key()), order[idx])
+		}
+		if !bytes.Equal(it.Value(), []byte(content[order[idx]])) {
+			t.Errorf("item %d: value mismatch: have %s, want %s", idx, string(it.Value()), content[order[idx]])
+		}
+
+		// Hold/Next combo should always work
+		it.Hold()
+		it.Next()
+		if !bytes.Equal(it.Key(), []byte(order[idx])) {
+			t.Errorf("item %d: key mismatch: have %s, want %s", idx, string(it.Key()), order[idx])
+		}
+		if !bytes.Equal(it.Value(), []byte(content[order[idx]])) {
+			t.Errorf("item %d: value mismatch: have %s, want %s", idx, string(it.Value()), content[order[idx]])
+		}
+		idx++
+	}
+	if err := it.Error(); err != nil {
+		t.Errorf("iteration failed: %v", err)
+	}
+	if idx != len(order) {
+		t.Errorf("iteration terminated prematurely: have %d, want %d", idx, len(order))
+	}
+	db.Close()
+}
+
+func TestReopenIterator(t *testing.T) {
+	var (
+		content = map[common.Hash]string{
+			common.HexToHash("a1"): "v1",
+			common.HexToHash("a2"): "v2",
+			common.HexToHash("a3"): "v3",
+			common.HexToHash("a4"): "v4",
+			common.HexToHash("a5"): "v5",
+			common.HexToHash("a6"): "v6",
+		}
+		order = []common.Hash{
+			common.HexToHash("a1"),
+			common.HexToHash("a2"),
+			common.HexToHash("a3"),
+			common.HexToHash("a4"),
+			common.HexToHash("a5"),
+			common.HexToHash("a6"),
+		}
+		db = rawdb.NewMemoryDatabase()
+	)
+	for key, val := range content {
+		rawdb.WriteAccountSnapshot(db, key, []byte(val))
+	}
+	checkVal := func(it *holdableIterator, index int) {
+		if !bytes.Equal(it.Key(), append(rawdb.SnapshotAccountPrefix, order[index].Bytes()...)) {
+			t.Fatalf("Unexpected data entry key, want %v got %v", order[index], it.Key())
+		}
+		if
!bytes.Equal(it.Value(), []byte(content[order[index]])) {
+			t.Fatalf("Unexpected data entry value, want %v got %v", []byte(content[order[index]]), it.Value())
+		}
+	}
+	// Iterate over the database with the given configs and verify the results
+	ctx, idx := newGeneratorContext(common.Hash{}, nil, db), -1
+
+	idx++
+	ctx.account.Next()
+	checkVal(ctx.account, idx)
+
+	ctx.reopenIterator(snapAccount)
+	idx++
+	ctx.account.Next()
+	checkVal(ctx.account, idx)
+
+	// reopen twice
+	ctx.reopenIterator(snapAccount)
+	ctx.reopenIterator(snapAccount)
+	idx++
+	ctx.account.Next()
+	checkVal(ctx.account, idx)
+
+	// reopen iterator with held value
+	ctx.account.Next()
+	ctx.account.Hold()
+	ctx.reopenIterator(snapAccount)
+	idx++
+	ctx.account.Next()
+	checkVal(ctx.account, idx)
+
+	// reopen twice iterator with held value
+	ctx.account.Next()
+	ctx.account.Hold()
+	ctx.reopenIterator(snapAccount)
+	ctx.reopenIterator(snapAccount)
+	idx++
+	ctx.account.Next()
+	checkVal(ctx.account, idx)
+
+	// shift to the end and reopen
+	ctx.account.Next() // the end
+	ctx.reopenIterator(snapAccount)
+	ctx.account.Next()
+	if ctx.account.Key() != nil {
+		t.Fatal("Unexpected iterated entry")
+	}
+}
diff --git a/triedb/pathdb/iterator.go b/triedb/pathdb/iterator.go
new file mode 100644
index 000000000000..31f3c68688bb
--- /dev/null
+++ b/triedb/pathdb/iterator.go
@@ -0,0 +1,402 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see .
+
+package pathdb
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+// Iterator is an iterator to step over all the accounts or the specific
+// storage in a snapshot which may or may not be composed of multiple layers.
+type Iterator interface {
+	// Next steps the iterator forward one element, returning false if exhausted,
+	// or an error if iteration failed for some reason (e.g. root being iterated
+	// becomes stale and garbage collected).
+	Next() bool
+
+	// Error returns any failure that occurred during iteration, which might have
+	// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+	Error() error
+
+	// Hash returns the hash of the account or storage slot the iterator is
+	// currently at.
+	Hash() common.Hash
+
+	// Release releases associated resources. Release should always succeed and
+	// can be called multiple times without causing error.
+	Release()
+}
+
+// AccountIterator is an iterator to step over all the accounts in a snapshot,
+// which may or may not be composed of multiple layers.
+type AccountIterator interface {
+	Iterator
+
+	// Account returns the RLP encoded slim account the iterator is currently at.
+ // An error will be returned if the iterator becomes invalid + Account() []byte +} + +// StorageIterator is an iterator to step over the specific storage in a snapshot, +// which may or may not be composed of multiple layers. +type StorageIterator interface { + Iterator + + // Slot returns the storage slot the iterator is currently at. An error will + // be returned if the iterator becomes invalid + Slot() []byte +} + +// diffAccountIterator is an account iterator that steps over the accounts (both +// live and deleted) contained within a state set. Higher order iterators will +// use the deleted accounts to skip deeper iterators. +type diffAccountIterator struct { + // curHash is the current hash the iterator is positioned on. The field is + // explicitly tracked since the referenced diff layer might go stale after + // the iterator was positioned and we don't want to fail accessing the old + // hash as long as the iterator is not touched any more. + curHash common.Hash + + states *stateSet // Live state set to retrieve values from + stale func() bool // Signal if the referenced state set is stale + keys []common.Hash // Keys left in the layer to iterate + fail error // Any failures encountered (stale) +} + +// AccountIterator creates an account iterator over the given state set. +func newDiffAccountIterator(seek common.Hash, states *stateSet, stale func() bool) AccountIterator { + // Seek out the requested starting account + hashes := states.accountList() + index := sort.Search(len(hashes), func(i int) bool { + return bytes.Compare(seek[:], hashes[i][:]) <= 0 + }) + // Assemble and returned the already seeked iterator + return &diffAccountIterator{ + states: states, + stale: stale, + keys: hashes[index:], + } +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (it *diffAccountIterator) Next() bool { + // If the iterator was already stale, consider it a programmer error. Although + // we could just return false here, triggering this path would probably mean + // somebody forgot to check for Error, so lets blow up instead of undefined + // behavior that's hard to debug. + if it.fail != nil { + panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail)) + } + // Stop iterating if all keys were exhausted + if len(it.keys) == 0 { + return false + } + if it.stale != nil && it.stale() { + it.fail, it.keys = errSnapshotStale, nil + return false + } + // Iterator seems to be still alive, retrieve and cache the live hash + it.curHash = it.keys[0] + + // key cached, shift the iterator and notify the user of success + it.keys = it.keys[1:] + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (it *diffAccountIterator) Error() error { + return it.fail +} + +// Hash returns the hash of the account the iterator is currently at. +func (it *diffAccountIterator) Hash() common.Hash { + return it.curHash +} + +// Account returns the RLP encoded slim account the iterator is currently at. +// This method may _fail_, if the associated state goes stale between the call +// to Next and Account. That type of error will set it.fail. +// +// This method assumes that states modification does not delete elements from +// the account mapping (writing nil into it is fine though), and will panic +// if elements have been deleted. +// +// Note the returned account is not a copy, please don't modify it. 
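Before the Account implementation below, a sketch of the intended call pattern for this iterator (states stands in for a live *stateSet; the rest is the API defined in this file):

it := newDiffAccountIterator(common.Hash{}, states, nil)
defer it.Release()
for it.Next() {
	hash := it.Hash()
	blob := it.Account() // shared slice: treat as read-only, per the note above
	_, _ = hash, blob
}
if err := it.Error(); err != nil {
	// The backing state set went stale mid-iteration
}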
+func (it *diffAccountIterator) Account() []byte { + blob, ok := it.states.account(it.curHash) + if !ok { + panic(fmt.Sprintf("iterator referenced non-existent account: %x", it.curHash)) + } + if it.stale != nil && it.stale() { + it.fail, it.keys = errSnapshotStale, nil + } + return blob +} + +// Release is a noop for diff account iterators as there are no held resources. +func (it *diffAccountIterator) Release() {} + +// diskAccountIterator is an account iterator that steps over the persistent +// accounts within the database. +// +// To simplify, the staleness of the persistent state is not tracked. The disk +// iterator is not intended to be used alone. It should always be wrapped with +// a diff iterator, as the bottom-most disk layer uses both the in-memory +// aggregated buffer and the persistent disk layer as data sources. The staleness +// of the diff iterator is sufficient to invalidate the iterator pair. +type diskAccountIterator struct { + it ethdb.Iterator +} + +// newDiskAccountIterator creates an account iterator over the persistent state. +func newDiskAccountIterator(db ethdb.KeyValueStore, seek common.Hash) AccountIterator { + pos := common.TrimRightZeroes(seek[:]) + return &diskAccountIterator{ + it: db.NewIterator(rawdb.SnapshotAccountPrefix, pos), + } +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (it *diskAccountIterator) Next() bool { + // If the iterator was already exhausted, don't bother + if it.it == nil { + return false + } + // Try to advance the iterator and release it if we reached the end + for { + if !it.it.Next() { + it.it.Release() + it.it = nil + return false + } + if len(it.it.Key()) == len(rawdb.SnapshotAccountPrefix)+common.HashLength { + break + } + } + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +// +// A diff layer is immutable after creation content wise and can always be fully +// iterated without error, so this method always returns nil. +func (it *diskAccountIterator) Error() error { + if it.it == nil { + return nil // Iterator is exhausted and released + } + return it.it.Error() +} + +// Hash returns the hash of the account the iterator is currently at. +func (it *diskAccountIterator) Hash() common.Hash { + return common.BytesToHash(it.it.Key()) // The prefix will be truncated +} + +// Account returns the RLP encoded slim account the iterator is currently at. +func (it *diskAccountIterator) Account() []byte { + return it.it.Value() +} + +// Release releases the database snapshot held during iteration. +func (it *diskAccountIterator) Release() { + // The iterator is auto-released on exhaustion, so make sure it's still alive + if it.it != nil { + it.it.Release() + it.it = nil + } +} + +// diffStorageIterator is a storage iterator that steps over the specific storage +// (both live and deleted) contained within a state set. Higher order iterators +// will use the deleted slot to skip deeper iterators. +type diffStorageIterator struct { + // curHash is the current hash the iterator is positioned on. The field is + // explicitly tracked since the referenced diff layer might go stale after + // the iterator was positioned and we don't want to fail accessing the old + // hash as long as the iterator is not touched any more. 
curHash common.Hash
+	account common.Hash
+
+	states *stateSet     // Live state set to retrieve values from
+	stale  func() bool   // Signal if the referenced state set is stale
+	keys   []common.Hash // Keys left in the layer to iterate
+	fail   error         // Any failures encountered (stale)
+}
+
+// newDiffStorageIterator creates a storage iterator over a single diff layer.
+// Besides the storage iterator, an additional "destructed" flag is returned.
+// If it is true, the whole storage was destructed in this layer (and maybe
+// recreated), so deeper layers need not be consulted for storage retrieval.
+func newDiffStorageIterator(account common.Hash, seek common.Hash, states *stateSet, stale func() bool) (StorageIterator, bool) {
+	// Create the storage iterator for this account even if it's marked
+	// as destructed. The iterator is for the new storage which just has
+	// the same address as the deleted one.
+	hashes, destructed := states.storageList(account)
+	index := sort.Search(len(hashes), func(i int) bool {
+		return bytes.Compare(seek[:], hashes[i][:]) <= 0
+	})
+	// Assemble and return the already seeked iterator
+	return &diffStorageIterator{
+		states:  states,
+		stale:   stale,
+		account: account,
+		keys:    hashes[index:],
+	}, destructed
+}
+
+// Next steps the iterator forward one element, returning false if exhausted.
+func (it *diffStorageIterator) Next() bool {
+	// If the iterator was already stale, consider it a programmer error. Although
+	// we could just return false here, triggering this path would probably mean
+	// somebody forgot to check for Error, so let's blow up instead of undefined
+	// behavior that's hard to debug.
+	if it.fail != nil {
+		panic(fmt.Sprintf("called Next of failed iterator: %v", it.fail))
+	}
+	// Stop iterating if all keys were exhausted
+	if len(it.keys) == 0 {
+		return false
+	}
+	if it.stale != nil && it.stale() {
+		it.fail, it.keys = errSnapshotStale, nil
+		return false
+	}
+	// Iterator seems to be still alive, retrieve and cache the live hash
+	it.curHash = it.keys[0]
+
+	// key cached, shift the iterator and notify the user of success
+	it.keys = it.keys[1:]
+	return true
+}
+
+// Error returns any failure that occurred during iteration, which might have
+// caused a premature iteration exit (e.g. snapshot stack becoming stale).
+func (it *diffStorageIterator) Error() error {
+	return it.fail
+}
+
+// Hash returns the hash of the storage slot the iterator is currently at.
+func (it *diffStorageIterator) Hash() common.Hash {
+	return it.curHash
+}
+
+// Slot returns the raw storage slot value the iterator is currently at.
+// This method may _fail_, if the associated state set is modified between
+// the call to Next and Slot. That type of error will set it.fail.
+// This method assumes that state set modification does not delete elements
+// from the storage mapping (writing nil into it is fine though), and will
+// panic if elements have been deleted.
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *diffStorageIterator) Slot() []byte {
+	storage, ok := it.states.storage(it.account, it.curHash)
+	if !ok {
+		panic(fmt.Sprintf("iterator referenced non-existent storage: %x %x", it.account, it.curHash))
+	}
+	if it.stale != nil && it.stale() {
+		it.fail, it.keys = errSnapshotStale, nil
+	}
+	return storage
+}
+
+// Release is a noop for diff storage iterators as there are no held resources.
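A sketch of how the destructed flag returned by newDiffStorageIterator above is meant to be consumed (accHash and states are placeholders):

it, destructed := newDiffStorageIterator(accHash, common.Hash{}, states, nil)
defer it.Release()
if destructed {
	// Storage was wiped in this layer: skip deeper layers, though this
	// iterator may still yield slots of the re-created storage.
}
for it.Next() {
	slot := it.Slot() // shared slice: treat as read-only
	_ = slot
}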
+func (it *diffStorageIterator) Release() {} + +// diskStorageIterator is a storage iterator that steps over the persistent +// storage slots contained within the database. +// +// To simplify, the staleness of the persistent state is not tracked. The disk +// iterator is not intended to be used alone. It should always be wrapped with +// a diff iterator, as the bottom-most disk layer uses both the in-memory +// aggregated buffer and the persistent disk layer as data sources. The staleness +// of the diff iterator is sufficient to invalidate the iterator pair. +type diskStorageIterator struct { + account common.Hash + it ethdb.Iterator +} + +// StorageIterator creates a storage iterator over the persistent state. +// If the whole storage is destructed, then all entries in the disk +// layer are deleted already. So the "destructed" flag returned here +// is always false. +func newDiskStorageIterator(db ethdb.KeyValueStore, account common.Hash, seek common.Hash) StorageIterator { + pos := common.TrimRightZeroes(seek[:]) + return &diskStorageIterator{ + account: account, + it: db.NewIterator(append(rawdb.SnapshotStoragePrefix, account.Bytes()...), pos), + } +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (it *diskStorageIterator) Next() bool { + // If the iterator was already exhausted, don't bother + if it.it == nil { + return false + } + // Try to advance the iterator and release it if we reached the end + for { + if !it.it.Next() { + it.it.Release() + it.it = nil + return false + } + if len(it.it.Key()) == len(rawdb.SnapshotStoragePrefix)+common.HashLength+common.HashLength { + break + } + } + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +// +// A diff layer is immutable after creation content wise and can always be fully +// iterated without error, so this method always returns nil. +func (it *diskStorageIterator) Error() error { + if it.it == nil { + return nil // Iterator is exhausted and released + } + return it.it.Error() +} + +// Hash returns the hash of the storage slot the iterator is currently at. +func (it *diskStorageIterator) Hash() common.Hash { + return common.BytesToHash(it.it.Key()) // The prefix will be truncated +} + +// Slot returns the raw storage slot content the iterator is currently at. +func (it *diskStorageIterator) Slot() []byte { + return it.it.Value() +} + +// Release releases the database snapshot held during iteration. +func (it *diskStorageIterator) Release() { + // The iterator is auto-released on exhaustion, so make sure it's still alive + if it.it != nil { + it.it.Release() + it.it = nil + } +} diff --git a/triedb/pathdb/iterator_binary.go b/triedb/pathdb/iterator_binary.go new file mode 100644 index 000000000000..6780f814ee2a --- /dev/null +++ b/triedb/pathdb/iterator_binary.go @@ -0,0 +1,278 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + + "github.com/ethereum/go-ethereum/common" +) + +// binaryIterator is a simplistic iterator to step over the accounts or storage +// in a layer, which may or may not be composed of multiple layers. Performance +// wise this iterator is slow, it's meant for cross validating the fast one. +type binaryIterator struct { + a Iterator + b Iterator + aDone bool + bDone bool + k common.Hash + account common.Hash + fail error +} + +// initBinaryAccountIterator creates a simplistic iterator to step over all the +// accounts in a slow, but easily verifiable way. Note this function is used +// for initialization, use `newBinaryAccountIterator` as the API. +func (dl *diskLayer) initBinaryAccountIterator() *binaryIterator { + l := &binaryIterator{ + a: newDiffAccountIterator(common.Hash{}, dl.buffer.states, dl.isStale), + b: newDiskAccountIterator(dl.db.diskdb, common.Hash{}), + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +// initBinaryAccountIterator creates a simplistic iterator to step over all the +// accounts in a slow, but easily verifiable way. Note this function is used +// for initialization, use `newBinaryAccountIterator` as the API. +func (dl *diffLayer) initBinaryAccountIterator() *binaryIterator { + parent, ok := dl.parent.(*diffLayer) + if !ok { + l := &binaryIterator{ + a: newDiffAccountIterator(common.Hash{}, dl.states.stateSet, nil), + b: dl.parent.(*diskLayer).initBinaryAccountIterator(), + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l + } + l := &binaryIterator{ + a: newDiffAccountIterator(common.Hash{}, dl.states.stateSet, nil), + b: parent.initBinaryAccountIterator(), + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +// initBinaryStorageIterator creates a simplistic iterator to step over all the +// storage slots in a slow, but easily verifiable way. Note this function is used +// for initialization, use `newBinaryStorageIterator` as the API. +func (dl *diskLayer) initBinaryStorageIterator(account common.Hash) *binaryIterator { + a, destructed := newDiffStorageIterator(account, common.Hash{}, dl.buffer.states, dl.isStale) + if destructed { + l := &binaryIterator{ + a: a, + account: account, + } + l.aDone = !l.a.Next() + l.bDone = true + return l + } + l := &binaryIterator{ + a: a, + b: newDiskStorageIterator(dl.db.diskdb, account, common.Hash{}), + account: account, + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +// initBinaryStorageIterator creates a simplistic iterator to step over all the +// storage slots in a slow, but easily verifiable way. Note this function is used +// for initialization, use `newBinaryStorageIterator` as the API. +func (dl *diffLayer) initBinaryStorageIterator(account common.Hash) *binaryIterator { + parent, ok := dl.parent.(*diffLayer) + if !ok { + // If the storage in this layer is already destructed, discard all + // deeper layers but still return a valid single-branch iterator. 
+ a, destructed := newDiffStorageIterator(account, common.Hash{}, dl.states.stateSet, nil) + if destructed { + l := &binaryIterator{ + a: a, + account: account, + } + l.aDone = !l.a.Next() + l.bDone = true + return l + } + // The parent is disk layer + l := &binaryIterator{ + a: a, + b: dl.parent.(*diskLayer).initBinaryStorageIterator(account), + account: account, + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l + } + // If the storage in this layer is already destructed, discard all + // deeper layers but still return a valid single-branch iterator. + a, destructed := newDiffStorageIterator(account, common.Hash{}, dl.states.stateSet, nil) + if destructed { + l := &binaryIterator{ + a: a, + account: account, + } + l.aDone = !l.a.Next() + l.bDone = true + return l + } + l := &binaryIterator{ + a: a, + b: parent.initBinaryStorageIterator(account), + account: account, + } + l.aDone = !l.a.Next() + l.bDone = !l.b.Next() + return l +} + +// Next steps the iterator forward one element, returning false if exhausted, +// or an error if iteration failed for some reason (e.g. root being iterated +// becomes stale and garbage collected). +func (it *binaryIterator) Next() bool { + if it.aDone && it.bDone { + return false + } +first: + if it.aDone { + it.k = it.b.Hash() + it.bDone = !it.b.Next() + return true + } + if it.bDone { + it.k = it.a.Hash() + it.aDone = !it.a.Next() + return true + } + nextA, nextB := it.a.Hash(), it.b.Hash() + if diff := bytes.Compare(nextA[:], nextB[:]); diff < 0 { + it.aDone = !it.a.Next() + it.k = nextA + return true + } else if diff == 0 { + // Now we need to advance one of them + it.aDone = !it.a.Next() + goto first + } + it.bDone = !it.b.Next() + it.k = nextB + return true +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (it *binaryIterator) Error() error { + return it.fail +} + +// Hash returns the hash of the account the iterator is currently at. +func (it *binaryIterator) Hash() common.Hash { + return it.k +} + +// Release recursively releases all the iterators in the stack. +func (it *binaryIterator) Release() { + it.a.Release() + it.b.Release() +} + +// accountBinaryIterator is a wrapper around a binary iterator that adds functionality +// to retrieve account data from the associated layer at the current position. +type accountBinaryIterator struct { + *binaryIterator + layer layer +} + +// newBinaryAccountIterator creates a simplistic account iterator to step over +// all the accounts in a slow, but easily verifiable way. +// +//nolint:all +func (dl *diskLayer) newBinaryAccountIterator() AccountIterator { + return &accountBinaryIterator{ + binaryIterator: dl.initBinaryAccountIterator(), + layer: dl, + } +} + +// newBinaryAccountIterator creates a simplistic account iterator to step over +// all the accounts in a slow, but easily verifiable way. +func (dl *diffLayer) newBinaryAccountIterator() AccountIterator { + return &accountBinaryIterator{ + binaryIterator: dl.initBinaryAccountIterator(), + layer: dl, + } +} + +// Account returns the RLP encoded slim account the iterator is currently at, or +// nil if the iterated snapshot stack became stale (you can check Error after +// to see if it failed or not). +// +// Note the returned account is not a copy, please don't modify it. 
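Before the Account implementation below, a sketch of walking a layer with the binary iterator (layer stands in for a *diffLayer or *diskLayer, both of which define newBinaryAccountIterator in this file):

it := layer.newBinaryAccountIterator()
defer it.Release()
for it.Next() {
	if blob := it.Account(); blob == nil && it.Error() != nil {
		break // the layer stack went stale underneath the iterator
	}
}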
+func (it *accountBinaryIterator) Account() []byte {
+	blob, err := it.layer.account(it.k, 0)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
+
+// storageBinaryIterator is a wrapper around a binary iterator that adds functionality
+// to retrieve storage slot data from the associated layer at the current position.
+type storageBinaryIterator struct {
+	*binaryIterator
+	layer layer
+}
+
+// newBinaryStorageIterator creates a simplistic storage iterator to step over
+// all the storage slots in a slow, but easily verifiable way.
+//
+//nolint:all
+func (dl *diskLayer) newBinaryStorageIterator(account common.Hash) StorageIterator {
+	return &storageBinaryIterator{
+		binaryIterator: dl.initBinaryStorageIterator(account),
+		layer:          dl,
+	}
+}
+
+// newBinaryStorageIterator creates a simplistic storage iterator to step over
+// all the storage slots in a slow, but easily verifiable way.
+func (dl *diffLayer) newBinaryStorageIterator(account common.Hash) StorageIterator {
+	return &storageBinaryIterator{
+		binaryIterator: dl.initBinaryStorageIterator(account),
+		layer:          dl,
+	}
+}
+
+// Slot returns the raw storage slot data the iterator is currently at, or
+// nil if the iterated snapshot stack became stale (you can check Error after
+// to see if it failed or not).
+//
+// Note the returned slot is not a copy, please don't modify it.
+func (it *storageBinaryIterator) Slot() []byte {
+	blob, err := it.layer.storage(it.account, it.k, 0)
+	if err != nil {
+		it.fail = err
+		return nil
+	}
+	return blob
+}
diff --git a/triedb/pathdb/iterator_fast.go b/triedb/pathdb/iterator_fast.go
new file mode 100644
index 000000000000..0843fe2b4371
--- /dev/null
+++ b/triedb/pathdb/iterator_fast.go
@@ -0,0 +1,372 @@
+// Copyright 2019 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+	"bytes"
+	"fmt"
+	"slices"
+	"sort"
+
+	"github.com/ethereum/go-ethereum/common"
+)
+
+// weightedIterator is an iterator with an assigned weight. It is used to prioritise
+// which account or storage slot is the correct one if multiple iterators find the
+// same one (modified in multiple consecutive blocks).
+type weightedIterator struct {
+	it       Iterator
+	priority int
+}
+
+func (it *weightedIterator) Cmp(other *weightedIterator) int {
+	// Order the iterators primarily by the account hashes
+	hashI := it.it.Hash()
+	hashJ := other.it.Hash()
+
+	switch bytes.Compare(hashI[:], hashJ[:]) {
+	case -1:
+		return -1
+	case 1:
+		return 1
+	}
+	// Same account/storage-slot in multiple layers, split by priority
+	if it.priority < other.priority {
+		return -1
+	}
+	if it.priority > other.priority {
+		return 1
+	}
+	return 0
+}
+
+// fastIterator is a more optimized multi-layer iterator which maintains a
+// direct mapping of all iterators leading down to the bottom layer.
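+//
+// The sub-iterators are kept sorted by the hash each one is positioned on,
+// with ties broken by priority (a lower value denotes a shallower, more
+// recent layer and shadows deeper ones). Element 0 therefore always holds
+// the next key to emit.
+//
+// An illustrative walk over all accounts reachable from a root, using the
+// constructors defined below (sketch only; process is a placeholder):
+//
+//	it, err := newFastAccountIterator(db, root, common.Hash{})
+//	if err != nil {
+//		return err
+//	}
+//	defer it.Release()
+//	for it.Next() {
+//		process(it.Hash(), it.Account())
+//	}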
+type fastIterator struct {
+	db   *Database   // Database to reinitialize stale sub-iterators with
+	root common.Hash // Root hash to reinitialize stale sub-iterators through
+
+	curAccount []byte
+	curSlot    []byte
+
+	iterators []*weightedIterator
+	initiated bool
+	account   bool
+	fail      error
+}
+
+// newFastIterator creates a new hierarchical account or storage iterator with one
+// element per diff layer. The returned combo iterator can be used to walk over
+// the entire snapshot diff stack simultaneously.
+func newFastIterator(db *Database, root common.Hash, account common.Hash, seek common.Hash, accountIterator bool) (*fastIterator, error) {
+	current := db.tree.get(root)
+	if current == nil {
+		return nil, fmt.Errorf("unknown snapshot: %x", root)
+	}
+	fi := &fastIterator{
+		db:      db,
+		root:    root,
+		account: accountIterator,
+	}
+loop:
+	for depth := 0; current != nil; depth++ {
+		if accountIterator {
+			switch dl := current.(type) {
+			case *diskLayer:
+				fi.iterators = append(fi.iterators, &weightedIterator{
+					it:       newDiffAccountIterator(seek, dl.buffer.states, dl.isStale),
+					priority: depth,
+				})
+				fi.iterators = append(fi.iterators, &weightedIterator{
+					it:       newDiskAccountIterator(dl.db.diskdb, seek),
+					priority: depth + 1,
+				})
+			case *diffLayer:
+				fi.iterators = append(fi.iterators, &weightedIterator{
+					it:       newDiffAccountIterator(seek, dl.states.stateSet, nil),
+					priority: depth,
+				})
+			}
+		} else {
+			// If the whole storage is destructed in this layer, don't bother
+			// with deeper layers anymore. But we should still keep the
+			// iterator for this layer, since it can contain some valid slots
+			// that belong to the re-created account.
+			switch dl := current.(type) {
+			case *diskLayer:
+				it, destructed := newDiffStorageIterator(account, seek, dl.buffer.states, dl.isStale)
+				fi.iterators = append(fi.iterators, &weightedIterator{
+					it:       it,
+					priority: depth,
+				})
+				if destructed {
+					break loop
+				}
+				fi.iterators = append(fi.iterators, &weightedIterator{
+					it:       newDiskStorageIterator(dl.db.diskdb, account, seek),
+					priority: depth + 1,
+				})
+			case *diffLayer:
+				it, destructed := newDiffStorageIterator(account, seek, dl.states.stateSet, nil)
+				fi.iterators = append(fi.iterators, &weightedIterator{
+					it:       it,
+					priority: depth,
+				})
+				if destructed {
+					break loop
+				}
+			}
+		}
+		current = current.parentLayer()
+	}
+	fi.init()
+	return fi, nil
+}
+
+// init walks over all the iterators and resolves any clashes between them, after
+// which it prepares the stack for step-by-step iteration.
+func (fi *fastIterator) init() {
+	// Track which account hashes are iterators positioned on
+	var positioned = make(map[common.Hash]int)
+
+	// Position all iterators and track how many remain live
+	for i := 0; i < len(fi.iterators); i++ {
+		// Retrieve the first element and if it clashes with a previous iterator,
+		// advance either the current one or the old one. Repeat until nothing is
+		// clashing any more.
+		it := fi.iterators[i]
+		for {
+			// If the iterator is exhausted, drop it off the end
+			if !it.it.Next() {
+				it.it.Release()
+				last := len(fi.iterators) - 1
+
+				fi.iterators[i] = fi.iterators[last]
+				fi.iterators[last] = nil
+				fi.iterators = fi.iterators[:last]
+
+				i--
+				break
+			}
+			// The iterator is still alive, check for collisions with previous ones
+			hash := it.it.Hash()
+			if other, exist := positioned[hash]; !exist {
+				positioned[hash] = i
+				break
+			} else {
+				// Iterators collide, one needs to be progressed, use priority to
+				// determine which.
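+				// Lower priority values belong to shallower (newer) layers
+				// and must shadow deeper ones carrying the same key.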
+ // + // This whole else-block can be avoided, if we instead + // do an initial priority-sort of the iterators. If we do that, + // then we'll only wind up here if a lower-priority (preferred) iterator + // has the same value, and then we will always just continue. + // However, it costs an extra sort, so it's probably not better + if fi.iterators[other].priority < it.priority { + // The 'it' should be progressed + continue + } else { + // The 'other' should be progressed, swap them + it = fi.iterators[other] + fi.iterators[other], fi.iterators[i] = fi.iterators[i], fi.iterators[other] + continue + } + } + } + } + // Re-sort the entire list + slices.SortFunc(fi.iterators, func(a, b *weightedIterator) int { return a.Cmp(b) }) + fi.initiated = false +} + +// Next steps the iterator forward one element, returning false if exhausted. +func (fi *fastIterator) Next() bool { + if len(fi.iterators) == 0 { + return false + } + if !fi.initiated { + // Don't forward first time -- we had to 'Next' once in order to + // do the sorting already + fi.initiated = true + if fi.account { + fi.curAccount = fi.iterators[0].it.(AccountIterator).Account() + } else { + fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot() + } + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + return false + } + if fi.curAccount != nil || fi.curSlot != nil { + return true + } + // Implicit else: we've hit a nil-account or nil-slot, and need to + // fall through to the loop below to land on something non-nil + } + // If an account or a slot is deleted in one of the layers, the key will + // still be there, but the actual value will be nil. However, the iterator + // should not export nil-values (but instead simply omit the key), so we + // need to loop here until we either + // - get a non-nil value, + // - hit an error, + // - or exhaust the iterator + for { + if !fi.next(0) { + return false // exhausted + } + if fi.account { + fi.curAccount = fi.iterators[0].it.(AccountIterator).Account() + } else { + fi.curSlot = fi.iterators[0].it.(StorageIterator).Slot() + } + if innerErr := fi.iterators[0].it.Error(); innerErr != nil { + fi.fail = innerErr + return false // error + } + if fi.curAccount != nil || fi.curSlot != nil { + break // non-nil value found + } + } + return true +} + +// next handles the next operation internally and should be invoked when we know +// that two elements in the list may have the same value. +// +// For example, if the iterated hashes become [2,3,5,5,8,9,10], then we should +// invoke next(3), which will call Next on elem 3 (the second '5') and will +// cascade along the list, applying the same operation if needed. +func (fi *fastIterator) next(idx int) bool { + // If this particular iterator got exhausted, remove it and return true (the + // next one is surely not exhausted yet, otherwise it would have been removed + // already). + if it := fi.iterators[idx].it; !it.Next() { + it.Release() + + fi.iterators = append(fi.iterators[:idx], fi.iterators[idx+1:]...) 
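+		// After the compaction the element that used to live at idx+1 now
+		// sits at idx, so iteration can resume there; an empty list means
+		// the whole stack is exhausted.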
+ return len(fi.iterators) > 0 + } + // If there's no one left to cascade into, return + if idx == len(fi.iterators)-1 { + return true + } + // We next-ed the iterator at 'idx', now we may have to re-sort that element + var ( + cur, next = fi.iterators[idx], fi.iterators[idx+1] + curHash, nextHash = cur.it.Hash(), next.it.Hash() + ) + if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 { + // It is still in correct place + return true + } else if diff == 0 && cur.priority < next.priority { + // So still in correct place, but we need to iterate on the next + fi.next(idx + 1) + return true + } + // At this point, the iterator is in the wrong location, but the remaining + // list is sorted. Find out where to move the item. + clash := -1 + index := sort.Search(len(fi.iterators), func(n int) bool { + // The iterator always advances forward, so anything before the old slot + // is known to be behind us, so just skip them altogether. This actually + // is an important clause since the sort order got invalidated. + if n < idx { + return false + } + if n == len(fi.iterators)-1 { + // Can always place an elem last + return true + } + nextHash := fi.iterators[n+1].it.Hash() + if diff := bytes.Compare(curHash[:], nextHash[:]); diff < 0 { + return true + } else if diff > 0 { + return false + } + // The elem we're placing it next to has the same value, + // so whichever winds up on n+1 will need further iteration + clash = n + 1 + + return cur.priority < fi.iterators[n+1].priority + }) + fi.move(idx, index) + if clash != -1 { + fi.next(clash) + } + return true +} + +// move advances an iterator to another position in the list. +func (fi *fastIterator) move(index, newpos int) { + elem := fi.iterators[index] + copy(fi.iterators[index:], fi.iterators[index+1:newpos+1]) + fi.iterators[newpos] = elem +} + +// Error returns any failure that occurred during iteration, which might have +// caused a premature iteration exit (e.g. snapshot stack becoming stale). +func (fi *fastIterator) Error() error { + return fi.fail +} + +// Hash returns the current key +func (fi *fastIterator) Hash() common.Hash { + return fi.iterators[0].it.Hash() +} + +// Account returns the current account blob. +// Note the returned account is not a copy, please don't modify it. +func (fi *fastIterator) Account() []byte { + return fi.curAccount +} + +// Slot returns the current storage slot. +// Note the returned slot is not a copy, please don't modify it. +func (fi *fastIterator) Slot() []byte { + return fi.curSlot +} + +// Release iterates over all the remaining live layer iterators and releases each +// of them individually. +func (fi *fastIterator) Release() { + for _, it := range fi.iterators { + it.it.Release() + } + fi.iterators = nil +} + +// Debug is a convenience helper during testing +func (fi *fastIterator) Debug() { + for _, it := range fi.iterators { + fmt.Printf("[p=%v v=%v] ", it.priority, it.it.Hash()[0]) + } + fmt.Println() +} + +// newFastAccountIterator creates a new hierarchical account iterator with one +// element per diff layer. The returned combo iterator can be used to walk over +// the entire snapshot diff stack simultaneously. +func newFastAccountIterator(db *Database, root common.Hash, seek common.Hash) (AccountIterator, error) { + return newFastIterator(db, root, common.Hash{}, seek, true) +} + +// newFastStorageIterator creates a new hierarchical storage iterator with one +// element per diff layer. 
The returned combo iterator can be used to walk over +// the entire snapshot diff stack simultaneously. +func newFastStorageIterator(db *Database, root common.Hash, account common.Hash, seek common.Hash) (StorageIterator, error) { + return newFastIterator(db, root, account, seek, false) +} diff --git a/triedb/pathdb/iterator_test.go b/triedb/pathdb/iterator_test.go new file mode 100644 index 000000000000..abd1e05f7317 --- /dev/null +++ b/triedb/pathdb/iterator_test.go @@ -0,0 +1,334 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package pathdb + +import ( + "bytes" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/internal/testrand" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/holiman/uint256" +) + +type verifyContent int + +const ( + verifyNothing verifyContent = iota + verifyAccount + verifyStorage +) + +func verifyIterator(t *testing.T, expCount int, it Iterator, verify verifyContent) { + t.Helper() + + var ( + count = 0 + last = common.Hash{} + ) + for it.Next() { + hash := it.Hash() + if bytes.Compare(last[:], hash[:]) >= 0 { + t.Errorf("wrong order: %x >= %x", last, hash) + } + count++ + if verify == verifyAccount && len(it.(AccountIterator).Account()) == 0 { + t.Errorf("iterator returned nil-value for hash %x", hash) + } else if verify == verifyStorage && len(it.(StorageIterator).Slot()) == 0 { + t.Errorf("iterator returned nil-value for hash %x", hash) + } + last = hash + } + if count != expCount { + t.Errorf("iterator count mismatch: have %d, want %d", count, expCount) + } + if err := it.Error(); err != nil { + t.Errorf("iterator failed: %v", err) + } +} + +// randomAccount generates a random account and returns it RLP encoded. +func randomAccount() []byte { + a := &types.StateAccount{ + Balance: uint256.NewInt(rand.Uint64()), + Nonce: rand.Uint64(), + Root: testrand.Hash(), + CodeHash: types.EmptyCodeHash[:], + } + data, _ := rlp.EncodeToBytes(a) + return data +} + +// randomAccountSet generates a set of random accounts with the given strings as +// the account address hashes. +func randomAccountSet(hashes ...string) map[common.Hash][]byte { + accounts := make(map[common.Hash][]byte) + for _, hash := range hashes { + accounts[common.HexToHash(hash)] = randomAccount() + } + return accounts +} + +// randomStorageSet generates a set of random slots with the given strings as +// the slot addresses. 
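+//
+// Slots listed in nilStorage are mapped to nil values, modelling deleted
+// storage entries.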
+func randomStorageSet(accounts []string, hashes [][]string, nilStorage [][]string) map[common.Hash]map[common.Hash][]byte { + storages := make(map[common.Hash]map[common.Hash][]byte) + for index, account := range accounts { + storages[common.HexToHash(account)] = make(map[common.Hash][]byte) + + if index < len(hashes) { + hashes := hashes[index] + for _, hash := range hashes { + storages[common.HexToHash(account)][common.HexToHash(hash)] = testrand.Bytes(32) + } + } + if index < len(nilStorage) { + nils := nilStorage[index] + for _, hash := range nils { + storages[common.HexToHash(account)][common.HexToHash(hash)] = nil + } + } + } + return storages +} + +// TestAccountIteratorBasics tests some simple single-layer(diff and disk) iteration +func TestAccountIteratorBasics(t *testing.T) { + var ( + destructs = make(map[common.Hash]struct{}) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill up a parent + for i := 0; i < 100; i++ { + hash := testrand.Hash() + data := testrand.Bytes(32) + + accounts[hash] = data + if rand.Intn(4) == 0 { + destructs[hash] = struct{}{} + } + if rand.Intn(2) == 0 { + accStorage := make(map[common.Hash][]byte) + accStorage[testrand.Hash()] = testrand.Bytes(32) + storage[hash] = accStorage + } + } + states := newStates(destructs, accounts, storage) + it := newDiffAccountIterator(common.Hash{}, states, nil) + verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator + + db := rawdb.NewMemoryDatabase() + batch := db.NewBatch() + states.write(db, batch, nil, nil) + batch.Write() + it = newDiskAccountIterator(db, common.Hash{}) + verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator +} + +// TestStorageIteratorBasics tests some simple single-layer(diff and disk) iteration for storage +func TestStorageIteratorBasics(t *testing.T) { + var ( + nilStorage = make(map[common.Hash]int) + accounts = make(map[common.Hash][]byte) + storage = make(map[common.Hash]map[common.Hash][]byte) + ) + // Fill some random data + for i := 0; i < 10; i++ { + hash := testrand.Hash() + accounts[hash] = testrand.Bytes(32) + + accStorage := make(map[common.Hash][]byte) + + var nilstorage int + for i := 0; i < 100; i++ { + if rand.Intn(2) == 0 { + accStorage[testrand.Hash()] = testrand.Bytes(32) + } else { + accStorage[testrand.Hash()] = nil // delete slot + nilstorage += 1 + } + } + storage[hash] = accStorage + nilStorage[hash] = nilstorage + } + states := newStates(nil, accounts, storage) + for account := range accounts { + it, _ := newDiffStorageIterator(account, common.Hash{}, states, nil) + verifyIterator(t, 100, it, verifyNothing) // Nil is allowed for single layer iterator + } + + db := rawdb.NewMemoryDatabase() + batch := db.NewBatch() + states.write(db, batch, nil, nil) + batch.Write() + for account := range accounts { + it := newDiskStorageIterator(db, account, common.Hash{}) + verifyIterator(t, 100-nilStorage[account], it, verifyNothing) // Nil is allowed for single layer iterator + } +} + +type testIterator struct { + values []byte +} + +func newTestIterator(values ...byte) *testIterator { + return &testIterator{values} +} + +func (ti *testIterator) Seek(common.Hash) { + panic("implement me") +} + +func (ti *testIterator) Next() bool { + ti.values = ti.values[1:] + return len(ti.values) > 0 +} + +func (ti *testIterator) Error() error { + return nil +} + +func (ti *testIterator) Hash() common.Hash { + return common.BytesToHash([]byte{ti.values[0]}) +} + +func (ti 
*testIterator) Account() []byte { + return nil +} + +func (ti *testIterator) Slot() []byte { + return nil +} + +func (ti *testIterator) Release() {} + +func TestFastIteratorBasics(t *testing.T) { + type testCase struct { + lists [][]byte + expKeys []byte + } + for i, tc := range []testCase{ + {lists: [][]byte{{0, 1, 8}, {1, 2, 8}, {2, 9}, {4}, + {7, 14, 15}, {9, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 4, 7, 8, 9, 13, 14, 15, 16}}, + {lists: [][]byte{{0, 8}, {1, 2, 8}, {7, 14, 15}, {8, 9}, + {9, 10}, {10, 13, 15, 16}}, + expKeys: []byte{0, 1, 2, 7, 8, 9, 10, 13, 14, 15, 16}}, + } { + var iterators []*weightedIterator + for i, data := range tc.lists { + it := newTestIterator(data...) + iterators = append(iterators, &weightedIterator{it, i}) + } + fi := &fastIterator{ + iterators: iterators, + initiated: false, + } + count := 0 + for fi.Next() { + if got, exp := fi.Hash()[31], tc.expKeys[count]; exp != got { + t.Errorf("tc %d, [%d]: got %d exp %d", i, count, got, exp) + } + count++ + } + } +} + +// TestAccountIteratorTraversal tests some simple multi-layer iteration. +func TestAccountIteratorTraversal(t *testing.T) { + config := &Config{ + WriteBufferSize: 0, + } + db := New(rawdb.NewMemoryDatabase(), config, false) + db.WaitGeneration() + + // Stack three diff layers on top with various overlaps + db.Update(common.HexToHash("0x02"), types.EmptyRootHash, 0, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(nil, randomAccountSet("0xaa", "0xee", "0xff", "0xf0"), nil, nil, nil)) + + db.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), 0, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(nil, randomAccountSet("0xbb", "0xdd", "0xf0"), nil, nil, nil)) + + db.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), 0, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(nil, randomAccountSet("0xcc", "0xf0", "0xff"), nil, nil, nil)) + + // Verify the single and multi-layer iterators + head := db.tree.get(common.HexToHash("0x04")) + + it := newDiffAccountIterator(common.Hash{}, head.(*diffLayer).states.stateSet, nil) + verifyIterator(t, 3, it, verifyNothing) + verifyIterator(t, 7, head.(*diffLayer).newBinaryAccountIterator(), verifyAccount) + + it, _ = db.AccountIterator(common.HexToHash("0x04"), common.Hash{}) + verifyIterator(t, 7, it, verifyAccount) + it.Release() + + // Test after persist some bottom-most layers into the disk, + // the functionalities still work. 
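+	// Capping the tree flattens the bottom-most diff layers into the disk
+	// layer, so the checks below exercise the disk iterator path as well.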
+ db.tree.cap(common.HexToHash("0x04"), 2) + + head = db.tree.get(common.HexToHash("0x04")) + verifyIterator(t, 7, head.(*diffLayer).newBinaryAccountIterator(), verifyAccount) + + it, _ = db.AccountIterator(common.HexToHash("0x04"), common.Hash{}) + verifyIterator(t, 7, it, verifyAccount) + it.Release() +} + +func TestStorageIteratorTraversal(t *testing.T) { + config := &Config{ + WriteBufferSize: 0, + } + db := New(rawdb.NewMemoryDatabase(), config, false) + db.WaitGeneration() + + // Stack three diff layers on top with various overlaps + db.Update(common.HexToHash("0x02"), types.EmptyRootHash, 0, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(nil, randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x02", "0x03"}}, nil), nil, nil)) + + db.Update(common.HexToHash("0x03"), common.HexToHash("0x02"), 0, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(nil, randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x04", "0x05", "0x06"}}, nil), nil, nil)) + + db.Update(common.HexToHash("0x04"), common.HexToHash("0x03"), 0, trienode.NewMergedNodeSet(), + NewStateSetWithOrigin(nil, randomAccountSet("0xaa"), randomStorageSet([]string{"0xaa"}, [][]string{{"0x01", "0x02", "0x03"}}, nil), nil, nil)) + + // Verify the single and multi-layer iterators + head := db.tree.get(common.HexToHash("0x04")) + + diffIter, _ := newDiffStorageIterator(common.HexToHash("0xaa"), common.Hash{}, head.(*diffLayer).states.stateSet, nil) + verifyIterator(t, 3, diffIter, verifyNothing) + verifyIterator(t, 6, head.(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa")), verifyStorage) + + it, _ := db.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.Hash{}) + verifyIterator(t, 6, it, verifyStorage) + it.Release() + + // Test after persist some bottom-most layers into the disk, + // the functionalities still work. + db.tree.cap(common.HexToHash("0x04"), 2) + verifyIterator(t, 6, head.(*diffLayer).newBinaryStorageIterator(common.HexToHash("0xaa")), verifyStorage) + + it, _ = db.StorageIterator(common.HexToHash("0x04"), common.HexToHash("0xaa"), common.Hash{}) + verifyIterator(t, 6, it, verifyStorage) + it.Release() +} diff --git a/triedb/pathdb/journal.go b/triedb/pathdb/journal.go index 1740ec593511..6987baecc493 100644 --- a/triedb/pathdb/journal.go +++ b/triedb/pathdb/journal.go @@ -27,10 +27,9 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rlp" - "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) var ( @@ -47,33 +46,8 @@ var ( // // - Version 0: initial version // - Version 1: storage.Incomplete field is removed -const journalVersion uint64 = 1 - -// journalNode represents a trie node persisted in the journal. -type journalNode struct { - Path []byte // Path of the node in the trie - Blob []byte // RLP-encoded trie node blob, nil means the node is deleted -} - -// journalNodes represents a list trie nodes belong to a single account -// or the main account trie. -type journalNodes struct { - Owner common.Hash - Nodes []journalNode -} - -// journalAccounts represents a list accounts belong to the layer. -type journalAccounts struct { - Addresses []common.Address - Accounts [][]byte -} - -// journalStorage represents a list of storage slots belong to an account. 
-type journalStorage struct { - Account common.Address - Hashes []common.Hash - Slots [][]byte -} +// - Version 2: add state journal +const journalVersion uint64 = 2 // loadJournal tries to parse the layer journal from the disk. func (db *Database) loadJournal(diskRoot common.Hash) (layer, error) { @@ -117,6 +91,47 @@ func (db *Database) loadJournal(diskRoot common.Hash) (layer, error) { return head, nil } +// journalGenerator is a disk layer entry containing the generator progress marker. +type journalGenerator struct { + Done bool // Whether the generator finished creating the snapshot + Marker []byte // Generation progress, []byte{} means nothing is generated + Accounts uint64 // Number of accounts indexed + Slots uint64 // Number of storage slots indexed + DanglingSlots uint64 // Number of dangling storage slots detected + Storage uint64 // Total account and storage slot size(generation or recovery) +} + +// loadGenerator loads the state generation progress marker from the database. +func loadGenerator(db ethdb.KeyValueReader) (*journalGenerator, common.Hash) { + trieRoot := types.EmptyRootHash + if blob := rawdb.ReadAccountTrieNode(db, nil); len(blob) > 0 { + trieRoot = crypto.Keccak256Hash(blob) + } + // State generation progress marker is lost, rebuild it + blob := rawdb.ReadSnapshotGenerator(db) + if len(blob) == 0 { + log.Info("State snapshot generator is not found") + return nil, trieRoot + } + // State generation progress marker is not compatible, rebuild it + var generator journalGenerator + if err := rlp.DecodeBytes(blob, &generator); err != nil { + log.Info("State snapshot generator is not compatible") + return nil, trieRoot + } + // State snapshot is not consistent with the trie data, rebuild it + stateRoot := rawdb.ReadSnapshotRoot(db) + if trieRoot != stateRoot { + log.Info("State snapshot is not consistent with trie data", "trie", trieRoot, "state", stateRoot) + return nil, trieRoot + } + // Slice null-ness is lost after rlp decoding, reset it back to empty + if !generator.Done && generator.Marker == nil { + generator.Marker = []byte{} + } + return &generator, trieRoot +} + // loadLayers loads a pre-existing state layer backed by a key-value store. func (db *Database) loadLayers() layer { // Retrieve the root node of persistent state. @@ -136,7 +151,7 @@ func (db *Database) loadLayers() layer { log.Info("Failed to load journal, discard it", "err", err) } // Return single layer with persistent state. 
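+	// The rebuilt disk layer carries a unified buffer which aggregates both
+	// the dirty trie nodes and the flat states.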
- return newDiskLayer(root, rawdb.ReadPersistentStateID(db.diskdb), db, nil, newNodeBuffer(db.bufferSize, nil, 0)) + return newDiskLayer(root, rawdb.ReadPersistentStateID(db.diskdb), db, nil, nil, newBuffer(db.config.WriteBufferSize, nil, nil, 0)) } // loadDiskLayer reads the binary blob from the layer journal, reconstructing @@ -158,26 +173,17 @@ func (db *Database) loadDiskLayer(r *rlp.Stream) (layer, error) { if stored > id { return nil, fmt.Errorf("invalid state id: stored %d resolved %d", stored, id) } - // Resolve nodes cached in node buffer - var encoded []journalNodes - if err := r.Decode(&encoded); err != nil { - return nil, fmt.Errorf("load disk nodes: %v", err) - } - nodes := make(map[common.Hash]map[string]*trienode.Node) - for _, entry := range encoded { - subset := make(map[string]*trienode.Node) - for _, n := range entry.Nodes { - if len(n.Blob) > 0 { - subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) - } else { - subset[string(n.Path)] = trienode.NewDeleted() - } - } - nodes[entry.Owner] = subset + // Resolve nodes cached in aggregated buffer + var nodes nodeSet + if err := nodes.decode(r); err != nil { + return nil, err + } + // Resolve flat state sets in aggregated buffer + var states stateSet + if err := states.decode(r); err != nil { + return nil, err } - // Calculate the internal state transitions by id difference. - base := newDiskLayer(root, id, db, nil, newNodeBuffer(db.bufferSize, nodes, id-stored)) - return base, nil + return newDiskLayer(root, id, db, nil, nil, newBuffer(db.config.WriteBufferSize, &nodes, &states, id-stored)), nil } // loadDiffLayer reads the next sections of a layer journal, reconstructing a new @@ -197,50 +203,16 @@ func (db *Database) loadDiffLayer(parent layer, r *rlp.Stream) (layer, error) { return nil, fmt.Errorf("load block number: %v", err) } // Read in-memory trie nodes from journal - var encoded []journalNodes - if err := r.Decode(&encoded); err != nil { - return nil, fmt.Errorf("load diff nodes: %v", err) - } - nodes := make(map[common.Hash]map[string]*trienode.Node) - for _, entry := range encoded { - subset := make(map[string]*trienode.Node) - for _, n := range entry.Nodes { - if len(n.Blob) > 0 { - subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) - } else { - subset[string(n.Path)] = trienode.NewDeleted() - } - } - nodes[entry.Owner] = subset - } - // Read state changes from journal - var ( - jaccounts journalAccounts - jstorages []journalStorage - accounts = make(map[common.Address][]byte) - storages = make(map[common.Address]map[common.Hash][]byte) - ) - if err := r.Decode(&jaccounts); err != nil { - return nil, fmt.Errorf("load diff accounts: %v", err) - } - for i, addr := range jaccounts.Addresses { - accounts[addr] = jaccounts.Accounts[i] - } - if err := r.Decode(&jstorages); err != nil { - return nil, fmt.Errorf("load diff storages: %v", err) - } - for _, entry := range jstorages { - set := make(map[common.Hash][]byte) - for i, h := range entry.Hashes { - if len(entry.Slots[i]) > 0 { - set[h] = entry.Slots[i] - } else { - set[h] = nil - } - } - storages[entry.Account] = set + var nodes nodeSet + if err := nodes.decode(r); err != nil { + return nil, err + } + // Read flat states set (with original value attached) from journal + var stateSet StateSetWithOrigin + if err := stateSet.decode(r); err != nil { + return nil, err } - return db.loadDiffLayer(newDiffLayer(parent, root, parent.stateID()+1, block, nodes, triestate.New(accounts, storages)), r) + return 
db.loadDiffLayer(newDiffLayer(parent, root, parent.stateID()+1, block, &nodes, &stateSet), r) } // journal implements the layer interface, marshaling the un-flushed trie nodes @@ -261,19 +233,15 @@ func (dl *diskLayer) journal(w io.Writer) error { if err := rlp.Encode(w, dl.id); err != nil { return err } - // Step three, write all unwritten nodes into the journal - nodes := make([]journalNodes, 0, len(dl.buffer.nodes)) - for owner, subset := range dl.buffer.nodes { - entry := journalNodes{Owner: owner} - for path, node := range subset { - entry.Nodes = append(entry.Nodes, journalNode{Path: []byte(path), Blob: node.Blob}) - } - nodes = append(nodes, entry) + // Step three, write the accumulated trie nodes into the journal + if err := dl.buffer.nodes.encode(w); err != nil { + return err } - if err := rlp.Encode(w, nodes); err != nil { + // Step four, write the accumulated flat states into the journal + if err := dl.buffer.states.encode(w); err != nil { return err } - log.Debug("Journaled pathdb disk layer", "root", dl.root, "nodes", len(dl.buffer.nodes)) + log.Debug("Journaled pathdb disk layer", "root", dl.root) return nil } @@ -295,39 +263,14 @@ func (dl *diffLayer) journal(w io.Writer) error { return err } // Write the accumulated trie nodes into buffer - nodes := make([]journalNodes, 0, len(dl.nodes)) - for owner, subset := range dl.nodes { - entry := journalNodes{Owner: owner} - for path, node := range subset { - entry.Nodes = append(entry.Nodes, journalNode{Path: []byte(path), Blob: node.Blob}) - } - nodes = append(nodes, entry) - } - if err := rlp.Encode(w, nodes); err != nil { + if err := dl.nodes.encode(w); err != nil { return err } - // Write the accumulated state changes into buffer - var jacct journalAccounts - for addr, account := range dl.states.Accounts { - jacct.Addresses = append(jacct.Addresses, addr) - jacct.Accounts = append(jacct.Accounts, account) - } - if err := rlp.Encode(w, jacct); err != nil { + // Write the associated flat state set into buffer + if err := dl.states.encode(w); err != nil { return err } - storage := make([]journalStorage, 0, len(dl.states.Storages)) - for addr, slots := range dl.states.Storages { - entry := journalStorage{Account: addr} - for slotHash, slot := range slots { - entry.Hashes = append(entry.Hashes, slotHash) - entry.Slots = append(entry.Slots, slot) - } - storage = append(storage, entry) - } - if err := rlp.Encode(w, storage); err != nil { - return err - } - log.Debug("Journaled pathdb diff layer", "root", dl.root, "parent", dl.parent.rootHash(), "id", dl.stateID(), "block", dl.block, "nodes", len(dl.nodes)) + log.Debug("Journaled pathdb diff layer", "root", dl.root, "parent", dl.parent.rootHash(), "id", dl.stateID(), "block", dl.block) return nil } @@ -347,6 +290,10 @@ func (db *Database) Journal(root common.Hash) error { } else { // disk layer only on noop runs (likely) or deep reorgs (unlikely) log.Info("Persisting dirty state to disk", "root", root, "layers", disk.buffer.layers) } + // Terminate the background state generation if it's active + if disk.generator != nil { + disk.generator.stop() + } start := time.Now() // Run the journaling diff --git a/triedb/pathdb/layertree.go b/triedb/pathdb/layertree.go index d314779910e9..cf6b14e744ef 100644 --- a/triedb/pathdb/layertree.go +++ b/triedb/pathdb/layertree.go @@ -24,7 +24,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/trie/trienode" - "github.com/ethereum/go-ethereum/trie/triestate" ) // layerTree 
is a group of state layers identified by the state root. @@ -86,7 +85,7 @@ func (tree *layerTree) len() int { } // add inserts a new layer into the tree if it can be linked to an existing old parent. -func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *triestate.Set) error { +func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint64, nodes *trienode.MergedNodeSet, states *StateSetWithOrigin) error { // Reject noop updates to avoid self-loops. This is a special case that can // happen for clique networks and proof-of-stake networks where empty blocks // don't modify the state (0 block subsidy). @@ -101,7 +100,7 @@ func (tree *layerTree) add(root common.Hash, parentRoot common.Hash, block uint6 if parent == nil { return fmt.Errorf("triedb parent [%#x] layer missing", parentRoot) } - l := parent.update(root, parent.stateID()+1, block, nodes.Flatten(), states) + l := parent.update(root, parent.stateID()+1, block, newNodeSet(nodes.Flatten()), states) tree.lock.Lock() tree.layers[l.rootHash()] = l diff --git a/triedb/pathdb/metrics.go b/triedb/pathdb/metrics.go index a250f703cbab..8b75e71a7e89 100644 --- a/triedb/pathdb/metrics.go +++ b/triedb/pathdb/metrics.go @@ -19,33 +19,77 @@ package pathdb import "github.com/ethereum/go-ethereum/metrics" var ( - cleanHitMeter = metrics.NewRegisteredMeter("pathdb/clean/hit", nil) - cleanMissMeter = metrics.NewRegisteredMeter("pathdb/clean/miss", nil) - cleanReadMeter = metrics.NewRegisteredMeter("pathdb/clean/read", nil) - cleanWriteMeter = metrics.NewRegisteredMeter("pathdb/clean/write", nil) + cleanNodeHitMeter = metrics.NewRegisteredMeter("pathdb/clean/node/hit", nil) + cleanNodeMissMeter = metrics.NewRegisteredMeter("pathdb/clean/node/miss", nil) + cleanNodeReadMeter = metrics.NewRegisteredMeter("pathdb/clean/node/read", nil) + cleanNodeWriteMeter = metrics.NewRegisteredMeter("pathdb/clean/node/write", nil) - dirtyHitMeter = metrics.NewRegisteredMeter("pathdb/dirty/hit", nil) - dirtyMissMeter = metrics.NewRegisteredMeter("pathdb/dirty/miss", nil) - dirtyReadMeter = metrics.NewRegisteredMeter("pathdb/dirty/read", nil) - dirtyWriteMeter = metrics.NewRegisteredMeter("pathdb/dirty/write", nil) - dirtyNodeHitDepthHist = metrics.NewRegisteredHistogram("pathdb/dirty/depth", nil, metrics.NewExpDecaySample(1028, 0.015)) + cleanStateHitMeter = metrics.NewRegisteredMeter("pathdb/clean/state/hit", nil) + cleanStateMissMeter = metrics.NewRegisteredMeter("pathdb/clean/state/miss", nil) + cleanStateReadMeter = metrics.NewRegisteredMeter("pathdb/clean/state/read", nil) + cleanStateWriteMeter = metrics.NewRegisteredMeter("pathdb/clean/state/write", nil) + + stateAccountMissMeter = metrics.NewRegisteredMeter("pathdb/state/account/miss", nil) + stateAccountHitMeter = metrics.NewRegisteredMeter("pathdb/state/account/hit", nil) + stateStorageMissMeter = metrics.NewRegisteredMeter("pathdb/state/storage/miss", nil) + stateStorageHitMeter = metrics.NewRegisteredMeter("pathdb/state/storage/hit", nil) + + dirtyNodeHitMeter = metrics.NewRegisteredMeter("pathdb/dirty/hit/node", nil) + dirtyNodeMissMeter = metrics.NewRegisteredMeter("pathdb/dirty/miss/node", nil) + dirtyNodeReadMeter = metrics.NewRegisteredMeter("pathdb/dirty/read/node", nil) + dirtyNodeWriteMeter = metrics.NewRegisteredMeter("pathdb/dirty/write/node", nil) + dirtyNodeHitDepthHist = metrics.NewRegisteredHistogram("pathdb/dirty/depth/node", nil, metrics.NewExpDecaySample(1028, 0.015)) + + dirtyStateHitMeter = 
metrics.NewRegisteredMeter("pathdb/dirty/hit/state", nil) + dirtyStateMissMeter = metrics.NewRegisteredMeter("pathdb/dirty/miss/state", nil) + dirtyStateReadMeter = metrics.NewRegisteredMeter("pathdb/dirty/read/state", nil) + dirtyStateWriteMeter = metrics.NewRegisteredMeter("pathdb/dirty/write/state", nil) + dirtyStateHitDepthHist = metrics.NewRegisteredHistogram("pathdb/dirty/depth/state", nil, metrics.NewExpDecaySample(1028, 0.015)) cleanFalseMeter = metrics.NewRegisteredMeter("pathdb/clean/false", nil) dirtyFalseMeter = metrics.NewRegisteredMeter("pathdb/dirty/false", nil) diskFalseMeter = metrics.NewRegisteredMeter("pathdb/disk/false", nil) diffFalseMeter = metrics.NewRegisteredMeter("pathdb/diff/false", nil) - commitTimeTimer = metrics.NewRegisteredTimer("pathdb/commit/time", nil) - commitNodesMeter = metrics.NewRegisteredMeter("pathdb/commit/nodes", nil) - commitBytesMeter = metrics.NewRegisteredMeter("pathdb/commit/bytes", nil) - - gcNodesMeter = metrics.NewRegisteredMeter("pathdb/gc/nodes", nil) - gcBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/bytes", nil) + commitTimeTimer = metrics.NewRegisteredTimer("pathdb/commit/time", nil) + commitNodesMeter = metrics.NewRegisteredMeter("pathdb/commit/nodes", nil) + commitAccountsMeter = metrics.NewRegisteredMeter("pathdb/commit/accounts", nil) + commitStoragesMeter = metrics.NewRegisteredMeter("pathdb/commit/slots", nil) + commitBytesMeter = metrics.NewRegisteredMeter("pathdb/commit/bytes", nil) - diffLayerBytesMeter = metrics.NewRegisteredMeter("pathdb/diff/bytes", nil) - diffLayerNodesMeter = metrics.NewRegisteredMeter("pathdb/diff/nodes", nil) + gcTrieNodeMeter = metrics.NewRegisteredMeter("pathdb/gc/trienode/count", nil) + gcTrieNodeBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/trienode/bytes", nil) + gcAccountMeter = metrics.NewRegisteredMeter("pathdb/gc/account/count", nil) + gcAccountBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/account/bytes", nil) + gcStorageMeter = metrics.NewRegisteredMeter("pathdb/gc/storage/count", nil) + gcStorageBytesMeter = metrics.NewRegisteredMeter("pathdb/gc/storage/bytes", nil) historyBuildTimeMeter = metrics.NewRegisteredTimer("pathdb/history/time", nil) historyDataBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/data", nil) historyIndexBytesMeter = metrics.NewRegisteredMeter("pathdb/history/bytes/index", nil) ) + +// Metrics in generation +var ( + generatedAccountMeter = metrics.NewRegisteredMeter("pathdb/generation/account/generated", nil) + recoveredAccountMeter = metrics.NewRegisteredMeter("pathdb/generation/account/recovered", nil) + wipedAccountMeter = metrics.NewRegisteredMeter("pathdb/generation/account/wiped", nil) + missallAccountMeter = metrics.NewRegisteredMeter("pathdb/generation/account/missall", nil) + generatedStorageMeter = metrics.NewRegisteredMeter("pathdb/generation/storage/generated", nil) + recoveredStorageMeter = metrics.NewRegisteredMeter("pathdb/generation/storage/recovered", nil) + wipedStorageMeter = metrics.NewRegisteredMeter("pathdb/generation/storage/wiped", nil) + missallStorageMeter = metrics.NewRegisteredMeter("pathdb/generation/storage/missall", nil) + danglingStorageMeter = metrics.NewRegisteredMeter("pathdb/generation/storage/dangling", nil) + successfulRangeProofMeter = metrics.NewRegisteredMeter("pathdb/generation/proof/success", nil) + failedRangeProofMeter = metrics.NewRegisteredMeter("pathdb/generation/proof/failure", nil) + + accountProveCounter = metrics.NewRegisteredCounter("pathdb/generation/duration/account/prove", nil) + 
accountTrieReadCounter = metrics.NewRegisteredCounter("pathdb/generation/duration/account/trieread", nil)
+	accountSnapReadCounter = metrics.NewRegisteredCounter("pathdb/generation/duration/account/snapread", nil)
+	accountWriteCounter    = metrics.NewRegisteredCounter("pathdb/generation/duration/account/write", nil)
+	storageProveCounter    = metrics.NewRegisteredCounter("pathdb/generation/duration/storage/prove", nil)
+	storageTrieReadCounter = metrics.NewRegisteredCounter("pathdb/generation/duration/storage/trieread", nil)
+	storageSnapReadCounter = metrics.NewRegisteredCounter("pathdb/generation/duration/storage/snapread", nil)
+	storageWriteCounter    = metrics.NewRegisteredCounter("pathdb/generation/duration/storage/write", nil)
+	storageCleanCounter    = metrics.NewRegisteredCounter("pathdb/generation/duration/storage/clean", nil)
+)
diff --git a/triedb/pathdb/nodebuffer.go b/triedb/pathdb/nodebuffer.go
deleted file mode 100644
index d3492602c8b7..000000000000
--- a/triedb/pathdb/nodebuffer.go
+++ /dev/null
@@ -1,283 +0,0 @@
-// Copyright 2022 The go-ethereum Authors
-// This file is part of the go-ethereum library.
-//
-// The go-ethereum library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Lesser General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// The go-ethereum library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU Lesser General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public License
-// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
-
-package pathdb
-
-import (
-	"bytes"
-	"fmt"
-	"maps"
-	"time"
-
-	"github.com/VictoriaMetrics/fastcache"
-	"github.com/ethereum/go-ethereum/common"
-	"github.com/ethereum/go-ethereum/core/rawdb"
-	"github.com/ethereum/go-ethereum/crypto"
-	"github.com/ethereum/go-ethereum/ethdb"
-	"github.com/ethereum/go-ethereum/log"
-	"github.com/ethereum/go-ethereum/trie/trienode"
-)
-
-// nodebuffer is a collection of modified trie nodes to aggregate the disk
-// write. The content of the nodebuffer must be checked before diving into
-// disk (since it basically is not-yet-written data).
-type nodebuffer struct {
-	layers uint64                                    // The number of diff layers aggregated inside
-	size   uint64                                    // The size of aggregated writes
-	limit  uint64                                    // The maximum memory allowance in bytes
-	nodes  map[common.Hash]map[string]*trienode.Node // The dirty node set, mapped by owner and path
-}
-
-// newNodeBuffer initializes the node buffer with the provided nodes.
-func newNodeBuffer(limit int, nodes map[common.Hash]map[string]*trienode.Node, layers uint64) *nodebuffer {
-	if nodes == nil {
-		nodes = make(map[common.Hash]map[string]*trienode.Node)
-	}
-	var size uint64
-	for _, subset := range nodes {
-		for path, n := range subset {
-			size += uint64(len(n.Blob) + len(path))
-		}
-	}
-	return &nodebuffer{
-		layers: layers,
-		nodes:  nodes,
-		size:   size,
-		limit:  uint64(limit),
-	}
-}
-
-// node retrieves the trie node with given node info.
-func (b *nodebuffer) node(owner common.Hash, path []byte) (*trienode.Node, bool) {
-	subset, ok := b.nodes[owner]
-	if !ok {
-		return nil, false
-	}
-	n, ok := subset[string(path)]
-	if !ok {
-		return nil, false
-	}
-	return n, true
-}
-
-// commit merges the dirty nodes into the nodebuffer.
This operation won't take -// the ownership of the nodes map which belongs to the bottom-most diff layer. -// It will just hold the node references from the given map which are safe to -// copy. -func (b *nodebuffer) commit(nodes map[common.Hash]map[string]*trienode.Node) *nodebuffer { - var ( - delta int64 - overwrite int64 - overwriteSize int64 - ) - for owner, subset := range nodes { - current, exist := b.nodes[owner] - if !exist { - // Allocate a new map for the subset instead of claiming it directly - // from the passed map to avoid potential concurrent map read/write. - // The nodes belong to original diff layer are still accessible even - // after merging, thus the ownership of nodes map should still belong - // to original layer and any mutation on it should be prevented. - for path, n := range subset { - delta += int64(len(n.Blob) + len(path)) - } - b.nodes[owner] = maps.Clone(subset) - continue - } - for path, n := range subset { - if orig, exist := current[path]; !exist { - delta += int64(len(n.Blob) + len(path)) - } else { - delta += int64(len(n.Blob) - len(orig.Blob)) - overwrite++ - overwriteSize += int64(len(orig.Blob) + len(path)) - } - current[path] = n - } - b.nodes[owner] = current - } - b.updateSize(delta) - b.layers++ - gcNodesMeter.Mark(overwrite) - gcBytesMeter.Mark(overwriteSize) - return b -} - -// revert is the reverse operation of commit. It also merges the provided nodes -// into the nodebuffer, the difference is that the provided node set should -// revert the changes made by the last state transition. -func (b *nodebuffer) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) error { - // Short circuit if no embedded state transition to revert. - if b.layers == 0 { - return errStateUnrecoverable - } - b.layers-- - - // Reset the entire buffer if only a single transition left. - if b.layers == 0 { - b.reset() - return nil - } - var delta int64 - for owner, subset := range nodes { - current, ok := b.nodes[owner] - if !ok { - panic(fmt.Sprintf("non-existent subset (%x)", owner)) - } - for path, n := range subset { - orig, ok := current[path] - if !ok { - // There is a special case in MPT that one child is removed from - // a fullNode which only has two children, and then a new child - // with different position is immediately inserted into the fullNode. - // In this case, the clean child of the fullNode will also be - // marked as dirty because of node collapse and expansion. - // - // In case of database rollback, don't panic if this "clean" - // node occurs which is not present in buffer. - var blob []byte - if owner == (common.Hash{}) { - blob = rawdb.ReadAccountTrieNode(db, []byte(path)) - } else { - blob = rawdb.ReadStorageTrieNode(db, owner, []byte(path)) - } - // Ignore the clean node in the case described above. - if bytes.Equal(blob, n.Blob) { - continue - } - panic(fmt.Sprintf("non-existent node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex())) - } - current[path] = n - delta += int64(len(n.Blob)) - int64(len(orig.Blob)) - } - } - b.updateSize(delta) - return nil -} - -// updateSize updates the total cache size by the given delta. -func (b *nodebuffer) updateSize(delta int64) { - size := int64(b.size) + delta - if size >= 0 { - b.size = uint64(size) - return - } - s := b.size - b.size = 0 - log.Error("Invalid pathdb buffer size", "prev", common.StorageSize(s), "delta", common.StorageSize(delta)) -} - -// reset cleans up the disk cache. 
-func (b *nodebuffer) reset() { - b.layers = 0 - b.size = 0 - b.nodes = make(map[common.Hash]map[string]*trienode.Node) -} - -// empty returns an indicator if nodebuffer contains any state transition inside. -func (b *nodebuffer) empty() bool { - return b.layers == 0 -} - -// setSize sets the buffer size to the provided number, and invokes a flush -// operation if the current memory usage exceeds the new limit. -func (b *nodebuffer) setSize(size int, db ethdb.KeyValueStore, clean *fastcache.Cache, id uint64) error { - b.limit = uint64(size) - return b.flush(db, clean, id, false) -} - -// allocBatch returns a database batch with pre-allocated buffer. -func (b *nodebuffer) allocBatch(db ethdb.KeyValueStore) ethdb.Batch { - var metasize int - for owner, nodes := range b.nodes { - if owner == (common.Hash{}) { - metasize += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix - } else { - metasize += len(nodes) * (len(rawdb.TrieNodeStoragePrefix) + common.HashLength) // database key prefix + owner - } - } - return db.NewBatchWithSize((metasize + int(b.size)) * 11 / 10) // extra 10% for potential pebble internal stuff -} - -// flush persists the in-memory dirty trie node into the disk if the configured -// memory threshold is reached. Note, all data must be written atomically. -func (b *nodebuffer) flush(db ethdb.KeyValueStore, clean *fastcache.Cache, id uint64, force bool) error { - if b.size <= b.limit && !force { - return nil - } - // Ensure the target state id is aligned with the internal counter. - head := rawdb.ReadPersistentStateID(db) - if head+b.layers != id { - return fmt.Errorf("buffer layers (%d) cannot be applied on top of persisted state id (%d) to reach requested state id (%d)", b.layers, head, id) - } - var ( - start = time.Now() - batch = b.allocBatch(db) - ) - nodes := writeNodes(batch, b.nodes, clean) - rawdb.WritePersistentStateID(batch, id) - - // Flush all mutations in a single batch - size := batch.ValueSize() - if err := batch.Write(); err != nil { - return err - } - commitBytesMeter.Mark(int64(size)) - commitNodesMeter.Mark(int64(nodes)) - commitTimeTimer.UpdateSince(start) - log.Debug("Persisted pathdb nodes", "nodes", len(b.nodes), "bytes", common.StorageSize(size), "elapsed", common.PrettyDuration(time.Since(start))) - b.reset() - return nil -} - -// writeNodes writes the trie nodes into the provided database batch. -// Note this function will also inject all the newly written nodes -// into clean cache. -func writeNodes(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.Node, clean *fastcache.Cache) (total int) { - for owner, subset := range nodes { - for path, n := range subset { - if n.IsDeleted() { - if owner == (common.Hash{}) { - rawdb.DeleteAccountTrieNode(batch, []byte(path)) - } else { - rawdb.DeleteStorageTrieNode(batch, owner, []byte(path)) - } - if clean != nil { - clean.Del(cacheKey(owner, []byte(path))) - } - } else { - if owner == (common.Hash{}) { - rawdb.WriteAccountTrieNode(batch, []byte(path), n.Blob) - } else { - rawdb.WriteStorageTrieNode(batch, owner, []byte(path), n.Blob) - } - if clean != nil { - clean.Set(cacheKey(owner, []byte(path)), n.Blob) - } - } - } - total += len(subset) - } - return total -} - -// cacheKey constructs the unique key of clean cache. -func cacheKey(owner common.Hash, path []byte) []byte { - if owner == (common.Hash{}) { - return path - } - return append(owner.Bytes(), path...) 
-} diff --git a/triedb/pathdb/nodes.go b/triedb/pathdb/nodes.go new file mode 100644 index 000000000000..0b2eea6ee93a --- /dev/null +++ b/triedb/pathdb/nodes.go @@ -0,0 +1,246 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package pathdb + +import ( + "bytes" + "fmt" + "io" + "maps" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie/trienode" +) + +// nodeSet represents a collection of modified trie nodes resulting from a state +// transition, typically corresponding to a block execution. It can also represent +// the combined trie node set from several aggregated state transitions. +type nodeSet struct { + size uint64 // aggregated size of the trie node + nodes map[common.Hash]map[string]*trienode.Node // node set, mapped by owner and path +} + +// newNodeSet constructs the set with the provided dirty trie nodes. +func newNodeSet(nodes map[common.Hash]map[string]*trienode.Node) *nodeSet { + // Don't panic for the lazy callers, initialize the nil map instead + if nodes == nil { + nodes = make(map[common.Hash]map[string]*trienode.Node) + } + s := &nodeSet{nodes: nodes} + s.computeSize() + return s +} + +// computeSize calculates the database size of the held trie nodes. +func (s *nodeSet) computeSize() { + var size uint64 + for owner, subset := range s.nodes { + var prefix int + if owner != (common.Hash{}) { + prefix = common.HashLength // owner (32 bytes) for storage trie nodes + } + for path, n := range subset { + size += uint64(prefix + len(n.Blob) + len(path)) + } + } + s.size = size +} + +// node retrieves the trie node with node path and its trie identifier. +func (s *nodeSet) node(owner common.Hash, path []byte) (*trienode.Node, bool) { + subset, ok := s.nodes[owner] + if !ok { + return nil, false + } + n, ok := subset[string(path)] + if !ok { + return nil, false + } + return n, true +} + +// merge integrates the provided dirty nodes into the set. The provided nodeset +// will remain unchanged, as it may still be referenced by other layers. 
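+//
+// Subsets not yet present locally are shallow-copied rather than adopted,
+// so concurrent readers of the originating layer remain unaffected.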
+func (s *nodeSet) merge(set *nodeSet) {
+	var (
+		delta     int64   // size difference resulting from node merging
+		overwrite counter // counter of nodes being overwritten
+	)
+	for owner, subset := range set.nodes {
+		var prefix int
+		if owner != (common.Hash{}) {
+			prefix = common.HashLength
+		}
+		current, exist := s.nodes[owner]
+		if !exist {
+			for path, n := range subset {
+				delta += int64(prefix + len(n.Blob) + len(path))
+			}
+			// Perform a shallow copy of the map for the subset instead of claiming it
+			// directly from the provided nodeset to avoid potential concurrent map
+			// read/write issues. The nodes belonging to the original diff layer remain
+			// accessible even after merging. Therefore, ownership of the nodes map
+			// should still belong to the original layer, and any modifications to it
+			// should be prevented.
+			s.nodes[owner] = maps.Clone(subset)
+			continue
+		}
+		for path, n := range subset {
+			if orig, exist := current[path]; !exist {
+				delta += int64(prefix + len(n.Blob) + len(path))
+			} else {
+				delta += int64(len(n.Blob) - len(orig.Blob))
+				overwrite.add(prefix + len(orig.Blob) + len(path))
+			}
+			current[path] = n
+		}
+		s.nodes[owner] = current
+	}
+	overwrite.report(gcTrieNodeMeter, gcTrieNodeBytesMeter)
+	s.updateSize(delta)
+}
+
+// revert merges the provided trie nodes into the set. This should reverse the
+// changes made by the most recent state transition.
+func (s *nodeSet) revert(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) {
+	var delta int64
+	for owner, subset := range nodes {
+		current, ok := s.nodes[owner]
+		if !ok {
+			panic(fmt.Sprintf("non-existent subset (%x)", owner))
+		}
+		for path, n := range subset {
+			orig, ok := current[path]
+			if !ok {
+				// There is a special case in the merkle tree where one child is
+				// removed from a fullNode which only has two children, and then
+				// a new child with a different position is immediately inserted
+				// into the fullNode. In this case, the clean child of the fullNode
+				// will also be marked as dirty because of node collapse and
+				// expansion. In case of database rollback, don't panic if this
+				// "clean" node occurs, even though it is not present in the buffer.
+				var blob []byte
+				if owner == (common.Hash{}) {
+					blob = rawdb.ReadAccountTrieNode(db, []byte(path))
+				} else {
+					blob = rawdb.ReadStorageTrieNode(db, owner, []byte(path))
+				}
+				// Ignore the clean node in the case described above.
+				if bytes.Equal(blob, n.Blob) {
+					continue
+				}
+				panic(fmt.Sprintf("non-existent node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex()))
+			}
+			current[path] = n
+			delta += int64(len(n.Blob)) - int64(len(orig.Blob))
+		}
+	}
+	s.updateSize(delta)
+}
+
+// journalNode represents a trie node persisted in the journal.
+type journalNode struct {
+	Path []byte // Path of the node in the trie
+	Blob []byte // RLP-encoded trie node blob, nil means the node is deleted
+}
+
+// journalNodes represents a list of trie nodes belonging to a single account
+// or the main account trie.
+type journalNodes struct {
+	Owner common.Hash
+	Nodes []journalNode
+}
+
+// encode serializes the content of trie nodes into the provided writer.
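+// The nodes are grouped by trie owner and emitted as a single RLP list,
+// mirroring the layout that decode expects when reading the journal back.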
+func (s *nodeSet) encode(w io.Writer) error { + nodes := make([]journalNodes, 0, len(s.nodes)) + for owner, subset := range s.nodes { + entry := journalNodes{Owner: owner} + for path, node := range subset { + entry.Nodes = append(entry.Nodes, journalNode{ + Path: []byte(path), + Blob: node.Blob, + }) + } + nodes = append(nodes, entry) + } + return rlp.Encode(w, nodes) +} + +// decode deserializes the content from the rlp stream into the nodeset. +func (s *nodeSet) decode(r *rlp.Stream) error { + var encoded []journalNodes + if err := r.Decode(&encoded); err != nil { + return fmt.Errorf("load nodes: %v", err) + } + nodes := make(map[common.Hash]map[string]*trienode.Node) + for _, entry := range encoded { + subset := make(map[string]*trienode.Node) + for _, n := range entry.Nodes { + if len(n.Blob) > 0 { + subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob) + } else { + subset[string(n.Path)] = trienode.NewDeleted() + } + } + nodes[entry.Owner] = subset + } + s.nodes = nodes + s.computeSize() + return nil +} + +// write flushes nodes into the provided database batch as a whole. +func (s *nodeSet) write(batch ethdb.Batch, nodes map[common.Hash]map[string]*trienode.Node, clean *fastcache.Cache) int { + return writeNodes(batch, nodes, clean) +} + +// reset clears all cached trie node data. +func (s *nodeSet) reset() { + s.nodes = make(map[common.Hash]map[string]*trienode.Node) + s.size = 0 +} + +// updateSize updates the total cache size by the given delta. +func (s *nodeSet) updateSize(delta int64) { + size := int64(s.size) + delta + if size >= 0 { + s.size = uint64(size) + return + } + log.Error("Nodeset size underflow", "prev", common.StorageSize(s.size), "delta", common.StorageSize(delta)) + s.size = 0 +} + +// dbsize returns the approximate size of db write. +func (s *nodeSet) dbsize() int { + var m int + for owner, nodes := range s.nodes { + if owner == (common.Hash{}) { + m += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix + } else { + m += len(nodes) * (len(rawdb.TrieNodeStoragePrefix)) // database key prefix + } + } + return m + int(s.size) +} diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go index 6a58493ba694..da46f5368fbd 100644 --- a/triedb/pathdb/reader.go +++ b/triedb/pathdb/reader.go @@ -21,7 +21,9 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/triedb/database" ) @@ -45,14 +47,14 @@ func (loc *nodeLoc) string() string { return fmt.Sprintf("loc: %s, depth: %d", loc.loc, loc.depth) } -// reader implements the database.Reader interface, providing the functionalities to +// reader implements the database.NodeReader interface, providing the functionalities to // retrieve trie nodes by wrapping the internal state layer. type reader struct { layer layer noHashCheck bool } -// Node implements database.Reader interface, retrieving the node with specified +// Node implements database.NodeReader interface, retrieving the node with specified // node info. Don't modify the returned byte slice since it's not deep-copied // and still be referenced by database. 
func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, error) { @@ -84,11 +86,53 @@ func (r *reader) Node(owner common.Hash, path []byte, hash common.Hash) ([]byte, return blob, nil } -// Reader retrieves a layer belonging to the given state root. -func (db *Database) Reader(root common.Hash) (database.Reader, error) { +// Account directly retrieves the account associated with a particular hash in +// the slim data format. An error will be returned if the read operation exits +// abnormally. Specifically, if the layer is already stale. +// +// No error will be returned if the requested account is not found in database +func (r *reader) Account(hash common.Hash) (*types.SlimAccount, error) { + blob, err := r.layer.account(hash, 0) + if err != nil { + return nil, err + } + if len(blob) == 0 { + return nil, nil + } + account := new(types.SlimAccount) + if err := rlp.DecodeBytes(blob, account); err != nil { + panic(err) + } + return account, nil +} + +// Storage directly retrieves the storage data associated with a particular hash, +// within a particular account. An error will be returned if the read operation +// exits abnormally. Specifically, if the layer is already stale. +// +// Note: +// - the returned storage data is not a copy, please don't modify it +// - no error will be returned if the requested slot is not found in database +func (r *reader) Storage(accountHash, storageHash common.Hash) ([]byte, error) { + return r.layer.storage(accountHash, storageHash, 0) +} + +// NodeReader returns a reader that allows access to the trie node data associated +// with the specified state. +func (db *Database) NodeReader(root common.Hash) (database.NodeReader, error) { layer := db.tree.get(root) if layer == nil { return nil, fmt.Errorf("state %#x is not available", root) } return &reader{layer: layer, noHashCheck: db.isVerkle}, nil } + +// StateReader returns a reader that allows access to the state data associated +// with the specified state. +func (db *Database) StateReader(root common.Hash) (database.StateReader, error) { + layer := db.tree.get(root) + if layer == nil { + return nil, fmt.Errorf("state %#x is not available", root) + } + return &reader{layer: layer}, nil +} diff --git a/triedb/pathdb/states.go b/triedb/pathdb/states.go new file mode 100644 index 000000000000..31e6c5d72ce1 --- /dev/null +++ b/triedb/pathdb/states.go @@ -0,0 +1,703 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. 
If not, see <http://www.gnu.org/licenses/>. + +package pathdb + +import ( + "errors" + "fmt" + "io" + "slices" + "sync" + + "github.com/VictoriaMetrics/fastcache" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" + "golang.org/x/exp/maps" +) + +// counter helps in tracking items and their corresponding sizes. +type counter struct { + n int + size int +} + +// add size to the counter and increase the item counter. +func (c *counter) add(size int) { + c.n++ + c.size += size +} + +// report uploads the cached statistics to meters. +func (c *counter) report(count metrics.Meter, size metrics.Meter) { + count.Mark(int64(c.n)) + size.Mark(int64(c.size)) +} + +// destruct represents the record of destruct set modification. +type destruct struct { + Hash common.Hash + Exist bool +} + +// journal contains the list of modifications applied to the destruct set. +type journal struct { + destructs [][]destruct +} + +func (j *journal) add(entries []destruct) { + j.destructs = append(j.destructs, entries) +} + +func (j *journal) pop() ([]destruct, error) { + if len(j.destructs) == 0 { + return nil, errors.New("destruct journal is not available") + } + last := j.destructs[len(j.destructs)-1] + j.destructs = j.destructs[:len(j.destructs)-1] + return last, nil +} + +func (j *journal) reset() { + j.destructs = nil +} + +func (j *journal) encode(w io.Writer) error { + return rlp.Encode(w, j.destructs) +} + +func (j *journal) decode(r *rlp.Stream) error { + var dec [][]destruct + if err := r.Decode(&dec); err != nil { + return err + } + j.destructs = dec + return nil +} + +// stateSet represents a collection of state modifications belonging to a +// transition (a block execution) or several aggregated transitions. +type stateSet struct { + // destructSet is a very special helper marker. If an account is marked as + // deleted, then it's recorded in this set. However, it's allowed that an + // account is included here but still available in other sets (e.g., + // accountData and storageData). The reason is the diff layer includes all + // the changes in a *block*. It can happen that: + // + // - in the tx_1, account A is deleted + // - in the tx_2, account A is recreated + // + // But we still need this marker to indicate the "old" A is deleted; all + // data in the other sets belongs to the "new" A. + destructSet map[common.Hash]struct{} // Keyed markers for deleted (and potentially) recreated accounts + accountData map[common.Hash][]byte // Keyed accounts for direct retrieval (nil is not expected) + storageData map[common.Hash]map[common.Hash][]byte // Keyed storage slots for direct retrieval, one per account (nil means deleted) + size uint64 // Memory size of the state data (destructSet, accountData and storageData) + + journal *journal // Track the modifications to destructSet, used for reversal + accountListSorted []common.Hash // List of accounts for iteration. If it exists, it's sorted, otherwise it's nil + storageListSorted map[common.Hash][]common.Hash // List of storage slots for iterated retrievals, one per account. Any existing lists are sorted if non-nil + lock sync.RWMutex // Lock for guarding the two lists above +} + +// newStates constructs the state set with the provided data.
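+// +// Nil maps are tolerated and replaced with empty ones, so callers may omit +// categories with no changes; the initial size is computed via check(), which +// also rejects nil account blobs and nil storage subsets.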
+func newStates(destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte) *stateSet { + // Don't panic for the lazy callers, initialize the nil maps instead. + if destructs == nil { + destructs = make(map[common.Hash]struct{}) + } + if accounts == nil { + accounts = make(map[common.Hash][]byte) + } + if storages == nil { + storages = make(map[common.Hash]map[common.Hash][]byte) + } + s := &stateSet{ + destructSet: destructs, + accountData: accounts, + storageData: storages, + journal: &journal{}, + storageListSorted: make(map[common.Hash][]common.Hash), + } + s.size = s.check() + return s +} + +// account returns the account data associated with the specified address hash. +func (s *stateSet) account(hash common.Hash) ([]byte, bool) { + // If the account is known locally, return it + if data, ok := s.accountData[hash]; ok { + return data, true + } + // If the account is known locally, but deleted, return nil + if _, ok := s.destructSet[hash]; ok { + return nil, true + } + return nil, false // account is unknown in this set +} + +// storage returns the storage slot associated with the specified address hash +// and storage key hash. +func (s *stateSet) storage(accountHash, storageHash common.Hash) ([]byte, bool) { + // If the account is known locally, try to resolve the slot locally + if storage, ok := s.storageData[accountHash]; ok { + if data, ok := storage[storageHash]; ok { + return data, true + } + } + // If the account is known locally, but deleted, return an empty slot + if _, ok := s.destructSet[accountHash]; ok { + return nil, true + } + return nil, false // storage is unknown in this set +} + +// check sanitizes accounts and storage slots to ensure data validity. +// Additionally, it computes the total memory size occupied by the maps. +func (s *stateSet) check() uint64 { + size := len(s.destructSet) * common.HashLength + for accountHash, blob := range s.accountData { + if blob == nil { + panic(fmt.Sprintf("account %#x nil", accountHash)) // nil account blob is not permitted + } + size += common.HashLength + len(blob) + } + for accountHash, slots := range s.storageData { + if slots == nil { + panic(fmt.Sprintf("storage %#x nil", accountHash)) // nil slots is not permitted + } + for _, val := range slots { + size += 2*common.HashLength + len(val) + } + } + return uint64(size) +} + +// accountList returns a sorted list of all accounts in this state set, including +// the deleted ones. +// +// Note, the returned slice is not a copy, so do not modify it. +func (s *stateSet) accountList() []common.Hash { + // If an old list already exists, return it + s.lock.RLock() + list := s.accountListSorted + s.lock.RUnlock() + + if list != nil { + return list + } + // No old sorted account list exists, generate a new one + s.lock.Lock() + defer s.lock.Unlock() + + s.accountListSorted = make([]common.Hash, 0, len(s.destructSet)+len(s.accountData)) + for hash := range s.accountData { + s.accountListSorted = append(s.accountListSorted, hash) + } + for hash := range s.destructSet { + if _, ok := s.accountData[hash]; !ok { + s.accountListSorted = append(s.accountListSorted, hash) + } + } + slices.SortFunc(s.accountListSorted, common.Hash.Cmp) + return s.accountListSorted +} + +// storageList returns a sorted list of all storage slot hashes in this state set +// for the given account. If the whole storage is destructed in this layer, then +// an additional flag *destructed = true* will be returned, otherwise the flag is
Besides, the returned list will include the hashes of deleted storage slots. +// Note a special case: an account may be deleted in a prior tx and recreated in +// a following tx with some storage slots set. In this case the returned list is +// not empty but the flag is true. +// +// Note, the returned slice is not a copy, so do not modify it. +func (s *stateSet) storageList(accountHash common.Hash) ([]common.Hash, bool) { + s.lock.RLock() + _, destructed := s.destructSet[accountHash] + if _, ok := s.storageData[accountHash]; !ok { + // Account not tracked by this layer + s.lock.RUnlock() + return nil, destructed + } + // If an old list already exists, return it + if list, exist := s.storageListSorted[accountHash]; exist { + s.lock.RUnlock() + return list, destructed // the cached list can't be nil + } + s.lock.RUnlock() + + // No old sorted storage list exists, generate a new one + s.lock.Lock() + defer s.lock.Unlock() + + storageList := maps.Keys(s.storageData[accountHash]) + slices.SortFunc(storageList, common.Hash.Cmp) + s.storageListSorted[accountHash] = storageList + return storageList, destructed +} + +// clearCache invalidates the cached account list and storage lists. +func (s *stateSet) clearCache() { + s.lock.Lock() + defer s.lock.Unlock() + + s.accountListSorted = nil + s.storageListSorted = make(map[common.Hash][]common.Hash) +} + +// merge integrates the accounts and storages from the external set into the +// local set, ensuring the result reflects the combined state of both. +// +// The provided state set will remain unchanged, as it may still be referenced +// by other layers. +func (s *stateSet) merge(set *stateSet) { + var ( + delta int + accountOverwrites counter + storageOverwrites counter + destructs []destruct + ) + // Apply account deletion markers and discard any previously cached data if present + for accountHash := range set.destructSet { + if origin, ok := s.accountData[accountHash]; ok { + delta -= common.HashLength + len(origin) + accountOverwrites.add(common.HashLength + len(origin)) + delete(s.accountData, accountHash) + } + if _, ok := s.storageData[accountHash]; ok { + // Looping through the nested map may cause slight performance degradation. + // However, since account destruction is no longer possible after the Cancun + // fork, this overhead is considered acceptable. + for _, val := range s.storageData[accountHash] { + delta -= 2*common.HashLength + len(val) + storageOverwrites.add(2*common.HashLength + len(val)) + } + delete(s.storageData, accountHash) + } + // Keep track of whether the account has already been marked as destructed. + // This additional marker is useful for undoing the merge operation.
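+ // The journal below records, for every incoming destruct marker, whether it + // was already present locally, so a later revert removes only the newly + // added markers.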
+ _, exist := s.destructSet[accountHash] + destructs = append(destructs, destruct{ + Hash: accountHash, + Exist: exist, + }) + if exist { + continue + } + delta += common.HashLength + s.destructSet[accountHash] = struct{}{} + } + s.journal.add(destructs) + + // Apply the updated account data + for accountHash, data := range set.accountData { + if origin, ok := s.accountData[accountHash]; ok { + delta += len(data) - len(origin) + accountOverwrites.add(common.HashLength + len(origin)) + } else { + delta += common.HashLength + len(data) + } + s.accountData[accountHash] = data + } + // Apply all the updated storage slots (individually) + for accountHash, storage := range set.storageData { + // If storage didn't exist (or was deleted) in the set, overwrite blindly + if _, ok := s.storageData[accountHash]; !ok { + // To prevent potential concurrent map read/write issues, allocate a + // new map for the storage instead of claiming it directly from the + // passed external set. Even after merging, the slots belonging to the + // external state set remain accessible, so ownership of the map should + // not be taken, and any mutation on it should be avoided. + slots := make(map[common.Hash][]byte) + for storageHash, data := range storage { + slots[storageHash] = data + delta += 2*common.HashLength + len(data) + } + s.storageData[accountHash] = slots + continue + } + // Storage exists in both local and external set, merge the slots + slots := s.storageData[accountHash] + for storageHash, data := range storage { + if origin, ok := slots[storageHash]; ok { + delta += len(data) - len(origin) + storageOverwrites.add(2*common.HashLength + len(origin)) + } else { + delta += 2*common.HashLength + len(data) + } + slots[storageHash] = data + } + } + accountOverwrites.report(gcAccountMeter, gcAccountBytesMeter) + storageOverwrites.report(gcStorageMeter, gcStorageBytesMeter) + s.clearCache() + s.updateSize(delta) +} + +// revert takes the original values of accounts and storages as input and reverts +// the latest state transition applied to the state set. +func (s *stateSet) revert(accountOrigin map[common.Hash][]byte, storageOrigin map[common.Hash]map[common.Hash][]byte) { + // Load the destruct journal whose availability is always expected + destructs, err := s.journal.pop() + if err != nil { + panic(fmt.Sprintf("failed to revert state, %v", err)) + } + // Revert the modifications to the destruct set using the journal + var delta int + for _, entry := range destructs { + if entry.Exist { + continue + } + delete(s.destructSet, entry.Hash) + delta -= common.HashLength + } + // Overwrite the account data with the original values blindly + for addrHash, blob := range accountOrigin { + if len(blob) == 0 { + if data, ok := s.accountData[addrHash]; ok { + delta -= common.HashLength + len(data) + } else { + panic(fmt.Sprintf("non-existent account for deleting, %x", addrHash)) + } + delete(s.accountData, addrHash) + } else { + if data, ok := s.accountData[addrHash]; ok { + delta += len(blob) - len(data) + } else { + delta += len(blob) + common.HashLength + } + s.accountData[addrHash] = blob + } + } + // Overwrite the storage data with the original values blindly + for addrHash, storage := range storageOrigin { + // It is possible that the storage set does not exist because + // the whole storage was deleted.
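+ // In that case a fresh map is allocated below, repopulated with the + // original slot values, and only written back if it ends up non-empty.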
+ slots := s.storageData[addrHash] + if len(slots) == 0 { + slots = make(map[common.Hash][]byte) + } + for storageHash, blob := range storage { + if len(blob) == 0 { + if data, ok := slots[storageHash]; ok { + delta -= 2*common.HashLength + len(data) + } else { + panic(fmt.Sprintf("non-existent storage slot for deleting, %x %x", addrHash, storageHash)) + } + delete(slots, storageHash) + } else { + if data, ok := slots[storageHash]; ok { + delta += len(blob) - len(data) + } else { + delta += 2*common.HashLength + len(blob) + } + slots[storageHash] = blob + } + } + if len(slots) == 0 { + delete(s.storageData, addrHash) + } else { + s.storageData[addrHash] = slots + } + } + s.clearCache() + s.updateSize(delta) +} + +// updateSize updates the total cache size by the given delta. +func (s *stateSet) updateSize(delta int) { + size := int64(s.size) + int64(delta) + if size >= 0 { + s.size = uint64(size) + return + } + log.Error("Stateset size underflow", "prev", common.StorageSize(s.size), "delta", common.StorageSize(delta)) + s.size = 0 +} + +// encode serializes the content of state set into the provided writer. +func (s *stateSet) encode(w io.Writer) error { + // Encode destructs + destructs := make([]common.Hash, 0, len(s.destructSet)) + for hash := range s.destructSet { + destructs = append(destructs, hash) + } + if err := rlp.Encode(w, destructs); err != nil { + return err + } + // Encode accounts + type Account struct { + Hash common.Hash + Blob []byte + } + accounts := make([]Account, 0, len(s.accountData)) + for hash, blob := range s.accountData { + accounts = append(accounts, Account{Hash: hash, Blob: blob}) + } + if err := rlp.Encode(w, accounts); err != nil { + return err + } + // Encode storages + type Storage struct { + Hash common.Hash + Keys []common.Hash + Blobs [][]byte + } + storages := make([]Storage, 0, len(s.storageData)) + for accountHash, slots := range s.storageData { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storages = append(storages, Storage{Hash: accountHash, Keys: keys, Blobs: vals}) + } + if err := rlp.Encode(w, storages); err != nil { + return err + } + // Encode journal + return s.journal.encode(w) +} + +// decode deserializes the content from the rlp stream into the state set. 
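+// +// The field order must mirror encode exactly: destructs, accounts, storages, +// then the destruct journal; the cached size is recomputed at the end via check().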
+func (s *stateSet) decode(r *rlp.Stream) error { + // Decode destructs + var ( + destructs []common.Hash + destructSet = make(map[common.Hash]struct{}) + ) + if err := r.Decode(&destructs); err != nil { + return fmt.Errorf("load diff destructs: %v", err) + } + for _, hash := range destructs { + destructSet[hash] = struct{}{} + } + s.destructSet = destructSet + + // Decode accounts + type Account struct { + Hash common.Hash + Blob []byte + } + var ( + accounts []Account + accountSet = make(map[common.Hash][]byte) + ) + if err := r.Decode(&accounts); err != nil { + return fmt.Errorf("load diff accounts: %v", err) + } + for _, account := range accounts { + accountSet[account.Hash] = account.Blob + } + s.accountData = accountSet + + // Decode storages + type Storage struct { + AccountHash common.Hash + Keys []common.Hash + Vals [][]byte + } + var ( + storages []Storage + storageSet = make(map[common.Hash]map[common.Hash][]byte) + ) + if err := r.Decode(&storages); err != nil { + return fmt.Errorf("load diff storage: %v", err) + } + for _, entry := range storages { + storageSet[entry.AccountHash] = make(map[common.Hash][]byte) + for i := 0; i < len(entry.Keys); i++ { + storageSet[entry.AccountHash][entry.Keys[i]] = entry.Vals[i] + } + } + s.storageData = storageSet + s.storageListSorted = make(map[common.Hash][]common.Hash) + + // Decode journal + s.journal = &journal{} + if err := s.journal.decode(r); err != nil { + return err + } + s.size = s.check() + return nil +} + +// write flushes state mutations into the provided database batch as a whole. +func (s *stateSet) write(db ethdb.KeyValueStore, batch ethdb.Batch, genMarker []byte, clean *fastcache.Cache) (int, int) { + return writeStates(db, batch, genMarker, s.destructSet, s.accountData, s.storageData, clean) +} + +// reset clears all cached state data, including any optional sorted lists that +// may have been generated. +func (s *stateSet) reset() { + s.destructSet = make(map[common.Hash]struct{}) + s.accountData = make(map[common.Hash][]byte) + s.storageData = make(map[common.Hash]map[common.Hash][]byte) + s.size = 0 + s.journal.reset() + s.accountListSorted = nil + s.storageListSorted = make(map[common.Hash][]common.Hash) +} + +// dbsize returns the approximate size for db write. +func (s *stateSet) dbsize() int { + m := (len(s.destructSet) + len(s.accountData)) * len(rawdb.SnapshotAccountPrefix) + for _, slots := range s.storageData { + m += len(slots) * len(rawdb.SnapshotStoragePrefix) + } + return m + int(s.size) +} + +// StateSetWithOrigin wraps the state set with additional original values of the +// mutated states. +type StateSetWithOrigin struct { + *stateSet + + // AccountOrigin represents the account data before the state transition, + // corresponding to both the accountData and destructSet. It's keyed by the + // account address. The nil value means the account was not present before. + accountOrigin map[common.Address][]byte + + // StorageOrigin represents the storage data before the state transition, + // corresponding to storageData and deleted slots of destructSet. It's keyed + // by the account address and slot key hash. The nil value means the slot was + // not present. + storageOrigin map[common.Address]map[common.Hash][]byte + + // Memory size of the state data (accountOrigin and storageOrigin) + size uint64 +} + +// NewStateSetWithOrigin constructs the state set with the provided data. 
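+// +// The origin maps capture the pre-transition values keyed by account address, +// which is what later allows the transition to be reverted; nil origin maps +// are tolerated and replaced with empty ones.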
+func NewStateSetWithOrigin(destructs map[common.Hash]struct{}, accounts map[common.Hash][]byte, storages map[common.Hash]map[common.Hash][]byte, accountOrigin map[common.Address][]byte, storageOrigin map[common.Address]map[common.Hash][]byte) *StateSetWithOrigin { + // Don't panic for the lazy callers, initialize the nil maps instead. + if accountOrigin == nil { + accountOrigin = make(map[common.Address][]byte) + } + if storageOrigin == nil { + storageOrigin = make(map[common.Address]map[common.Hash][]byte) + } + // Count the memory size occupied by the origin data. Note that each slot key + // here uses 2*common.HashLength to keep consistent with the calculation method + // of stateSet. + var size int + for _, data := range accountOrigin { + size += common.HashLength + len(data) + } + for _, slots := range storageOrigin { + for _, data := range slots { + size += 2*common.HashLength + len(data) + } + } + set := newStates(destructs, accounts, storages) + return &StateSetWithOrigin{ + stateSet: set, + accountOrigin: accountOrigin, + storageOrigin: storageOrigin, + size: set.size + uint64(size), + } +} + +// encode serializes the content of the state set into the provided writer. +func (s *StateSetWithOrigin) encode(w io.Writer) error { + // Encode state set + if err := s.stateSet.encode(w); err != nil { + return err + } + // Encode accounts + type Account struct { + Address common.Address + Blob []byte + } + accounts := make([]Account, 0, len(s.accountOrigin)) + for address, blob := range s.accountOrigin { + accounts = append(accounts, Account{Address: address, Blob: blob}) + } + if err := rlp.Encode(w, accounts); err != nil { + return err + } + // Encode storages + type Storage struct { + Address common.Address + Keys []common.Hash + Blobs [][]byte + } + storages := make([]Storage, 0, len(s.storageOrigin)) + for address, slots := range s.storageOrigin { + keys := make([]common.Hash, 0, len(slots)) + vals := make([][]byte, 0, len(slots)) + for key, val := range slots { + keys = append(keys, key) + vals = append(vals, val) + } + storages = append(storages, Storage{Address: address, Keys: keys, Blobs: vals}) + } + return rlp.Encode(w, storages) +} + +// decode deserializes the content from the rlp stream into the state set.
+func (s *StateSetWithOrigin) decode(r *rlp.Stream) error { + if s.stateSet == nil { + s.stateSet = &stateSet{} + } + if err := s.stateSet.decode(r); err != nil { + return err + } + // Decode account origin + type Account struct { + Address common.Address + Blob []byte + } + var ( + accounts []Account + accountSet = make(map[common.Address][]byte) + ) + if err := r.Decode(&accounts); err != nil { + return fmt.Errorf("load diff account origin set: %v", err) + } + for _, account := range accounts { + accountSet[account.Address] = account.Blob + } + s.accountOrigin = accountSet + + // Decode storage origin + type Storage struct { + Address common.Address + Keys []common.Hash + Blobs [][]byte + } + var ( + storages []Storage + storageSet = make(map[common.Address]map[common.Hash][]byte) + ) + if err := r.Decode(&storages); err != nil { + return fmt.Errorf("load diff storage origin: %v", err) + } + for _, storage := range storages { + storageSet[storage.Address] = make(map[common.Hash][]byte) + for i := 0; i < len(storage.Keys); i++ { + storageSet[storage.Address][storage.Keys[i]] = storage.Blobs[i] + } + } + s.storageOrigin = storageSet + return nil +} diff --git a/triedb/pathdb/states_test.go b/triedb/pathdb/states_test.go new file mode 100644 index 000000000000..5ab77de658e0 --- /dev/null +++ b/triedb/pathdb/states_test.go @@ -0,0 +1,459 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. 
If not, see <http://www.gnu.org/licenses/>. + +package pathdb + +import ( + "bytes" + "reflect" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/rlp" +) + +func TestStatesMerge(t *testing.T) { + a := newStates( + nil, + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa0}, + common.Hash{0xb}: {0xb0}, + common.Hash{0xc}: {0xc0}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x10}, + common.Hash{0x2}: {0x20}, + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x10}, + }, + common.Hash{0xc}: { + common.Hash{0x1}: {0x10}, + }, + }, + ) + b := newStates( + map[common.Hash]struct{}{ + common.Hash{0xa}: {}, + common.Hash{0xc}: {}, + }, + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa1}, + common.Hash{0xb}: {0xb1}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x11}, + common.Hash{0x3}: {0x31}, + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x11}, + }, + }, + ) + a.merge(b) + + blob, exist := a.account(common.Hash{0xa}) + if !exist || !bytes.Equal(blob, []byte{0xa1}) { + t.Error("Unexpected value for account a") + } + blob, exist = a.account(common.Hash{0xb}) + if !exist || !bytes.Equal(blob, []byte{0xb1}) { + t.Error("Unexpected value for account b") + } + blob, exist = a.account(common.Hash{0xc}) + if !exist || len(blob) != 0 { + t.Error("Unexpected value for account c") + } + // unknown account + blob, exist = a.account(common.Hash{0xd}) + if exist || len(blob) != 0 { + t.Error("Unexpected value for account d") + } + + blob, exist = a.storage(common.Hash{0xa}, common.Hash{0x1}) + if !exist || !bytes.Equal(blob, []byte{0x11}) { + t.Error("Unexpected value for a's storage") + } + blob, exist = a.storage(common.Hash{0xa}, common.Hash{0x2}) + if !exist || len(blob) != 0 { + t.Error("Unexpected value for a's storage") + } + blob, exist = a.storage(common.Hash{0xa}, common.Hash{0x3}) + if !exist || !bytes.Equal(blob, []byte{0x31}) { + t.Error("Unexpected value for a's storage") + } + blob, exist = a.storage(common.Hash{0xb}, common.Hash{0x1}) + if !exist || !bytes.Equal(blob, []byte{0x11}) { + t.Error("Unexpected value for b's storage") + } + blob, exist = a.storage(common.Hash{0xc}, common.Hash{0x1}) + if !exist || len(blob) != 0 { + t.Error("Unexpected value for c's storage") + } + + // unknown storage slots + blob, exist = a.storage(common.Hash{0xd}, common.Hash{0x1}) + if exist || len(blob) != 0 { + t.Error("Unexpected value for d's storage") + } +} + +func TestStatesRevert(t *testing.T) { + a := newStates( + nil, + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa0}, + common.Hash{0xb}: {0xb0}, + common.Hash{0xc}: {0xc0}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x10}, + common.Hash{0x2}: {0x20}, + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x10}, + }, + common.Hash{0xc}: { + common.Hash{0x1}: {0x10}, + }, + }, + ) + b := newStates( + map[common.Hash]struct{}{ + common.Hash{0xa}: {}, + common.Hash{0xc}: {}, + }, + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa1}, + common.Hash{0xb}: {0xb1}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x11}, + common.Hash{0x3}: {0x31}, + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x11}, + }, + }, + ) + a.merge(b) + a.revert( + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa0}, + common.Hash{0xb}: {0xb0}, + common.Hash{0xc}: {0xc0}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x10}, + common.Hash{0x2}: {0x20}, +
common.Hash{0x3}: {}, + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x10}, + }, + common.Hash{0xc}: { + common.Hash{0x1}: {0x10}, + }, + }, + ) + + blob, exist := a.account(common.Hash{0xa}) + if !exist || !bytes.Equal(blob, []byte{0xa0}) { + t.Error("Unexpected value for account a") + } + blob, exist = a.account(common.Hash{0xb}) + if !exist || !bytes.Equal(blob, []byte{0xb0}) { + t.Error("Unexpected value for account b") + } + blob, exist = a.account(common.Hash{0xc}) + if !exist || !bytes.Equal(blob, []byte{0xc0}) { + t.Error("Unexpected value for account c") + } + // unknown account + blob, exist = a.account(common.Hash{0xd}) + if exist || len(blob) != 0 { + t.Error("Unexpected value for account d") + } + + blob, exist = a.storage(common.Hash{0xa}, common.Hash{0x1}) + if !exist || !bytes.Equal(blob, []byte{0x10}) { + t.Error("Unexpected value for a's storage") + } + blob, exist = a.storage(common.Hash{0xa}, common.Hash{0x2}) + if !exist || !bytes.Equal(blob, []byte{0x20}) { + t.Error("Unexpected value for a's storage") + } + _, exist = a.storage(common.Hash{0xa}, common.Hash{0x3}) + if exist { + t.Error("Unexpected value for a's storage") + } + blob, exist = a.storage(common.Hash{0xb}, common.Hash{0x1}) + if !exist || !bytes.Equal(blob, []byte{0x10}) { + t.Error("Unexpected value for b's storage") + } + blob, exist = a.storage(common.Hash{0xc}, common.Hash{0x1}) + if !exist || !bytes.Equal(blob, []byte{0x10}) { + t.Error("Unexpected value for c's storage") + } + // unknown storage slots + blob, exist = a.storage(common.Hash{0xd}, common.Hash{0x1}) + if exist || len(blob) != 0 { + t.Error("Unexpected value for d's storage") + } +} + +func TestDestructJournalEncode(t *testing.T) { + var enc journal + enc.add(nil) // nil + enc.add([]destruct{}) // zero size destructs + enc.add([]destruct{ + {Hash: common.HexToHash("0xdeadbeef"), Exist: true}, + {Hash: common.HexToHash("0xcafebabe"), Exist: false}, + }) + var buf bytes.Buffer + enc.encode(&buf) + + var dec journal + if err := dec.decode(rlp.NewStream(&buf, 0)); err != nil { + t.Fatalf("Failed to decode journal, %v", err) + } + if len(enc.destructs) != len(dec.destructs) { + t.Fatalf("Unexpected destruct journal length, want: %d, got: %d", len(enc.destructs), len(dec.destructs)) + } + for i := 0; i < len(enc.destructs); i++ { + want := enc.destructs[i] + got := dec.destructs[i] + if len(want) == 0 && len(got) == 0 { + continue + } + if !reflect.DeepEqual(want, got) { + t.Fatalf("Unexpected destruct, want: %v, got: %v", want, got) + } + } +} + +func TestStatesEncode(t *testing.T) { + s := newStates( + map[common.Hash]struct{}{ + common.Hash{0x1}: {}, + }, + map[common.Hash][]byte{ + common.Hash{0x1}: {0x1}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0x1}: { + common.Hash{0x1}: {0x1}, + }, + }, + ) + buf := bytes.NewBuffer(nil) + if err := s.encode(buf); err != nil { + t.Fatalf("Failed to encode states, %v", err) + } + var dec stateSet + if err := dec.decode(rlp.NewStream(buf, 0)); err != nil { + t.Fatalf("Failed to decode states, %v", err) + } + if !reflect.DeepEqual(s.destructSet, dec.destructSet) { + t.Fatal("Unexpected destruct set") + } + if !reflect.DeepEqual(s.accountData, dec.accountData) { + t.Fatal("Unexpected account data") + } + if !reflect.DeepEqual(s.storageData, dec.storageData) { + t.Fatal("Unexpected storage data") + } +} + +func TestStateWithOriginEncode(t *testing.T) { + s := NewStateSetWithOrigin( + map[common.Hash]struct{}{ + common.Hash{0x1}: {}, + }, + map[common.Hash][]byte{ + common.Hash{0x1}: 
{0x1}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0x1}: { + common.Hash{0x1}: {0x1}, + }, + }, + map[common.Address][]byte{ + common.Address{0x1}: {0x1}, + }, + map[common.Address]map[common.Hash][]byte{ + common.Address{0x1}: { + common.Hash{0x1}: {0x1}, + }, + }, + ) + buf := bytes.NewBuffer(nil) + if err := s.encode(buf); err != nil { + t.Fatalf("Failed to encode states, %v", err) + } + var dec StateSetWithOrigin + if err := dec.decode(rlp.NewStream(buf, 0)); err != nil { + t.Fatalf("Failed to decode states, %v", err) + } + if !reflect.DeepEqual(s.destructSet, dec.destructSet) { + t.Fatal("Unexpected destruct set") + } + if !reflect.DeepEqual(s.accountData, dec.accountData) { + t.Fatal("Unexpected account data") + } + if !reflect.DeepEqual(s.storageData, dec.storageData) { + t.Fatal("Unexpected storage data") + } + if !reflect.DeepEqual(s.accountOrigin, dec.accountOrigin) { + t.Fatal("Unexpected account origin data") + } + if !reflect.DeepEqual(s.storageOrigin, dec.storageOrigin) { + t.Fatal("Unexpected storage origin data") + } +} + +func TestStateSizeTracking(t *testing.T) { + expSizeA := 3*(common.HashLength+1) + /* account data */ + 2*(2*common.HashLength+1) + /* storage data of 0xa */ + 2*common.HashLength + 3 + /* storage data of 0xb */ + 2*common.HashLength + 1 /* storage data of 0xc */ + + a := newStates( + nil, + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa0}, // common.HashLength+1 + common.Hash{0xb}: {0xb0}, // common.HashLength+1 + common.Hash{0xc}: {0xc0}, // common.HashLength+1 + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x10}, // 2*common.HashLength+1 + common.Hash{0x2}: {0x20}, // 2*common.HashLength+1 + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x10, 0x11, 0x12}, // 2*common.HashLength+3 + }, + common.Hash{0xc}: { + common.Hash{0x1}: {0x10}, // 2*common.HashLength+1 + }, + }, + ) + if a.size != uint64(expSizeA) { + t.Fatalf("Unexpected size, want: %d, got: %d", expSizeA, a.size) + } + + expSizeB := 2*common.HashLength + /* destruct set data */ + common.HashLength + 2 + common.HashLength + 3 + /* account data */ + 2*common.HashLength + 3 + 2*common.HashLength + 2 + /* storage data of 0xa */ + 2*common.HashLength + 2 + 2*common.HashLength + 2 /* storage data of 0xb */ + b := newStates( + map[common.Hash]struct{}{ + common.Hash{0xa}: {}, // common.HashLength + common.Hash{0xc}: {}, // common.HashLength + }, + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa1, 0xa1}, // common.HashLength+2 + common.Hash{0xb}: {0xb1, 0xb1, 0xb1}, // common.HashLength+3 + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x11, 0x11, 0x11}, // 2*common.HashLength+3 + common.Hash{0x3}: {0x31, 0x31}, // 2*common.HashLength+1 + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x11, 0x11}, // 2*common.HashLength+2 + common.Hash{0x2}: {0x22, 0x22}, // 2*common.HashLength+2 + }, + }, + ) + if b.size != uint64(expSizeB) { + t.Fatalf("Unexpected size, want: %d, got: %d", expSizeB, b.size) + } + + a.merge(b) + mergeSize := expSizeA + 2*common.HashLength /* destruct set data */ + mergeSize += 1 /* account a data change */ + 2 /* account b data change */ + mergeSize -= common.HashLength + 1 /* account data removal of 0xc */ + mergeSize += 2 + 1 /* storage a change */ + mergeSize += 2*common.HashLength + 2 - 1 /* storage b change */ + mergeSize -= 2*common.HashLength + 1 /* storage data removal of 0xc */ + + if a.size != uint64(mergeSize) { + t.Fatalf("Unexpected size, want: %d, got: %d", mergeSize, 
a.size) } + + // Revert the set to original status + a.revert( + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa0}, + common.Hash{0xb}: {0xb0}, + common.Hash{0xc}: {0xc0}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x10}, + common.Hash{0x2}: {0x20}, + common.Hash{0x3}: {}, + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x10, 0x11, 0x12}, + common.Hash{0x2}: {}, + }, + common.Hash{0xc}: { + common.Hash{0x1}: {0x10}, + }, + }, + ) + if a.size != uint64(expSizeA) { + t.Fatalf("Unexpected size, want: %d, got: %d", expSizeA, a.size) + } + + // Revert state set a again, this time with additional slots which were + // deleted in account destruction and re-created because of resurrection. + a.merge(b) + a.revert( + map[common.Hash][]byte{ + common.Hash{0xa}: {0xa0}, + common.Hash{0xb}: {0xb0}, + common.Hash{0xc}: {0xc0}, + }, + map[common.Hash]map[common.Hash][]byte{ + common.Hash{0xa}: { + common.Hash{0x1}: {0x10}, + common.Hash{0x2}: {0x20}, + common.Hash{0x3}: {}, + common.Hash{0x4}: {0x40}, // this slot was not in the set a, but resurrected because of revert + common.Hash{0x5}: {0x50, 0x51}, // this slot was not in the set a, but resurrected because of revert + }, + common.Hash{0xb}: { + common.Hash{0x1}: {0x10, 0x11, 0x12}, + common.Hash{0x2}: {}, + }, + common.Hash{0xc}: { + common.Hash{0x1}: {0x10}, + }, + }, + ) + expSize := expSizeA + common.HashLength*2 + 1 + /* slot 4 */ +common.HashLength*2 + 2 /* slot 5 */ + if a.size != uint64(expSize) { + t.Fatalf("Unexpected size, want: %d, got: %d", expSize, a.size) + } +} diff --git a/triedb/pathdb/verifier.go b/triedb/pathdb/verifier.go new file mode 100644 index 000000000000..2d6f72925b6e --- /dev/null +++ b/triedb/pathdb/verifier.go @@ -0,0 +1,355 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package pathdb + +import ( + "encoding/binary" + "errors" + "fmt" + "math" + "runtime" + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" +) + +// trieKV represents a trie key-value pair. +type trieKV struct { + key common.Hash + value []byte +} + +type ( + // trieHasherFn is the interface of trie hasher which can be implemented + // by different trie algorithms. + trieHasherFn func(in chan trieKV, out chan common.Hash) + + // leafCallbackFn is the callback invoked at the leaves of the trie, which + // returns the subtrie root for the specified subtrie identifier.
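+ // A nil leafCallback makes generateTrieRoot treat each iterated account as + // a plain leaf (encoded via types.FullAccountRLP) without descending into + // per-account storage.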
+ leafCallbackFn func(accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) +) + +// VerifyState traverses the flat states specified by the given state root and +// re-computes the trie root from them, ensuring the flat states and the trie +// data match. +func (db *Database) VerifyState(root common.Hash) error { + acctIt, err := db.AccountIterator(root, common.Hash{}) + if err != nil { + return err // The required snapshot might not exist. + } + defer acctIt.Release() + + got, err := generateTrieRoot(acctIt, common.Hash{}, stackTrieHasher, func(accountHash, codeHash common.Hash, stat *generateStats) (common.Hash, error) { + // Ensure the contract code referenced by the account is present in the database. + if codeHash != types.EmptyCodeHash { + code := rawdb.ReadCode(db.diskdb, codeHash) + if len(code) == 0 { + return common.Hash{}, errors.New("failed to read contract code") + } + } + // Then re-generate the storage trie root from the flat storage data. + storageIt, err := db.StorageIterator(root, accountHash, common.Hash{}) + if err != nil { + return common.Hash{}, err + } + defer storageIt.Release() + + hash, err := generateTrieRoot(storageIt, accountHash, stackTrieHasher, nil, stat, false) + if err != nil { + return common.Hash{}, err + } + return hash, nil + }, newGenerateStats(), true) + + if err != nil { + return err + } + if got != root { + return fmt.Errorf("state root hash mismatch: got %x, want %x", got, root) + } + return nil +} + +// generateStats is a collection of statistics gathered by the trie generator +// for logging purposes. +type generateStats struct { + head common.Hash + start time.Time + + accounts uint64 // Number of accounts done (including those being crawled) + slots uint64 // Number of storage slots done (including those being crawled) + + slotsStart map[common.Hash]time.Time // Start time for account slot crawling + slotsHead map[common.Hash]common.Hash // Slot head for accounts being crawled + + lock sync.RWMutex +} + +// newGenerateStats creates a new generator stats tracker. +func newGenerateStats() *generateStats { + return &generateStats{ + slotsStart: make(map[common.Hash]time.Time), + slotsHead: make(map[common.Hash]common.Hash), + start: time.Now(), + } +} + +// progressAccounts updates the generator stats for the account range. +func (stat *generateStats) progressAccounts(account common.Hash, done uint64) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.accounts += done + stat.head = account +} + +// finishAccounts updates the generator stats for the finished account range. +func (stat *generateStats) finishAccounts(done uint64) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.accounts += done +} + +// progressContract updates the generator stats for a specific in-progress contract. +func (stat *generateStats) progressContract(account common.Hash, slot common.Hash, done uint64) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.slots += done + stat.slotsHead[account] = slot + if _, ok := stat.slotsStart[account]; !ok { + stat.slotsStart[account] = time.Now() + } +} + +// finishContract updates the generator stats for a specific just-finished contract. +func (stat *generateStats) finishContract(account common.Hash, done uint64) { + stat.lock.Lock() + defer stat.lock.Unlock() + + stat.slots += done + delete(stat.slotsHead, account) + delete(stat.slotsStart, account) +} + +// report prints the cumulative progress statistics.
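+// +// The ETA is estimated from the key-space position of the current account head +// (its first 8 bytes read as a fraction of the 2^64 hash space), and stretched +// if a large in-progress contract crawl is predicted to finish later.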
+func (stat *generateStats) report() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + ctx := []interface{}{ + "accounts", stat.accounts, + "slots", stat.slots, + "elapsed", common.PrettyDuration(time.Since(stat.start)), + } + if stat.accounts > 0 { + // If there's progress on the account trie, estimate the time to finish crawling it + if done := binary.BigEndian.Uint64(stat.head[:8]) / stat.accounts; done > 0 { + var ( + left = (math.MaxUint64 - binary.BigEndian.Uint64(stat.head[:8])) / stat.accounts + speed = done/uint64(time.Since(stat.start)/time.Millisecond+1) + 1 // +1s to avoid division by zero + eta = time.Duration(left/speed) * time.Millisecond + ) + // If there are large contract crawls in progress, estimate their finish time + for acc, head := range stat.slotsHead { + start := stat.slotsStart[acc] + if done := binary.BigEndian.Uint64(head[:8]); done > 0 { + var ( + left = math.MaxUint64 - binary.BigEndian.Uint64(head[:8]) + speed = done/uint64(time.Since(start)/time.Millisecond+1) + 1 // +1s to avoid division by zero + ) + // Override the ETA if larger than the largest until now + if slotETA := time.Duration(left/speed) * time.Millisecond; eta < slotETA { + eta = slotETA + } + } + } + ctx = append(ctx, []interface{}{ + "eta", common.PrettyDuration(eta), + }...) + } + } + log.Info("Iterating state snapshot", ctx...) +} + +// reportDone prints the last log when the whole generation is finished. +func (stat *generateStats) reportDone() { + stat.lock.RLock() + defer stat.lock.RUnlock() + + var ctx []interface{} + ctx = append(ctx, []interface{}{"accounts", stat.accounts}...) + if stat.slots != 0 { + ctx = append(ctx, []interface{}{"slots", stat.slots}...) + } + ctx = append(ctx, []interface{}{"elapsed", common.PrettyDuration(time.Since(stat.start))}...) + log.Info("Iterated snapshot", ctx...) +} + +// runReport periodically prints the progress information. +func runReport(stats *generateStats, stop chan bool) { + timer := time.NewTimer(0) + defer timer.Stop() + + for { + select { + case <-timer.C: + stats.report() + timer.Reset(time.Second * 8) + case success := <-stop: + if success { + stats.reportDone() + } + return + } + } +} + +// generateTrieRoot generates the trie hash based on the snapshot iterator. +// It can be used for generating account trie, storage trie or even the +// whole state which connects the accounts and the corresponding storages. +func generateTrieRoot(it Iterator, account common.Hash, generatorFn trieHasherFn, leafCallback leafCallbackFn, stats *generateStats, report bool) (common.Hash, error) { + var ( + in = make(chan trieKV) // chan to pass leaves + out = make(chan common.Hash, 1) // chan to collect result + stoplog = make(chan bool, 1) // 1-size buffer, works when logging is not enabled + wg sync.WaitGroup + ) + // Spin up a go-routine for trie hash re-generation + wg.Add(1) + go func() { + defer wg.Done() + generatorFn(in, out) + }() + // Spin up a go-routine for progress logging + if report && stats != nil { + wg.Add(1) + go func() { + defer wg.Done() + runReport(stats, stoplog) + }() + } + // Create a semaphore to assign tasks and collect results through. We'll pre- + // fill it with nils, thus using the same channel for both limiting concurrent + // processing and gathering results. + threads := runtime.NumCPU() + results := make(chan error, threads) + for i := 0; i < threads; i++ { + results <- nil // fill the semaphore + } + // stop is a helper function to shutdown the background threads + // and return the re-generated trie hash. 
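+ // It closes the leaf channel first so the hasher can emit its final result, + // then drains the worker semaphore to surface the first error, if any.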
+ stop := func(fail error) (common.Hash, error) { + close(in) + result := <-out + for i := 0; i < threads; i++ { + if err := <-results; err != nil && fail == nil { + fail = err + } + } + stoplog <- fail == nil + + wg.Wait() + return result, fail + } + var ( + logged = time.Now() + processed = uint64(0) + leaf trieKV + ) + // Start to feed leaves + for it.Next() { + if account == (common.Hash{}) { + var ( + err error + fullData []byte + ) + if leafCallback == nil { + fullData, err = types.FullAccountRLP(it.(AccountIterator).Account()) + if err != nil { + return stop(err) + } + } else { + // Wait until the semaphore allows us to continue, aborting if + // a sub-task failed + if err := <-results; err != nil { + results <- nil // stop will drain the results, add a noop back for this error we just consumed + return stop(err) + } + // Fetch the next account and process it concurrently + account, err := types.FullAccount(it.(AccountIterator).Account()) + if err != nil { + return stop(err) + } + go func(hash common.Hash) { + subroot, err := leafCallback(hash, common.BytesToHash(account.CodeHash), stats) + if err != nil { + results <- err + return + } + if account.Root != subroot { + results <- fmt.Errorf("invalid subroot(path %x), want %x, have %x", hash, account.Root, subroot) + return + } + results <- nil + }(it.Hash()) + fullData, err = rlp.EncodeToBytes(account) + if err != nil { + return stop(err) + } + } + leaf = trieKV{it.Hash(), fullData} + } else { + leaf = trieKV{it.Hash(), common.CopyBytes(it.(StorageIterator).Slot())} + } + in <- leaf + + // Accumulate the generation statistics if required. + processed++ + if time.Since(logged) > 3*time.Second && stats != nil { + if account == (common.Hash{}) { + stats.progressAccounts(it.Hash(), processed) + } else { + stats.progressContract(account, it.Hash(), processed) + } + logged, processed = time.Now(), 0 + } + } + // Commit the last chunk of statistics. + if processed > 0 && stats != nil { + if account == (common.Hash{}) { + stats.finishAccounts(processed) + } else { + stats.finishContract(account, processed) + } + } + return stop(nil) +} + +func stackTrieHasher(in chan trieKV, out chan common.Hash) { + t := trie.NewStackTrie(nil) + for leaf := range in { + t.Update(leaf.key[:], leaf.value) + } + out <- t.Hash() +} diff --git a/triedb/states.go b/triedb/states.go new file mode 100644 index 000000000000..178c68a32843 --- /dev/null +++ b/triedb/states.go @@ -0,0 +1,51 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. + +package triedb + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/triedb/pathdb" +) + +// StateSet represents a collection of mutated states during a state transition.
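+// +// It is the scheme-neutral form accepted by the triedb layer; internal() below +// converts it into the pathdb-specific representation when the path scheme is +// in use.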
+type StateSet struct { + Destructs map[common.Hash]struct{} // Destructed accounts + Accounts map[common.Hash][]byte // Mutated accounts in 'slim RLP' encoding + AccountsOrigin map[common.Address][]byte // Original values of mutated accounts in 'slim RLP' encoding + Storages map[common.Hash]map[common.Hash][]byte // Mutated storage slots in 'prefix-zero-trimmed' RLP format + StoragesOrigin map[common.Address]map[common.Hash][]byte // Original values of mutated storage slots in 'prefix-zero-trimmed' RLP format +} + +// NewStateSet initializes an empty state set. +func NewStateSet() *StateSet { + return &StateSet{ + Destructs: make(map[common.Hash]struct{}), + Accounts: make(map[common.Hash][]byte), + AccountsOrigin: make(map[common.Address][]byte), + Storages: make(map[common.Hash]map[common.Hash][]byte), + StoragesOrigin: make(map[common.Address]map[common.Hash][]byte), + } +} + +// internal returns a state set for path database internal usage. +func (set *StateSet) internal() *pathdb.StateSetWithOrigin { + // The nil state set is possible in tests. + if set == nil { + return nil + } + return pathdb.NewStateSetWithOrigin(set.Destructs, set.Accounts, set.Storages, set.AccountsOrigin, set.StoragesOrigin) +}
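Review note: the sketch below illustrates how the two reader entry points introduced in triedb/pathdb/reader.go are intended to be consumed. It is illustrative only, not part of the change set: it assumes an already-opened *pathdb.Database whose layer tree retains the given root, and inspectAccount is a hypothetical helper name.

    package example

    import (
    	"fmt"

    	"github.com/ethereum/go-ethereum/common"
    	"github.com/ethereum/go-ethereum/triedb/pathdb"
    )

    // inspectAccount reads flat state through the new StateReader entry point.
    // Raw trie nodes would instead go through db.NodeReader(root), addressed
    // by trie owner and node path.
    func inspectAccount(db *pathdb.Database, root, addrHash common.Hash) error {
    	reader, err := db.StateReader(root)
    	if err != nil {
    		return err // no layer is retained for this root
    	}
    	account, err := reader.Account(addrHash)
    	if err != nil {
    		return err // e.g. the layer went stale mid-read
    	}
    	if account == nil {
    		fmt.Println("account not present at this root")
    		return nil
    	}
    	// The returned account and any storage blobs are not deep copies and
    	// must be treated as read-only.
    	slot, err := reader.Storage(addrHash, common.Hash{})
    	if err != nil {
    		return err
    	}
    	fmt.Printf("nonce=%d slot=%x\n", account.Nonce, slot)
    	return nil
    }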