From f75a2cf65b4bc4724bab6c6edd5754e5150eca6d Mon Sep 17 00:00:00 2001 From: Lion - dapplion <35266934+dapplion@users.noreply.github.com> Date: Tue, 27 Aug 2024 06:10:22 +0200 Subject: [PATCH] PeerDAS implementation (#5683) * 1D PeerDAS prototype: Data format and Distribution (#5050) * Build and publish column sidecars. Add stubs for gossip. * Add blob column subnets * Add `BlobColumnSubnetId` and initial compute subnet logic. * Subscribe to blob column subnets. * Introduce `BLOB_COLUMN_SUBNET_COUNT` based on DAS configuration parameter changes. * Fix column sidecar type to use `VariableList` for data. * Fix lint errors. * Update types and naming to latest consensus-spec #3574. * Fix test and some cleanups. * Merge branch 'unstable' into das * Merge branch 'unstable' into das * Merge branch 'unstable' into das # Conflicts: # consensus/types/src/chain_spec.rs * Add `DataColumnSidecarsByRoot ` req/resp protocol (#5196) * Add stub for `DataColumnsByRoot` * Add basic implementation of serving RPC data column from DA checker. * Store data columns in early attester cache and blobs db. * Apply suggestions from code review Co-authored-by: Eitan Seri-Levi Co-authored-by: Jacob Kaufmann * Fix build. * Store `DataColumnInfo` in database and various cleanups. * Update `DataColumnSidecar` ssz max size and remove panic code. --------- Co-authored-by: Eitan Seri-Levi Co-authored-by: Jacob Kaufmann * feat: add DAS KZG in data col construction (#5210) * feat: add DAS KZG in data col construction * refactor data col sidecar construction * refactor: add data cols to GossipVerifiedBlockContents * Disable windows tests for `das` branch. (c-kzg doesn't build on windows) * Formatting and lint changes only. * refactor: remove iters in construction of data cols * Update vec capacity and error handling. * Add `data_column_sidecar_computation_seconds` metric. --------- Co-authored-by: Jimmy Chen * Merge branch 'unstable' into das # Conflicts: # .github/workflows/test-suite.yml # beacon_node/lighthouse_network/src/types/topics.rs * fix: update data col subnet count from 64 to 32 (#5413) * feat: add peerdas custody field to ENR (#5409) * feat: add peerdas custody field to ENR * add hash prefix step in subnet computation * refactor test and fix possible u64 overflow * default to min custody value if not present in ENR * Merge branch 'unstable' into das * Merge branch 'unstable' into das-unstable-merge-0415 # Conflicts: # Cargo.lock # beacon_node/beacon_chain/src/data_availability_checker.rs # beacon_node/beacon_chain/src/data_availability_checker/availability_view.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # beacon_node/beacon_chain/src/data_availability_checker/processing_cache.rs # beacon_node/lighthouse_network/src/rpc/methods.rs # beacon_node/network/src/network_beacon_processor/mod.rs # beacon_node/network/src/sync/block_lookups/tests.rs # crypto/kzg/Cargo.toml * Merge remote-tracking branch 'sigp/unstable' into das * Merge remote-tracking branch 'sigp/unstable' into das * Fix merge conflicts. * Send custody data column to `DataAvailabilityChecker` for determining block importability (#5570) * Only import custody data columns after publishing a block. * Add `subscribe-all-data-column-subnets` and pass custody column count to `availability_cache`. * Add custody requirement checks to `availability_cache`. * Fix config not being passed to DAChecker and add more logging. * Introduce `peer_das_epoch` and make blobs and columns mutually exclusive. * Add DA filter for PeerDAS. 
* Fix data availability check and use test_logger in tests. * Fix subscribe to all data column subnets not working correctly. * Fix tests. * Only publish column sidecars if PeerDAS is activated. Add `PEER_DAS_EPOCH` chain spec serialization. * Remove unused data column index in `OverflowKey`. * Fix column sidecars incorrectly produced when there are no blobs. * Re-instate index to `OverflowKey::DataColumn` and downgrade noisy debug log to `trace`. * DAS sampling on sync (#5616) * Data availability sampling on sync * Address @jimmygchen review * Trigger sampling * Address some review comments and only send `SamplingBlock` sync message after PEER_DAS_EPOCH. --------- Co-authored-by: Jimmy Chen * Merge branch 'unstable' into das # Conflicts: # Cargo.lock # Cargo.toml # beacon_node/beacon_chain/src/block_verification.rs # beacon_node/http_api/src/publish_blocks.rs # beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs # beacon_node/lighthouse_network/src/rpc/protocol.rs # beacon_node/lighthouse_network/src/types/pubsub.rs # beacon_node/network/src/sync/block_lookups/single_block_lookup.rs # beacon_node/store/src/hot_cold_store.rs # consensus/types/src/beacon_state.rs # consensus/types/src/chain_spec.rs # consensus/types/src/eth_spec.rs * Merge branch 'unstable' into das * Re-process early sampling requests (#5569) * Re-process early sampling requests # Conflicts: # beacon_node/beacon_processor/src/work_reprocessing_queue.rs # beacon_node/lighthouse_network/src/rpc/methods.rs # beacon_node/network/src/network_beacon_processor/rpc_methods.rs * Update beacon_node/beacon_processor/src/work_reprocessing_queue.rs Co-authored-by: Jimmy Chen * Add missing var * Beta compiler fixes and small typo fixes. * Remove duplicate method. --------- Co-authored-by: Jimmy Chen * Merge remote-tracking branch 'sigp/unstable' into das * Fix merge conflict. * Add data columns by root to currently supported protocol list (#5678) * Add data columns by root to currently supported protocol list. * Add missing data column by roots handling. * Merge branch 'unstable' into das # Conflicts: # Cargo.lock # Cargo.toml # beacon_node/network/src/sync/block_lookups/tests.rs # beacon_node/network/src/sync/manager.rs * Fix simulator tests on `das` branch (#5731) * Bump genesis delay in sim tests as KZG setup takes longer for DAS. * Fix incorrect YAML spacing. * DataColumnByRange boilerplate (#5353) * add boilerplate * fmt * PeerDAS custody lookup sync (#5684) * Implement custody sync * Lint * Fix tests * Fix rebase issue * Add data column kzg verification and update `c-kzg`. (#5701) * Add data column kzg verification and update `c-kzg`. * Fix incorrect `Cell` size. * Add kzg verification on rpc blocks. * Add kzg verification on rpc data columns. * Rename `PEER_DAS_EPOCH` to `EIP7594_FORK_EPOCH` for client interop. (#5750) * Fetch custody columns in range sync (#5747) * Fetch custody columns in range sync * Clean up todos * Remove `BlobSidecar` construction and publish after PeerDAS activated (#5759) * Avoid building and publishing blob sidecars after PeerDAS. * Ignore gossip blobs with a slot greater than peer das activation epoch. * Only attempt to verify blob count and import blobs before PeerDAS. * #5684 review comments (#5748) * #5684 review comments. * Doc and message update only. * Fix incorrect condition when constructing `RpcBlock` with `DataColumn`s * Make sampling tests deterministic (#5775) * PeerDAS spec tests (#5772) * Add get_custody_columns spec tests. * Add kzg merkle proof spec tests. * Add SSZ spec tests. 
* Add remaining KZG tests * Load KZG only once per process, exclude electra tests and add missing SSZ tests. * Fix lint and missing changes. * Ignore macOS generated file. * Merge remote branch 'sigp/unstable' into das * Merge remote tracking branch 'origin/unstable' into das * Implement unconditional reconstruction for supernodes (#5781) * Implement unconditional reconstruction for supernodes * Move code into KzgVerifiedCustodyDataColumn * Remove expect * Add test * Thanks justin * Add withhold attack mode for interop (#5788) * Add withhold attack mode * Update readme * Drop added readmes * Undo styling changes * Add column gossip verification and handle unknown parent block (#5783) * Add column gossip verification and handle missing parent for columns. * Review PR * Fix rebase issue * more lint issues :) --------- Co-authored-by: dapplion <35266934+dapplion@users.noreply.github.com> * Trigger sampling on sync events (#5776) * Trigger sampling on sync events * Update beacon_chain.rs * Fix tests * Fix tests * PeerDAS parameter changes for devnet-0 (#5779) * Update PeerDAS parameters to latest values. * Lint fix * Fix lint. * Update hardcoded subnet count to 64 (#5791) * Fix incorrect columns per subnet and config cleanup (#5792) * Tidy up PeerDAS preset and config values. * Fix broken config * Fix DAS branch CI (#5793) * Fix invalid syntax. * Update cli doc. Ignore get_custody_columns test temporarily. * Fix failing test and add verify inclusion test. * Undo accidentally removed code. * Only attempt reconstruct columns once. (#5794) * Re-enable precompute table for peerdas kzg (#5795) * Merge branch 'unstable' into das * Update subscription filter. (#5797) * Remove penalty for duplicate columns (expected due to reconstruction) (#5798) * Revert DAS config for interop testing. Optimise get_custody_columns function. (#5799) * Don't perform reconstruction for proposer node as it already has all the columns. (#5806) * Multithread compute_cells_and_proofs (#5805) * Multi-thread reconstruct data columns * Multi-thread path for block production * Merge branch 'unstable' into das # Conflicts: # .github/workflows/test-suite.yml # beacon_node/network/src/sync/block_lookups/mod.rs # beacon_node/network/src/sync/block_lookups/single_block_lookup.rs # beacon_node/network/src/sync/network_context.rs * Fix CI errors. * Move PeerDAS type-level config to configurable `ChainSpec` (#5828) * Move PeerDAS type level config to `ChainSpec`. * Fix tests * Misc custody lookup improvements (#5821) * Improve custody requests * Type DataColumnsByRootRequestId * Prioritize peers and load balance * Update tests * Address PR review * Merge branch 'unstable' into das * Rename deploy_block in network config (`das` branch) (#5852) * Rename deploy_block.txt to deposit_contract_block.txt * fmt --------- Co-authored-by: Pawan Dhananjay * Merge branch 'unstable' into das * Fix CI and merge issues. * Merge branch 'unstable' into das # Conflicts: # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # lcli/src/main.rs * Store data columns individually in store and caches (#5890) * Store data columns individually in store and caches * Implement data column pruning * Merge branch 'unstable' into das # Conflicts: # Cargo.lock * Update reconstruction benches to newer criterion version. 
(#5949) * Merge branch 'unstable' into das # Conflicts: # .github/workflows/test-suite.yml * chore: add `recover_cells_and_compute_proofs` method (#5938) * chore: add recover_cells_and_compute_proofs method * Introduce type alias `CellsAndKzgProofs` to address type complexity. --------- Co-authored-by: Jimmy Chen * Update `csc` format in ENR and spec tests for devnet-1 (#5966) * Update `csc` format in ENR. * Add spec tests for `recover_cells_and_kzg_proofs`. * Add tests for ENR. * Fix failing tests. * Add protection against invalid csc value in ENR. * Fix lint * Fix csc encoding and decoding (#5997) * Fix data column rpc request not being sent due to incorrect limits set. (#6000) * Fix incorrect inbound request count causing rate limiting. (#6025) * Merge branch 'stable' into das # Conflicts: # beacon_node/network/src/sync/block_lookups/tests.rs # beacon_node/network/src/sync/block_sidecar_coupling.rs # beacon_node/network/src/sync/manager.rs # beacon_node/network/src/sync/network_context.rs # beacon_node/network/src/sync/network_context/requests.rs * Merge remote-tracking branch 'unstable' into das * Add kurtosis config for DAS testing (#5968) * Add kurtosis config for DAS testing. * Fix invalid yaml file * Update network parameter files. * chore: add rust PeerdasKZG crypto library for peerdas functionality and rollback c-kzg dependency to 4844 version (#5941) * chore: add recover_cells_and_compute_proofs method * chore: add rust peerdas crypto library * chore: integrate peerdaskzg rust library into kzg crate * chore(multi): - update `ssz_cell_to_crypto_cell` - update conversion from the crypto cell type to a Vec. Since the Rust library defines them as references to an array, the conversion is simply `to_vec` * chore(multi): - update rest of code to handle the new crypto `Cell` type - update test case code to no longer use the Box type * chore: cleanup of superfluous conversions * chore: revert c-kzg dependency back to v1 * chore: move dependency into correct order * chore: update rust dependency - This version includes a new method `PeerDasContext::with_num_threads` * chore: remove Default initialization of PeerDasContext and explicitly set the parameters in `new_from_trusted_setup` * chore: cleanup exports * chore: commit updated cargo.lock * Update Cargo.toml Co-authored-by: Jimmy Chen * chore: rename dependency * chore: update peerdas lib - sets the blst version to 0.3 so that it matches whatever lighthouse is using. Although 0.3.12 is latest, lighthouse is pinned to 0.3.3 * chore: fix clippy lifetime - Rust doesn't allow you to elide the lifetime on type aliases * chore: cargo clippy fix * chore: cargo fmt * chore: update lib to add redundant checks (these will be removed in consensus-specs PR 3819) * chore: update dependency to ignore proofs * chore: update peerdas lib to latest * update lib * chore: remove empty proof parameter --------- Co-authored-by: Jimmy Chen * Update PeerDAS interop testnet config (#6069) * Update interop testnet config. * Fix typo and remove target peers * Avoid retrying same sampling peer that previously failed. (#6084) * Various fixes to custody range sync (#6004) * Only start requesting batches when there are good peers across all custody columns to avoid spamming block requests. * Add custody peer check before mutating `BatchInfo` to avoid inconsistent state. * Add check to cover a case where batch is not processed while waiting for custody peers to become available. 
* Fix lint and logic error * Fix `good_peers_on_subnet` always returning false for `DataColumnSubnet`. * Add test for `get_custody_peers_for_column` * Revert epoch parameter refactor. * Fall back to default custody requirement if peer ENR is not present. * Add metrics and update code comment. * Add more debug logs. * Use subscribed peers on subnet before MetaDataV3 is implemented. Remove peer_id matching when injecting error because multiple peers are used for range requests. Use randomized custodial peer to avoid repeatedly sending requests to failing peers. Batch by range request where possible. * Remove unused code and update docs. * Add comment * chore: update peerdas-kzg library (#6118) * chore: update peerDAS lib * chore: update library * chore: update library to version that includes "init context" benchmarks and optional validation checks * chore: (can remove) -- Add benchmarks for init context * Prevent continuous searchers for low-peer networks (#6162) * Merge branch 'unstable' into das * Fix merge conflicts * Add cli flag to enable sampling and disable by default. (#6209) * chore: Use reference to an array representing a blob instead of an owned KzgBlob (#6179) * add KzgBlobRef type * modify code to use KzgBlobRef * clippy * Remove Deneb blob related changes to maintain compatibility with `c-kzg-4844`. --------- Co-authored-by: Jimmy Chen * Store computed custody subnets in PeerDB and fix custody lookup test (#6218) * Fix failing custody lookup tests. * Store custody subnets in PeerDB, fix custody lookup test and refactor some methods. * Merge branch 'unstable' into das # Conflicts: # beacon_node/beacon_chain/src/beacon_chain.rs # beacon_node/beacon_chain/src/block_verification_types.rs # beacon_node/beacon_chain/src/builder.rs # beacon_node/beacon_chain/src/data_availability_checker.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # beacon_node/beacon_chain/src/data_column_verification.rs # beacon_node/beacon_chain/src/early_attester_cache.rs # beacon_node/beacon_chain/src/historical_blocks.rs # beacon_node/beacon_chain/tests/store_tests.rs # beacon_node/lighthouse_network/src/discovery/enr.rs # beacon_node/network/src/service.rs # beacon_node/src/cli.rs # beacon_node/store/src/hot_cold_store.rs # beacon_node/store/src/lib.rs # lcli/src/generate_bootnode_enr.rs * Fix CI failures after merge. * Batch sampling requests by peer (#6256) * Batch sampling requests by peer * Fix clippy errors * Fix tests * Add column_index to error message for ease of tracing * Remove outdated comment * Fix range sync never evaluating request as finished, causing it to get stuck. 
(#6276) * Merge branch 'unstable' into das-0821-merge # Conflicts: # Cargo.lock # Cargo.toml # beacon_node/beacon_chain/src/beacon_chain.rs # beacon_node/beacon_chain/src/data_availability_checker.rs # beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs # beacon_node/beacon_chain/src/data_column_verification.rs # beacon_node/beacon_chain/src/kzg_utils.rs # beacon_node/beacon_chain/src/metrics.rs # beacon_node/beacon_processor/src/lib.rs # beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs # beacon_node/lighthouse_network/src/rpc/config.rs # beacon_node/lighthouse_network/src/rpc/methods.rs # beacon_node/lighthouse_network/src/rpc/outbound.rs # beacon_node/lighthouse_network/src/rpc/rate_limiter.rs # beacon_node/lighthouse_network/src/service/api_types.rs # beacon_node/lighthouse_network/src/types/globals.rs # beacon_node/network/src/network_beacon_processor/mod.rs # beacon_node/network/src/network_beacon_processor/rpc_methods.rs # beacon_node/network/src/network_beacon_processor/sync_methods.rs # beacon_node/network/src/sync/block_lookups/common.rs # beacon_node/network/src/sync/block_lookups/mod.rs # beacon_node/network/src/sync/block_lookups/single_block_lookup.rs # beacon_node/network/src/sync/block_lookups/tests.rs # beacon_node/network/src/sync/manager.rs # beacon_node/network/src/sync/network_context.rs # consensus/types/src/data_column_sidecar.rs # crypto/kzg/Cargo.toml # crypto/kzg/benches/benchmark.rs # crypto/kzg/src/lib.rs * Fix custody tests and load PeerDAS KZG instead. * Fix ef tests and bench compilation. * Fix failing sampling test. * Merge pull request #6287 from jimmygchen/das-0821-merge Merge `unstable` into `das` 20240821 * Remove get_block_import_status * Merge branch 'unstable' into das * Re-enable Windows release tests. * Address some review comments. * Address more review comments and cleanups. * Comment out peer DAS KZG EF tests for now * Address more review comments and fix build. 
* Merge branch 'das' of github.com:sigp/lighthouse into das * Unignore Electra tests * Fix metric name * Address some of Pawan's review comments * Merge remote-tracking branch 'origin/unstable' into das * Update PeerDAS network parameters for peerdas-devnet-2 (#6290) * update subnet count & custody req * das network params * update ef tests --------- Co-authored-by: Jimmy Chen --- Cargo.lock | 2 + beacon_node/beacon_chain/Cargo.toml | 5 + beacon_node/beacon_chain/benches/benches.rs | 66 ++ beacon_node/beacon_chain/src/beacon_chain.rs | 117 +++- .../beacon_chain/src/blob_verification.rs | 4 +- .../beacon_chain/src/block_verification.rs | 124 +++- .../src/block_verification_types.rs | 128 +++- beacon_node/beacon_chain/src/builder.rs | 11 +- beacon_node/beacon_chain/src/chain_config.rs | 6 + .../src/data_availability_checker.rs | 272 +++++--- .../src/data_availability_checker/error.rs | 4 + .../overflow_lru_cache.rs | 123 +++- .../state_lru_cache.rs | 4 +- .../src/data_column_verification.rs | 88 ++- beacon_node/beacon_chain/src/errors.rs | 4 +- beacon_node/beacon_chain/src/metrics.rs | 28 + beacon_node/beacon_chain/src/test_utils.rs | 26 + .../beacon_chain/tests/block_verification.rs | 4 +- beacon_node/beacon_chain/tests/store_tests.rs | 8 +- beacon_node/beacon_processor/src/lib.rs | 80 ++- beacon_node/beacon_processor/src/metrics.rs | 35 +- .../src/work_reprocessing_queue.rs | 102 +++ beacon_node/http_api/Cargo.toml | 1 + beacon_node/http_api/src/lib.rs | 24 + beacon_node/http_api/src/publish_blocks.rs | 81 ++- beacon_node/http_api/src/test_utils.rs | 1 + .../tests/broadcast_validation_tests.rs | 6 + .../lighthouse_network/src/discovery/enr.rs | 2 +- .../lighthouse_network/src/discovery/mod.rs | 1 + .../src/discovery/subnet_predicate.rs | 4 +- .../src/peer_manager/mod.rs | 11 +- .../src/peer_manager/peerdb.rs | 82 ++- .../src/peer_manager/peerdb/peer_info.rs | 17 +- .../src/rpc/codec/ssz_snappy.rs | 6 +- .../lighthouse_network/src/rpc/config.rs | 8 + .../lighthouse_network/src/rpc/methods.rs | 10 + beacon_node/lighthouse_network/src/rpc/mod.rs | 2 + .../src/rpc/rate_limiter.rs | 3 +- .../src/service/api_types.rs | 38 +- .../lighthouse_network/src/service/mod.rs | 5 +- .../lighthouse_network/src/service/utils.rs | 8 +- .../lighthouse_network/src/types/globals.rs | 77 ++- beacon_node/network/src/metrics.rs | 81 ++- .../gossip_methods.rs | 69 +- .../src/network_beacon_processor/mod.rs | 38 ++ .../network_beacon_processor/sync_methods.rs | 98 ++- .../src/network_beacon_processor/tests.rs | 11 +- .../network/src/sync/backfill_sync/mod.rs | 10 +- .../network/src/sync/block_lookups/common.rs | 15 +- .../network/src/sync/block_lookups/mod.rs | 67 +- .../sync/block_lookups/single_block_lookup.rs | 58 +- .../network/src/sync/block_lookups/tests.rs | 475 ++++++++++++- .../src/sync/block_sidecar_coupling.rs | 332 +++++++-- beacon_node/network/src/sync/manager.rs | 176 ++++- beacon_node/network/src/sync/mod.rs | 2 + .../network/src/sync/network_context.rs | 353 ++++++++-- .../src/sync/network_context/custody.rs | 415 ++++++++++++ .../requests/data_columns_by_root.rs | 9 +- .../network/src/sync/range_sync/batch.rs | 7 +- .../network/src/sync/range_sync/chain.rs | 70 +- .../network/src/sync/range_sync/range.rs | 6 +- beacon_node/network/src/sync/sampling.rs | 628 ++++++++++++++++++ beacon_node/src/cli.rs | 19 + beacon_node/src/config.rs | 10 + beacon_node/store/src/hot_cold_store.rs | 6 +- .../chiado/config.yaml | 4 +- .../gnosis/config.yaml | 4 +- .../holesky/config.yaml | 4 +- .../mainnet/config.yaml | 
4 +- .../sepolia/config.yaml | 4 +- consensus/types/src/chain_spec.rs | 10 +- consensus/types/src/lib.rs | 2 +- consensus/types/src/runtime_var_list.rs | 253 +++++-- .../environment/tests/testnet_dir/config.yaml | 4 +- lighthouse/tests/beacon_node.rs | 27 + scripts/local_testnet/network_params.yaml | 4 +- .../network_params_das_devnet_1.yaml | 8 + .../network_params_das_interop.yaml | 38 ++ .../network_params_das_local.yaml | 20 + testing/ef_tests/check_all_files_accessed.py | 7 + testing/ef_tests/src/cases.rs | 30 +- .../ef_tests/src/cases/get_custody_columns.rs | 43 ++ .../src/cases/kzg_blob_to_kzg_commitment.rs | 5 +- .../src/cases/kzg_compute_blob_kzg_proof.rs | 4 + .../cases/kzg_compute_cells_and_kzg_proofs.rs | 67 ++ .../src/cases/kzg_compute_kzg_proof.rs | 4 + .../cases/kzg_recover_cells_and_kzg_proofs.rs | 97 +++ .../src/cases/kzg_verify_blob_kzg_proof.rs | 34 +- .../cases/kzg_verify_blob_kzg_proof_batch.rs | 5 +- .../cases/kzg_verify_cell_kzg_proof_batch.rs | 77 +++ .../src/cases/kzg_verify_kzg_proof.rs | 4 + .../src/cases/merkle_proof_validity.rs | 65 +- testing/ef_tests/src/handler.rs | 139 +++- testing/ef_tests/src/lib.rs | 9 +- testing/ef_tests/src/type_name.rs | 3 +- testing/ef_tests/tests/tests.rs | 49 +- 96 files changed, 5002 insertions(+), 609 deletions(-) create mode 100644 beacon_node/beacon_chain/benches/benches.rs create mode 100644 beacon_node/network/src/sync/network_context/custody.rs create mode 100644 beacon_node/network/src/sync/sampling.rs create mode 100644 scripts/local_testnet/network_params_das_devnet_1.yaml create mode 100644 scripts/local_testnet/network_params_das_interop.yaml create mode 100644 scripts/local_testnet/network_params_das_local.yaml create mode 100644 testing/ef_tests/src/cases/get_custody_columns.rs create mode 100644 testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs create mode 100644 testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs create mode 100644 testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs diff --git a/Cargo.lock b/Cargo.lock index bf10b9fe0e3..03a8fc735ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -779,6 +779,7 @@ version = "0.2.0" dependencies = [ "bitvec 1.0.1", "bls", + "criterion", "derivative", "eth1", "eth2", @@ -3852,6 +3853,7 @@ dependencies = [ "operation_pool", "parking_lot 0.12.3", "proto_array", + "rand", "safe_arith", "sensitive_url", "serde", diff --git a/beacon_node/beacon_chain/Cargo.toml b/beacon_node/beacon_chain/Cargo.toml index 0deccfb622d..5ad56cba55c 100644 --- a/beacon_node/beacon_chain/Cargo.toml +++ b/beacon_node/beacon_chain/Cargo.toml @@ -5,6 +5,10 @@ authors = ["Paul Hauner ", "Age Manning ( + num_of_blobs: usize, + spec: &ChainSpec, +) -> (SignedBeaconBlock, BlobsList) { + let mut block = BeaconBlock::Deneb(BeaconBlockDeneb::empty(spec)); + let mut body = block.body_mut(); + let blob_kzg_commitments = body.blob_kzg_commitments_mut().unwrap(); + *blob_kzg_commitments = + KzgCommitments::::new(vec![KzgCommitment::empty_for_testing(); num_of_blobs]).unwrap(); + + let signed_block = SignedBeaconBlock::from_block(block, Signature::empty()); + + let blobs = (0..num_of_blobs) + .map(|_| Blob::::default()) + .collect::>() + .into(); + + (signed_block, blobs) +} + +fn all_benches(c: &mut Criterion) { + type E = MainnetEthSpec; + let spec = Arc::new(E::default_spec()); + + let trusted_setup: TrustedSetup = serde_json::from_reader(TRUSTED_SETUP_BYTES) + .map_err(|e| format!("Unable to read trusted setup file: {}", e)) + .expect("should have trusted setup"); + let kzg = 
Arc::new(Kzg::new_from_trusted_setup(trusted_setup).expect("should create kzg")); + + for blob_count in [1, 2, 3, 6] { + let kzg = kzg.clone(); + let (signed_block, blob_sidecars) = create_test_block_and_blobs::(blob_count, &spec); + + let column_sidecars = + blobs_to_data_column_sidecars(&blob_sidecars, &signed_block, &kzg.clone(), &spec) + .unwrap(); + + let spec = spec.clone(); + + c.bench_function(&format!("reconstruct_{}", blob_count), |b| { + b.iter(|| { + black_box(reconstruct_data_columns( + &kzg, + &column_sidecars.iter().as_slice()[0..column_sidecars.len() / 2], + spec.as_ref(), + )) + }) + }); + } +} + +criterion_group!(benches, all_benches); +criterion_main!(benches); diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 66e7d06ad7c..74eaa2f50d9 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -22,6 +22,7 @@ pub use crate::canonical_head::CanonicalHead; use crate::chain_config::ChainConfig; use crate::data_availability_checker::{ Availability, AvailabilityCheckError, AvailableBlock, DataAvailabilityChecker, + DataColumnsToPublish, }; use crate::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn}; use crate::early_attester_cache::EarlyAttesterCache; @@ -123,6 +124,7 @@ use task_executor::{ShutdownReason, TaskExecutor}; use tokio_stream::Stream; use tree_hash::TreeHash; use types::blob_sidecar::FixedBlobSidecarList; +use types::data_column_sidecar::{ColumnIndex, DataColumnIdentifier}; use types::payload::BlockProductionVersion; use types::*; @@ -206,11 +208,13 @@ impl TryInto for AvailabilityProcessingStatus { /// The result of a chain segment processing. pub enum ChainSegmentResult { /// Processing this chain segment finished successfully. - Successful { imported_blocks: usize }, + Successful { + imported_blocks: Vec<(Hash256, Slot)>, + }, /// There was an error processing this chain segment. Before the error, some blocks could /// have been imported. Failed { - imported_blocks: usize, + imported_blocks: Vec<(Hash256, Slot)>, error: BlockError, }, } @@ -2696,7 +2700,7 @@ impl BeaconChain { chain_segment: Vec>, ) -> Result>, ChainSegmentResult> { // This function will never import any blocks. - let imported_blocks = 0; + let imported_blocks = vec![]; let mut filtered_chain_segment = Vec::with_capacity(chain_segment.len()); // Produce a list of the parent root and slot of the child of each block. @@ -2802,7 +2806,7 @@ impl BeaconChain { chain_segment: Vec>, notify_execution_layer: NotifyExecutionLayer, ) -> ChainSegmentResult { - let mut imported_blocks = 0; + let mut imported_blocks = vec![]; // Filter uninteresting blocks from the chain segment in a blocking task. let chain = self.clone(); @@ -2862,6 +2866,7 @@ impl BeaconChain { // Import the blocks into the chain. for signature_verified_block in signature_verified_blocks { + let block_slot = signature_verified_block.slot(); match self .process_block( signature_verified_block.block_root(), @@ -2874,9 +2879,9 @@ impl BeaconChain { { Ok(status) => { match status { - AvailabilityProcessingStatus::Imported(_) => { + AvailabilityProcessingStatus::Imported(block_root) => { // The block was imported successfully. 
- imported_blocks += 1; + imported_blocks.push((block_root, block_slot)); } AvailabilityProcessingStatus::MissingComponents(slot, block_root) => { warn!(self.log, "Blobs missing in response to range request"; @@ -2909,6 +2914,17 @@ impl BeaconChain { ChainSegmentResult::Successful { imported_blocks } } + /// Updates fork-choice node into a permanent `available` state so it can become a viable head. + /// Only completed sampling results are received. Blocks are unavailable by default and should + /// be pruned on finalization, on a timeout or by a max count. + pub async fn process_sampling_completed(self: &Arc, block_root: Hash256) { + // TODO(das): update fork-choice + // NOTE: It is possible that sampling complets before block is imported into fork choice, + // in that case we may need to update availability cache. + // TODO(das): These log levels are too high, reduce once DAS matures + info!(self.log, "Sampling completed"; "block_root" => %block_root); + } + /// Returns `Ok(GossipVerifiedBlock)` if the supplied `block` should be forwarded onto the /// gossip network. The block is not imported into the chain, it is just partially verified. /// @@ -2983,6 +2999,11 @@ impl BeaconChain { return Err(BlockError::BlockIsAlreadyKnown(blob.block_root())); } + // No need to process and import blobs beyond the PeerDAS epoch. + if self.spec.is_peer_das_enabled_for_epoch(blob.epoch()) { + return Err(BlockError::BlobNotRequired(blob.slot())); + } + if let Some(event_handler) = self.event_handler.as_ref() { if event_handler.has_blob_sidecar_subscribers() { event_handler.register(EventKind::BlobSidecar(SseBlobSidecar::from_blob_sidecar( @@ -3000,7 +3021,13 @@ impl BeaconChain { pub async fn process_gossip_data_columns( self: &Arc, data_columns: Vec>, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { let Ok((slot, block_root)) = data_columns .iter() .map(|c| (c.slot(), c.block_root())) @@ -3067,7 +3094,13 @@ impl BeaconChain { pub async fn process_rpc_custody_columns( self: &Arc, custody_columns: DataColumnSidecarList, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { let Ok((slot, block_root)) = custody_columns .iter() .map(|c| (c.slot(), c.block_root())) @@ -3094,7 +3127,7 @@ impl BeaconChain { let r = self .check_rpc_custody_columns_availability_and_import(slot, block_root, custody_columns) .await; - self.remove_notified(&block_root, r) + self.remove_notified_custody_columns(&block_root, r) } /// Remove any block components from the *processing cache* if we no longer require them. If the @@ -3114,13 +3147,15 @@ impl BeaconChain { /// Remove any block components from the *processing cache* if we no longer require them. If the /// block was imported full or erred, we no longer require them. - fn remove_notified_custody_columns( + fn remove_notified_custody_columns
<P>
( &self, block_root: &Hash256, - r: Result>, - ) -> Result> { - let has_missing_components = - matches!(r, Ok(AvailabilityProcessingStatus::MissingComponents(_, _))); + r: Result<(AvailabilityProcessingStatus, P), BlockError>, + ) -> Result<(AvailabilityProcessingStatus, P), BlockError> { + let has_missing_components = matches!( + r, + Ok((AvailabilityProcessingStatus::MissingComponents(_, _), _)) + ); if !has_missing_components { self.reqresp_pre_import_cache.write().remove(block_root); } @@ -3378,20 +3413,26 @@ impl BeaconChain { slot: Slot, block_root: Hash256, data_columns: Vec>, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { if let Some(slasher) = self.slasher.as_ref() { for data_colum in &data_columns { slasher.accept_block_header(data_colum.signed_block_header()); } } - let availability = self.data_availability_checker.put_gossip_data_columns( - slot, - block_root, - data_columns, - )?; + let (availability, data_columns_to_publish) = self + .data_availability_checker + .put_gossip_data_columns(slot, block_root, data_columns)?; - self.process_availability(slot, availability).await + self.process_availability(slot, availability) + .await + .map(|result| (result, data_columns_to_publish)) } /// Checks if the provided blobs can make any cached blocks available, and imports immediately @@ -3440,7 +3481,13 @@ impl BeaconChain { slot: Slot, block_root: Hash256, custody_columns: DataColumnSidecarList, - ) -> Result> { + ) -> Result< + ( + AvailabilityProcessingStatus, + DataColumnsToPublish, + ), + BlockError, + > { // Need to scope this to ensure the lock is dropped before calling `process_availability` // Even an explicit drop is not enough to convince the borrow checker. { @@ -3465,13 +3512,16 @@ impl BeaconChain { // This slot value is purely informative for the consumers of // `AvailabilityProcessingStatus::MissingComponents` to log an error with a slot. - let availability = self.data_availability_checker.put_rpc_custody_columns( - block_root, - slot.epoch(T::EthSpec::slots_per_epoch()), - custody_columns, - )?; + let (availability, data_columns_to_publish) = + self.data_availability_checker.put_rpc_custody_columns( + block_root, + slot.epoch(T::EthSpec::slots_per_epoch()), + custody_columns, + )?; - self.process_availability(slot, availability).await + self.process_availability(slot, availability) + .await + .map(|result| (result, data_columns_to_publish)) } /// Imports a fully available block. 
Otherwise, returns `AvailabilityProcessingStatus::MissingComponents` @@ -3522,6 +3572,8 @@ impl BeaconChain { ); } + // TODO(das) record custody column available timestamp + // import let chain = self.clone(); let block_root = self @@ -6895,6 +6947,15 @@ impl BeaconChain { && self.spec.is_peer_das_enabled_for_epoch(block_epoch) } + /// Returns true if we should issue a sampling request for this block + /// TODO(das): check if the block is still within the da_window + pub fn should_sample_slot(&self, slot: Slot) -> bool { + self.config.enable_sampling + && self + .spec + .is_peer_das_enabled_for_epoch(slot.epoch(T::EthSpec::slots_per_epoch())) + } + pub fn logger(&self) -> &Logger { &self.log } diff --git a/beacon_node/beacon_chain/src/blob_verification.rs b/beacon_node/beacon_chain/src/blob_verification.rs index 228b3f7092c..99fc5d9d0c0 100644 --- a/beacon_node/beacon_chain/src/blob_verification.rs +++ b/beacon_node/beacon_chain/src/blob_verification.rs @@ -409,8 +409,8 @@ pub fn validate_blob_sidecar_for_gossip( // Verify that the blob_sidecar was received on the correct subnet. if blob_index != subnet { return Err(GossipBlobError::InvalidSubnet { - expected: blob_index, - received: subnet, + expected: subnet, + received: blob_index, }); } diff --git a/beacon_node/beacon_chain/src/block_verification.rs b/beacon_node/beacon_chain/src/block_verification.rs index 68fccee959b..d9662d59f9e 100644 --- a/beacon_node/beacon_chain/src/block_verification.rs +++ b/beacon_node/beacon_chain/src/block_verification.rs @@ -49,17 +49,20 @@ #![allow(clippy::result_large_err)] use crate::beacon_snapshot::PreProcessingSnapshot; -use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob}; +use crate::blob_verification::{GossipBlobError, GossipVerifiedBlob, GossipVerifiedBlobList}; use crate::block_verification_types::{ AsBlock, BlockContentsError, BlockImportData, GossipVerifiedBlockContents, RpcBlock, }; use crate::data_availability_checker::{AvailabilityCheckError, MaybeAvailableBlock}; -use crate::data_column_verification::GossipDataColumnError; +use crate::data_column_verification::{ + GossipDataColumnError, GossipVerifiedDataColumn, GossipVerifiedDataColumnList, +}; use crate::eth1_finalization_cache::Eth1FinalizationData; use crate::execution_payload::{ is_optimistic_candidate_block, validate_execution_payload_for_gossip, validate_merge_block, AllowOptimisticImport, NotifyExecutionLayer, PayloadNotifier, }; +use crate::kzg_utils::blobs_to_data_column_sidecars; use crate::observed_block_producers::SeenBlock; use crate::validator_monitor::HISTORIC_EPOCHS as VALIDATOR_MONITOR_HISTORIC_EPOCHS; use crate::validator_pubkey_cache::ValidatorPubkeyCache; @@ -94,10 +97,12 @@ use std::io::Write; use std::sync::Arc; use store::{Error as DBError, HotStateSummary, KeyValueStore, StoreOp}; use task_executor::JoinHandle; +use types::data_column_sidecar::DataColumnSidecarError; use types::{ - BeaconBlockRef, BeaconState, BeaconStateError, ChainSpec, Epoch, EthSpec, ExecutionBlockHash, - Hash256, InconsistentFork, PublicKey, PublicKeyBytes, RelativeEpoch, SignedBeaconBlock, - SignedBeaconBlockHeader, Slot, + BeaconBlockRef, BeaconState, BeaconStateError, BlobsList, ChainSpec, DataColumnSubnetId, Epoch, + EthSpec, ExecutionBlockHash, FullPayload, Hash256, InconsistentFork, KzgProofs, PublicKey, + PublicKeyBytes, RelativeEpoch, RuntimeVariableList, SignedBeaconBlock, SignedBeaconBlockHeader, + Slot, }; use types::{BlobSidecar, ExecPayload}; @@ -306,6 +311,14 @@ pub enum BlockError { /// TODO: We may need to 
penalize the peer that gave us a potentially invalid rpc blob. /// https://github.com/sigp/lighthouse/issues/4546 AvailabilityCheck(AvailabilityCheckError), + /// A Blob with a slot after PeerDAS is received and is not required to be imported. + /// This can happen because we stay subscribed to the blob subnet after 2 epochs, as we could + /// still receive valid blobs from a Deneb epoch after PeerDAS is activated. + /// + /// ## Peer scoring + /// + /// This indicates the peer is sending an unexpected gossip blob and should be penalised. + BlobNotRequired(Slot), /// An internal error has occurred when processing the block or sidecars. /// /// ## Peer scoring @@ -722,27 +735,24 @@ impl IntoGossipVerifiedBlockContents for PublishBlockReq chain: &BeaconChain, ) -> Result, BlockContentsError> { let (block, blobs) = self.deconstruct(); + let peer_das_enabled = chain.spec.is_peer_das_enabled_for_epoch(block.epoch()); + + let (gossip_verified_blobs, gossip_verified_data_columns) = if peer_das_enabled { + let gossip_verified_data_columns = + build_gossip_verified_data_columns(chain, &block, blobs.map(|(_, blobs)| blobs))?; + (None, gossip_verified_data_columns) + } else { + let gossip_verified_blobs = build_gossip_verified_blobs(chain, &block, blobs)?; + (gossip_verified_blobs, None) + }; - let gossip_verified_blobs = blobs - .map(|(kzg_proofs, blobs)| { - let mut gossip_verified_blobs = vec![]; - for (i, (kzg_proof, blob)) in kzg_proofs.iter().zip(blobs).enumerate() { - let _timer = - metrics::start_timer(&metrics::BLOB_SIDECAR_INCLUSION_PROOF_COMPUTATION); - let blob = BlobSidecar::new(i, blob, &block, *kzg_proof) - .map_err(BlockContentsError::SidecarError)?; - drop(_timer); - let gossip_verified_blob = - GossipVerifiedBlob::new(Arc::new(blob), i as u64, chain)?; - gossip_verified_blobs.push(gossip_verified_blob); - } - let gossip_verified_blobs = VariableList::from(gossip_verified_blobs); - Ok::<_, BlockContentsError>(gossip_verified_blobs) - }) - .transpose()?; let gossip_verified_block = GossipVerifiedBlock::new(block, chain)?; - Ok((gossip_verified_block, gossip_verified_blobs)) + Ok(( + gossip_verified_block, + gossip_verified_blobs, + gossip_verified_data_columns, + )) } fn inner_block(&self) -> &SignedBeaconBlock { @@ -750,6 +760,70 @@ impl IntoGossipVerifiedBlockContents for PublishBlockReq } } +#[allow(clippy::type_complexity)] +fn build_gossip_verified_blobs( + chain: &BeaconChain, + block: &Arc>>, + blobs: Option<(KzgProofs, BlobsList)>, +) -> Result>, BlockContentsError> { + blobs + .map(|(kzg_proofs, blobs)| { + let mut gossip_verified_blobs = vec![]; + for (i, (kzg_proof, blob)) in kzg_proofs.iter().zip(blobs).enumerate() { + let _timer = + metrics::start_timer(&metrics::BLOB_SIDECAR_INCLUSION_PROOF_COMPUTATION); + let blob = BlobSidecar::new(i, blob, block, *kzg_proof) + .map_err(BlockContentsError::BlobSidecarError)?; + drop(_timer); + let gossip_verified_blob = + GossipVerifiedBlob::new(Arc::new(blob), i as u64, chain)?; + gossip_verified_blobs.push(gossip_verified_blob); + } + let gossip_verified_blobs = VariableList::from(gossip_verified_blobs); + Ok::<_, BlockContentsError>(gossip_verified_blobs) + }) + .transpose() +} + +fn build_gossip_verified_data_columns( + chain: &BeaconChain, + block: &SignedBeaconBlock>, + blobs: Option>, +) -> Result>, BlockContentsError> { + blobs + // Only attempt to build data columns if blobs is non empty to avoid skewing the metrics. 
+ .filter(|b| !b.is_empty()) + .map(|blobs| { + // NOTE: we expect KZG to be initialized if the blobs are present + let kzg = chain + .kzg + .as_ref() + .ok_or(BlockContentsError::DataColumnError( + GossipDataColumnError::KzgNotInitialized, + ))?; + + let timer = metrics::start_timer(&metrics::DATA_COLUMN_SIDECAR_COMPUTATION); + let sidecars = blobs_to_data_column_sidecars(&blobs, block, kzg, &chain.spec)?; + drop(timer); + let mut gossip_verified_data_columns = vec![]; + for sidecar in sidecars { + let subnet = DataColumnSubnetId::from_column_index::( + sidecar.index as usize, + &chain.spec, + ); + let column = GossipVerifiedDataColumn::new(sidecar, subnet.into(), chain)?; + gossip_verified_data_columns.push(column); + } + let gossip_verified_data_columns = RuntimeVariableList::new( + gossip_verified_data_columns, + chain.spec.number_of_columns, + ) + .map_err(DataColumnSidecarError::SszError)?; + Ok::<_, BlockContentsError>(gossip_verified_data_columns) + }) + .transpose() +} + /// Implemented on types that can be converted into a `ExecutionPendingBlock`. /// /// Used to allow functions to accept blocks at various stages of verification. @@ -1169,6 +1243,10 @@ impl SignatureVerifiedBlock { pub fn block_root(&self) -> Hash256 { self.block_root } + + pub fn slot(&self) -> Slot { + self.block.slot() + } } impl IntoExecutionPendingBlock for SignatureVerifiedBlock { diff --git a/beacon_node/beacon_chain/src/block_verification_types.rs b/beacon_node/beacon_chain/src/block_verification_types.rs index 426c41bfeab..b271f0a2f98 100644 --- a/beacon_node/beacon_chain/src/block_verification_types.rs +++ b/beacon_node/beacon_chain/src/block_verification_types.rs @@ -2,6 +2,9 @@ use crate::blob_verification::{GossipBlobError, GossipVerifiedBlobList}; use crate::block_verification::BlockError; use crate::data_availability_checker::AvailabilityCheckError; pub use crate::data_availability_checker::{AvailableBlock, MaybeAvailableBlock}; +use crate::data_column_verification::{ + CustodyDataColumn, CustodyDataColumnList, GossipDataColumnError, GossipVerifiedDataColumnList, +}; use crate::eth1_finalization_cache::Eth1FinalizationData; use crate::{get_block_root, GossipVerifiedBlock, PayloadVerificationOutcome}; use derivative::Derivative; @@ -9,10 +12,11 @@ use ssz_types::VariableList; use state_processing::ConsensusContext; use std::fmt::{Debug, Formatter}; use std::sync::Arc; -use types::blob_sidecar::{BlobIdentifier, BlobSidecarError, FixedBlobSidecarList}; +use types::blob_sidecar::{self, BlobIdentifier, FixedBlobSidecarList}; +use types::data_column_sidecar::{self}; use types::{ - BeaconBlockRef, BeaconState, BlindedPayload, BlobSidecarList, Epoch, EthSpec, Hash256, - SignedBeaconBlock, SignedBeaconBlockHeader, Slot, + BeaconBlockRef, BeaconState, BlindedPayload, BlobSidecarList, ChainSpec, Epoch, EthSpec, + Hash256, RuntimeVariableList, SignedBeaconBlock, SignedBeaconBlockHeader, Slot, }; /// A block that has been received over RPC. 
It has 2 internal variants: @@ -50,6 +54,7 @@ impl RpcBlock { match &self.block { RpcBlockInner::Block(block) => block, RpcBlockInner::BlockAndBlobs(block, _) => block, + RpcBlockInner::BlockAndCustodyColumns(block, _) => block, } } @@ -57,6 +62,7 @@ impl RpcBlock { match &self.block { RpcBlockInner::Block(block) => block.clone(), RpcBlockInner::BlockAndBlobs(block, _) => block.clone(), + RpcBlockInner::BlockAndCustodyColumns(block, _) => block.clone(), } } @@ -64,6 +70,15 @@ impl RpcBlock { match &self.block { RpcBlockInner::Block(_) => None, RpcBlockInner::BlockAndBlobs(_, blobs) => Some(blobs), + RpcBlockInner::BlockAndCustodyColumns(_, _) => None, + } + } + + pub fn custody_columns(&self) -> Option<&CustodyDataColumnList> { + match &self.block { + RpcBlockInner::Block(_) => None, + RpcBlockInner::BlockAndBlobs(_, _) => None, + RpcBlockInner::BlockAndCustodyColumns(_, data_columns) => Some(data_columns), } } } @@ -79,6 +94,9 @@ enum RpcBlockInner { /// This variant is used with parent lookups and by-range responses. It should have all blobs /// ordered, all block roots matching, and the correct number of blobs for this block. BlockAndBlobs(Arc>, BlobSidecarList), + /// This variant is used with parent lookups and by-range responses. It should have all + /// requested data columns, all block roots matching for this block. + BlockAndCustodyColumns(Arc>, CustodyDataColumnList), } impl RpcBlock { @@ -136,6 +154,33 @@ impl RpcBlock { }) } + pub fn new_with_custody_columns( + block_root: Option, + block: Arc>, + custody_columns: Vec>, + spec: &ChainSpec, + ) -> Result { + let block_root = block_root.unwrap_or_else(|| get_block_root(&block)); + + if block.num_expected_blobs() > 0 && custody_columns.is_empty() { + // The number of required custody columns is out of scope here. + return Err(AvailabilityCheckError::MissingCustodyColumns); + } + // Treat empty data column lists as if they are missing. 
+ let inner = if !custody_columns.is_empty() { + RpcBlockInner::BlockAndCustodyColumns( + block, + RuntimeVariableList::new(custody_columns, spec.number_of_columns)?, + ) + } else { + RpcBlockInner::Block(block) + }; + Ok(Self { + block_root, + block: inner, + }) + } + pub fn new_from_fixed( block_root: Hash256, block: Arc>, @@ -153,25 +198,36 @@ impl RpcBlock { Self::new(Some(block_root), block, blobs) } + #[allow(clippy::type_complexity)] pub fn deconstruct( self, ) -> ( Hash256, Arc>, Option>, + Option>, ) { let block_root = self.block_root(); match self.block { - RpcBlockInner::Block(block) => (block_root, block, None), - RpcBlockInner::BlockAndBlobs(block, blobs) => (block_root, block, Some(blobs)), + RpcBlockInner::Block(block) => (block_root, block, None, None), + RpcBlockInner::BlockAndBlobs(block, blobs) => (block_root, block, Some(blobs), None), + RpcBlockInner::BlockAndCustodyColumns(block, data_columns) => { + (block_root, block, None, Some(data_columns)) + } } } pub fn n_blobs(&self) -> usize { match &self.block { - RpcBlockInner::Block(_) => 0, + RpcBlockInner::Block(_) | RpcBlockInner::BlockAndCustodyColumns(_, _) => 0, RpcBlockInner::BlockAndBlobs(_, blobs) => blobs.len(), } } + pub fn n_data_columns(&self) -> usize { + match &self.block { + RpcBlockInner::Block(_) | RpcBlockInner::BlockAndBlobs(_, _) => 0, + RpcBlockInner::BlockAndCustodyColumns(_, data_columns) => data_columns.len(), + } + } } /// A block that has gone through all pre-deneb block processing checks including block processing @@ -334,14 +390,19 @@ impl BlockImportData { } } -pub type GossipVerifiedBlockContents = - (GossipVerifiedBlock, Option>); +pub type GossipVerifiedBlockContents = ( + GossipVerifiedBlock, + Option>, + Option>, +); #[derive(Debug)] pub enum BlockContentsError { BlockError(BlockError), BlobError(GossipBlobError), - SidecarError(BlobSidecarError), + BlobSidecarError(blob_sidecar::BlobSidecarError), + DataColumnError(GossipDataColumnError), + DataColumnSidecarError(data_column_sidecar::DataColumnSidecarError), } impl From> for BlockContentsError { @@ -356,6 +417,18 @@ impl From> for BlockContentsError { } } +impl From for BlockContentsError { + fn from(value: GossipDataColumnError) -> Self { + Self::DataColumnError(value) + } +} + +impl From for BlockContentsError { + fn from(value: data_column_sidecar::DataColumnSidecarError) -> Self { + Self::DataColumnSidecarError(value) + } +} + impl std::fmt::Display for BlockContentsError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -365,8 +438,14 @@ impl std::fmt::Display for BlockContentsError { BlockContentsError::BlobError(err) => { write!(f, "BlobError({})", err) } - BlockContentsError::SidecarError(err) => { - write!(f, "SidecarError({:?})", err) + BlockContentsError::BlobSidecarError(err) => { + write!(f, "BlobSidecarError({:?})", err) + } + BlockContentsError::DataColumnError(err) => { + write!(f, "DataColumnError({:?})", err) + } + BlockContentsError::DataColumnSidecarError(err) => { + write!(f, "DataColumnSidecarError({:?})", err) } } } @@ -517,13 +596,28 @@ impl AsBlock for AvailableBlock { } fn into_rpc_block(self) -> RpcBlock { - // TODO(das): rpc data columns to be merged from `das` branch - let (block_root, block, blobs_opt, _data_columns_opt) = self.deconstruct(); + let number_of_columns = self.spec.number_of_columns; + let (block_root, block, blobs_opt, data_columns_opt) = self.deconstruct(); // Circumvent the constructor here, because an Available block will have already had // consistency 
checks performed. - let inner = match blobs_opt { - None => RpcBlockInner::Block(block), - Some(blobs) => RpcBlockInner::BlockAndBlobs(block, blobs), + let inner = match (blobs_opt, data_columns_opt) { + (None, None) => RpcBlockInner::Block(block), + (Some(blobs), _) => RpcBlockInner::BlockAndBlobs(block, blobs), + (_, Some(data_columns)) => RpcBlockInner::BlockAndCustodyColumns( + block, + RuntimeVariableList::new( + data_columns + .into_iter() + // TODO(das): This is an ugly hack that should be removed. After updating + // store types to handle custody data columns this should not be required. + // It's okay-ish because available blocks must have all the required custody + // columns. + .map(|d| CustodyDataColumn::from_asserted_custody(d)) + .collect(), + number_of_columns, + ) + .expect("data column list is within bounds"), + ), }; RpcBlock { block_root, @@ -555,12 +649,14 @@ impl AsBlock for RpcBlock { match &self.block { RpcBlockInner::Block(block) => block, RpcBlockInner::BlockAndBlobs(block, _) => block, + RpcBlockInner::BlockAndCustodyColumns(block, _) => block, } } fn block_cloned(&self) -> Arc> { match &self.block { RpcBlockInner::Block(block) => block.clone(), RpcBlockInner::BlockAndBlobs(block, _) => block.clone(), + RpcBlockInner::BlockAndCustodyColumns(block, _) => block.clone(), } } fn canonical_root(&self) -> Hash256 { diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 042d14a4fa4..84c6dea3680 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -409,6 +409,11 @@ where .init_blob_info(genesis.beacon_block.slot()) .map_err(|e| format!("Failed to initialize genesis blob info: {:?}", e))?, ); + self.pending_io_batch.push( + store + .init_data_column_info(genesis.beacon_block.slot()) + .map_err(|e| format!("Failed to initialize genesis data column info: {:?}", e))?, + ); let fc_store = BeaconForkChoiceStore::get_forkchoice_store(store, &genesis) .map_err(|e| format!("Unable to initialize fork choice store: {e:?}"))?; @@ -573,6 +578,11 @@ where .init_blob_info(weak_subj_block.slot()) .map_err(|e| format!("Failed to initialize blob info: {:?}", e))?, ); + self.pending_io_batch.push( + store + .init_data_column_info(weak_subj_block.slot()) + .map_err(|e| format!("Failed to initialize data column info: {:?}", e))?, + ); // Store pruning checkpoint to prevent attempting to prune before the anchor state. self.pending_io_batch @@ -978,7 +988,6 @@ where self.kzg.clone(), store, self.import_all_data_columns, - &log, self.spec, ) .map_err(|e| format!("Error initializing DataAvailabilityChecker: {:?}", e))?, diff --git a/beacon_node/beacon_chain/src/chain_config.rs b/beacon_node/beacon_chain/src/chain_config.rs index c908efa07c3..20edfbf31a4 100644 --- a/beacon_node/beacon_chain/src/chain_config.rs +++ b/beacon_node/beacon_chain/src/chain_config.rs @@ -84,6 +84,10 @@ pub struct ChainConfig { pub epochs_per_migration: u64, /// When set to true Light client server computes and caches state proofs for serving updates pub enable_light_client_server: bool, + /// The number of data columns to withhold / exclude from publishing when proposing a block. + pub malicious_withhold_count: usize, + /// Enable peer sampling on blocks. 
+ pub enable_sampling: bool, } impl Default for ChainConfig { @@ -115,6 +119,8 @@ impl Default for ChainConfig { always_prepare_payload: false, epochs_per_migration: crate::migrate::DEFAULT_EPOCHS_PER_MIGRATION, enable_light_client_server: false, + malicious_withhold_count: 0, + enable_sampling: false, } } } diff --git a/beacon_node/beacon_chain/src/data_availability_checker.rs b/beacon_node/beacon_chain/src/data_availability_checker.rs index 1bfe377ad05..470cee713fa 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker.rs @@ -5,7 +5,7 @@ use crate::block_verification_types::{ use crate::data_availability_checker::overflow_lru_cache::DataAvailabilityCheckerInner; use crate::{BeaconChain, BeaconChainTypes, BeaconStore}; use kzg::Kzg; -use slog::{debug, error, Logger}; +use slog::{debug, error}; use slot_clock::SlotClock; use std::fmt; use std::fmt::Debug; @@ -16,7 +16,7 @@ use task_executor::TaskExecutor; use types::blob_sidecar::{BlobIdentifier, BlobSidecar, FixedBlobSidecarList}; use types::{ BlobSidecarList, ChainSpec, DataColumnIdentifier, DataColumnSidecar, DataColumnSidecarList, - Epoch, EthSpec, Hash256, SignedBeaconBlock, Slot, + Epoch, EthSpec, Hash256, RuntimeVariableList, SignedBeaconBlock, Slot, }; mod error; @@ -24,11 +24,14 @@ mod overflow_lru_cache; mod state_lru_cache; use crate::data_column_verification::{ - GossipVerifiedDataColumn, KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn, + verify_kzg_for_data_column_list, CustodyDataColumn, GossipVerifiedDataColumn, + KzgVerifiedCustodyDataColumn, KzgVerifiedDataColumn, }; pub use error::{Error as AvailabilityCheckError, ErrorCategory as AvailabilityCheckErrorCategory}; use types::non_zero_usize::new_non_zero_usize; +pub use self::overflow_lru_cache::DataColumnsToPublish; + /// The LRU Cache stores `PendingComponents` which can store up to /// `MAX_BLOBS_PER_BLOCK = 6` blobs each. A `BlobSidecar` is 0.131256 MB. So /// the maximum size of a `PendingComponents` is ~ 0.787536 MB. Setting this @@ -67,8 +70,7 @@ pub struct DataAvailabilityChecker { availability_cache: Arc>, slot_clock: T::SlotClock, kzg: Option>, - log: Logger, - spec: ChainSpec, + spec: Arc, } /// This type is returned after adding a block / blob to the `DataAvailabilityChecker`. @@ -98,9 +100,9 @@ impl DataAvailabilityChecker { kzg: Option>, store: BeaconStore, import_all_data_columns: bool, - log: &Logger, spec: ChainSpec, ) -> Result { + let spec = Arc::new(spec); let custody_subnet_count = if import_all_data_columns { spec.data_column_sidecar_subnet_count as usize } else { @@ -120,7 +122,6 @@ impl DataAvailabilityChecker { availability_cache: Arc::new(inner), slot_clock, kzg, - log: log.clone(), spec, }) } @@ -208,12 +209,14 @@ impl DataAvailabilityChecker { /// Put a list of custody columns received via RPC into the availability cache. This performs KZG /// verification on the blobs in the list. 
+ #[allow(clippy::type_complexity)] pub fn put_rpc_custody_columns( &self, block_root: Hash256, epoch: Epoch, custody_columns: DataColumnSidecarList, - ) -> Result, AvailabilityCheckError> { + ) -> Result<(Availability, DataColumnsToPublish), AvailabilityCheckError> + { let Some(kzg) = self.kzg.as_ref() else { return Err(AvailabilityCheckError::KzgNotInitialized); }; @@ -221,16 +224,16 @@ impl DataAvailabilityChecker { // TODO(das): report which column is invalid for proper peer scoring // TODO(das): batch KZG verification here let verified_custody_columns = custody_columns - .iter() + .into_iter() .map(|column| { Ok(KzgVerifiedCustodyDataColumn::from_asserted_custody( - KzgVerifiedDataColumn::new(column.clone(), kzg) - .map_err(AvailabilityCheckError::Kzg)?, + KzgVerifiedDataColumn::new(column, kzg).map_err(AvailabilityCheckError::Kzg)?, )) }) .collect::, AvailabilityCheckError>>()?; self.availability_cache.put_kzg_verified_data_columns( + kzg, block_root, epoch, verified_custody_columns, @@ -253,20 +256,35 @@ impl DataAvailabilityChecker { ) } + /// Check if we've cached other data columns for this block. If it satisfies the custody requirement and we also + /// have a block cached, return the `Availability` variant triggering block import. + /// Otherwise cache the data column sidecar. + /// + /// This should only accept gossip verified data columns, so we should not have to worry about dupes. + #[allow(clippy::type_complexity)] pub fn put_gossip_data_columns( &self, slot: Slot, block_root: Hash256, gossip_data_columns: Vec>, - ) -> Result, AvailabilityCheckError> { + ) -> Result<(Availability, DataColumnsToPublish), AvailabilityCheckError> + { + let Some(kzg) = self.kzg.as_ref() else { + return Err(AvailabilityCheckError::KzgNotInitialized); + }; let epoch = slot.epoch(T::EthSpec::slots_per_epoch()); + let custody_columns = gossip_data_columns .into_iter() .map(|c| KzgVerifiedCustodyDataColumn::from_asserted_custody(c.into_inner())) .collect::>(); - self.availability_cache - .put_kzg_verified_data_columns(block_root, epoch, custody_columns) + self.availability_cache.put_kzg_verified_data_columns( + kzg, + block_root, + epoch, + custody_columns, + ) } /// Check if we have all the blobs for a block. 
Returns `Availability` which has information @@ -293,42 +311,66 @@ impl DataAvailabilityChecker { &self, block: RpcBlock, ) -> Result, AvailabilityCheckError> { - let (block_root, block, blobs) = block.deconstruct(); - match blobs { - None => { - if self.blobs_required_for_block(&block) { - Ok(MaybeAvailableBlock::AvailabilityPending { block_root, block }) - } else { - Ok(MaybeAvailableBlock::Available(AvailableBlock { - block_root, - block, - blobs: None, - data_columns: None, - blobs_available_timestamp: None, - })) - } - } - Some(blob_list) => { - let verified_blobs = if self.blobs_required_for_block(&block) { - let kzg = self - .kzg - .as_ref() - .ok_or(AvailabilityCheckError::KzgNotInitialized)?; - verify_kzg_for_blob_list(blob_list.iter(), kzg) - .map_err(AvailabilityCheckError::Kzg)?; - Some(blob_list) - } else { - None - }; + let (block_root, block, blobs, data_columns) = block.deconstruct(); + if self.blobs_required_for_block(&block) { + return if let Some(blob_list) = blobs.as_ref() { + let kzg = self + .kzg + .as_ref() + .ok_or(AvailabilityCheckError::KzgNotInitialized)?; + verify_kzg_for_blob_list(blob_list.iter(), kzg) + .map_err(AvailabilityCheckError::Kzg)?; Ok(MaybeAvailableBlock::Available(AvailableBlock { block_root, block, - blobs: verified_blobs, + blobs, + blobs_available_timestamp: None, data_columns: None, + spec: self.spec.clone(), + })) + } else { + Ok(MaybeAvailableBlock::AvailabilityPending { block_root, block }) + }; + } + if self.data_columns_required_for_block(&block) { + return if let Some(data_column_list) = data_columns.as_ref() { + let kzg = self + .kzg + .as_ref() + .ok_or(AvailabilityCheckError::KzgNotInitialized)?; + verify_kzg_for_data_column_list( + data_column_list + .iter() + .map(|custody_column| custody_column.as_data_column()), + kzg, + ) + .map_err(AvailabilityCheckError::Kzg)?; + Ok(MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, blobs_available_timestamp: None, + data_columns: Some( + data_column_list + .into_iter() + .map(|d| d.clone_arc()) + .collect(), + ), + spec: self.spec.clone(), })) - } + } else { + Ok(MaybeAvailableBlock::AvailabilityPending { block_root, block }) + }; } + + Ok(MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, + blobs_available_timestamp: None, + data_columns: None, + spec: self.spec.clone(), + })) } /// Checks if a vector of blocks are available. 
Returns a vector of `MaybeAvailableBlock` @@ -360,64 +402,108 @@ impl DataAvailabilityChecker { verify_kzg_for_blob_list(all_blobs.iter(), kzg)?; } + let all_data_columns = blocks + .iter() + .filter(|block| self.data_columns_required_for_block(block.as_block())) + // this clone is cheap as it's cloning an Arc + .filter_map(|block| block.custody_columns().cloned()) + .flatten() + .map(CustodyDataColumn::into_inner) + .collect::>(); + let all_data_columns = + RuntimeVariableList::from_vec(all_data_columns, self.spec.number_of_columns); + + // verify kzg for all data columns at once + if !all_data_columns.is_empty() { + let kzg = self + .kzg + .as_ref() + .ok_or(AvailabilityCheckError::KzgNotInitialized)?; + verify_kzg_for_data_column_list(all_data_columns.iter(), kzg)?; + } + for block in blocks { - let (block_root, block, blobs) = block.deconstruct(); - match blobs { - None => { - if self.blobs_required_for_block(&block) { - results.push(MaybeAvailableBlock::AvailabilityPending { block_root, block }) - } else { - results.push(MaybeAvailableBlock::Available(AvailableBlock { - block_root, - block, - blobs: None, - data_columns: None, - blobs_available_timestamp: None, - })) - } - } - Some(blob_list) => { - let verified_blobs = if self.blobs_required_for_block(&block) { - Some(blob_list) - } else { - None - }; - // already verified kzg for all blobs - results.push(MaybeAvailableBlock::Available(AvailableBlock { + let (block_root, block, blobs, data_columns) = block.deconstruct(); + + let maybe_available_block = if self.blobs_required_for_block(&block) { + if blobs.is_some() { + MaybeAvailableBlock::Available(AvailableBlock { block_root, block, - blobs: verified_blobs, + blobs, + blobs_available_timestamp: None, data_columns: None, + spec: self.spec.clone(), + }) + } else { + MaybeAvailableBlock::AvailabilityPending { block_root, block } + } + } else if self.data_columns_required_for_block(&block) { + if data_columns.is_some() { + MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, + data_columns: data_columns.map(|data_columns| { + data_columns.into_iter().map(|d| d.into_inner()).collect() + }), blobs_available_timestamp: None, - })) + spec: self.spec.clone(), + }) + } else { + MaybeAvailableBlock::AvailabilityPending { block_root, block } } - } + } else { + MaybeAvailableBlock::Available(AvailableBlock { + block_root, + block, + blobs: None, + data_columns: None, + blobs_available_timestamp: None, + spec: self.spec.clone(), + }) + }; + + results.push(maybe_available_block); } Ok(results) } /// Determines the blob requirements for a block. If the block is pre-deneb, no blobs are required. - /// If the block's epoch is from prior to the data availability boundary, no blobs are required. + /// If the epoch is from prior to the data availability boundary, no blobs are required. + pub fn blobs_required_for_epoch(&self, epoch: Epoch) -> bool { + self.da_check_required_for_epoch(epoch) && !self.spec.is_peer_das_enabled_for_epoch(epoch) + } + + /// Determines the data column requirements for an epoch. + /// - If the epoch is pre-peerdas, no data columns are required. + /// - If the epoch is from prior to the data availability boundary, no data columns are required. 
+ pub fn data_columns_required_for_epoch(&self, epoch: Epoch) -> bool { + self.da_check_required_for_epoch(epoch) && self.spec.is_peer_das_enabled_for_epoch(epoch) + } + + /// See `Self::blobs_required_for_epoch` fn blobs_required_for_block(&self, block: &SignedBeaconBlock) -> bool { - block.num_expected_blobs() > 0 && self.da_check_required_for_epoch(block.epoch()) + block.num_expected_blobs() > 0 && self.blobs_required_for_epoch(block.epoch()) + } + + /// See `Self::data_columns_required_for_epoch` + fn data_columns_required_for_block(&self, block: &SignedBeaconBlock) -> bool { + block.num_expected_blobs() > 0 && self.data_columns_required_for_epoch(block.epoch()) } /// The epoch at which we require a data availability check in block processing. /// `None` if the `Deneb` fork is disabled. pub fn data_availability_boundary(&self) -> Option { - self.spec.deneb_fork_epoch.and_then(|fork_epoch| { - self.slot_clock - .now() - .map(|slot| slot.epoch(T::EthSpec::slots_per_epoch())) - .map(|current_epoch| { - std::cmp::max( - fork_epoch, - current_epoch - .saturating_sub(self.spec.min_epochs_for_blob_sidecars_requests), - ) - }) - }) + let fork_epoch = self.spec.deneb_fork_epoch?; + let current_slot = self.slot_clock.now()?; + Some(std::cmp::max( + fork_epoch, + current_slot + .epoch(T::EthSpec::slots_per_epoch()) + .saturating_sub(self.spec.min_epochs_for_blob_sidecars_requests), + )) } /// Returns true if the given epoch lies within the da boundary and false otherwise. @@ -426,18 +512,6 @@ impl DataAvailabilityChecker { .map_or(false, |da_epoch| block_epoch >= da_epoch) } - pub fn da_check_required_for_current_epoch(&self) -> bool { - let Some(current_slot) = self.slot_clock.now_or_genesis() else { - error!( - self.log, - "Failed to read slot clock when checking for missing blob ids" - ); - return false; - }; - - self.da_check_required_for_epoch(current_slot.epoch(T::EthSpec::slots_per_epoch())) - } - /// Returns `true` if the current epoch is greater than or equal to the `Deneb` epoch. pub fn is_deneb(&self) -> bool { self.slot_clock.now().map_or(false, |slot| { @@ -556,6 +630,7 @@ pub struct AvailableBlock { data_columns: Option>, /// Timestamp at which this block first became available (UNIX timestamp, time since 1970). blobs_available_timestamp: Option, + pub spec: Arc, } impl AvailableBlock { @@ -564,6 +639,7 @@ impl AvailableBlock { block: Arc>, blobs: Option>, data_columns: Option>, + spec: Arc, ) -> Self { Self { block_root, @@ -571,6 +647,7 @@ impl AvailableBlock { blobs, data_columns, blobs_available_timestamp: None, + spec, } } @@ -589,6 +666,10 @@ impl AvailableBlock { self.blobs_available_timestamp } + pub fn data_columns(&self) -> Option<&DataColumnSidecarList> { + self.data_columns.as_ref() + } + #[allow(clippy::type_complexity)] pub fn deconstruct( self, @@ -604,6 +685,7 @@ impl AvailableBlock { blobs, data_columns, blobs_available_timestamp: _, + .. 
} = self; (block_root, block, blobs, data_columns) } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/error.rs b/beacon_node/beacon_chain/src/data_availability_checker/error.rs index bb92b0b6322..79793d6dc29 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/error.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/error.rs @@ -14,7 +14,9 @@ pub enum Error { Unexpected, SszTypes(ssz_types::Error), MissingBlobs, + MissingCustodyColumns, BlobIndexInvalid(u64), + DataColumnIndexInvalid(u64), StoreError(store::Error), DecodeError(ssz::DecodeError), ParentStateMissing(Hash256), @@ -37,6 +39,7 @@ impl Error { Error::KzgNotInitialized | Error::SszTypes(_) | Error::MissingBlobs + | Error::MissingCustodyColumns | Error::StoreError(_) | Error::DecodeError(_) | Error::Unexpected @@ -47,6 +50,7 @@ | Error::SlotClockError => ErrorCategory::Internal, Error::Kzg(_) | Error::BlobIndexInvalid(_) + | Error::DataColumnIndexInvalid(_) | Error::KzgCommitmentMismatch { .. } | Error::KzgVerificationFailed => ErrorCategory::Malicious, } diff --git a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs index 50fae091196..4863982b552 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs +++ b/beacon_node/beacon_chain/src/data_availability_checker/overflow_lru_cache.rs @@ -6,28 +6,36 @@ use crate::block_verification_types::{ }; use crate::data_availability_checker::{Availability, AvailabilityCheckError}; use crate::data_column_verification::KzgVerifiedCustodyDataColumn; +use crate::metrics; use crate::BeaconChainTypes; +use kzg::Kzg; use lru::LruCache; use parking_lot::RwLock; use ssz_types::{FixedVector, VariableList}; +use std::collections::HashSet; use std::num::NonZeroUsize; use std::sync::Arc; use types::blob_sidecar::BlobIdentifier; use types::{ - BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar, Epoch, EthSpec, - Hash256, SignedBeaconBlock, + BlobSidecar, ChainSpec, ColumnIndex, DataColumnIdentifier, DataColumnSidecar, + DataColumnSidecarList, Epoch, EthSpec, Hash256, SignedBeaconBlock, }; +pub type DataColumnsToPublish = Option>; + /// This represents the components of a partially available block /// /// The blobs are all gossip and kzg verified. /// The block has completed all verifications except the availability check. +/// TODO(das): this struct can potentially be refactored as blobs and data columns are mutually +/// exclusive and this could simplify `is_importable`. #[derive(Clone)] pub struct PendingComponents { pub block_root: Hash256, pub verified_blobs: FixedVector>, E::MaxBlobsPerBlock>, pub verified_data_columns: Vec>, pub executed_block: Option>, + pub reconstruction_started: bool, } pub enum BlockImportRequirement { @@ -52,10 +60,11 @@ impl PendingComponents { pub fn get_cached_data_column( &self, data_column_index: u64, - ) -> Option<&KzgVerifiedCustodyDataColumn> { + ) -> Option>> { self.verified_data_columns .iter() .find(|d| d.index() == data_column_index) + .map(|d| d.clone_arc()) } /// Returns a mutable reference to the cached block.
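The `data_availability_checker.rs` hunks a little earlier split the DA check into blob requirements and data-column requirements around the PeerDAS fork, and rewrite `data_availability_boundary` as max(Deneb fork epoch, current epoch minus `min_epochs_for_blob_sidecars_requests`). A minimal sketch of that gating follows, using a hypothetical `DaParams` struct and plain `u64` epochs in place of `ChainSpec`, `Epoch`, and the slot clock:

```rust
/// Hypothetical stand-in for the chain-spec fields the availability checker reads.
struct DaParams {
    deneb_fork_epoch: Option<u64>,
    eip7594_fork_epoch: Option<u64>,
    min_epochs_for_blob_sidecars_requests: u64,
}

impl DaParams {
    /// max(Deneb fork epoch, current_epoch - min_epochs_for_blob_sidecars_requests),
    /// or `None` while Deneb is disabled.
    fn da_boundary(&self, current_epoch: u64) -> Option<u64> {
        let fork_epoch = self.deneb_fork_epoch?;
        Some(std::cmp::max(
            fork_epoch,
            current_epoch.saturating_sub(self.min_epochs_for_blob_sidecars_requests),
        ))
    }

    fn da_check_required(&self, block_epoch: u64, current_epoch: u64) -> bool {
        self.da_boundary(current_epoch)
            .map_or(false, |boundary| block_epoch >= boundary)
    }

    fn peer_das_enabled(&self, epoch: u64) -> bool {
        self.eip7594_fork_epoch.map_or(false, |fork| epoch >= fork)
    }

    /// Blobs are only required inside the DA window and before the PeerDAS fork...
    fn blobs_required(&self, block_epoch: u64, current_epoch: u64) -> bool {
        self.da_check_required(block_epoch, current_epoch) && !self.peer_das_enabled(block_epoch)
    }

    /// ...and data columns only inside the DA window from the PeerDAS fork onwards.
    fn data_columns_required(&self, block_epoch: u64, current_epoch: u64) -> bool {
        self.da_check_required(block_epoch, current_epoch) && self.peer_das_enabled(block_epoch)
    }
}

fn main() {
    let params = DaParams {
        deneb_fork_epoch: Some(10),
        eip7594_fork_epoch: Some(100),
        min_epochs_for_blob_sidecars_requests: 50,
    };
    // Pre-PeerDAS epoch inside the DA window: blobs required, columns not.
    assert!(params.blobs_required(60, 70) && !params.data_columns_required(60, 70));
    // Post-PeerDAS epoch inside the DA window: columns required, blobs not.
    assert!(params.data_columns_required(120, 130) && !params.blobs_required(120, 130));
}
```

Within the DA window a block therefore needs either blobs or custody columns, never both, which is what lets `PendingComponents` treat the two sets of components as mutually exclusive.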
@@ -170,12 +179,14 @@ impl PendingComponents { fn merge_data_columns>>( &mut self, kzg_verified_data_columns: I, - ) { + ) -> Result<(), AvailabilityCheckError> { for data_column in kzg_verified_data_columns { + // TODO(das): Add equivalent checks for data columns if necessary if !self.data_column_exists(data_column.index()) { self.verified_data_columns.push(data_column); } } + Ok(()) } /// Inserts a new block and revalidates the existing blobs against it. @@ -218,6 +229,7 @@ impl PendingComponents { verified_blobs: FixedVector::default(), verified_data_columns: vec![], executed_block: None, + reconstruction_started: false, } } @@ -230,6 +242,7 @@ impl PendingComponents { pub fn make_available( self, block_import_requirement: BlockImportRequirement, + spec: &Arc, recover: R, ) -> Result, AvailabilityCheckError> where @@ -242,6 +255,7 @@ impl PendingComponents { verified_blobs, verified_data_columns, executed_block, + .. } = self; let blobs_available_timestamp = verified_blobs @@ -291,12 +305,17 @@ impl PendingComponents { blobs, data_columns, blobs_available_timestamp, + spec: spec.clone(), }; Ok(Availability::Available(Box::new( AvailableExecutedBlock::new(available_block, import_data, payload_verification_outcome), ))) } + pub fn reconstruction_started(&mut self) { + self.reconstruction_started = true; + } + /// Returns the epoch of the block if it is cached, otherwise returns the epoch of the first blob. pub fn epoch(&self) -> Option { self.executed_block @@ -337,7 +356,7 @@ pub struct DataAvailabilityCheckerInner { state_cache: StateLRUCache, /// The number of data columns the node is custodying. custody_column_count: usize, - spec: ChainSpec, + spec: Arc, } impl DataAvailabilityCheckerInner { @@ -345,7 +364,7 @@ impl DataAvailabilityCheckerInner { capacity: NonZeroUsize, beacon_store: BeaconStore, custody_column_count: usize, - spec: ChainSpec, + spec: Arc, ) -> Result { Ok(Self { critical: RwLock::new(LruCache::new(capacity)), @@ -430,6 +449,28 @@ impl DataAvailabilityCheckerInner { } } + /// Potentially trigger reconstruction if: + /// - Our custody requirement is all columns + /// - We >= 50% of columns, but not all columns + fn should_reconstruct( + &self, + block_import_requirement: &BlockImportRequirement, + pending_components: &PendingComponents, + ) -> bool { + let BlockImportRequirement::CustodyColumns(num_expected_columns) = block_import_requirement + else { + return false; + }; + + let num_of_columns = self.spec.number_of_columns; + let has_missing_columns = pending_components.verified_data_columns.len() < num_of_columns; + + has_missing_columns + && !pending_components.reconstruction_started + && *num_expected_columns == num_of_columns + && pending_components.verified_data_columns.len() >= num_of_columns / 2 + } + pub fn put_kzg_verified_blobs>>( &self, block_root: Hash256, @@ -460,7 +501,7 @@ impl DataAvailabilityCheckerInner { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, |diet_block| { + pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -469,14 +510,17 @@ impl DataAvailabilityCheckerInner { } } + #[allow(clippy::type_complexity)] pub fn put_kzg_verified_data_columns< I: IntoIterator>, >( &self, + kzg: &Kzg, block_root: Hash256, epoch: Epoch, kzg_verified_data_columns: I, - ) -> Result, AvailabilityCheckError> { + ) -> 
Result<(Availability, DataColumnsToPublish), AvailabilityCheckError> + { let mut write_lock = self.critical.write(); // Grab existing entry or create a new entry. @@ -486,19 +530,68 @@ impl DataAvailabilityCheckerInner { .unwrap_or_else(|| PendingComponents::empty(block_root)); // Merge in the data columns. - pending_components.merge_data_columns(kzg_verified_data_columns); + pending_components.merge_data_columns(kzg_verified_data_columns)?; let block_import_requirement = self.block_import_requirement(epoch)?; + + // Potentially trigger reconstruction if: + // - Our custody requirement is all columns + // - We >= 50% of columns + let data_columns_to_publish = + if self.should_reconstruct(&block_import_requirement, &pending_components) { + pending_components.reconstruction_started(); + + let timer = metrics::start_timer(&metrics::DATA_AVAILABILITY_RECONSTRUCTION_TIME); + + let existing_column_indices = pending_components + .verified_data_columns + .iter() + .map(|d| d.index()) + .collect::>(); + + // Will only return an error if: + // - < 50% of columns + // - There are duplicates + let all_data_columns = KzgVerifiedCustodyDataColumn::reconstruct_columns( + kzg, + pending_components.verified_data_columns.as_slice(), + &self.spec, + )?; + + let data_columns_to_publish = all_data_columns + .iter() + .filter(|d| !existing_column_indices.contains(&d.index())) + .map(|d| d.clone_arc()) + .collect::>(); + + pending_components.verified_data_columns = all_data_columns; + + metrics::stop_timer(timer); + metrics::inc_counter_by( + &metrics::DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS, + data_columns_to_publish.len() as u64, + ); + + Some(data_columns_to_publish) + } else { + None + }; + if pending_components.is_available(&block_import_requirement) { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, |diet_block| { - self.state_cache.recover_pending_executed_block(diet_block) - }) + pending_components + .make_available(block_import_requirement, &self.spec, |diet_block| { + self.state_cache.recover_pending_executed_block(diet_block) + }) + .map(|availability| (availability, data_columns_to_publish)) } else { write_lock.put(block_root, pending_components); - Ok(Availability::MissingComponents(block_root)) + Ok(( + Availability::MissingComponents(block_root), + data_columns_to_publish, + )) } } @@ -532,7 +625,7 @@ impl DataAvailabilityCheckerInner { write_lock.put(block_root, pending_components.clone()); // No need to hold the write lock anymore drop(write_lock); - pending_components.make_available(block_import_requirement, |diet_block| { + pending_components.make_available(block_import_requirement, &self.spec, |diet_block| { self.state_cache.recover_pending_executed_block(diet_block) }) } else { @@ -791,7 +884,7 @@ mod test { let log = test_logger(); let chain_db_path = tempdir().expect("should get temp dir"); let harness = get_deneb_chain(log.clone(), &chain_db_path).await; - let spec = harness.spec.clone(); + let spec = Arc::new(harness.spec.clone()); let test_store = harness.chain.store.clone(); let capacity_non_zero = new_non_zero_usize(capacity); let cache = Arc::new( diff --git a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs index cf6eb669d5e..03e3289118d 100644 --- a/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs +++ 
b/beacon_node/beacon_chain/src/data_availability_checker/state_lru_cache.rs @@ -70,11 +70,11 @@ impl DietAvailabilityPendingExecutedBlock { pub struct StateLRUCache { states: RwLock>>, store: BeaconStore, - spec: ChainSpec, + spec: Arc, } impl StateLRUCache { - pub fn new(store: BeaconStore, spec: ChainSpec) -> Self { + pub fn new(store: BeaconStore, spec: Arc) -> Self { Self { states: RwLock::new(LruCache::new(STATE_LRU_CAPACITY_NON_ZERO)), store, diff --git a/beacon_node/beacon_chain/src/data_column_verification.rs b/beacon_node/beacon_chain/src/data_column_verification.rs index 279af20909b..f4a5feaee2a 100644 --- a/beacon_node/beacon_chain/src/data_column_verification.rs +++ b/beacon_node/beacon_chain/src/data_column_verification.rs @@ -2,7 +2,7 @@ use crate::block_verification::{ cheap_state_advance_to_obtain_committees, get_validator_pubkey_cache, process_block_slash_info, BlockSlashInfo, }; -use crate::kzg_utils::validate_data_columns; +use crate::kzg_utils::{reconstruct_data_columns, validate_data_columns}; use crate::{metrics, BeaconChain, BeaconChainError, BeaconChainTypes}; use derivative::Derivative; use fork_choice::ProtoBlock; @@ -181,6 +181,15 @@ impl GossipVerifiedDataColumn { } } + pub fn as_data_column(&self) -> &DataColumnSidecar { + self.data_column.as_data_column() + } + + /// This is cheap as we're calling clone on an Arc + pub fn clone_data_column(&self) -> Arc> { + self.data_column.clone_data_column() + } + pub fn block_root(&self) -> Hash256 { self.block_root } @@ -189,6 +198,10 @@ impl GossipVerifiedDataColumn { self.data_column.data.slot() } + pub fn index(&self) -> ColumnIndex { + self.data_column.data.index + } + pub fn signed_block_header(&self) -> SignedBeaconBlockHeader { self.data_column.data.signed_block_header.clone() } @@ -226,6 +239,38 @@ impl KzgVerifiedDataColumn { } } +pub type CustodyDataColumnList = RuntimeVariableList>; + +/// Data column that we must custody +#[derive(Debug, Derivative, Clone, Encode, Decode)] +#[derivative(PartialEq, Eq, Hash(bound = "E: EthSpec"))] +#[ssz(struct_behaviour = "transparent")] +pub struct CustodyDataColumn { + data: Arc>, +} + +impl CustodyDataColumn { + /// Mark a column as custody column. 
Caller must ensure that our current custody requirements + /// include this column + pub fn from_asserted_custody(data: Arc>) -> Self { + Self { data } + } + + pub fn into_inner(self) -> Arc> { + self.data + } + pub fn as_data_column(&self) -> &Arc> { + &self.data + } + /// This is cheap as we're calling clone on an Arc + pub fn clone_arc(&self) -> Arc> { + self.data.clone() + } + pub fn index(&self) -> u64 { + self.data.index + } +} + /// Data column that we must custody and has completed kzg verification #[derive(Debug, Derivative, Clone, Encode, Decode)] #[derivative(PartialEq, Eq)] @@ -243,8 +288,39 @@ impl KzgVerifiedCustodyDataColumn { } } - pub fn index(&self) -> ColumnIndex { - self.data.index + /// Verify a column already marked as custody column + pub fn new(data_column: CustodyDataColumn, kzg: &Kzg) -> Result { + verify_kzg_for_data_column(data_column.clone_arc(), kzg)?; + Ok(Self { + data: data_column.data, + }) + } + + pub fn reconstruct_columns( + kzg: &Kzg, + partial_set_of_columns: &[Self], + spec: &ChainSpec, + ) -> Result, KzgError> { + // Will only return an error if: + // - < 50% of columns + // - There are duplicates + let all_data_columns = reconstruct_data_columns( + kzg, + &partial_set_of_columns + .iter() + .map(|d| d.clone_arc()) + .collect::>(), + spec, + )?; + + Ok(all_data_columns + .into_iter() + .map(|d| { + KzgVerifiedCustodyDataColumn::from_asserted_custody(KzgVerifiedDataColumn { + data: d, + }) + }) + .collect::>()) } pub fn into_inner(self) -> Arc> { @@ -257,6 +333,9 @@ impl KzgVerifiedCustodyDataColumn { pub fn clone_arc(&self) -> Arc> { self.data.clone() } + pub fn index(&self) -> ColumnIndex { + self.data.index + } } /// Complete kzg verification for a `DataColumnSidecar`. @@ -303,6 +382,7 @@ pub fn validate_data_column_sidecar_for_gossip( let parent_block = verify_parent_block_and_finalized_descendant(data_column.clone(), chain)?; verify_slot_higher_than_parent(&parent_block, column_slot)?; verify_proposer_and_signature(&data_column, &parent_block, chain)?; + let kzg = chain .kzg .clone() @@ -350,9 +430,11 @@ fn verify_is_first_sidecar( fn verify_column_inclusion_proof( data_column: &DataColumnSidecar, ) -> Result<(), GossipDataColumnError> { + let _timer = metrics::start_timer(&metrics::DATA_COLUMN_SIDECAR_INCLUSION_PROOF_VERIFICATION); if !data_column.verify_inclusion_proof() { return Err(GossipDataColumnError::InvalidInclusionProof); } + Ok(()) } diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index 1e3d67f9d7a..4db3f0ebb41 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -77,8 +77,6 @@ pub enum BeaconChainError { AttesterSlashingValidationError(AttesterSlashingValidationError), BlsExecutionChangeValidationError(BlsExecutionChangeValidationError), MissingFinalizedStateRoot(Slot), - /// Returned when an internal check fails, indicating corrupt data. 
- InvariantViolated(String), SszTypesError(SszTypesError), NoProposerForSlot(Slot), CanonicalHeadLockTimeout, @@ -216,10 +214,12 @@ pub enum BeaconChainError { InconsistentFork(InconsistentFork), ProposerHeadForkChoiceError(fork_choice::Error), UnableToPublish, + UnableToBuildColumnSidecar(String), AvailabilityCheckError(AvailabilityCheckError), LightClientError(LightClientError), UnsupportedFork, MilhouseError(MilhouseError), + EmptyRpcCustodyColumns, AttestationError(AttestationError), AttestationCommitteeIndexNotSet, } diff --git a/beacon_node/beacon_chain/src/metrics.rs b/beacon_node/beacon_chain/src/metrics.rs index 82c98a2083b..3394946255f 100644 --- a/beacon_node/beacon_chain/src/metrics.rs +++ b/beacon_node/beacon_chain/src/metrics.rs @@ -1653,6 +1653,13 @@ pub static DATA_COLUMN_SIDECAR_COMPUTATION: LazyLock> = LazyLo Ok(vec![0.04, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 1.0]), ) }); +pub static DATA_COLUMN_SIDECAR_INCLUSION_PROOF_VERIFICATION: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "data_column_sidecar_inclusion_proof_verification_seconds", + "Time taken to verify data_column sidecar inclusion proof", + ) + }); pub static DATA_COLUMN_SIDECAR_PROCESSING_REQUESTS: LazyLock> = LazyLock::new(|| { try_create_int_counter( @@ -1674,6 +1681,13 @@ pub static DATA_COLUMN_SIDECAR_GOSSIP_VERIFICATION_TIMES: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "beacon_blobs_column_sidecar_processing_successes_total", + "Number of data column sidecars verified for gossip", + ) + }); /* * Light server message verification @@ -1856,6 +1870,20 @@ pub static DATA_AVAILABILITY_OVERFLOW_STORE_CACHE_SIZE: LazyLock> = + LazyLock::new(|| { + try_create_histogram( + "data_availability_reconstruction_time_seconds", + "Time taken to reconstruct columns", + ) + }); +pub static DATA_AVAILABILITY_RECONSTRUCTED_COLUMNS: LazyLock> = + LazyLock::new(|| { + try_create_int_counter( + "data_availability_reconstructed_columns_total", + "Total count of reconstructed columns", + ) + }); /* * light_client server metrics diff --git a/beacon_node/beacon_chain/src/test_utils.rs b/beacon_node/beacon_chain/src/test_utils.rs index 87a3eeb359e..b28d221da7e 100644 --- a/beacon_node/beacon_chain/src/test_utils.rs +++ b/beacon_node/beacon_chain/src/test_utils.rs @@ -1,4 +1,5 @@ use crate::block_verification_types::{AsBlock, RpcBlock}; +use crate::kzg_utils::blobs_to_data_column_sidecars; use crate::observed_operations::ObservationOutcome; pub use crate::persisted_beacon_chain::PersistedBeaconChain; use crate::BeaconBlockResponseWrapper; @@ -82,6 +83,14 @@ pub static KZG: LazyLock> = LazyLock::new(|| { Arc::new(kzg) }); +pub static KZG_PEERDAS: LazyLock> = LazyLock::new(|| { + let trusted_setup: TrustedSetup = serde_json::from_reader(TRUSTED_SETUP_BYTES) + .map_err(|e| format!("Unable to read trusted setup file: {}", e)) + .expect("should have trusted setup"); + let kzg = Kzg::new_from_trusted_setup_das_enabled(trusted_setup).expect("should create kzg"); + Arc::new(kzg) +}); + pub type BaseHarnessType = Witness, E, THotStore, TColdStore>; @@ -2690,3 +2699,20 @@ pub fn generate_rand_block_and_blobs( } (block, blob_sidecars) } + +#[allow(clippy::type_complexity)] +pub fn generate_rand_block_and_data_columns( + fork_name: ForkName, + num_blobs: NumBlobs, + rng: &mut impl Rng, + spec: &ChainSpec, +) -> ( + SignedBeaconBlock>, + Vec>>, +) { + let (block, blobs) = generate_rand_block_and_blobs(fork_name, num_blobs, rng); + let blob: BlobsList = blobs.into_iter().map(|b| b.blob).collect::>().into(); + let 
data_columns = blobs_to_data_column_sidecars(&blob, &block, &KZG_PEERDAS, spec).unwrap(); + + (block, data_columns) +} diff --git a/beacon_node/beacon_chain/tests/block_verification.rs b/beacon_node/beacon_chain/tests/block_verification.rs index 046a3468afc..1c494d99bf5 100644 --- a/beacon_node/beacon_chain/tests/block_verification.rs +++ b/beacon_node/beacon_chain/tests/block_verification.rs @@ -1472,7 +1472,7 @@ async fn add_base_block_to_altair_chain() { ) .await, ChainSegmentResult::Failed { - imported_blocks: 0, + imported_blocks: _, error: BlockError::InconsistentFork(InconsistentFork { fork_at_slot: ForkName::Altair, object_fork: ForkName::Base, @@ -1608,7 +1608,7 @@ async fn add_altair_block_to_base_chain() { ) .await, ChainSegmentResult::Failed { - imported_blocks: 0, + imported_blocks: _, error: BlockError::InconsistentFork(InconsistentFork { fork_at_slot: ForkName::Base, object_fork: ForkName::Altair, diff --git a/beacon_node/beacon_chain/tests/store_tests.rs b/beacon_node/beacon_chain/tests/store_tests.rs index c1071d55cf6..740aada413d 100644 --- a/beacon_node/beacon_chain/tests/store_tests.rs +++ b/beacon_node/beacon_chain/tests/store_tests.rs @@ -2792,7 +2792,13 @@ async fn weak_subjectivity_sync_test(slots: Vec, checkpoint_slot: Slot) { let (block_root, block, blobs, data_columns) = available_blocks[0].clone().deconstruct(); let mut corrupt_block = (*block).clone(); *corrupt_block.signature_mut() = Signature::empty(); - AvailableBlock::__new_for_testing(block_root, Arc::new(corrupt_block), blobs, data_columns) + AvailableBlock::__new_for_testing( + block_root, + Arc::new(corrupt_block), + blobs, + data_columns, + Arc::new(spec), + ) }; // Importing the invalid batch should error. diff --git a/beacon_node/beacon_processor/src/lib.rs b/beacon_node/beacon_processor/src/lib.rs index 6ce3b64acfe..f506f0bb94d 100644 --- a/beacon_node/beacon_processor/src/lib.rs +++ b/beacon_node/beacon_processor/src/lib.rs @@ -64,11 +64,11 @@ use types::{ Attestation, BeaconState, ChainSpec, Hash256, RelativeEpoch, SignedAggregateAndProof, SubnetId, }; use types::{EthSpec, Slot}; -use work_reprocessing_queue::IgnoredRpcBlock; use work_reprocessing_queue::{ spawn_reprocess_scheduler, QueuedAggregate, QueuedLightClientUpdate, QueuedRpcBlock, QueuedUnaggregate, ReadyWork, }; +use work_reprocessing_queue::{IgnoredRpcBlock, QueuedSamplingRequest}; mod metrics; pub mod work_reprocessing_queue; @@ -106,9 +106,12 @@ pub struct BeaconProcessorQueueLengths { finality_update_queue: usize, optimistic_update_queue: usize, unknown_light_client_update_queue: usize, + unknown_block_sampling_request_queue: usize, rpc_block_queue: usize, rpc_blob_queue: usize, rpc_custody_column_queue: usize, + rpc_verify_data_column_queue: usize, + sampling_result_queue: usize, chain_segment_queue: usize, backfill_chain_segment: usize, gossip_block_queue: usize, @@ -161,10 +164,14 @@ impl BeaconProcessorQueueLengths { gossip_attester_slashing_queue: 4096, finality_update_queue: 1024, optimistic_update_queue: 1024, + unknown_block_sampling_request_queue: 16384, unknown_light_client_update_queue: 128, rpc_block_queue: 1024, rpc_blob_queue: 1024, - rpc_custody_column_queue: 1024, + // TODO(das): Placeholder values + rpc_custody_column_queue: 1000, + rpc_verify_data_column_queue: 1000, + sampling_result_queue: 1000, chain_segment_queue: 64, backfill_chain_segment: 64, gossip_block_queue: 1024, @@ -231,6 +238,8 @@ pub const RPC_BLOCK: &str = "rpc_block"; pub const IGNORED_RPC_BLOCK: &str = "ignored_rpc_block"; pub const RPC_BLOBS: &str 
= "rpc_blob"; pub const RPC_CUSTODY_COLUMN: &str = "rpc_custody_column"; +pub const RPC_VERIFY_DATA_COLUMNS: &str = "rpc_verify_data_columns"; +pub const SAMPLING_RESULT: &str = "sampling_result"; pub const CHAIN_SEGMENT: &str = "chain_segment"; pub const CHAIN_SEGMENT_BACKFILL: &str = "chain_segment_backfill"; pub const STATUS_PROCESSING: &str = "status_processing"; @@ -246,6 +255,7 @@ pub const LIGHT_CLIENT_OPTIMISTIC_UPDATE_REQUEST: &str = "light_client_optimisti pub const UNKNOWN_BLOCK_ATTESTATION: &str = "unknown_block_attestation"; pub const UNKNOWN_BLOCK_AGGREGATE: &str = "unknown_block_aggregate"; pub const UNKNOWN_LIGHT_CLIENT_UPDATE: &str = "unknown_light_client_update"; +pub const UNKNOWN_BLOCK_SAMPLING_REQUEST: &str = "unknown_block_sampling_request"; pub const GOSSIP_BLS_TO_EXECUTION_CHANGE: &str = "gossip_bls_to_execution_change"; pub const API_REQUEST_P0: &str = "api_request_p0"; pub const API_REQUEST_P1: &str = "api_request_p1"; @@ -501,6 +511,10 @@ impl From for WorkEvent { process_fn, }, }, + ReadyWork::SamplingRequest(QueuedSamplingRequest { process_fn, .. }) => Self { + drop_during_sync: true, + work: Work::UnknownBlockSamplingRequest { process_fn }, + }, ReadyWork::BackfillSync(QueuedBackfillBatch(process_fn)) => Self { drop_during_sync: false, work: Work::ChainSegmentBackfill(process_fn), @@ -584,6 +598,9 @@ pub enum Work { parent_root: Hash256, process_fn: BlockingFn, }, + UnknownBlockSamplingRequest { + process_fn: BlockingFn, + }, GossipAggregateBatch { aggregates: Vec>, process_batch: Box>) + Send + Sync>, @@ -610,6 +627,8 @@ pub enum Work { process_fn: AsyncFn, }, RpcCustodyColumn(AsyncFn), + RpcVerifyDataColumn(AsyncFn), + SamplingResult(AsyncFn), IgnoredRpcBlock { process_fn: BlockingFn, }, @@ -658,6 +677,8 @@ impl Work { Work::RpcBlock { .. } => RPC_BLOCK, Work::RpcBlobs { .. } => RPC_BLOBS, Work::RpcCustodyColumn { .. } => RPC_CUSTODY_COLUMN, + Work::RpcVerifyDataColumn(_) => RPC_VERIFY_DATA_COLUMNS, + Work::SamplingResult(_) => SAMPLING_RESULT, Work::IgnoredRpcBlock { .. } => IGNORED_RPC_BLOCK, Work::ChainSegment { .. } => CHAIN_SEGMENT, Work::ChainSegmentBackfill(_) => CHAIN_SEGMENT_BACKFILL, @@ -673,8 +694,9 @@ impl Work { Work::LightClientFinalityUpdateRequest(_) => LIGHT_CLIENT_FINALITY_UPDATE_REQUEST, Work::UnknownBlockAttestation { .. } => UNKNOWN_BLOCK_ATTESTATION, Work::UnknownBlockAggregate { .. } => UNKNOWN_BLOCK_AGGREGATE, - Work::GossipBlsToExecutionChange(_) => GOSSIP_BLS_TO_EXECUTION_CHANGE, Work::UnknownLightClientOptimisticUpdate { .. } => UNKNOWN_LIGHT_CLIENT_UPDATE, + Work::UnknownBlockSamplingRequest { .. } => UNKNOWN_BLOCK_SAMPLING_REQUEST, + Work::GossipBlsToExecutionChange(_) => GOSSIP_BLS_TO_EXECUTION_CHANGE, Work::ApiRequestP0 { .. } => API_REQUEST_P0, Work::ApiRequestP1 { .. } => API_REQUEST_P1, } @@ -816,11 +838,16 @@ impl BeaconProcessor { let mut optimistic_update_queue = FifoQueue::new(queue_lengths.optimistic_update_queue); let mut unknown_light_client_update_queue = FifoQueue::new(queue_lengths.unknown_light_client_update_queue); + let mut unknown_block_sampling_request_queue = + FifoQueue::new(queue_lengths.unknown_block_sampling_request_queue); // Using a FIFO queue since blocks need to be imported sequentially. 
let mut rpc_block_queue = FifoQueue::new(queue_lengths.rpc_block_queue); let mut rpc_blob_queue = FifoQueue::new(queue_lengths.rpc_blob_queue); let mut rpc_custody_column_queue = FifoQueue::new(queue_lengths.rpc_custody_column_queue); + let mut rpc_verify_data_column_queue = + FifoQueue::new(queue_lengths.rpc_verify_data_column_queue); + let mut sampling_result_queue = FifoQueue::new(queue_lengths.sampling_result_queue); let mut chain_segment_queue = FifoQueue::new(queue_lengths.chain_segment_queue); let mut backfill_chain_segment = FifoQueue::new(queue_lengths.backfill_chain_segment); let mut gossip_block_queue = FifoQueue::new(queue_lengths.gossip_block_queue); @@ -978,6 +1005,13 @@ impl BeaconProcessor { self.spawn_worker(item, idle_tx); } else if let Some(item) = rpc_custody_column_queue.pop() { self.spawn_worker(item, idle_tx); + // TODO(das): decide proper prioritization for sampling columns + } else if let Some(item) = rpc_custody_column_queue.pop() { + self.spawn_worker(item, idle_tx); + } else if let Some(item) = rpc_verify_data_column_queue.pop() { + self.spawn_worker(item, idle_tx); + } else if let Some(item) = sampling_result_queue.pop() { + self.spawn_worker(item, idle_tx); // Check delayed blocks before gossip blocks, the gossip blocks might rely // on the delayed ones. } else if let Some(item) = delayed_block_queue.pop() { @@ -1143,6 +1177,9 @@ impl BeaconProcessor { self.spawn_worker(item, idle_tx); } else if let Some(item) = dcbrange_queue.pop() { self.spawn_worker(item, idle_tx); + // Prioritize sampling requests after block syncing requests + } else if let Some(item) = unknown_block_sampling_request_queue.pop() { + self.spawn_worker(item, idle_tx); // Check slashings after all other consensus messages so we prioritize // following head. // @@ -1273,6 +1310,12 @@ impl BeaconProcessor { Work::RpcCustodyColumn { .. } => { rpc_custody_column_queue.push(work, work_id, &self.log) } + Work::RpcVerifyDataColumn(_) => { + rpc_verify_data_column_queue.push(work, work_id, &self.log) + } + Work::SamplingResult(_) => { + sampling_result_queue.push(work, work_id, &self.log) + } Work::ChainSegment { .. } => { chain_segment_queue.push(work, work_id, &self.log) } @@ -1319,6 +1362,9 @@ impl BeaconProcessor { Work::UnknownLightClientOptimisticUpdate { .. } => { unknown_light_client_update_queue.push(work, work_id, &self.log) } + Work::UnknownBlockSamplingRequest { .. } => { + unknown_block_sampling_request_queue.push(work, work_id, &self.log) + } Work::ApiRequestP0 { .. 
} => { api_request_p0_queue.push(work, work_id, &self.log) } @@ -1369,6 +1415,18 @@ impl BeaconProcessor { &metrics::BEACON_PROCESSOR_RPC_BLOB_QUEUE_TOTAL, rpc_blob_queue.len() as i64, ); + metrics::set_gauge( + &metrics::BEACON_PROCESSOR_RPC_CUSTODY_COLUMN_QUEUE_TOTAL, + rpc_custody_column_queue.len() as i64, + ); + metrics::set_gauge( + &metrics::BEACON_PROCESSOR_RPC_VERIFY_DATA_COLUMN_QUEUE_TOTAL, + rpc_verify_data_column_queue.len() as i64, + ); + metrics::set_gauge( + &metrics::BEACON_PROCESSOR_SAMPLING_RESULT_QUEUE_TOTAL, + sampling_result_queue.len() as i64, + ); metrics::set_gauge( &metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_QUEUE_TOTAL, chain_segment_queue.len() as i64, @@ -1497,12 +1555,12 @@ impl BeaconProcessor { Work::ChainSegment(process_fn) => task_spawner.spawn_async(async move { process_fn.await; }), - Work::UnknownBlockAttestation { process_fn } => task_spawner.spawn_blocking(process_fn), - Work::UnknownBlockAggregate { process_fn } => task_spawner.spawn_blocking(process_fn), - Work::UnknownLightClientOptimisticUpdate { - parent_root: _, - process_fn, - } => task_spawner.spawn_blocking(process_fn), + Work::UnknownBlockAttestation { process_fn } + | Work::UnknownBlockAggregate { process_fn } + | Work::UnknownLightClientOptimisticUpdate { process_fn, .. } + | Work::UnknownBlockSamplingRequest { process_fn } => { + task_spawner.spawn_blocking(process_fn) + } Work::DelayedImportBlock { beacon_block_slot: _, beacon_block_root: _, @@ -1510,7 +1568,9 @@ impl BeaconProcessor { } => task_spawner.spawn_async(process_fn), Work::RpcBlock { process_fn } | Work::RpcBlobs { process_fn } - | Work::RpcCustodyColumn(process_fn) => task_spawner.spawn_async(process_fn), + | Work::RpcCustodyColumn(process_fn) + | Work::RpcVerifyDataColumn(process_fn) + | Work::SamplingResult(process_fn) => task_spawner.spawn_async(process_fn), Work::IgnoredRpcBlock { process_fn } => task_spawner.spawn_blocking(process_fn), Work::GossipBlock(work) | Work::GossipBlobSidecar(work) diff --git a/beacon_node/beacon_processor/src/metrics.rs b/beacon_node/beacon_processor/src/metrics.rs index 56105f1e101..8bc03cee6c7 100644 --- a/beacon_node/beacon_processor/src/metrics.rs +++ b/beacon_node/beacon_processor/src/metrics.rs @@ -133,6 +133,30 @@ pub static BEACON_PROCESSOR_RPC_BLOB_QUEUE_TOTAL: LazyLock> = "Count of blobs from the rpc waiting to be verified.", ) }); +// Rpc custody data columns. +pub static BEACON_PROCESSOR_RPC_CUSTODY_COLUMN_QUEUE_TOTAL: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "beacon_processor_rpc_custody_column_queue_total", + "Count of custody columns from the rpc waiting to be imported.", + ) + }); +// Rpc verify data columns +pub static BEACON_PROCESSOR_RPC_VERIFY_DATA_COLUMN_QUEUE_TOTAL: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "beacon_processor_rpc_verify_data_column_queue_total", + "Count of data columns from the rpc waiting to be verified.", + ) + }); +// Sampling result +pub static BEACON_PROCESSOR_SAMPLING_RESULT_QUEUE_TOTAL: LazyLock> = + LazyLock::new(|| { + try_create_int_gauge( + "beacon_processor_sampling_result_queue_total", + "Count of sampling results waiting to be processed.", + ) + }); // Chain segments. 
pub static BEACON_PROCESSOR_CHAIN_SEGMENT_QUEUE_TOTAL: LazyLock> = LazyLock::new(|| { @@ -221,6 +245,15 @@ pub static BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_ATTESTATIONS: LazyLock, +> = LazyLock::new(|| { + try_create_int_counter( + "beacon_processor_reprocessing_queue_matched_sampling_requests", + "Number of queued sampling requests where a matching block has been imported.", + ) +}); /* * Light client update reprocessing queue metrics. @@ -238,7 +271,7 @@ pub static BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_OPTIMISTIC_UPDATES: LazyL > = LazyLock::new(|| { try_create_int_counter( "beacon_processor_reprocessing_queue_matched_optimistic_updates", - "Number of queued light client optimistic updates where as matching block has been imported." + "Number of queued light client optimistic updates where a matching block has been imported." ) }); diff --git a/beacon_node/beacon_processor/src/work_reprocessing_queue.rs b/beacon_node/beacon_processor/src/work_reprocessing_queue.rs index 137010557da..a43310ac834 100644 --- a/beacon_node/beacon_processor/src/work_reprocessing_queue.rs +++ b/beacon_node/beacon_processor/src/work_reprocessing_queue.rs @@ -50,6 +50,9 @@ pub const QUEUED_LIGHT_CLIENT_UPDATE_DELAY: Duration = Duration::from_secs(12); /// For how long to queue rpc blocks before sending them back for reprocessing. pub const QUEUED_RPC_BLOCK_DELAY: Duration = Duration::from_secs(4); +/// For how long to queue sampling requests for reprocessing. +pub const QUEUED_SAMPLING_REQUESTS_DELAY: Duration = Duration::from_secs(12); + /// Set an arbitrary upper-bound on the number of queued blocks to avoid DoS attacks. The fact that /// we signature-verify blocks before putting them in the queue *should* protect against this, but /// it's nice to have extra protection. @@ -61,6 +64,10 @@ const MAXIMUM_QUEUED_ATTESTATIONS: usize = 16_384; /// How many light client updates we keep before new ones get dropped. const MAXIMUM_QUEUED_LIGHT_CLIENT_UPDATES: usize = 128; +/// How many sampling requests we queue before new ones get dropped. +/// TODO(das): choose a sensible value +const MAXIMUM_QUEUED_SAMPLING_REQUESTS: usize = 16_384; + // Process backfill batch 50%, 60%, 80% through each slot. // // Note: use caution to set these fractions in a way that won't cause panic-y @@ -97,6 +104,8 @@ pub enum ReprocessQueueMessage { UnknownBlockAggregate(QueuedAggregate), /// A light client optimistic update that references a parent root that has not been seen as a parent. UnknownLightClientOptimisticUpdate(QueuedLightClientUpdate), + /// A sampling request that references an unknown block. + UnknownBlockSamplingRequest(QueuedSamplingRequest), /// A new backfill batch that needs to be scheduled for processing. BackfillSync(QueuedBackfillBatch), } @@ -109,6 +118,7 @@ pub enum ReadyWork { Unaggregate(QueuedUnaggregate), Aggregate(QueuedAggregate), LightClientUpdate(QueuedLightClientUpdate), + SamplingRequest(QueuedSamplingRequest), BackfillSync(QueuedBackfillBatch), } @@ -133,6 +143,12 @@ pub struct QueuedLightClientUpdate { pub process_fn: BlockingFn, } +/// A sampling request for which the corresponding block is not known while processing. +pub struct QueuedSamplingRequest { + pub beacon_block_root: Hash256, + pub process_fn: BlockingFn, +} + /// A block that arrived early and has been queued for later import. pub struct QueuedGossipBlock { pub beacon_block_slot: Slot, @@ -215,6 +231,8 @@ struct ReprocessQueue { attestations_delay_queue: DelayQueue, /// Queue to manage scheduled light client updates. 
lc_updates_delay_queue: DelayQueue, + /// Queue to manage scheduled sampling requests + sampling_requests_delay_queue: DelayQueue, /* Queued items */ /// Queued blocks. @@ -229,6 +247,10 @@ struct ReprocessQueue { queued_lc_updates: FnvHashMap, /// Light Client Updates per parent_root. awaiting_lc_updates_per_parent_root: HashMap>, + /// Queued sampling requests. + queued_sampling_requests: FnvHashMap, + /// Sampling requests per block root. + awaiting_sampling_requests_per_block_root: HashMap>, /// Queued backfill batches queued_backfill_batches: Vec, @@ -236,15 +258,18 @@ struct ReprocessQueue { /// Next attestation id, used for both aggregated and unaggregated attestations next_attestation: usize, next_lc_update: usize, + next_sampling_request_update: usize, early_block_debounce: TimeLatch, rpc_block_debounce: TimeLatch, attestation_delay_debounce: TimeLatch, lc_update_delay_debounce: TimeLatch, + sampling_request_delay_debounce: TimeLatch, next_backfill_batch_event: Option>>, slot_clock: Arc, } pub type QueuedLightClientUpdateId = usize; +pub type QueuedSamplingRequestId = usize; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum QueuedAttestationId { @@ -388,19 +413,24 @@ impl ReprocessQueue { rpc_block_delay_queue: DelayQueue::new(), attestations_delay_queue: DelayQueue::new(), lc_updates_delay_queue: DelayQueue::new(), + sampling_requests_delay_queue: <_>::default(), queued_gossip_block_roots: HashSet::new(), queued_lc_updates: FnvHashMap::default(), queued_aggregates: FnvHashMap::default(), queued_unaggregates: FnvHashMap::default(), + queued_sampling_requests: <_>::default(), awaiting_attestations_per_root: HashMap::new(), awaiting_lc_updates_per_parent_root: HashMap::new(), + awaiting_sampling_requests_per_block_root: <_>::default(), queued_backfill_batches: Vec::new(), next_attestation: 0, next_lc_update: 0, + next_sampling_request_update: 0, early_block_debounce: TimeLatch::default(), rpc_block_debounce: TimeLatch::default(), attestation_delay_debounce: TimeLatch::default(), lc_update_delay_debounce: TimeLatch::default(), + sampling_request_delay_debounce: <_>::default(), next_backfill_batch_event: None, slot_clock, } @@ -624,6 +654,35 @@ impl ReprocessQueue { self.next_lc_update += 1; } + InboundEvent::Msg(UnknownBlockSamplingRequest(queued_sampling_request)) => { + if self.sampling_requests_delay_queue.len() >= MAXIMUM_QUEUED_SAMPLING_REQUESTS { + if self.sampling_request_delay_debounce.elapsed() { + error!( + log, + "Sampling requests delay queue is full"; + "queue_size" => MAXIMUM_QUEUED_SAMPLING_REQUESTS, + ); + } + // Drop the inbound message. + return; + } + + let id: QueuedSamplingRequestId = self.next_sampling_request_update; + self.next_sampling_request_update += 1; + + // Register the delay. + let delay_key = self + .sampling_requests_delay_queue + .insert(id, QUEUED_SAMPLING_REQUESTS_DELAY); + + self.awaiting_sampling_requests_per_block_root + .entry(queued_sampling_request.beacon_block_root) + .or_default() + .push(id); + + self.queued_sampling_requests + .insert(id, (queued_sampling_request, delay_key)); + } InboundEvent::Msg(BlockImported { block_root, parent_root, @@ -685,6 +744,49 @@ impl ReprocessQueue { ); } } + // Unqueue the sampling requests we have for this root, if any. 
+ if let Some(queued_ids) = self + .awaiting_sampling_requests_per_block_root + .remove(&block_root) + { + let mut sent_count = 0; + let mut failed_to_send_count = 0; + + for id in queued_ids { + metrics::inc_counter( + &metrics::BEACON_PROCESSOR_REPROCESSING_QUEUE_MATCHED_SAMPLING_REQUESTS, + ); + + if let Some((queued, delay_key)) = self.queued_sampling_requests.remove(&id) + { + // Remove the delay. + self.sampling_requests_delay_queue.remove(&delay_key); + + // Send the work. + let work = ReadyWork::SamplingRequest(queued); + + if self.ready_work_tx.try_send(work).is_err() { + failed_to_send_count += 1; + } else { + sent_count += 1; + } + } else { + // This should never happen. + error!(log, "Unknown sampling request for block root"; "block_root" => ?block_root, "id" => ?id); + } + } + + if failed_to_send_count > 0 { + error!( + log, + "Ignored scheduled sampling requests for block"; + "hint" => "system may be overloaded", + "block_root" => ?block_root, + "failed_count" => failed_to_send_count, + "sent_count" => sent_count, + ); + } + } } InboundEvent::Msg(NewLightClientOptimisticUpdate { parent_root }) => { // Unqueue the light client optimistic updates we have for this root, if any. diff --git a/beacon_node/http_api/Cargo.toml b/beacon_node/http_api/Cargo.toml index 068feea1df8..f3779f0e4ac 100644 --- a/beacon_node/http_api/Cargo.toml +++ b/beacon_node/http_api/Cargo.toml @@ -42,6 +42,7 @@ sensitive_url = { workspace = true } store = { workspace = true } bytes = { workspace = true } beacon_processor = { workspace = true } +rand = { workspace = true } [dev-dependencies] serde_json = { workspace = true } diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index 93499b7c38a..102d138aa3a 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -1263,12 +1263,14 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_contents: PublishBlockRequest, task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_block( @@ -1279,6 +1281,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1294,6 +1297,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_bytes: Bytes, @@ -1301,6 +1305,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block_contents = PublishBlockRequest::::from_ssz_bytes( @@ -1318,6 +1323,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1333,6 +1339,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1340,6 +1347,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_block( @@ -1350,6 +1358,7 @@ 
pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) @@ -1366,6 +1375,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1374,6 +1384,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block_contents = PublishBlockRequest::::from_ssz_bytes( @@ -1391,6 +1402,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) @@ -1410,12 +1422,14 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_contents: Arc>, task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_blinded_block( @@ -1425,6 +1439,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1440,12 +1455,14 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |block_bytes: Bytes, task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block = SignedBlindedBeaconBlock::::from_ssz_bytes( @@ -1463,6 +1480,7 @@ pub fn serve( log, BroadcastValidation::default(), duplicate_block_status_code, + network_globals, ) .await }) @@ -1478,6 +1496,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1485,6 +1504,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { publish_blocks::publish_blinded_block( @@ -1494,6 +1514,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) @@ -1509,6 +1530,7 @@ pub fn serve( .and(task_spawner_filter.clone()) .and(chain_filter.clone()) .and(network_tx_filter.clone()) + .and(network_globals.clone()) .and(log_filter.clone()) .then( move |validation_level: api_types::BroadcastValidationQuery, @@ -1516,6 +1538,7 @@ pub fn serve( task_spawner: TaskSpawner, chain: Arc>, network_tx: UnboundedSender>, + network_globals: Arc>, log: Logger| { task_spawner.spawn_async_with_rejection(Priority::P0, async move { let block = SignedBlindedBeaconBlock::::from_ssz_bytes( @@ -1533,6 +1556,7 @@ pub fn serve( log, validation_level.broadcast_validation, duplicate_block_status_code, + network_globals, ) .await }) diff --git a/beacon_node/http_api/src/publish_blocks.rs b/beacon_node/http_api/src/publish_blocks.rs index 10d000ef6f8..bbdfc31d430 100644 --- a/beacon_node/http_api/src/publish_blocks.rs +++ b/beacon_node/http_api/src/publish_blocks.rs @@ -9,8 +9,9 @@ use beacon_chain::{ use 
eth2::types::{into_full_block_and_blobs, BroadcastValidation, ErrorMessage}; use eth2::types::{FullPayloadContents, PublishBlockRequest}; use execution_layer::ProvenancedPayload; -use lighthouse_network::PubsubMessage; +use lighthouse_network::{NetworkGlobals, PubsubMessage}; use network::NetworkMessage; +use rand::seq::SliceRandom; use slog::{debug, error, info, warn, Logger}; use slot_clock::SlotClock; use std::marker::PhantomData; @@ -19,9 +20,9 @@ use std::time::Duration; use tokio::sync::mpsc::UnboundedSender; use tree_hash::TreeHash; use types::{ - AbstractExecPayload, BeaconBlockRef, BlobSidecarList, BlockImportSource, EthSpec, ExecPayload, - ExecutionBlockHash, ForkName, FullPayload, FullPayloadBellatrix, Hash256, SignedBeaconBlock, - SignedBlindedBeaconBlock, VariableList, + AbstractExecPayload, BeaconBlockRef, BlobSidecarList, BlockImportSource, DataColumnSidecarList, + DataColumnSubnetId, EthSpec, ExecPayload, ExecutionBlockHash, ForkName, FullPayload, + FullPayloadBellatrix, Hash256, SignedBeaconBlock, SignedBlindedBeaconBlock, VariableList, }; use warp::http::StatusCode; use warp::{reply::Response, Rejection, Reply}; @@ -45,6 +46,7 @@ impl> ProvenancedBloc } /// Handles a request from the HTTP API for full blocks. +#[allow(clippy::too_many_arguments)] pub async fn publish_block>( block_root: Option, provenanced_block: ProvenancedBlock, @@ -53,6 +55,7 @@ pub async fn publish_block>, ) -> Result { let seen_timestamp = timestamp_now(); @@ -68,10 +71,13 @@ pub async fn publish_block block.slot()); + let malicious_withhold_count = chain.config.malicious_withhold_count; + let chain_cloned = chain.clone(); /* actually publish a block */ let publish_block = move |block: Arc>, blobs_opt: Option>, + data_cols_opt: Option>, sender, log, seen_timestamp| { @@ -104,6 +110,7 @@ pub async fn publish_block { let mut pubsub_messages = vec![PubsubMessage::BeaconBlock(block)]; if let Some(blob_sidecars) = blobs_opt { + // Publish blob sidecars for (blob_index, blob) in blob_sidecars.into_iter().enumerate() { pubsub_messages.push(PubsubMessage::BlobSidecar(Box::new(( blob_index as u64, @@ -111,6 +118,30 @@ pub async fn publish_block 0 { + let columns_to_keep = data_col_sidecars + .len() + .saturating_sub(malicious_withhold_count); + // Randomize columns before dropping the last malicious_withhold_count items + data_col_sidecars.shuffle(&mut rand::thread_rng()); + data_col_sidecars = data_col_sidecars + .into_iter() + .take(columns_to_keep) + .collect::>(); + } + + for data_col in data_col_sidecars { + let subnet = DataColumnSubnetId::from_column_index::( + data_col.index as usize, + &chain_cloned.spec, + ); + pubsub_messages.push(PubsubMessage::DataColumnSidecar(Box::new(( + subnet, data_col, + )))); + } + } crate::publish_pubsub_messages(&sender, pubsub_messages) .map_err(|_| BlockError::BeaconChainError(BeaconChainError::UnableToPublish))?; } @@ -126,7 +157,7 @@ pub async fn publish_block b, Err(BlockContentsError::BlockError(BlockError::BlockIsAlreadyKnown(_))) @@ -155,6 +186,10 @@ pub async fn publish_block>(); VariableList::from(blobs) }); + let data_cols_opt = gossip_verified_data_columns + .as_ref() + .map(|gossip_verified_data_columns| { + gossip_verified_data_columns + .into_iter() + .map(|col| col.clone_data_column()) + .collect::>() + }); let block_root = block_root.unwrap_or(gossip_verified_block.block_root); @@ -172,6 +215,7 @@ pub async fn publish_block publish_block( block_clone, blobs_opt, + data_cols_opt, sender_clone, log_clone, seen_timestamp, @@ -201,6 +246,7 @@ pub async fn 
publish_block &msg + ); + Err(warp_utils::reject::custom_bad_request(msg)) + }; + } + } + match Box::pin(chain.process_block( block_root, gossip_verified_block, @@ -313,6 +382,7 @@ pub async fn publish_blinded_block( log: Logger, validation_level: BroadcastValidation, duplicate_status_code: StatusCode, + network_globals: Arc>, ) -> Result { let block_root = blinded_block.canonical_root(); let full_block: ProvenancedBlock> = @@ -325,6 +395,7 @@ pub async fn publish_blinded_block( log, validation_level, duplicate_status_code, + network_globals, ) .await } diff --git a/beacon_node/http_api/src/test_utils.rs b/beacon_node/http_api/src/test_utils.rs index 88112de10b6..dcd494a880f 100644 --- a/beacon_node/http_api/src/test_utils.rs +++ b/beacon_node/http_api/src/test_utils.rs @@ -151,6 +151,7 @@ pub async fn create_api_server( vec![], false, &log, + chain.spec.clone(), )); // Only a peer manager can add peers, so we create a dummy manager. diff --git a/beacon_node/http_api/tests/broadcast_validation_tests.rs b/beacon_node/http_api/tests/broadcast_validation_tests.rs index 78f9c819888..4b884bb5192 100644 --- a/beacon_node/http_api/tests/broadcast_validation_tests.rs +++ b/beacon_node/http_api/tests/broadcast_validation_tests.rs @@ -376,6 +376,7 @@ pub async fn consensus_partial_pass_only_consensus() { /* submit `block_b` which should induce equivocation */ let channel = tokio::sync::mpsc::unbounded_channel(); + let network_globals = tester.ctx.network_globals.clone().unwrap(); let publication_result = publish_block( None, @@ -385,6 +386,7 @@ pub async fn consensus_partial_pass_only_consensus() { test_logger, validation_level.unwrap(), StatusCode::ACCEPTED, + network_globals, ) .await; @@ -677,6 +679,7 @@ pub async fn equivocation_consensus_late_equivocation() { assert!(gossip_block_contents_a.is_err()); let channel = tokio::sync::mpsc::unbounded_channel(); + let network_globals = tester.ctx.network_globals.clone().unwrap(); let publication_result = publish_block( None, @@ -686,6 +689,7 @@ pub async fn equivocation_consensus_late_equivocation() { test_logger, validation_level.unwrap(), StatusCode::ACCEPTED, + network_globals, ) .await; @@ -1335,6 +1339,7 @@ pub async fn blinded_equivocation_consensus_late_equivocation() { assert!(gossip_block_a.is_err()); let channel = tokio::sync::mpsc::unbounded_channel(); + let network_globals = tester.ctx.network_globals.clone().unwrap(); let publication_result = publish_blinded_block( block_b, @@ -1343,6 +1348,7 @@ pub async fn blinded_equivocation_consensus_late_equivocation() { test_logger, validation_level.unwrap(), StatusCode::ACCEPTED, + network_globals, ) .await; diff --git a/beacon_node/lighthouse_network/src/discovery/enr.rs b/beacon_node/lighthouse_network/src/discovery/enr.rs index 04ae9971502..7415fdaf590 100644 --- a/beacon_node/lighthouse_network/src/discovery/enr.rs +++ b/beacon_node/lighthouse_network/src/discovery/enr.rs @@ -360,7 +360,7 @@ mod test { let config = NetworkConfig::default(); let spec = make_eip7594_spec(); let (mut enr, enr_key) = build_enr_with_config(config, &spec); - let invalid_subnet_count = 99u64; + let invalid_subnet_count = 999u64; enr.insert( PEERDAS_CUSTODY_SUBNET_COUNT_ENR_KEY, diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index 300c190cdaf..7b297d243bd 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -1232,6 +1232,7 @@ mod tests { vec![], false, &log, + spec.clone(), ); 
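// Illustrative sketch (not part of this patch): how a custody subnet count read from an
// ENR might be validated against the chain spec, falling back to the minimum custody
// requirement when the field is missing or out of range, which is the behaviour the
// `invalid_subnet_count` test above exercises. `SpecLike` and the function name are
// assumptions, not the crate's API.
struct SpecLike {
    custody_requirement: u64,
    data_column_sidecar_subnet_count: u64,
}

fn effective_custody_subnet_count(enr_value: Option<u64>, spec: &SpecLike) -> u64 {
    match enr_value {
        // A count larger than the number of column subnets cannot be honoured.
        Some(count) if count <= spec.data_column_sidecar_subnet_count => count,
        // Missing or invalid ENR field: assume only the minimum requirement.
        _ => spec.custody_requirement,
    }
}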
let keypair = keypair.into(); Discovery::new(keypair, &config, Arc::new(globals), &log, &spec) diff --git a/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs b/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs index b53afe556db..8bc5e25fde9 100644 --- a/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs +++ b/beacon_node/lighthouse_network/src/discovery/subnet_predicate.rs @@ -16,6 +16,7 @@ where E: EthSpec, { let log_clone = log.clone(); + let spec_clone = spec.clone(); move |enr: &Enr| { let attestation_bitfield: EnrAttestationBitfield = match enr.attestation_bitfield::() @@ -29,8 +30,7 @@ where let sync_committee_bitfield: Result, _> = enr.sync_committee_bitfield::(); - // TODO(das): compute from enr - let custody_subnet_count = spec.custody_requirement; + let custody_subnet_count = enr.custody_subnet_count::(&spec_clone); let predicate = subnets.iter().any(|subnet| match subnet { Subnet::Attestation(s) => attestation_bitfield diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index 4c9551507e7..7247425f500 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -530,7 +530,10 @@ impl PeerManager { RPCResponseErrorCode::Unknown => PeerAction::HighToleranceError, RPCResponseErrorCode::ResourceUnavailable => { // Don't ban on this because we want to retry with a block by root request. - if matches!(protocol, Protocol::BlobsByRoot) { + if matches!( + protocol, + Protocol::BlobsByRoot | Protocol::DataColumnsByRoot + ) { return; } @@ -1385,7 +1388,8 @@ mod tests { ..Default::default() }; let log = build_log(slog::Level::Debug, false); - let globals = NetworkGlobals::new_test_globals(vec![], &log); + let spec = E::default_spec(); + let globals = NetworkGlobals::new_test_globals(vec![], &log, spec); PeerManager::new(config, Arc::new(globals), &log).unwrap() } @@ -1399,7 +1403,8 @@ mod tests { ..Default::default() }; let log = build_log(slog::Level::Debug, false); - let globals = NetworkGlobals::new_test_globals(trusted_peers, &log); + let spec = E::default_spec(); + let globals = NetworkGlobals::new_test_globals(trusted_peers, &log, spec); PeerManager::new(config, Arc::new(globals), &log).unwrap() } diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs index c3e77ae225e..fdde57b4a57 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb.rs @@ -1,5 +1,8 @@ +use crate::discovery::enr::PEERDAS_CUSTODY_SUBNET_COUNT_ENR_KEY; use crate::discovery::CombinedKey; -use crate::{metrics, multiaddr::Multiaddr, types::Subnet, Enr, Gossipsub, PeerId}; +use crate::{ + metrics, multiaddr::Multiaddr, types::Subnet, Enr, EnrExt, Eth2Enr, Gossipsub, PeerId, +}; use peer_info::{ConnectionDirection, PeerConnectionStatus, PeerInfo}; use rand::seq::SliceRandom; use score::{PeerAction, ReportSource, Score, ScoreState}; @@ -12,7 +15,7 @@ use std::{ fmt::Formatter, }; use sync_status::SyncStatus; -use types::EthSpec; +use types::{ChainSpec, DataColumnSubnetId, EthSpec}; pub mod client; pub mod peer_info; @@ -44,10 +47,16 @@ pub struct PeerDB { disable_peer_scoring: bool, /// PeerDB's logger log: slog::Logger, + spec: ChainSpec, } impl PeerDB { - pub fn new(trusted_peers: Vec, disable_peer_scoring: bool, log: &slog::Logger) -> Self { + pub fn new( + trusted_peers: Vec, + 
disable_peer_scoring: bool, + log: &slog::Logger, + spec: ChainSpec, + ) -> Self { // Initialize the peers hashmap with trusted peers let peers = trusted_peers .into_iter() @@ -59,6 +68,7 @@ impl PeerDB { banned_peers_count: BannedPeersCount::default(), disable_peer_scoring, peers, + spec, } } @@ -246,6 +256,27 @@ impl PeerDB { .map(|(peer_id, _)| peer_id) } + pub fn good_custody_subnet_peer( + &self, + subnet: DataColumnSubnetId, + ) -> impl Iterator { + self.peers + .iter() + .filter(move |(_, info)| { + // TODO(das): we currently consider peer to be a subnet peer if the peer is *either* + // subscribed to the subnet or assigned to the subnet. + // The first condition is currently required as we don't have custody count in + // metadata implemented yet, and therefore unable to reliably determine custody + // subnet count (ENR is not always available). + // This condition can be removed later so that we can identify peers that are not + // serving custody columns and penalise accordingly. + let is_custody_subnet_peer = info.on_subnet_gossipsub(&Subnet::DataColumn(subnet)) + || info.is_assigned_to_custody_subnet(&subnet); + info.is_connected() && info.is_good_gossipsub_peer() && is_custody_subnet_peer + }) + .map(|(peer_id, _)| peer_id) + } + /// Gives the ids of all known disconnected peers. pub fn disconnected_peers(&self) -> impl Iterator { self.peers @@ -673,17 +704,34 @@ impl PeerDB { } /// Updates the connection state. MUST ONLY BE USED IN TESTS. - pub fn __add_connected_peer_testing_only(&mut self, peer_id: &PeerId) -> Option { + pub fn __add_connected_peer_testing_only( + &mut self, + supernode: bool, + spec: &ChainSpec, + ) -> PeerId { let enr_key = CombinedKey::generate_secp256k1(); - let enr = Enr::builder().build(&enr_key).unwrap(); + let mut enr = Enr::builder().build(&enr_key).unwrap(); + let peer_id = enr.peer_id(); + + if supernode { + enr.insert( + PEERDAS_CUSTODY_SUBNET_COUNT_ENR_KEY, + &spec.data_column_sidecar_subnet_count, + &enr_key, + ) + .expect("u64 can be encoded"); + } + self.update_connection_state( - peer_id, + &peer_id, NewConnectionState::Connected { enr: Some(enr), seen_address: Multiaddr::empty(), direction: ConnectionDirection::Outgoing, }, - ) + ); + + peer_id } /// The connection state of the peer has been changed. 
Modify the peer in the db to ensure all @@ -746,8 +794,17 @@ impl PeerDB { seen_address, }, ) => { - // Update the ENR if one exists + // Update the ENR if one exists, and compute the custody subnets if let Some(enr) = enr { + let node_id = enr.node_id().raw().into(); + let custody_subnet_count = enr.custody_subnet_count::(&self.spec); + let custody_subnets = DataColumnSubnetId::compute_custody_subnets::( + node_id, + custody_subnet_count, + &self.spec, + ) + .collect::>(); + info.set_custody_subnets(custody_subnets); info.set_enr(enr); } @@ -1298,7 +1355,8 @@ mod tests { fn get_db() -> PeerDB { let log = build_log(slog::Level::Debug, false); - PeerDB::new(vec![], false, &log) + let spec = M::default_spec(); + PeerDB::new(vec![], false, &log, spec) } #[test] @@ -1997,7 +2055,8 @@ mod tests { fn test_trusted_peers_score() { let trusted_peer = PeerId::random(); let log = build_log(slog::Level::Debug, false); - let mut pdb: PeerDB = PeerDB::new(vec![trusted_peer], false, &log); + let spec = M::default_spec(); + let mut pdb: PeerDB = PeerDB::new(vec![trusted_peer], false, &log, spec); pdb.connect_ingoing(&trusted_peer, "/ip4/0.0.0.0".parse().unwrap(), None); @@ -2021,7 +2080,8 @@ mod tests { fn test_disable_peer_scoring() { let peer = PeerId::random(); let log = build_log(slog::Level::Debug, false); - let mut pdb: PeerDB = PeerDB::new(vec![], true, &log); + let spec = M::default_spec(); + let mut pdb: PeerDB = PeerDB::new(vec![], true, &log, spec); pdb.connect_ingoing(&peer, "/ip4/0.0.0.0".parse().unwrap(), None); diff --git a/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs b/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs index 0745cc26008..8a04d450ba4 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/peerdb/peer_info.rs @@ -13,7 +13,7 @@ use std::collections::HashSet; use std::net::IpAddr; use std::time::Instant; use strum::AsRefStr; -use types::EthSpec; +use types::{DataColumnSubnetId, EthSpec}; use PeerConnectionStatus::*; /// Information about a given connected peer. @@ -40,6 +40,11 @@ pub struct PeerInfo { meta_data: Option>, /// Subnets the peer is connected to. subnets: HashSet, + /// This is computed from either metadata or the ENR, and contains the subnets that the peer + /// is *assigned* to custody, rather than *connected* to (different to `self.subnets`). + /// Note: Another reason to keep this separate to `self.subnets` is an upcoming change to + /// decouple custody requirements from the actual subnets, i.e. changing this to `custody_groups`. + custody_subnets: HashSet, /// The time we would like to retain this peer. After this time, the peer is no longer /// necessary. #[serde(skip)] @@ -62,6 +67,7 @@ impl Default for PeerInfo { listening_addresses: Vec::new(), seen_multiaddrs: HashSet::new(), subnets: HashSet::new(), + custody_subnets: HashSet::new(), sync_status: SyncStatus::Unknown, meta_data: None, min_ttl: None, @@ -210,6 +216,11 @@ impl PeerInfo { self.subnets.contains(subnet) } + /// Returns if the peer is assigned to a given `DataColumnSubnetId`. + pub fn is_assigned_to_custody_subnet(&self, subnet: &DataColumnSubnetId) -> bool { + self.custody_subnets.contains(subnet) + } + /// Returns true if the peer is connected to a long-lived subnet. 
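// Illustrative sketch (not part of this patch): the shape of the custody-peer check used by
// `good_custody_subnet_peer` above. A peer qualifies if it is connected, has a good
// gossipsub score, and is either subscribed to or assigned to the custody subnet; the
// subscription fallback exists while custody counts are not yet carried in metadata.
// Field names here are simplified assumptions, not the crate's `PeerInfo` API.
use std::collections::HashSet;

struct PeerSummary {
    connected: bool,
    good_gossipsub_score: bool,
    subscribed_subnets: HashSet<u64>,
    custody_subnets: HashSet<u64>,
}

fn is_good_custody_subnet_peer(peer: &PeerSummary, subnet: u64) -> bool {
    let on_subnet =
        peer.subscribed_subnets.contains(&subnet) || peer.custody_subnets.contains(&subnet);
    peer.connected && peer.good_gossipsub_score && on_subnet
}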
pub fn has_long_lived_subnet(&self) -> bool { // Check the meta_data @@ -362,6 +373,10 @@ impl PeerInfo { self.connection_status = connection_status } + pub(super) fn set_custody_subnets(&mut self, custody_subnets: HashSet) { + self.custody_subnets = custody_subnets + } + /// Sets the ENR of the peer if one is known. pub(super) fn set_enr(&mut self, enr: Enr) { self.enr = Some(enr) diff --git a/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs b/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs index f5d8b58dcee..9012954391c 100644 --- a/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs +++ b/beacon_node/lighthouse_network/src/rpc/codec/ssz_snappy.rs @@ -522,6 +522,9 @@ fn handle_rpc_request( )?, }))) } + SupportedProtocol::DataColumnsByRangeV1 => Ok(Some(InboundRequest::DataColumnsByRange( + DataColumnsByRangeRequest::from_ssz_bytes(decoded_buffer)?, + ))), SupportedProtocol::DataColumnsByRootV1 => Ok(Some(InboundRequest::DataColumnsByRoot( DataColumnsByRootRequest { data_column_ids: RuntimeVariableList::from_ssz_bytes( @@ -530,9 +533,6 @@ fn handle_rpc_request( )?, }, ))), - SupportedProtocol::DataColumnsByRangeV1 => Ok(Some(InboundRequest::DataColumnsByRange( - DataColumnsByRangeRequest::from_ssz_bytes(decoded_buffer)?, - ))), SupportedProtocol::PingV1 => Ok(Some(InboundRequest::Ping(Ping { data: u64::from_ssz_bytes(decoded_buffer)?, }))), diff --git a/beacon_node/lighthouse_network/src/rpc/config.rs b/beacon_node/lighthouse_network/src/rpc/config.rs index 7ff189b9815..fcb9c986048 100644 --- a/beacon_node/lighthouse_network/src/rpc/config.rs +++ b/beacon_node/lighthouse_network/src/rpc/config.rs @@ -165,6 +165,14 @@ impl Debug for RateLimiterConfig { .field("blocks_by_root", fmt_q!(&self.blocks_by_root_quota)) .field("blobs_by_range", fmt_q!(&self.blobs_by_range_quota)) .field("blobs_by_root", fmt_q!(&self.blobs_by_root_quota)) + .field( + "data_columns_by_range", + fmt_q!(&self.data_columns_by_range_quota), + ) + .field( + "data_columns_by_root", + fmt_q!(&self.data_columns_by_root_quota), + ) .finish() } } diff --git a/beacon_node/lighthouse_network/src/rpc/methods.rs b/beacon_node/lighthouse_network/src/rpc/methods.rs index 7c7dca02f50..12565dee5ee 100644 --- a/beacon_node/lighthouse_network/src/rpc/methods.rs +++ b/beacon_node/lighthouse_network/src/rpc/methods.rs @@ -742,6 +742,16 @@ impl std::fmt::Display for BlobsByRangeRequest { } } +impl std::fmt::Display for DataColumnsByRootRequest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Request: DataColumnsByRoot: Number of Requested Data Column Ids: {}", + self.data_column_ids.len() + ) + } +} + impl slog::KV for StatusMessage { fn serialize( &self, diff --git a/beacon_node/lighthouse_network/src/rpc/mod.rs b/beacon_node/lighthouse_network/src/rpc/mod.rs index 666cbe6fbcc..c40f976e7a1 100644 --- a/beacon_node/lighthouse_network/src/rpc/mod.rs +++ b/beacon_node/lighthouse_network/src/rpc/mod.rs @@ -366,8 +366,10 @@ where protocol, Protocol::BlocksByRange | Protocol::BlobsByRange + | Protocol::DataColumnsByRange | Protocol::BlocksByRoot | Protocol::BlobsByRoot + | Protocol::DataColumnsByRoot ) { debug!(self.log, "Request too large to process"; "request" => %req, "protocol" => %protocol); } else { diff --git a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs index 9fb085efd86..523b891a009 100644 --- a/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs +++ 
b/beacon_node/lighthouse_network/src/rpc/rate_limiter.rs @@ -196,7 +196,6 @@ impl RPCRateLimiterBuilder { let blbrange_quota = self .blbrange_quota .ok_or("BlobsByRange quota not specified")?; - let blbroots_quota = self .blbroot_quota .ok_or("BlobsByRoot quota not specified")?; @@ -357,6 +356,8 @@ impl RPCRateLimiter { self.bbroots_rl.prune(time_since_start); self.blbrange_rl.prune(time_since_start); self.blbroot_rl.prune(time_since_start); + self.dcbrange_rl.prune(time_since_start); + self.dcbroot_rl.prune(time_since_start); } } diff --git a/beacon_node/lighthouse_network/src/service/api_types.rs b/beacon_node/lighthouse_network/src/service/api_types.rs index 756f4bd1326..30400db3b66 100644 --- a/beacon_node/lighthouse_network/src/service/api_types.rs +++ b/beacon_node/lighthouse_network/src/service/api_types.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use libp2p::swarm::ConnectionId; use types::{ - BlobSidecar, DataColumnSidecar, EthSpec, LightClientBootstrap, LightClientFinalityUpdate, - LightClientOptimisticUpdate, SignedBeaconBlock, + BlobSidecar, DataColumnSidecar, EthSpec, Hash256, LightClientBootstrap, + LightClientFinalityUpdate, LightClientOptimisticUpdate, SignedBeaconBlock, }; use crate::rpc::methods::{ @@ -42,11 +42,43 @@ pub enum SyncRequestId { /// Request searching for a set of blobs given a hash. SingleBlob { id: SingleLookupReqId }, /// Request searching for a set of data columns given a hash and list of column indices. - DataColumnsByRoot(DataColumnsByRootRequestId, SingleLookupReqId), + DataColumnsByRoot(DataColumnsByRootRequestId, DataColumnsByRootRequester), /// Range request that is composed by both a block range request and a blob range request. RangeBlockAndBlobs { id: Id }, } +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub enum DataColumnsByRootRequester { + Sampling(SamplingId), + Custody(CustodyId), +} + +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct SamplingId { + pub id: SamplingRequester, + pub sampling_request_id: SamplingRequestId, +} + +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub enum SamplingRequester { + ImportedBlock(Hash256), +} + +/// Identifier of sampling requests. +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct SamplingRequestId(pub usize); + +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct CustodyId { + pub requester: CustodyRequester, + pub req_id: Id, +} + +/// Downstream components that perform custody by root requests. +/// Currently, it's only single block lookups, so not using an enum +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +pub struct CustodyRequester(pub SingleLookupReqId); + /// Application level requests sent to the network. 
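// Illustrative sketch (not part of this patch): how a DataColumnsByRoot response can be
// routed back to the component that issued it, mirroring the requester enum added above.
// The types below are simplified stand-ins for the crate's own identifiers.
#[derive(Debug, Clone, Copy)]
enum ColumnsByRootRequester {
    Sampling { sampling_request_id: usize },
    Custody { lookup_id: u32 },
}

fn route_columns_response(requester: ColumnsByRootRequester, column_count: usize) -> String {
    match requester {
        ColumnsByRootRequester::Sampling { sampling_request_id } => {
            format!("deliver {column_count} columns to sampling request {sampling_request_id}")
        }
        ColumnsByRootRequester::Custody { lookup_id } => {
            format!("deliver {column_count} columns to custody lookup {lookup_id}")
        }
    }
}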
#[derive(Debug, Clone, Copy)] pub enum AppRequestId { diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index 4ef080619eb..50bce0217af 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -172,6 +172,7 @@ impl Network { trusted_peers, config.disable_peer_scoring, &log, + ctx.chain_spec.clone(), ); Arc::new(globals) }; @@ -242,6 +243,7 @@ impl Network { let max_topics = ctx.chain_spec.attestation_subnet_count as usize + SYNC_COMMITTEE_SUBNET_COUNT as usize + ctx.chain_spec.blob_sidecar_subnet_count as usize + + ctx.chain_spec.data_column_sidecar_subnet_count as usize + BASE_CORE_TOPICS.len() + ALTAIR_CORE_TOPICS.len() + CAPELLA_CORE_TOPICS.len() @@ -255,10 +257,11 @@ impl Network { ctx.chain_spec.attestation_subnet_count, SYNC_COMMITTEE_SUBNET_COUNT, ctx.chain_spec.blob_sidecar_subnet_count, + ctx.chain_spec.data_column_sidecar_subnet_count, ), // during a fork we subscribe to both the old and new topics max_subscribed_topics: max_topics * 4, - // 162 in theory = (64 attestation + 4 sync committee + 7 core topics + 6 blob topics) * 2 + // 418 in theory = (64 attestation + 4 sync committee + 7 core topics + 6 blob topics + 128 column topics) * 2 max_subscriptions_per_request: max_topics * 2, }; diff --git a/beacon_node/lighthouse_network/src/service/utils.rs b/beacon_node/lighthouse_network/src/service/utils.rs index 80187efc103..cf06b7c1cee 100644 --- a/beacon_node/lighthouse_network/src/service/utils.rs +++ b/beacon_node/lighthouse_network/src/service/utils.rs @@ -19,7 +19,9 @@ use std::io::prelude::*; use std::path::Path; use std::sync::Arc; use std::time::Duration; -use types::{ChainSpec, EnrForkId, EthSpec, ForkContext, SubnetId, SyncSubnetId}; +use types::{ + ChainSpec, DataColumnSubnetId, EnrForkId, EthSpec, ForkContext, SubnetId, SyncSubnetId, +}; pub const NETWORK_KEY_FILENAME: &str = "key"; /// The maximum simultaneous libp2p connections per peer. @@ -231,6 +233,7 @@ pub(crate) fn create_whitelist_filter( attestation_subnet_count: u64, sync_committee_subnet_count: u64, blob_sidecar_subnet_count: u64, + data_column_sidecar_subnet_count: u64, ) -> gossipsub::WhitelistSubscriptionFilter { let mut possible_hashes = HashSet::new(); for fork_digest in possible_fork_digests { @@ -259,6 +262,9 @@ pub(crate) fn create_whitelist_filter( for id in 0..blob_sidecar_subnet_count { add(BlobSidecar(id)); } + for id in 0..data_column_sidecar_subnet_count { + add(DataColumnSidecar(DataColumnSubnetId::new(id))); + } } gossipsub::WhitelistSubscriptionFilter(possible_hashes) } diff --git a/beacon_node/lighthouse_network/src/types/globals.rs b/beacon_node/lighthouse_network/src/types/globals.rs index 1c7c7f07d0a..412a70902df 100644 --- a/beacon_node/lighthouse_network/src/types/globals.rs +++ b/beacon_node/lighthouse_network/src/types/globals.rs @@ -2,12 +2,12 @@ use crate::peer_manager::peerdb::PeerDB; use crate::rpc::{MetaData, MetaDataV2}; use crate::types::{BackFillState, SyncState}; -use crate::Client; use crate::EnrExt; +use crate::{Client, Eth2Enr}; use crate::{Enr, GossipTopic, Multiaddr, PeerId}; use parking_lot::RwLock; use std::collections::HashSet; -use types::{ChainSpec, ColumnIndex, EthSpec}; +use types::{ChainSpec, ColumnIndex, DataColumnSubnetId, EthSpec}; pub struct NetworkGlobals { /// The current local ENR. @@ -26,6 +26,7 @@ pub struct NetworkGlobals { pub sync_state: RwLock, /// The current state of the backfill sync. 
pub backfill_state: RwLock, + spec: ChainSpec, } impl NetworkGlobals { @@ -35,16 +36,23 @@ impl NetworkGlobals { trusted_peers: Vec, disable_peer_scoring: bool, log: &slog::Logger, + spec: ChainSpec, ) -> Self { NetworkGlobals { local_enr: RwLock::new(enr.clone()), peer_id: RwLock::new(enr.peer_id()), listen_multiaddrs: RwLock::new(Vec::new()), local_metadata: RwLock::new(local_metadata), - peers: RwLock::new(PeerDB::new(trusted_peers, disable_peer_scoring, log)), + peers: RwLock::new(PeerDB::new( + trusted_peers, + disable_peer_scoring, + log, + spec.clone(), + )), gossipsub_subscriptions: RwLock::new(HashSet::new()), sync_state: RwLock::new(SyncState::Stalled), backfill_state: RwLock::new(BackFillState::NotRequired), + spec, } } @@ -111,14 +119,45 @@ impl NetworkGlobals { } /// Compute custody data columns the node is assigned to custody. - pub fn custody_columns(&self, _spec: &ChainSpec) -> Vec { - let _enr = self.local_enr(); - //TODO(das): implement ENR changes - vec![] + pub fn custody_columns(&self) -> Vec { + let enr = self.local_enr(); + let node_id = enr.node_id().raw().into(); + let custody_subnet_count = enr.custody_subnet_count::(&self.spec); + DataColumnSubnetId::compute_custody_columns::(node_id, custody_subnet_count, &self.spec) + .collect() + } + + /// Compute custody data column subnets the node is assigned to custody. + pub fn custody_subnets(&self) -> impl Iterator { + let enr = self.local_enr(); + let node_id = enr.node_id().raw().into(); + let custody_subnet_count = enr.custody_subnet_count::(&self.spec); + DataColumnSubnetId::compute_custody_subnets::(node_id, custody_subnet_count, &self.spec) + } + + /// Returns a connected peer that: + /// 1. is connected + /// 2. assigned to custody the column based on it's `custody_subnet_count` from ENR or metadata (WIP) + /// 3. has a good score + /// 4. subscribed to the specified column - this condition can be removed later, so we can + /// identify and penalise peers that are supposed to custody the column. + pub fn custody_peers_for_column(&self, column_index: ColumnIndex) -> Vec { + self.peers + .read() + .good_custody_subnet_peer(DataColumnSubnetId::from_column_index::( + column_index as usize, + &self.spec, + )) + .cloned() + .collect::>() } /// TESTING ONLY. Build a dummy NetworkGlobals instance. 
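// Illustrative sketch (not part of this patch): the column <-> subnet relationship assumed
// by `custody_columns` and `custody_peers_for_column` above, where a column index maps to
// the subnet `index % data_column_sidecar_subnet_count`, and the default custody column
// count is (columns per subnet) * (custody requirement in subnets). The numbers in the
// comments are example parameters only, not normative spec constants.
fn subnet_for_column(column_index: u64, subnet_count: u64) -> u64 {
    column_index % subnet_count
}

fn custody_columns_for_subnets(
    custody_subnets: &[u64],
    number_of_columns: u64,
    subnet_count: u64,
) -> Vec<u64> {
    (0..number_of_columns)
        .filter(|c| custody_subnets.contains(&subnet_for_column(*c, subnet_count)))
        .collect()
}

fn default_custody_column_count(
    number_of_columns: u64,
    subnet_count: u64,
    custody_requirement: u64,
) -> u64 {
    // e.g. 128 columns over 64 subnets with a custody requirement of 2 subnets
    // gives (128 / 64) * 2 = 4 columns by default.
    (number_of_columns / subnet_count) * custody_requirement
}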
- pub fn new_test_globals(trusted_peers: Vec, log: &slog::Logger) -> NetworkGlobals { + pub fn new_test_globals( + trusted_peers: Vec, + log: &slog::Logger, + spec: ChainSpec, + ) -> NetworkGlobals { use crate::CombinedKeyExt; let keypair = libp2p::identity::secp256k1::Keypair::generate(); let enr_key: discv5::enr::CombinedKey = discv5::enr::CombinedKey::from_secp256k1(&keypair); @@ -133,6 +172,28 @@ impl NetworkGlobals { trusted_peers, false, log, + spec, ) } } + +#[cfg(test)] +mod test { + use super::*; + use types::{EthSpec, MainnetEthSpec as E}; + + #[test] + fn test_custody_count_default() { + let spec = E::default_spec(); + let log = logging::test_logger(); + let default_custody_requirement_column_count = spec.number_of_columns as u64 + / spec.data_column_sidecar_subnet_count + * spec.custody_requirement; + let globals = NetworkGlobals::::new_test_globals(vec![], &log, spec.clone()); + let columns = globals.custody_columns(); + assert_eq!( + columns.len(), + default_custody_requirement_column_count as usize + ); + } +} diff --git a/beacon_node/network/src/metrics.rs b/beacon_node/network/src/metrics.rs index bb1e5468705..9e42aa8e924 100644 --- a/beacon_node/network/src/metrics.rs +++ b/beacon_node/network/src/metrics.rs @@ -14,6 +14,9 @@ use std::sync::{Arc, LazyLock}; use strum::IntoEnumIterator; use types::EthSpec; +pub const SUCCESS: &str = "SUCCESS"; +pub const FAILURE: &str = "FAILURE"; + pub static BEACON_BLOCK_MESH_PEERS_PER_CLIENT: LazyLock> = LazyLock::new(|| { try_create_int_gauge_vec( @@ -340,6 +343,13 @@ pub static PEERS_PER_SYNC_TYPE: LazyLock> = LazyLock::new(|| &["sync_status"], ) }); +pub static PEERS_PER_COLUMN_SUBNET: LazyLock> = LazyLock::new(|| { + try_create_int_gauge_vec( + "peers_per_column_subnet", + "Number of connected peers per column subnet", + &["subnet_id"], + ) +}); pub static SYNCING_CHAINS_COUNT: LazyLock> = LazyLock::new(|| { try_create_int_gauge_vec( "sync_range_chains", @@ -481,6 +491,29 @@ pub static BEACON_BLOB_DELAY_GOSSIP: LazyLock> = LazyLock::new( ) }); +pub static BEACON_DATA_COLUMN_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME: LazyLock< + Result, +> = LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_data_column_gossip_propagation_verification_delay_time", + "Duration between when the data column sidecar is received over gossip and when it is verified for propagation.", + // [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5] + decimal_buckets(-3,-1) + ) +}); +pub static BEACON_DATA_COLUMN_GOSSIP_SLOT_START_DELAY_TIME: LazyLock> = + LazyLock::new(|| { + try_create_histogram_with_buckets( + "beacon_data_column_gossip_slot_start_delay_time", + "Duration between when the data column sidecar is received over gossip and the start of the slot it belongs to.", + // Create a custom bucket list for greater granularity in block delay + Ok(vec![0.1, 0.2, 0.3,0.4,0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.5,3.0,3.5,4.0,5.0,6.0,7.0,8.0,9.0,10.0,15.0,20.0]) + // NOTE: Previous values, which we may want to switch back to. 
+ // [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50] + //decimal_buckets(-1,2) + ) + }); + pub static BEACON_BLOB_DELAY_GOSSIP_VERIFICATION: LazyLock> = LazyLock::new( || { try_create_int_gauge( @@ -520,22 +553,6 @@ pub static BEACON_BLOB_GOSSIP_ARRIVED_LATE_TOTAL: LazyLock> = }, ); -pub static BEACON_DATA_COLUMN_DELAY_GOSSIP: LazyLock> = LazyLock::new(|| { - try_create_int_gauge( - "beacon_data_column_delay_gossip_last_delay", - "The first time we see this data column as a delay from the start of the slot", - ) -}); - -pub static BEACON_DATA_COLUMN_DELAY_GOSSIP_VERIFICATION: LazyLock> = LazyLock::new( - || { - try_create_int_gauge( - "beacon_data_column_delay_gossip_verification", - "Keeps track of the time delay from the start of the slot to the point we propagate the data column" - ) - }, -); - /* * Light client update reprocessing queue metrics. */ @@ -548,6 +565,31 @@ pub static BEACON_PROCESSOR_REPROCESSING_QUEUE_SENT_OPTIMISTIC_UPDATES: LazyLock ) }); +/* + * Sampling + */ +pub static SAMPLE_DOWNLOAD_RESULT: LazyLock> = LazyLock::new(|| { + try_create_int_counter_vec( + "beacon_sampling_sample_verify_result_total", + "Total count of individual sample download results", + &["result"], + ) +}); +pub static SAMPLE_VERIFY_RESULT: LazyLock> = LazyLock::new(|| { + try_create_int_counter_vec( + "beacon_sampling_sample_verify_result_total", + "Total count of individual sample verify results", + &["result"], + ) +}); +pub static SAMPLING_REQUEST_RESULT: LazyLock> = LazyLock::new(|| { + try_create_int_counter_vec( + "beacon_sampling_request_result_total", + "Total count of sample request results", + &["result"], + ) +}); + pub fn register_finality_update_error(error: &LightClientFinalityUpdateError) { inc_counter_vec(&GOSSIP_FINALITY_UPDATE_ERRORS_PER_TYPE, &[error.as_ref()]); } @@ -564,6 +606,13 @@ pub fn register_sync_committee_error(error: &SyncCommitteeError) { inc_counter_vec(&GOSSIP_SYNC_COMMITTEE_ERRORS_PER_TYPE, &[error.as_ref()]); } +pub fn from_result(result: &std::result::Result) -> &str { + match result { + Ok(_) => SUCCESS, + Err(_) => FAILURE, + } +} + pub fn update_gossip_metrics( gossipsub: &Gossipsub, network_globals: &Arc>, diff --git a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs index 4c5c34bfd83..d5d83d540a0 100644 --- a/beacon_node/network/src/network_beacon_processor/gossip_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/gossip_methods.rs @@ -4,7 +4,6 @@ use crate::{ service::NetworkMessage, sync::SyncMessage, }; -use beacon_chain::blob_verification::{GossipBlobError, GossipVerifiedBlob}; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::data_column_verification::{GossipDataColumnError, GossipVerifiedDataColumn}; use beacon_chain::store::Error; @@ -19,7 +18,13 @@ use beacon_chain::{ AvailabilityProcessingStatus, BeaconChainError, BeaconChainTypes, BlockError, ForkChoiceError, GossipVerifiedBlock, NotifyExecutionLayer, }; -use lighthouse_network::{Client, MessageAcceptance, MessageId, PeerAction, PeerId, ReportSource}; +use beacon_chain::{ + blob_verification::{GossipBlobError, GossipVerifiedBlob}, + data_availability_checker::DataColumnsToPublish, +}; +use lighthouse_network::{ + Client, MessageAcceptance, MessageId, PeerAction, PeerId, PubsubMessage, ReportSource, +}; use operation_pool::ReceivedPreCapella; use slog::{crit, debug, error, info, trace, warn, Logger}; use slot_clock::SlotClock; @@ -166,6 +171,26 @@ impl NetworkBeaconProcessor { }) } + 
pub(crate) fn handle_data_columns_to_publish( + &self, + data_columns_to_publish: DataColumnsToPublish, + ) { + if let Some(data_columns_to_publish) = data_columns_to_publish { + self.send_network_message(NetworkMessage::Publish { + messages: data_columns_to_publish + .iter() + .map(|d| { + let subnet = DataColumnSubnetId::from_column_index::( + d.index as usize, + &self.chain.spec, + ); + PubsubMessage::DataColumnSidecar(Box::new((subnet, d.clone()))) + }) + .collect(), + }); + } + } + /// Send a message on `message_tx` that the `message_id` sent by `peer_id` should be propagated on /// the gossip network. /// @@ -615,9 +640,9 @@ impl NetworkBeaconProcessor { let index = column_sidecar.index; let delay = get_slot_delay_ms(seen_duration, slot, &self.chain.slot_clock); // Log metrics to track delay from other nodes on the network. - metrics::set_gauge( - &metrics::BEACON_DATA_COLUMN_DELAY_GOSSIP, - delay.as_millis() as i64, + metrics::observe_duration( + &metrics::BEACON_DATA_COLUMN_GOSSIP_SLOT_START_DELAY_TIME, + delay, ); match self .chain @@ -644,9 +669,9 @@ impl NetworkBeaconProcessor { .ok() .and_then(|now| now.checked_sub(seen_duration)) { - metrics::set_gauge( - &metrics::BEACON_DATA_COLUMN_DELAY_GOSSIP_VERIFICATION, - duration.as_millis() as i64, + metrics::observe_duration( + &metrics::BEACON_DATA_COLUMN_GOSSIP_PROPAGATION_VERIFICATION_DELAY_TIME, + duration, ); } self.process_gossip_verified_data_column( @@ -991,7 +1016,9 @@ impl NetworkBeaconProcessor { .process_gossip_data_columns(vec![verified_data_column]) .await { - Ok(availability) => { + Ok((availability, data_columns_to_publish)) => { + self.handle_data_columns_to_publish(data_columns_to_publish); + match availability { AvailabilityProcessingStatus::Imported(block_root) => { // Note: Reusing block imported metric here @@ -1304,6 +1331,16 @@ impl NetworkBeaconProcessor { ); return None; } + Err(e @ BlockError::BlobNotRequired(_)) => { + // TODO(das): penalty not implemented yet as other clients may still send us blobs + // during early stage of implementation. + debug!(self.log, "Received blobs for slot after PeerDAS epoch from peer"; + "error" => %e, + "peer_id" => %peer_id, + ); + self.propagate_validation_result(message_id, peer_id, MessageAcceptance::Ignore); + return None; + } }; metrics::inc_counter(&metrics::BEACON_PROCESSOR_GOSSIP_BLOCK_VERIFIED_TOTAL); @@ -1414,7 +1451,19 @@ impl NetworkBeaconProcessor { let block = verified_block.block.block_cloned(); let block_root = verified_block.block_root; - // TODO(block source) + // TODO(das) Might be too early to issue a request here. We haven't checked that the block + // actually includes blob transactions and thus has data. A peer could send a block is + // garbage commitments, and make us trigger sampling for a block that does not have data. + if block.num_expected_blobs() > 0 { + // Trigger sampling for block not yet execution valid. At this point column custodials are + // unlikely to have received their columns. 
Triggering sampling so early is only viable with + // either: + // - Sync delaying sampling until some latter window + // - Re-processing early sampling requests: https://github.com/sigp/lighthouse/pull/5569 + if self.chain.should_sample_slot(block.slot()) { + self.send_sync_message(SyncMessage::SampleBlock(block_root, block.slot())); + } + } let result = self .chain diff --git a/beacon_node/network/src/network_beacon_processor/mod.rs b/beacon_node/network/src/network_beacon_processor/mod.rs index cb21b6dfb50..7f551c544c7 100644 --- a/beacon_node/network/src/network_beacon_processor/mod.rs +++ b/beacon_node/network/src/network_beacon_processor/mod.rs @@ -1,4 +1,5 @@ use crate::sync::manager::BlockProcessType; +use crate::sync::SamplingId; use crate::{service::NetworkMessage, sync::manager::SyncMessage}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{builder::Witness, eth1_chain::CachingEth1Backend, BeaconChain}; @@ -500,6 +501,43 @@ impl NetworkBeaconProcessor { }) } + /// Create a new `Work` event for some sampling columns, and reports the verification result + /// back to sync. + pub fn send_rpc_validate_data_columns( + self: &Arc, + block_root: Hash256, + data_columns: Vec>>, + seen_timestamp: Duration, + id: SamplingId, + ) -> Result<(), Error> { + let s = self.clone(); + self.try_send(BeaconWorkEvent { + drop_during_sync: false, + work: Work::RpcVerifyDataColumn(Box::pin(async move { + let result = s + .clone() + .validate_rpc_data_columns(block_root, data_columns, seen_timestamp) + .await; + // Sync handles these results + s.send_sync_message(SyncMessage::SampleVerified { id, result }); + })), + }) + } + + /// Create a new `Work` event with a block sampling completed result + pub fn send_sampling_completed( + self: &Arc, + block_root: Hash256, + ) -> Result<(), Error> { + let nbp = self.clone(); + self.try_send(BeaconWorkEvent { + drop_during_sync: false, + work: Work::SamplingResult(Box::pin(async move { + nbp.process_sampling_completed(block_root).await; + })), + }) + } + /// Create a new work event to import `blocks` as a beacon chain segment. pub fn send_chain_segment( self: &Arc, diff --git a/beacon_node/network/src/network_beacon_processor/sync_methods.rs b/beacon_node/network/src/network_beacon_processor/sync_methods.rs index 495d1cd92be..508576d9f52 100644 --- a/beacon_node/network/src/network_beacon_processor/sync_methods.rs +++ b/beacon_node/network/src/network_beacon_processor/sync_methods.rs @@ -8,6 +8,7 @@ use crate::sync::{ use beacon_chain::block_verification_types::{AsBlock, RpcBlock}; use beacon_chain::data_availability_checker::AvailabilityCheckError; use beacon_chain::data_availability_checker::MaybeAvailableBlock; +use beacon_chain::data_column_verification::verify_kzg_for_data_column_list; use beacon_chain::{ validator_monitor::get_slot_delay_ms, AvailabilityProcessingStatus, BeaconChainError, BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError, NotifyExecutionLayer, @@ -24,8 +25,7 @@ use store::KzgCommitment; use tokio::sync::mpsc; use types::beacon_block_body::format_kzg_commitments; use types::blob_sidecar::FixedBlobSidecarList; -use types::{BlockImportSource, DataColumnSidecarList}; -use types::{Epoch, Hash256}; +use types::{BlockImportSource, DataColumnSidecar, DataColumnSidecarList, Epoch, Hash256}; /// Id associated to a batch processing request, either a sync batch or a parent lookup. 
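// Illustrative sketch (not part of this patch): the gating applied before a
// `SyncMessage::SampleBlock` is sent in the handlers above. Sampling is only triggered for
// blocks that actually commit to blob data and whose slot the chain considers sampleable
// (PeerDAS active and within the data-availability window). Names are simplified assumptions.
fn should_trigger_sampling(expected_blobs: usize, slot_is_sampleable: bool) -> bool {
    expected_blobs > 0 && slot_is_sampleable
}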
#[derive(Clone, Debug, PartialEq)] @@ -139,6 +139,7 @@ impl NetworkBeaconProcessor { }; let slot = block.slot(); + let block_has_data = block.as_block().num_expected_blobs() > 0; let parent_root = block.message().parent_root(); let commitments_formatted = block.as_block().commitments_formatted(); @@ -186,6 +187,18 @@ impl NetworkBeaconProcessor { self.chain.recompute_head_at_current_slot().await; } + + // RPC block imported or execution validated. If the block was already imported by gossip we + // receive Err(BlockError::AlreadyKnown). + if result.is_ok() && + // Block has at least one blob, so it produced columns + block_has_data && + // Block slot is within the DA boundary (should always be the case) and PeerDAS is activated + self.chain.should_sample_slot(slot) + { + self.send_sync_message(SyncMessage::SampleBlock(block_root, slot)); + } + // Sync handles these results self.send_sync_message(SyncMessage::BlockComponentProcessed { process_type, @@ -320,24 +333,28 @@ impl NetworkBeaconProcessor { .await; match &result { - Ok(availability) => match availability { - AvailabilityProcessingStatus::Imported(hash) => { - debug!( - self.log, - "Block components retrieved"; - "result" => "imported block and custody columns", - "block_hash" => %hash, - ); - self.chain.recompute_head_at_current_slot().await; - } - AvailabilityProcessingStatus::MissingComponents(_, _) => { - debug!( - self.log, - "Missing components over rpc"; - "block_hash" => %block_root, - ); + Ok((availability, data_columns_to_publish)) => { + self.handle_data_columns_to_publish(data_columns_to_publish.clone()); + + match availability { + AvailabilityProcessingStatus::Imported(hash) => { + debug!( + self.log, + "Block components retrieved"; + "result" => "imported block and custody columns", + "block_hash" => %hash, + ); + self.chain.recompute_head_at_current_slot().await; + } + AvailabilityProcessingStatus::MissingComponents(_, _) => { + debug!( + self.log, + "Missing components over rpc"; + "block_hash" => %block_root, + ); + } } - }, + } Err(BlockError::BlockIsAlreadyKnown(_)) => { debug!( self.log, @@ -357,10 +374,29 @@ impl NetworkBeaconProcessor { self.send_sync_message(SyncMessage::BlockComponentProcessed { process_type, - result: result.into(), + result: result.map(|(r, _)| r).into(), }); } + /// Validate a list of data columns received from RPC requests + pub async fn validate_rpc_data_columns( + self: Arc>, + _block_root: Hash256, + data_columns: Vec>>, + _seen_timestamp: Duration, + ) -> Result<(), String> { + let kzg = self.chain.kzg.as_ref().ok_or("Kzg not initialized")?; + verify_kzg_for_data_column_list(data_columns.iter(), kzg).map_err(|err| format!("{err:?}")) + } + + /// Process a sampling completed event, inserting it into fork-choice + pub async fn process_sampling_completed( + self: Arc>, + block_root: Hash256, + ) { + self.chain.process_sampling_completed(block_root).await; + } + /// Attempt to import the chain segment (`blocks`) to the beacon chain, informing the sync /// thread if more blocks are needed to process it. 
pub async fn process_chain_segment( @@ -421,6 +457,10 @@ impl NetworkBeaconProcessor { .iter() .map(|wrapped| wrapped.n_blobs()) .sum::(); + let n_data_columns = downloaded_blocks + .iter() + .map(|wrapped| wrapped.n_data_columns()) + .sum::(); match self.process_backfill_blocks(downloaded_blocks) { (imported_blocks, Ok(_)) => { @@ -430,6 +470,7 @@ impl NetworkBeaconProcessor { "last_block_slot" => end_slot, "processed_blocks" => sent_blocks, "processed_blobs" => n_blobs, + "processed_data_columns" => n_data_columns, "service"=> "sync"); BatchProcessResult::Success { sent_blocks, @@ -473,10 +514,19 @@ impl NetworkBeaconProcessor { { ChainSegmentResult::Successful { imported_blocks } => { metrics::inc_counter(&metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_SUCCESS_TOTAL); - if imported_blocks > 0 { + if !imported_blocks.is_empty() { self.chain.recompute_head_at_current_slot().await; + + for (block_root, block_slot) in &imported_blocks { + if self.chain.should_sample_slot(*block_slot) { + self.send_sync_message(SyncMessage::SampleBlock( + *block_root, + *block_slot, + )); + } + } } - (imported_blocks, Ok(())) + (imported_blocks.len(), Ok(())) } ChainSegmentResult::Failed { imported_blocks, @@ -484,10 +534,10 @@ impl NetworkBeaconProcessor { } => { metrics::inc_counter(&metrics::BEACON_PROCESSOR_CHAIN_SEGMENT_FAILED_TOTAL); let r = self.handle_failed_chain_segment(error); - if imported_blocks > 0 { + if !imported_blocks.is_empty() { self.chain.recompute_head_at_current_slot().await; } - (imported_blocks, r) + (imported_blocks.len(), r) } } } diff --git a/beacon_node/network/src/network_beacon_processor/tests.rs b/beacon_node/network/src/network_beacon_processor/tests.rs index a9b9f64a79d..40c69a0baa5 100644 --- a/beacon_node/network/src/network_beacon_processor/tests.rs +++ b/beacon_node/network/src/network_beacon_processor/tests.rs @@ -93,7 +93,7 @@ impl TestRig { spec.shard_committee_period = 2; let harness = BeaconChainHarness::builder(MainnetEthSpec) - .spec(spec) + .spec(spec.clone()) .deterministic_keypairs(VALIDATOR_COUNT) .fresh_ephemeral_store() .mock_execution_layer() @@ -204,7 +204,14 @@ impl TestRig { }); let enr_key = CombinedKey::generate_secp256k1(); let enr = enr::Enr::builder().build(&enr_key).unwrap(); - let network_globals = Arc::new(NetworkGlobals::new(enr, meta_data, vec![], false, &log)); + let network_globals = Arc::new(NetworkGlobals::new( + enr, + meta_data, + vec![], + false, + &log, + spec, + )); let executor = harness.runtime.task_executor.clone(); diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index dfb05da19bd..946d25237bf 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -372,7 +372,9 @@ impl BackFillSync { // A batch could be retried without the peer failing the request (disconnecting/ // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. + if !batch.is_expecting_block(&request_id) { return Ok(()); } debug!(self.log, "Batch failed"; "batch_epoch" => batch_id, "error" => "rpc_error"); @@ -420,7 +422,9 @@ impl BackFillSync { // sending an error /timeout) if the peer is removed from the chain for other // reasons. 
Check that this block belongs to the expected peer, and that the // request_id matches - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. + if !batch.is_expecting_block(&request_id) { return Ok(ProcessResult::Successful); } batch @@ -958,7 +962,7 @@ impl BackFillSync { ) -> Result<(), BackFillError> { if let Some(batch) = self.batches.get_mut(&batch_id) { let (request, is_blob_batch) = batch.to_blocks_by_range_request(); - match network.blocks_and_blobs_by_range_request( + match network.block_components_by_range_request( peer, is_blob_batch, request, diff --git a/beacon_node/network/src/sync/block_lookups/common.rs b/beacon_node/network/src/sync/block_lookups/common.rs index a7be72556e2..c7c043f53f8 100644 --- a/beacon_node/network/src/sync/block_lookups/common.rs +++ b/beacon_node/network/src/sync/block_lookups/common.rs @@ -4,6 +4,7 @@ use crate::sync::block_lookups::single_block_lookup::{ use crate::sync::block_lookups::{ BlobRequestState, BlockRequestState, CustodyRequestState, PeerId, }; +use crate::sync::manager::BlockProcessType; use crate::sync::network_context::{LookupRequestResult, SyncNetworkContext}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::BeaconChainTypes; @@ -92,7 +93,7 @@ impl RequestState for BlockRequestState { value, block_root, seen_timestamp, - peer_id: _, + .. } = download_result; cx.send_block_for_processing( id, @@ -140,7 +141,7 @@ impl RequestState for BlobRequestState { value, block_root, seen_timestamp, - peer_id: _, + .. } = download_result; cx.send_blobs_for_processing(id, block_root, value, seen_timestamp) .map_err(LookupRequestError::SendFailedProcessor) @@ -186,8 +187,14 @@ impl RequestState for CustodyRequestState { seen_timestamp, .. 
} = download_result; - cx.send_custody_columns_for_processing(id, block_root, value, seen_timestamp) - .map_err(LookupRequestError::SendFailedProcessor) + cx.send_custody_columns_for_processing( + id, + block_root, + value, + seen_timestamp, + BlockProcessType::SingleCustodyColumn(id), + ) + .map_err(LookupRequestError::SendFailedProcessor) } fn response_type() -> ResponseType { diff --git a/beacon_node/network/src/sync/block_lookups/mod.rs b/beacon_node/network/src/sync/block_lookups/mod.rs index 7194faa2860..7a5cda20692 100644 --- a/beacon_node/network/src/sync/block_lookups/mod.rs +++ b/beacon_node/network/src/sync/block_lookups/mod.rs @@ -24,7 +24,7 @@ use self::parent_chain::{compute_parent_chains, NodeChain}; pub use self::single_block_lookup::DownloadResult; use self::single_block_lookup::{LookupRequestError, LookupResult, SingleBlockLookup}; use super::manager::{BlockProcessType, BlockProcessingResult, SLOT_IMPORT_TOLERANCE}; -use super::network_context::{RpcResponseResult, SyncNetworkContext}; +use super::network_context::{PeerGroup, RpcResponseError, SyncNetworkContext}; use crate::metrics; use crate::sync::block_lookups::common::ResponseType; use crate::sync::block_lookups::parent_chain::find_oldest_fork_ancestor; @@ -42,7 +42,7 @@ use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Duration; use store::Hash256; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; +use types::{BlobSidecar, DataColumnSidecar, EthSpec, SignedBeaconBlock}; pub mod common; pub mod parent_chain; @@ -76,6 +76,7 @@ const MAX_LOOKUPS: usize = 200; pub enum BlockComponent { Block(DownloadResult>>), Blob(DownloadResult>>), + DataColumn(DownloadResult>>), } impl BlockComponent { @@ -83,12 +84,14 @@ impl BlockComponent { match self { BlockComponent::Block(block) => block.value.parent_root(), BlockComponent::Blob(blob) => blob.value.block_parent_root(), + BlockComponent::DataColumn(column) => column.value.block_parent_root(), } } fn get_type(&self) -> &'static str { match self { BlockComponent::Block(_) => "block", BlockComponent::Blob(_) => "blob", + BlockComponent::DataColumn(_) => "data_column", } } } @@ -379,11 +382,10 @@ impl BlockLookups { pub fn on_download_response>( &mut self, id: SingleLookupReqId, - peer_id: PeerId, - response: RpcResponseResult, + response: Result<(R::VerifiedResponseType, PeerGroup, Duration), RpcResponseError>, cx: &mut SyncNetworkContext, ) { - let result = self.on_download_response_inner::(id, peer_id, response, cx); + let result = self.on_download_response_inner::(id, response, cx); self.on_lookup_result(id.lookup_id, result, "download_response", cx); } @@ -391,8 +393,7 @@ impl BlockLookups { pub fn on_download_response_inner>( &mut self, id: SingleLookupReqId, - peer_id: PeerId, - response: RpcResponseResult, + response: Result<(R::VerifiedResponseType, PeerGroup, Duration), RpcResponseError>, cx: &mut SyncNetworkContext, ) -> Result { // Note: no need to downscore peers here, already downscored on network context @@ -409,12 +410,12 @@ impl BlockLookups { let request_state = R::request_state_mut(lookup).get_state_mut(); match response { - Ok((response, seen_timestamp)) => { + Ok((response, peer_group, seen_timestamp)) => { debug!(self.log, "Received lookup download success"; "block_root" => ?block_root, "id" => ?id, - "peer_id" => %peer_id, + "peer_group" => ?peer_group, "response_type" => ?response_type, ); @@ -435,19 +436,20 @@ impl BlockLookups { value: response, block_root, seen_timestamp, - peer_id, + peer_group, }, )?; // continue_request will 
send for processing as the request state is AwaitingProcessing } Err(e) => { + // TODO(das): is it okay to not log the peer source of request failures? Then we + // should log individual requests failures in the SyncNetworkContext debug!(self.log, "Received lookup download failure"; "block_root" => ?block_root, "id" => ?id, - "peer_id" => %peer_id, "response_type" => ?response_type, - "error" => %e, + "error" => ?e, ); request_state.on_download_failure(id.req_id)?; @@ -481,11 +483,11 @@ impl BlockLookups { BlockProcessType::SingleBlob { id } => { self.on_processing_result_inner::>(id, result, cx) } + BlockProcessType::SingleCustodyColumn(id) => { + self.on_processing_result_inner::>(id, result, cx) + } }; - let id = match process_type { - BlockProcessType::SingleBlock { id } | BlockProcessType::SingleBlob { id } => id, - }; - self.on_lookup_result(id, lookup_result, "processing_result", cx); + self.on_lookup_result(process_type.id(), lookup_result, "processing_result", cx); } pub fn on_processing_result_inner>( @@ -519,10 +521,9 @@ impl BlockLookups { Action::Continue } - BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( - _, - _block_root, - )) => { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents { + .. + }) => { // `on_processing_success` is called here to ensure the request state is updated prior to checking // if both components have been processed. request_state.on_processing_success()?; @@ -591,17 +592,21 @@ impl BlockLookups { } other => { debug!(self.log, "Invalid lookup component"; "block_root" => ?block_root, "component" => ?R::response_type(), "error" => ?other); - - let peer_id = request_state.on_processing_failure()?; - cx.report_peer( - peer_id, - PeerAction::MidToleranceError, - match R::response_type() { - ResponseType::Block => "lookup_block_processing_failure", - ResponseType::Blob => "lookup_blobs_processing_failure", - ResponseType::CustodyColumn => "lookup_custody_processing_failure", - }, - ); + let peer_group = request_state.on_processing_failure()?; + // TOOD(das): only downscore peer subgroup that provided the invalid proof + for peer in peer_group.all() { + cx.report_peer( + *peer, + PeerAction::MidToleranceError, + match R::response_type() { + ResponseType::Block => "lookup_block_processing_failure", + ResponseType::Blob => "lookup_blobs_processing_failure", + ResponseType::CustodyColumn => { + "lookup_custody_column_processing_failure" + } + }, + ); + } Action::Retry } diff --git a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs index b9cd4e3e035..b17bcedc5f5 100644 --- a/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs +++ b/beacon_node/network/src/sync/block_lookups/single_block_lookup.rs @@ -2,7 +2,8 @@ use super::common::ResponseType; use super::{BlockComponent, PeerId, SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS}; use crate::sync::block_lookups::common::RequestState; use crate::sync::network_context::{ - LookupRequestResult, ReqId, RpcRequestSendError, SendErrorProcessor, SyncNetworkContext, + LookupRequestResult, PeerGroup, ReqId, RpcRequestSendError, SendErrorProcessor, + SyncNetworkContext, }; use beacon_chain::BeaconChainTypes; use derivative::Derivative; @@ -124,8 +125,8 @@ impl SingleBlockLookup { .block_request_state .state .insert_verified_response(block), - BlockComponent::Blob(_) => { - // For now ignore single blobs, as the blob request state assumes all blobs are + BlockComponent::Blob(_) | 
BlockComponent::DataColumn(_) => { + // For now ignore single blobs and columns, as the blob request state assumes all blobs are // attributed to the same peer = the peer serving the remaining blobs. Ignoring this // block component has a minor effect, causing the node to re-request this blob // once the parent chain is successfully resolved @@ -292,34 +293,34 @@ impl SingleBlockLookup { } } -/// The state of the block request component of a `SingleBlockLookup`. +/// The state of the blob request component of a `SingleBlockLookup`. #[derive(Derivative)] #[derivative(Debug)] -pub struct BlockRequestState { +pub struct BlobRequestState { #[derivative(Debug = "ignore")] - pub requested_block_root: Hash256, - pub state: SingleLookupRequestState>>, + pub block_root: Hash256, + pub state: SingleLookupRequestState>, } -impl BlockRequestState { +impl BlobRequestState { pub fn new(block_root: Hash256) -> Self { Self { - requested_block_root: block_root, + block_root, state: SingleLookupRequestState::new(), } } } -/// The state of the blob request component of a `SingleBlockLookup`. +/// The state of the custody request component of a `SingleBlockLookup`. #[derive(Derivative)] #[derivative(Debug)] -pub struct BlobRequestState { +pub struct CustodyRequestState { #[derivative(Debug = "ignore")] pub block_root: Hash256, - pub state: SingleLookupRequestState>, + pub state: SingleLookupRequestState>, } -impl BlobRequestState { +impl CustodyRequestState { pub fn new(block_root: Hash256) -> Self { Self { block_root, @@ -328,33 +329,33 @@ impl BlobRequestState { } } -/// The state of the custody request component of a `SingleBlockLookup`. +/// The state of the block request component of a `SingleBlockLookup`. #[derive(Derivative)] #[derivative(Debug)] -pub struct CustodyRequestState { +pub struct BlockRequestState { #[derivative(Debug = "ignore")] - pub block_root: Hash256, - pub state: SingleLookupRequestState>, + pub requested_block_root: Hash256, + pub state: SingleLookupRequestState>>, } -impl CustodyRequestState { +impl BlockRequestState { pub fn new(block_root: Hash256) -> Self { Self { - block_root, + requested_block_root: block_root, state: SingleLookupRequestState::new(), } } } -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, Clone)] pub struct DownloadResult { pub value: T, pub block_root: Hash256, pub seen_timestamp: Duration, - pub peer_id: PeerId, + pub peer_group: PeerGroup, } -#[derive(PartialEq, Eq, IntoStaticStr)] +#[derive(IntoStaticStr)] pub enum State { AwaitingDownload(&'static str), Downloading(ReqId), @@ -366,8 +367,7 @@ pub enum State { } /// Object representing the state of a single block or blob lookup request. -#[derive(PartialEq, Eq, Derivative)] -#[derivative(Debug)] +#[derive(Debug)] pub struct SingleLookupRequestState { /// State of this request. state: State, @@ -537,13 +537,13 @@ impl SingleLookupRequestState { } /// Registers a failure in processing a block. 
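// Illustrative sketch (not part of this patch): a download result is now attributed to a
// group of peers rather than a single peer, since the custody columns for one block can be
// served by several peers. On an invalid-processing result every peer in the group is
// reported, as the changes below show. This `PeerGroup` is a simplified stand-in for the
// type defined in the sync network context, and `report` is a hypothetical callback.
#[derive(Debug, Clone)]
struct PeerGroup<P> {
    peers: Vec<P>,
}

impl<P> PeerGroup<P> {
    fn all(&self) -> impl Iterator<Item = &P> {
        self.peers.iter()
    }
}

fn penalise_group<P: Copy>(group: &PeerGroup<P>, mut report: impl FnMut(P, &'static str)) {
    for peer in group.all() {
        report(*peer, "lookup_custody_column_processing_failure");
    }
}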
- pub fn on_processing_failure(&mut self) -> Result { + pub fn on_processing_failure(&mut self) -> Result { match &self.state { State::Processing(result) => { - let peer_id = result.peer_id; + let peers_source = result.peer_group.clone(); self.failed_processing = self.failed_processing.saturating_add(1); self.state = State::AwaitingDownload("not started"); - Ok(peer_id) + Ok(peers_source) } other => Err(LookupRequestError::BadState(format!( "Bad state on_processing_failure expected Processing got {other}" @@ -600,8 +600,8 @@ impl std::fmt::Debug for State { match self { Self::AwaitingDownload(status) => write!(f, "AwaitingDownload({:?})", status), Self::Downloading(req_id) => write!(f, "Downloading({:?})", req_id), - Self::AwaitingProcess(d) => write!(f, "AwaitingProcess({:?})", d.peer_id), - Self::Processing(d) => write!(f, "Processing({:?})", d.peer_id), + Self::AwaitingProcess(d) => write!(f, "AwaitingProcess({:?})", d.peer_group), + Self::Processing(d) => write!(f, "Processing({:?})", d.peer_group), Self::Processed { .. } => write!(f, "Processed"), } } diff --git a/beacon_node/network/src/sync/block_lookups/tests.rs b/beacon_node/network/src/sync/block_lookups/tests.rs index fcd0d768b7b..9572bf7f444 100644 --- a/beacon_node/network/src/sync/block_lookups/tests.rs +++ b/beacon_node/network/src/sync/block_lookups/tests.rs @@ -1,7 +1,7 @@ use crate::network_beacon_processor::NetworkBeaconProcessor; - use crate::sync::manager::{BlockProcessType, SyncManager}; -use crate::sync::SyncMessage; +use crate::sync::sampling::SamplingConfig; +use crate::sync::{SamplingId, SyncMessage}; use crate::NetworkMessage; use std::sync::Arc; @@ -14,26 +14,33 @@ use beacon_chain::builder::Witness; use beacon_chain::data_availability_checker::Availability; use beacon_chain::eth1_chain::CachingEth1Backend; use beacon_chain::test_utils::{ - build_log, generate_rand_block_and_blobs, BeaconChainHarness, EphemeralHarnessType, NumBlobs, + build_log, generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, + BeaconChainHarness, EphemeralHarnessType, NumBlobs, }; +use beacon_chain::validator_monitor::timestamp_now; use beacon_chain::{ AvailabilityPendingExecutedBlock, PayloadVerificationOutcome, PayloadVerificationStatus, }; use beacon_processor::WorkEvent; use lighthouse_network::rpc::{RPCError, RPCResponseErrorCode}; -use lighthouse_network::service::api_types::{AppRequestId, Id, SingleLookupReqId, SyncRequestId}; +use lighthouse_network::service::api_types::{ + AppRequestId, DataColumnsByRootRequester, Id, SamplingRequester, SingleLookupReqId, + SyncRequestId, +}; use lighthouse_network::types::SyncState; use lighthouse_network::{NetworkGlobals, Request}; use slog::info; use slot_clock::{ManualSlotClock, SlotClock, TestingSlotClock}; use store::MemoryStore; use tokio::sync::mpsc; +use types::data_column_sidecar::ColumnIndex; use types::test_utils::TestRandom; use types::{ test_utils::{SeedableRng, XorShiftRng}, BlobSidecar, ForkName, MinimalEthSpec as E, SignedBeaconBlock, Slot, }; use types::{BeaconState, BeaconStateBase}; +use types::{DataColumnSidecar, Epoch}; type T = Witness, E, MemoryStore, MemoryStore>; @@ -84,15 +91,32 @@ struct TestRig { const D: Duration = Duration::new(0, 0); const PARENT_FAIL_TOLERANCE: u8 = SINGLE_BLOCK_LOOKUP_MAX_ATTEMPTS; +const SAMPLING_REQUIRED_SUCCESSES: usize = 2; + +type DCByRootIds = Vec; +type DCByRootId = (SyncRequestId, Vec); + +struct TestRigConfig { + peer_das_enabled: bool, +} impl TestRig { - fn test_setup() -> Self { + fn 
test_setup_with_config(config: Option) -> Self { let enable_log = cfg!(feature = "test_logger"); let log = build_log(slog::Level::Trace, enable_log); + // Use `fork_from_env` logic to set correct fork epochs + let mut spec = test_spec::(); + + if let Some(config) = config { + if config.peer_das_enabled { + spec.eip7594_fork_epoch = Some(Epoch::new(0)); + } + } + // Initialise a new beacon chain let harness = BeaconChainHarness::>::builder(E) - .default_spec() + .spec(spec) .logger(log.clone()) .deterministic_keypairs(1) .fresh_ephemeral_store() @@ -106,7 +130,13 @@ impl TestRig { let chain = harness.chain.clone(); let (network_tx, network_rx) = mpsc::unbounded_channel(); - let globals = Arc::new(NetworkGlobals::new_test_globals(Vec::new(), &log)); + // TODO(das): make the generation of the ENR use the deterministic rng to have consistent + // column assignments + let globals = Arc::new(NetworkGlobals::new_test_globals( + Vec::new(), + &log, + chain.spec.clone(), + )); let (beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing( globals, chain.clone(), @@ -136,6 +166,9 @@ impl TestRig { network_tx, beacon_processor.into(), sync_recv, + SamplingConfig::Custom { + required_successes: vec![SAMPLING_REQUIRED_SUCCESSES], + }, log.clone(), ), harness, @@ -144,6 +177,10 @@ impl TestRig { } } + fn test_setup() -> Self { + Self::test_setup_with_config(None) + } + fn test_setup_after_deneb() -> Option { let r = Self::test_setup(); if r.after_deneb() { @@ -153,6 +190,17 @@ impl TestRig { } } + fn test_setup_after_peerdas() -> Option { + let r = Self::test_setup_with_config(Some(TestRigConfig { + peer_das_enabled: true, + })); + if r.after_deneb() { + Some(r) + } else { + None + } + } + fn log(&self, msg: &str) { info!(self.log, "TEST_RIG"; "msg" => msg); } @@ -180,6 +228,10 @@ impl TestRig { )); } + fn trigger_sample_block(&mut self, block_root: Hash256, block_slot: Slot) { + self.send_sync_message(SyncMessage::SampleBlock(block_root, block_slot)) + } + fn rand_block(&mut self) -> SignedBeaconBlock { self.rand_block_and_blobs(NumBlobs::None).0 } @@ -193,6 +245,18 @@ impl TestRig { generate_rand_block_and_blobs::(fork_name, num_blobs, rng) } + fn rand_block_and_data_columns( + &mut self, + ) -> (SignedBeaconBlock, Vec>>) { + let num_blobs = NumBlobs::Number(1); + generate_rand_block_and_data_columns::( + self.fork_name, + num_blobs, + &mut self.rng, + &self.harness.spec, + ) + } + pub fn rand_block_and_parent( &mut self, ) -> (SignedBeaconBlock, SignedBeaconBlock, Hash256, Hash256) { @@ -233,6 +297,20 @@ impl TestRig { ); } + fn expect_no_active_sampling(&mut self) { + assert_eq!( + self.sync_manager.active_sampling_requests(), + vec![], + "expected no active sampling" + ); + } + + fn expect_clean_finished_sampling(&mut self) { + self.expect_empty_network(); + self.expect_sampling_result_work(); + self.expect_no_active_sampling(); + } + fn assert_parent_lookups_count(&self, count: usize) { assert_eq!( self.active_parent_lookups_count(), @@ -311,12 +389,26 @@ impl TestRig { } fn new_connected_peer(&mut self) -> PeerId { - let peer_id = PeerId::random(); self.network_globals .peers .write() - .__add_connected_peer_testing_only(&peer_id); - peer_id + .__add_connected_peer_testing_only(false, &self.harness.spec) + } + + fn new_connected_supernode_peer(&mut self) -> PeerId { + self.network_globals + .peers + .write() + .__add_connected_peer_testing_only(true, &self.harness.spec) + } + + fn new_connected_peers_for_peerdas(&mut self) { + // Enough sampling peers with few columns + 
for _ in 0..100 { + self.new_connected_peer(); + } + // One supernode peer to ensure all columns have at least one peer + self.new_connected_supernode_peer(); } fn parent_chain_processed_success( @@ -542,6 +634,182 @@ impl TestRig { }) } + fn return_empty_sampling_requests(&mut self, ids: DCByRootIds) { + for id in ids { + self.log(&format!("return empty data column for {id:?}")); + self.return_empty_sampling_request(id) + } + } + + fn return_empty_sampling_request(&mut self, (request_id, _): DCByRootId) { + let peer_id = PeerId::random(); + // Send stream termination + self.send_sync_message(SyncMessage::RpcDataColumn { + request_id, + peer_id, + data_column: None, + seen_timestamp: timestamp_now(), + }); + } + + fn sampling_requests_failed( + &mut self, + sampling_ids: DCByRootIds, + peer_id: PeerId, + error: RPCError, + ) { + for (request_id, _) in sampling_ids { + self.send_sync_message(SyncMessage::RpcError { + peer_id, + request_id, + error: error.clone(), + }) + } + } + + fn complete_valid_block_request( + &mut self, + id: SingleLookupReqId, + block: Arc>, + missing_components: bool, + ) { + // Complete download + let peer_id = PeerId::random(); + let slot = block.slot(); + let block_root = block.canonical_root(); + self.single_lookup_block_response(id, peer_id, Some(block)); + self.single_lookup_block_response(id, peer_id, None); + // Expect processing and resolve with import + self.expect_block_process(ResponseType::Block); + self.single_block_component_processed( + id.lookup_id, + if missing_components { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + slot, block_root, + )) + } else { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported(block_root)) + }, + ) + } + + fn complete_valid_sampling_column_requests( + &mut self, + ids: DCByRootIds, + data_columns: Vec>>, + ) { + for id in ids { + self.log(&format!("return valid data column for {id:?}")); + let indices = &id.1; + let columns_to_send = indices + .iter() + .map(|&i| data_columns[i as usize].clone()) + .collect::>(); + self.complete_valid_sampling_column_request(id, &columns_to_send); + } + } + + fn complete_valid_sampling_column_request( + &mut self, + id: DCByRootId, + data_columns: &[Arc>], + ) { + let first_dc = data_columns.first().unwrap(); + let block_root = first_dc.block_root(); + let sampling_request_id = match id.0 { + SyncRequestId::DataColumnsByRoot( + _, + _requester @ DataColumnsByRootRequester::Sampling(sampling_id), + ) => sampling_id.sampling_request_id, + _ => unreachable!(), + }; + self.complete_data_columns_by_root_request(id, data_columns); + + // Expect work event + // TODO(das): worth it to append sender id to the work event for stricter assertion? 
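// The columns returned above are first verified by the beacon processor (surfacing as an
// `RPC_VERIFY_DATA_COLUMNS` work event); sampling only progresses once sync is told the
// outcome via `SyncMessage::SampleVerified` below.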
+ self.expect_rpc_sample_verify_work_event(); + + // Respond with valid result + self.send_sync_message(SyncMessage::SampleVerified { + id: SamplingId { + id: SamplingRequester::ImportedBlock(block_root), + sampling_request_id, + }, + result: Ok(()), + }) + } + + fn complete_valid_custody_request( + &mut self, + ids: DCByRootIds, + data_columns: Vec>>, + missing_components: bool, + ) { + let lookup_id = + if let SyncRequestId::DataColumnsByRoot(_, DataColumnsByRootRequester::Custody(id)) = + ids.first().unwrap().0 + { + id.requester.0.lookup_id + } else { + panic!("not a custody requester") + }; + + let first_column = data_columns.first().cloned().unwrap(); + + for id in ids { + self.log(&format!("return valid data column for {id:?}")); + let indices = &id.1; + let columns_to_send = indices + .iter() + .map(|&i| data_columns[i as usize].clone()) + .collect::>(); + self.complete_data_columns_by_root_request(id, &columns_to_send); + } + + // Expect work event + // TODO(das): worth it to append sender id to the work event for stricter assertion? + self.expect_rpc_custody_column_work_event(); + + // Respond with valid result + self.send_sync_message(SyncMessage::BlockComponentProcessed { + process_type: BlockProcessType::SingleCustodyColumn(lookup_id), + result: if missing_components { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::MissingComponents( + first_column.slot(), + first_column.block_root(), + )) + } else { + BlockProcessingResult::Ok(AvailabilityProcessingStatus::Imported( + first_column.block_root(), + )) + }, + }); + } + + fn complete_data_columns_by_root_request( + &mut self, + (request_id, _): DCByRootId, + data_columns: &[Arc>], + ) { + let peer_id = PeerId::random(); + for data_column in data_columns { + // Send chunks + self.send_sync_message(SyncMessage::RpcDataColumn { + request_id, + peer_id, + data_column: Some(data_column.clone()), + seen_timestamp: timestamp_now(), + }); + } + // Send stream termination + self.send_sync_message(SyncMessage::RpcDataColumn { + request_id, + peer_id, + data_column: None, + seen_timestamp: timestamp_now(), + }); + } + /// Return RPCErrors for all active requests of peer fn rpc_error_all_active_requests(&mut self, disconnected_peer_id: PeerId) { self.drain_network_rx(); @@ -710,6 +978,59 @@ impl TestRig { .unwrap_or_else(|e| panic!("Expected blob parent request for {for_block:?}: {e}")) } + /// Retrieves an unknown number of requests for data columns of `block_root`. Because peer ENRs + /// are random, and peer selection is random, the total number of batched requests is unknown. + fn expect_data_columns_by_root_requests( + &mut self, + block_root: Hash256, + count: usize, + ) -> DCByRootIds { + let mut requests: DCByRootIds = vec![]; + loop { + let req = self + .pop_received_network_event(|ev| match ev { + NetworkMessage::SendRequest { + peer_id: _, + request: Request::DataColumnsByRoot(request), + request_id: AppRequestId::Sync(id @ SyncRequestId::DataColumnsByRoot { .. 
}), + } if request + .data_column_ids + .to_vec() + .iter() + .any(|r| r.block_root == block_root) => + { + let indices = request + .data_column_ids + .to_vec() + .iter() + .map(|cid| cid.index) + .collect::>(); + Some((*id, indices)) + } + _ => None, + }) + .unwrap_or_else(|e| { + panic!("Expected more DataColumnsByRoot requests for {block_root:?}: {e}") + }); + requests.push(req); + + // Should never infinite loop because sync does not send requests for 0 columns + if requests.iter().map(|r| r.1.len()).sum::() >= count { + return requests; + } + } + } + + fn expect_only_data_columns_by_root_requests( + &mut self, + for_block: Hash256, + count: usize, + ) -> DCByRootIds { + let ids = self.expect_data_columns_by_root_requests(for_block, count); + self.expect_empty_network(); + ids + } + #[track_caller] fn expect_block_process(&mut self, response_type: ResponseType) { match response_type { @@ -723,11 +1044,47 @@ impl TestRig { (ev.work_type() == beacon_processor::RPC_BLOBS).then_some(()) }) .unwrap_or_else(|e| panic!("Expected blobs work event: {e}")), - // TODO(das): remove todo when adding tests for custody sync lookup - ResponseType::CustodyColumn => todo!(), + ResponseType::CustodyColumn => self + .pop_received_processor_event(|ev| { + (ev.work_type() == beacon_processor::RPC_CUSTODY_COLUMN).then_some(()) + }) + .unwrap_or_else(|e| panic!("Expected column work event: {e}")), } } + fn expect_rpc_custody_column_work_event(&mut self) { + self.pop_received_processor_event(|ev| { + if ev.work_type() == beacon_processor::RPC_CUSTODY_COLUMN { + Some(()) + } else { + None + } + }) + .unwrap_or_else(|e| panic!("Expected RPC custody column work: {e}")) + } + + fn expect_rpc_sample_verify_work_event(&mut self) { + self.pop_received_processor_event(|ev| { + if ev.work_type() == beacon_processor::RPC_VERIFY_DATA_COLUMNS { + Some(()) + } else { + None + } + }) + .unwrap_or_else(|e| panic!("Expected sample verify work: {e}")) + } + + fn expect_sampling_result_work(&mut self) { + self.pop_received_processor_event(|ev| { + if ev.work_type() == beacon_processor::SAMPLING_RESULT { + Some(()) + } else { + None + } + }) + .unwrap_or_else(|e| panic!("Expected sampling result work: {e}")) + } + fn expect_no_penalty_for(&mut self, peer_id: PeerId) { self.drain_network_rx(); let downscore_events = self @@ -763,7 +1120,11 @@ impl TestRig { fn expect_empty_network(&mut self) { self.drain_network_rx(); if !self.network_rx_queue.is_empty() { - panic!("expected no network events: {:#?}", self.network_rx_queue); + let n = self.network_rx_queue.len(); + panic!( + "expected no network events but got {n} events, displaying first 2: {:#?}", + self.network_rx_queue[..n.min(2)].iter().collect::>() + ); } } @@ -1588,6 +1949,94 @@ fn blobs_in_da_checker_skip_download() { r.expect_no_active_lookups(); } +#[test] +fn sampling_happy_path() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + r.new_connected_peers_for_peerdas(); + let (block, data_columns) = r.rand_block_and_data_columns(); + let block_root = block.canonical_root(); + r.trigger_sample_block(block_root, block.slot()); + // Retrieve all outgoing sample requests for random column indexes + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + // Resolve all of them one by one + r.complete_valid_sampling_column_requests(sampling_ids, data_columns); + r.expect_clean_finished_sampling(); +} + +#[test] +fn sampling_with_retries() { + let Some(mut r) = TestRig::test_setup_after_peerdas() 
else { + return; + }; + r.new_connected_peers_for_peerdas(); + let (block, data_columns) = r.rand_block_and_data_columns(); + let block_root = block.canonical_root(); + r.trigger_sample_block(block_root, block.slot()); + // Retrieve all outgoing sample requests for random column indexes, and return empty responses + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.return_empty_sampling_requests(sampling_ids); + // Expect retries for all of them, and resolve them + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.complete_valid_sampling_column_requests(sampling_ids, data_columns); + r.expect_clean_finished_sampling(); +} + +#[test] +fn sampling_avoid_retrying_same_peer() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + let peer_id_1 = r.new_connected_supernode_peer(); + let peer_id_2 = r.new_connected_supernode_peer(); + let block_root = Hash256::random(); + r.trigger_sample_block(block_root, Slot::new(0)); + // Retrieve all outgoing sample requests for random column indexes, and return empty responses + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.sampling_requests_failed(sampling_ids, peer_id_1, RPCError::Disconnected); + // Should retry the other peer + let sampling_ids = + r.expect_only_data_columns_by_root_requests(block_root, SAMPLING_REQUIRED_SUCCESSES); + r.sampling_requests_failed(sampling_ids, peer_id_2, RPCError::Disconnected); + // Expect no more retries + r.expect_empty_network(); +} + +#[test] +fn custody_lookup_happy_path() { + let Some(mut r) = TestRig::test_setup_after_peerdas() else { + return; + }; + let spec = E::default_spec(); + r.new_connected_peers_for_peerdas(); + let (block, data_columns) = r.rand_block_and_data_columns(); + let block_root = block.canonical_root(); + let peer_id = r.new_connected_peer(); + r.trigger_unknown_block_from_attestation(block_root, peer_id); + // Should not request blobs + let id = r.expect_block_lookup_request(block.canonical_root()); + r.complete_valid_block_request(id, block.into(), true); + let custody_column_count = spec.custody_requirement * spec.data_columns_per_subnet() as u64; + let custody_ids = + r.expect_only_data_columns_by_root_requests(block_root, custody_column_count as usize); + r.complete_valid_custody_request(custody_ids, data_columns, false); + r.expect_no_active_lookups(); +} + +// TODO(das): Test retries of DataColumnByRoot: +// - Expect request for column_index +// - Respond with bad data +// - Respond with stream terminator +// ^ The stream terminator should be ignored and not close the next retry + +// TODO(das): Test error early a sampling request and it getting drop + then receiving responses +// from pending requests. 
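// Illustrative sketch (not part of the patch): `custody_lookup_happy_path` above derives the
// expected number of custody column requests from the chain spec rather than hard-coding it.
// Assuming `custody_requirement: u64` and `data_columns_per_subnet() -> usize` on `ChainSpec`,
// as the test's arithmetic suggests, the relationship it relies on is:
fn expected_custody_column_count(spec: &types::ChainSpec) -> usize {
    // A node at the minimum custody requirement custodies `custody_requirement` subnets,
    // each of which carries `data_columns_per_subnet()` column indices.
    spec.custody_requirement as usize * spec.data_columns_per_subnet()
}
// Usage mirrors the test above, e.g.
// `r.expect_only_data_columns_by_root_requests(block_root, expected_custody_column_count(&spec))`.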
+ mod deneb_only { use super::*; use beacon_chain::{ diff --git a/beacon_node/network/src/sync/block_sidecar_coupling.rs b/beacon_node/network/src/sync/block_sidecar_coupling.rs index f31f2921ea2..966ce55fabe 100644 --- a/beacon_node/network/src/sync/block_sidecar_coupling.rs +++ b/beacon_node/network/src/sync/block_sidecar_coupling.rs @@ -1,69 +1,105 @@ -use beacon_chain::block_verification_types::RpcBlock; +use beacon_chain::{ + block_verification_types::RpcBlock, data_column_verification::CustodyDataColumn, get_block_root, +}; use lighthouse_network::PeerId; use ssz_types::VariableList; -use std::{collections::VecDeque, sync::Arc}; -use types::{BlobSidecar, EthSpec, SignedBeaconBlock}; - -use super::range_sync::ByRangeRequestType; +use std::{ + collections::{HashMap, VecDeque}, + sync::Arc, +}; +use types::{ + BlobSidecar, ChainSpec, ColumnIndex, DataColumnSidecar, EthSpec, Hash256, SignedBeaconBlock, +}; #[derive(Debug)] -pub struct BlocksAndBlobsRequestInfo { +pub struct RangeBlockComponentsRequest { /// Blocks we have received awaiting for their corresponding sidecar. - accumulated_blocks: VecDeque>>, + blocks: VecDeque>>, /// Sidecars we have received awaiting for their corresponding block. - accumulated_sidecars: VecDeque>>, + blobs: VecDeque>>, + data_columns: VecDeque>>, /// Whether the individual RPC request for blocks is finished or not. is_blocks_stream_terminated: bool, /// Whether the individual RPC request for sidecars is finished or not. is_sidecars_stream_terminated: bool, + custody_columns_streams_terminated: usize, /// Used to determine if this accumulator should wait for a sidecars stream termination - request_type: ByRangeRequestType, - /// The peer the request was made to. - pub(crate) peer_id: PeerId, + expects_blobs: bool, + expects_custody_columns: Option>, + /// Used to determine if the number of data columns stream termination this accumulator should + /// wait for. This may be less than the number of `expects_custody_columns` due to request batching. + num_custody_column_requests: Option, + /// The peers the request was made to. + pub(crate) peer_ids: Vec, } -impl BlocksAndBlobsRequestInfo { - pub fn new(request_type: ByRangeRequestType, peer_id: PeerId) -> Self { +impl RangeBlockComponentsRequest { + pub fn new( + expects_blobs: bool, + expects_custody_columns: Option>, + num_custody_column_requests: Option, + peer_ids: Vec, + ) -> Self { Self { - accumulated_blocks: <_>::default(), - accumulated_sidecars: <_>::default(), - is_blocks_stream_terminated: <_>::default(), - is_sidecars_stream_terminated: <_>::default(), - request_type, - peer_id, + blocks: <_>::default(), + blobs: <_>::default(), + data_columns: <_>::default(), + is_blocks_stream_terminated: false, + is_sidecars_stream_terminated: false, + custody_columns_streams_terminated: 0, + expects_blobs, + expects_custody_columns, + num_custody_column_requests, + peer_ids, } } - pub fn get_request_type(&self) -> ByRangeRequestType { - self.request_type + // TODO: This function should be deprecated when simplying the retry mechanism of this range + // requests. 
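// Until then, `get_requirements` exposes the blob / custody-column expectations so a caller
// can rebuild an equivalent `RangeBlockComponentsRequest` when the batch has to be retried.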
+ pub fn get_requirements(&self) -> (bool, Option>) { + (self.expects_blobs, self.expects_custody_columns.clone()) } pub fn add_block_response(&mut self, block_opt: Option>>) { match block_opt { - Some(block) => self.accumulated_blocks.push_back(block), + Some(block) => self.blocks.push_back(block), None => self.is_blocks_stream_terminated = true, } } pub fn add_sidecar_response(&mut self, sidecar_opt: Option>>) { match sidecar_opt { - Some(sidecar) => self.accumulated_sidecars.push_back(sidecar), + Some(sidecar) => self.blobs.push_back(sidecar), None => self.is_sidecars_stream_terminated = true, } } - pub fn into_responses(self) -> Result>, String> { - let BlocksAndBlobsRequestInfo { - accumulated_blocks, - accumulated_sidecars, - .. - } = self; + pub fn add_data_column(&mut self, column_opt: Option>>) { + match column_opt { + Some(column) => self.data_columns.push_back(column), + // TODO(das): this mechanism is dangerous, if somehow there are two requests for the + // same column index it can terminate early. This struct should track that all requests + // for all custody columns terminate. + None => self.custody_columns_streams_terminated += 1, + } + } + + pub fn into_responses(self, spec: &ChainSpec) -> Result>, String> { + if let Some(expects_custody_columns) = self.expects_custody_columns.clone() { + self.into_responses_with_custody_columns(expects_custody_columns, spec) + } else { + self.into_responses_with_blobs() + } + } + + fn into_responses_with_blobs(self) -> Result>, String> { + let RangeBlockComponentsRequest { blocks, blobs, .. } = self; // There can't be more more blobs than blocks. i.e. sending any blob (empty // included) for a skipped slot is not permitted. - let mut responses = Vec::with_capacity(accumulated_blocks.len()); - let mut blob_iter = accumulated_sidecars.into_iter().peekable(); - for block in accumulated_blocks.into_iter() { + let mut responses = Vec::with_capacity(blocks.len()); + let mut blob_iter = blobs.into_iter().peekable(); + for block in blocks.into_iter() { let mut blob_list = Vec::with_capacity(E::max_blobs_per_block()); while { let pair_next_blob = blob_iter @@ -99,20 +135,110 @@ impl BlocksAndBlobsRequestInfo { Ok(responses) } + fn into_responses_with_custody_columns( + self, + expects_custody_columns: Vec, + spec: &ChainSpec, + ) -> Result>, String> { + let RangeBlockComponentsRequest { + blocks, + data_columns, + .. + } = self; + + // Group data columns by block_root and index + let mut data_columns_by_block = + HashMap::>>>::new(); + + for column in data_columns { + let block_root = column.block_root(); + let index = column.index; + if data_columns_by_block + .entry(block_root) + .or_default() + .insert(index, column) + .is_some() + { + return Err(format!( + "Repeated column block_root {block_root:?} index {index}" + )); + } + } + + // Now iterate all blocks ensuring that the block roots of each block and data column match, + // plus we have columns for our custody requirements + let mut rpc_blocks = Vec::with_capacity(blocks.len()); + + for block in blocks { + let block_root = get_block_root(&block); + rpc_blocks.push(if block.num_expected_blobs() > 0 { + let Some(mut data_columns_by_index) = data_columns_by_block.remove(&block_root) + else { + // This PR ignores the fix from https://github.com/sigp/lighthouse/pull/5675 + // which allows blobs to not match blocks. 
+ // TODO(das): on the initial version of PeerDAS the beacon chain does not check + // rpc custody requirements and dropping this check can allow the block to have + // an inconsistent DB. + return Err(format!("No columns for block {block_root:?} with data")); + }; + + let mut custody_columns = vec![]; + for index in &expects_custody_columns { + let Some(data_column) = data_columns_by_index.remove(index) else { + return Err(format!("No column for block {block_root:?} index {index}")); + }; + // Safe to convert to `CustodyDataColumn`: we have asserted that the index of + // this column is in the set of `expects_custody_columns` and with the expected + // block root, so for the expected epoch of this batch. + custody_columns.push(CustodyDataColumn::from_asserted_custody(data_column)); + } + + // Assert that there are no columns left + if !data_columns_by_index.is_empty() { + let remaining_indices = data_columns_by_index.keys().collect::>(); + return Err(format!( + "Not all columns consumed for block {block_root:?}: {remaining_indices:?}" + )); + } + + RpcBlock::new_with_custody_columns(Some(block_root), block, custody_columns, spec) + .map_err(|e| format!("{e:?}"))? + } else { + RpcBlock::new_without_blobs(Some(block_root), block) + }); + } + + // Assert that there are no columns left for other blocks + if !data_columns_by_block.is_empty() { + let remaining_roots = data_columns_by_block.keys().collect::>(); + return Err(format!("Not all columns consumed: {remaining_roots:?}")); + } + + Ok(rpc_blocks) + } + pub fn is_finished(&self) -> bool { - let blobs_requested = match self.request_type { - ByRangeRequestType::Blocks => false, - ByRangeRequestType::BlocksAndBlobs => true, - }; - self.is_blocks_stream_terminated && (!blobs_requested || self.is_sidecars_stream_terminated) + if !self.is_blocks_stream_terminated { + return false; + } + if self.expects_blobs && !self.is_sidecars_stream_terminated { + return false; + } + if let Some(expects_custody_column_responses) = self.num_custody_column_requests { + if self.custody_columns_streams_terminated < expects_custody_column_responses { + return false; + } + } + true } } #[cfg(test)] mod tests { - use super::BlocksAndBlobsRequestInfo; - use crate::sync::range_sync::ByRangeRequestType; - use beacon_chain::test_utils::{generate_rand_block_and_blobs, NumBlobs}; + use super::RangeBlockComponentsRequest; + use beacon_chain::test_utils::{ + generate_rand_block_and_blobs, generate_rand_block_and_data_columns, test_spec, NumBlobs, + }; use lighthouse_network::PeerId; use rand::SeedableRng; use types::{test_utils::XorShiftRng, ForkName, MinimalEthSpec as E}; @@ -120,7 +246,7 @@ mod tests { #[test] fn no_blobs_into_responses() { let peer_id = PeerId::random(); - let mut info = BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::Blocks, peer_id); + let mut info = RangeBlockComponentsRequest::::new(false, None, None, vec![peer_id]); let mut rng = XorShiftRng::from_seed([42; 16]); let blocks = (0..4) .map(|_| generate_rand_block_and_blobs::(ForkName::Base, NumBlobs::None, &mut rng).0) @@ -134,14 +260,13 @@ mod tests { // Assert response is finished and RpcBlocks can be constructed assert!(info.is_finished()); - info.into_responses().unwrap(); + info.into_responses(&test_spec::()).unwrap(); } #[test] fn empty_blobs_into_responses() { let peer_id = PeerId::random(); - let mut info = - BlocksAndBlobsRequestInfo::::new(ByRangeRequestType::BlocksAndBlobs, peer_id); + let mut info = RangeBlockComponentsRequest::::new(true, None, None, vec![peer_id]); let mut rng = 
XorShiftRng::from_seed([42; 16]); let blocks = (0..4) .map(|_| { @@ -162,6 +287,123 @@ mod tests { // This makes sure we don't expect blobs here when they have expired. Checking this logic should // be hendled elsewhere. assert!(info.is_finished()); - info.into_responses().unwrap(); + info.into_responses(&test_spec::()).unwrap(); + } + + #[test] + fn rpc_block_with_custody_columns() { + let spec = test_spec::(); + let expects_custody_columns = vec![1, 2, 3, 4]; + let mut info = RangeBlockComponentsRequest::::new( + false, + Some(expects_custody_columns.clone()), + Some(expects_custody_columns.len()), + vec![PeerId::random()], + ); + let mut rng = XorShiftRng::from_seed([42; 16]); + let blocks = (0..4) + .map(|_| { + generate_rand_block_and_data_columns::( + ForkName::Deneb, + NumBlobs::Number(1), + &mut rng, + &spec, + ) + }) + .collect::>(); + + // Send blocks and complete terminate response + for block in &blocks { + info.add_block_response(Some(block.0.clone().into())); + } + info.add_block_response(None); + // Assert response is not finished + assert!(!info.is_finished()); + + // Send data columns interleaved + for block in &blocks { + for column in &block.1 { + if expects_custody_columns.contains(&column.index) { + info.add_data_column(Some(column.clone())); + } + } + } + + // Terminate the requests + for (i, _column_index) in expects_custody_columns.iter().enumerate() { + info.add_data_column(None); + + if i < expects_custody_columns.len() - 1 { + assert!( + !info.is_finished(), + "requested should not be finished at loop {i}" + ); + } else { + assert!( + info.is_finished(), + "request should be finishied at loop {i}" + ); + } + } + + // All completed construct response + info.into_responses(&spec).unwrap(); + } + + #[test] + fn rpc_block_with_custody_columns_batched() { + let spec = test_spec::(); + let expects_custody_columns = vec![1, 2, 3, 4]; + let num_of_data_column_requests = 2; + let mut info = RangeBlockComponentsRequest::::new( + false, + Some(expects_custody_columns.clone()), + Some(num_of_data_column_requests), + vec![PeerId::random()], + ); + let mut rng = XorShiftRng::from_seed([42; 16]); + let blocks = (0..4) + .map(|_| { + generate_rand_block_and_data_columns::( + ForkName::Deneb, + NumBlobs::Number(1), + &mut rng, + &spec, + ) + }) + .collect::>(); + + // Send blocks and complete terminate response + for block in &blocks { + info.add_block_response(Some(block.0.clone().into())); + } + info.add_block_response(None); + // Assert response is not finished + assert!(!info.is_finished()); + + // Send data columns interleaved + for block in &blocks { + for column in &block.1 { + if expects_custody_columns.contains(&column.index) { + info.add_data_column(Some(column.clone())); + } + } + } + + // Terminate the requests + for i in 0..num_of_data_column_requests { + info.add_data_column(None); + if i < num_of_data_column_requests - 1 { + assert!( + !info.is_finished(), + "requested should not be finished at loop {i}" + ); + } else { + assert!(info.is_finished(), "request should be finished at loop {i}"); + } + } + + // All completed construct response + info.into_responses(&spec).unwrap(); } } diff --git a/beacon_node/network/src/sync/manager.rs b/beacon_node/network/src/sync/manager.rs index e494f1f94fc..d6ce14adb16 100644 --- a/beacon_node/network/src/sync/manager.rs +++ b/beacon_node/network/src/sync/manager.rs @@ -38,13 +38,15 @@ use super::block_lookups::BlockLookups; use super::network_context::{BlockOrBlob, RangeRequestId, RpcEvent, SyncNetworkContext}; use 
super::peer_sync_info::{remote_sync_type, PeerSyncType}; use super::range_sync::{RangeSync, RangeSyncType, EPOCHS_PER_BATCH}; +use super::sampling::{Sampling, SamplingConfig, SamplingResult}; use crate::network_beacon_processor::{ChainSegmentProcessId, NetworkBeaconProcessor}; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::block_lookups::{ - BlobRequestState, BlockComponent, BlockRequestState, DownloadResult, + BlobRequestState, BlockComponent, BlockRequestState, CustodyRequestState, DownloadResult, }; -use crate::sync::block_sidecar_coupling::BlocksAndBlobsRequestInfo; +use crate::sync::block_sidecar_coupling::RangeBlockComponentsRequest; +use crate::sync::network_context::PeerGroup; use beacon_chain::block_verification_types::AsBlock; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::validator_monitor::timestamp_now; @@ -54,7 +56,8 @@ use beacon_chain::{ use futures::StreamExt; use lighthouse_network::rpc::RPCError; use lighthouse_network::service::api_types::{ - DataColumnsByRootRequestId, Id, SingleLookupReqId, SyncRequestId, + DataColumnsByRootRequestId, DataColumnsByRootRequester, Id, SamplingId, SamplingRequester, + SingleLookupReqId, SyncRequestId, }; use lighthouse_network::types::{NetworkGlobals, SyncState}; use lighthouse_network::SyncInfo; @@ -124,6 +127,10 @@ pub enum SyncMessage { /// manager to attempt to find the block matching the unknown hash. UnknownBlockHashFromAttestation(PeerId, Hash256), + /// Request to start sampling a block. Caller should ensure that block has data before sending + /// the request. + SampleBlock(Hash256, Slot), + /// A peer has disconnected. Disconnect(PeerId), @@ -146,6 +153,12 @@ pub enum SyncMessage { result: BlockProcessingResult, }, + /// Sample data column verified + SampleVerified { + id: SamplingId, + result: Result<(), String>, + }, + /// A block from gossip has completed processing, GossipBlockProcessResult { block_root: Hash256, imported: bool }, } @@ -155,6 +168,17 @@ pub enum SyncMessage { pub enum BlockProcessType { SingleBlock { id: Id }, SingleBlob { id: Id }, + SingleCustodyColumn(Id), +} + +impl BlockProcessType { + pub fn id(&self) -> Id { + match self { + BlockProcessType::SingleBlock { id } + | BlockProcessType::SingleBlob { id } + | BlockProcessType::SingleCustodyColumn(id) => *id, + } + } } #[derive(Debug)] @@ -206,6 +230,8 @@ pub struct SyncManager { /// one event is useful, the rest generating log noise and wasted cycles notified_unknown_roots: LRUTimeCache<(PeerId, Hash256)>, + sampling: Sampling, + /// The logger for the import manager. 
log: Logger, } @@ -232,6 +258,7 @@ pub fn spawn( network_send, beacon_processor, sync_recv, + SamplingConfig::Default, log.clone(), ); @@ -246,6 +273,7 @@ impl SyncManager { network_send: mpsc::UnboundedSender>, beacon_processor: Arc>, sync_recv: mpsc::UnboundedReceiver>, + sampling_config: SamplingConfig, log: slog::Logger, ) -> Self { let network_globals = beacon_processor.network_globals.clone(); @@ -271,6 +299,7 @@ impl SyncManager { notified_unknown_roots: LRUTimeCache::new(Duration::from_secs( NOTIFIED_UNKNOWN_ROOT_EXPIRY_SECONDS, )), + sampling: Sampling::new(sampling_config, log.new(o!("service" => "sampling"))), log: log.clone(), } } @@ -299,6 +328,11 @@ impl SyncManager { self.block_lookups.insert_failed_chain(block_root); } + #[cfg(test)] + pub(crate) fn active_sampling_requests(&self) -> Vec { + self.sampling.active_sampling_requests() + } + fn network_globals(&self) -> &NetworkGlobals { self.network.network_globals() } @@ -650,7 +684,7 @@ impl SyncManager { value: block.block_cloned(), block_root, seen_timestamp: timestamp_now(), - peer_id, + peer_group: PeerGroup::from_single(peer_id), }), ); } @@ -668,12 +702,27 @@ impl SyncManager { value: blob, block_root, seen_timestamp: timestamp_now(), - peer_id, + peer_group: PeerGroup::from_single(peer_id), }), ); } - SyncMessage::UnknownParentDataColumn(_peer_id, _data_column) => { - // TODO(das): data column parent lookup to be implemented + SyncMessage::UnknownParentDataColumn(peer_id, data_column) => { + let data_column_slot = data_column.slot(); + let block_root = data_column.block_root(); + let parent_root = data_column.block_parent_root(); + debug!(self.log, "Received unknown parent data column message"; "block_root" => %block_root, "parent_root" => %parent_root); + self.handle_unknown_parent( + peer_id, + block_root, + parent_root, + data_column_slot, + BlockComponent::DataColumn(DownloadResult { + value: data_column, + block_root, + seen_timestamp: timestamp_now(), + peer_group: PeerGroup::from_single(peer_id), + }), + ); } SyncMessage::UnknownBlockHashFromAttestation(peer_id, block_root) => { if !self.notified_unknown_roots.contains(&(peer_id, block_root)) { @@ -682,6 +731,15 @@ impl SyncManager { self.handle_unknown_block_root(peer_id, block_root); } } + SyncMessage::SampleBlock(block_root, block_slot) => { + debug!(self.log, "Received SampleBlock message"; "block_root" => %block_root, "slot" => block_slot); + if let Some((requester, result)) = self + .sampling + .on_new_sample_request(block_root, &mut self.network) + { + self.on_sampling_result(requester, result) + } + } SyncMessage::Disconnect(peer_id) => { debug!(self.log, "Received disconnected message"; "peer_id" => %peer_id); self.peer_disconnect(&peer_id); @@ -731,6 +789,14 @@ impl SyncManager { } } }, + SyncMessage::SampleVerified { id, result } => { + if let Some((requester, result)) = + self.sampling + .on_sample_verified(id, result, &mut self.network) + { + self.on_sampling_result(requester, result) + } + } } } @@ -885,8 +951,9 @@ impl SyncManager { self.block_lookups .on_download_response::>( id, - peer_id, - resp, + resp.map(|(value, seen_timestamp)| { + (value, PeerGroup::from_single(peer_id), seen_timestamp) + }), &mut self.network, ) } @@ -936,8 +1003,12 @@ impl SyncManager { }, ); } - SyncRequestId::RangeBlockAndBlobs { id: _ } => { - // TODO(das): implement custody range sync + SyncRequestId::RangeBlockAndBlobs { id } => { + self.range_block_and_blobs_response( + id, + peer_id, + BlockOrBlob::CustodyColumns(data_column), + ); } _ => { crit!(self.log, "bad 
request id for data_column"; "peer_id" => %peer_id); @@ -955,8 +1026,9 @@ impl SyncManager { self.block_lookups .on_download_response::>( id, - peer_id, - resp, + resp.map(|(value, seen_timestamp)| { + (value, PeerGroup::from_single(peer_id), seen_timestamp) + }), &mut self.network, ) } @@ -965,15 +1037,74 @@ impl SyncManager { fn on_data_columns_by_root_response( &mut self, req_id: DataColumnsByRootRequestId, - _requester: SingleLookupReqId, + requester: DataColumnsByRootRequester, peer_id: PeerId, - rpc_event: RpcEvent>>, + data_column: RpcEvent>>, ) { - if let Some(_resp) = self - .network - .on_data_columns_by_root_response(req_id, peer_id, rpc_event) + if let Some(resp) = + self.network + .on_data_columns_by_root_response(req_id, peer_id, data_column) { - // TODO(das): pass data_columns_by_root result to consumer + match requester { + DataColumnsByRootRequester::Sampling(id) => { + if let Some((requester, result)) = + self.sampling + .on_sample_downloaded(id, peer_id, resp, &mut self.network) + { + self.on_sampling_result(requester, result) + } + } + DataColumnsByRootRequester::Custody(custody_id) => { + if let Some(custody_columns) = self + .network + .on_custody_by_root_response(custody_id, req_id, peer_id, resp) + { + // TODO(das): get proper timestamp + let seen_timestamp = timestamp_now(); + self.block_lookups + .on_download_response::>( + custody_id.requester.0, + custody_columns.map(|(columns, peer_group)| { + (columns, peer_group, seen_timestamp) + }), + &mut self.network, + ); + } + } + } + } + } + + fn on_sampling_result(&mut self, requester: SamplingRequester, result: SamplingResult) { + // TODO(das): How is a consumer of sampling results? + // - Fork-choice for trailing DA + // - Single lookups to complete import requirements + // - Range sync to complete import requirements? Can sampling for syncing lag behind and + // accumulate in fork-choice? + + match requester { + SamplingRequester::ImportedBlock(block_root) => { + debug!(self.log, "Sampling result"; "block_root" => %block_root, "result" => ?result); + + // TODO(das): Consider moving SamplingResult to the beacon_chain crate and import + // here. No need to add too much enum variants, just whatever the beacon_chain or + // fork-choice needs to make a decision. Currently the fork-choice only needs to + // be notified of successful samplings, i.e. 
sampling failures don't trigger pruning + match result { + Ok(_) => { + if let Err(e) = self + .network + .beacon_processor() + .send_sampling_completed(block_root) + { + warn!(self.log, "Error sending sampling result"; "block_root" => ?block_root, "reason" => ?e); + } + } + Err(e) => { + warn!(self.log, "Sampling failed"; "block_root" => %block_root, "reason" => ?e); + } + } + } } } @@ -1027,7 +1158,12 @@ impl SyncManager { self.network.insert_range_blocks_and_blobs_request( id, resp.sender_id, - BlocksAndBlobsRequestInfo::new(resp.request_type, peer_id), + RangeBlockComponentsRequest::new( + resp.expects_blobs, + resp.expects_custody_columns, + None, + vec![], + ), ); // inform range that the request needs to be treated as failed // With time we will want to downgrade this log diff --git a/beacon_node/network/src/sync/mod.rs b/beacon_node/network/src/sync/mod.rs index 7b244bceceb..6669add4453 100644 --- a/beacon_node/network/src/sync/mod.rs +++ b/beacon_node/network/src/sync/mod.rs @@ -8,6 +8,8 @@ pub mod manager; mod network_context; mod peer_sync_info; mod range_sync; +mod sampling; +pub use lighthouse_network::service::api_types::SamplingId; pub use manager::{BatchProcessResult, SyncMessage}; pub use range_sync::{BatchOperationOutcome, ChainId}; diff --git a/beacon_node/network/src/sync/network_context.rs b/beacon_node/network/src/sync/network_context.rs index fa9159f7f8e..0b02a986f73 100644 --- a/beacon_node/network/src/sync/network_context.rs +++ b/beacon_node/network/src/sync/network_context.rs @@ -1,43 +1,53 @@ //! Provides network functionality for the Syncing thread. This fundamentally wraps a network //! channel and stores a global RPC ID to perform requests. +use self::custody::{ActiveCustodyRequest, Error as CustodyRequestError}; use self::requests::{ActiveBlobsByRootRequest, ActiveBlocksByRootRequest}; -pub use self::requests::{BlobsByRootSingleBlockRequest, BlocksByRootSingleRequest}; -use super::block_sidecar_coupling::BlocksAndBlobsRequestInfo; +pub use self::requests::{BlocksByRootSingleRequest, DataColumnsByRootSingleBlockRequest}; +use super::block_sidecar_coupling::RangeBlockComponentsRequest; +use super::manager::BlockProcessType; use super::range_sync::{BatchId, ByRangeRequestType, ChainId}; use crate::metrics; use crate::network_beacon_processor::NetworkBeaconProcessor; use crate::service::NetworkMessage; use crate::status::ToStatusMessage; use crate::sync::block_lookups::SingleLookupId; -use crate::sync::manager::BlockProcessType; +use crate::sync::network_context::requests::BlobsByRootSingleBlockRequest; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::{BeaconChain, BeaconChainTypes, BlockProcessStatus, EngineState}; use fnv::FnvHashMap; -use lighthouse_network::rpc::methods::BlobsByRangeRequest; +use lighthouse_network::rpc::methods::{BlobsByRangeRequest, DataColumnsByRangeRequest}; use lighthouse_network::rpc::{BlocksByRangeRequest, GoodbyeReason, RPCError}; use lighthouse_network::service::api_types::{ - AppRequestId, DataColumnsByRootRequestId, Id, SingleLookupReqId, SyncRequestId, + AppRequestId, CustodyId, CustodyRequester, DataColumnsByRootRequestId, + DataColumnsByRootRequester, Id, SingleLookupReqId, SyncRequestId, }; use lighthouse_network::{Client, NetworkGlobals, PeerAction, PeerId, ReportSource, Request}; +use rand::seq::SliceRandom; +use rand::thread_rng; +use requests::ActiveDataColumnsByRootRequest; pub use requests::LookupVerifyError; -use requests::{ActiveDataColumnsByRootRequest, DataColumnsByRootSingleBlockRequest}; -use 
slog::{debug, error, trace, warn}; +use slog::{debug, error, warn}; +use slot_clock::SlotClock; use std::collections::hash_map::Entry; +use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; use types::blob_sidecar::FixedBlobSidecarList; use types::{ - BlobSidecar, DataColumnSidecar, DataColumnSidecarList, EthSpec, Hash256, SignedBeaconBlock, + BlobSidecar, ColumnIndex, DataColumnSidecar, DataColumnSidecarList, EthSpec, Hash256, + SignedBeaconBlock, Slot, }; +pub mod custody; mod requests; pub struct BlocksAndBlobsByRangeResponse { pub sender_id: RangeRequestId, pub responses: Result>, String>, - pub request_type: ByRangeRequestType, + pub expects_blobs: bool, + pub expects_custody_columns: Option>, } #[derive(Debug, Clone, Copy)] @@ -60,15 +70,20 @@ pub enum RpcEvent { pub type RpcResponseResult = Result<(T, Duration), RpcResponseError>; +#[derive(Debug)] pub enum RpcResponseError { RpcError(RPCError), VerifyError(LookupVerifyError), + CustodyRequestError(CustodyRequestError), } #[derive(Debug, PartialEq, Eq)] pub enum RpcRequestSendError { /// Network channel send failed NetworkSendError, + NoCustodyPeers, + CustodyRequestError(custody::Error), + SlotClockError, } #[derive(Debug, PartialEq, Eq)] @@ -82,6 +97,7 @@ impl std::fmt::Display for RpcResponseError { match self { RpcResponseError::RpcError(e) => write!(f, "RPC Error: {:?}", e), RpcResponseError::VerifyError(e) => write!(f, "Lookup Verify Error: {:?}", e), + RpcResponseError::CustodyRequestError(e) => write!(f, "Custody Request Error: {:?}", e), } } } @@ -98,6 +114,31 @@ impl From for RpcResponseError { } } +/// Represents a group of peers that served a block component. +#[derive(Clone, Debug)] +pub struct PeerGroup { + /// Peers group by which indexed section of the block component they served. For example: + /// - PeerA served = [blob index 0, blob index 2] + /// - PeerA served = [blob index 1] + peers: HashMap>, +} + +impl PeerGroup { + /// Return a peer group where a single peer returned all parts of a block component. For + /// example, a block has a single component (the block = index 0/1). + pub fn from_single(peer: PeerId) -> Self { + Self { + peers: HashMap::from_iter([(peer, vec![0])]), + } + } + pub fn from_set(peers: HashMap>) -> Self { + Self { peers } + } + pub fn all(&self) -> impl Iterator + '_ { + self.peers.keys() + } +} + /// Sequential ID that uniquely identifies ReqResp outgoing requests pub type ReqId = u32; @@ -128,13 +169,16 @@ pub struct SyncNetworkContext { /// A mapping of active BlobsByRoot requests, including both current slot and parent lookups. blobs_by_root_requests: FnvHashMap>, + /// Mapping of active custody column requests for a block root + custody_by_root_requests: FnvHashMap>, + /// A mapping of active DataColumnsByRoot requests data_columns_by_root_requests: FnvHashMap>, /// BlocksByRange requests paired with BlobsByRange - range_blocks_and_blobs_requests: - FnvHashMap)>, + range_block_components_requests: + FnvHashMap)>, /// Whether the ee is online. If it's not, we don't allow access to the /// `beacon_processor_send`. 
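// Illustrative sketch (not part of the patch): how the `PeerGroup` introduced above can be
// constructed for the two cases sync now distinguishes. A block or blob is still attributed to a
// single peer, while custody columns may be served by several peers, recorded per column index so
// a later failure can be attributed back to each contributor (e.g. via `all()`). This assumes the
// elided generics are `HashMap<PeerId, Vec<usize>>`, consistent with `from_single`; peer ids and
// indices are made up.
fn example_peer_groups(block_peer: PeerId, peer_a: PeerId, peer_b: PeerId) -> (PeerGroup, PeerGroup) {
    use std::collections::HashMap;
    // Single-component download: the whole result is attributed to one peer.
    let block_group = PeerGroup::from_single(block_peer);
    // Multi-peer custody download: record which indexed parts each peer served.
    let custody_group = PeerGroup::from_set(HashMap::from_iter([
        (peer_a, vec![0, 2]), // peer_a served column indices 0 and 2
        (peer_b, vec![1]),    // peer_b served column index 1
    ]));
    (block_group, custody_group)
}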
@@ -153,6 +197,7 @@ pub struct SyncNetworkContext { pub enum BlockOrBlob { Block(Option>>), Blob(Option>>), + CustodyColumns(Option>>), } impl From>>> for BlockOrBlob { @@ -181,7 +226,8 @@ impl SyncNetworkContext { blocks_by_root_requests: <_>::default(), blobs_by_root_requests: <_>::default(), data_columns_by_root_requests: <_>::default(), - range_blocks_and_blobs_requests: FnvHashMap::default(), + custody_by_root_requests: <_>::default(), + range_block_components_requests: FnvHashMap::default(), network_beacon_processor, chain, log, @@ -191,10 +237,10 @@ impl SyncNetworkContext { /// Returns the ids of all the requests made to the given peer_id. pub fn peer_disconnected(&mut self, peer_id: &PeerId) -> Vec { let failed_range_ids = - self.range_blocks_and_blobs_requests + self.range_block_components_requests .iter() .filter_map(|(id, request)| { - if request.1.peer_id == *peer_id { + if request.1.peer_ids.contains(peer_id) { Some(SyncRequestId::RangeBlockAndBlobs { id: *id }) } else { None @@ -239,6 +285,17 @@ impl SyncNetworkContext { .collect() } + pub fn get_custodial_peers(&self, column_index: ColumnIndex) -> Vec { + self.network_globals() + .custody_peers_for_column(column_index) + } + + pub fn get_random_custodial_peer(&self, column_index: ColumnIndex) -> Option { + self.get_custodial_peers(column_index) + .choose(&mut thread_rng()) + .cloned() + } + pub fn network_globals(&self) -> &NetworkGlobals { &self.network_beacon_processor.network_globals } @@ -277,19 +334,23 @@ impl SyncNetworkContext { } } - /// A blocks by range request for the range sync algorithm. - pub fn blocks_by_range_request( + /// A blocks by range request sent by the range sync algorithm + pub fn block_components_by_range_request( &mut self, peer_id: PeerId, batch_type: ByRangeRequestType, request: BlocksByRangeRequest, + sender_id: RangeRequestId, ) -> Result { + let epoch = Slot::new(*request.start_slot()).epoch(T::EthSpec::slots_per_epoch()); let id = self.next_id(); - trace!( + let mut requested_peers = vec![peer_id]; + debug!( self.log, "Sending BlocksByRange request"; "method" => "BlocksByRange", "count" => request.count(), + "epoch" => epoch, "peer" => %peer_id, ); self.network_send @@ -300,12 +361,13 @@ impl SyncNetworkContext { }) .map_err(|_| RpcRequestSendError::NetworkSendError)?; - if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) { + let expected_blobs = if matches!(batch_type, ByRangeRequestType::BlocksAndBlobs) { debug!( self.log, "Sending BlobsByRange requests"; "method" => "BlobsByRange", "count" => request.count(), + "epoch" => epoch, "peer" => %peer_id, ); @@ -320,33 +382,94 @@ impl SyncNetworkContext { request_id: AppRequestId::Sync(SyncRequestId::RangeBlockAndBlobs { id }), }) .map_err(|_| RpcRequestSendError::NetworkSendError)?; - } + true + } else { + false + }; + + let (expects_custody_columns, num_of_custody_column_req) = + if matches!(batch_type, ByRangeRequestType::BlocksAndColumns) { + let custody_indexes = self.network_globals().custody_columns(); + let mut num_of_custody_column_req = 0; + + for (peer_id, columns_by_range_request) in + self.make_columns_by_range_requests(request, &custody_indexes)? 
+ { + requested_peers.push(peer_id); + + debug!( + self.log, + "Sending DataColumnsByRange requests"; + "method" => "DataColumnsByRange", + "count" => columns_by_range_request.count, + "epoch" => epoch, + "columns" => ?columns_by_range_request.columns, + "peer" => %peer_id, + ); + + self.send_network_msg(NetworkMessage::SendRequest { + peer_id, + request: Request::DataColumnsByRange(columns_by_range_request), + request_id: AppRequestId::Sync(SyncRequestId::RangeBlockAndBlobs { id }), + }) + .map_err(|_| RpcRequestSendError::NetworkSendError)?; + + num_of_custody_column_req += 1; + } + (Some(custody_indexes), Some(num_of_custody_column_req)) + } else { + (None, None) + }; + + let info = RangeBlockComponentsRequest::new( + expected_blobs, + expects_custody_columns, + num_of_custody_column_req, + requested_peers, + ); + self.range_block_components_requests + .insert(id, (sender_id, info)); Ok(id) } - /// A blocks by range request sent by the range sync algorithm - pub fn blocks_and_blobs_by_range_request( - &mut self, - peer_id: PeerId, - batch_type: ByRangeRequestType, + fn make_columns_by_range_requests( + &self, request: BlocksByRangeRequest, - sender_id: RangeRequestId, - ) -> Result { - let id = self.blocks_by_range_request(peer_id, batch_type, request)?; - self.range_blocks_and_blobs_requests.insert( - id, - ( - sender_id, - BlocksAndBlobsRequestInfo::new(batch_type, peer_id), - ), - ); - Ok(id) + custody_indexes: &Vec, + ) -> Result, RpcRequestSendError> { + let mut peer_id_to_request_map = HashMap::new(); + + for column_index in custody_indexes { + // TODO(das): The peer selection logic here needs to be improved - we should probably + // avoid retrying from failed peers, however `BatchState` currently only tracks the peer + // serving the blocks. + let Some(custody_peer) = self.get_random_custodial_peer(*column_index) else { + // TODO(das): this will be pretty bad UX. To improve we should: + // - Attempt to fetch custody requests first, before requesting blocks + // - Handle the no peers case gracefully, maybe add some timeout and give a few + // minutes / seconds to the peer manager to locate peers on this subnet before + // abandoing progress on the chain completely. 
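// Note: bailing out here aborts the whole `block_components_by_range_request` for this batch;
// see the UX caveat in the TODO above.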
+ return Err(RpcRequestSendError::NoCustodyPeers); + }; + + let columns_by_range_request = peer_id_to_request_map + .entry(custody_peer) + .or_insert_with(|| DataColumnsByRangeRequest { + start_slot: *request.start_slot(), + count: *request.count(), + columns: vec![], + }); + + columns_by_range_request.columns.push(*column_index); + } + + Ok(peer_id_to_request_map) } pub fn range_request_failed(&mut self, request_id: Id) -> Option { let sender_id = self - .range_blocks_and_blobs_requests + .range_block_components_requests .remove(&request_id) .map(|(sender_id, _info)| sender_id); if let Some(sender_id) = sender_id { @@ -370,7 +493,7 @@ impl SyncNetworkContext { request_id: Id, block_or_blob: BlockOrBlob, ) -> Option> { - let Entry::Occupied(mut entry) = self.range_blocks_and_blobs_requests.entry(request_id) + let Entry::Occupied(mut entry) = self.range_block_components_requests.entry(request_id) else { metrics::inc_counter_vec(&metrics::SYNC_UNKNOWN_NETWORK_REQUESTS, &["range_blocks"]); return None; @@ -380,15 +503,17 @@ impl SyncNetworkContext { match block_or_blob { BlockOrBlob::Block(maybe_block) => info.add_block_response(maybe_block), BlockOrBlob::Blob(maybe_sidecar) => info.add_sidecar_response(maybe_sidecar), + BlockOrBlob::CustodyColumns(column) => info.add_data_column(column), } if info.is_finished() { // If the request is finished, dequeue everything let (sender_id, info) = entry.remove(); - let request_type = info.get_request_type(); + let (expects_blobs, expects_custody_columns) = info.get_requirements(); Some(BlocksAndBlobsByRangeResponse { sender_id, - request_type, - responses: info.into_responses(), + responses: info.into_responses(&self.chain.spec), + expects_blobs, + expects_custody_columns, }) } else { None @@ -470,6 +595,21 @@ impl SyncNetworkContext { block_root: Hash256, downloaded_block: Option>>, ) -> Result { + // Check if we are into deneb, and before peerdas + if !self + .chain + .data_availability_checker + .blobs_required_for_epoch( + // TODO(das): use the block's slot + self.chain + .slot_clock + .now_or_genesis() + .ok_or(RpcRequestSendError::SlotClockError)? + .epoch(T::EthSpec::slots_per_epoch()), + ) + { + return Ok(LookupRequestResult::NoRequestNeeded); + } let Some(block) = downloaded_block.or_else(|| { // If the block is already being processed or fully validated, retrieve how many blobs // it expects. Consider any stage of the block. If the block root has been validated, we @@ -553,7 +693,7 @@ impl SyncNetworkContext { /// Request to send a single `data_columns_by_root` request to the network. pub fn data_column_lookup_request( &mut self, - requester: SingleLookupReqId, + requester: DataColumnsByRootRequester, peer_id: PeerId, request: DataColumnsByRootSingleBlockRequest, ) -> Result, &'static str> { @@ -627,7 +767,7 @@ impl SyncNetworkContext { .unwrap_or_default(); // TODO(das): figure out how to pass block.slot if we end up doing rotation - let custody_indexes_duty = self.network_globals().custody_columns(&self.chain.spec); + let custody_indexes_duty = self.network_globals().custody_columns(); // Include only the blob indexes not yet imported (received through gossip) let custody_indexes_to_fetch = custody_indexes_duty @@ -651,10 +791,28 @@ impl SyncNetworkContext { "id" => ?id ); - // TODO(das): Issue a custody request with `id` for the set of columns - // `custody_indexes_to_fetch` and block `block_root`. 
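// The custody request is created and driven straight away: `continue_requests` below sends the
// first batch of `DataColumnsByRoot` requests, and the request is only tracked in
// `custody_by_root_requests` if that initial send succeeds.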
+ let requester = CustodyRequester(id); + let mut request = ActiveCustodyRequest::new( + block_root, + // TODO(das): req_id is duplicated here, also present in id + CustodyId { requester, req_id }, + &custody_indexes_to_fetch, + self.log.clone(), + ); - Ok(LookupRequestResult::RequestSent(req_id)) + // TODO(das): start request + // Note that you can only send, but not handle a response here + match request.continue_requests(self) { + Ok(_) => { + // Ignoring the result of `continue_requests` is okay. A request that has just been + // created cannot return data immediately, it must send some request to the network + // first. And there must exist some request, `custody_indexes_to_fetch` is not empty. + self.custody_by_root_requests.insert(requester, request); + Ok(LookupRequestResult::RequestSent(req_id)) + } + // TODO(das): handle this error properly + Err(e) => Err(RpcRequestSendError::CustodyRequestError(e)), + } } pub fn is_execution_engine_online(&self) -> bool { @@ -738,12 +896,18 @@ impl SyncNetworkContext { "To deal with alignment with deneb boundaries, batches need to be of just one epoch" ); - if let Some(data_availability_boundary) = self.chain.data_availability_boundary() { - if epoch >= data_availability_boundary { - ByRangeRequestType::BlocksAndBlobs - } else { - ByRangeRequestType::Blocks - } + if self + .chain + .data_availability_checker + .data_columns_required_for_epoch(epoch) + { + ByRangeRequestType::BlocksAndColumns + } else if self + .chain + .data_availability_checker + .blobs_required_for_epoch(epoch) + { + ByRangeRequestType::BlocksAndBlobs } else { ByRangeRequestType::Blocks } @@ -753,9 +917,9 @@ impl SyncNetworkContext { &mut self, id: Id, sender_id: RangeRequestId, - info: BlocksAndBlobsRequestInfo, + info: RangeBlockComponentsRequest, ) { - self.range_blocks_and_blobs_requests + self.range_block_components_requests .insert(id, (sender_id, info)); } @@ -853,7 +1017,7 @@ impl SyncNetworkContext { pub fn on_data_columns_by_root_response( &mut self, id: DataColumnsByRootRequestId, - peer_id: PeerId, + _peer_id: PeerId, rpc_event: RpcEvent>>, ) -> Option>>>> { let Entry::Occupied(mut request) = self.data_columns_by_root_requests.entry(id) else { @@ -885,8 +1049,10 @@ impl SyncNetworkContext { // catch if a peer is returning more columns than requested or if the excess blobs are // invalid. Err((e, resolved)) => { - if let RpcResponseError::VerifyError(e) = &e { - self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); + if let RpcResponseError::VerifyError(_e) = &e { + // TODO(das): this is a bug, we should not penalise peer in this case. + // confirm this can be removed. + // self.report_peer(peer_id, PeerAction::LowToleranceError, e.into()); } if resolved { None @@ -897,6 +1063,53 @@ impl SyncNetworkContext { } } + /// Insert a downloaded column into an active custody request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + #[allow(clippy::type_complexity)] + pub fn on_custody_by_root_response( + &mut self, + id: CustodyId, + req_id: DataColumnsByRootRequestId, + peer_id: PeerId, + resp: RpcResponseResult>>>, + ) -> Option, PeerGroup), RpcResponseError>> { + // Note: need to remove the request to borrow self again below. 
Otherwise we can't + // do nested requests + let Some(mut request) = self.custody_by_root_requests.remove(&id.requester) else { + // TOOD(das): This log can happen if the request is error'ed early and dropped + debug!(self.log, "Custody column downloaded event for unknown request"; "id" => ?id); + return None; + }; + + let result = request + .on_data_column_downloaded(peer_id, req_id, resp, self) + .map_err(RpcResponseError::CustodyRequestError) + .transpose(); + + // Convert a result from internal format of `ActiveCustodyRequest` (error first to use ?) to + // an Option first to use in an `if let Some() { act on result }` block. + if let Some(result) = result { + match result.as_ref() { + Ok((columns, peer_group)) => { + debug!(self.log, "Custody request success, removing"; "id" => ?id, "count" => columns.len(), "peers" => ?peer_group) + } + Err(e) => { + debug!(self.log, "Custody request failure, removing"; "id" => ?id, "error" => ?e) + } + } + + Some(result) + } else { + self.custody_by_root_requests.insert(id.requester, request); + None + } + } + pub fn send_block_for_processing( &self, id: Id, @@ -961,22 +1174,28 @@ impl SyncNetworkContext { pub fn send_custody_columns_for_processing( &self, - id: Id, + _id: Id, block_root: Hash256, - _custody_columns: DataColumnSidecarList, - _duration: Duration, + custody_columns: DataColumnSidecarList, + duration: Duration, + process_type: BlockProcessType, ) -> Result<(), SendErrorProcessor> { - let _beacon_processor = self + let beacon_processor = self .beacon_processor_if_enabled() .ok_or(SendErrorProcessor::ProcessorNotAvailable)?; - debug!(self.log, "Sending custody columns for processing"; "block" => ?block_root, "id" => id); + debug!(self.log, "Sending custody columns for processing"; "block" => ?block_root, "process_type" => ?process_type); - // Lookup sync event safety: If `beacon_processor.send_rpc_custody_columns` returns Ok() sync - // must receive a single `SyncMessage::BlockComponentProcessed` event with this process type - // - // TODO(das): After merging processor import PR, actually send columns to beacon processor. 
- Ok(()) + beacon_processor + .send_rpc_custody_columns(block_root, custody_columns, duration, process_type) + .map_err(|e| { + error!( + self.log, + "Failed to send sync custody columns to processor"; + "error" => ?e + ); + SendErrorProcessor::SendError + }) } pub(crate) fn register_metrics(&self) { @@ -993,7 +1212,7 @@ impl SyncNetworkContext { metrics::set_gauge_vec( &metrics::SYNC_ACTIVE_NETWORK_REQUESTS, &["range_blocks"], - self.range_blocks_and_blobs_requests.len() as i64, + self.range_block_components_requests.len() as i64, ); } } diff --git a/beacon_node/network/src/sync/network_context/custody.rs b/beacon_node/network/src/sync/network_context/custody.rs new file mode 100644 index 00000000000..b1038c74703 --- /dev/null +++ b/beacon_node/network/src/sync/network_context/custody.rs @@ -0,0 +1,415 @@ +use crate::sync::network_context::{ + DataColumnsByRootRequestId, DataColumnsByRootSingleBlockRequest, +}; + +use beacon_chain::BeaconChainTypes; +use fnv::FnvHashMap; +use lighthouse_network::service::api_types::{CustodyId, DataColumnsByRootRequester}; +use lighthouse_network::PeerId; +use lru_cache::LRUTimeCache; +use rand::Rng; +use slog::{debug, warn}; +use std::time::Duration; +use std::{collections::HashMap, marker::PhantomData, sync::Arc}; +use types::EthSpec; +use types::{data_column_sidecar::ColumnIndex, DataColumnSidecar, Hash256}; + +use super::{LookupRequestResult, PeerGroup, RpcResponseResult, SyncNetworkContext}; + +const FAILED_PEERS_CACHE_EXPIRY_SECONDS: u64 = 5; + +type DataColumnSidecarList = Vec>>; + +pub struct ActiveCustodyRequest { + block_root: Hash256, + custody_id: CustodyId, + /// List of column indices this request needs to download to complete successfully + column_requests: FnvHashMap>, + /// Active requests for 1 or more columns each + active_batch_columns_requests: + FnvHashMap, + /// Peers that have recently failed to successfully respond to a columns by root request. + /// Having a LRUTimeCache allows this request to not have to track disconnecting peers. + failed_peers: LRUTimeCache, + /// Logger for the `SyncNetworkContext`. + pub log: slog::Logger, + _phantom: PhantomData, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Error { + SendFailed(&'static str), + TooManyFailures, + BadState(String), + NoPeers(ColumnIndex), + /// Received a download result for a different request id than the in-flight request. + /// There should only exist a single request at a time. Having multiple requests is a bug and + /// can result in undefined state, so it's treated as a hard error and the lookup is dropped. + UnexpectedRequestId { + expected_req_id: DataColumnsByRootRequestId, + req_id: DataColumnsByRootRequestId, + }, +} + +struct ActiveBatchColumnsRequest { + peer_id: PeerId, + indices: Vec, +} + +type CustodyRequestResult = Result, PeerGroup)>, Error>; + +impl ActiveCustodyRequest { + pub(crate) fn new( + block_root: Hash256, + custody_id: CustodyId, + column_indices: &[ColumnIndex], + log: slog::Logger, + ) -> Self { + Self { + block_root, + custody_id, + column_requests: HashMap::from_iter( + column_indices + .iter() + .map(|index| (*index, ColumnRequest::new())), + ), + active_batch_columns_requests: <_>::default(), + failed_peers: LRUTimeCache::new(Duration::from_secs(FAILED_PEERS_CACHE_EXPIRY_SECONDS)), + log, + _phantom: PhantomData, + } + } + + /// Insert a downloaded column into an active custody request. Then make progress on the + /// entire request. 
+ /// + /// ### Returns + /// + /// - `Err`: Custody request has failed and will be dropped + /// - `Ok(Some)`: Custody request has successfully completed and will be dropped + /// - `Ok(None)`: Custody request still active + pub(crate) fn on_data_column_downloaded( + &mut self, + peer_id: PeerId, + req_id: DataColumnsByRootRequestId, + resp: RpcResponseResult>, + cx: &mut SyncNetworkContext, + ) -> CustodyRequestResult { + // TODO(das): Should downscore peers for verify errors here + + let Some(batch_request) = self.active_batch_columns_requests.get_mut(&req_id) else { + warn!(self.log, + "Received custody column response for unrequested index"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + ); + return Ok(None); + }; + + match resp { + Ok((data_columns, _seen_timestamp)) => { + debug!(self.log, + "Custody column download success"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + "peer" => %peer_id, + "count" => data_columns.len() + ); + + // Map columns by index as an optimization to not loop the returned list on each + // requested index. The worse case is 128 loops over a 128 item vec + mutation to + // drop the consumed columns. + let mut data_columns = HashMap::::from_iter( + data_columns.into_iter().map(|d| (d.index, d)), + ); + // Accumulate columns that the peer does not have to issue a single log per request + let mut missing_column_indexes = vec![]; + + for column_index in &batch_request.indices { + let column_request = self + .column_requests + .get_mut(column_index) + .ok_or(Error::BadState("unknown column_index".to_owned()))?; + + if let Some(data_column) = data_columns.remove(column_index) { + column_request.on_download_success(req_id, peer_id, data_column)?; + } else { + // Peer does not have the requested data. + // TODO(das) do not consider this case a success. We know for sure the block has + // data. However we allow the peer to return empty as we can't attribute fault. + // TODO(das): Should track which columns are missing and eventually give up + // TODO(das): If the peer is in the lookup peer set it claims to have imported + // the block AND its custody columns. So in this case we can downscore + column_request.on_download_error(req_id)?; + missing_column_indexes.push(column_index); + } + } + + // Note: no need to check data_columns is empty, SyncNetworkContext ensures that + // successful responses only contain requested data. + + if !missing_column_indexes.is_empty() { + // Note: Batch logging that columns are missing to not spam logger + debug!(self.log, + "Custody column peer claims to not have some data"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + "peer" => %peer_id, + // TODO(das): this property can become very noisy, being the full range 0..128 + "missing_column_indexes" => ?missing_column_indexes + ); + + self.failed_peers.insert(peer_id); + } + } + Err(err) => { + debug!(self.log, + "Custody column download error"; + "id" => ?self.custody_id, + "block_root" => ?self.block_root, + "req_id" => %req_id, + "peer" => %peer_id, + "error" => ?err + ); + + // TODO(das): Should mark peer as failed and try from another peer + for column_index in &batch_request.indices { + self.column_requests + .get_mut(column_index) + .ok_or(Error::BadState("unknown column_index".to_owned()))? 
+ .on_download_error_and_mark_failure(req_id)?; + } + + self.failed_peers.insert(peer_id); + } + }; + + self.continue_requests(cx) + } + + pub(crate) fn continue_requests( + &mut self, + cx: &mut SyncNetworkContext, + ) -> CustodyRequestResult { + if self.column_requests.values().all(|r| r.is_downloaded()) { + // All requests have completed successfully. + let mut peers = HashMap::>::new(); + let columns = std::mem::take(&mut self.column_requests) + .into_values() + .map(|request| { + let (peer, data_column) = request.complete()?; + peers + .entry(peer) + .or_default() + .push(data_column.index as usize); + Ok(data_column) + }) + .collect::, _>>()?; + + let peer_group = PeerGroup::from_set(peers); + return Ok(Some((columns, peer_group))); + } + + let mut columns_to_request_by_peer = HashMap::>::new(); + + // Need to: + // - track how many active requests a peer has for load balancing + // - which peers have failures to attempt others + // - which peer returned what to have PeerGroup attributability + + for (column_index, request) in self.column_requests.iter_mut() { + if request.is_awaiting_download() { + if request.download_failures > MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS { + return Err(Error::TooManyFailures); + } + + // TODO: When is a fork and only a subset of your peers know about a block, we should only + // query the peers on that fork. Should this case be handled? How to handle it? + let custodial_peers = cx.get_custodial_peers(*column_index); + + // TODO(das): cache this computation in a OneCell or similar to prevent having to + // run it every loop + let mut active_requests_by_peer = HashMap::::new(); + for batch_request in self.active_batch_columns_requests.values() { + *active_requests_by_peer + .entry(batch_request.peer_id) + .or_default() += 1; + } + + let mut priorized_peers = custodial_peers + .iter() + .map(|peer| { + ( + // De-prioritize peers that have failed to successfully respond to + // requests recently + self.failed_peers.contains(peer), + // Prefer peers with less requests to load balance across peers + active_requests_by_peer.get(peer).copied().unwrap_or(0), + // Final random factor to give all peers a shot in each retry + rand::thread_rng().gen::(), + *peer, + ) + }) + .collect::>(); + priorized_peers.sort_unstable(); + + let Some((_, _, _, peer_id)) = priorized_peers.first() else { + // Do not tolerate not having custody peers, hard error. + // TODO(das): we might implement some grace period. The request will pause for X + // seconds expecting the peer manager to find peers before failing the request. 
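The peer prioritisation above is a plain lexicographic tuple sort. A hedged, self-contained sketch of the same idea (with `PeerId` replaced by an integer stand-in and `rand` used as in the surrounding code) is:

```
// Illustrative only: candidates are sorted so that peers that failed recently
// sort last, lightly loaded peers sort first, and a random value breaks the
// remaining ties before falling back to the peer id itself.
use rand::Rng;
use std::collections::{HashMap, HashSet};

type PeerId = u64; // stand-in for lighthouse_network::PeerId

fn choose_custody_peer(
    candidates: &[PeerId],
    failed_recently: &HashSet<PeerId>,
    active_requests: &HashMap<PeerId, usize>,
) -> Option<PeerId> {
    let mut prioritized = candidates
        .iter()
        .map(|peer| {
            (
                failed_recently.contains(peer),                  // false sorts before true
                active_requests.get(peer).copied().unwrap_or(0), // fewer in-flight requests first
                rand::thread_rng().gen::<u32>(),                 // random tie-break per retry
                *peer,
            )
        })
        .collect::<Vec<_>>();
    prioritized.sort_unstable();
    prioritized.first().map(|(_, _, _, peer)| *peer)
}

fn main() {
    let peers = vec![1, 2, 3];
    let failed: HashSet<PeerId> = HashSet::from([2]);
    let active: HashMap<PeerId, usize> = HashMap::from([(1, 4)]);
    // Peer 3 wins: it has not failed recently and has no requests in flight.
    assert_eq!(choose_custody_peer(&peers, &failed, &active), Some(3));
}
```

Because the random component is drawn fresh on every retry, repeated attempts spread load across the custody peer set rather than hammering the same peer.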
+ return Err(Error::NoPeers(*column_index)); + }; + + columns_to_request_by_peer + .entry(*peer_id) + .or_default() + .push(*column_index); + } + } + + for (peer_id, indices) in columns_to_request_by_peer.into_iter() { + let request_result = cx + .data_column_lookup_request( + DataColumnsByRootRequester::Custody(self.custody_id), + peer_id, + DataColumnsByRootSingleBlockRequest { + block_root: self.block_root, + indices: indices.clone(), + }, + ) + .map_err(Error::SendFailed)?; + + match request_result { + LookupRequestResult::RequestSent(req_id) => { + for column_index in &indices { + let column_request = self + .column_requests + .get_mut(column_index) + .ok_or(Error::BadState("unknown column_index".to_owned()))?; + + column_request.on_download_start(req_id)?; + } + + self.active_batch_columns_requests + .insert(req_id, ActiveBatchColumnsRequest { indices, peer_id }); + } + LookupRequestResult::NoRequestNeeded => unreachable!(), + LookupRequestResult::Pending(_) => unreachable!(), + } + } + + Ok(None) + } +} + +/// TODO(das): this attempt count is nested into the existing lookup request count. +const MAX_CUSTODY_COLUMN_DOWNLOAD_ATTEMPTS: usize = 3; + +struct ColumnRequest { + status: Status, + download_failures: usize, +} + +#[derive(Debug, Clone)] +enum Status { + NotStarted, + Downloading(DataColumnsByRootRequestId), + Downloaded(PeerId, Arc>), +} + +impl ColumnRequest { + fn new() -> Self { + Self { + status: Status::NotStarted, + download_failures: 0, + } + } + + fn is_awaiting_download(&self) -> bool { + match self.status { + Status::NotStarted => true, + Status::Downloading { .. } | Status::Downloaded { .. } => false, + } + } + + fn is_downloaded(&self) -> bool { + match self.status { + Status::NotStarted | Status::Downloading { .. } => false, + Status::Downloaded { .. 
} => true, + } + } + + fn on_download_start(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> { + match &self.status { + Status::NotStarted => { + self.status = Status::Downloading(req_id); + Ok(()) + } + other => Err(Error::BadState(format!( + "bad state on_download_start expected NotStarted got {other:?}" + ))), + } + } + + fn on_download_error(&mut self, req_id: DataColumnsByRootRequestId) -> Result<(), Error> { + match &self.status { + Status::Downloading(expected_req_id) => { + if req_id != *expected_req_id { + return Err(Error::UnexpectedRequestId { + expected_req_id: *expected_req_id, + req_id, + }); + } + self.status = Status::NotStarted; + Ok(()) + } + other => Err(Error::BadState(format!( + "bad state on_download_error expected Downloading got {other:?}" + ))), + } + } + + fn on_download_error_and_mark_failure( + &mut self, + req_id: DataColumnsByRootRequestId, + ) -> Result<(), Error> { + // TODO(das): Should track which peers don't have data + self.download_failures += 1; + self.on_download_error(req_id) + } + + fn on_download_success( + &mut self, + req_id: DataColumnsByRootRequestId, + peer_id: PeerId, + data_column: Arc>, + ) -> Result<(), Error> { + match &self.status { + Status::Downloading(expected_req_id) => { + if req_id != *expected_req_id { + return Err(Error::UnexpectedRequestId { + expected_req_id: *expected_req_id, + req_id, + }); + } + self.status = Status::Downloaded(peer_id, data_column); + Ok(()) + } + other => Err(Error::BadState(format!( + "bad state on_download_success expected Downloading got {other:?}" + ))), + } + } + + fn complete(self) -> Result<(PeerId, Arc>), Error> { + match self.status { + Status::Downloaded(peer_id, data_column) => Ok((peer_id, data_column)), + other => Err(Error::BadState(format!( + "bad state complete expected Downloaded got {other:?}" + ))), + } + } +} diff --git a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs index a45916905ce..a42ae7ca41f 100644 --- a/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs +++ b/beacon_node/network/src/sync/network_context/requests/data_columns_by_root.rs @@ -1,6 +1,5 @@ -use lighthouse_network::{ - rpc::methods::DataColumnsByRootRequest, service::api_types::SingleLookupReqId, PeerId, -}; +use lighthouse_network::service::api_types::DataColumnsByRootRequester; +use lighthouse_network::{rpc::methods::DataColumnsByRootRequest, PeerId}; use std::sync::Arc; use types::{ChainSpec, DataColumnIdentifier, DataColumnSidecar, EthSpec, Hash256}; @@ -32,14 +31,14 @@ pub struct ActiveDataColumnsByRootRequest { items: Vec>>, resolved: bool, pub(crate) peer_id: PeerId, - pub(crate) requester: SingleLookupReqId, + pub(crate) requester: DataColumnsByRootRequester, } impl ActiveDataColumnsByRootRequest { pub fn new( request: DataColumnsByRootSingleBlockRequest, peer_id: PeerId, - requester: SingleLookupReqId, + requester: DataColumnsByRootRequester, ) -> Self { Self { request, diff --git a/beacon_node/network/src/sync/range_sync/batch.rs b/beacon_node/network/src/sync/range_sync/batch.rs index 7f9629740bb..53fb55b14da 100644 --- a/beacon_node/network/src/sync/range_sync/batch.rs +++ b/beacon_node/network/src/sync/range_sync/batch.rs @@ -20,6 +20,7 @@ const MAX_BATCH_PROCESSING_ATTEMPTS: u8 = 3; #[derive(Debug, Copy, Clone, Display)] #[strum(serialize_all = "snake_case")] pub enum ByRangeRequestType { + BlocksAndColumns, BlocksAndBlobs, Blocks, } @@ -199,9 +200,9 
@@ impl BatchInfo { } /// Verifies if an incoming block belongs to this batch. - pub fn is_expecting_block(&self, peer_id: &PeerId, request_id: &Id) -> bool { - if let BatchState::Downloading(expected_peer, expected_id) = &self.state { - return peer_id == expected_peer && expected_id == request_id; + pub fn is_expecting_block(&self, request_id: &Id) -> bool { + if let BatchState::Downloading(_, expected_id) = &self.state { + return expected_id == request_id; } false } diff --git a/beacon_node/network/src/sync/range_sync/chain.rs b/beacon_node/network/src/sync/range_sync/chain.rs index d92dcd4851c..1756fb513da 100644 --- a/beacon_node/network/src/sync/range_sync/chain.rs +++ b/beacon_node/network/src/sync/range_sync/chain.rs @@ -1,15 +1,18 @@ use super::batch::{BatchInfo, BatchProcessingResult, BatchState}; use super::RangeSyncType; use crate::metrics; +use crate::metrics::PEERS_PER_COLUMN_SUBNET; use crate::network_beacon_processor::ChainSegmentProcessId; use crate::sync::network_context::RangeRequestId; use crate::sync::{network_context::SyncNetworkContext, BatchOperationOutcome, BatchProcessResult}; use beacon_chain::block_verification_types::RpcBlock; use beacon_chain::BeaconChainTypes; use fnv::FnvHashMap; +use lighthouse_metrics::set_int_gauge; use lighthouse_network::service::api_types::Id; use lighthouse_network::{PeerAction, PeerId}; -use rand::{seq::SliceRandom, Rng}; +use rand::seq::SliceRandom; +use rand::Rng; use slog::{crit, debug, o, warn}; use std::collections::{btree_map::Entry, BTreeMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -256,7 +259,9 @@ impl SyncingChain { // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer, and that the // request_id matches - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. + if !batch.is_expecting_block(&request_id) { return Ok(KeepChain); } batch @@ -439,6 +444,11 @@ impl SyncingChain { self.request_batches(network)?; } } + } else if !self.good_peers_on_custody_subnets(self.processing_target, network) { + // This is to handle the case where no batch was sent for the current processing + // target when there is no custody peers available. This is a valid state and should not + // return an error. + return Ok(KeepChain); } else { return Err(RemoveChain::WrongChainState(format!( "Batch not found for current processing target {}", @@ -862,7 +872,9 @@ impl SyncingChain { // A batch could be retried without the peer failing the request (disconnecting/ // sending an error /timeout) if the peer is removed from the chain for other // reasons. Check that this block belongs to the expected peer - if !batch.is_expecting_block(peer_id, &request_id) { + // TODO(das): removed peer_id matching as the node may request a different peer for data + // columns. + if !batch.is_expecting_block(&request_id) { debug!( self.log, "Batch not expecting block"; @@ -953,7 +965,7 @@ impl SyncingChain { let batch_state = self.visualize_batch_state(); if let Some(batch) = self.batches.get_mut(&batch_id) { let (request, batch_type) = batch.to_blocks_by_range_request(); - match network.blocks_and_blobs_by_range_request( + match network.block_components_by_range_request( peer, batch_type, request, @@ -1063,6 +1075,14 @@ impl SyncingChain { // check if we have the batch for our optimistic start. If not, request it first. // We wait for this batch before requesting any other batches. 
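The `good_peers_on_custody_subnets` helper added further down in this file gates batch requests once PeerDAS is active. A simplified model of that check (using a plain map in place of the real `NetworkGlobals` peer database) is:

```
// Simplified model: for post-PeerDAS epochs, range sync only proceeds when
// every custody column subnet has at least one usable peer.
use std::collections::HashMap;

fn good_peers_on_custody_subnets(
    peer_das_enabled_for_epoch: bool,
    good_peers_per_custody_subnet: &HashMap<u64, usize>,
) -> bool {
    if !peer_das_enabled_for_epoch {
        // Before PeerDAS there is nothing to gate on.
        return true;
    }
    good_peers_per_custody_subnet
        .values()
        .all(|&peer_count| peer_count > 0)
}

fn main() {
    let peers = HashMap::from([(0u64, 2usize), (1, 0), (2, 3)]);
    // Subnet 1 has no peers, so batches for post-PeerDAS epochs are held back.
    assert!(!good_peers_on_custody_subnets(true, &peers));
    assert!(good_peers_on_custody_subnets(false, &peers));
}
```

The real helper additionally records the per-subnet peer count in the `PEERS_PER_COLUMN_SUBNET` gauge while iterating.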
if let Some(epoch) = self.optimistic_start { + if !self.good_peers_on_custody_subnets(epoch, network) { + debug!( + self.log, + "Waiting for peers to be available on custody column subnets" + ); + return Ok(KeepChain); + } + if let Entry::Vacant(entry) = self.batches.entry(epoch) { if let Some(peer) = idle_peers.pop() { let batch_type = network.batch_type(epoch); @@ -1087,6 +1107,36 @@ impl SyncingChain { Ok(KeepChain) } + /// Checks all custody column subnets for peers. Returns `true` if there is at least one peer in + /// every custody column subnet. + fn good_peers_on_custody_subnets(&self, epoch: Epoch, network: &SyncNetworkContext) -> bool { + if network.chain.spec.is_peer_das_enabled_for_epoch(epoch) { + // Require peers on all custody column subnets before sending batches + let peers_on_all_custody_subnets = + network + .network_globals() + .custody_subnets() + .all(|subnet_id| { + let peer_count = network + .network_globals() + .peers + .read() + .good_custody_subnet_peer(subnet_id) + .count(); + + set_int_gauge( + &PEERS_PER_COLUMN_SUBNET, + &[&subnet_id.to_string()], + peer_count as i64, + ); + peer_count > 0 + }); + peers_on_all_custody_subnets + } else { + true + } + } + /// Creates the next required batch from the chain. If there are no more batches required, /// `false` is returned. fn include_next_batch(&mut self, network: &mut SyncNetworkContext) -> Option { @@ -1117,6 +1167,18 @@ impl SyncingChain { return None; } + // don't send batch requests until we have peers on custody subnets + // TODO(das): this is a workaround to avoid sending out excessive block requests because + // block and data column requests are currently coupled. This can be removed once we find a + // way to decouple the requests and do retries individually, see issue #6258. 
+ if !self.good_peers_on_custody_subnets(self.to_be_downloaded, network) { + debug!( + self.log, + "Waiting for peers to be available on custody column subnets" + ); + return None; + } + let batch_id = self.to_be_downloaded; // this batch could have been included already being an optimistic batch match self.batches.entry(batch_id) { diff --git a/beacon_node/network/src/sync/range_sync/range.rs b/beacon_node/network/src/sync/range_sync/range.rs index 334c58090e2..c8bb9b3b09a 100644 --- a/beacon_node/network/src/sync/range_sync/range.rs +++ b/beacon_node/network/src/sync/range_sync/range.rs @@ -689,7 +689,11 @@ mod tests { log.new(o!("component" => "range")), ); let (network_tx, network_rx) = mpsc::unbounded_channel(); - let globals = Arc::new(NetworkGlobals::new_test_globals(Vec::new(), &log)); + let globals = Arc::new(NetworkGlobals::new_test_globals( + Vec::new(), + &log, + chain.spec.clone(), + )); let (network_beacon_processor, beacon_processor_rx) = NetworkBeaconProcessor::null_for_testing( globals.clone(), diff --git a/beacon_node/network/src/sync/sampling.rs b/beacon_node/network/src/sync/sampling.rs new file mode 100644 index 00000000000..524fe86bee9 --- /dev/null +++ b/beacon_node/network/src/sync/sampling.rs @@ -0,0 +1,628 @@ +use self::request::ActiveColumnSampleRequest; +use super::network_context::{ + DataColumnsByRootSingleBlockRequest, RpcResponseError, SyncNetworkContext, +}; +use crate::metrics; +use beacon_chain::BeaconChainTypes; +use fnv::FnvHashMap; +use lighthouse_network::service::api_types::{ + DataColumnsByRootRequester, SamplingId, SamplingRequestId, SamplingRequester, +}; +use lighthouse_network::{PeerAction, PeerId}; +use rand::{seq::SliceRandom, thread_rng}; +use slog::{debug, error, warn}; +use std::{ + collections::hash_map::Entry, collections::HashMap, marker::PhantomData, sync::Arc, + time::Duration, +}; +use types::{data_column_sidecar::ColumnIndex, ChainSpec, DataColumnSidecar, Hash256}; + +pub type SamplingResult = Result<(), SamplingError>; + +type DataColumnSidecarList = Vec>>; + +pub struct Sampling { + // TODO(das): stalled sampling request are never cleaned up + requests: HashMap>, + sampling_config: SamplingConfig, + log: slog::Logger, +} + +impl Sampling { + pub fn new(sampling_config: SamplingConfig, log: slog::Logger) -> Self { + Self { + requests: <_>::default(), + sampling_config, + log, + } + } + + #[cfg(test)] + pub fn active_sampling_requests(&self) -> Vec { + self.requests.values().map(|r| r.block_root).collect() + } + + /// Create a new sampling request for a known block + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + pub fn on_new_sample_request( + &mut self, + block_root: Hash256, + cx: &mut SyncNetworkContext, + ) -> Option<(SamplingRequester, SamplingResult)> { + let id = SamplingRequester::ImportedBlock(block_root); + + let request = match self.requests.entry(id) { + Entry::Vacant(e) => e.insert(ActiveSamplingRequest::new( + block_root, + id, + &self.sampling_config, + self.log.clone(), + &cx.chain.spec, + )), + Entry::Occupied(_) => { + // Sampling is triggered from multiple sources, duplicate sampling requests are + // likely (gossip block + gossip data column) + // TODO(das): Should track failed sampling request for some time? Otherwise there's + // a risk of a loop with multiple triggers creating the request, then failing, + // and repeat. 
+ debug!(self.log, "Ignoring duplicate sampling request"; "id" => ?id); + return None; + } + }; + + debug!(self.log, "Created new sample request"; "id" => ?id); + + // TOOD(das): If a node has very little peers, continue_sampling() will attempt to find enough + // to sample here, immediately failing the sampling request. There should be some grace + // period to allow the peer manager to find custody peers. + let result = request.continue_sampling(cx); + self.handle_sampling_result(result, &id) + } + + /// Insert a downloaded column into an active sampling request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + pub fn on_sample_downloaded( + &mut self, + id: SamplingId, + peer_id: PeerId, + resp: Result<(DataColumnSidecarList, Duration), RpcResponseError>, + cx: &mut SyncNetworkContext, + ) -> Option<(SamplingRequester, SamplingResult)> { + let Some(request) = self.requests.get_mut(&id.id) else { + // TOOD(das): This log can happen if the request is error'ed early and dropped + debug!(self.log, "Sample downloaded event for unknown request"; "id" => ?id); + return None; + }; + + let result = request.on_sample_downloaded(peer_id, id.sampling_request_id, resp, cx); + self.handle_sampling_result(result, &id.id) + } + + /// Insert a downloaded column into an active sampling request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Some`: Request completed, won't make more progress. Expect requester to act on the result. + /// - `None`: Request still active, requester should do no action + pub fn on_sample_verified( + &mut self, + id: SamplingId, + result: Result<(), String>, + cx: &mut SyncNetworkContext, + ) -> Option<(SamplingRequester, SamplingResult)> { + let Some(request) = self.requests.get_mut(&id.id) else { + // TOOD(das): This log can happen if the request is error'ed early and dropped + debug!(self.log, "Sample verified event for unknown request"; "id" => ?id); + return None; + }; + + let result = request.on_sample_verified(id.sampling_request_id, result, cx); + self.handle_sampling_result(result, &id.id) + } + + /// Converts a result from the internal format of `ActiveSamplingRequest` (error first to use ? + /// conveniently), to an Option first format to use an `if let Some() { act on result }` pattern + /// in the sync manager. + fn handle_sampling_result( + &mut self, + result: Result, SamplingError>, + id: &SamplingRequester, + ) -> Option<(SamplingRequester, SamplingResult)> { + let result = result.transpose(); + if let Some(result) = result { + debug!(self.log, "Sampling request completed, removing"; "id" => ?id, "result" => ?result); + metrics::inc_counter_vec( + &metrics::SAMPLING_REQUEST_RESULT, + &[metrics::from_result(&result)], + ); + self.requests.remove(id); + Some((*id, result)) + } else { + None + } + } +} + +pub struct ActiveSamplingRequest { + block_root: Hash256, + requester_id: SamplingRequester, + column_requests: FnvHashMap, + /// Mapping of column indexes for a sampling request. + column_indexes_by_sampling_request: FnvHashMap>, + /// Sequential ID for sampling requests. + current_sampling_request_id: SamplingRequestId, + column_shuffle: Vec, + required_successes: Vec, + /// Logger for the `SyncNetworkContext`. 
+ pub log: slog::Logger, + _phantom: PhantomData, +} + +#[derive(Debug)] +pub enum SamplingError { + SendFailed(#[allow(dead_code)] &'static str), + ProcessorUnavailable, + TooManyFailures, + BadState(#[allow(dead_code)] String), + ColumnIndexOutOfBounds, +} + +/// Required success index by current failures, with p_target=5.00E-06 +/// Ref: https://colab.research.google.com/drive/18uUgT2i-m3CbzQ5TyP9XFKqTn1DImUJD#scrollTo=E82ITcgB5ATh +const REQUIRED_SUCCESSES: [usize; 11] = [16, 20, 23, 26, 29, 32, 34, 37, 39, 42, 44]; + +#[derive(Debug, Clone)] +pub enum SamplingConfig { + Default, + #[allow(dead_code)] + Custom { + required_successes: Vec, + }, +} + +impl ActiveSamplingRequest { + fn new( + block_root: Hash256, + requester_id: SamplingRequester, + sampling_config: &SamplingConfig, + log: slog::Logger, + spec: &ChainSpec, + ) -> Self { + // Select ahead of time the full list of to-sample columns + let mut column_shuffle = + (0..spec.number_of_columns as ColumnIndex).collect::>(); + let mut rng = thread_rng(); + column_shuffle.shuffle(&mut rng); + + Self { + block_root, + requester_id, + column_requests: <_>::default(), + column_indexes_by_sampling_request: <_>::default(), + current_sampling_request_id: SamplingRequestId(0), + column_shuffle, + required_successes: match sampling_config { + SamplingConfig::Default => REQUIRED_SUCCESSES.to_vec(), + SamplingConfig::Custom { required_successes } => required_successes.clone(), + }, + log, + _phantom: PhantomData, + } + } + + /// Insert a downloaded column into an active sampling request. Then make progress on the + /// entire request. + /// + /// ### Returns + /// + /// - `Err`: Sampling request has failed and will be dropped + /// - `Ok(Some)`: Sampling request has successfully completed and will be dropped + /// - `Ok(None)`: Sampling request still active + pub(crate) fn on_sample_downloaded( + &mut self, + _peer_id: PeerId, + sampling_request_id: SamplingRequestId, + resp: Result<(DataColumnSidecarList, Duration), RpcResponseError>, + cx: &mut SyncNetworkContext, + ) -> Result, SamplingError> { + // Select columns to sample + // Create individual request per column + // Progress requests + // If request fails retry or expand search + // If all good return + let Some(column_indexes) = self + .column_indexes_by_sampling_request + .get(&sampling_request_id) + else { + error!(self.log, "Column indexes for the sampling request ID not found"; "sampling_request_id" => ?sampling_request_id); + return Ok(None); + }; + + match resp { + Ok((mut resp_data_columns, seen_timestamp)) => { + debug!(self.log, "Sample download success"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "count" => resp_data_columns.len()); + metrics::inc_counter_vec(&metrics::SAMPLE_DOWNLOAD_RESULT, &[metrics::SUCCESS]); + + // Filter the data received in the response using the requested column indexes. + let mut data_columns = vec![]; + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + + let Some(data_pos) = resp_data_columns + .iter() + .position(|data| &data.index == column_index) + else { + // Peer does not have the requested data. + // TODO(das) what to do? 
+ debug!(self.log, "Sampling peer claims to not have the data"; "block_root" => %self.block_root, "column_index" => column_index); + request.on_sampling_error()?; + continue; + }; + + data_columns.push(resp_data_columns.swap_remove(data_pos)); + } + + if !resp_data_columns.is_empty() { + let resp_column_indexes = resp_data_columns + .iter() + .map(|d| d.index) + .collect::>(); + debug!( + self.log, + "Received data that was not requested"; "block_root" => %self.block_root, "column_indexes" => ?resp_column_indexes + ); + } + + // Handle the downloaded data columns. + if data_columns.is_empty() { + debug!(self.log,"Received empty response"; "block_root" => %self.block_root); + self.column_indexes_by_sampling_request + .remove(&sampling_request_id); + } else { + // Overwrite `column_indexes` with the column indexes received in the response. + let column_indexes = data_columns.iter().map(|d| d.index).collect::>(); + self.column_indexes_by_sampling_request + .insert(sampling_request_id, column_indexes.clone()); + // Peer has data column, send to verify + let Some(beacon_processor) = cx.beacon_processor_if_enabled() else { + // If processor is not available, error the entire sampling + debug!(self.log, "Dropping sampling"; "block" => %self.block_root, "reason" => "beacon processor unavailable"); + return Err(SamplingError::ProcessorUnavailable); + }; + debug!(self.log, "Sending data_column for verification"; "block" => ?self.block_root, "column_indexes" => ?column_indexes); + if let Err(e) = beacon_processor.send_rpc_validate_data_columns( + self.block_root, + data_columns, + seen_timestamp, + SamplingId { + id: self.requester_id, + sampling_request_id, + }, + ) { + // TODO(das): Beacon processor is overloaded, what should we do? + error!(self.log, "Dropping sampling"; "block" => %self.block_root, "reason" => e.to_string()); + return Err(SamplingError::SendFailed("beacon processor send failure")); + } + } + } + Err(err) => { + debug!(self.log, "Sample download error"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "error" => ?err); + metrics::inc_counter_vec(&metrics::SAMPLE_DOWNLOAD_RESULT, &[metrics::FAILURE]); + + // Error downloading, maybe penalize peer and retry again. + // TODO(das) with different peer or different peer? + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + request.on_sampling_error()?; + } + } + }; + + self.continue_sampling(cx) + } + + /// Insert a column verification result into an active sampling request. Then make progress + /// on the entire request. 
+ /// + /// ### Returns + /// + /// - `Err`: Sampling request has failed and will be dropped + /// - `Ok(Some)`: Sampling request has successfully completed and will be dropped + /// - `Ok(None)`: Sampling request still active + pub(crate) fn on_sample_verified( + &mut self, + sampling_request_id: SamplingRequestId, + result: Result<(), String>, + cx: &mut SyncNetworkContext, + ) -> Result, SamplingError> { + let Some(column_indexes) = self + .column_indexes_by_sampling_request + .get(&sampling_request_id) + else { + error!(self.log, "Column indexes for the sampling request ID not found"; "sampling_request_id" => ?sampling_request_id); + return Ok(None); + }; + + match result { + Ok(_) => { + debug!(self.log, "Sample verification success"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes); + metrics::inc_counter_vec(&metrics::SAMPLE_VERIFY_RESULT, &[metrics::SUCCESS]); + + // Valid, continue_sampling will maybe consider sampling succees + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + request.on_sampling_success()?; + } + } + Err(err) => { + debug!(self.log, "Sample verification failure"; "block_root" => %self.block_root, "column_indexes" => ?column_indexes, "reason" => ?err); + metrics::inc_counter_vec(&metrics::SAMPLE_VERIFY_RESULT, &[metrics::FAILURE]); + + // TODO(das): Peer sent invalid data, penalize and try again from different peer + // TODO(das): Count individual failures + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(column_index) else { + warn!( + self.log, + "Active column sample request not found"; "block_root" => %self.block_root, "column_index" => column_index + ); + continue; + }; + let peer_id = request.on_sampling_error()?; + cx.report_peer( + peer_id, + PeerAction::LowToleranceError, + "invalid data column", + ); + } + } + } + + self.continue_sampling(cx) + } + + pub(crate) fn continue_sampling( + &mut self, + cx: &mut SyncNetworkContext, + ) -> Result, SamplingError> { + // First check if sampling is completed, by computing `required_successes` + let mut successes = 0; + let mut failures = 0; + let mut ongoings = 0; + + for request in self.column_requests.values() { + if request.is_completed() { + successes += 1; + } + if request.is_failed() { + failures += 1; + } + if request.is_ongoing() { + ongoings += 1; + } + } + + // If there are too many failures, consider the sampling failed + let Some(required_successes) = self.required_successes.get(failures) else { + return Err(SamplingError::TooManyFailures); + }; + + // If there are enough successes, consider the sampling complete + if successes >= *required_successes { + return Ok(Some(())); + } + + // First, attempt to progress sampling by requesting more columns, so that request failures + // are accounted for below. + + // Group the requested column indexes by the destination peer to batch sampling requests. + let mut column_indexes_to_request = FnvHashMap::default(); + for idx in 0..*required_successes { + // Re-request columns. 
Note: out of bounds error should never happen, inputs are hardcoded + let column_index = *self + .column_shuffle + .get(idx) + .ok_or(SamplingError::ColumnIndexOutOfBounds)?; + let request = self + .column_requests + .entry(column_index) + .or_insert(ActiveColumnSampleRequest::new(column_index)); + + if request.is_ready_to_request() { + if let Some(peer_id) = request.choose_peer(cx) { + let indexes = column_indexes_to_request.entry(peer_id).or_insert(vec![]); + indexes.push(column_index); + } + } + } + + // Send requests. + let mut sent_request = false; + for (peer_id, column_indexes) in column_indexes_to_request { + cx.data_column_lookup_request( + DataColumnsByRootRequester::Sampling(SamplingId { + id: self.requester_id, + sampling_request_id: self.current_sampling_request_id, + }), + peer_id, + DataColumnsByRootSingleBlockRequest { + block_root: self.block_root, + indices: column_indexes.clone(), + }, + ) + .map_err(SamplingError::SendFailed)?; + self.column_indexes_by_sampling_request + .insert(self.current_sampling_request_id, column_indexes.clone()); + self.current_sampling_request_id.0 += 1; + sent_request = true; + + // Update request status. + for column_index in column_indexes { + let Some(request) = self.column_requests.get_mut(&column_index) else { + continue; + }; + request.on_start_sampling(peer_id)?; + } + } + + // Make sure that sampling doesn't stall, by ensuring that this sampling request will + // receive a new event of some type. If there are no ongoing requests, and no new + // request was sent, loop to increase the required_successes until the sampling fails if + // there are no peers. + if ongoings == 0 && !sent_request { + debug!(self.log, "Sampling request stalled"; "block_root" => %self.block_root); + } + + Ok(None) + } +} + +mod request { + use super::SamplingError; + use crate::sync::network_context::SyncNetworkContext; + use beacon_chain::BeaconChainTypes; + use lighthouse_network::PeerId; + use rand::seq::SliceRandom; + use rand::thread_rng; + use std::collections::HashSet; + use types::data_column_sidecar::ColumnIndex; + + pub(crate) struct ActiveColumnSampleRequest { + column_index: ColumnIndex, + status: Status, + // TODO(das): Should downscore peers that claim to not have the sample? + peers_dont_have: HashSet, + } + + #[derive(Debug, Clone)] + enum Status { + NoPeers, + NotStarted, + Sampling(PeerId), + Verified, + } + + impl ActiveColumnSampleRequest { + pub(crate) fn new(column_index: ColumnIndex) -> Self { + Self { + column_index, + status: Status::NotStarted, + peers_dont_have: <_>::default(), + } + } + + pub(crate) fn is_completed(&self) -> bool { + match self.status { + Status::NoPeers | Status::NotStarted | Status::Sampling(_) => false, + Status::Verified => true, + } + } + + pub(crate) fn is_failed(&self) -> bool { + match self.status { + Status::NotStarted | Status::Sampling(_) | Status::Verified => false, + Status::NoPeers => true, + } + } + + pub(crate) fn is_ongoing(&self) -> bool { + match self.status { + Status::NotStarted | Status::NoPeers | Status::Verified => false, + Status::Sampling(_) => true, + } + } + + pub(crate) fn is_ready_to_request(&self) -> bool { + match self.status { + Status::NoPeers | Status::NotStarted => true, + Status::Sampling(_) | Status::Verified => false, + } + } + + pub(crate) fn choose_peer( + &mut self, + cx: &SyncNetworkContext, + ) -> Option { + // TODO: When is a fork and only a subset of your peers know about a block, sampling should only + // be queried on the peers on that fork. Should this case be handled? 
How to handle it? + let mut peer_ids = cx.get_custodial_peers(self.column_index); + + peer_ids.retain(|peer_id| !self.peers_dont_have.contains(peer_id)); + + if let Some(peer_id) = peer_ids.choose(&mut thread_rng()) { + Some(*peer_id) + } else { + self.status = Status::NoPeers; + None + } + } + + pub(crate) fn on_start_sampling(&mut self, peer_id: PeerId) -> Result<(), SamplingError> { + match self.status.clone() { + Status::NoPeers | Status::NotStarted => { + self.status = Status::Sampling(peer_id); + Ok(()) + } + other => Err(SamplingError::BadState(format!( + "bad state on_start_sampling expected NoPeers|NotStarted got {other:?}. column_index:{}", + self.column_index + ))), + } + } + + pub(crate) fn on_sampling_error(&mut self) -> Result { + match self.status.clone() { + Status::Sampling(peer_id) => { + self.peers_dont_have.insert(peer_id); + self.status = Status::NotStarted; + Ok(peer_id) + } + other => Err(SamplingError::BadState(format!( + "bad state on_sampling_error expected Sampling got {other:?}. column_index:{}", + self.column_index + ))), + } + } + + pub(crate) fn on_sampling_success(&mut self) -> Result<(), SamplingError> { + match &self.status { + Status::Sampling(_) => { + self.status = Status::Verified; + Ok(()) + } + other => Err(SamplingError::BadState(format!( + "bad state on_sampling_success expected Sampling got {other:?}. column_index:{}", + self.column_index + ))), + } + } + } +} diff --git a/beacon_node/src/cli.rs b/beacon_node/src/cli.rs index 54502f70646..67bc9d7d407 100644 --- a/beacon_node/src/cli.rs +++ b/beacon_node/src/cli.rs @@ -66,6 +66,25 @@ pub fn cli_app() -> Command { .display_order(0) .hide(true) ) + .arg( + // TODO(das): remove this before PeerDAS release + Arg::new("malicious-withhold-count") + .long("malicious-withhold-count") + .action(ArgAction::Set) + .help_heading(FLAG_HEADER) + .help("TESTING ONLY do not use this") + .hide(true) + .display_order(0) + ) + .arg( + Arg::new("enable-sampling") + .long("enable-sampling") + .action(ArgAction::SetTrue) + .help_heading(FLAG_HEADER) + .help("Enable peer sampling on data columns. Disabled by default.") + .hide(true) + .display_order(0) + ) .arg( Arg::new("subscribe-all-subnets") .long("subscribe-all-subnets") diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index 558b1cb6ebe..6f61748a2d3 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -204,6 +204,10 @@ pub fn get_config( client_config.chain.shuffling_cache_size = cache_size; } + if cli_args.get_flag("enable-sampling") { + client_config.chain.enable_sampling = true; + } + /* * Prometheus metrics HTTP server */ @@ -477,6 +481,12 @@ pub fn get_config( client_config.store.blob_prune_margin_epochs = blob_prune_margin_epochs; } + if let Some(malicious_withhold_count) = + clap_utils::parse_optional(cli_args, "malicious-withhold-count")? 
+ { + client_config.chain.malicious_withhold_count = malicious_withhold_count; + } + /* * Zero-ports * diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 8b144c1be93..fecd8e37442 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -18,11 +18,11 @@ use crate::metadata::{ STATE_UPPER_LIMIT_NO_RETAIN, }; use crate::state_cache::{PutStateOutcome, StateCache}; -use crate::{get_data_column_key, metrics, parse_data_column_key}; use crate::{ - get_key_for_col, ChunkWriter, DBColumn, DatabaseBlock, Error, ItemStore, KeyValueStoreOp, - PartialBeaconState, StoreItem, StoreOp, + get_data_column_key, get_key_for_col, ChunkWriter, DBColumn, DatabaseBlock, Error, ItemStore, + KeyValueStoreOp, PartialBeaconState, StoreItem, StoreOp, }; +use crate::{metrics, parse_data_column_key}; use itertools::process_results; use leveldb::iterator::LevelDBIterator; use lru::LruCache; diff --git a/common/eth2_network_config/built_in_network_configs/chiado/config.yaml b/common/eth2_network_config/built_in_network_configs/chiado/config.yaml index 066b27795cd..74fca4c5010 100644 --- a/common/eth2_network_config/built_in_network_configs/chiado/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/chiado/config.yaml @@ -138,6 +138,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 16384 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml b/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml index 23cf040b276..07bd21b35c2 100644 --- a/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/gnosis/config.yaml @@ -121,6 +121,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 16384 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml index cec2b61f213..67f1e5b6831 100644 --- a/common/eth2_network_config/built_in_network_configs/holesky/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/holesky/config.yaml @@ -125,6 +125,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml b/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml index 500b9e60a5c..acf4d83f323 100644 --- a/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/mainnet/config.yaml @@ -147,6 +147,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git 
a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml index 2a1809d6ce9..8b84d870103 100644 --- a/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml +++ b/common/eth2_network_config/built_in_network_configs/sepolia/config.yaml @@ -121,6 +121,6 @@ MIN_EPOCHS_FOR_BLOB_SIDECARS_REQUESTS: 4096 BLOB_SIDECAR_SUBNET_COUNT: 6 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/consensus/types/src/chain_spec.rs b/consensus/types/src/chain_spec.rs index 2c64d21130f..10b00d5ba1d 100644 --- a/consensus/types/src/chain_spec.rs +++ b/consensus/types/src/chain_spec.rs @@ -807,8 +807,8 @@ impl ChainSpec { * DAS params */ eip7594_fork_epoch: None, - custody_requirement: 1, - data_column_sidecar_subnet_count: 32, + custody_requirement: 4, + data_column_sidecar_subnet_count: 128, number_of_columns: 128, /* @@ -1129,8 +1129,8 @@ impl ChainSpec { * DAS params */ eip7594_fork_epoch: None, - custody_requirement: 1, - data_column_sidecar_subnet_count: 32, + custody_requirement: 4, + data_column_sidecar_subnet_count: 128, number_of_columns: 128, /* * Network specific @@ -2122,7 +2122,7 @@ mod yaml_tests { DEPOSIT_NETWORK_ID: 1 DEPOSIT_CONTRACT_ADDRESS: 0x00000000219ab540356cBB839Cbe05303d7705Fa CUSTODY_REQUIREMENT: 1 - DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 + DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 "#; diff --git a/consensus/types/src/lib.rs b/consensus/types/src/lib.rs index 2b874be4825..68d48ec7c8b 100644 --- a/consensus/types/src/lib.rs +++ b/consensus/types/src/lib.rs @@ -138,7 +138,7 @@ pub use crate::beacon_block_body::{ pub use crate::beacon_block_header::BeaconBlockHeader; pub use crate::beacon_committee::{BeaconCommittee, OwnedBeaconCommittee}; pub use crate::beacon_state::{Error as BeaconStateError, *}; -pub use crate::blob_sidecar::{BlobSidecar, BlobSidecarList, BlobsList}; +pub use crate::blob_sidecar::{BlobIdentifier, BlobSidecar, BlobSidecarList, BlobsList}; pub use crate::bls_to_execution_change::BlsToExecutionChange; pub use crate::chain_spec::{ChainSpec, Config, Domain}; pub use crate::checkpoint::Checkpoint; diff --git a/consensus/types/src/runtime_var_list.rs b/consensus/types/src/runtime_var_list.rs index 84ad5d074e7..af4ee87c158 100644 --- a/consensus/types/src/runtime_var_list.rs +++ b/consensus/types/src/runtime_var_list.rs @@ -1,20 +1,58 @@ -use ssz::{Decode, Encode}; -use ssz_derive::Encode; +use derivative::Derivative; +use serde::{Deserialize, Serialize}; +use ssz::Decode; +use ssz_types::Error; +use std::ops::{Deref, DerefMut, Index, IndexMut}; +use std::slice::SliceIndex; -#[derive(Debug, Clone, PartialEq, Encode)] -#[ssz(struct_behaviour = "transparent")] -pub struct RuntimeVariableList { +/// Emulates a SSZ `List`. +/// +/// An ordered, heap-allocated, variable-length, homogeneous collection of `T`, with no more than +/// `max_len` values. +/// +/// ## Example +/// +/// ``` +/// use ssz_types::{RuntimeVariableList}; +/// +/// let base: Vec = vec![1, 2, 3, 4]; +/// +/// // Create a `RuntimeVariableList` from a `Vec` that has the expected length. +/// let exact: RuntimeVariableList<_> = RuntimeVariableList::from_vec(base.clone(), 4); +/// assert_eq!(&exact[..], &[1, 2, 3, 4]); +/// +/// // Create a `RuntimeVariableList` from a `Vec` that is too long and the `Vec` is truncated. 
+/// let short: RuntimeVariableList<_> = RuntimeVariableList::from_vec(base.clone(), 3); +/// assert_eq!(&short[..], &[1, 2, 3]); +/// +/// // Create a `RuntimeVariableList` from a `Vec` that is shorter than the maximum. +/// let mut long: RuntimeVariableList<_> = RuntimeVariableList::from_vec(base, 5); +/// assert_eq!(&long[..], &[1, 2, 3, 4]); +/// +/// // Push a value to if it does not exceed the maximum +/// long.push(5).unwrap(); +/// assert_eq!(&long[..], &[1, 2, 3, 4, 5]); +/// +/// // Push a value to if it _does_ exceed the maximum. +/// assert!(long.push(6).is_err()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize, Derivative)] +#[derivative(PartialEq, Eq, Hash(bound = "T: std::hash::Hash"))] +#[serde(transparent)] +pub struct RuntimeVariableList { vec: Vec, - #[ssz(skip_serializing, skip_deserializing)] + #[serde(skip)] max_len: usize, } -impl RuntimeVariableList { - pub fn new(vec: Vec, max_len: usize) -> Result { +impl RuntimeVariableList { + /// Returns `Ok` if the given `vec` equals the fixed length of `Self`. Otherwise returns + /// `Err(OutOfBounds { .. })`. + pub fn new(vec: Vec, max_len: usize) -> Result { if vec.len() <= max_len { Ok(Self { vec, max_len }) } else { - Err(ssz_types::Error::OutOfBounds { + Err(Error::OutOfBounds { i: vec.len(), len: max_len, }) @@ -27,22 +65,50 @@ impl RuntimeVariableList { Self { vec, max_len } } - pub fn to_vec(&self) -> Vec { - self.vec.clone() + /// Create an empty list. + pub fn empty(max_len: usize) -> Self { + Self { + vec: vec![], + max_len, + } } pub fn as_slice(&self) -> &[T] { self.vec.as_slice() } + /// Returns the number of values presently in `self`. pub fn len(&self) -> usize { self.vec.len() } + /// True if `self` does not contain any values. pub fn is_empty(&self) -> bool { - self.vec.is_empty() + self.len() == 0 + } + + /// Returns the type-level maximum length. + pub fn max_len(&self) -> usize { + self.max_len + } + + /// Appends `value` to the back of `self`. + /// + /// Returns `Err(())` when appending `value` would exceed the maximum length. + pub fn push(&mut self, value: T) -> Result<(), Error> { + if self.vec.len() < self.max_len { + self.vec.push(value); + Ok(()) + } else { + Err(Error::OutOfBounds { + i: self.vec.len().saturating_add(1), + len: self.max_len, + }) + } } +} +impl RuntimeVariableList { pub fn from_ssz_bytes(bytes: &[u8], max_len: usize) -> Result { let vec = if bytes.is_empty() { vec![] @@ -54,7 +120,7 @@ impl RuntimeVariableList { if num_items > max_len { return Err(ssz::DecodeError::BytesInvalid(format!( - "VariableList of {} items exceeds maximum of {}", + "RuntimeVariableList of {} items exceeds maximum of {}", num_items, max_len ))); } @@ -73,65 +139,162 @@ impl RuntimeVariableList { } } +impl From> for Vec { + fn from(list: RuntimeVariableList) -> Vec { + list.vec + } +} + +impl> Index for RuntimeVariableList { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + Index::index(&self.vec, index) + } +} + +impl> IndexMut for RuntimeVariableList { + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + IndexMut::index_mut(&mut self.vec, index) + } +} + +impl Deref for RuntimeVariableList { + type Target = [T]; + + fn deref(&self) -> &[T] { + &self.vec[..] + } +} + +impl DerefMut for RuntimeVariableList { + fn deref_mut(&mut self) -> &mut [T] { + &mut self.vec[..] 
+ } +} + +impl<'a, T> IntoIterator for &'a RuntimeVariableList { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl IntoIterator for RuntimeVariableList { + type Item = T; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.vec.into_iter() + } +} + +impl ssz::Encode for RuntimeVariableList +where + T: ssz::Encode, +{ + fn is_ssz_fixed_len() -> bool { + >::is_ssz_fixed_len() + } + + fn ssz_append(&self, buf: &mut Vec) { + self.vec.ssz_append(buf) + } + + fn ssz_fixed_len() -> usize { + >::ssz_fixed_len() + } + + fn ssz_bytes_len(&self) -> usize { + self.vec.ssz_bytes_len() + } +} + #[cfg(test)] mod test { - use ssz_types::{typenum::U4, VariableList}; - use super::*; + use ssz::*; + use std::fmt::Debug; #[test] fn new() { let vec = vec![42; 5]; - let runtime_var_list: Result, _> = - RuntimeVariableList::new(vec, 4); - assert!(runtime_var_list.is_err()); + let fixed: Result, _> = RuntimeVariableList::new(vec, 4); + assert!(fixed.is_err()); let vec = vec![42; 3]; - let runtime_var_list: Result, _> = - RuntimeVariableList::new(vec, 4); - assert!(runtime_var_list.is_ok()); + let fixed: Result, _> = RuntimeVariableList::new(vec, 4); + assert!(fixed.is_ok()); let vec = vec![42; 4]; - let runtime_var_list: Result, _> = - RuntimeVariableList::new(vec, 4); - assert!(runtime_var_list.is_ok()); + let fixed: Result, _> = RuntimeVariableList::new(vec, 4); + assert!(fixed.is_ok()); + } + + #[test] + fn indexing() { + let vec = vec![1, 2]; + + let mut fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec.clone(), 8192); + + assert_eq!(fixed[0], 1); + assert_eq!(&fixed[0..1], &vec[0..1]); + assert_eq!(fixed[..].len(), 2); + + fixed[1] = 3; + assert_eq!(fixed[1], 3); } #[test] fn length() { + let vec = vec![42; 5]; + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec.clone(), 4); + assert_eq!(&fixed[..], &vec[0..4]); + let vec = vec![42; 3]; - let runtime_var_list: RuntimeVariableList = - RuntimeVariableList::new(vec.clone(), 4).unwrap(); - let var_list: VariableList = VariableList::from(vec.clone()); - assert_eq!(&runtime_var_list.as_slice()[0..3], &vec[..]); - assert_eq!(runtime_var_list.as_slice(), &vec![42, 42, 42][..]); - assert_eq!(runtime_var_list.len(), var_list.len()); + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec.clone(), 4); + assert_eq!(&fixed[0..3], &vec[..]); + assert_eq!(&fixed[..], &vec![42, 42, 42][..]); let vec = vec![]; - let runtime_var_list: RuntimeVariableList = RuntimeVariableList::new(vec, 4).unwrap(); - assert_eq!(runtime_var_list.as_slice(), &[] as &[u64]); - assert!(runtime_var_list.is_empty()); + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec, 4); + assert_eq!(&fixed[..], &[] as &[u64]); } #[test] - fn encode() { - let runtime_var_list: RuntimeVariableList = - RuntimeVariableList::new(vec![0; 2], 2).unwrap(); + fn deref() { + let vec = vec![0, 2, 4, 6]; + let fixed: RuntimeVariableList = RuntimeVariableList::from_vec(vec, 4); - assert_eq!(runtime_var_list.as_ssz_bytes(), vec![0, 0, 0, 0]); - assert_eq!( as Encode>::ssz_fixed_len(), 4); + assert_eq!(fixed.first(), Some(&0)); + assert_eq!(fixed.get(3), Some(&6)); + assert_eq!(fixed.get(4), None); } #[test] - fn round_trip() { - let item = RuntimeVariableList::::new(vec![42; 8], 8).unwrap(); - let encoded = &item.as_ssz_bytes(); - assert_eq!(item.ssz_bytes_len(), encoded.len()); - assert_eq!(RuntimeVariableList::from_ssz_bytes(encoded, 8), 
Ok(item)); + fn encode() { + let vec: RuntimeVariableList = RuntimeVariableList::from_vec(vec![0; 2], 2); + assert_eq!(vec.as_ssz_bytes(), vec![0, 0, 0, 0]); + assert_eq!( as Encode>::ssz_fixed_len(), 4); + } - let item = RuntimeVariableList::::new(vec![0; 8], 8).unwrap(); + fn round_trip(item: RuntimeVariableList) { + let max_len = item.max_len(); let encoded = &item.as_ssz_bytes(); assert_eq!(item.ssz_bytes_len(), encoded.len()); - assert_eq!(RuntimeVariableList::from_ssz_bytes(encoded, 8), Ok(item)); + assert_eq!( + RuntimeVariableList::from_ssz_bytes(encoded, max_len), + Ok(item) + ); + } + + #[test] + fn u16_len_8() { + round_trip::(RuntimeVariableList::from_vec(vec![42; 8], 8)); + round_trip::(RuntimeVariableList::from_vec(vec![0; 8], 8)); } } diff --git a/lighthouse/environment/tests/testnet_dir/config.yaml b/lighthouse/environment/tests/testnet_dir/config.yaml index 4fc7bc2dcff..84e8274f06e 100644 --- a/lighthouse/environment/tests/testnet_dir/config.yaml +++ b/lighthouse/environment/tests/testnet_dir/config.yaml @@ -100,6 +100,6 @@ ATTESTATION_SUBNET_PREFIX_BITS: 6 ATTESTATION_SUBNET_SHUFFLING_PREFIX_BITS: 3 # DAS -CUSTODY_REQUIREMENT: 1 -DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +CUSTODY_REQUIREMENT: 4 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 128 NUMBER_OF_COLUMNS: 128 \ No newline at end of file diff --git a/lighthouse/tests/beacon_node.rs b/lighthouse/tests/beacon_node.rs index 4fdd967c65c..f3832a1a1e5 100644 --- a/lighthouse/tests/beacon_node.rs +++ b/lighthouse/tests/beacon_node.rs @@ -825,6 +825,26 @@ fn network_target_peers_flag() { }); } #[test] +fn network_subscribe_all_data_column_subnets_flag() { + CommandLineTest::new() + .flag("subscribe-all-data-column-subnets", None) + .run_with_zero_port() + .with_config(|config| assert!(config.network.subscribe_all_data_column_subnets)); +} +#[test] +fn network_enable_sampling_flag() { + CommandLineTest::new() + .flag("enable-sampling", None) + .run_with_zero_port() + .with_config(|config| assert!(config.chain.enable_sampling)); +} +#[test] +fn network_enable_sampling_flag_default() { + CommandLineTest::new() + .run_with_zero_port() + .with_config(|config| assert!(!config.chain.enable_sampling)); +} +#[test] fn network_subscribe_all_subnets_flag() { CommandLineTest::new() .flag("subscribe-all-subnets", None) @@ -2022,6 +2042,13 @@ fn epochs_per_migration_override() { .run_with_zero_port() .with_config(|config| assert_eq!(config.chain.epochs_per_migration, 128)); } +#[test] +fn malicious_withhold_count_flag() { + CommandLineTest::new() + .flag("malicious-withhold-count", Some("128")) + .run_with_zero_port() + .with_config(|config| assert_eq!(config.chain.malicious_withhold_count, 128)); +} // Tests for Slasher flags. // Using `--slasher-max-db-size` to work around https://github.com/sigp/lighthouse/issues/2342 diff --git a/scripts/local_testnet/network_params.yaml b/scripts/local_testnet/network_params.yaml index 1c25c30f060..b53d88e52c5 100644 --- a/scripts/local_testnet/network_params.yaml +++ b/scripts/local_testnet/network_params.yaml @@ -1,4 +1,4 @@ -# Full configuration reference [here](https://github.com/kurtosis-tech/ethereum-package?tab=readme-ov-file#configuration). +# Full configuration reference [here](https://github.com/ethpandaops/ethereum-package?tab=readme-ov-file#configuration). 
participants: - el_type: geth el_image: ethereum/client-go:latest @@ -14,4 +14,4 @@ global_log_level: debug snooper_enabled: false additional_services: - dora - - prometheus_grafana \ No newline at end of file + - prometheus_grafana diff --git a/scripts/local_testnet/network_params_das_devnet_1.yaml b/scripts/local_testnet/network_params_das_devnet_1.yaml new file mode 100644 index 00000000000..fcd131a06ca --- /dev/null +++ b/scripts/local_testnet/network_params_das_devnet_1.yaml @@ -0,0 +1,8 @@ +participants: + - cl_type: lighthouse + cl_image: lighthouse:local +network_params: + network: peerdas-devnet-1 +global_log_level: debug +additional_services: + - prometheus_grafana \ No newline at end of file diff --git a/scripts/local_testnet/network_params_das_interop.yaml b/scripts/local_testnet/network_params_das_interop.yaml new file mode 100644 index 00000000000..0c8f9d7f49d --- /dev/null +++ b/scripts/local_testnet/network_params_das_interop.yaml @@ -0,0 +1,38 @@ +# Full configuration reference [here](https://github.com/ethpandaops/ethereum-package?tab=readme-ov-file#configuration). +participants: + - cl_type: prysm + cl_image: ethpandaops/prysm-beacon-chain:peerDAS + + - cl_type: lighthouse + cl_extra_params: [ + --subscribe-all-data-column-subnets, + ] + cl_image: lighthouse:local + + - cl_type: lighthouse + cl_image: lighthouse:local + + - cl_type: teku + cl_image: ethpandaops/teku:nashatyrev-das + +# - cl_type: nimbus +# cl_image: ethpandaops/nimbus-eth2:kzgpeerdas +# +# - cl_type: grandine +# cl_image: ethpandaops/grandine:das +# +# - cl_type: lodestar +# cl_image: ethpandaops/lodestar:peerDAS +network_params: + eip7594_fork_epoch: 0 + eip7594_fork_version: "0x50000038" + data_column_sidecar_subnet_count: 128 + samples_per_slot: 16 + custody_requirement: 4 +snooper_enabled: false +global_log_level: debug +ethereum_metrics_exporter_enabled: true +additional_services: + - dora + - goomy_blob + - prometheus_grafana diff --git a/scripts/local_testnet/network_params_das_local.yaml b/scripts/local_testnet/network_params_das_local.yaml new file mode 100644 index 00000000000..d1b646a34a3 --- /dev/null +++ b/scripts/local_testnet/network_params_das_local.yaml @@ -0,0 +1,20 @@ +participants: + - cl_type: lighthouse + cl_image: lighthouse:local + cl_extra_params: + - --subscribe-all-data-column-subnets + - --target-peers=2 + count: 2 + - cl_type: lighthouse + cl_image: lighthouse:local + cl_extra_params: + - --target-peers=2 + count: 1 +network_params: + eip7594_fork_epoch: 0 + seconds_per_slot: 6 +snooper_enabled: false +global_log_level: debug +additional_services: + - dora + - goomy_blob diff --git a/testing/ef_tests/check_all_files_accessed.py b/testing/ef_tests/check_all_files_accessed.py index 9495047e7f9..f6ae4cfa450 100755 --- a/testing/ef_tests/check_all_files_accessed.py +++ b/testing/ef_tests/check_all_files_accessed.py @@ -20,6 +20,8 @@ # following regular expressions, we will assume they are to be ignored (i.e., we are purposefully # *not* running the spec tests). excluded_paths = [ + # TODO(das): ignore until new spec test release with column subnet count = 64. 
+ "tests/.*/.*/.*/get_custody_columns/", # Eth1Block and PowBlock # # Intentionally omitted, as per https://github.com/sigp/lighthouse/issues/1835 @@ -33,10 +35,15 @@ "tests/.*/.*/ssz_static/LightClientStore", # LightClientSnapshot "tests/.*/.*/ssz_static/LightClientSnapshot", + # Unused container for das + "tests/.*/.*/ssz_static/MatrixEntry", + # Unused kzg methods + "tests/.*/.*/kzg/verify_cell_kzg_proof", # One of the EF researchers likes to pack the tarballs on a Mac ".*\\.DS_Store.*", # More Mac weirdness. "tests/mainnet/bellatrix/operations/deposit/pyspec_tests/deposit_with_previous_fork_version__valid_ineffective/._meta.yaml", + "tests/mainnet/eip7594/networking/get_custody_columns/pyspec_tests/get_custody_columns__short_node_id/._meta.yaml", # bls tests are moved to bls12-381-tests directory "tests/general/phase0/bls", # some bls tests are not included now diff --git a/testing/ef_tests/src/cases.rs b/testing/ef_tests/src/cases.rs index 2d6f661f0e4..63274ee0c03 100644 --- a/testing/ef_tests/src/cases.rs +++ b/testing/ef_tests/src/cases.rs @@ -1,6 +1,6 @@ use super::*; use rayon::prelude::*; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter}; use std::path::{Path, PathBuf}; use types::ForkName; @@ -18,11 +18,15 @@ mod fork; mod fork_choice; mod genesis_initialization; mod genesis_validity; +mod get_custody_columns; mod kzg_blob_to_kzg_commitment; mod kzg_compute_blob_kzg_proof; +mod kzg_compute_cells_and_kzg_proofs; mod kzg_compute_kzg_proof; +mod kzg_recover_cells_and_kzg_proofs; mod kzg_verify_blob_kzg_proof; mod kzg_verify_blob_kzg_proof_batch; +mod kzg_verify_cell_kzg_proof_batch; mod kzg_verify_kzg_proof; mod light_client_verify_is_better_update; mod merkle_proof_validity; @@ -49,11 +53,15 @@ pub use epoch_processing::*; pub use fork::ForkTest; pub use genesis_initialization::*; pub use genesis_validity::*; +pub use get_custody_columns::*; pub use kzg_blob_to_kzg_commitment::*; pub use kzg_compute_blob_kzg_proof::*; +pub use kzg_compute_cells_and_kzg_proofs::*; pub use kzg_compute_kzg_proof::*; +pub use kzg_recover_cells_and_kzg_proofs::*; pub use kzg_verify_blob_kzg_proof::*; pub use kzg_verify_blob_kzg_proof_batch::*; +pub use kzg_verify_cell_kzg_proof_batch::*; pub use kzg_verify_kzg_proof::*; pub use light_client_verify_is_better_update::*; pub use merkle_proof_validity::*; @@ -66,6 +74,19 @@ pub use ssz_generic::*; pub use ssz_static::*; pub use transition::TransitionTest; +#[derive(Debug, PartialEq)] +pub enum FeatureName { + Eip7594, +} + +impl Display for FeatureName { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + FeatureName::Eip7594 => f.write_str("eip7594"), + } + } +} + pub trait LoadCase: Sized { /// Load the test case from a test case directory. fn load_from_dir(_path: &Path, _fork_name: ForkName) -> Result; @@ -86,6 +107,13 @@ pub trait Case: Debug + Sync { true } + /// Whether or not this test exists for the given `feature_name`. + /// + /// Returns `true` by default. + fn is_enabled_for_feature(_feature_name: FeatureName) -> bool { + true + } + /// Execute a test and return the result. /// /// `case_index` reports the index of the case in the set of test cases. 
It is not strictly diff --git a/testing/ef_tests/src/cases/get_custody_columns.rs b/testing/ef_tests/src/cases/get_custody_columns.rs new file mode 100644 index 00000000000..efe5b147e44 --- /dev/null +++ b/testing/ef_tests/src/cases/get_custody_columns.rs @@ -0,0 +1,43 @@ +use super::*; +use ethereum_types::U256; +use serde::Deserialize; +use std::marker::PhantomData; +use types::DataColumnSubnetId; + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct GetCustodyColumns { + pub node_id: String, + pub custody_subnet_count: u64, + pub result: Vec, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for GetCustodyColumns { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("meta.yaml").as_path()) + } +} + +impl Case for GetCustodyColumns { + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let spec = E::default_spec(); + let node_id = U256::from_dec_str(&self.node_id) + .map_err(|e| Error::FailedToParseTest(format!("{e:?}")))?; + let computed = DataColumnSubnetId::compute_custody_columns::( + node_id, + self.custody_subnet_count, + &spec, + ) + .collect::>(); + let expected = &self.result; + if computed == *expected { + Ok(()) + } else { + Err(Error::NotEqual(format!( + "Got {computed:?}\nExpected {expected:?}" + ))) + } + } +} diff --git a/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs b/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs index aa48c127b20..5194c3336c8 100644 --- a/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs +++ b/testing/ef_tests/src/cases/kzg_blob_to_kzg_commitment.rs @@ -31,9 +31,12 @@ impl Case for KZGBlobToKZGCommitment { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let kzg = get_kzg()?; - let commitment = parse_blob::(&self.input.blob).and_then(|blob| { blob_to_kzg_commitment::(&kzg, &blob).map_err(|e| { Error::InternalError(format!("Failed to compute kzg commitment: {:?}", e)) diff --git a/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs index 71e1ff8e23d..61e7248deeb 100644 --- a/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_compute_blob_kzg_proof.rs @@ -32,6 +32,10 @@ impl Case for KZGComputeBlobKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGComputeBlobKZGProofInput| -> Result<_, Error> { let blob = parse_blob::(&input.blob)?; diff --git a/testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs b/testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs new file mode 100644 index 00000000000..74a44fdddfc --- /dev/null +++ b/testing/ef_tests/src/cases/kzg_compute_cells_and_kzg_proofs.rs @@ -0,0 +1,67 @@ +use super::*; +use crate::case_result::compare_result; +use kzg::CellsAndKzgProofs; +use serde::Deserialize; +use std::marker::PhantomData; + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct KZGComputeCellsAndKzgProofsInput { + pub blob: String, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct 
KZGComputeCellsAndKZGProofs { + pub input: KZGComputeCellsAndKzgProofsInput, + pub output: Option<(Vec, Vec)>, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for KZGComputeCellsAndKZGProofs { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("data.yaml").as_path()) + } +} + +impl Case for KZGComputeCellsAndKZGProofs { + fn is_enabled_for_fork(fork_name: ForkName) -> bool { + fork_name == ForkName::Deneb + } + + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let cells_and_proofs = parse_blob::(&self.input.blob).and_then(|blob| { + let blob = blob.as_ref().try_into().map_err(|e| { + Error::InternalError(format!("Failed to convert blob to kzg blob: {e:?}")) + })?; + let kzg = get_kzg()?; + kzg.compute_cells_and_proofs(blob).map_err(|e| { + Error::InternalError(format!("Failed to compute cells and kzg proofs: {e:?}")) + }) + }); + + let expected = self.output.as_ref().and_then(|(cells, proofs)| { + parse_cells_and_proofs(cells, proofs) + .map(|(cells, proofs)| { + ( + cells + .try_into() + .map_err(|e| { + Error::FailedToParseTest(format!("Failed to parse cells: {e:?}")) + }) + .unwrap(), + proofs + .try_into() + .map_err(|e| { + Error::FailedToParseTest(format!("Failed to parse proofs: {e:?}")) + }) + .unwrap(), + ) + }) + .ok() + }); + + compare_result::(&cells_and_proofs, &expected) + } +} diff --git a/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs index 98bb7492491..ca19882d501 100644 --- a/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_compute_kzg_proof.rs @@ -39,6 +39,10 @@ impl Case for KZGComputeKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGComputeKZGProofInput| -> Result<_, Error> { let blob = parse_blob::(&input.blob)?; diff --git a/testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs b/testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs new file mode 100644 index 00000000000..fc41f1f2a62 --- /dev/null +++ b/testing/ef_tests/src/cases/kzg_recover_cells_and_kzg_proofs.rs @@ -0,0 +1,97 @@ +use super::*; +use crate::case_result::compare_result; +use kzg::{CellsAndKzgProofs, KzgProof}; +use serde::Deserialize; +use std::convert::Infallible; +use std::marker::PhantomData; + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct KZGRecoverCellsAndKzgProofsInput { + pub cell_indices: Vec, + pub cells: Vec, + pub proofs: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct KZGRecoverCellsAndKZGProofs { + pub input: KZGRecoverCellsAndKzgProofsInput, + pub output: Option<(Vec, Vec)>, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for KZGRecoverCellsAndKZGProofs { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("data.yaml").as_path()) + } +} + +impl Case for KZGRecoverCellsAndKZGProofs { + fn is_enabled_for_fork(fork_name: ForkName) -> bool { + fork_name == ForkName::Deneb + } + + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let parse_input = |input: &KZGRecoverCellsAndKzgProofsInput| { + // Proofs are not used for `recover_cells_and_compute_kzg_proofs`, they are 
only checked + // to satisfy the spec tests. + if input.proofs.len() != input.cell_indices.len() { + return Err(Error::SkippedKnownFailure); + } + + let proofs = input + .proofs + .iter() + .map(|s| parse_proof(s)) + .collect::, Error>>()?; + + let cells = input + .cells + .iter() + .map(|s| parse_cell(s)) + .collect::, Error>>()?; + + Ok((proofs, cells, input.cell_indices.clone())) + }; + + let result = + parse_input(&self.input).and_then(|(input_proofs, input_cells, cell_indices)| { + let input_cells_ref: Vec<_> = input_cells.iter().map(|cell| &**cell).collect(); + let kzg = get_kzg()?; + let (cells, proofs) = kzg + .recover_cells_and_compute_kzg_proofs( + cell_indices.as_slice(), + input_cells_ref.as_slice(), + ) + .map_err(|e| { + Error::InternalError(format!( + "Failed to recover cells and kzg proofs: {e:?}" + )) + })?; + + // Check recovered proofs matches inputs proofs. This is done only to satisfy the + // spec tests, as the ckzg library recomputes all proofs and does not require + // proofs to recover. + for (input_proof, cell_id) in input_proofs.iter().zip(cell_indices) { + if let Err(e) = compare_result::( + &Ok(*input_proof), + &proofs.get(cell_id as usize).cloned(), + ) { + return Err(e); + } + } + + Ok((cells, proofs)) + }); + + let expected = self + .output + .as_ref() + .and_then(|(cells, proofs)| parse_cells_and_proofs(cells, proofs).ok()) + .map(|(cells, proofs)| (cells.try_into().unwrap(), proofs.try_into().unwrap())); + + compare_result::(&result, &expected) + } +} diff --git a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs index f68f0fd7ed0..4e56b2b44c3 100644 --- a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof.rs @@ -2,7 +2,7 @@ use super::*; use crate::case_result::compare_result; use beacon_chain::kzg_utils::validate_blob; use eth2_network_config::TRUSTED_SETUP_BYTES; -use kzg::{Error as KzgError, Kzg, KzgCommitment, KzgProof, TrustedSetup}; +use kzg::{Cell, Error as KzgError, Kzg, KzgCommitment, KzgProof, TrustedSetup}; use serde::Deserialize; use std::marker::PhantomData; use types::Blob; @@ -10,10 +10,38 @@ use types::Blob; pub fn get_kzg() -> Result { let trusted_setup: TrustedSetup = serde_json::from_reader(TRUSTED_SETUP_BYTES) .map_err(|e| Error::InternalError(format!("Failed to initialize kzg: {:?}", e)))?; + // TODO(das): need to enable these tests when rayon issues in rust_eth_kzg are fixed Kzg::new_from_trusted_setup(trusted_setup) .map_err(|e| Error::InternalError(format!("Failed to initialize kzg: {:?}", e))) } +pub fn parse_cells_and_proofs( + cells: &[String], + proofs: &[String], +) -> Result<(Vec, Vec), Error> { + let cells = cells + .iter() + .map(|s| parse_cell(s.as_str())) + .collect::, Error>>()?; + + let proofs = proofs + .iter() + .map(|s| parse_proof(s.as_str())) + .collect::, Error>>()?; + + Ok((cells, proofs)) +} + +pub fn parse_cell(cell: &str) -> Result { + hex::decode(strip_0x(cell)?) + .map_err(|e| Error::FailedToParseTest(format!("Failed to parse cell: {:?}", e))) + .and_then(|bytes| { + bytes + .try_into() + .map_err(|e| Error::FailedToParseTest(format!("Failed to parse cell: {:?}", e))) + }) +} + pub fn parse_proof(proof: &str) -> Result { hex::decode(strip_0x(proof)?) 
.map_err(|e| Error::FailedToParseTest(format!("Failed to parse proof: {:?}", e))) @@ -80,6 +108,10 @@ impl Case for KZGVerifyBlobKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGVerifyBlobKZGProofInput| -> Result<(Blob, KzgCommitment, KzgProof), Error> { let blob = parse_blob::(&input.blob)?; diff --git a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs index ae5caedf069..cfe15d5c05a 100644 --- a/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs +++ b/testing/ef_tests/src/cases/kzg_verify_blob_kzg_proof_batch.rs @@ -33,6 +33,10 @@ impl Case for KZGVerifyBlobKZGProofBatch { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGVerifyBlobKZGProofBatchInput| -> Result<_, Error> { let blobs = input @@ -54,7 +58,6 @@ impl Case for KZGVerifyBlobKZGProofBatch { }; let kzg = get_kzg()?; - let result = parse_input(&self.input).and_then( |(commitments, blobs, proofs)| match validate_blobs::( diff --git a/testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs b/testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs new file mode 100644 index 00000000000..9c651d2d633 --- /dev/null +++ b/testing/ef_tests/src/cases/kzg_verify_cell_kzg_proof_batch.rs @@ -0,0 +1,77 @@ +use super::*; +use crate::case_result::compare_result; +use kzg::{Bytes48, Error as KzgError}; +use serde::Deserialize; +use std::marker::PhantomData; + +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct KZGVerifyCellKZGProofBatchInput { + pub row_commitments: Vec, + pub row_indices: Vec, + pub column_indices: Vec, + pub cells: Vec, + pub proofs: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(bound = "E: EthSpec", deny_unknown_fields)] +pub struct KZGVerifyCellKZGProofBatch { + pub input: KZGVerifyCellKZGProofBatchInput, + pub output: Option, + #[serde(skip)] + _phantom: PhantomData, +} + +impl LoadCase for KZGVerifyCellKZGProofBatch { + fn load_from_dir(path: &Path, _fork_name: ForkName) -> Result { + decode::yaml_decode_file(path.join("data.yaml").as_path()) + } +} + +impl Case for KZGVerifyCellKZGProofBatch { + fn is_enabled_for_fork(fork_name: ForkName) -> bool { + fork_name == ForkName::Deneb + } + + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + let parse_input = |input: &KZGVerifyCellKZGProofBatchInput| -> Result<_, Error> { + let (cells, proofs) = parse_cells_and_proofs(&input.cells, &input.proofs)?; + let row_commitments = input + .row_commitments + .iter() + .map(|s| parse_commitment(s)) + .collect::, _>>()?; + let coordinates = input + .row_indices + .iter() + .zip(&input.column_indices) + .map(|(&row, &col)| (row as u64, col as u64)) + .collect::>(); + + Ok((cells, proofs, coordinates, row_commitments)) + }; + + let result = + parse_input(&self.input).and_then(|(cells, proofs, coordinates, commitments)| { + let proofs: Vec = proofs.iter().map(|&proof| proof.into()).collect(); + let commitments: Vec = commitments.iter().map(|&c| c.into()).collect(); + let cells = cells.iter().map(|c| c.as_ref()).collect::>(); + let column_indices = coordinates + 
.into_iter() + .map(|(_row, col)| col) + .collect::>(); + let kzg = get_kzg()?; + match kzg.verify_cell_proof_batch(&cells, &proofs, column_indices, &commitments) { + Ok(_) => Ok(true), + Err(KzgError::KzgVerificationFailed) => Ok(false), + Err(e) => Err(Error::InternalError(format!( + "Failed to validate cells: {:?}", + e + ))), + } + }); + + compare_result::(&result, &self.output) + } +} diff --git a/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs b/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs index e395558e0e1..4468176c277 100644 --- a/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs +++ b/testing/ef_tests/src/cases/kzg_verify_kzg_proof.rs @@ -33,6 +33,10 @@ impl Case for KZGVerifyKZGProof { fork_name == ForkName::Deneb } + fn is_enabled_for_feature(feature_name: FeatureName) -> bool { + feature_name != FeatureName::Eip7594 + } + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { let parse_input = |input: &KZGVerifyKZGProofInput| -> Result<_, Error> { let commitment = parse_commitment(&input.commitment)?; diff --git a/testing/ef_tests/src/cases/merkle_proof_validity.rs b/testing/ef_tests/src/cases/merkle_proof_validity.rs index 8d5c0687753..b68bbdc5d39 100644 --- a/testing/ef_tests/src/cases/merkle_proof_validity.rs +++ b/testing/ef_tests/src/cases/merkle_proof_validity.rs @@ -3,7 +3,8 @@ use crate::decode::{ssz_decode_file, ssz_decode_state, yaml_decode_file}; use serde::Deserialize; use tree_hash::Hash256; use types::{ - BeaconBlockBody, BeaconBlockBodyDeneb, BeaconBlockBodyElectra, BeaconState, FullPayload, + BeaconBlockBody, BeaconBlockBodyDeneb, BeaconBlockBodyElectra, BeaconState, FixedVector, + FullPayload, Unsigned, }; #[derive(Debug, Clone, Deserialize)] @@ -81,12 +82,18 @@ impl Case for MerkleProofValidity { } } -#[derive(Debug, Clone, Deserialize)] -#[serde(bound = "E: EthSpec")] +#[derive(Debug, Clone)] pub struct KzgInclusionMerkleProofValidity { pub metadata: Option, pub block: BeaconBlockBody, pub merkle_proof: MerkleProof, + pub proof_type: KzgInclusionProofType, +} + +#[derive(Debug, Clone)] +pub enum KzgInclusionProofType { + Single, + List, } impl LoadCase for KzgInclusionMerkleProofValidity { @@ -115,21 +122,33 @@ impl LoadCase for KzgInclusionMerkleProofValidity { None }; + let file_name = path + .file_name() + .and_then(|file_name| file_name.to_str()) + .ok_or(Error::InternalError( + "failed to read file name from path".to_string(), + ))?; + + let proof_type = if file_name.starts_with("blob_kzg_commitments") { + KzgInclusionProofType::List + } else { + KzgInclusionProofType::Single + }; + Ok(Self { metadata, block, merkle_proof, + proof_type, }) } } -impl Case for KzgInclusionMerkleProofValidity { - fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { - let Ok(proof) = self.block.to_ref().kzg_commitment_merkle_proof(0) else { - return Err(Error::FailedToParseTest( - "Could not retrieve merkle proof".to_string(), - )); - }; +impl KzgInclusionMerkleProofValidity { + fn verify_kzg_inclusion_proof( + &self, + proof: FixedVector, + ) -> Result<(), Error> { let proof_len = proof.len(); let branch_len = self.merkle_proof.branch.len(); if proof_len != branch_len { @@ -153,3 +172,29 @@ impl Case for KzgInclusionMerkleProofValidity { Ok(()) } } +impl Case for KzgInclusionMerkleProofValidity { + fn result(&self, _case_index: usize, _fork_name: ForkName) -> Result<(), Error> { + match self.proof_type { + KzgInclusionProofType::Single => { + let proof = self + .block + .to_ref() + 
.kzg_commitment_merkle_proof(0) + .map_err(|e| { + Error::FailedToParseTest(format!("Could not retrieve merkle proof: {e:?}")) + })?; + self.verify_kzg_inclusion_proof(proof) + } + KzgInclusionProofType::List => { + let proof = self + .block + .to_ref() + .kzg_commitments_merkle_proof() + .map_err(|e| { + Error::FailedToParseTest(format!("Could not retrieve merkle proof: {e:?}")) + })?; + self.verify_kzg_inclusion_proof(proof) + } + } + } +} diff --git a/testing/ef_tests/src/handler.rs b/testing/ef_tests/src/handler.rs index 52fc58f3d8c..dacaba1dcab 100644 --- a/testing/ef_tests/src/handler.rs +++ b/testing/ef_tests/src/handler.rs @@ -1,12 +1,15 @@ use crate::cases::{self, Case, Cases, EpochTransition, LoadCase, Operation}; -use crate::type_name; use crate::type_name::TypeName; +use crate::{type_name, FeatureName}; use derivative::Derivative; use std::fs::{self, DirEntry}; use std::marker::PhantomData; use std::path::PathBuf; use types::{BeaconState, EthSpec, ForkName}; +const EIP7594_FORK: ForkName = ForkName::Deneb; +const EIP7594_TESTS: [&str; 4] = ["ssz_static", "merkle_proof", "networking", "kzg"]; + pub trait Handler { type Case: Case + LoadCase; @@ -28,10 +31,21 @@ pub trait Handler { Self::Case::is_enabled_for_fork(fork_name) } + fn is_enabled_for_feature(&self, feature_name: FeatureName) -> bool { + Self::Case::is_enabled_for_feature(feature_name) + } + fn run(&self) { for fork_name in ForkName::list_all() { if !self.disabled_forks().contains(&fork_name) && self.is_enabled_for_fork(fork_name) { - self.run_for_fork(fork_name) + self.run_for_fork(fork_name); + + if fork_name == EIP7594_FORK + && EIP7594_TESTS.contains(&Self::runner_name()) + && self.is_enabled_for_feature(FeatureName::Eip7594) + { + self.run_for_feature(EIP7594_FORK, FeatureName::Eip7594); + } } } } @@ -81,6 +95,47 @@ pub trait Handler { ); crate::results::assert_tests_pass(&name, &handler_path, &results); } + + fn run_for_feature(&self, fork_name: ForkName, feature_name: FeatureName) { + let feature_name_str = feature_name.to_string(); + + let handler_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("consensus-spec-tests") + .join("tests") + .join(Self::config_name()) + .join(&feature_name_str) + .join(Self::runner_name()) + .join(self.handler_name()); + + // Iterate through test suites + let as_directory = |entry: Result| -> Option { + entry + .ok() + .filter(|e| e.file_type().map(|ty| ty.is_dir()).unwrap()) + }; + + let test_cases = fs::read_dir(&handler_path) + .unwrap_or_else(|e| panic!("handler dir {} exists: {:?}", handler_path.display(), e)) + .filter_map(as_directory) + .flat_map(|suite| fs::read_dir(suite.path()).expect("suite dir exists")) + .filter_map(as_directory) + .map(|test_case_dir| { + let path = test_case_dir.path(); + let case = Self::Case::load_from_dir(&path, fork_name).expect("test should load"); + (path, case) + }) + .collect(); + + let results = Cases { test_cases }.test_results(fork_name, Self::use_rayon()); + + let name = format!( + "{}/{}/{}", + feature_name_str, + Self::runner_name(), + self.handler_name() + ); + crate::results::assert_tests_pass(&name, &handler_path, &results); + } } macro_rules! 
bls_eth_handler { @@ -784,6 +839,86 @@ impl Handler for KZGVerifyKZGProofHandler { } } +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct GetCustodyColumnsHandler(PhantomData); + +impl Handler for GetCustodyColumnsHandler { + type Case = cases::GetCustodyColumns; + + fn config_name() -> &'static str { + E::name() + } + + fn runner_name() -> &'static str { + "networking" + } + + fn handler_name(&self) -> String { + "get_custody_columns".into() + } +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct KZGComputeCellsAndKZGProofHandler(PhantomData); + +impl Handler for KZGComputeCellsAndKZGProofHandler { + type Case = cases::KZGComputeCellsAndKZGProofs; + + fn config_name() -> &'static str { + "general" + } + + fn runner_name() -> &'static str { + "kzg" + } + + fn handler_name(&self) -> String { + "compute_cells_and_kzg_proofs".into() + } +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct KZGVerifyCellKZGProofBatchHandler(PhantomData); + +impl Handler for KZGVerifyCellKZGProofBatchHandler { + type Case = cases::KZGVerifyCellKZGProofBatch; + + fn config_name() -> &'static str { + "general" + } + + fn runner_name() -> &'static str { + "kzg" + } + + fn handler_name(&self) -> String { + "verify_cell_kzg_proof_batch".into() + } +} + +#[derive(Derivative)] +#[derivative(Default(bound = ""))] +pub struct KZGRecoverCellsAndKZGProofHandler(PhantomData); + +impl Handler for KZGRecoverCellsAndKZGProofHandler { + type Case = cases::KZGRecoverCellsAndKZGProofs; + + fn config_name() -> &'static str { + "general" + } + + fn runner_name() -> &'static str { + "kzg" + } + + fn handler_name(&self) -> String { + "recover_cells_and_kzg_proofs".into() + } +} + #[derive(Derivative)] #[derivative(Default(bound = ""))] pub struct MerkleProofValidityHandler(PhantomData); diff --git a/testing/ef_tests/src/lib.rs b/testing/ef_tests/src/lib.rs index e55551be701..e7367719d72 100644 --- a/testing/ef_tests/src/lib.rs +++ b/testing/ef_tests/src/lib.rs @@ -1,10 +1,11 @@ pub use case_result::CaseResult; pub use cases::WithdrawalsPayload; pub use cases::{ - Case, EffectiveBalanceUpdates, Eth1DataReset, HistoricalRootsUpdate, HistoricalSummariesUpdate, - InactivityUpdates, JustificationAndFinalization, ParticipationFlagUpdates, - ParticipationRecordUpdates, PendingBalanceDeposits, PendingConsolidations, RandaoMixesReset, - RegistryUpdates, RewardsAndPenalties, Slashings, SlashingsReset, SyncCommitteeUpdates, + Case, EffectiveBalanceUpdates, Eth1DataReset, FeatureName, HistoricalRootsUpdate, + HistoricalSummariesUpdate, InactivityUpdates, JustificationAndFinalization, + ParticipationFlagUpdates, ParticipationRecordUpdates, PendingBalanceDeposits, + PendingConsolidations, RandaoMixesReset, RegistryUpdates, RewardsAndPenalties, Slashings, + SlashingsReset, SyncCommitteeUpdates, }; pub use decode::log_file_access; pub use error::Error; diff --git a/testing/ef_tests/src/type_name.rs b/testing/ef_tests/src/type_name.rs index c61dfef09cc..49de073d6ae 100644 --- a/testing/ef_tests/src/type_name.rs +++ b/testing/ef_tests/src/type_name.rs @@ -1,5 +1,4 @@ //! Mapping from types to canonical string identifiers used in testing. 
-use types::blob_sidecar::BlobIdentifier; use types::historical_summary::HistoricalSummary; use types::*; @@ -58,7 +57,9 @@ type_name_generic!(BeaconBlockBodyElectra, "BeaconBlockBody"); type_name!(BeaconBlockHeader); type_name_generic!(BeaconState); type_name!(BlobIdentifier); +type_name!(DataColumnIdentifier); type_name_generic!(BlobSidecar); +type_name_generic!(DataColumnSidecar); type_name!(Checkpoint); type_name!(ConsolidationRequest); type_name_generic!(ContributionAndProof); diff --git a/testing/ef_tests/tests/tests.rs b/testing/ef_tests/tests/tests.rs index 7f69521bb67..2c62edb62cc 100644 --- a/testing/ef_tests/tests/tests.rs +++ b/testing/ef_tests/tests/tests.rs @@ -237,8 +237,9 @@ macro_rules! ssz_static_test_no_run { #[cfg(feature = "fake_crypto")] mod ssz_static { - use ef_tests::{Handler, SszStaticHandler, SszStaticTHCHandler, SszStaticWithSpecHandler}; - use types::blob_sidecar::BlobIdentifier; + use ef_tests::{ + FeatureName, Handler, SszStaticHandler, SszStaticTHCHandler, SszStaticWithSpecHandler, + }; use types::historical_summary::HistoricalSummary; use types::{ AttesterSlashingBase, AttesterSlashingElectra, ConsolidationRequest, DepositRequest, @@ -627,6 +628,22 @@ mod ssz_static { SszStaticHandler::::capella_and_later().run(); } + #[test] + fn data_column_sidecar() { + SszStaticHandler::, MinimalEthSpec>::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + SszStaticHandler::, MainnetEthSpec>::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + } + + #[test] + fn data_column_identifier() { + SszStaticHandler::::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + SszStaticHandler::::deneb_only() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + } + #[test] fn consolidation() { SszStaticHandler::::electra_and_later().run(); @@ -884,6 +901,26 @@ fn kzg_verify_kzg_proof() { KZGVerifyKZGProofHandler::::default().run(); } +/* TODO(das): enable these tests +#[test] +fn kzg_compute_cells_and_proofs() { + KZGComputeCellsAndKZGProofHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +} + +#[test] +fn kzg_verify_cell_proof_batch() { + KZGVerifyCellKZGProofBatchHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +} + +#[test] +fn kzg_recover_cells_and_proofs() { + KZGRecoverCellsAndKZGProofHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +} +*/ + #[test] fn merkle_proof_validity() { MerkleProofValidityHandler::::default().run(); @@ -908,3 +945,11 @@ fn rewards() { RewardsHandler::::new(handler).run(); } } + +#[test] +fn get_custody_columns() { + GetCustodyColumnsHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); + GetCustodyColumnsHandler::::default() + .run_for_feature(ForkName::Deneb, FeatureName::Eip7594); +}
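Below is a minimal usage sketch (not part of the patch) of the custody-column computation exercised by the new `GetCustodyColumns` spec-test case above. The `main` wrapper, the example node id `42`, and the custody subnet count of 4 (mirroring `CUSTODY_REQUIREMENT` in the testnet config earlier in this patch) are illustrative assumptions; the types and the `compute_custody_columns` call are taken from the diff.

use ethereum_types::U256;
use types::{DataColumnSubnetId, EthSpec, MainnetEthSpec};

fn main() {
    let spec = MainnetEthSpec::default_spec();
    // Hypothetical node id; the spec tests feed decimal node ids from meta.yaml instead.
    let node_id = U256::from_dec_str("42").expect("valid decimal node id");
    // Illustrative value, mirroring CUSTODY_REQUIREMENT: 4 from the testnet config above.
    let custody_subnet_count = 4;
    // Same call the GetCustodyColumns case makes against the consensus-spec-tests vectors.
    let columns = DataColumnSubnetId::compute_custody_columns::<MainnetEthSpec>(
        node_id,
        custody_subnet_count,
        &spec,
    )
    .collect::<Vec<_>>();
    println!("custodied columns ({}): {:?}", columns.len(), columns);
}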