From 579cca600066be178733ef1e466090c09672e2bf Mon Sep 17 00:00:00 2001 From: Matthias Fasching <5011972+fasmat@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:12:16 +0000 Subject: [PATCH] Prevent node from marking itself as malicious (#5518) ## Motivation The ATX handler should not create malfeasance proofs against the node itself. In case we have a bug somewhere that would publish 2 ATXs in a single epoch fail validation, but do not mark yourself as malicious Merge after #5484 --- CHANGELOG.md | 31 +++++++++++ activation/handler.go | 104 ++++++++++++++++++++++++------------- activation/handler_test.go | 60 +++++++++++++++++++++ node/node.go | 1 + 4 files changed, 159 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c608ba2dd8..35dbf6c6ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -88,6 +88,37 @@ configuration is as follows: ### Features +### Improvements + +* [#5518](https://github.com/spacemeshos/go-spacemesh/pull/5518) In rare cases the node could create a malfeasance + proof against itself. This is now prevented. + +## Release v1.3.7 + +### Improvements + +* [#5502](https://github.com/spacemeshos/go-spacemesh/pull/5502) + Increase limits of p2p messages to compensate for the increased number of nodes on the network. + +## Release v1.3.6 + +### Improvements + +* [#5479](https://github.com/spacemeshos/go-spacemesh/pull/5486) + p2p: make AutoNAT service limits configurable. This helps with AutoNAT dialback to determine + nodes' reachability status. + +* [#5490](https://github.com/spacemeshos/go-spacemesh/pull/5490) + The path in `smeshing-opts-datadir` used to be resolved relative to the location of the `service` binary when running + the node in supervised mode. This is no longer the case. The path is now resolved relative to the current working + directory. + +* [#5489](https://github.com/spacemeshos/go-spacemesh/pull/5489) + Fix problem in POST proving where too many files were opened at the same time. + +* [#5498](https://github.com/spacemeshos/go-spacemesh/pull/5498) + Reduce the default number of CPU cores that are used for verifying incoming ATXs to half of the available cores. + * [#5462](https://github.com/spacemeshos/go-spacemesh/pull/5462) Add separate metric for failed p2p server requests ### Improvements diff --git a/activation/handler.go b/activation/handler.go index 4846df3e39..4c7b1f2b7b 100644 --- a/activation/handler.go +++ b/activation/handler.go @@ -8,6 +8,7 @@ import ( "time" "github.com/spacemeshos/post/shared" + "go.uber.org/zap" "golang.org/x/exp/maps" "github.com/spacemeshos/go-spacemesh/atxsdata" @@ -50,6 +51,9 @@ type Handler struct { fetcher system.Fetcher poetCfg PoetConfig + signerMtx sync.Mutex + signers map[types.NodeID]*signing.EdSigner + // inProgress map gathers ATXs that are currently being processed. // It's used to avoid processing the same ATX twice. inProgress map[types.ATXID][]chan error @@ -89,10 +93,23 @@ func NewHandler( tortoise: tortoise, poetCfg: poetCfg, + signers: make(map[types.NodeID]*signing.EdSigner), inProgress: make(map[types.ATXID][]chan error), } } +func (h *Handler) Register(sig *signing.EdSigner) { + h.signerMtx.Lock() + defer h.signerMtx.Unlock() + if _, exists := h.signers[sig.NodeID()]; exists { + h.log.Error("signing key already registered", zap.Stringer("id", sig.NodeID())) + return + } + + h.log.Info("registered signing key", zap.Stringer("id", sig.NodeID())) + h.signers[sig.NodeID()] = sig +} + // ProcessAtx validates the active set size declared in the atx, and contextually validates the atx according to atx // validation rules it then stores the atx with flag set to validity of the atx. // @@ -372,48 +389,61 @@ func (h *Handler) storeAtx(ctx context.Context, atx *types.VerifiedActivationTx) return fmt.Errorf("checking if node is malicious: %w", err) } var proof *types.MalfeasanceProof - if err := h.cdb.WithTx(ctx, func(dbtx *sql.Tx) error { - if !malicious { - prev, err := atxs.GetByEpochAndNodeID(dbtx, atx.PublishEpoch, atx.SmesherID) - if err != nil && !errors.Is(err, sql.ErrNotFound) { - return err + if err := h.cdb.WithTx(ctx, func(tx *sql.Tx) error { + if malicious { + if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) { + return fmt.Errorf("add atx to db: %w", err) } - // do ID check to be absolutely sure. - if prev != nil && prev.ID() != atx.ID() { - var atxProof types.AtxProof - for i, a := range []*types.VerifiedActivationTx{prev, atx} { - atxProof.Messages[i] = types.AtxProofMsg{ - InnerMsg: types.ATXMetadata{ - PublishEpoch: a.PublishEpoch, - MsgHash: types.BytesToHash(a.HashInnerBytes()), - }, - SmesherID: a.SmesherID, - Signature: a.Signature, - } - } - proof = &types.MalfeasanceProof{ - Layer: atx.PublishEpoch.FirstLayer(), - Proof: types.Proof{ - Type: types.MultipleATXs, - Data: &atxProof, + return nil + } + + prev, err := atxs.GetByEpochAndNodeID(tx, atx.PublishEpoch, atx.SmesherID) + if err != nil && !errors.Is(err, sql.ErrNotFound) { + return err + } + + // do ID check to be absolutely sure. + if prev != nil && prev.ID() != atx.ID() { + if _, ok := h.signers[atx.SmesherID]; ok { + // if we land here we tried to publish 2 ATXs in the same epoch + // don't punish ourselves but fail validation and thereby the handling of the incoming ATX + return fmt.Errorf("%s already published an ATX in epoch %d", atx.SmesherID.ShortString(), atx.PublishEpoch) + } + + var atxProof types.AtxProof + for i, a := range []*types.VerifiedActivationTx{prev, atx} { + atxProof.Messages[i] = types.AtxProofMsg{ + InnerMsg: types.ATXMetadata{ + PublishEpoch: a.PublishEpoch, + MsgHash: types.BytesToHash(a.HashInnerBytes()), }, + SmesherID: a.SmesherID, + Signature: a.Signature, } - encoded, err := codec.Encode(proof) - if err != nil { - h.log.With().Panic("failed to encode malfeasance proof", log.Err(err)) - } - if err := identities.SetMalicious(dbtx, atx.SmesherID, encoded, time.Now()); err != nil { - return fmt.Errorf("add malfeasance proof: %w", err) - } - - h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch", - log.Stringer("smesher", atx.SmesherID), - log.Object("prev", prev), - log.Object("curr", atx), - ) } + proof = &types.MalfeasanceProof{ + Layer: atx.PublishEpoch.FirstLayer(), + Proof: types.Proof{ + Type: types.MultipleATXs, + Data: &atxProof, + }, + } + encoded, err := codec.Encode(proof) + if err != nil { + h.log.With().Panic("failed to encode malfeasance proof", log.Err(err)) + } + if err := identities.SetMalicious(tx, atx.SmesherID, encoded, time.Now()); err != nil { + return fmt.Errorf("add malfeasance proof: %w", err) + } + + h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch", + log.Stringer("smesher", atx.SmesherID), + log.Object("prev", prev), + log.Object("curr", atx), + ) } - if err := atxs.Add(dbtx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) { + + if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) { return fmt.Errorf("add atx to db: %w", err) } return nil diff --git a/activation/handler_test.go b/activation/handler_test.go index 36def9a5b1..78dda171ea 100644 --- a/activation/handler_test.go +++ b/activation/handler_test.go @@ -1006,6 +1006,66 @@ func TestHandler_ProcessAtx(t *testing.T) { require.Equal(t, atx2.PublishEpoch.FirstLayer(), got.MalfeasanceProof.Layer) } +func TestHandler_ProcessAtx_OwnNotMalicious(t *testing.T) { + // Arrange + goldenATXID := types.ATXID{2, 3, 4} + atxHdlr := newTestHandler(t, goldenATXID) + + sig, err := signing.NewEdSigner() + require.NoError(t, err) + atxHdlr.Register(sig) + + coinbase := types.GenerateAddress([]byte("aaaa")) + + // Act & Assert + atx1 := newActivationTx( + t, + sig, + 0, + types.EmptyATXID, + types.EmptyATXID, + nil, + types.LayerID(layersPerEpoch).GetEpoch(), + 0, + 100, + coinbase, + 100, + &types.NIPost{}, + ) + atxHdlr.mbeacon.EXPECT().OnAtx(gomock.Any()) + atxHdlr.mtortoise.EXPECT().OnAtx(gomock.Any()) + require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx1)) + + // processing an already stored ATX returns no error + require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx1)) + proof, err := identities.GetMalfeasanceProof(atxHdlr.cdb, sig.NodeID()) + require.ErrorIs(t, err, sql.ErrNotFound) + require.Nil(t, proof) + + // another atx for the same epoch is considered malicious + atx2 := newActivationTx( + t, + sig, + 1, + atx1.ID(), + atx1.ID(), + nil, + types.LayerID(layersPerEpoch+1).GetEpoch(), + 0, + 100, + coinbase, + 100, + &types.NIPost{}, + ) + require.ErrorContains(t, + atxHdlr.ProcessAtx(context.Background(), atx2), + fmt.Sprintf("%s already published an ATX", sig.NodeID().ShortString()), + ) + proof, err = identities.GetMalfeasanceProof(atxHdlr.cdb, sig.NodeID()) + require.ErrorIs(t, err, sql.ErrNotFound) + require.Nil(t, proof) +} + func TestHandler_ProcessAtxStoresNewVRFNonce(t *testing.T) { // Arrange goldenATXID := types.ATXID{2, 3, 4} diff --git a/node/node.go b/node/node.go index 0f029a5af2..b09d2a5faa 100644 --- a/node/node.go +++ b/node/node.go @@ -732,6 +732,7 @@ func (app *App) initServices(ctx context.Context) error { app.addLogger(ATXHandlerLogger, lg), app.Config.POET, ) + atxHandler.Register(app.edSgn) // we can't have an epoch offset which is greater/equal than the number of layers in an epoch