Skip to content

Commit

Permalink
Prevent node from marking itself as malicious (#5518)
Browse files Browse the repository at this point in the history
## Motivation

The ATX handler should not create malfeasance proofs against the node itself. In case we have a bug somewhere that would publish 2 ATXs in a single epoch fail validation, but do not mark yourself as malicious

Merge after #5484
  • Loading branch information
fasmat committed Jan 31, 2024
1 parent 9e38180 commit 579cca6
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 37 deletions.
31 changes: 31 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,37 @@ configuration is as follows:

### Features

### Improvements

* [#5518](https://github.com/spacemeshos/go-spacemesh/pull/5518) In rare cases the node could create a malfeasance
proof against itself. This is now prevented.

## Release v1.3.7

### Improvements

* [#5502](https://github.com/spacemeshos/go-spacemesh/pull/5502)
Increase limits of p2p messages to compensate for the increased number of nodes on the network.

## Release v1.3.6

### Improvements

* [#5479](https://github.com/spacemeshos/go-spacemesh/pull/5486)
p2p: make AutoNAT service limits configurable. This helps with AutoNAT dialback to determine
nodes' reachability status.

* [#5490](https://github.com/spacemeshos/go-spacemesh/pull/5490)
The path in `smeshing-opts-datadir` used to be resolved relative to the location of the `service` binary when running
the node in supervised mode. This is no longer the case. The path is now resolved relative to the current working
directory.

* [#5489](https://github.com/spacemeshos/go-spacemesh/pull/5489)
Fix problem in POST proving where too many files were opened at the same time.

* [#5498](https://github.com/spacemeshos/go-spacemesh/pull/5498)
Reduce the default number of CPU cores that are used for verifying incoming ATXs to half of the available cores.

* [#5462](https://github.com/spacemeshos/go-spacemesh/pull/5462) Add separate metric for failed p2p server requests

### Improvements
Expand Down
104 changes: 67 additions & 37 deletions activation/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/spacemeshos/post/shared"
"go.uber.org/zap"
"golang.org/x/exp/maps"

"github.com/spacemeshos/go-spacemesh/atxsdata"
Expand Down Expand Up @@ -50,6 +51,9 @@ type Handler struct {
fetcher system.Fetcher
poetCfg PoetConfig

signerMtx sync.Mutex
signers map[types.NodeID]*signing.EdSigner

// inProgress map gathers ATXs that are currently being processed.
// It's used to avoid processing the same ATX twice.
inProgress map[types.ATXID][]chan error
Expand Down Expand Up @@ -89,10 +93,23 @@ func NewHandler(
tortoise: tortoise,
poetCfg: poetCfg,

signers: make(map[types.NodeID]*signing.EdSigner),
inProgress: make(map[types.ATXID][]chan error),
}
}

func (h *Handler) Register(sig *signing.EdSigner) {
h.signerMtx.Lock()
defer h.signerMtx.Unlock()
if _, exists := h.signers[sig.NodeID()]; exists {
h.log.Error("signing key already registered", zap.Stringer("id", sig.NodeID()))
return
}

h.log.Info("registered signing key", zap.Stringer("id", sig.NodeID()))
h.signers[sig.NodeID()] = sig
}

// ProcessAtx validates the active set size declared in the atx, and contextually validates the atx according to atx
// validation rules it then stores the atx with flag set to validity of the atx.
//
Expand Down Expand Up @@ -372,48 +389,61 @@ func (h *Handler) storeAtx(ctx context.Context, atx *types.VerifiedActivationTx)
return fmt.Errorf("checking if node is malicious: %w", err)
}
var proof *types.MalfeasanceProof
if err := h.cdb.WithTx(ctx, func(dbtx *sql.Tx) error {
if !malicious {
prev, err := atxs.GetByEpochAndNodeID(dbtx, atx.PublishEpoch, atx.SmesherID)
if err != nil && !errors.Is(err, sql.ErrNotFound) {
return err
if err := h.cdb.WithTx(ctx, func(tx *sql.Tx) error {
if malicious {
if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) {
return fmt.Errorf("add atx to db: %w", err)
}
// do ID check to be absolutely sure.
if prev != nil && prev.ID() != atx.ID() {
var atxProof types.AtxProof
for i, a := range []*types.VerifiedActivationTx{prev, atx} {
atxProof.Messages[i] = types.AtxProofMsg{
InnerMsg: types.ATXMetadata{
PublishEpoch: a.PublishEpoch,
MsgHash: types.BytesToHash(a.HashInnerBytes()),
},
SmesherID: a.SmesherID,
Signature: a.Signature,
}
}
proof = &types.MalfeasanceProof{
Layer: atx.PublishEpoch.FirstLayer(),
Proof: types.Proof{
Type: types.MultipleATXs,
Data: &atxProof,
return nil
}

prev, err := atxs.GetByEpochAndNodeID(tx, atx.PublishEpoch, atx.SmesherID)
if err != nil && !errors.Is(err, sql.ErrNotFound) {
return err
}

// do ID check to be absolutely sure.
if prev != nil && prev.ID() != atx.ID() {
if _, ok := h.signers[atx.SmesherID]; ok {
// if we land here we tried to publish 2 ATXs in the same epoch
// don't punish ourselves but fail validation and thereby the handling of the incoming ATX
return fmt.Errorf("%s already published an ATX in epoch %d", atx.SmesherID.ShortString(), atx.PublishEpoch)
}

var atxProof types.AtxProof
for i, a := range []*types.VerifiedActivationTx{prev, atx} {
atxProof.Messages[i] = types.AtxProofMsg{
InnerMsg: types.ATXMetadata{
PublishEpoch: a.PublishEpoch,
MsgHash: types.BytesToHash(a.HashInnerBytes()),
},
SmesherID: a.SmesherID,
Signature: a.Signature,
}
encoded, err := codec.Encode(proof)
if err != nil {
h.log.With().Panic("failed to encode malfeasance proof", log.Err(err))
}
if err := identities.SetMalicious(dbtx, atx.SmesherID, encoded, time.Now()); err != nil {
return fmt.Errorf("add malfeasance proof: %w", err)
}

h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch",
log.Stringer("smesher", atx.SmesherID),
log.Object("prev", prev),
log.Object("curr", atx),
)
}
proof = &types.MalfeasanceProof{
Layer: atx.PublishEpoch.FirstLayer(),
Proof: types.Proof{
Type: types.MultipleATXs,
Data: &atxProof,
},
}
encoded, err := codec.Encode(proof)
if err != nil {
h.log.With().Panic("failed to encode malfeasance proof", log.Err(err))
}
if err := identities.SetMalicious(tx, atx.SmesherID, encoded, time.Now()); err != nil {
return fmt.Errorf("add malfeasance proof: %w", err)
}

h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch",
log.Stringer("smesher", atx.SmesherID),
log.Object("prev", prev),
log.Object("curr", atx),
)
}
if err := atxs.Add(dbtx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) {

if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) {
return fmt.Errorf("add atx to db: %w", err)
}
return nil
Expand Down
60 changes: 60 additions & 0 deletions activation/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,66 @@ func TestHandler_ProcessAtx(t *testing.T) {
require.Equal(t, atx2.PublishEpoch.FirstLayer(), got.MalfeasanceProof.Layer)
}

func TestHandler_ProcessAtx_OwnNotMalicious(t *testing.T) {
// Arrange
goldenATXID := types.ATXID{2, 3, 4}
atxHdlr := newTestHandler(t, goldenATXID)

sig, err := signing.NewEdSigner()
require.NoError(t, err)
atxHdlr.Register(sig)

coinbase := types.GenerateAddress([]byte("aaaa"))

// Act & Assert
atx1 := newActivationTx(
t,
sig,
0,
types.EmptyATXID,
types.EmptyATXID,
nil,
types.LayerID(layersPerEpoch).GetEpoch(),
0,
100,
coinbase,
100,
&types.NIPost{},
)
atxHdlr.mbeacon.EXPECT().OnAtx(gomock.Any())
atxHdlr.mtortoise.EXPECT().OnAtx(gomock.Any())
require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx1))

// processing an already stored ATX returns no error
require.NoError(t, atxHdlr.ProcessAtx(context.Background(), atx1))
proof, err := identities.GetMalfeasanceProof(atxHdlr.cdb, sig.NodeID())
require.ErrorIs(t, err, sql.ErrNotFound)
require.Nil(t, proof)

// another atx for the same epoch is considered malicious
atx2 := newActivationTx(
t,
sig,
1,
atx1.ID(),
atx1.ID(),
nil,
types.LayerID(layersPerEpoch+1).GetEpoch(),
0,
100,
coinbase,
100,
&types.NIPost{},
)
require.ErrorContains(t,
atxHdlr.ProcessAtx(context.Background(), atx2),
fmt.Sprintf("%s already published an ATX", sig.NodeID().ShortString()),
)
proof, err = identities.GetMalfeasanceProof(atxHdlr.cdb, sig.NodeID())
require.ErrorIs(t, err, sql.ErrNotFound)
require.Nil(t, proof)
}

func TestHandler_ProcessAtxStoresNewVRFNonce(t *testing.T) {
// Arrange
goldenATXID := types.ATXID{2, 3, 4}
Expand Down
1 change: 1 addition & 0 deletions node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ func (app *App) initServices(ctx context.Context) error {
app.addLogger(ATXHandlerLogger, lg),
app.Config.POET,
)
atxHandler.Register(app.edSgn)

// we can't have an epoch offset which is greater/equal than the number of layers in an epoch

Expand Down

0 comments on commit 579cca6

Please sign in to comment.