Skip to content

Commit

Permalink
feat: improved resource manager based on Rainbow (#67)
Browse files Browse the repository at this point in the history
  • Loading branch information
hacdias authored May 21, 2024
1 parent a9caa78 commit 3b9cce4
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ The following emojis are used to highlight certain changes:

### Changed

- The resource manager's defaults have been improved based on Rainbow's and Kubo's defaults. In addition, you can now customize a few options using flags, or [environment variables](./docs/environment-variables.md).

### Removed

### Fixed
Expand Down
35 changes: 35 additions & 0 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
- [`SOMEGUY_PROVIDER_ENDPOINTS`](#someguy_provider_endpoints)
- [`SOMEGUY_PEER_ENDPOINTS`](#someguy_peer_endpoints)
- [`SOMEGUY_IPNS_ENDPOINTS`](#someguy_ipns_endpoints)
- [`SOMEGUY_LIBP2P_CONNMGR_LOW`](#someguy_libp2p_connmgr_low)
- [`SOMEGUY_LIBP2P_CONNMGR_HIGH`](#someguy_libp2p_connmgr_high)
- [`SOMEGUY_LIBP2P_CONNMGR_GRACE_PERIOD`](#someguy_libp2p_connmgr_grace_period)
- [`SOMEGUY_LIBP2P_MAX_MEMORY`](#someguy_libp2p_max_memory)
- [`SOMEGUY_LIBP2P_MAX_FD`](#someguy_libp2p_max_fd)
- [Logging](#logging)
- [`GOLOG_LOG_LEVEL`](#golog_log_level)
- [`GOLOG_LOG_FMT`](#golog_log_fmt)
Expand Down Expand Up @@ -46,6 +51,36 @@ Comma-separated list of other Delegated Routing V1 endpoints to proxy IPNS reque

Default: none

### `SOMEGUY_LIBP2P_CONNMGR_LOW`

Minimum number of libp2p connections to keep.

Default: 100

### `SOMEGUY_LIBP2P_CONNMGR_HIGH`

Maximum number of libp2p connections to keep.

Default: 3000

### `SOMEGUY_LIBP2P_CONNMGR_GRACE_PERIOD`

Minimum libp2p connection TTL.

Default: 1m

### `SOMEGUY_LIBP2P_MAX_MEMORY`

Maximum memory, in bytes, to use for libp2p.

Default: 0 (85% of the system's total RAM)

### `SOMEGUY_LIBP2P_MAX_FD`

Maximum number of file descriptors used by libp2p node.

Default: 0 (50% of the process' limit)

## Logging

### `GOLOG_LOG_LEVEL`
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.21

require (
github.com/CAFxX/httpcompression v0.0.9
github.com/dustin/go-humanize v1.0.1
github.com/felixge/httpsnoop v1.0.4
github.com/ipfs/boxo v0.19.1-0.20240515083429-ac0bab3926a8
github.com/ipfs/go-cid v0.4.1
Expand All @@ -14,11 +15,13 @@ require (
github.com/multiformats/go-multiaddr v0.12.3
github.com/multiformats/go-multibase v0.2.0
github.com/multiformats/go-multihash v0.2.3
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
github.com/prometheus/client_golang v1.19.0
github.com/rs/cors v1.10.1
github.com/slok/go-http-metrics v0.11.0
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.1
golang.org/x/sys v0.19.0
)

require (
Expand Down Expand Up @@ -89,7 +92,6 @@ require (
github.com/onsi/ginkgo/v2 v2.17.1 // indirect
github.com/opencontainers/runtime-spec v1.2.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
Expand Down Expand Up @@ -120,7 +122,6 @@ require (
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.24.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.19.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.20.0 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo=
github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
Expand Down
7 changes: 7 additions & 0 deletions internal/fd/sys_not_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build !linux && !darwin && !windows

package fd

// GetNumFDs is the fallback used on platforms other than Linux, macOS and
// Windows. It always returns 0, which signals that no file descriptor limit
// could be determined.
func GetNumFDs() int {
	const unknownLimit = 0
	return unknownLimit
}
17 changes: 17 additions & 0 deletions internal/fd/sys_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//go:build linux || darwin
// +build linux darwin

// Package fd provides the file descriptor limit of the current process on different operating systems.
package fd

import (
"golang.org/x/sys/unix"
)

// GetNumFDs returns the current (soft) RLIMIT_NOFILE value of the process,
// i.e. the maximum number of file descriptors it may have open.
//
// It returns 0 when the limit cannot be read. A limit that does not fit in an
// int (for example RLIM_INFINITY) is clamped to the largest int value instead
// of wrapping around to a negative number.
func GetNumFDs() int {
	// Largest value representable by int on this platform.
	const maxInt = int(^uint(0) >> 1)

	var l unix.Rlimit
	if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil {
		return 0
	}

	// Compare in the unsigned domain: converting an oversized limit straight
	// to int would overflow and yield a negative descriptor count.
	if uint64(l.Cur) > uint64(maxInt) {
		return maxInt
	}
	return int(l.Cur)
}
11 changes: 11 additions & 0 deletions internal/fd/sys_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:build windows

package fd

import (
"math"
)

// GetNumFDs is the Windows implementation. It does not query the system and
// always reports math.MaxInt as the number of usable file descriptors.
func GetNumFDs() int {
	const noLimit = math.MaxInt
	return noLimit
}
48 changes: 47 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"log"
"os"
"time"

"github.com/ipfs/boxo/ipns"
"github.com/ipfs/go-cid"
Expand Down Expand Up @@ -53,9 +54,54 @@ func main() {
EnvVars: []string{"SOMEGUY_IPNS_ENDPOINTS"},
Usage: "other Delegated Routing V1 endpoints to proxy IPNS requests to",
},
&cli.IntFlag{
Name: "libp2p-connmgr-low",
Value: 100,
EnvVars: []string{"SOMEGUY_LIBP2P_CONNMGR_LOW"},
Usage: "minimum number of libp2p connections to keep",
},
&cli.IntFlag{
Name: "libp2p-connmgr-high",
Value: 3000,
EnvVars: []string{"SOMEGUY_LIBP2P_CONNMGR_HIGH"},
Usage: "maximum number of libp2p connections to keep",
},
&cli.DurationFlag{
Name: "libp2p-connmgr-grace",
Value: time.Minute,
EnvVars: []string{"SOMEGUY_LIBP2P_CONNMGR_GRACE_PERIOD"},
Usage: "minimum libp2p connection TTL",
},
&cli.Uint64Flag{
Name: "libp2p-max-memory",
Value: 0,
EnvVars: []string{"SOMEGUY_LIBP2P_MAX_MEMORY"},
Usage: "maximum memory to use for libp2p. Defaults to 85% of the system's available RAM",
},
&cli.Uint64Flag{
Name: "libp2p-max-fd",
Value: 0,
EnvVars: []string{"SOMEGUY_LIBP2P_MAX_FD"},
Usage: "maximum number of file descriptors used by libp2p node. Defaults to 50% of the process' limit",
},
},
Action: func(ctx *cli.Context) error {
return start(ctx.Context, ctx.String("listen-address"), ctx.Bool("accelerated-dht"), ctx.StringSlice("provider-endpoints"), ctx.StringSlice("peer-endpoints"), ctx.StringSlice("ipns-endpoints"))
cfg := &config{
listenAddress: ctx.String("listen-address"),
acceleratedDHTClient: ctx.Bool("accelerated-dht"),

contentEndpoints: ctx.StringSlice("provider-endpoints"),
peerEndpoints: ctx.StringSlice("peer-endpoints"),
ipnsEndpoints: ctx.StringSlice("ipns-endpoints"),

connMgrLow: ctx.Int("libp2p-connmgr-low"),
connMgrHi: ctx.Int("libp2p-connmgr-high"),
connMgrGrace: ctx.Duration("libp2p-connmgr-grace"),
maxMemory: ctx.Uint64("libp2p-max-memory"),
maxFD: ctx.Int("libp2p-max-fd"),
}

return start(ctx.Context, cfg)
},
},
{
Expand Down
153 changes: 153 additions & 0 deletions rcmgr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package main

import (
"github.com/dustin/go-humanize"
"github.com/pbnjay/memory"

"github.com/ipfs/someguy/internal/fd"
"github.com/libp2p/go-libp2p"
"github.com/libp2p/go-libp2p/core/network"
rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
)

// Note: this comes from rainbow/rcmgr.go with minimal adaptations.

// infiniteResourceLimits is the System entry of rcmgr.InfiniteLimits expressed
// as partial resource limits. It is assigned to every scope in this file that
// is deliberately left unconstrained.
var infiniteResourceLimits = rcmgr.InfiniteLimits.ToPartialLimitConfig().System

// makeResourceMgrs creates a libp2p resource manager backed by a fixed limiter
// whose limits are produced by makeResourceManagerConfig.
//
// A maxMemory of 0 is replaced by 85% of memory.TotalMemory(), and a maxFD of
// 0 is replaced by half of fd.GetNumFDs(). connMgrHighWater is forwarded
// unchanged to makeResourceManagerConfig.
func makeResourceMgrs(maxMemory uint64, maxFD int, connMgrHighWater int) (rm network.ResourceManager, err error) {
	// Zero means "not configured": derive the budget from the host.
	if maxMemory == 0 {
		totalRAM := float64(memory.TotalMemory())
		maxMemory = uint64(totalRAM * 0.85)
	}
	if maxFD == 0 {
		processLimit := fd.GetNumFDs()
		maxFD = processLimit / 2
	}

	limits := makeResourceManagerConfig(maxMemory, maxFD, connMgrHighWater)
	limiter := rcmgr.NewFixedLimiter(limits)
	return rcmgr.NewResourceManager(limiter)
}

// makeResourceManagerConfig builds the concrete go-libp2p resource manager
// limits from a memory budget, a file descriptor budget and the connection
// manager's high-water mark.
//
// Parameters:
//   - maxMemory: memory budget in bytes. 0 selects 85% of memory.TotalMemory().
//   - maxFD: file descriptor budget. 0 selects half of fd.GetNumFDs().
//   - connMgrHighWater: connection manager high-water mark. When the system
//     inbound connection limit is a concrete positive value, it is raised to
//     at least twice this number and never below 800.
//
// The System and Transient scopes are bounded by the budgets above, and
// PeerDefault uses libp2p's autoscaled defaults for inbound connections and
// streams. All other scopes set here are left unlimited.
func makeResourceManagerConfig(maxMemory uint64, maxFD int, connMgrHighWater int) (limitConfig rcmgr.ConcreteLimitConfig) {
	if maxMemory == 0 {
		maxMemory = uint64((float64(memory.TotalMemory()) * 0.85))
	}
	if maxFD == 0 {
		maxFD = fd.GetNumFDs() / 2
	}

	maxMemoryMB := maxMemory / (1024 * 1024)

	// At least as of 2023-01-25, it's possible to open a connection that
	// doesn't ask for any memory usage with the libp2p Resource Manager/Accountant
	// (see https://github.com/libp2p/go-libp2p/issues/2010#issuecomment-1404280736).
	// As a result, we can't currently rely on Memory limits to fully protect us.
	// Until https://github.com/libp2p/go-libp2p/issues/2010 is addressed,
	// we use a proxy for now: restrict to 1 inbound connection per MB.
	// Note: this is more generous than go-libp2p's default autoscaled limits which do
	// 64 connections per 1GB
	// (see https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L357 ).
	systemConnsInbound := int(1 * maxMemoryMB)

	partialLimits := rcmgr.PartialLimitConfig{
		System: rcmgr.ResourceLimits{
			Memory: rcmgr.LimitVal64(maxMemory),
			FD:     rcmgr.LimitVal(maxFD),

			Conns:         rcmgr.Unlimited,
			ConnsInbound:  rcmgr.LimitVal(systemConnsInbound),
			ConnsOutbound: rcmgr.Unlimited,

			Streams:         rcmgr.Unlimited,
			StreamsOutbound: rcmgr.Unlimited,
			StreamsInbound:  rcmgr.Unlimited,
		},

		// Transient connections won't cause any memory to be accounted for by the resource manager/accountant.
		// Only established connections do.
		// As a result, we can't rely on System.Memory to protect us from a bunch of transient connections being opened.
		// We limit the same values as the System scope, but only allow the Transient scope to take 25% of what is allowed for the System scope.
		Transient: rcmgr.ResourceLimits{
			Memory: rcmgr.LimitVal64(maxMemory / 4),
			FD:     rcmgr.LimitVal(maxFD / 4),

			Conns:         rcmgr.Unlimited,
			ConnsInbound:  rcmgr.LimitVal(systemConnsInbound / 4),
			ConnsOutbound: rcmgr.Unlimited,

			Streams:         rcmgr.Unlimited,
			StreamsInbound:  rcmgr.Unlimited,
			StreamsOutbound: rcmgr.Unlimited,
		},

		// Get out of the way of the allowlist functionality:
		// the allowlisted scopes are not limited.
		AllowlistedSystem: infiniteResourceLimits,

		AllowlistedTransient: infiniteResourceLimits,

		// Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Conn, or Stream limits.
		ServiceDefault: infiniteResourceLimits,

		ServicePeerDefault: infiniteResourceLimits,

		ProtocolDefault: infiniteResourceLimits,

		ProtocolPeerDefault: infiniteResourceLimits,

		Conn: infiniteResourceLimits,

		Stream: infiniteResourceLimits,

		// Limit the resources consumed by a peer.
		// This doesn't protect us against intentional DoS attacks since an attacker can easily spin up multiple peers.
		// We specify this limit against unintentional DoS attacks (e.g., a peer has a bug and is unintentionally sending too much traffic).
		// In that case we want to keep that peer's resource consumption contained.
		// To keep this simple, we only constrain inbound connections and streams.
		PeerDefault: rcmgr.ResourceLimits{
			Memory:          rcmgr.Unlimited64,
			FD:              rcmgr.Unlimited,
			Conns:           rcmgr.Unlimited,
			ConnsInbound:    rcmgr.DefaultLimit,
			ConnsOutbound:   rcmgr.Unlimited,
			Streams:         rcmgr.Unlimited,
			StreamsInbound:  rcmgr.DefaultLimit,
			StreamsOutbound: rcmgr.Unlimited,
		},
	}

	scalingLimitConfig := rcmgr.DefaultLimits
	libp2p.SetDefaultServiceLimits(&scalingLimitConfig)

	// Anything set above in partialLimits that had a value of rcmgr.DefaultLimit will be overridden.
	// Anything in scalingLimitConfig that wasn't defined in partialLimits above will be added (e.g., libp2p's default service limits).
	partialLimits = partialLimits.Build(scalingLimitConfig.Scale(int64(maxMemory), maxFD)).ToPartialLimitConfig()

	// Simple checks to override autoscaling ensuring limits make sense versus the connmgr values.
	// There are ways to break this, but this should catch most problems already.
	// We might improve this in the future.
	// See: https://github.com/ipfs/kubo/issues/9545
	if partialLimits.System.ConnsInbound > rcmgr.DefaultLimit {
		maxInboundConns := int(partialLimits.System.ConnsInbound)
		if connmgrHighWaterTimesTwo := connMgrHighWater * 2; maxInboundConns < connmgrHighWaterTimesTwo {
			maxInboundConns = connmgrHighWaterTimesTwo
		}

		if maxInboundConns < 800 {
			maxInboundConns = 800
		}

		// Scale System.StreamsInbound as well, but use the existing ratio of StreamsInbound to ConnsInbound.
		// The division is safe: this branch is only entered when ConnsInbound is strictly positive.
		if partialLimits.System.StreamsInbound > rcmgr.DefaultLimit {
			partialLimits.System.StreamsInbound = rcmgr.LimitVal(int64(maxInboundConns) * int64(partialLimits.System.StreamsInbound) / int64(partialLimits.System.ConnsInbound))
		}
		partialLimits.System.ConnsInbound = rcmgr.LimitVal(maxInboundConns)
	}

	// Report the effective budgets under the flag names this program exposes.
	logger.Infof(`
go-libp2p Resource Manager limits based on:
- --libp2p-max-memory: %s
- --libp2p-max-fd: %d
`, humanize.Bytes(maxMemory), maxFD)

	// We already have a complete value thus pass in an empty ConcreteLimitConfig.
	return partialLimits.Build(rcmgr.ConcreteLimitConfig{})
}
Loading

0 comments on commit 3b9cce4

Please sign in to comment.