Skip to content

Commit

Permalink
ratelimits: Add a feature-flag which makes key-value implementation a…
Browse files Browse the repository at this point in the history
…uthoritative
  • Loading branch information
beautifulentropy committed Aug 16, 2024
1 parent 14c0b2c commit 9dda074
Show file tree
Hide file tree
Showing 10 changed files with 237 additions and 106 deletions.
13 changes: 13 additions & 0 deletions features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,19 @@ type Config struct {
// returned to the Subscriber indicating that the order cannot be processed
// until the paused identifiers are unpaused and the order is resubmitted.
CheckIdentifiersPaused bool

// UseKvLimitsForNewOrder, when enabled, causes the key-value rate limiter to
// be the authoritative source of rate limiting information for new-order
// callers and disables the legacy rate limiting checks.
//
// Note: this flag does not disable writes to the certificatesPerName or
// fqdnSets tables at Finalize time.
UseKvLimitsForNewOrder bool

// UseKvLimitsForNewAccount, when enabled, causes the key-value rate limiter
// to be the authoritative source of rate limiting information for
// new-account callers and disables the legacy rate limiting checks.
UseKvLimitsForNewAccount bool
}

var fMu = new(sync.RWMutex)
Expand Down
46 changes: 28 additions & 18 deletions ra/ra.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,12 @@ func (ra *RegistrationAuthorityImpl) NewRegistration(ctx context.Context, reques
if err != nil {
return nil, berrors.InternalServerError("failed to unmarshal ip address: %s", err.Error())
}
err = ra.checkRegistrationLimits(ctx, ipAddr)
if err != nil {
return nil, err

if !features.Get().UseKvLimitsForNewAccount {
err = ra.checkRegistrationLimits(ctx, ipAddr)
if err != nil {
return nil, err
}
}

// Check that contacts conform to our expectations.
Expand Down Expand Up @@ -1302,18 +1305,20 @@ func (ra *RegistrationAuthorityImpl) issueCertificateOuter(
// account) and duplicate certificate rate limits. There is no reason to surface
// errors from this function to the Subscriber, spends against these limits are
// best effort.
func (ra *RegistrationAuthorityImpl) countCertificateIssued(ctx context.Context, regId int64, orderDomains []string) {
func (ra *RegistrationAuthorityImpl) countCertificateIssued(ctx context.Context, regId int64, orderDomains []string, isRenewal bool) {
if ra.limiter == nil || ra.txnBuilder == nil {
// Limiter is disabled.
return
}

var transactions []ratelimits.Transaction
txns, err := ra.txnBuilder.CertificatesPerDomainSpendOnlyTransactions(regId, orderDomains)
if err != nil {
ra.log.Warningf("building rate limit transactions at finalize: %s", err)
if !isRenewal {
txns, err := ra.txnBuilder.CertificatesPerDomainSpendOnlyTransactions(regId, orderDomains)
if err != nil {
ra.log.Warningf("building rate limit transactions at finalize: %s", err)
}
transactions = append(transactions, txns...)
}
transactions = append(transactions, txns...)

txn, err := ra.txnBuilder.CertificatesPerFQDNSetSpendOnlyTransaction(orderDomains)
if err != nil {
Expand Down Expand Up @@ -1402,6 +1407,17 @@ func (ra *RegistrationAuthorityImpl) issueCertificateInner(
return nil, nil, wrapError(err, "getting SCTs")
}

var isRenewal bool
if len(parsedPrecert.DNSNames) > 0 {
// A precertificate with no DNS names should never happen under normal
// operation, but it sometimes occurs under test, so only check for an
// existing FQDN set when names are present.
exists, err := ra.SA.FQDNSetExists(ctx, &sapb.FQDNSetExistsRequest{DnsNames: parsedPrecert.DNSNames})
if err != nil {
return nil, nil, wrapError(err, "checking if certificate is a renewal")
}
isRenewal = exists.Exists
}

cert, err := ra.CA.IssueCertificateForPrecertificate(ctx, &capb.IssueCertificateForPrecertificateRequest{
DER: precert.DER,
SCTs: scts,
Expand All @@ -1418,7 +1434,7 @@ func (ra *RegistrationAuthorityImpl) issueCertificateInner(
return nil, nil, wrapError(err, "parsing final certificate")
}

ra.countCertificateIssued(ctx, int64(acctID), parsedCertificate.DNSNames)
ra.countCertificateIssued(ctx, int64(acctID), parsedCertificate.DNSNames, isRenewal)

// Asynchronously submit the final certificate to any configured logs
go ra.ctpolicy.SubmitFinalCert(cert.Der, parsedCertificate.NotAfter)
Expand Down Expand Up @@ -1994,13 +2010,7 @@ func (ra *RegistrationAuthorityImpl) PerformValidation(
if prob != nil {
challenge.Status = core.StatusInvalid
challenge.Error = prob

// TODO(#5545): Spending can be async until key-value rate limits
// are authoritative. This saves us from adding latency to each
// request. Goroutines spun out below will respect a context
// deadline set by the ratelimits package and cannot be prematurely
// canceled by the requester.
go ra.countFailedValidation(vaCtx, authz.RegistrationID, authz.Identifier.Value)
ra.countFailedValidation(vaCtx, authz.RegistrationID, authz.Identifier.Value)
} else {
challenge.Status = core.StatusValid
}
Expand Down Expand Up @@ -2572,7 +2582,7 @@ func (ra *RegistrationAuthorityImpl) NewOrder(ctx context.Context, req *rapb.New
}

// Renewal orders, indicated by ARI, are exempt from NewOrder rate limits.
if !req.IsARIRenewal {
if !req.IsARIRenewal && !features.Get().UseKvLimitsForNewOrder {
// Check if there is rate limit space for issuing a certificate.
err = ra.checkNewOrderLimits(ctx, newOrder.DnsNames, newOrder.RegistrationID, req.IsRenewal)
if err != nil {
Expand Down Expand Up @@ -2651,7 +2661,7 @@ func (ra *RegistrationAuthorityImpl) NewOrder(ctx context.Context, req *rapb.New
}

// Renewal orders, indicated by ARI, are exempt from NewOrder rate limits.
if len(missingAuthzIdents) > 0 && !req.IsARIRenewal {
if len(missingAuthzIdents) > 0 && !req.IsARIRenewal && !features.Get().UseKvLimitsForNewOrder {
pendingAuthzLimits := ra.rlPolicies.PendingAuthorizationsPerAccount()
if pendingAuthzLimits.Enabled() {
// The order isn't fully authorized we need to check that the client
Expand Down
4 changes: 4 additions & 0 deletions ra/ra_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3709,6 +3709,10 @@ func (sa *mockSAWithFinalize) FinalizeOrder(ctx context.Context, req *sapb.Final
return &emptypb.Empty{}, nil
}

// FQDNSetExists is a stub for tests: it ignores its arguments and always
// returns a zero-value sapb.Exists (so Exists is false) and a nil error.
func (sa *mockSAWithFinalize) FQDNSetExists(ctx context.Context, in *sapb.FQDNSetExistsRequest, opts ...grpc.CallOption) (*sapb.Exists, error) {
	return new(sapb.Exists), nil
}

func TestIssueCertificateInnerWithProfile(t *testing.T) {
_, _, ra, fc, cleanup := initAuthorities(t)
defer cleanup()
Expand Down
4 changes: 3 additions & 1 deletion test/config-next/ra.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@
},
"features": {
"AsyncFinalize": true,
"CheckRenewalExemptionAtWFE": true
"CheckRenewalExemptionAtWFE": true,
"UseKvLimitsForNewOrder": true,
"UseKvLimitsForNewAccount": true
},
"ctLogs": {
"stagger": "500ms",
Expand Down
4 changes: 4 additions & 0 deletions test/config-next/wfe2-ratelimit-overrides.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
ids:
- id: 127.0.0.1
comment: localhost
- id: 10.77.77.77
comment: test
- id: 10.88.88.88
comment: test
- CertificatesPerDomain:
burst: 1
count: 1
Expand Down
4 changes: 3 additions & 1 deletion test/config-next/wfe2.json
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@
"ServeRenewalInfo": true,
"TrackReplacementCertificatesARI": true,
"CheckRenewalExemptionAtWFE": true,
"CheckIdentifiersPaused": true
"CheckIdentifiersPaused": true,
"UseKvLimitsForNewOrder": true,
"UseKvLimitsForNewAccount": true
},
"certProfiles": {
"legacy": "The normal profile you know and love",
Expand Down
162 changes: 119 additions & 43 deletions test/integration/ratelimit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,74 @@ package integration

import (
"context"
"crypto/rand"
"database/sql"
"encoding/hex"
"fmt"
"os"
"strings"
"testing"

"github.com/jmhodges/clock"

"github.com/letsencrypt/boulder/cmd"
berrors "github.com/letsencrypt/boulder/errors"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/ratelimits"
bredis "github.com/letsencrypt/boulder/redis"
"github.com/letsencrypt/boulder/test"
"github.com/letsencrypt/boulder/test/vars"

"github.com/redis/go-redis/v9"
)

// flushLegacyLimits truncates the certificatesPerName table through a fresh
// connection opened with vars.DBConnSAIntegrationFullPerms. Any failure to
// open the connection or run the statement is reported through t.
func flushLegacyLimits(t *testing.T) {
	db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
	test.AssertNotError(t, err, "Failed to open database connection")
	// A new handle is opened on every call, so release its connection pool
	// when done instead of leaking it for the remainder of the test binary.
	// The close error is not actionable here and is deliberately ignored.
	defer db.Close()

	_, err = db.Exec("TRUNCATE certificatesPerName")
	test.AssertNotError(t, err, "Failed to delete rate limits")
}

// flushKeyValueLimits connects to the "redisratelimits" Redis ring and issues
// FLUSHALL against every shard, removing all keys. It MUST NOT be called in
// parallel with any other rate limits tests.
func flushKeyValueLimits(t *testing.T) {
	rc := bredis.Config{
		Username: "unittest-rw",
		TLS: cmd.TLSConfig{
			CACertFile: "test/certs/ipki/minica.pem",
			CertFile:   "test/certs/ipki/localhost/cert.pem",
			KeyFile:    "test/certs/ipki/localhost/key.pem",
		},
		Lookups: []cmd.ServiceDomain{
			{
				Service: "redisratelimits",
				Domain:  "service.consul",
			},
		},
		LookupDNSAuthority: "consul.service.consul",
	}
	rc.PasswordConfig = cmd.PasswordConfig{
		PasswordFile: "test/secrets/ratelimits_redis_password",
	}

	stats := metrics.NoopRegisterer
	log := blog.NewMock()
	ring, err := bredis.NewRingFromConfig(rc, stats, log)
	test.AssertNotError(t, err, "making redis ring client")

	err = ring.ForEachShard(context.Background(), func(ctx context.Context, shard *redis.Client) error {
		// The command result is used directly rather than bound to a local
		// named cmd, which would shadow the imported cmd package.
		_, err := shard.FlushAll(ctx).Result()
		return err
	})
	test.AssertNotError(t, err, "flushing all keys in all shards")
}

func TestDuplicateFQDNRateLimit(t *testing.T) {
t.Parallel()
domain := random_domain()

// The global rate limit for duplicate certificates is 2 per 3 hours.
_, err := authAndIssue(nil, nil, []string{domain}, true)
test.AssertNotError(t, err, "Failed to issue first certificate")

Expand All @@ -33,45 +82,72 @@ func TestDuplicateFQDNRateLimit(t *testing.T) {
test.AssertError(t, err, "Somehow managed to issue third certificate")

if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
// Setup rate limiting.
rc := bredis.Config{
Username: "unittest-rw",
TLS: cmd.TLSConfig{
CACertFile: "test/certs/ipki/minica.pem",
CertFile: "test/certs/ipki/localhost/cert.pem",
KeyFile: "test/certs/ipki/localhost/key.pem",
},
Lookups: []cmd.ServiceDomain{
{
Service: "redisratelimits",
Domain: "service.consul",
},
},
LookupDNSAuthority: "consul.service.consul",
}
rc.PasswordConfig = cmd.PasswordConfig{
PasswordFile: "test/secrets/ratelimits_redis_password",
}

fc := clock.New()
stats := metrics.NoopRegisterer
log := blog.NewMock()
ring, err := bredis.NewRingFromConfig(rc, stats, log)
test.AssertNotError(t, err, "making redis ring client")
source := ratelimits.NewRedisSource(ring.Ring, fc, stats)
test.AssertNotNil(t, source, "source should not be nil")
limiter, err := ratelimits.NewLimiter(fc, source, stats)
test.AssertNotError(t, err, "making limiter")
txnBuilder, err := ratelimits.NewTransactionBuilder("test/config-next/wfe2-ratelimit-defaults.yml", "")
test.AssertNotError(t, err, "making transaction composer")

// Check that the CertificatesPerFQDNSet limit is reached.
txns, err := txnBuilder.NewOrderLimitTransactions(1, []string{domain}, false)
test.AssertNotError(t, err, "making transaction")
decision, err := limiter.BatchSpend(context.Background(), txns)
test.AssertNotError(t, err, "checking transaction")
err = decision.Result(fc.Now())
test.AssertErrorIs(t, err, berrors.RateLimit)
// Error should be served from key-value rate limits implementation.
test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3h0m0s")
} else {
// Error should be served from legacy rate limits implementation.
test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3 hours")
}
}

// TestCertificatesPerDomain issues certificates for three distinct random
// subdomains of a single random domain. The first two issuances must succeed
// and the third must fail with a "too many certificates" error naming that
// domain. The exact error text depends on whether BOULDER_CONFIG_DIR points at
// test/config-next (key-value rate limits) or not (legacy rate limits).
func TestCertificatesPerDomain(t *testing.T) {
	t.Parallel()

	randomDomain := random_domain()
	// randomSubDomain returns a fresh name made of a 3-byte random hex label
	// prepended to randomDomain.
	randomSubDomain := func() string {
		var label [3]byte
		// Do not silently continue with a zeroed label if the random source
		// fails; that would make the subdomains collide.
		_, err := rand.Read(label[:])
		test.AssertNotError(t, err, "Failed to generate random subdomain label")
		return fmt.Sprintf("%s.%s", hex.EncodeToString(label[:]), randomDomain)
	}

	_, err := authAndIssue(nil, nil, []string{randomSubDomain()}, true)
	test.AssertNotError(t, err, "Failed to issue first certificate")

	_, err = authAndIssue(nil, nil, []string{randomSubDomain()}, true)
	test.AssertNotError(t, err, "Failed to issue second certificate")

	_, err = authAndIssue(nil, nil, []string{randomSubDomain()}, true)
	test.AssertError(t, err, "Somehow managed to issue third certificate")

	if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
		// Error should be served from key-value rate limits implementation.
		test.AssertContains(t, err.Error(), fmt.Sprintf("too many certificates (2) already issued for %q in the last 2160h0m0s", randomDomain))
	} else {
		// Error should be served from legacy rate limits implementation.
		test.AssertContains(t, err.Error(), fmt.Sprintf("too many certificates already issued for %q", randomDomain))
	}
}

// TestRenewalExemption issues a certificate and then a renewal for each of two
// subdomains ("www." and "blog.") of one random base domain. With the
// certificatesPerName limit at 2 per 90 days and renewals exempt from it, all
// four issuances should succeed. A certificate for a third subdomain ("mail.")
// should then be rejected because the limit has been reached.
func TestRenewalExemption(t *testing.T) {
	t.Parallel()

	baseDomain := random_domain()

	// Issuances that are expected to succeed, in order.
	permitted := []struct {
		prefix  string
		failMsg string
	}{
		{"www.", "Failed to issue first certificate"},
		{"www.", "Failed to issue first renewal"},
		{"blog.", "Failed to issue second certificate"},
		{"blog.", "Failed to issue second renewal"},
	}
	for _, step := range permitted {
		_, issueErr := authAndIssue(nil, nil, []string{step.prefix + baseDomain}, true)
		test.AssertNotError(t, issueErr, step.failMsg)
	}

	_, err := authAndIssue(nil, nil, []string{"mail." + baseDomain}, true)
	test.AssertError(t, err, "Somehow managed to issue third certificate")

	var expected string
	if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
		// Error should be served from key-value rate limits implementation.
		expected = fmt.Sprintf("too many certificates (2) already issued for %q in the last 2160h0m0s", baseDomain)
	} else {
		// Error should be served from legacy rate limits implementation.
		expected = fmt.Sprintf("too many certificates already issued for %q", baseDomain)
	}
	test.AssertContains(t, err.Error(), expected)
}
1 change: 0 additions & 1 deletion test/redis-ratelimits.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ rename-command BGREWRITEAOF ""
rename-command BGSAVE ""
rename-command CONFIG ""
rename-command DEBUG ""
rename-command FLUSHALL ""
rename-command FLUSHDB ""
rename-command KEYS ""
rename-command PEXPIRE ""
Expand Down
12 changes: 7 additions & 5 deletions test/v2_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1559,12 +1559,14 @@ def test_renewal_exemption():
chisel2.expect_problem("urn:ietf:params:acme:error:rateLimited",
lambda: chisel2.auth_and_issue(["mail." + base_domain]))

# TODO(#5545)
# - Phase 2: Once the new rate limits are authoritative in config-next, ensure
# that this test only runs in config.
# - Phase 3: Once the new rate limits are authoritative in config, remove this
# test entirely.
# TODO(#5545) Remove this test once key-value rate limits are authoritative in
# production.
def test_certificates_per_name():
if CONFIG_NEXT:
# This test is replaced by TestCertificatesPerDomain in the Go
# integration tests because key-value rate limits do not support
# override limits of 0.
return
chisel2.expect_problem("urn:ietf:params:acme:error:rateLimited",
lambda: chisel2.auth_and_issue([random_domain() + ".lim.it"]))

Expand Down
Loading

0 comments on commit 9dda074

Please sign in to comment.