Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ratelimits: Correctly handle stale and concurrently initialized buckets #7886

Merged
merged 7 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 39 additions & 10 deletions ratelimits/limiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,31 +277,34 @@ func (l *Limiter) BatchSpend(ctx context.Context, txns []Transaction) (*Decision
batchDecision := allowedDecision
newBuckets := make(map[string]time.Time)
incrBuckets := make(map[string]increment)
staleBuckets := make(map[string]time.Time)
txnOutcomes := make(map[Transaction]string)

for _, txn := range batch {
tat, bucketExists := tats[txn.bucketKey]
storedTAT, bucketExists := tats[txn.bucketKey]
effectiveTAT := storedTAT
if !bucketExists {
// First request from this client.
tat = l.clk.Now()
effectiveTAT = l.clk.Now()
}

d := maybeSpend(l.clk, txn, tat)
d := maybeSpend(l.clk, txn, effectiveTAT)
beautifulentropy marked this conversation as resolved.
Show resolved Hide resolved

if txn.limit.isOverride() {
utilization := float64(txn.limit.Burst-d.remaining) / float64(txn.limit.Burst)
l.overrideUsageGauge.WithLabelValues(txn.limit.name.String(), txn.limit.overrideKey).Set(utilization)
}

if d.allowed && (tat != d.newTAT) && txn.spend {
// New bucket state should be persisted.
if bucketExists {
if d.allowed && (effectiveTAT != d.newTAT) && txn.spend {
if !bucketExists {
newBuckets[txn.bucketKey] = d.newTAT
} else if storedTAT.Before(l.clk.Now()) {
staleBuckets[txn.bucketKey] = d.newTAT
} else {
incrBuckets[txn.bucketKey] = increment{
cost: time.Duration(txn.cost * txn.limit.emissionInterval),
ttl: time.Duration(txn.limit.burstOffset),
}
} else {
newBuckets[txn.bucketKey] = d.newTAT
}
}

Expand All @@ -319,14 +322,40 @@ func (l *Limiter) BatchSpend(ctx context.Context, txns []Transaction) (*Decision

if batchDecision.allowed {
if len(newBuckets) > 0 {
err = l.source.BatchSet(ctx, newBuckets)
// Use BatchSetNotExisting to initialize new buckets so that we
// detect if concurrent requests have created this bucket at the
// same time, which would result in overwriting if we used a plain
// "SET" command. If that happens, fall back to incrementing.
initializationResults, err := l.source.BatchSetNotExisting(ctx, newBuckets)
if err != nil {
return nil, fmt.Errorf("batch set for %d keys: %w", len(newBuckets), err)
}
existingBuckets := make(map[string]struct{})
for k, initialized := range initializationResults {
if !initialized {
existingBuckets[k] = struct{}{}
}
}
for _, v := range txns {
_, bucketExists := existingBuckets[v.bucketKey]
if bucketExists {
jsha marked this conversation as resolved.
Show resolved Hide resolved
incrBuckets[v.bucketKey] = increment{
cost: time.Duration(v.cost * v.limit.emissionInterval),
ttl: time.Duration(v.limit.burstOffset),
}
}
}
}

if len(staleBuckets) > 0 {
err := l.source.BatchSet(ctx, staleBuckets)
if err != nil {
return nil, fmt.Errorf("batch set for %d keys: %w", len(staleBuckets), err)
}
}

if len(incrBuckets) > 0 {
err = l.source.BatchIncrement(ctx, incrBuckets)
err := l.source.BatchIncrement(ctx, incrBuckets)
if err != nil {
return nil, fmt.Errorf("batch increment for %d keys: %w", len(incrBuckets), err)
}
Expand Down
19 changes: 19 additions & 0 deletions ratelimits/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ type Source interface {
// the underlying storage client implementation).
BatchSet(ctx context.Context, bucketKeys map[string]time.Time) error

// BatchSetNotExisting attempts to set TATs for the specified bucketKeys if
// they do not already exist. Returns a map indicating which keys were set
// successfully.
BatchSetNotExisting(ctx context.Context, buckets map[string]time.Time) (map[string]bool, error)

// BatchIncrement updates the TATs for the specified bucketKeys, similar to
// BatchSet. Implementations MUST ensure non-blocking operations by either:
// a) applying a deadline or timeout to the context WITHIN the method, or
Expand Down Expand Up @@ -79,6 +84,20 @@ func (in *inmem) BatchSet(_ context.Context, bucketKeys map[string]time.Time) er
return nil
}

// BatchSetNotExisting attempts to set TATs for the specified bucketKeys if
// they do not already exist. The returned map contains an entry for EVERY
// requested key: true if this call initialized the bucket, false if the
// bucket already existed. Callers depend on the false entries to detect
// concurrently initialized buckets and fall back to incrementing, so keys
// must never be omitted from the result.
func (in *inmem) BatchSetNotExisting(_ context.Context, bucketKeys map[string]time.Time) (map[string]bool, error) {
	in.Lock()
	defer in.Unlock()

	results := make(map[string]bool, len(bucketKeys))
	for k, v := range bucketKeys {
		_, exists := in.m[k]
		if !exists {
			in.m[k] = v
		}
		// Record an outcome for every key, not only the ones we set.
		// This mirrors Redis SETNX semantics, which report false for
		// keys that already exist.
		results[k] = !exists
	}
	return results, nil
}

func (in *inmem) BatchIncrement(_ context.Context, bucketKeys map[string]increment) error {
in.Lock()
defer in.Unlock()
Expand Down
35 changes: 35 additions & 0 deletions ratelimits/source_redis.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,41 @@ func (r *RedisSource) BatchSet(ctx context.Context, buckets map[string]time.Time
return nil
}

// BatchSetNotExisting attempts to set TATs for the specified bucketKeys if they
// do not already exist. Returns a map indicating which keys were set successfully.
func (r *RedisSource) BatchSetNotExisting(ctx context.Context, buckets map[string]time.Time) (map[string]bool, error) {
start := r.clk.Now()

pipeline := r.client.Pipeline()
cmds := make(map[string]*redis.BoolCmd, len(buckets))
for bucketKey, tat := range buckets {
// Set a TTL of TAT + 10 minutes to account for clock skew.
ttl := tat.UTC().Sub(r.clk.Now()) + 10*time.Minute
cmds[bucketKey] = pipeline.SetNX(ctx, bucketKey, tat.UTC().UnixNano(), ttl)
}
_, err := pipeline.Exec(ctx)
if err != nil {
r.observeLatency("batchsetnotexisting", r.clk.Since(start), err)
return nil, err
}

results := make(map[string]bool, len(buckets))
totalLatency := r.clk.Since(start)
perSetLatency := totalLatency / time.Duration(len(buckets))
for bucketKey, cmd := range cmds {
success, err := cmd.Result()
if err != nil {
r.observeLatency("batchsetnotexisting_entry", perSetLatency, err)
return nil, err
}
results[bucketKey] = success
r.observeLatency("batchsetnotexisting_entry", perSetLatency, nil)
}

r.observeLatency("batchsetnotexisting", totalLatency, nil)
return results, nil
}

// BatchIncrement updates TATs for the specified bucketKeys using a pipelined
// Redis Transaction in order to reduce the number of round-trips to each Redis
// shard.
Expand Down
Loading