From 03d817108d53adf61ee4decf6f0b4cc11f5e5863 Mon Sep 17 00:00:00 2001 From: Aaron Gable Date: Fri, 31 May 2024 16:27:59 -0700 Subject: [PATCH] Deprecate EnforceMultiVA and MultiVAFullResults feature flags --- docs/multi-va.md | 19 ++-- features/features.go | 9 +- test/config-next/va.json | 2 - test/config/va.json | 5 +- va/caa.go | 2 +- va/va.go | 186 +++++++++++++-------------------------- va/va_test.go | 175 ++++++------------------------------ 7 files changed, 99 insertions(+), 299 deletions(-) diff --git a/docs/multi-va.md b/docs/multi-va.md index 3e05f41ef8c..4c8df880daa 100644 --- a/docs/multi-va.md +++ b/docs/multi-va.md @@ -37,19 +37,12 @@ and [`test/config-next/remoteva-b.json`](https://github.com/letsencrypt/boulder/blob/5c27eadb1db0605f380e41c8bd444a7f4ffe3c08/test/config-next/remoteva-b.json) as their config files. -There are two feature flags that control whether multi-VA takes effect: -MultiVAFullResults and EnforceMultiVA. If MultiVAFullResults is enabled -then each primary validation will also send out remote validation requests, and -wait for all the results to come in, so we can log the results for analysis. If -EnforceMultiVA is enabled, we require that almost all remote validation requests -succeed. The primary VA's "maxRemoteValidationFailures" config field specifies -how many remote VAs can fail before the primary VA considers overall validation -a failure. It should be strictly less than the number of remote VAs. - -Validation is also controlled by the "multiVAPolicyFile" config field on the -primary VA. This specifies a file that can contain temporary overrides for -domains or accounts that fail under multi-va. Over time those temporary -overrides will be removed. +We require that almost all remote validation requests succeed; the exact number +is controlled by the VA's `maxRemoteFailures` config variable. If the number of +failing remote VAs exceeds that threshold, validation is terminated. If the +number of successful remote VAs is high enough that it would be impossible for +the outstanding remote VAs to exceed that threshold, validation immediately +succeeds. There are some integration tests that test this end to end. The most relevant is probably diff --git a/features/features.go b/features/features.go index ffc41e5a333..8820ffbf972 100644 --- a/features/features.go +++ b/features/features.go @@ -20,13 +20,8 @@ type Config struct { CAAAfterValidation bool AllowNoCommonName bool SHA256SubjectKeyIdentifier bool - - // EnforceMultiVA causes the VA to block on remote VA PerformValidation - // requests in order to make a valid/invalid decision with the results. - EnforceMultiVA bool - // MultiVAFullResults will cause the main VA to wait for all of the remote VA - // results, not just the threshold required to make a decision. - MultiVAFullResults bool + EnforceMultiVA bool + MultiVAFullResults bool // ECDSAForAll enables all accounts, regardless of their presence in the CA's // ecdsaAllowedAccounts config value, to get issuance from ECDSA issuers. diff --git a/test/config-next/va.json b/test/config-next/va.json index abc38e53805..12efd33bcce 100644 --- a/test/config-next/va.json +++ b/test/config-next/va.json @@ -38,8 +38,6 @@ } }, "features": { - "EnforceMultiVA": true, - "MultiVAFullResults": true, "EnforceMultiCAA": true, "MultiCAAFullResults": true, "DOH": true diff --git a/test/config/va.json b/test/config/va.json index 37388b8f964..a04a35380d5 100644 --- a/test/config/va.json +++ b/test/config/va.json @@ -38,10 +38,7 @@ } } }, - "features": { - "EnforceMultiVA": true, - "MultiVAFullResults": true - }, + "features": {}, "remoteVAs": [ { "serverAddress": "rva1.service.consul:9397", diff --git a/va/caa.go b/va/caa.go index 9647db07bc0..8d9d676390b 100644 --- a/va/caa.go +++ b/va/caa.go @@ -212,7 +212,7 @@ func (va *ValidationAuthorityImpl) processRemoteCAAResults( // If we are using `features.MultiCAAFullResults` then we haven't returned // early and can now log the differential between what the primary VA saw and // what all of the remote VAs saw. - va.logRemoteDifferentials( + va.logRemoteResults( domain, acctID, challengeType, diff --git a/va/va.go b/va/va.go index 54c1b430b6c..70a9672e3a6 100644 --- a/va/va.go +++ b/va/va.go @@ -23,7 +23,6 @@ import ( "github.com/letsencrypt/boulder/canceled" "github.com/letsencrypt/boulder/core" berrors "github.com/letsencrypt/boulder/errors" - "github.com/letsencrypt/boulder/features" bgrpc "github.com/letsencrypt/boulder/grpc" "github.com/letsencrypt/boulder/identifier" blog "github.com/letsencrypt/boulder/log" @@ -82,21 +81,20 @@ type RemoteVA struct { } type vaMetrics struct { - validationTime *prometheus.HistogramVec - localValidationTime *prometheus.HistogramVec - remoteValidationTime *prometheus.HistogramVec - remoteValidationFailures prometheus.Counter - prospectiveRemoteValidationFailures prometheus.Counter - caaCheckTime *prometheus.HistogramVec - localCAACheckTime *prometheus.HistogramVec - remoteCAACheckTime *prometheus.HistogramVec - remoteCAACheckFailures prometheus.Counter - prospectiveRemoteCAACheckFailures prometheus.Counter - tlsALPNOIDCounter *prometheus.CounterVec - http01Fallbacks prometheus.Counter - http01Redirects prometheus.Counter - caaCounter *prometheus.CounterVec - ipv4FallbackCounter prometheus.Counter + validationTime *prometheus.HistogramVec + localValidationTime *prometheus.HistogramVec + remoteValidationTime *prometheus.HistogramVec + remoteValidationFailures prometheus.Counter + caaCheckTime *prometheus.HistogramVec + localCAACheckTime *prometheus.HistogramVec + remoteCAACheckTime *prometheus.HistogramVec + remoteCAACheckFailures prometheus.Counter + prospectiveRemoteCAACheckFailures prometheus.Counter + tlsALPNOIDCounter *prometheus.CounterVec + http01Fallbacks prometheus.Counter + http01Redirects prometheus.Counter + caaCounter *prometheus.CounterVec + ipv4FallbackCounter prometheus.Counter } func initMetrics(stats prometheus.Registerer) *vaMetrics { @@ -130,12 +128,6 @@ func initMetrics(stats prometheus.Registerer) *vaMetrics { Help: "Number of validations failed due to remote VAs returning failure when consensus is enforced", }) stats.MustRegister(remoteValidationFailures) - prospectiveRemoteValidationFailures := prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "prospective_remote_validation_failures", - Help: "Number of validations that would have failed due to remote VAs returning failure if consesus were enforced", - }) - stats.MustRegister(prospectiveRemoteValidationFailures) caaCheckTime := prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "caa_check_time", @@ -204,21 +196,20 @@ func initMetrics(stats prometheus.Registerer) *vaMetrics { stats.MustRegister(ipv4FallbackCounter) return &vaMetrics{ - validationTime: validationTime, - remoteValidationTime: remoteValidationTime, - localValidationTime: localValidationTime, - remoteValidationFailures: remoteValidationFailures, - prospectiveRemoteValidationFailures: prospectiveRemoteValidationFailures, - caaCheckTime: caaCheckTime, - localCAACheckTime: localCAACheckTime, - remoteCAACheckTime: remoteCAACheckTime, - remoteCAACheckFailures: remoteCAACheckFailures, - prospectiveRemoteCAACheckFailures: prospectiveRemoteCAACheckFailures, - tlsALPNOIDCounter: tlsALPNOIDCounter, - http01Fallbacks: http01Fallbacks, - http01Redirects: http01Redirects, - caaCounter: caaCounter, - ipv4FallbackCounter: ipv4FallbackCounter, + validationTime: validationTime, + remoteValidationTime: remoteValidationTime, + localValidationTime: localValidationTime, + remoteValidationFailures: remoteValidationFailures, + caaCheckTime: caaCheckTime, + localCAACheckTime: localCAACheckTime, + remoteCAACheckTime: remoteCAACheckTime, + remoteCAACheckFailures: remoteCAACheckFailures, + prospectiveRemoteCAACheckFailures: prospectiveRemoteCAACheckFailures, + tlsALPNOIDCounter: tlsALPNOIDCounter, + http01Fallbacks: http01Fallbacks, + http01Redirects: http01Redirects, + caaCounter: caaCounter, + ipv4FallbackCounter: ipv4FallbackCounter, } } @@ -528,23 +519,9 @@ func (va *ValidationAuthorityImpl) performRemoteValidation( // processRemoteValidationResults evaluates a primary VA result, and a channel // of remote VA problems to produce a single overall validation result based on // configured feature flags. The overall result is calculated based on the VA's -// configured `maxRemoteFailures` value. -// -// If the `MultiVAFullResults` feature is enabled then -// `processRemoteValidationResults` will expect to read a result from the -// `remoteErrors` channel for each VA and will not produce an overall result -// until all remote VAs have responded. In this case `logRemoteDifferentials` -// will also be called to describe the differential between the primary and all -// of the remote VAs. -// -// If the `MultiVAFullResults` feature flag is not enabled then -// `processRemoteValidationResults` will potentially return before all remote -// VAs have had a chance to respond. This happens if the success or failure -// threshold is met. This doesn't allow for logging the differential between the -// primary and remote VAs but is more performant. +// configured `maxRemoteFailures` value, and the function returns as soon as +// that threshold has been exceeded or cannot possibly be exceeded. func (va *ValidationAuthorityImpl) processRemoteValidationResults( - domain string, - acctID int64, challengeType string, remoteResultsChan <-chan *remoteVAResult) *probs.ProblemDetails { @@ -575,60 +552,36 @@ func (va *ValidationAuthorityImpl) processRemoteValidationResults( } else { bad++ } - // Store the first non-nil problem to return later (if `MultiVAFullResults` - // is enabled). + // Store the first non-nil problem to return later. if firstProb == nil && result.Problem != nil { firstProb = result.Problem } - // If MultiVAFullResults isn't enabled then return early whenever the - // success or failure threshold is met. - if !features.Get().MultiVAFullResults { - if good >= required { - state = "success" - return nil - } else if bad > va.maxRemoteFailures { - modifiedProblem := *result.Problem - modifiedProblem.Detail = "During secondary validation: " + firstProb.Detail - return &modifiedProblem - } + // Return as soon as we have enough successes or failures for a definitive result. + if good >= required { + state = "success" + return nil + } else if bad > va.maxRemoteFailures { + modifiedProblem := *result.Problem + modifiedProblem.Detail = "During secondary validation: " + firstProb.Detail + return &modifiedProblem } - // If we haven't returned early because of MultiVAFullResults being enabled - // we need to break the loop once all of the VAs have returned a result. + // If we somehow haven't returned early, we need to break the loop once all + // of the VAs have returned a result. if len(remoteResults) == len(va.remoteVAs) { break } } - // If we are using `features.MultiVAFullResults` then we haven't returned - // early and can now log the differential between what the primary VA saw and - // what all of the remote VAs saw. - va.logRemoteDifferentials( - domain, - acctID, - challengeType, - remoteResults) - - // Based on the threshold of good/bad return nil or a problem. - if good >= required { - state = "success" - return nil - } else if bad > va.maxRemoteFailures { - modifiedProblem := *firstProb - modifiedProblem.Detail = "During secondary validation: " + firstProb.Detail - va.metrics.prospectiveRemoteValidationFailures.Inc() - return &modifiedProblem - } // This condition should not occur - it indicates the good/bad counts didn't // meet either the required threshold or the maxRemoteFailures threshold. return probs.ServerInternal("Too few remote PerformValidation RPC results") } -// logRemoteDifferentials is called by `processRemoteValidationResults` when the -// `MultiVAFullResults` feature flag is enabled and `processRemoteCAAResults` +// logRemoteResults is called by `processRemoteCAAResults` when the // `MultiCAAFullResults` feature flag is enabled. It produces a JSON log line -// that contains the primary VA result and the results each remote VA returned. -func (va *ValidationAuthorityImpl) logRemoteDifferentials( +// that contains the results each remote VA returned. +func (va *ValidationAuthorityImpl) logRemoteResults( domain string, acctID int64, challengeType string, @@ -726,41 +679,24 @@ func (va *ValidationAuthorityImpl) PerformValidation(ctx context.Context, req *v logEvent.Error = prob.Error() logEvent.InternalError = err.Error() } else if remoteResults != nil { - if !features.Get().EnforceMultiVA && features.Get().MultiVAFullResults { - go func() { - _ = va.processRemoteValidationResults( - req.Domain, - req.Authz.RegID, - string(challenge.Type), - remoteResults) - }() - // Since prob was nil and we're not enforcing the results from - // `processRemoteValidationResults` set the challenge status to - // valid so the validationTime metrics increment has the correct - // result label. + remoteProb := va.processRemoteValidationResults( + string(challenge.Type), + remoteResults) + + // If the remote result was a non-nil problem then fail the validation + if remoteProb != nil { + prob = remoteProb + challenge.Status = core.StatusInvalid + challenge.Error = remoteProb + // We only set .Error here, not .InternalError, because the + // remote VA doesn't send us the internal error. But that's ok, + // it got logged at the remote VA. + logEvent.Error = remoteProb.Error() + va.log.Infof("Validation failed due to remote failures: identifier=%v err=%s", + req.Domain, remoteProb) + va.metrics.remoteValidationFailures.Inc() + } else { challenge.Status = core.StatusValid - } else if features.Get().EnforceMultiVA { - remoteProb := va.processRemoteValidationResults( - req.Domain, - req.Authz.RegID, - string(challenge.Type), - remoteResults) - - // If the remote result was a non-nil problem then fail the validation - if remoteProb != nil { - prob = remoteProb - challenge.Status = core.StatusInvalid - challenge.Error = remoteProb - // We only set .Error here, not .InternalError, because the - // remote VA doesn't send us the internal error. But that's ok, - // it got logged at the remote VA. - logEvent.Error = remoteProb.Error() - va.log.Infof("Validation failed due to remote failures: identifier=%v err=%s", - req.Domain, remoteProb) - va.metrics.remoteValidationFailures.Inc() - } else { - challenge.Status = core.StatusValid - } } } else { challenge.Status = core.StatusValid diff --git a/va/va_test.go b/va/va_test.go index 59854a82e8f..397c73e057d 100644 --- a/va/va_test.go +++ b/va/va_test.go @@ -395,20 +395,6 @@ func TestMultiVA(t *testing.T) { VAClient: cancelledVA{}, CAAClient: cancelledVA{}, } - enforceMultiVA := features.Config{ - EnforceMultiVA: true, - } - enforceMultiVAFullResults := features.Config{ - EnforceMultiVA: true, - MultiVAFullResults: true, - } - noEnforceMultiVA := features.Config{ - EnforceMultiVA: false, - } - noEnforceMultiVAFullResults := features.Config{ - EnforceMultiVA: false, - MultiVAFullResults: true, - } unauthorized := probs.Unauthorized(fmt.Sprintf( `The key authorization file from the server did not match this challenge. Expected %q (got "???")`, @@ -420,124 +406,68 @@ func TestMultiVA(t *testing.T) { Name string RemoteVAs []RemoteVA AllowedUAs map[string]bool - Features features.Config ExpectedProb *probs.ProblemDetails ExpectedLog string }{ { // With local and both remote VAs working there should be no problem. - Name: "Local and remote VAs OK, enforce multi VA", - RemoteVAs: remoteVAs, - AllowedUAs: allowedUAs, - Features: enforceMultiVA, - }, - { - // Ditto if multi VA enforcement is disabled - Name: "Local and remote VAs OK, no enforce multi VA", + Name: "Local and remote VAs OK", RemoteVAs: remoteVAs, AllowedUAs: allowedUAs, - Features: noEnforceMultiVA, }, { // If the local VA fails everything should fail - Name: "Local VA bad, remote VAs OK, no enforce multi VA", + Name: "Local VA bad, remote VAs OK", RemoteVAs: remoteVAs, AllowedUAs: map[string]bool{remoteUA1: true, remoteUA2: true}, - Features: noEnforceMultiVA, ExpectedProb: unauthorized, }, { - // Ditto when enforcing remote VA - Name: "Local VA bad, remote VAs OK, enforce multi VA", - RemoteVAs: remoteVAs, - AllowedUAs: map[string]bool{remoteUA1: true, remoteUA2: true}, - Features: enforceMultiVA, - ExpectedProb: unauthorized, - }, - { - // If a remote VA fails with an internal err it should fail when enforcing multi VA - Name: "Local VA ok, remote VA internal err, enforce multi VA", + // If a remote VA fails with an internal err it should fail + Name: "Local VA ok, remote VA internal err", RemoteVAs: []RemoteVA{ {remoteVA1, remoteUA1}, {brokenVA, "broken"}, }, AllowedUAs: allowedUAs, - Features: enforceMultiVA, ExpectedProb: probs.ServerInternal("During secondary validation: Remote PerformValidation RPC failed"), // The real failure cause should be logged ExpectedLog: expectedInternalErrLine, }, - { - // If a remote VA fails with an internal err it should not fail when not - // enforcing multi VA - Name: "Local VA ok, remote VA internal err, no enforce multi VA", - RemoteVAs: []RemoteVA{ - {remoteVA1, remoteUA1}, - {brokenVA, "broken"}, - }, - AllowedUAs: allowedUAs, - Features: noEnforceMultiVA, - // Like above, the real failure cause will be logged eventually, but that - // will happen asynchronously. It's not guaranteed to happen before the - // test case exits, so we don't check for it here. - }, - { - // With only one working remote VA there should *not* be a validation - // failure when not enforcing multi VA. - Name: "Local VA and one remote VA OK, no enforce multi VA", - RemoteVAs: remoteVAs, - AllowedUAs: map[string]bool{localUA: true, remoteUA2: true}, - Features: noEnforceMultiVA, - }, { // With only one working remote VA there should be a validation failure - // when enforcing multi VA. - Name: "Local VA and one remote VA OK, enforce multi VA", + Name: "Local VA and one remote VA OK", RemoteVAs: remoteVAs, AllowedUAs: map[string]bool{localUA: true, remoteUA2: true}, - Features: enforceMultiVA, ExpectedProb: probs.Unauthorized(fmt.Sprintf( `During secondary validation: The key authorization file from the server did not match this challenge. Expected %q (got "???")`, expectedKeyAuthorization)), }, { - // When enforcing multi-VA, any cancellations are a problem. - Name: "Local VA and one remote VA OK, one cancelled VA, enforce multi VA", + // Any remote VA cancellations are a problem. + Name: "Local VA and one remote VA OK, one cancelled VA", RemoteVAs: []RemoteVA{ {remoteVA1, remoteUA1}, {cancelledVA, remoteUA2}, }, AllowedUAs: allowedUAs, - Features: enforceMultiVA, ExpectedProb: probs.ServerInternal("During secondary validation: Remote PerformValidation RPC canceled"), }, { - // When enforcing multi-VA, any cancellations are a problem. - Name: "Local VA OK, two cancelled remote VAs, enforce multi VA", + // Any remote VA cancellations are a problem. + Name: "Local VA OK, two cancelled remote VAs", RemoteVAs: []RemoteVA{ {cancelledVA, remoteUA1}, {cancelledVA, remoteUA2}, }, AllowedUAs: allowedUAs, - Features: enforceMultiVA, ExpectedProb: probs.ServerInternal("During secondary validation: Remote PerformValidation RPC canceled"), }, { - // With the local and remote VAs seeing diff problems and the full results - // feature flag on but multi VA enforcement off we expect - // no problem. - Name: "Local and remote VA differential, full results, no enforce multi VA", - RemoteVAs: remoteVAs, - AllowedUAs: map[string]bool{localUA: true}, - Features: noEnforceMultiVAFullResults, - }, - { - // With the local and remote VAs seeing diff problems and the full results - // feature flag on and multi VA enforcement on we expect a problem. + // With the local and remote VAs seeing diff problems, we expect a problem. Name: "Local and remote VA differential, full results, enforce multi VA", RemoteVAs: remoteVAs, AllowedUAs: map[string]bool{localUA: true}, - Features: enforceMultiVAFullResults, ExpectedProb: probs.Unauthorized(fmt.Sprintf( `During secondary validation: The key authorization file from the server did not match this challenge. Expected %q (got "???")`, expectedKeyAuthorization)), @@ -552,9 +482,6 @@ func TestMultiVA(t *testing.T) { // Configure a primary VA with testcase remote VAs. localVA, mockLog := setup(ms.Server, 0, localUA, tc.RemoteVAs, nil) - features.Set(tc.Features) - defer features.Reset() - // Perform all validations res, _ := localVA.PerformValidation(ctx, req) if res.Problems == nil && tc.ExpectedProb != nil { @@ -601,66 +528,28 @@ func TestMultiVAEarlyReturn(t *testing.T) { } // Create a local test VA with the two remote VAs - localVA, mockLog := setup(ms.Server, 0, localUA, remoteVAs, nil) - - testCases := []struct { - Name string - EarlyReturn bool - }{ - { - Name: "One slow remote VA, no early return", - }, - { - Name: "One slow remote VA, early return", - EarlyReturn: true, - }, - } - - earlyReturnFeatures := features.Config{ - EnforceMultiVA: true, - MultiVAFullResults: false, - } - noEarlyReturnFeatures := features.Config{ - EnforceMultiVA: true, - MultiVAFullResults: true, - } + localVA, _ := setup(ms.Server, 0, localUA, remoteVAs, nil) + // Perform all validations + start := time.Now() req := createValidationRequest("localhost", core.ChallengeTypeHTTP01) - for _, tc := range testCases { - t.Run(tc.Name, func(t *testing.T) { - mockLog.Clear() - - var err error - if tc.EarlyReturn { - features.Set(earlyReturnFeatures) - } else { - features.Set(noEarlyReturnFeatures) - } - test.AssertNotError(t, err, "Failed to set MultiVAFullResults feature flag") - defer features.Reset() + res, _ := localVA.PerformValidation(ctx, req) - start := time.Now() + // It should always fail + if res.Problems == nil { + t.Error("expected prob from PerformValidation, got nil") + } - // Perform all validations - res, _ := localVA.PerformValidation(ctx, req) - // It should always fail - if res.Problems == nil { - t.Error("expected prob from PerformValidation, got nil") - } + elapsed := time.Since(start).Round(time.Millisecond).Milliseconds() - elapsed := time.Since(start).Round(time.Millisecond).Milliseconds() - - // The slow UA should sleep for `slowRemoteSleepMillis`. In the early return - // case the first remote VA should fail the overall validation and a prob - // should be returned quickly (i.e. in less than half of `slowRemoteSleepMillis`). - // In the non-early return case we don't expect a problem until - // `slowRemoteSleepMillis`. - if tc.EarlyReturn && elapsed > slowRemoteSleepMillis/2 { - t.Errorf( - "Expected an early return from PerformValidation in < %d ms, took %d ms", - slowRemoteSleepMillis/2, elapsed) - } - }) + // The slow UA should sleep for `slowRemoteSleepMillis`. But the first remote + // VA should fail quickly and the early-return code should cause the overall + // overall validation to return a prob quickly (i.e. in less than half of + // `slowRemoteSleepMillis`). + if elapsed > slowRemoteSleepMillis/2 { + t.Errorf( + "Expected an early return from PerformValidation in < %d ms, took %d ms", + slowRemoteSleepMillis/2, elapsed) } } @@ -691,14 +580,6 @@ func TestMultiVAPolicy(t *testing.T) { // Create a local test VA with the two remote VAs localVA, _ := setup(ms.Server, 0, localUA, remoteVAs, nil) - // Ensure multi VA enforcement is enabled, don't wait for full multi VA - // results. - features.Set(features.Config{ - EnforceMultiVA: true, - MultiVAFullResults: false, - }) - defer features.Reset() - // Perform validation for a domain not in the disabledDomains list req := createValidationRequest("letsencrypt.org", core.ChallengeTypeHTTP01) res, _ := localVA.PerformValidation(ctx, req) @@ -814,7 +695,7 @@ func TestLogRemoteDifferentials(t *testing.T) { t.Run(tc.name, func(t *testing.T) { mockLog.Clear() - localVA.logRemoteDifferentials( + localVA.logRemoteResults( "example.com", 1999, "blorpus-01", tc.remoteProbs) lines := mockLog.GetAllMatching("remoteVADifferentials JSON=.*")