Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add trace for rate-limit #2974

Merged
merged 7 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions api/v1alpha1/envoygateway_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,9 @@ type RateLimit struct {
// RateLimitTelemetry groups the telemetry settings (metrics and tracing)
// that can be configured for RateLimit.
type RateLimitTelemetry struct {
	// Metrics defines metrics configuration for RateLimit.
	Metrics *RateLimitMetrics `json:"metrics,omitempty"`

	// Tracing defines the tracing configuration for RateLimit.
	Tracing *RateLimitTracing `json:"tracing,omitempty"`
}

type RateLimitMetrics struct {
Expand All @@ -366,6 +369,34 @@ type RateLimitMetricsPrometheusProvider struct {
Disable bool `json:"disable,omitempty"`
}

type RateLimitTracing struct {
ShyunnY marked this conversation as resolved.
Show resolved Hide resolved
// SamplingRate controls the rate at which traffic will be
// selected for tracing if no prior sampling decision has been made.
// Defaults to 100, valid values [0-100]. 100 indicates 100% sampling.
// +optional
ShyunnY marked this conversation as resolved.
Show resolved Hide resolved
SamplingRate *uint32 `json:"samplingRate,omitempty"`

// Provider defines the rateLimit tracing provider.
// Only OpenTelemetry is supported currently.
Provider *RateLimitTracingProvider `json:"provider,omitempty"`
}

// RateLimitTracingProviderType names a tracing provider that RateLimit can
// export traces to.
type RateLimitTracingProviderType string

const (
	// RateLimitTracingProviderTypeOpenTelemetry selects the OpenTelemetry
	// tracing provider. It is typed as RateLimitTracingProviderType so it can
	// be assigned directly to RateLimitTracingProvider.Type.
	RateLimitTracingProviderTypeOpenTelemetry RateLimitTracingProviderType = "OpenTelemetry"
)

// RateLimitTracingProvider defines the tracing provider configuration of RateLimit
type RateLimitTracingProvider struct {
// Type defines the tracing provider type.
// Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported
Type *RateLimitTracingProviderType `json:"type,omitempty"`
zirain marked this conversation as resolved.
Show resolved Hide resolved

// URL is the endpoint of the trace collector that supports the OTLP protocol
URL string `json:"url"`
}

// RateLimitDatabaseBackend defines the configuration associated with
// the database backend used by the rate limit service.
// +union
Expand Down
50 changes: 50 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 93 additions & 3 deletions internal/infrastructure/kubernetes/ratelimit/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ const (
ConfigGrpcXdsServerURLEnvVar = "CONFIG_GRPC_XDS_SERVER_URL"
// ConfigGrpcXdsNodeIDEnvVar is the id of ratelimit node.
ConfigGrpcXdsNodeIDEnvVar = "CONFIG_GRPC_XDS_NODE_ID"
// TracingEnabledVar is the env var that enables the tracing feature.
TracingEnabledVar = "TRACING_ENABLED"
// TracingServiceNameVar is the env var for the service name that appears in tracing spans.
TracingServiceNameVar = "TRACING_SERVICE_NAME"
// TracingServiceNamespaceVar is the env var for the service namespace that appears in tracing spans.
TracingServiceNamespaceVar = "TRACING_SERVICE_NAMESPACE"
// TracingServiceInstanceIDVar is the env var for the service instance ID that appears in tracing spans.
TracingServiceInstanceIDVar = "TRACING_SERVICE_INSTANCE_ID"
// TracingSamplingRateVar is trace sampling rate
TracingSamplingRateVar = "TRACING_SAMPLING_RATE"
// OTELExporterOTLPTraceEndpointVar is the env var for the target URL to which the trace exporter sends traces.
OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_ENDPOINT"

// InfraName is the name for rate-limit resources.
InfraName = "envoy-ratelimit"
Expand Down Expand Up @@ -125,7 +137,8 @@ func rateLimitLabels() map[string]string {
}

// expectedRateLimitContainers returns expected rateLimit containers.
func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.Container {
func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
namespace string) []corev1.Container {
ports := []corev1.ContainerPort{
{
Name: "grpc",
Expand All @@ -142,7 +155,7 @@ func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeploymen
Command: []string{
"/bin/ratelimit",
},
Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment),
Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment, namespace),
Ports: ports,
Resources: *rateLimitDeployment.Container.Resources,
SecurityContext: rateLimitDeployment.Container.SecurityContext,
Expand Down Expand Up @@ -275,7 +288,8 @@ func expectedDeploymentVolumes(rateLimit *egv1a1.RateLimit, rateLimitDeployment
}

// expectedRateLimitContainerEnv returns expected rateLimit container envs.
func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.EnvVar {
func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
namespace string) []corev1.EnvVar {
env := []corev1.EnvVar{
{
Name: RuntimeRootEnvVar,
Expand Down Expand Up @@ -384,6 +398,54 @@ func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeploym
}
}

if enableTracing(rateLimit) {
var sampleRate = 1.0
if rateLimit.Telemetry.Tracing.SamplingRate != nil {
sampleRate = float64(*rateLimit.Telemetry.Tracing.SamplingRate) / 100.0
}

traceEndpoint := checkTraceEndpointScheme(rateLimit.Telemetry.Tracing.Provider.URL)
tracingEnvs := []corev1.EnvVar{
{
Name: TracingEnabledVar,
Value: "true",
},
{
Name: TracingServiceNameVar,
Value: InfraName,
},
{
Name: TracingServiceNamespaceVar,
Value: namespace,
},
{
// By default, this is a random instanceID,
// we use the RateLimit pod name as the trace service instanceID.
Name: TracingServiceInstanceIDVar,
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "metadata.name",
},
},
},
{
Name: TracingSamplingRateVar,
// The api is configured with [0,100], but sampling can only be [0,1].
// doc: https://github.com/envoyproxy/ratelimit?tab=readme-ov-file#tracing
// You will lose precision during the conversion process, but don't worry,
// this follows the rounding rule and won't make the expected sampling rate too different
// from the actual sampling rate
Value: strconv.FormatFloat(sampleRate, 'f', 1, 64),
},
{
Name: OTELExporterOTLPTraceEndpointVar,
Value: traceEndpoint,
},
}
env = append(env, tracingEnvs...)
}

return resource.ExpectedContainerEnv(rateLimitDeployment.Container, env)
}

Expand All @@ -399,3 +461,31 @@ func Validate(ctx context.Context, client client.Client, gateway *egv1a1.EnvoyGa

return nil
}

// enableTracing reports whether tracing should be switched on for the rate
// limit service. Every other tracing field can fall back to a default, so the
// only hard requirement is a provider with a non-empty URL.
func enableTracing(rl *egv1a1.RateLimit) bool {
	if rl == nil || rl.Telemetry == nil {
		return false
	}

	tracing := rl.Telemetry.Tracing
	if tracing == nil || tracing.Provider == nil {
		return false
	}

	return tracing.Provider.URL != ""
}

// checkTraceEndpointScheme returns url with a scheme prefix, which the OTLP
// exporter requires on its endpoint.
//
// A url that already starts with "http://" or "https://" (matched
// case-insensitively) is returned unchanged. Anything else is treated as a
// bare host[:port] and gets "http://" prepended.
func checkTraceEndpointScheme(url string) string {
	const httpScheme = "http://"

	// An explicit https:// endpoint must be left alone; blindly prepending
	// the plain-text scheme would yield the malformed "http://https://...".
	// TODO: scheme-less endpoints are still assumed to be plain-text
	// connections; dedicated TLS configuration will be added later.
	for _, scheme := range []string{httpScheme, "https://"} {
		if len(url) >= len(scheme) && strings.EqualFold(url[:len(scheme)], scheme) {
			return url
		}
	}

	return httpScheme + url
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func (r *ResourceRender) Name() string {
func enablePrometheus(rl *egv1a1.RateLimit) bool {
if rl != nil &&
rl.Telemetry != nil &&
rl.Telemetry.Metrics != nil &&
rl.Telemetry.Metrics.Prometheus != nil {
return !rl.Telemetry.Metrics.Prometheus.Disable
}
Expand Down Expand Up @@ -183,7 +184,7 @@ func (r *ResourceRender) ServiceAccount() (*corev1.ServiceAccount, error) {

// Deployment returns the expected rate limit Deployment based on the provided infra.
func (r *ResourceRender) Deployment() (*appsv1.Deployment, error) {
containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment)
containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment, r.Namespace)
labels := rateLimitLabels()
selector := resource.GetSelector(labels)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,46 @@ func TestDeployment(t *testing.T) {
},
},
},
{
caseName: "enable-tracing",
rateLimit: &egv1a1.RateLimit{
Backend: egv1a1.RateLimitDatabaseBackend{
Type: egv1a1.RedisBackendType,
Redis: &egv1a1.RateLimitRedisSettings{
URL: "redis.redis.svc:6379",
},
},
Telemetry: &egv1a1.RateLimitTelemetry{
Tracing: &egv1a1.RateLimitTracing{
Provider: &egv1a1.RateLimitTracingProvider{
URL: "http://trace-collector.envoy-gateway-system.svc.cluster.local:4318",
},
},
},
},
},
{
caseName: "enable-tracing-custom",
rateLimit: &egv1a1.RateLimit{
Backend: egv1a1.RateLimitDatabaseBackend{
Type: egv1a1.RedisBackendType,
Redis: &egv1a1.RateLimitRedisSettings{
URL: "redis.redis.svc:6379",
},
},
Telemetry: &egv1a1.RateLimitTelemetry{
Tracing: &egv1a1.RateLimitTracing{
SamplingRate: func() *uint32 {
var samplingRate uint32 = 55
return &samplingRate
}(),
Provider: &egv1a1.RateLimitTracingProvider{
URL: "trace-collector.envoy-gateway-system.svc.cluster.local:4317",
},
},
},
},
},
}
for _, tc := range cases {
t.Run(tc.caseName, func(t *testing.T) {
Expand Down
40 changes: 40 additions & 0 deletions internal/infrastructure/kubernetes/ratelimit/resource_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright Envoy Gateway Authors
// SPDX-License-Identifier: Apache-2.0
// The full text of the Apache license is available in the LICENSE file at
// the root of the repo.

package ratelimit

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestCheckTraceEndpointScheme(t *testing.T) {

cases := []struct {
caseName string
actualURL string
expectedURL string
}{
{
caseName: "normal url with http prefix",
actualURL: "http://collector.observability.svc.cluster.local:4318",
expectedURL: "http://collector.observability.svc.cluster.local:4318",
},
{
caseName: "abnormal url without http prefix",
actualURL: "collector.observability.svc.cluster.local:4318",
expectedURL: "http://collector.observability.svc.cluster.local:4318",
},
}

for _, tc := range cases {
t.Run(tc.caseName, func(t *testing.T) {
actual := checkTraceEndpointScheme(tc.actualURL)
require.Equal(t, tc.expectedURL, actual)
})
}

}
Loading
Loading