Skip to content

Commit

Permalink
feat: add trace for rate-limit (#2974)
Browse files Browse the repository at this point in the history
* feat: support trace of ratelimit

Signed-off-by: ShyunnY <1147212064@qq.com>

* fix: add json tag

Signed-off-by: ShyunnY <1147212064@qq.com>

* fix: use OTEL_EXPORTER_OTLP_ENDPOINT env

Signed-off-by: ShyunnY <1147212064@qq.com>

* fix

Signed-off-by: ShyunnY <1147212064@qq.com>

* docs: update docs

Signed-off-by: yuluo-yx <yuluo08290126@gmail.com>

---------

Signed-off-by: ShyunnY <1147212064@qq.com>
Signed-off-by: yuluo-yx <yuluo08290126@gmail.com>
Co-authored-by: yuluo-yx <yuluo08290126@gmail.com>
Co-authored-by: zirain <zirain2009@gmail.com>
  • Loading branch information
3 people authored Apr 11, 2024
1 parent b608831 commit aa4e3a0
Show file tree
Hide file tree
Showing 10 changed files with 679 additions and 4 deletions.
31 changes: 31 additions & 0 deletions api/v1alpha1/envoygateway_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,9 @@ type RateLimit struct {
type RateLimitTelemetry struct {
// Metrics defines metrics configuration for RateLimit.
Metrics *RateLimitMetrics `json:"metrics,omitempty"`

// Tracing defines tracing configuration for RateLimit.
// Tracing is only enabled for the rate limit service when
// Tracing.Provider.URL is set to a non-empty value.
Tracing *RateLimitTracing `json:"tracing,omitempty"`
}

type RateLimitMetrics struct {
Expand All @@ -366,6 +369,34 @@ type RateLimitMetricsPrometheusProvider struct {
Disable bool `json:"disable,omitempty"`
}

// RateLimitTracing defines the tracing configuration for RateLimit.
type RateLimitTracing struct {
// SamplingRate controls the rate at which traffic will be
// selected for tracing if no prior sampling decision has been made.
// Defaults to 100, valid values [0-100]. 100 indicates 100% sampling.
// The percentage is divided by 100 before it is handed to the
// rate limit service, which expects a fraction.
// +optional
SamplingRate *uint32 `json:"samplingRate,omitempty"`

// Provider defines the rateLimit tracing provider.
// Only OpenTelemetry is supported currently.
// Tracing stays disabled unless a Provider with a non-empty URL is configured.
Provider *RateLimitTracingProvider `json:"provider,omitempty"`
}

// RateLimitTracingProviderType identifies the kind of tracing provider
// used by the rate limit service.
type RateLimitTracingProviderType string

const (
	// RateLimitTracingProviderTypeOpenTelemetry selects the OpenTelemetry tracing provider.
	// It is declared as a RateLimitTracingProviderType so that it can be used
	// directly with RateLimitTracingProvider.Type without a conversion.
	RateLimitTracingProviderTypeOpenTelemetry RateLimitTracingProviderType = "OpenTelemetry"
)

// RateLimitTracingProvider defines the tracing provider configuration of RateLimit.
type RateLimitTracingProvider struct {
// Type defines the tracing provider type.
// Since the RateLimit exporter currently uses OpenTelemetry, only OpenTelemetry is supported.
Type *RateLimitTracingProviderType `json:"type,omitempty"`

// URL is the endpoint of the trace collector that supports the OTLP protocol.
// It is required to enable tracing. When the value has no scheme prefix,
// "http://" is prepended before it is passed to the rate limit service
// through the OTEL_EXPORTER_OTLP_ENDPOINT environment variable.
URL string `json:"url"`
}

// RateLimitDatabaseBackend defines the configuration associated with
// the database backend used by the rate limit service.
// +union
Expand Down
50 changes: 50 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 93 additions & 3 deletions internal/infrastructure/kubernetes/ratelimit/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ const (
ConfigGrpcXdsServerURLEnvVar = "CONFIG_GRPC_XDS_SERVER_URL"
// ConfigGrpcXdsNodeIDEnvVar is the id of ratelimit node.
ConfigGrpcXdsNodeIDEnvVar = "CONFIG_GRPC_XDS_NODE_ID"
// TracingEnabledVar is the env var that enables the tracing feature
TracingEnabledVar = "TRACING_ENABLED"
// TracingServiceNameVar is the env var for the service name that appears in tracing spans
TracingServiceNameVar = "TRACING_SERVICE_NAME"
// TracingServiceNamespaceVar is the env var for the service namespace that appears in tracing spans
TracingServiceNamespaceVar = "TRACING_SERVICE_NAMESPACE"
// TracingServiceInstanceIDVar is the env var for the service instance id that appears in tracing spans
TracingServiceInstanceIDVar = "TRACING_SERVICE_INSTANCE_ID"
// TracingSamplingRateVar is the env var for the trace sampling rate
TracingSamplingRateVar = "TRACING_SAMPLING_RATE"
// OTELExporterOTLPTraceEndpointVar is the env var for the target URL to which the trace exporter sends spans
OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_ENDPOINT"

// InfraName is the name for rate-limit resources.
InfraName = "envoy-ratelimit"
Expand Down Expand Up @@ -125,7 +137,8 @@ func rateLimitLabels() map[string]string {
}

// expectedRateLimitContainers returns expected rateLimit containers.
func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.Container {
func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
namespace string) []corev1.Container {
ports := []corev1.ContainerPort{
{
Name: "grpc",
Expand All @@ -142,7 +155,7 @@ func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeploymen
Command: []string{
"/bin/ratelimit",
},
Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment),
Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment, namespace),
Ports: ports,
Resources: *rateLimitDeployment.Container.Resources,
SecurityContext: rateLimitDeployment.Container.SecurityContext,
Expand Down Expand Up @@ -275,7 +288,8 @@ func expectedDeploymentVolumes(rateLimit *egv1a1.RateLimit, rateLimitDeployment
}

// expectedRateLimitContainerEnv returns expected rateLimit container envs.
func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.EnvVar {
func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
namespace string) []corev1.EnvVar {
env := []corev1.EnvVar{
{
Name: RuntimeRootEnvVar,
Expand Down Expand Up @@ -384,6 +398,54 @@ func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeploym
}
}

if enableTracing(rateLimit) {
var sampleRate = 1.0
if rateLimit.Telemetry.Tracing.SamplingRate != nil {
sampleRate = float64(*rateLimit.Telemetry.Tracing.SamplingRate) / 100.0
}

traceEndpoint := checkTraceEndpointScheme(rateLimit.Telemetry.Tracing.Provider.URL)
tracingEnvs := []corev1.EnvVar{
{
Name: TracingEnabledVar,
Value: "true",
},
{
Name: TracingServiceNameVar,
Value: InfraName,
},
{
Name: TracingServiceNamespaceVar,
Value: namespace,
},
{
// By default, this is a random instanceID,
// we use the RateLimit pod name as the trace service instanceID.
Name: TracingServiceInstanceIDVar,
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
APIVersion: "v1",
FieldPath: "metadata.name",
},
},
},
{
Name: TracingSamplingRateVar,
// The API is configured with a percentage in [0,100], but sampling can only be a fraction in [0,1].
// doc: https://github.com/envoyproxy/ratelimit?tab=readme-ov-file#tracing
// NOTE: the fraction is formatted with a single decimal place, so precision is lost
// during the conversion: the value is rounded to the nearest tenth
// (e.g. 55 is emitted as "0.6"), and percentages below 5 are emitted as "0.0".
Value: strconv.FormatFloat(sampleRate, 'f', 1, 64),
},
{
Name: OTELExporterOTLPTraceEndpointVar,
Value: traceEndpoint,
},
}
env = append(env, tracingEnvs...)
}

return resource.ExpectedContainerEnv(rateLimitDeployment.Container, env)
}

Expand All @@ -399,3 +461,31 @@ func Validate(ctx context.Context, client client.Client, gateway *egv1a1.EnvoyGa

return nil
}

// enableTracing reports whether tracing should be configured for the rate limit service.
//
// Every other tracing field can fall back to a default, so the only hard
// requirement is that the user supplied a non-empty Provider.URL.
func enableTracing(rl *egv1a1.RateLimit) bool {
	if rl == nil || rl.Telemetry == nil {
		return false
	}

	tracing := rl.Telemetry.Tracing
	if tracing == nil || tracing.Provider == nil {
		return false
	}

	return tracing.Provider.URL != ""
}

// checkTraceEndpointScheme ensures the trace collector URL carries a scheme prefix.
//
// The OTLP exporter needs the endpoint to include a scheme, so the user-provided
// value is normalized here:
//   - a URL that already starts with "http://" or "https://" is returned unchanged;
//   - any other value is assumed to be a plaintext endpoint and gets "http://" prepended.
func checkTraceEndpointScheme(url string) string {
	const (
		httpScheme  = "http://"
		httpsScheme = "https://"
	)

	// Leave an explicit scheme alone. In particular, never turn
	// "https://host" into the invalid "http://https://host".
	if strings.HasPrefix(url, httpScheme) || strings.HasPrefix(url, httpsScheme) {
		return url
	}

	// TODO: A scheme-less endpoint is currently assumed to be a normal (non-TLS)
	// connection; dedicated TLS configuration will be added later.
	return httpScheme + url
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func (r *ResourceRender) Name() string {
func enablePrometheus(rl *egv1a1.RateLimit) bool {
if rl != nil &&
rl.Telemetry != nil &&
rl.Telemetry.Metrics != nil &&
rl.Telemetry.Metrics.Prometheus != nil {
return !rl.Telemetry.Metrics.Prometheus.Disable
}
Expand Down Expand Up @@ -183,7 +184,7 @@ func (r *ResourceRender) ServiceAccount() (*corev1.ServiceAccount, error) {

// Deployment returns the expected rate limit Deployment based on the provided infra.
func (r *ResourceRender) Deployment() (*appsv1.Deployment, error) {
containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment)
containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment, r.Namespace)
labels := rateLimitLabels()
selector := resource.GetSelector(labels)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,46 @@ func TestDeployment(t *testing.T) {
},
},
},
{
caseName: "enable-tracing",
rateLimit: &egv1a1.RateLimit{
Backend: egv1a1.RateLimitDatabaseBackend{
Type: egv1a1.RedisBackendType,
Redis: &egv1a1.RateLimitRedisSettings{
URL: "redis.redis.svc:6379",
},
},
Telemetry: &egv1a1.RateLimitTelemetry{
Tracing: &egv1a1.RateLimitTracing{
Provider: &egv1a1.RateLimitTracingProvider{
URL: "http://trace-collector.envoy-gateway-system.svc.cluster.local:4318",
},
},
},
},
},
{
caseName: "enable-tracing-custom",
rateLimit: &egv1a1.RateLimit{
Backend: egv1a1.RateLimitDatabaseBackend{
Type: egv1a1.RedisBackendType,
Redis: &egv1a1.RateLimitRedisSettings{
URL: "redis.redis.svc:6379",
},
},
Telemetry: &egv1a1.RateLimitTelemetry{
Tracing: &egv1a1.RateLimitTracing{
SamplingRate: func() *uint32 {
var samplingRate uint32 = 55
return &samplingRate
}(),
Provider: &egv1a1.RateLimitTracingProvider{
URL: "trace-collector.envoy-gateway-system.svc.cluster.local:4317",
},
},
},
},
},
}
for _, tc := range cases {
t.Run(tc.caseName, func(t *testing.T) {
Expand Down
40 changes: 40 additions & 0 deletions internal/infrastructure/kubernetes/ratelimit/resource_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright Envoy Gateway Authors
// SPDX-License-Identifier: Apache-2.0
// The full text of the Apache license is available in the LICENSE file at
// the root of the repo.

package ratelimit

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestCheckTraceEndpointScheme(t *testing.T) {

cases := []struct {
caseName string
actualURL string
expectedURL string
}{
{
caseName: "normal url with http prefix",
actualURL: "http://collector.observability.svc.cluster.local:4318",
expectedURL: "http://collector.observability.svc.cluster.local:4318",
},
{
caseName: "abnormal url without http prefix",
actualURL: "collector.observability.svc.cluster.local:4318",
expectedURL: "http://collector.observability.svc.cluster.local:4318",
},
}

for _, tc := range cases {
t.Run(tc.caseName, func(t *testing.T) {
actual := checkTraceEndpointScheme(tc.actualURL)
require.Equal(t, tc.expectedURL, actual)
})
}

}
Loading

0 comments on commit aa4e3a0

Please sign in to comment.