From 98cb3a8595924134bad5c7ecf7e06559b7e93663 Mon Sep 17 00:00:00 2001 From: sh2 Date: Tue, 12 Sep 2023 09:27:53 +0800 Subject: [PATCH 1/3] fix: ensure the xds grpc server is properly stopped (#1860) --- internal/globalratelimit/runner/runner.go | 15 +++++++++------ internal/xds/server/runner/runner.go | 22 ++++++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/internal/globalratelimit/runner/runner.go b/internal/globalratelimit/runner/runner.go index b8615cbeb83..4d4c41b4feb 100644 --- a/internal/globalratelimit/runner/runner.go +++ b/internal/globalratelimit/runner/runner.go @@ -81,7 +81,7 @@ func (r *Runner) Start(ctx context.Context) error { discoveryv3.RegisterAggregatedDiscoveryServiceServer(r.grpc, serverv3.NewServer(ctx, r.cache, cb)) // Start and listen xDS gRPC config Server. - go r.serverXdsConfigServer(ctx) + go r.serveXdsConfigServer(ctx) // Start message Subscription. go r.subscribeAndTranslate(ctx) @@ -90,20 +90,23 @@ func (r *Runner) Start(ctx context.Context) error { return nil } -func (r *Runner) serverXdsConfigServer(ctx context.Context) { +func (r *Runner) serveXdsConfigServer(ctx context.Context) { addr := net.JoinHostPort(XdsGrpcSotwConfigServerAddress, strconv.Itoa(ratelimit.XdsGrpcSotwConfigServerPort)) l, err := net.Listen("tcp", addr) if err != nil { r.Logger.Error(err, "failed to listen on address", "address", addr) return } + + go func() { + <-ctx.Done() + r.Logger.Info("grpc server shutting down") + r.grpc.Stop() + }() + if err = r.grpc.Serve(l); err != nil { r.Logger.Error(err, "failed to start grpc based xds config server") } - - <-ctx.Done() - r.Logger.Info("grpc config server shutting down") - r.grpc.Stop() } func (r *Runner) subscribeAndTranslate(ctx context.Context) { diff --git a/internal/xds/server/runner/runner.go b/internal/xds/server/runner/runner.go index ee285c133f2..af9117c8841 100644 --- a/internal/xds/server/runner/runner.go +++ b/internal/xds/server/runner/runner.go @@ -102,18 +102,20 @@ 
func (r *Runner) serveXdsServer(ctx context.Context) { r.Logger.Error(err, "failed to listen on address", "address", addr) return } - err = r.grpc.Serve(l) - if err != nil { + + go func() { + <-ctx.Done() + r.Logger.Info("grpc server shutting down") + // We don't use GracefulStop here because envoy + // has long-lived hanging xDS requests. There's no + // mechanism to make those pending requests fail, + // so we forcibly terminate the TCP sessions. + r.grpc.Stop() + }() + + if err = r.grpc.Serve(l); err != nil { r.Logger.Error(err, "failed to start grpc based xds server") } - - <-ctx.Done() - r.Logger.Info("grpc server shutting down") - // We don't use GracefulStop here because envoy - // has long-lived hanging xDS requests. There's no - // mechanism to make those pending requests fail, - // so we forcibly terminate the TCP sessions. - r.grpc.Stop() } // registerServer registers the given xDS protocol Server with the gRPC From 5cb8697fd90bde39909609a3a8b6a62ac867f602 Mon Sep 17 00:00:00 2001 From: sh2 Date: Tue, 12 Sep 2023 10:01:53 +0800 Subject: [PATCH 2/3] fix: skip the not found filters in grpc routes (#1873) add continue to the not found filters in grpc routes Signed-off-by: sh2 --- internal/provider/kubernetes/routes.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/provider/kubernetes/routes.go b/internal/provider/kubernetes/routes.go index d3d9fbea6d4..cc33db4e4f2 100644 --- a/internal/provider/kubernetes/routes.go +++ b/internal/provider/kubernetes/routes.go @@ -182,6 +182,7 @@ func (r *gatewayAPIReconciler) processGRPCRoutes(ctx context.Context, gatewayNam authFilter, ok := resourceMap.authenFilters[key] if !ok { r.log.Error(err, "AuthenticationFilter not found; bypassing rule", "index", i) + continue } resourceTree.AuthenticationFilters = append(resourceTree.AuthenticationFilters, authFilter) @@ -193,6 +194,7 @@ func (r *gatewayAPIReconciler) processGRPCRoutes(ctx context.Context, gatewayNam rateLimitFilter, ok := 
resourceMap.rateLimitFilters[key] if !ok { r.log.Error(err, "RateLimitFilter not found; bypassing rule", "index", i) + continue } resourceTree.RateLimitFilters = append(resourceTree.RateLimitFilters, rateLimitFilter) From 31ea5316b4bdd007350cd271409d70ef7f7bf659 Mon Sep 17 00:00:00 2001 From: David Martin Date: Tue, 12 Sep 2023 07:06:44 +0100 Subject: [PATCH 3/3] docs: Add user guide for Gateway API State Metrics (#1871) --- docs/latest/user/gateway-api-metrics.md | 52 +++++++++++++++++++++++++ docs/latest/user_docs.rst | 1 + 2 files changed, 53 insertions(+) create mode 100644 docs/latest/user/gateway-api-metrics.md diff --git a/docs/latest/user/gateway-api-metrics.md b/docs/latest/user/gateway-api-metrics.md new file mode 100644 index 00000000000..e403c90404f --- /dev/null +++ b/docs/latest/user/gateway-api-metrics.md @@ -0,0 +1,52 @@ +# Gateway API Metrics + +Resource metrics for Gateway API objects are available using the [Gateway API State Metrics](https://github.com/Kuadrant/gateway-api-state-metrics) project. +The project also provides example dashboards for visualising the metrics using Grafana, and example alerts using Prometheus & Alertmanager. + +## Prerequisites + +Follow the steps from the [Quickstart Guide](quickstart.md) to install Envoy Gateway and the example manifest. +Before proceeding, you should be able to query the example backend using HTTP. 
+ +Run the following commands to install the metrics stack, with the Gateway API State Metrics configuration, on your Kubernetes cluster: + +```shell +kubectl apply --server-side -f https://raw.githubusercontent.com/Kuadrant/gateway-api-state-metrics/main/config/examples/kube-prometheus/bundle_crd.yaml +kubectl apply -f https://raw.githubusercontent.com/Kuadrant/gateway-api-state-metrics/main/config/examples/kube-prometheus/bundle.yaml +``` + +## Metrics and Alerts + +To access the Prometheus UI, wait for the statefulset to be ready, then use the port-forward command: + +```shell +# This first command may fail if the statefulset has not been created yet. +# In that case, try again until you get a message like 'Waiting for 2 pods to be ready...' +# or 'statefulset rolling update complete 2 pods...' +kubectl -n monitoring rollout status --watch --timeout=5m statefulset/prometheus-k8s +kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 > /dev/null & +``` + +Navigate to [http://localhost:9090](http://localhost:9090). +Metrics can be queried from the 'Graph' tab, e.g. `gatewayapi_gateway_created`. +See the [Gateway API State Metrics README](https://github.com/Kuadrant/gateway-api-state-metrics/tree/main#metrics) for the full list of Gateway API metrics available. + +Alerts can be seen in the 'Alerts' tab. +Gateway API specific alerts will be grouped under the 'gateway-api.rules' heading. + +***Note:*** Alerts are defined in a PrometheusRule custom resource in the 'monitoring' namespace. You can modify the alert rules by updating this resource. + +## Dashboards + +To view the dashboards in Grafana, wait for the deployment to be ready, then use the port-forward command: + +```shell +kubectl -n monitoring wait --timeout=5m deployment/grafana --for=condition=Available +kubectl -n monitoring port-forward service/grafana 3000:3000 > /dev/null & +``` + +Navigate to [http://localhost:3000](http://localhost:3000) and sign in with admin/admin. 
+The Gateway API State dashboards will be available in the 'Default' folder and tagged with 'gateway-api'. +See the [Gateway API State Metrics README](https://github.com/Kuadrant/gateway-api-state-metrics/tree/main#dashboards) for further information on available dashboards. + +***Note:*** Dashboards are loaded from ConfigMaps. You can modify the dashboards in the Grafana UI; however, you will need to export them from the UI and update the JSON in the ConfigMaps to persist changes. diff --git a/docs/latest/user_docs.rst b/docs/latest/user_docs.rst index 21d8e9c0f6d..8bd0c4ccfa9 100644 --- a/docs/latest/user_docs.rst +++ b/docs/latest/user_docs.rst @@ -30,5 +30,6 @@ Learn how to deploy, use, and operate Envoy Gateway. user/deployment-mode user/gateway-address user/gatewayapi-support + user/gateway-api-metrics user/proxy-observability user/multicluster-service