Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ffresty] [metric] HTTP Response Time and Complete Gauge Support #160

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ coverage.txt
**/debug.test
.DS_Store
__debug*
.vscode/*.log
.vscode/*.log
*.iml
.idea/
64 changes: 45 additions & 19 deletions pkg/ffresty/ffresty.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,14 @@
"golang.org/x/time/rate"
)

const (
metricsHTTPResponsesTotal = "http_responses_total"
metricsNetworkErrorsTotal = "network_errors_total"
metricsHTTPResponseTime = "http_response_time_seconds"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some size metrics for data transmitted would be useful to add as well.

)

type retryCtxKey struct{}
type hostCtxKey struct{}

type retryCtx struct {
id string
Expand Down Expand Up @@ -98,27 +105,21 @@
if err != nil {
return err
}
metricsManager.NewCounterMetricWithLabels(ctx, "http_response", "HTTP response", []string{"status", "error", "host", "method"}, false)
metricsManager.NewCounterMetricWithLabels(ctx, "network_error", "Network error", []string{"host", "method"}, false)
metricsManager.NewCounterMetricWithLabels(ctx, metricsHTTPResponsesTotal, "HTTP response", []string{"status", "error", "host", "method"}, false)
metricsManager.NewCounterMetricWithLabels(ctx, metricsNetworkErrorsTotal, "Network error", []string{"host", "method"}, false)
metricsManager.NewSummaryMetricWithLabels(ctx, metricsHTTPResponseTime, "HTTP response time", []string{"status", "host", "method"}, false)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@onelapahead what's your consideration between Histogram and Summary for this timing metrics?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Histogram is more costly in terms of cardinality bc of all the buckets you make. So this felt light enough to only calculate average response time across a few basic dimensions.

Would love to toggle btwn histogram and summaries based on certain settings eventually, but that was a bigger change than I had an appetite for.

}

// create hooks
onErrorMetricsHook := func(req *resty.Request, _ error) {
method := req.Method
u, _ := url.Parse(req.URL)
host := u.Host
// whilst there it is a possibility to get an response returned in the error here (and resty doc for OnError shows this) it seems to be a special case and the statuscode in such cases was not set.
// therefore we log all cases as network_error we may in future find reason to extract more detail from the error
metricsManager.IncCounterMetricWithLabels(ctx, "network_error", map[string]string{"host": host, "method": method}, nil)
metricsManager.IncCounterMetricWithLabels(ctx, metricsNetworkErrorsTotal, map[string]string{"host": req.Context().Value(hostCtxKey{}).(string), "method": req.Method}, nil)
}
RegisterGlobalOnError(onErrorMetricsHook)

onSuccessMetricsHook := func(_ *resty.Client, resp *resty.Response) {
method := resp.Request.Method
u, _ := url.Parse(resp.Request.URL)
host := u.Host
code := resp.RawResponse.StatusCode
metricsManager.IncCounterMetricWithLabels(ctx, "http_response", map[string]string{"status": fmt.Sprintf("%d", code), "error": "false", "host": host, "method": method}, nil)
metricsManager.IncCounterMetricWithLabels(ctx, metricsHTTPResponsesTotal, map[string]string{"status": fmt.Sprintf("%d", resp.RawResponse.StatusCode), "error": "false", "host": resp.Request.Context().Value(hostCtxKey{}).(string), "method": resp.Request.Method}, nil)
}
RegisterGlobalOnSuccess(onSuccessMetricsHook)
return nil
Expand All @@ -142,13 +143,17 @@
}
rCtx := resp.Request.Context()
rc := rCtx.Value(retryCtxKey{}).(*retryCtx)
elapsed := float64(time.Since(rc.start)) / float64(time.Millisecond)
elapsed := time.Since(rc.start)
level := logrus.DebugLevel
status := resp.StatusCode()
if status >= 300 {
level = logrus.ErrorLevel
}
log.L(rCtx).Logf(level, "<== %s %s [%d] (%.2fms)", resp.Request.Method, resp.Request.URL, status, elapsed)
log.L(rCtx).Logf(level, "<== %s %s [%d] (%dms)", resp.Request.Method, resp.Request.URL, status, time.Since(rc.start).Milliseconds())
if metricsManager != nil {
metricsManager.ObserveSummaryMetricWithLabels(rCtx, metricsHTTPResponseTime, elapsed.Seconds(), map[string]string{"status": fmt.Sprintf("%d", status), "host": rCtx.Value(hostCtxKey{}).(string), "method": resp.Request.Method}, nil)
}
// TODO use req.TraceInfo() for richer metrics at the DNS and transport layer
}

func OnError(req *resty.Request, err error) {
Expand Down Expand Up @@ -193,7 +198,7 @@
//
// You can use the normal Resty builder pattern, to set per-instance configuration
// as required.
func NewWithConfig(ctx context.Context, ffrestyConfig Config) (client *resty.Client) {

Check failure on line 201 in pkg/ffresty/ffresty.go

View workflow job for this annotation

GitHub Actions / build

cyclomatic complexity 37 of func `NewWithConfig` is high (> 30) (gocyclo)
if ffrestyConfig.HTTPCustomClient != nil {
if httpClient, ok := ffrestyConfig.HTTPCustomClient.(*http.Client); ok {
client = resty.NewWithClient(httpClient)
Expand Down Expand Up @@ -232,10 +237,10 @@

rateLimiterMap[client] = GetRateLimiter(ffrestyConfig.ThrottleRequestsPerSecond, ffrestyConfig.ThrottleBurst)

url := strings.TrimSuffix(ffrestyConfig.URL, "/")
if url != "" {
client.SetBaseURL(url)
log.L(ctx).Debugf("Created REST client to %s", url)
_url := strings.TrimSuffix(ffrestyConfig.URL, "/")
if _url != "" {
client.SetBaseURL(_url)
log.L(ctx).Debugf("Created REST client to %s", _url)
}

if ffrestyConfig.ProxyURL != "" {
Expand All @@ -244,7 +249,7 @@

client.SetTimeout(time.Duration(ffrestyConfig.HTTPRequestTimeout))

client.OnBeforeRequest(func(_ *resty.Client, req *resty.Request) error {
client.OnBeforeRequest(func(c *resty.Client, req *resty.Request) error {
if rateLimiterMap[client] != nil {
// Wait for permission to proceed with the request
err := rateLimiterMap[client].Wait(req.Context())
Expand All @@ -253,6 +258,26 @@
}
}
rCtx := req.Context()
// Record host in context to avoid redundant parses in hooks
var u *url.URL
if req.URL != "" {
u, _ = url.Parse(req.URL)
}
// The req.URL might have only set a path i.e. /home, fallbacking to the base URL of the client.
// So if the URL is nil, that's likely the case and we'll derive the host from the configured
// based instead.
if u == nil && _url != "" {
u, _ = url.Parse(_url)
}
if u == nil && c.BaseURL != "" {
u, _ = url.Parse(c.BaseURL)
}
if u != nil && u.Host != "" {
host := u.Host
rCtx = context.WithValue(rCtx, hostCtxKey{}, host)
} else {
rCtx = context.WithValue(rCtx, hostCtxKey{}, "unknown")
}
rc := rCtx.Value(retryCtxKey{})
if rc == nil {
// First attempt
Expand Down Expand Up @@ -289,8 +314,9 @@
}
}

log.L(rCtx).Debugf("==> %s %s%s", req.Method, url, req.URL)
log.L(rCtx).Debugf("==> %s %s%s", req.Method, _url, req.URL)
log.L(rCtx).Tracef("==> (body) %+v", req.Body)

return nil
})

Expand Down
4 changes: 2 additions & 2 deletions pkg/ffresty/ffresty_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ func TestMTLSClientWithServer(t *testing.T) {
var restyConfig = config.RootSection("resty")
InitConfig(restyConfig)
clientTLSSection := restyConfig.SubSection("tls")
restyConfig.Set(HTTPConfigURL, ln.Addr())
restyConfig.Set(HTTPConfigURL, ln.Addr()) // note this does not have https:// in the URL
clientTLSSection.Set(fftls.HTTPConfTLSEnabled, true)
clientTLSSection.Set(fftls.HTTPConfTLSKeyFile, privateKeyFile.Name())
clientTLSSection.Set(fftls.HTTPConfTLSCertFile, publicKeyFile.Name())
Expand Down Expand Up @@ -722,7 +722,6 @@ func TestEnableClientMetrics(t *testing.T) {

err := EnableClientMetrics(ctx, mr)
assert.NoError(t, err)

}

func TestEnableClientMetricsIdempotent(t *testing.T) {
Expand All @@ -749,6 +748,7 @@ func TestHooks(t *testing.T) {
}

customOnSuccess := func(c *resty.Client, resp *resty.Response) {
assert.Equal(t, "localhost:12345", resp.Request.Context().Value(hostCtxKey{}).(string))
onSuccessCount++
}

Expand Down
5 changes: 5 additions & 0 deletions pkg/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ type MetricsManager interface {
// functions for emitting metrics
SetGaugeMetric(ctx context.Context, metricName string, number float64, defaultLabels *FireflyDefaultLabels)
SetGaugeMetricWithLabels(ctx context.Context, metricName string, number float64, labels map[string]string, defaultLabels *FireflyDefaultLabels)
IncGaugeMetric(ctx context.Context, metricName string, defaultLabels *FireflyDefaultLabels)
IncGaugeMetricWithLabels(ctx context.Context, metricName string, labels map[string]string, defaultLabels *FireflyDefaultLabels)
DecGaugeMetric(ctx context.Context, metricName string, defaultLabels *FireflyDefaultLabels)
DecGaugeMetricWithLabels(ctx context.Context, metricName string, labels map[string]string, defaultLabels *FireflyDefaultLabels)

IncCounterMetric(ctx context.Context, metricName string, defaultLabels *FireflyDefaultLabels)
IncCounterMetricWithLabels(ctx context.Context, metricName string, labels map[string]string, defaultLabels *FireflyDefaultLabels)
ObserveHistogramMetric(ctx context.Context, metricName string, number float64, defaultLabels *FireflyDefaultLabels)
Expand Down
32 changes: 19 additions & 13 deletions pkg/metric/metric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ func TestMetricsManager(t *testing.T) {
mm.IncCounterMetric(ctx, "tx_request", nil)
mm.IncCounterMetricWithLabels(ctx, "tx_process", map[string]string{"status": "success"}, nil)
mm.SetGaugeMetric(ctx, "tx_stalled", 2, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetric(ctx, "tx_timeout_seconds", 2000, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{}, nil) // no label provided
mm.ObserveSummaryMetric(ctx, "tx_request_bytes", 2000, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "signing"}, nil)
}

func TestMetricsManagerWithDefaultLabels(t *testing.T) {
Expand All @@ -87,6 +87,8 @@ func TestMetricsManagerWithDefaultLabels(t *testing.T) {
mm.NewCounterMetricWithLabels(ctx, "tx_process", "Transaction processed", []string{"status"}, true)
mm.NewGaugeMetric(ctx, "tx_stalled", "Transactions that are stuck in a loop", true)
mm.NewGaugeMetricWithLabels(ctx, "tx_inflight", "Transactions that are in flight", []string{"stage"}, true)
mm.NewGaugeMetric(ctx, "tx_deadlettered", "Transactions that are put onto the deadletter queue", true)
mm.NewGaugeMetricWithLabels(ctx, "tx_confirming", "Transactions that are waiting to be confirmed", []string{"stage"}, true)
mm.NewHistogramMetric(ctx, "tx_timeout_seconds", "Duration of timed out transactions", []float64{}, true)
mm.NewHistogramMetricWithLabels(ctx, "tx_stage_seconds", "Duration of each transaction stage", []float64{}, []string{"stage"}, true)
mm.NewSummaryMetric(ctx, "tx_request_bytes", "Request size of timed out transactions", true)
Expand All @@ -96,21 +98,25 @@ func TestMetricsManagerWithDefaultLabels(t *testing.T) {
mm.IncCounterMetric(ctx, "tx_request", nil)
mm.IncCounterMetricWithLabels(ctx, "tx_process", map[string]string{"status": "success"}, nil)
mm.SetGaugeMetric(ctx, "tx_stalled", 2, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "signing"}, nil)
mm.IncGaugeMetric(ctx, "tx_deadlettered", nil)
mm.IncGaugeMetricWithLabels(ctx, "tx_confirming", map[string]string{"stage": "signing"}, nil)
mm.DecGaugeMetric(ctx, "tx_deadlettered", nil)
mm.DecGaugeMetricWithLabels(ctx, "tx_confirming", map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetric(ctx, "tx_timeout_seconds", 2000, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "signing"}, nil)
mm.ObserveSummaryMetric(ctx, "tx_request_bytes", 2000, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "signing"}, nil)

mm.IncCounterMetric(ctx, "tx_request", &FireflyDefaultLabels{Namespace: "ns_1"})
mm.IncCounterMetricWithLabels(ctx, "tx_process", map[string]string{"status": "success"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.SetGaugeMetric(ctx, "tx_stalled", 2, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "singing:)"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight", 2, map[string]string{"stage": "signing"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetric(ctx, "tx_timeout_seconds", 2000, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "singing:)"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{"stage": "signing"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveHistogramMetricWithLabels(ctx, "tx_stage_seconds", 2000, map[string]string{}, nil) // no label provided
mm.ObserveSummaryMetric(ctx, "tx_request_bytes", 2000, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "singing:)"}, &FireflyDefaultLabels{Namespace: "ns_1"})
mm.ObserveSummaryMetricWithLabels(ctx, "tx_retry_bytes", 2000, map[string]string{"stage": "signing"}, &FireflyDefaultLabels{Namespace: "ns_1"})

}

Expand Down Expand Up @@ -142,12 +148,12 @@ func TestMetricsManagerErrors(t *testing.T) {
mm.IncCounterMetric(ctx, "tx_not_exist", nil)
mm.IncCounterMetricWithLabels(ctx, "tx_not_exist", map[string]string{"status": "success"}, nil)
mm.SetGaugeMetric(ctx, "tx_not_exist", 2, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_not_exist", 2, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_not_exist", 2, map[string]string{"stage": "signing"}, nil)
mm.ObserveHistogramMetric(ctx, "tx_not_exist", 2000, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.ObserveHistogramMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "signing"}, nil)
mm.ObserveSummaryMetric(ctx, "tx_not_exist", 2000, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "singing:)"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight_invalid_labels", 2, map[string]string{"ff_stage": "singing:)"}, nil)
mm.ObserveSummaryMetricWithLabels(ctx, "tx_not_exist", 2000, map[string]string{"stage": "signing"}, nil)
mm.SetGaugeMetricWithLabels(ctx, "tx_inflight_invalid_labels", 2, map[string]string{"ff_stage": "signing"}, nil)
}

func TestHTTPMetricsInstrumentations(t *testing.T) {
Expand Down
Loading
Loading