From 0e1175c1a6a3470e46e040a72e4c22f76b93177e Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 01:56:12 +0530 Subject: [PATCH 1/7] x-pack/metricbeat/module/openai: Add new module --- x-pack/metricbeat/include/list.go | 2 + x-pack/metricbeat/metricbeat.reference.yml | 29 ++ .../metricbeat/module/openai/_meta/config.yml | 28 ++ .../module/openai/_meta/docs.asciidoc | 2 + .../metricbeat/module/openai/_meta/fields.yml | 10 + x-pack/metricbeat/module/openai/doc.go | 2 + x-pack/metricbeat/module/openai/fields.go | 23 ++ .../module/openai/usage/_meta/data.json | 19 ++ .../module/openai/usage/_meta/docs.asciidoc | 1 + .../module/openai/usage/_meta/fields.yml | 10 + .../metricbeat/module/openai/usage/client.go | 44 +++ .../metricbeat/module/openai/usage/config.go | 76 +++++ .../metricbeat/module/openai/usage/helper.go | 19 ++ .../metricbeat/module/openai/usage/schema.go | 82 +++++ .../metricbeat/module/openai/usage/usage.go | 316 ++++++++++++++++++ 15 files changed, 663 insertions(+) create mode 100644 x-pack/metricbeat/module/openai/_meta/config.yml create mode 100644 x-pack/metricbeat/module/openai/_meta/docs.asciidoc create mode 100644 x-pack/metricbeat/module/openai/_meta/fields.yml create mode 100644 x-pack/metricbeat/module/openai/doc.go create mode 100644 x-pack/metricbeat/module/openai/fields.go create mode 100644 x-pack/metricbeat/module/openai/usage/_meta/data.json create mode 100644 x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc create mode 100644 x-pack/metricbeat/module/openai/usage/_meta/fields.yml create mode 100644 x-pack/metricbeat/module/openai/usage/client.go create mode 100644 x-pack/metricbeat/module/openai/usage/config.go create mode 100644 x-pack/metricbeat/module/openai/usage/helper.go create mode 100644 x-pack/metricbeat/module/openai/usage/schema.go create mode 100644 x-pack/metricbeat/module/openai/usage/usage.go diff --git a/x-pack/metricbeat/include/list.go b/x-pack/metricbeat/include/list.go index 01ce86edf78..fdf19a87024 100644 --- a/x-pack/metricbeat/include/list.go +++ b/x-pack/metricbeat/include/list.go @@ -53,6 +53,8 @@ import ( _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/mssql" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/mssql/performance" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/mssql/transaction_log" + _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/openai" + _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/openai/usage" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/oracle" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/oracle/performance" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/oracle/sysmetric" diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 240acb2cfd6..48fc2eefa30 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1257,6 +1257,35 @@ metricbeat.modules: # Path to server status. Default nginx_status server_status_path: "nginx_status" +#-------------------------------- Openai Module -------------------------------- +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30 #----------------------------- Openmetrics Module ----------------------------- - module: openmetrics metricsets: ['collector'] diff --git a/x-pack/metricbeat/module/openai/_meta/config.yml b/x-pack/metricbeat/module/openai/_meta/config.yml new file mode 100644 index 00000000000..dadcd35b1ba --- /dev/null +++ b/x-pack/metricbeat/module/openai/_meta/config.yml @@ -0,0 +1,28 @@ +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30 \ No newline at end of file diff --git a/x-pack/metricbeat/module/openai/_meta/docs.asciidoc b/x-pack/metricbeat/module/openai/_meta/docs.asciidoc new file mode 100644 index 00000000000..744909c7a1a --- /dev/null +++ b/x-pack/metricbeat/module/openai/_meta/docs.asciidoc @@ -0,0 +1,2 @@ +This is the openai module. + diff --git a/x-pack/metricbeat/module/openai/_meta/fields.yml b/x-pack/metricbeat/module/openai/_meta/fields.yml new file mode 100644 index 00000000000..d514eb010f1 --- /dev/null +++ b/x-pack/metricbeat/module/openai/_meta/fields.yml @@ -0,0 +1,10 @@ +- key: openai + title: "openai" + release: beta + description: > + openai module + fields: + - name: openai + type: group + description: > + fields: diff --git a/x-pack/metricbeat/module/openai/doc.go b/x-pack/metricbeat/module/openai/doc.go new file mode 100644 index 00000000000..b6d7a70b121 --- /dev/null +++ b/x-pack/metricbeat/module/openai/doc.go @@ -0,0 +1,2 @@ +// Package openai is a Metricbeat module that contains MetricSets. +package openai diff --git a/x-pack/metricbeat/module/openai/fields.go b/x-pack/metricbeat/module/openai/fields.go new file mode 100644 index 00000000000..a23f01a7810 --- /dev/null +++ b/x-pack/metricbeat/module/openai/fields.go @@ -0,0 +1,23 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +// Code generated by beats/dev-tools/cmd/asset/asset.go - DO NOT EDIT. + +package openai + +import ( + "github.com/elastic/beats/v7/libbeat/asset" +) + +func init() { + if err := asset.SetFields("metricbeat", "openai", asset.ModuleFieldsPri, AssetOpenai); err != nil { + panic(err) + } +} + +// AssetOpenai returns asset data. +// This is the base64 encoded zlib format compressed contents of module/openai. +func AssetOpenai() string { + return "eJx8jkHOgyAUhPecYuLeC7D4d/9BXsvUEFEIYFpv31i0UUL6Ld/kfTM9Rq4aPnAWq4Bss6NGVw6dAiIdJVHjxiwKMEz3aEO2ftb4UwD2b0zeLI4KeFg6k/Qn6zHLxFPDRl4DNYbol7BfGtar5+xakgz8Xlu6jXr4QbOqUIvrAecRfMkUHC/ZMWXk+vTRVNmP4o3/Iiyl6h0AAP//O4NsEQ==" +} diff --git a/x-pack/metricbeat/module/openai/usage/_meta/data.json b/x-pack/metricbeat/module/openai/usage/_meta/data.json new file mode 100644 index 00000000000..91e18aa44af --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/_meta/data.json @@ -0,0 +1,19 @@ +{ + "@timestamp":"2016-05-23T08:05:34.853Z", + "beat":{ + "hostname":"beathost", + "name":"beathost" + }, + "metricset":{ + "host":"localhost", + "module":"openai", + "name":"usage", + "rtt":44269 + }, + "openai":{ + "usage":{ + "example": "usage" + } + }, + "type":"metricsets" +} diff --git a/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc b/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc new file mode 100644 index 00000000000..dc88baf82a0 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc @@ -0,0 +1 @@ +This is the usage metricset of the module openai. diff --git a/x-pack/metricbeat/module/openai/usage/_meta/fields.yml b/x-pack/metricbeat/module/openai/usage/_meta/fields.yml new file mode 100644 index 00000000000..7de86e52163 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/_meta/fields.yml @@ -0,0 +1,10 @@ +- name: usage + type: group + release: beta + description: > + usage + fields: + - name: example + type: keyword + description: > + Example field diff --git a/x-pack/metricbeat/module/openai/usage/client.go b/x-pack/metricbeat/module/openai/usage/client.go new file mode 100644 index 00000000000..4fdbcc0b775 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/client.go @@ -0,0 +1,44 @@ +package usage + +import ( + "context" + "net/http" + "time" + + "github.com/elastic/elastic-agent-libs/logp" + "golang.org/x/time/rate" +) + +// RLHTTPClient implements a rate-limited HTTP client that wraps the standard http.Client +// with a rate limiter to control API request frequency. +type RLHTTPClient struct { + ctx context.Context + client *http.Client + logger *logp.Logger + Ratelimiter *rate.Limiter +} + +// Do executes an HTTP request while respecting rate limits. +// It waits for rate limit token before proceeding with the request. +// Returns the HTTP response and any error encountered. +func (c *RLHTTPClient) Do(req *http.Request) (*http.Response, error) { + c.logger.Warn("Waiting for rate limit token") + err := c.Ratelimiter.Wait(context.TODO()) + if err != nil { + return nil, err + } + c.logger.Warn("Rate limit token acquired") + return c.client.Do(req) +} + +// newClient creates a new rate-limited HTTP client with specified rate limiter and timeout. +func newClient(ctx context.Context, logger *logp.Logger, rl *rate.Limiter, timeout time.Duration) *RLHTTPClient { + var client = http.DefaultClient + client.Timeout = timeout + return &RLHTTPClient{ + ctx: ctx, + client: client, + logger: logger, + Ratelimiter: rl, + } +} diff --git a/x-pack/metricbeat/module/openai/usage/config.go b/x-pack/metricbeat/module/openai/usage/config.go new file mode 100644 index 00000000000..2ca63bf0c10 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/config.go @@ -0,0 +1,76 @@ +package usage + +import ( + "fmt" + "time" +) + +type Config struct { + APIKeys []apiKeyConfig `config:"api_keys" validate:"required"` + APIURL string `config:"api_url"` + Headers []string `config:"headers"` + RateLimit *rateLimitConfig `config:"rate_limit"` + Timeout time.Duration `config:"timeout"` + Collection collectionConfig `config:"collection"` +} + +type rateLimitConfig struct { + Limit *int `config:"limit"` + Burst *int `config:"burst"` +} + +type apiKeyConfig struct { + Key string `config:"key"` +} + +type collectionConfig struct { + LookbackDays int `config:"lookback_days"` +} + +func defaultConfig() Config { + return Config{ + APIURL: "https://api.openai.com/v1/usage", + Timeout: 30 * time.Second, + RateLimit: &rateLimitConfig{ + Limit: ptr(60), + Burst: ptr(5), + }, + Collection: collectionConfig{ + LookbackDays: 5, // 5 days + }, + } +} + +func (c *Config) Validate() error { + switch { + case len(c.APIKeys) == 0: + return fmt.Errorf("at least one API key must be configured") + + case c.APIURL == "": + return fmt.Errorf("api_url cannot be empty") + + case c.RateLimit == nil: + return fmt.Errorf("rate_limit must be configured") + + case c.RateLimit.Limit == nil: + return fmt.Errorf("rate_limit.limit must be configured") + + case c.RateLimit.Burst == nil: + return fmt.Errorf("rate_limit.burst must be configured") + + case c.Timeout <= 0: + return fmt.Errorf("timeout must be greater than 0") + + case c.Collection.LookbackDays <= 0: + return fmt.Errorf("lookback_days must be greater than 0") + } + + // API keys validation in a separate loop since it needs iteration + for i, apiKey := range c.APIKeys { + if apiKey.Key == "" { + return fmt.Errorf("API key at position %d cannot be empty", i) + } + } + + return nil +} diff --git a/x-pack/metricbeat/module/openai/usage/helper.go b/x-pack/metricbeat/module/openai/usage/helper.go new file mode 100644 index 00000000000..08c7046c6c8 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/helper.go @@ -0,0 +1,19 @@ +package usage + +import "strings" + +func ptr[T any](value T) *T { + return &value +} + +func processHeaders(headers []string) map[string]string { + headersMap := make(map[string]string, len(headers)) + for _, header := range headers { + parts := strings.Split(header, ":") + if len(parts) != 2 { + continue + } + headersMap[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) + } + return headersMap +} diff --git a/x-pack/metricbeat/module/openai/usage/schema.go b/x-pack/metricbeat/module/openai/usage/schema.go new file mode 100644 index 00000000000..5b44ef687d8 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/schema.go @@ -0,0 +1,82 @@ +package usage + +type UsageResponse struct { + Object string `json:"object"` + Data []UsageData `json:"data"` + FtData []interface{} `json:"ft_data"` + DalleApiData []DalleData `json:"dalle_api_data"` + WhisperApiData []WhisperData `json:"whisper_api_data"` + TtsApiData []TtsData `json:"tts_api_data"` + AssistantCodeInterpreterData []interface{} `json:"assistant_code_interpreter_data"` + RetrievalStorageData []interface{} `json:"retrieval_storage_data"` +} + +type UsageData struct { + OrganizationID string `json:"organization_id"` + OrganizationName string `json:"organization_name"` + AggregationTimestamp int64 `json:"aggregation_timestamp"` + NRequests int `json:"n_requests"` + Operation string `json:"operation"` + SnapshotID string `json:"snapshot_id"` + NContextTokensTotal int `json:"n_context_tokens_total"` + NGeneratedTokensTotal int `json:"n_generated_tokens_total"` + Email *string `json:"email"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` + RequestType string `json:"request_type"` + NCachedContextTokensTotal int `json:"n_cached_context_tokens_total"` +} + +type DalleData struct { + Timestamp int64 `json:"timestamp"` + NumImages int `json:"num_images"` + NumRequests int `json:"num_requests"` + ImageSize string `json:"image_size"` + Operation string `json:"operation"` + UserID *string `json:"user_id"` + OrganizationID string `json:"organization_id"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + OrganizationName string `json:"organization_name"` + ModelID string `json:"model_id"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` +} + +type WhisperData struct { + Timestamp int64 `json:"timestamp"` + ModelID string `json:"model_id"` + NumSeconds int `json:"num_seconds"` + NumRequests int `json:"num_requests"` + UserID *string `json:"user_id"` + OrganizationID string `json:"organization_id"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + OrganizationName string `json:"organization_name"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` +} + +type TtsData struct { + Timestamp int64 `json:"timestamp"` + ModelID string `json:"model_id"` + NumCharacters int `json:"num_characters"` + NumRequests int `json:"num_requests"` + UserID *string `json:"user_id"` + OrganizationID string `json:"organization_id"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + OrganizationName string `json:"organization_name"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` +} diff --git a/x-pack/metricbeat/module/openai/usage/usage.go b/x-pack/metricbeat/module/openai/usage/usage.go new file mode 100644 index 00000000000..523cb40b719 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/usage.go @@ -0,0 +1,316 @@ +package usage + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + "github.com/elastic/beats/v7/libbeat/common/cfgwarn" + "github.com/elastic/beats/v7/metricbeat/mb" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/mapstr" + "golang.org/x/time/rate" +) + +// init registers the MetricSet with the central registry as soon as the program +// starts. The New function will be called later to instantiate an instance of +// the MetricSet for each host is defined in the module's configuration. After the +// MetricSet has been created then Fetch will begin to be called periodically. +func init() { + mb.Registry.MustAddMetricSet("openai", "usage", New) +} + +// MetricSet holds any configuration or state information. It must implement +// the mb.MetricSet interface. And this is best achieved by embedding +// mb.BaseMetricSet because it implements all of the required mb.MetricSet +// interface methods except for Fetch. +type MetricSet struct { + mb.BaseMetricSet + logger *logp.Logger + config Config + report mb.ReporterV2 +} + +// New creates a new instance of the MetricSet. New is responsible for unpacking +// any MetricSet specific configuration options if there are any. +func New(base mb.BaseMetricSet) (mb.MetricSet, error) { + cfgwarn.Beta("The openai usage metricset is beta.") + + config := defaultConfig() + if err := base.Module().UnpackConfig(&config); err != nil { + return nil, err + } + + if err := config.Validate(); err != nil { + return nil, err + } + + return &MetricSet{ + BaseMetricSet: base, + logger: logp.NewLogger("openai.usage"), + config: config, + }, nil +} + +// Fetch method implements the data gathering and data conversion to the right +// format. It publishes the event which is then forwarded to the output. In case +// of an error set the Error field of mb.Event or simply call report.Error(). +func (m *MetricSet) Fetch(report mb.ReporterV2) error { + httpClient := newClient( + context.TODO(), + m.logger, + rate.NewLimiter( + rate.Every(time.Duration(*m.config.RateLimit.Limit)*time.Second), + *m.config.RateLimit.Burst, + ), + m.config.Timeout, + ) + + m.report = report + + endDate := time.Now().UTC() + startDate := endDate.AddDate(0, 0, -m.config.Collection.LookbackDays) + + return m.fetchDateRange(startDate, endDate, httpClient) +} + +func (m *MetricSet) fetchDateRange(startDate, endDate time.Time, httpClient *RLHTTPClient) error { + for d := startDate; !d.After(endDate); d = d.AddDate(0, 0, 1) { + dateStr := d.Format("2006-01-02") + for _, apiKey := range m.config.APIKeys { + if err := m.fetchSingleDay(dateStr, apiKey.Key, httpClient); err != nil { + m.logger.Errorf("Error fetching data for date %s: %v", dateStr, err) + continue + } + } + } + return nil +} + +func (m *MetricSet) fetchSingleDay(dateStr, apiKey string, httpClient *RLHTTPClient) error { + req, err := m.createRequest(dateStr, apiKey) + if err != nil { + return fmt.Errorf("error creating request: %w", err) + } + + resp, err := httpClient.Do(req) + if err != nil { + return fmt.Errorf("error making request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("error response from API: %s", resp.Status) + } + + return m.processResponse(resp, dateStr) +} + +func (m *MetricSet) createRequest(dateStr, apiKey string) (*http.Request, error) { + req, err := http.NewRequest(http.MethodGet, m.config.APIURL, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + q := req.URL.Query() + q.Add("date", dateStr) + req.URL.RawQuery = q.Encode() + + req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", apiKey)) + for key, value := range processHeaders(m.config.Headers) { + req.Header.Add(key, value) + } + + return req, nil +} + +func (m *MetricSet) processResponse(resp *http.Response, dateStr string) error { + var usageResponse UsageResponse + if err := json.NewDecoder(resp.Body).Decode(&usageResponse); err != nil { + return fmt.Errorf("error decoding response: %w", err) + } + + m.logger.Info("Fetched usage metrics for date:", dateStr) + + events := make([]mb.Event, 0, len(usageResponse.Data)) + + m.processUsageData(events, usageResponse.Data) + m.processDalleData(events, usageResponse.DalleApiData) + m.processWhisperData(events, usageResponse.WhisperApiData) + m.processTTSData(events, usageResponse.TtsApiData) + + // Process additional data. + // + // NOTE(shmsr): During testing, could not get the usage data for the following + // and found no documentation, example responses, etc. That's why let's store them + // as it is so that we can use processors later on to process them as needed. + m.processFTData(events, usageResponse.FtData) + m.processAssistantCodeInterpreterData(events, usageResponse.AssistantCodeInterpreterData) + m.processRetrievalStorageData(events, usageResponse.RetrievalStorageData) + + return nil +} + +func (m *MetricSet) processUsageData(events []mb.Event, data []UsageData) { + for _, usage := range data { + event := mb.Event{ + Timestamp: time.Unix(usage.AggregationTimestamp, 0), + MetricSetFields: mapstr.M{ + "data": mapstr.M{ + "organization_id": usage.OrganizationID, + "organization_name": usage.OrganizationName, + "n_requests": usage.NRequests, + "operation": usage.Operation, + "snapshot_id": usage.SnapshotID, + "n_context_tokens_total": usage.NContextTokensTotal, + "n_generated_tokens_total": usage.NGeneratedTokensTotal, + "email": usage.Email, + "api_key_id": usage.ApiKeyID, + "api_key_name": usage.ApiKeyName, + "api_key_redacted": usage.ApiKeyRedacted, + "api_key_type": usage.ApiKeyType, + "project_id": usage.ProjectID, + "project_name": usage.ProjectName, + "request_type": usage.RequestType, + "n_cached_context_tokens_total": usage.NCachedContextTokensTotal, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processDalleData(events []mb.Event, data []DalleData) { + for _, dalle := range data { + event := mb.Event{ + Timestamp: time.Unix(dalle.Timestamp, 0), + MetricSetFields: mapstr.M{ + "dalle": mapstr.M{ + "num_images": dalle.NumImages, + "num_requests": dalle.NumRequests, + "image_size": dalle.ImageSize, + "operation": dalle.Operation, + "user_id": dalle.UserID, + "organization_id": dalle.OrganizationID, + "api_key_id": dalle.ApiKeyID, + "api_key_name": dalle.ApiKeyName, + "api_key_redacted": dalle.ApiKeyRedacted, + "api_key_type": dalle.ApiKeyType, + "organization_name": dalle.OrganizationName, + "model_id": dalle.ModelID, + "project_id": dalle.ProjectID, + "project_name": dalle.ProjectName, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processWhisperData(events []mb.Event, data []WhisperData) { + for _, whisper := range data { + event := mb.Event{ + Timestamp: time.Unix(whisper.Timestamp, 0), + MetricSetFields: mapstr.M{ + "whisper": mapstr.M{ + "model_id": whisper.ModelID, + "num_seconds": whisper.NumSeconds, + "num_requests": whisper.NumRequests, + "user_id": whisper.UserID, + "organization_id": whisper.OrganizationID, + "api_key_id": whisper.ApiKeyID, + "api_key_name": whisper.ApiKeyName, + "api_key_redacted": whisper.ApiKeyRedacted, + "api_key_type": whisper.ApiKeyType, + "organization_name": whisper.OrganizationName, + "project_id": whisper.ProjectID, + "project_name": whisper.ProjectName, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processTTSData(events []mb.Event, data []TtsData) { + for _, tts := range data { + event := mb.Event{ + Timestamp: time.Unix(tts.Timestamp, 0), + MetricSetFields: mapstr.M{ + "tts": mapstr.M{ + "model_id": tts.ModelID, + "num_characters": tts.NumCharacters, + "num_requests": tts.NumRequests, + "user_id": tts.UserID, + "organization_id": tts.OrganizationID, + "api_key_id": tts.ApiKeyID, + "api_key_name": tts.ApiKeyName, + "api_key_redacted": tts.ApiKeyRedacted, + "api_key_type": tts.ApiKeyType, + "organization_name": tts.OrganizationName, + "project_id": tts.ProjectID, + "project_name": tts.ProjectName, + }, + }, + } + events = append(events, event) + } + + m.processEvents(events) +} + +func (m *MetricSet) processFTData(events []mb.Event, data []interface{}) { + for _, ft := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "ft_data": mapstr.M{ + "original": ft, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processAssistantCodeInterpreterData(events []mb.Event, data []interface{}) { + for _, aci := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "assistant_code_interpreter": mapstr.M{ + "original": aci, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processRetrievalStorageData(events []mb.Event, data []interface{}) { + for _, rs := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "retrieval_storage": mapstr.M{ + "original": rs, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processEvents(events []mb.Event) { + if len(events) > 0 { + for i := range events { + m.report.Event(events[i]) + } + } + clear(events) +} From 8725cbb430396bd88de03bee1767e6e4e4679a7c Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 13:20:00 +0530 Subject: [PATCH 2/7] update module --- .../module/openai/usage/persistcache.go | 61 +++++++++++++++++++ .../metricbeat/module/openai/usage/usage.go | 60 +++++++++++++++--- .../metricbeat/modules.d/openai.yml.disabled | 31 ++++++++++ 3 files changed, 142 insertions(+), 10 deletions(-) create mode 100644 x-pack/metricbeat/module/openai/usage/persistcache.go create mode 100644 x-pack/metricbeat/modules.d/openai.yml.disabled diff --git a/x-pack/metricbeat/module/openai/usage/persistcache.go b/x-pack/metricbeat/module/openai/usage/persistcache.go new file mode 100644 index 00000000000..8da3f9071c3 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/persistcache.go @@ -0,0 +1,61 @@ +package usage + +import ( + "fmt" + "os" + "path" +) + +// stateStore handles persistence of state markers using the filesystem +type stateStore struct { + Dir string // Base directory for storing state files +} + +// newStateStore creates a new state store instance at the specified path +func newStateStore(path string) (*stateStore, error) { + if err := os.MkdirAll(path, 0o755); err != nil { + return nil, fmt.Errorf("creating state directory: %w", err) + } + return &stateStore{ + Dir: path, + }, nil +} + +// getStatePath builds the full file path for a given state key +func (s *stateStore) getStatePath(name string) string { + return path.Join(s.Dir, name) +} + +// Put creates a state marker file for the given key +func (s *stateStore) Put(key string) error { + filePath := s.getStatePath(key) + f, err := os.Create(filePath) + if err != nil { + return fmt.Errorf("creating state file: %w", err) + } + return f.Close() +} + +// Has checks if a state exists for the given key +func (s *stateStore) Has(key string) bool { + filePath := s.getStatePath(key) + _, err := os.Stat(filePath) + return err == nil +} + +// Remove deletes the state marker file for the given key +func (s *stateStore) Remove(key string) error { + filePath := s.getStatePath(key) + if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("removing state file: %w", err) + } + return nil +} + +// Clear removes all state markers by deleting and recreating the state directory +func (s *stateStore) Clear() error { + if err := os.RemoveAll(s.Dir); err != nil { + return fmt.Errorf("clearing state directory: %w", err) + } + return os.MkdirAll(s.Dir, 0o755) +} diff --git a/x-pack/metricbeat/module/openai/usage/usage.go b/x-pack/metricbeat/module/openai/usage/usage.go index 523cb40b719..61f5b30ae9c 100644 --- a/x-pack/metricbeat/module/openai/usage/usage.go +++ b/x-pack/metricbeat/module/openai/usage/usage.go @@ -2,6 +2,8 @@ package usage import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" "net/http" @@ -11,6 +13,7 @@ import ( "github.com/elastic/beats/v7/metricbeat/mb" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" + "github.com/elastic/elastic-agent-libs/paths" "golang.org/x/time/rate" ) @@ -28,9 +31,10 @@ func init() { // interface methods except for Fetch. type MetricSet struct { mb.BaseMetricSet - logger *logp.Logger - config Config - report mb.ReporterV2 + logger *logp.Logger + config Config + report mb.ReporterV2 + stateStore *stateStore } // New creates a new instance of the MetricSet. New is responsible for unpacking @@ -47,10 +51,16 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return nil, err } + st, err := newStateStore(paths.Resolve(paths.Data, base.Name())) + if err != nil { + return nil, fmt.Errorf("creating state store: %w", err) + } + return &MetricSet{ BaseMetricSet: base, logger: logp.NewLogger("openai.usage"), config: config, + stateStore: st, }, nil } @@ -77,14 +87,44 @@ func (m *MetricSet) Fetch(report mb.ReporterV2) error { } func (m *MetricSet) fetchDateRange(startDate, endDate time.Time, httpClient *RLHTTPClient) error { - for d := startDate; !d.After(endDate); d = d.AddDate(0, 0, 1) { - dateStr := d.Format("2006-01-02") - for _, apiKey := range m.config.APIKeys { + for _, apiKey := range m.config.APIKeys { + // SHA-256 produces a fixed-length (64 characters) hexadecimal string + // that is safe for filenames across all major platforms. Hex encoding + // ensures that the hash is safe for use in file paths as it uses only + // alphanumeric characters. + // + // Also, SHA-256 is a strong cryptographic hash function that is + // deterministic, meaning that the same input will always produce + // the same output and it is an one-way function, meaning that it is + // computationally infeasible to reverse the hash to obtain the + // original. + hasher := sha256.New() + hasher.Write([]byte(apiKey.Key)) + hashedKey := hex.EncodeToString(hasher.Sum(nil)) + stateKey := "state_" + hashedKey + + // If state exists, only fetch current day + if m.stateStore.Has(stateKey) { + currentDay := endDate.Format("2006-01-02") + if err := m.fetchSingleDay(currentDay, apiKey.Key, httpClient); err != nil { + m.logger.Errorf("Error fetching data for date %s: %v", currentDay, err) + } + continue + } + + // First run for this API key - fetch historical data + for d := startDate; !d.After(endDate); d = d.AddDate(0, 0, 1) { + dateStr := d.Format("2006-01-02") if err := m.fetchSingleDay(dateStr, apiKey.Key, httpClient); err != nil { m.logger.Errorf("Error fetching data for date %s: %v", dateStr, err) continue } } + + // Mark this API key as processed + if err := m.stateStore.Put(stateKey); err != nil { + m.logger.Errorf("Error storing state for API key: %v", err) + } } return nil } @@ -156,11 +196,11 @@ func (m *MetricSet) processResponse(resp *http.Response, dateStr string) error { func (m *MetricSet) processUsageData(events []mb.Event, data []UsageData) { for _, usage := range data { event := mb.Event{ - Timestamp: time.Unix(usage.AggregationTimestamp, 0), MetricSetFields: mapstr.M{ "data": mapstr.M{ "organization_id": usage.OrganizationID, "organization_name": usage.OrganizationName, + "aggregation_timestamp": usage.AggregationTimestamp, "n_requests": usage.NRequests, "operation": usage.Operation, "snapshot_id": usage.SnapshotID, @@ -186,9 +226,9 @@ func (m *MetricSet) processUsageData(events []mb.Event, data []UsageData) { func (m *MetricSet) processDalleData(events []mb.Event, data []DalleData) { for _, dalle := range data { event := mb.Event{ - Timestamp: time.Unix(dalle.Timestamp, 0), MetricSetFields: mapstr.M{ "dalle": mapstr.M{ + "timestamp": dalle.Timestamp, "num_images": dalle.NumImages, "num_requests": dalle.NumRequests, "image_size": dalle.ImageSize, @@ -214,9 +254,9 @@ func (m *MetricSet) processDalleData(events []mb.Event, data []DalleData) { func (m *MetricSet) processWhisperData(events []mb.Event, data []WhisperData) { for _, whisper := range data { event := mb.Event{ - Timestamp: time.Unix(whisper.Timestamp, 0), MetricSetFields: mapstr.M{ "whisper": mapstr.M{ + "timestamp": whisper.Timestamp, "model_id": whisper.ModelID, "num_seconds": whisper.NumSeconds, "num_requests": whisper.NumRequests, @@ -240,9 +280,9 @@ func (m *MetricSet) processWhisperData(events []mb.Event, data []WhisperData) { func (m *MetricSet) processTTSData(events []mb.Event, data []TtsData) { for _, tts := range data { event := mb.Event{ - Timestamp: time.Unix(tts.Timestamp, 0), MetricSetFields: mapstr.M{ "tts": mapstr.M{ + "timestamp": tts.Timestamp, "model_id": tts.ModelID, "num_characters": tts.NumCharacters, "num_requests": tts.NumRequests, diff --git a/x-pack/metricbeat/modules.d/openai.yml.disabled b/x-pack/metricbeat/modules.d/openai.yml.disabled new file mode 100644 index 00000000000..41d604c3ea9 --- /dev/null +++ b/x-pack/metricbeat/modules.d/openai.yml.disabled @@ -0,0 +1,31 @@ +# Module: openai +# Docs: https://www.elastic.co/guide/en/beats/metricbeat/main/metricbeat-module-openai.html + +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30 \ No newline at end of file From 03995f656a59f88db9102cf7c56160188b83b751 Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 17:35:13 +0530 Subject: [PATCH 3/7] update module --- metricbeat/docs/fields.asciidoc | 31 +++++++++ metricbeat/docs/modules/openai.asciidoc | 68 +++++++++++++++++++ metricbeat/docs/modules/openai/usage.asciidoc | 29 ++++++++ metricbeat/docs/modules_list.asciidoc | 3 + .../metricbeat/module/openai/usage/client.go | 3 +- .../module/openai/usage/persistcache.go | 16 ++++- .../metricbeat/module/openai/usage/usage.go | 6 +- 7 files changed, 152 insertions(+), 4 deletions(-) create mode 100644 metricbeat/docs/modules/openai.asciidoc create mode 100644 metricbeat/docs/modules/openai/usage.asciidoc diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 09fd5e53245..018c7017dd2 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -67,6 +67,7 @@ grouped in the following categories: * <> * <> * <> +* <> * <> * <> * <> @@ -56654,6 +56655,36 @@ type: long -- +[[exported-fields-openai]] +== openai fields + +openai module + + + +[float] +=== openai + + + + +[float] +=== usage + +usage + + + +*`openai.usage.example`*:: ++ +-- +Example field + + +type: keyword + +-- + [[exported-fields-openmetrics]] == Openmetrics fields diff --git a/metricbeat/docs/modules/openai.asciidoc b/metricbeat/docs/modules/openai.asciidoc new file mode 100644 index 00000000000..f961b1e09fb --- /dev/null +++ b/metricbeat/docs/modules/openai.asciidoc @@ -0,0 +1,68 @@ +//// +This file is generated! See scripts/mage/docs_collector.go +//// + +:modulename: openai +:edit_url: https://github.com/elastic/beats/edit/main/x-pack/metricbeat/module/openai/_meta/docs.asciidoc + + +[[metricbeat-module-openai]] +[role="xpack"] +== openai module + +beta[] + +This is the openai module. + + + +:edit_url: + +[float] +=== Example configuration + +The openai module supports the standard configuration options that are described +in <>. Here is an example configuration: + +[source,yaml] +---- +metricbeat.modules: +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30---- + +[float] +=== Metricsets + +The following metricsets are available: + +* <> + +include::openai/usage.asciidoc[] + +:edit_url!: diff --git a/metricbeat/docs/modules/openai/usage.asciidoc b/metricbeat/docs/modules/openai/usage.asciidoc new file mode 100644 index 00000000000..69d0ba313d9 --- /dev/null +++ b/metricbeat/docs/modules/openai/usage.asciidoc @@ -0,0 +1,29 @@ +//// +This file is generated! See scripts/mage/docs_collector.go +//// +:edit_url: https://github.com/elastic/beats/edit/main/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc + + +[[metricbeat-metricset-openai-usage]] +[role="xpack"] +=== openai usage metricset + +beta[] + +include::../../../../x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc[] + + +:edit_url: + +==== Fields + +For a description of each field in the metricset, see the +<> section. + +Here is an example document generated by this metricset: + +[source,json] +---- +include::../../../../x-pack/metricbeat/module/openai/usage/_meta/data.json[] +---- +:edit_url!: \ No newline at end of file diff --git a/metricbeat/docs/modules_list.asciidoc b/metricbeat/docs/modules_list.asciidoc index f68dc8e1e65..3b66228a75f 100644 --- a/metricbeat/docs/modules_list.asciidoc +++ b/metricbeat/docs/modules_list.asciidoc @@ -240,6 +240,8 @@ This file is generated! See scripts/mage/docs_collector.go |<> |<> |image:./images/icon-yes.png[Prebuilt dashboards are available] | .1+| .1+| |<> +|<> beta[] |image:./images/icon-no.png[No prebuilt dashboards] | +.1+| .1+| |<> beta[] |<> beta[] |image:./images/icon-no.png[No prebuilt dashboards] | .1+| .1+| |<> beta[] |<> |image:./images/icon-yes.png[Prebuilt dashboards are available] | @@ -381,6 +383,7 @@ include::modules/munin.asciidoc[] include::modules/mysql.asciidoc[] include::modules/nats.asciidoc[] include::modules/nginx.asciidoc[] +include::modules/openai.asciidoc[] include::modules/openmetrics.asciidoc[] include::modules/oracle.asciidoc[] include::modules/panw.asciidoc[] diff --git a/x-pack/metricbeat/module/openai/usage/client.go b/x-pack/metricbeat/module/openai/usage/client.go index 4fdbcc0b775..b5d1b3e6078 100644 --- a/x-pack/metricbeat/module/openai/usage/client.go +++ b/x-pack/metricbeat/module/openai/usage/client.go @@ -5,8 +5,9 @@ import ( "net/http" "time" - "github.com/elastic/elastic-agent-libs/logp" "golang.org/x/time/rate" + + "github.com/elastic/elastic-agent-libs/logp" ) // RLHTTPClient implements a rate-limited HTTP client that wraps the standard http.Client diff --git a/x-pack/metricbeat/module/openai/usage/persistcache.go b/x-pack/metricbeat/module/openai/usage/persistcache.go index 8da3f9071c3..34bb18a6b53 100644 --- a/x-pack/metricbeat/module/openai/usage/persistcache.go +++ b/x-pack/metricbeat/module/openai/usage/persistcache.go @@ -4,11 +4,13 @@ import ( "fmt" "os" "path" + "sync" ) // stateStore handles persistence of state markers using the filesystem type stateStore struct { - Dir string // Base directory for storing state files + Dir string // Base directory for storing state files + sync.RWMutex // Protects access to the state store } // newStateStore creates a new state store instance at the specified path @@ -28,6 +30,9 @@ func (s *stateStore) getStatePath(name string) string { // Put creates a state marker file for the given key func (s *stateStore) Put(key string) error { + s.Lock() + defer s.Unlock() + filePath := s.getStatePath(key) f, err := os.Create(filePath) if err != nil { @@ -38,6 +43,9 @@ func (s *stateStore) Put(key string) error { // Has checks if a state exists for the given key func (s *stateStore) Has(key string) bool { + s.RLock() + defer s.RUnlock() + filePath := s.getStatePath(key) _, err := os.Stat(filePath) return err == nil @@ -45,6 +53,9 @@ func (s *stateStore) Has(key string) bool { // Remove deletes the state marker file for the given key func (s *stateStore) Remove(key string) error { + s.Lock() + defer s.Unlock() + filePath := s.getStatePath(key) if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) { return fmt.Errorf("removing state file: %w", err) @@ -54,6 +65,9 @@ func (s *stateStore) Remove(key string) error { // Clear removes all state markers by deleting and recreating the state directory func (s *stateStore) Clear() error { + s.Lock() + defer s.Unlock() + if err := os.RemoveAll(s.Dir); err != nil { return fmt.Errorf("clearing state directory: %w", err) } diff --git a/x-pack/metricbeat/module/openai/usage/usage.go b/x-pack/metricbeat/module/openai/usage/usage.go index 61f5b30ae9c..0988e635367 100644 --- a/x-pack/metricbeat/module/openai/usage/usage.go +++ b/x-pack/metricbeat/module/openai/usage/usage.go @@ -7,14 +7,16 @@ import ( "encoding/json" "fmt" "net/http" + "path" "time" + "golang.org/x/time/rate" + "github.com/elastic/beats/v7/libbeat/common/cfgwarn" "github.com/elastic/beats/v7/metricbeat/mb" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" "github.com/elastic/elastic-agent-libs/paths" - "golang.org/x/time/rate" ) // init registers the MetricSet with the central registry as soon as the program @@ -51,7 +53,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { return nil, err } - st, err := newStateStore(paths.Resolve(paths.Data, base.Name())) + st, err := newStateStore(paths.Resolve(paths.Data, path.Join(base.Module().Name(), base.Name()))) if err != nil { return nil, fmt.Errorf("creating state store: %w", err) } From 406c1a4ff61f9ac2509e706a71b440d8645b7894 Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 20:43:10 +0530 Subject: [PATCH 4/7] update module --- metricbeat/docs/fields.asciidoc | 594 +++++++++++++++++- x-pack/metricbeat/module/openai/fields.go | 2 +- .../module/openai/usage/_meta/data.json | 47 +- .../module/openai/usage/_meta/fields.yml | 231 ++++++- .../metricbeat/module/openai/usage/client.go | 4 + .../metricbeat/module/openai/usage/config.go | 8 +- .../metricbeat/module/openai/usage/helper.go | 4 + .../module/openai/usage/persistcache.go | 4 + .../metricbeat/module/openai/usage/schema.go | 4 + .../metricbeat/module/openai/usage/usage.go | 13 +- .../openai/usage/usage_integration_test.go | 175 ++++++ 11 files changed, 1058 insertions(+), 28 deletions(-) create mode 100644 x-pack/metricbeat/module/openai/usage/usage_integration_test.go diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 018c7017dd2..1705b79fe70 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -56671,20 +56671,608 @@ openai module [float] === usage -usage +OpenAI API usage metrics and statistics -*`openai.usage.example`*:: +[float] +=== data + +General usage data metrics + + + +*`openai.usage.data.organization_id`*:: + -- -Example field +Organization identifier + +type: keyword + +-- + +*`openai.usage.data.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.data.aggregation_timestamp`*:: ++ +-- +Timestamp of data aggregation + +type: date + +-- + +*`openai.usage.data.n_requests`*:: ++ +-- +Number of requests made + +type: long + +-- + +*`openai.usage.data.operation`*:: ++ +-- +Operation type + +type: keyword + +-- + +*`openai.usage.data.snapshot_id`*:: ++ +-- +Snapshot identifier + +type: keyword + +-- + +*`openai.usage.data.n_context_tokens_total`*:: ++ +-- +Total number of context tokens used + +type: long + +-- + +*`openai.usage.data.n_generated_tokens_total`*:: ++ +-- +Total number of generated tokens + +type: long + +-- + +*`openai.usage.data.n_cached_context_tokens_total`*:: ++ +-- +Total number of cached context tokens + +type: long + +-- + +*`openai.usage.data.email`*:: ++ +-- +User email + +type: keyword + +-- + +*`openai.usage.data.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.data.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.data.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.data.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.data.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.data.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +*`openai.usage.data.request_type`*:: ++ +-- +Type of request + +type: keyword + +-- + +[float] +=== dalle + +DALL-E API usage metrics + + + +*`openai.usage.dalle.timestamp`*:: ++ +-- +Timestamp of request + +type: date + +-- + +*`openai.usage.dalle.num_images`*:: ++ +-- +Number of images generated + +type: long + +-- + +*`openai.usage.dalle.num_requests`*:: ++ +-- +Number of requests + +type: long + +-- + +*`openai.usage.dalle.image_size`*:: ++ +-- +Size of generated images + +type: keyword + +-- + +*`openai.usage.dalle.operation`*:: ++ +-- +Operation type + +type: keyword + +-- + +*`openai.usage.dalle.user_id`*:: ++ +-- +User identifier + +type: keyword + +-- + +*`openai.usage.dalle.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.dalle.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.dalle.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.dalle.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.dalle.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.dalle.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.dalle.model_id`*:: ++ +-- +Model identifier + +type: keyword + +-- + +*`openai.usage.dalle.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.dalle.project_name`*:: ++ +-- +Project name +type: keyword + +-- + +[float] +=== whisper + +Whisper API usage metrics + + + +*`openai.usage.whisper.timestamp`*:: ++ +-- +Timestamp of request + +type: date + +-- + +*`openai.usage.whisper.model_id`*:: ++ +-- +Model identifier + +type: keyword + +-- + +*`openai.usage.whisper.num_seconds`*:: ++ +-- +Number of seconds processed + +type: long + +-- + +*`openai.usage.whisper.num_requests`*:: ++ +-- +Number of requests + +type: long + +-- + +*`openai.usage.whisper.user_id`*:: ++ +-- +User identifier + +type: keyword + +-- + +*`openai.usage.whisper.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.whisper.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.whisper.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.whisper.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.whisper.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.whisper.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.whisper.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.whisper.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +[float] +=== tts + +Text-to-Speech API usage metrics + + + +*`openai.usage.tts.timestamp`*:: ++ +-- +Timestamp of request + +type: date + +-- + +*`openai.usage.tts.model_id`*:: ++ +-- +Model identifier type: keyword -- +*`openai.usage.tts.num_characters`*:: ++ +-- +Number of characters processed + +type: long + +-- + +*`openai.usage.tts.num_requests`*:: ++ +-- +Number of requests + +type: long + +-- + +*`openai.usage.tts.user_id`*:: ++ +-- +User identifier + +type: keyword + +-- + +*`openai.usage.tts.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.tts.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.tts.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.tts.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.tts.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.tts.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.tts.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.tts.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +[float] +=== ft_data + +Fine-tuning data metrics + + + +*`openai.usage.ft_data.original`*:: ++ +-- +Raw fine-tuning data + +type: object + +-- + +[float] +=== assistant_code_interpreter + +Assistant Code Interpreter usage metrics + + + +*`openai.usage.assistant_code_interpreter.original`*:: ++ +-- +Raw assistant code interpreter data + +type: object + +-- + +[float] +=== retrieval_storage + +Retrieval storage usage metrics + + + +*`openai.usage.retrieval_storage.original`*:: ++ +-- +Raw retrieval storage data + +type: object + +-- + [[exported-fields-openmetrics]] == Openmetrics fields diff --git a/x-pack/metricbeat/module/openai/fields.go b/x-pack/metricbeat/module/openai/fields.go index a23f01a7810..fa62e586ec5 100644 --- a/x-pack/metricbeat/module/openai/fields.go +++ b/x-pack/metricbeat/module/openai/fields.go @@ -19,5 +19,5 @@ func init() { // AssetOpenai returns asset data. // This is the base64 encoded zlib format compressed contents of module/openai. func AssetOpenai() string { - return "eJx8jkHOgyAUhPecYuLeC7D4d/9BXsvUEFEIYFpv31i0UUL6Ld/kfTM9Rq4aPnAWq4Bss6NGVw6dAiIdJVHjxiwKMEz3aEO2ftb4UwD2b0zeLI4KeFg6k/Qn6zHLxFPDRl4DNYbol7BfGtar5+xakgz8Xlu6jXr4QbOqUIvrAecRfMkUHC/ZMWXk+vTRVNmP4o3/Iiyl6h0AAP//O4NsEQ==" + return "eJzsms9uozwUxfd5iqvu8wJZfFL0/VOlzrRqM5olcvEN8QRsxr5Mmz79yAEnQIBA4qbtFC8hnN/h+uJjRKawxs0MVIqSiQkACYpxBlf5gasJgMYYmcEZPCKxCQBHE2qRklByBn9NAKC4GhLFsxgnAEuBMTez7bkpSJZgiWAHbVKcQaRVlhZHGlSrOmWtzLAId0eb5OyoG3ejEZWP2xTl/Brmd9c5AxIkLUIDTHIwxEgYEqEpXVO3WLbJWYXb7vSIKzv+R4maxYUtq+y81X7aZKgyDzpiUrwwCwoEP/id87jGzZPSTecrTm9LciA4ShJLgbof3h7yaqBR0KFZFGmMcjKJBA2xpD4Lezxn1OStwl44FVDLfE5KjFYfMtD4M0ND9anbw2Mlo2Pwr1nyiNqSnRwkjLffvkpRNxsbWnEntL2slWckS81KkYcmeyik+jSYDEIlCZ8pILVGaQJSxOKzKr2wCiB39S4AkAMgM3h4B3s70fbJJeSvZ2iHKCx1FYeFK+QXqNGWUytVqy9MmGjn9+ySbwZ1i9JuCUhFsMaNh5a0EbHGTZ+OdFAfq53Ddi90BVAjZyE1NOdA6H2h4+hHwY3rwkDoYpOibaRjzFSrHxj6WGXucqU+U+qgPqbUYTuntFjivVa20Nz9vGUPE8d14smbmH/mNzfTfw+3VwO3MK8Q3vVa1JEySwKRsAh9ZXYutl+5O8mvtF9oZW7NBUa8nN1rD+IFqwnVUsW32qVkBrWHtWObPUO3v5fffY/5dwr0XeffG75QJYpj7KGXvlidIbH7Z2V9Y+4+rYRJD1ydnLzfc7mPGL0X7zKbuAZDJbmvwC3UbCuFaDpfF98g7ccIPI08RmAH+JNE4CdKJDpYQk5OowU+05TU9CFFDFdjKPUMpXDFtH0Kta9o2AuO0TRG0xhNYzQ1Mt99NC0p8Pmt9T8hcUqZFDI670uriITs+K6iHu39NZzOTwTDHgP2BMua8e6qMWOEISYpCBXHQEhCnWokf2+dc0eAvxVHuN4Tzsr7yxd2VyqwpYJSqXrUWdubxF8sDgwpXf3nwlnlvXfCUAh/sKrqA//bYv4OAAD//+T56FM=" } diff --git a/x-pack/metricbeat/module/openai/usage/_meta/data.json b/x-pack/metricbeat/module/openai/usage/_meta/data.json index 91e18aa44af..7a6355842f0 100644 --- a/x-pack/metricbeat/module/openai/usage/_meta/data.json +++ b/x-pack/metricbeat/module/openai/usage/_meta/data.json @@ -1,19 +1,38 @@ { - "@timestamp":"2016-05-23T08:05:34.853Z", - "beat":{ - "hostname":"beathost", - "name":"beathost" + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "openai.usage", + "duration": 115000, + "module": "openai" }, - "metricset":{ - "host":"localhost", - "module":"openai", - "name":"usage", - "rtt":44269 + "metricset": { + "name": "usage", + "period": 10000 }, - "openai":{ - "usage":{ - "example": "usage" + "openai": { + "usage": { + "data": { + "aggregation_timestamp": 1730696460, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "email": null, + "n_cached_context_tokens_total": 0, + "n_context_tokens_total": 118, + "n_generated_tokens_total": 35, + "n_requests": 1, + "operation": "completion-realtime", + "organization_id": "org-dummy", + "organization_name": "Personal", + "project_id": null, + "project_name": null, + "request_type": "", + "snapshot_id": "gpt-4o-realtime-preview-2024-10-01" + } } }, - "type":"metricsets" -} + "service": { + "type": "openai" + } +} \ No newline at end of file diff --git a/x-pack/metricbeat/module/openai/usage/_meta/fields.yml b/x-pack/metricbeat/module/openai/usage/_meta/fields.yml index 7de86e52163..203dacaced0 100644 --- a/x-pack/metricbeat/module/openai/usage/_meta/fields.yml +++ b/x-pack/metricbeat/module/openai/usage/_meta/fields.yml @@ -2,9 +2,232 @@ type: group release: beta description: > - usage + OpenAI API usage metrics and statistics fields: - - name: example - type: keyword + - name: data + type: group description: > - Example field + General usage data metrics + fields: + - name: organization_id + type: keyword + description: Organization identifier + - name: organization_name + type: keyword + description: Organization name + - name: aggregation_timestamp + type: date + description: Timestamp of data aggregation + - name: n_requests + type: long + description: Number of requests made + - name: operation + type: keyword + description: Operation type + - name: snapshot_id + type: keyword + description: Snapshot identifier + - name: n_context_tokens_total + type: long + description: Total number of context tokens used + - name: n_generated_tokens_total + type: long + description: Total number of generated tokens + - name: n_cached_context_tokens_total + type: long + description: Total number of cached context tokens + - name: email + type: keyword + description: User email + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + - name: request_type + type: keyword + description: Type of request + + - name: dalle + type: group + description: > + DALL-E API usage metrics + fields: + - name: timestamp + type: date + description: Timestamp of request + - name: num_images + type: long + description: Number of images generated + - name: num_requests + type: long + description: Number of requests + - name: image_size + type: keyword + description: Size of generated images + - name: operation + type: keyword + description: Operation type + - name: user_id + type: keyword + description: User identifier + - name: organization_id + type: keyword + description: Organization identifier + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: organization_name + type: keyword + description: Organization name + - name: model_id + type: keyword + description: Model identifier + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + + - name: whisper + type: group + description: > + Whisper API usage metrics + fields: + - name: timestamp + type: date + description: Timestamp of request + - name: model_id + type: keyword + description: Model identifier + - name: num_seconds + type: long + description: Number of seconds processed + - name: num_requests + type: long + description: Number of requests + - name: user_id + type: keyword + description: User identifier + - name: organization_id + type: keyword + description: Organization identifier + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: organization_name + type: keyword + description: Organization name + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + + - name: tts + type: group + description: > + Text-to-Speech API usage metrics + fields: + - name: timestamp + type: date + description: Timestamp of request + - name: model_id + type: keyword + description: Model identifier + - name: num_characters + type: long + description: Number of characters processed + - name: num_requests + type: long + description: Number of requests + - name: user_id + type: keyword + description: User identifier + - name: organization_id + type: keyword + description: Organization identifier + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: organization_name + type: keyword + description: Organization name + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + + - name: ft_data + type: group + description: > + Fine-tuning data metrics + fields: + - name: original + type: object + object_type: keyword + description: Raw fine-tuning data + + - name: assistant_code_interpreter + type: group + description: > + Assistant Code Interpreter usage metrics + fields: + - name: original + type: object + object_type: keyword + description: Raw assistant code interpreter data + + - name: retrieval_storage + type: group + description: > + Retrieval storage usage metrics + fields: + - name: original + type: object + object_type: keyword + description: Raw retrieval storage data diff --git a/x-pack/metricbeat/module/openai/usage/client.go b/x-pack/metricbeat/module/openai/usage/client.go index b5d1b3e6078..c8f24aa8a50 100644 --- a/x-pack/metricbeat/module/openai/usage/client.go +++ b/x-pack/metricbeat/module/openai/usage/client.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package usage import ( diff --git a/x-pack/metricbeat/module/openai/usage/config.go b/x-pack/metricbeat/module/openai/usage/config.go index 2ca63bf0c10..5908bdd57f9 100644 --- a/x-pack/metricbeat/module/openai/usage/config.go +++ b/x-pack/metricbeat/module/openai/usage/config.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package usage import ( @@ -61,8 +65,8 @@ func (c *Config) Validate() error { case c.Timeout <= 0: return fmt.Errorf("timeout must be greater than 0") - case c.Collection.LookbackDays <= 0: - return fmt.Errorf("lookback_days must be greater than 0") + case c.Collection.LookbackDays < 0: + return fmt.Errorf("lookback_days must be >= 0") } // API keys validation in a separate loop since it needs iteration diff --git a/x-pack/metricbeat/module/openai/usage/helper.go b/x-pack/metricbeat/module/openai/usage/helper.go index 08c7046c6c8..f2d3adadd01 100644 --- a/x-pack/metricbeat/module/openai/usage/helper.go +++ b/x-pack/metricbeat/module/openai/usage/helper.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package usage import "strings" diff --git a/x-pack/metricbeat/module/openai/usage/persistcache.go b/x-pack/metricbeat/module/openai/usage/persistcache.go index 34bb18a6b53..9b7d725461a 100644 --- a/x-pack/metricbeat/module/openai/usage/persistcache.go +++ b/x-pack/metricbeat/module/openai/usage/persistcache.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package usage import ( diff --git a/x-pack/metricbeat/module/openai/usage/schema.go b/x-pack/metricbeat/module/openai/usage/schema.go index 5b44ef687d8..668d5b03370 100644 --- a/x-pack/metricbeat/module/openai/usage/schema.go +++ b/x-pack/metricbeat/module/openai/usage/schema.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package usage type UsageResponse struct { diff --git a/x-pack/metricbeat/module/openai/usage/usage.go b/x-pack/metricbeat/module/openai/usage/usage.go index 0988e635367..92319af0c55 100644 --- a/x-pack/metricbeat/module/openai/usage/usage.go +++ b/x-pack/metricbeat/module/openai/usage/usage.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package usage import ( @@ -196,13 +200,14 @@ func (m *MetricSet) processResponse(resp *http.Response, dateStr string) error { } func (m *MetricSet) processUsageData(events []mb.Event, data []UsageData) { + for _, usage := range data { event := mb.Event{ MetricSetFields: mapstr.M{ "data": mapstr.M{ "organization_id": usage.OrganizationID, "organization_name": usage.OrganizationName, - "aggregation_timestamp": usage.AggregationTimestamp, + "aggregation_timestamp": time.Unix(usage.AggregationTimestamp, 0), "n_requests": usage.NRequests, "operation": usage.Operation, "snapshot_id": usage.SnapshotID, @@ -230,7 +235,7 @@ func (m *MetricSet) processDalleData(events []mb.Event, data []DalleData) { event := mb.Event{ MetricSetFields: mapstr.M{ "dalle": mapstr.M{ - "timestamp": dalle.Timestamp, + "timestamp": time.Unix(dalle.Timestamp, 0), "num_images": dalle.NumImages, "num_requests": dalle.NumRequests, "image_size": dalle.ImageSize, @@ -258,7 +263,7 @@ func (m *MetricSet) processWhisperData(events []mb.Event, data []WhisperData) { event := mb.Event{ MetricSetFields: mapstr.M{ "whisper": mapstr.M{ - "timestamp": whisper.Timestamp, + "timestamp": time.Unix(whisper.Timestamp, 0), "model_id": whisper.ModelID, "num_seconds": whisper.NumSeconds, "num_requests": whisper.NumRequests, @@ -284,7 +289,7 @@ func (m *MetricSet) processTTSData(events []mb.Event, data []TtsData) { event := mb.Event{ MetricSetFields: mapstr.M{ "tts": mapstr.M{ - "timestamp": tts.Timestamp, + "timestamp": time.Unix(tts.Timestamp, 0), "model_id": tts.ModelID, "num_characters": tts.NumCharacters, "num_requests": tts.NumRequests, diff --git a/x-pack/metricbeat/module/openai/usage/usage_integration_test.go b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go new file mode 100644 index 00000000000..e7754939845 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go @@ -0,0 +1,175 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package usage + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" + "github.com/stretchr/testify/assert" +) + +func TestFetch(t *testing.T) { + apiKey := "most_secure_token" + usagePath := "/usage" + server := initServer(usagePath, apiKey) + defer server.Close() + + f := mbtest.NewReportingMetricSetV2Error(t, getConfig(server.URL+"/usage", apiKey)) + + events, errs := mbtest.ReportingFetchV2Error(f) + if len(errs) > 0 { + t.Fatalf("Expected 0 error, has %d: %v", len(errs), errs) + } + + assert.NotEmpty(t, events) + +} + +func TestData(t *testing.T) { + apiKey := "most_secure_token" + usagePath := "/usage" + server := initServer(usagePath, apiKey) + defer server.Close() + + f := mbtest.NewReportingMetricSetV2Error(t, getConfig(server.URL+"/usage", apiKey)) + + err := mbtest.WriteEventsReporterV2Error(f, t, "") + if !assert.NoError(t, err) { + t.FailNow() + } +} + +func getConfig(url, apiKey string) map[string]interface{} { + return map[string]interface{}{ + "module": "openai", + "metricsets": []string{"usage"}, + "enabled": true, + "period": "1h", + "api_url": url, + "api_keys": []map[string]interface{}{ + {"key": apiKey}, + }, + "rate_limit": map[string]interface{}{ + "limit": 60, + "burst": 5, + }, + "collection": map[string]interface{}{ + "lookback_days": 1, + }, + } +} + +func initServer(endpoint string, api_key string) *httptest.Server { + data := []byte(`{ + "object": "list", + "data": [ + { + "organization_id": "org-dummy", + "organization_name": "Personal", + "aggregation_timestamp": 1730696460, + "n_requests": 1, + "operation": "completion-realtime", + "snapshot_id": "gpt-4o-realtime-preview-2024-10-01", + "n_context_tokens_total": 118, + "n_generated_tokens_total": 35, + "email": null, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "project_id": null, + "project_name": null, + "request_type": "", + "n_cached_context_tokens_total": 0 + }, + { + "organization_id": "org-dummy", + "organization_name": "Personal", + "aggregation_timestamp": 1730696460, + "n_requests": 1, + "operation": "completion", + "snapshot_id": "gpt-4o-2024-08-06", + "n_context_tokens_total": 31, + "n_generated_tokens_total": 12, + "email": null, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "project_id": null, + "project_name": null, + "request_type": "", + "n_cached_context_tokens_total": 0 + }, + { + "organization_id": "org-dummy", + "organization_name": "Personal", + "aggregation_timestamp": 1730697540, + "n_requests": 1, + "operation": "completion", + "snapshot_id": "ft:gpt-3.5-turbo-0125:personal:yay-renew:APjjyG8E:ckpt-step-84", + "n_context_tokens_total": 13, + "n_generated_tokens_total": 9, + "email": null, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "project_id": null, + "project_name": null, + "request_type": "", + "n_cached_context_tokens_total": 0 + } + ], + "ft_data": [], + "dalle_api_data": [], + "whisper_api_data": [ + { + "timestamp": 1730696460, + "model_id": "whisper-1", + "num_seconds": 2, + "num_requests": 1, + "user_id": null, + "organization_id": "org-dummy", + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "organization_name": "Personal", + "project_id": null, + "project_name": null + } + ], + "tts_api_data": [], + "assistant_code_interpreter_data": [], + "retrieval_storage_data": [] +}`) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Validate Bearer token + authHeader := r.Header.Get("Authorization") + expectedToken := fmt.Sprintf("Bearer %s", api_key) + + if authHeader != expectedToken { + w.WriteHeader(http.StatusUnauthorized) + return + } + + // Validate the endpoint + if r.URL.Path == endpoint { + w.WriteHeader(http.StatusOK) + w.Write(data) + } else { + w.WriteHeader(http.StatusNotFound) + } + })) + return server +} From a3c46b7d302c3ab517042f432e38d42f4c642fcf Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 20:51:59 +0530 Subject: [PATCH 5/7] update module --- metricbeat/docs/modules/openai.asciidoc | 3 ++- x-pack/metricbeat/metricbeat.reference.yml | 1 + x-pack/metricbeat/module/openai/_meta/config.yml | 2 +- x-pack/metricbeat/modules.d/openai.yml.disabled | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/metricbeat/docs/modules/openai.asciidoc b/metricbeat/docs/modules/openai.asciidoc index f961b1e09fb..ad0d7f97712 100644 --- a/metricbeat/docs/modules/openai.asciidoc +++ b/metricbeat/docs/modules/openai.asciidoc @@ -54,7 +54,8 @@ metricbeat.modules: # # Data Collection Configuration # collection: # ## Number of days to look back when collecting usage data - # lookback_days: 30---- + # lookback_days: 30 +---- [float] === Metricsets diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 48fc2eefa30..263d3df085c 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1286,6 +1286,7 @@ metricbeat.modules: # collection: # ## Number of days to look back when collecting usage data # lookback_days: 30 + #----------------------------- Openmetrics Module ----------------------------- - module: openmetrics metricsets: ['collector'] diff --git a/x-pack/metricbeat/module/openai/_meta/config.yml b/x-pack/metricbeat/module/openai/_meta/config.yml index dadcd35b1ba..1290889640b 100644 --- a/x-pack/metricbeat/module/openai/_meta/config.yml +++ b/x-pack/metricbeat/module/openai/_meta/config.yml @@ -25,4 +25,4 @@ # # Data Collection Configuration # collection: # ## Number of days to look back when collecting usage data - # lookback_days: 30 \ No newline at end of file + # lookback_days: 30 diff --git a/x-pack/metricbeat/modules.d/openai.yml.disabled b/x-pack/metricbeat/modules.d/openai.yml.disabled index 41d604c3ea9..10a9fb48577 100644 --- a/x-pack/metricbeat/modules.d/openai.yml.disabled +++ b/x-pack/metricbeat/modules.d/openai.yml.disabled @@ -28,4 +28,4 @@ # # Data Collection Configuration # collection: # ## Number of days to look back when collecting usage data - # lookback_days: 30 \ No newline at end of file + # lookback_days: 30 From 72488f71cf76ce7f71fbfb1bd1276ae8fb391e45 Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 21:04:10 +0530 Subject: [PATCH 6/7] update module --- x-pack/metricbeat/module/openai/doc.go | 4 ++++ x-pack/metricbeat/module/openai/usage/usage.go | 2 +- .../metricbeat/module/openai/usage/usage_integration_test.go | 5 +++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/x-pack/metricbeat/module/openai/doc.go b/x-pack/metricbeat/module/openai/doc.go index b6d7a70b121..5f2f07fb0bc 100644 --- a/x-pack/metricbeat/module/openai/doc.go +++ b/x-pack/metricbeat/module/openai/doc.go @@ -1,2 +1,6 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + // Package openai is a Metricbeat module that contains MetricSets. package openai diff --git a/x-pack/metricbeat/module/openai/usage/usage.go b/x-pack/metricbeat/module/openai/usage/usage.go index 92319af0c55..4e9b8e88e5b 100644 --- a/x-pack/metricbeat/module/openai/usage/usage.go +++ b/x-pack/metricbeat/module/openai/usage/usage.go @@ -155,7 +155,7 @@ func (m *MetricSet) fetchSingleDay(dateStr, apiKey string, httpClient *RLHTTPCli } func (m *MetricSet) createRequest(dateStr, apiKey string) (*http.Request, error) { - req, err := http.NewRequest(http.MethodGet, m.config.APIURL, nil) + req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, m.config.APIURL, nil) if err != nil { return nil, fmt.Errorf("error creating request: %w", err) } diff --git a/x-pack/metricbeat/module/openai/usage/usage_integration_test.go b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go index e7754939845..a39eb8700a9 100644 --- a/x-pack/metricbeat/module/openai/usage/usage_integration_test.go +++ b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go @@ -12,8 +12,9 @@ import ( "net/http/httptest" "testing" - mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" "github.com/stretchr/testify/assert" + + mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" ) func TestFetch(t *testing.T) { @@ -166,7 +167,7 @@ func initServer(endpoint string, api_key string) *httptest.Server { // Validate the endpoint if r.URL.Path == endpoint { w.WriteHeader(http.StatusOK) - w.Write(data) + _ = w.Write(data) } else { w.WriteHeader(http.StatusNotFound) } From 2d4027dc85be76dd3e3f991a3ae5b24d492547ac Mon Sep 17 00:00:00 2001 From: subham sarkar Date: Tue, 5 Nov 2024 22:18:48 +0530 Subject: [PATCH 7/7] fix bug --- x-pack/metricbeat/module/openai/usage/usage_integration_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/metricbeat/module/openai/usage/usage_integration_test.go b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go index a39eb8700a9..8a0c9b70090 100644 --- a/x-pack/metricbeat/module/openai/usage/usage_integration_test.go +++ b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go @@ -167,7 +167,7 @@ func initServer(endpoint string, api_key string) *httptest.Server { // Validate the endpoint if r.URL.Path == endpoint { w.WriteHeader(http.StatusOK) - _ = w.Write(data) + _, _ = w.Write(data) } else { w.WriteHeader(http.StatusNotFound) }