diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index 09fd5e53245..1705b79fe70 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -67,6 +67,7 @@ grouped in the following categories: * <> * <> * <> +* <> * <> * <> * <> @@ -56654,6 +56655,624 @@ type: long -- +[[exported-fields-openai]] +== openai fields + +openai module + + + +[float] +=== openai + + + + +[float] +=== usage + +OpenAI API usage metrics and statistics + + + +[float] +=== data + +General usage data metrics + + + +*`openai.usage.data.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.data.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.data.aggregation_timestamp`*:: ++ +-- +Timestamp of data aggregation + +type: date + +-- + +*`openai.usage.data.n_requests`*:: ++ +-- +Number of requests made + +type: long + +-- + +*`openai.usage.data.operation`*:: ++ +-- +Operation type + +type: keyword + +-- + +*`openai.usage.data.snapshot_id`*:: ++ +-- +Snapshot identifier + +type: keyword + +-- + +*`openai.usage.data.n_context_tokens_total`*:: ++ +-- +Total number of context tokens used + +type: long + +-- + +*`openai.usage.data.n_generated_tokens_total`*:: ++ +-- +Total number of generated tokens + +type: long + +-- + +*`openai.usage.data.n_cached_context_tokens_total`*:: ++ +-- +Total number of cached context tokens + +type: long + +-- + +*`openai.usage.data.email`*:: ++ +-- +User email + +type: keyword + +-- + +*`openai.usage.data.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.data.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.data.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.data.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.data.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.data.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +*`openai.usage.data.request_type`*:: ++ +-- +Type of request + +type: keyword + +-- + +[float] +=== dalle + +DALL-E API usage metrics + + + +*`openai.usage.dalle.timestamp`*:: ++ +-- +Timestamp of request + +type: date + +-- + +*`openai.usage.dalle.num_images`*:: ++ +-- +Number of images generated + +type: long + +-- + +*`openai.usage.dalle.num_requests`*:: ++ +-- +Number of requests + +type: long + +-- + +*`openai.usage.dalle.image_size`*:: ++ +-- +Size of generated images + +type: keyword + +-- + +*`openai.usage.dalle.operation`*:: ++ +-- +Operation type + +type: keyword + +-- + +*`openai.usage.dalle.user_id`*:: ++ +-- +User identifier + +type: keyword + +-- + +*`openai.usage.dalle.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.dalle.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.dalle.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.dalle.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.dalle.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.dalle.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.dalle.model_id`*:: ++ +-- +Model identifier + +type: keyword + +-- + +*`openai.usage.dalle.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.dalle.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +[float] +=== 
whisper + +Whisper API usage metrics + + + +*`openai.usage.whisper.timestamp`*:: ++ +-- +Timestamp of request + +type: date + +-- + +*`openai.usage.whisper.model_id`*:: ++ +-- +Model identifier + +type: keyword + +-- + +*`openai.usage.whisper.num_seconds`*:: ++ +-- +Number of seconds processed + +type: long + +-- + +*`openai.usage.whisper.num_requests`*:: ++ +-- +Number of requests + +type: long + +-- + +*`openai.usage.whisper.user_id`*:: ++ +-- +User identifier + +type: keyword + +-- + +*`openai.usage.whisper.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.whisper.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.whisper.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.whisper.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.whisper.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.whisper.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.whisper.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.whisper.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +[float] +=== tts + +Text-to-Speech API usage metrics + + + +*`openai.usage.tts.timestamp`*:: ++ +-- +Timestamp of request + +type: date + +-- + +*`openai.usage.tts.model_id`*:: ++ +-- +Model identifier + +type: keyword + +-- + +*`openai.usage.tts.num_characters`*:: ++ +-- +Number of characters processed + +type: long + +-- + +*`openai.usage.tts.num_requests`*:: ++ +-- +Number of requests + +type: long + +-- + +*`openai.usage.tts.user_id`*:: ++ +-- +User identifier + +type: keyword + +-- + +*`openai.usage.tts.organization_id`*:: ++ +-- +Organization identifier + +type: keyword + +-- + +*`openai.usage.tts.api_key_id`*:: ++ +-- +API key identifier + +type: keyword + +-- + +*`openai.usage.tts.api_key_name`*:: ++ +-- +API key name + +type: keyword + +-- + +*`openai.usage.tts.api_key_redacted`*:: ++ +-- +Redacted API key + +type: keyword + +-- + +*`openai.usage.tts.api_key_type`*:: ++ +-- +Type of API key + +type: keyword + +-- + +*`openai.usage.tts.organization_name`*:: ++ +-- +Organization name + +type: keyword + +-- + +*`openai.usage.tts.project_id`*:: ++ +-- +Project identifier + +type: keyword + +-- + +*`openai.usage.tts.project_name`*:: ++ +-- +Project name + +type: keyword + +-- + +[float] +=== ft_data + +Fine-tuning data metrics + + + +*`openai.usage.ft_data.original`*:: ++ +-- +Raw fine-tuning data + +type: object + +-- + +[float] +=== assistant_code_interpreter + +Assistant Code Interpreter usage metrics + + + +*`openai.usage.assistant_code_interpreter.original`*:: ++ +-- +Raw assistant code interpreter data + +type: object + +-- + +[float] +=== retrieval_storage + +Retrieval storage usage metrics + + + +*`openai.usage.retrieval_storage.original`*:: ++ +-- +Raw retrieval storage data + +type: object + +-- + [[exported-fields-openmetrics]] == Openmetrics fields diff --git a/metricbeat/docs/modules/openai.asciidoc b/metricbeat/docs/modules/openai.asciidoc new file mode 100644 index 00000000000..ad0d7f97712 --- /dev/null +++ b/metricbeat/docs/modules/openai.asciidoc @@ -0,0 +1,69 @@ +//// +This file is generated! 
See scripts/mage/docs_collector.go +//// + +:modulename: openai +:edit_url: https://github.com/elastic/beats/edit/main/x-pack/metricbeat/module/openai/_meta/docs.asciidoc + + +[[metricbeat-module-openai]] +[role="xpack"] +== openai module + +beta[] + +This is the openai module. + + + +:edit_url: + +[float] +=== Example configuration + +The openai module supports the standard configuration options that are described +in <>. Here is an example configuration: + +[source,yaml] +---- +metricbeat.modules: +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30 +---- + +[float] +=== Metricsets + +The following metricsets are available: + +* <> + +include::openai/usage.asciidoc[] + +:edit_url!: diff --git a/metricbeat/docs/modules/openai/usage.asciidoc b/metricbeat/docs/modules/openai/usage.asciidoc new file mode 100644 index 00000000000..69d0ba313d9 --- /dev/null +++ b/metricbeat/docs/modules/openai/usage.asciidoc @@ -0,0 +1,29 @@ +//// +This file is generated! See scripts/mage/docs_collector.go +//// +:edit_url: https://github.com/elastic/beats/edit/main/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc + + +[[metricbeat-metricset-openai-usage]] +[role="xpack"] +=== openai usage metricset + +beta[] + +include::../../../../x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc[] + + +:edit_url: + +==== Fields + +For a description of each field in the metricset, see the +<> section. + +Here is an example document generated by this metricset: + +[source,json] +---- +include::../../../../x-pack/metricbeat/module/openai/usage/_meta/data.json[] +---- +:edit_url!: \ No newline at end of file diff --git a/metricbeat/docs/modules_list.asciidoc b/metricbeat/docs/modules_list.asciidoc index f68dc8e1e65..3b66228a75f 100644 --- a/metricbeat/docs/modules_list.asciidoc +++ b/metricbeat/docs/modules_list.asciidoc @@ -240,6 +240,8 @@ This file is generated! 
See scripts/mage/docs_collector.go |<> |<> |image:./images/icon-yes.png[Prebuilt dashboards are available] | .1+| .1+| |<> +|<> beta[] |image:./images/icon-no.png[No prebuilt dashboards] | +.1+| .1+| |<> beta[] |<> beta[] |image:./images/icon-no.png[No prebuilt dashboards] | .1+| .1+| |<> beta[] |<> |image:./images/icon-yes.png[Prebuilt dashboards are available] | @@ -381,6 +383,7 @@ include::modules/munin.asciidoc[] include::modules/mysql.asciidoc[] include::modules/nats.asciidoc[] include::modules/nginx.asciidoc[] +include::modules/openai.asciidoc[] include::modules/openmetrics.asciidoc[] include::modules/oracle.asciidoc[] include::modules/panw.asciidoc[] diff --git a/x-pack/metricbeat/include/list.go b/x-pack/metricbeat/include/list.go index 01ce86edf78..fdf19a87024 100644 --- a/x-pack/metricbeat/include/list.go +++ b/x-pack/metricbeat/include/list.go @@ -53,6 +53,8 @@ import ( _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/mssql" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/mssql/performance" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/mssql/transaction_log" + _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/openai" + _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/openai/usage" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/oracle" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/oracle/performance" _ "github.com/elastic/beats/v7/x-pack/metricbeat/module/oracle/sysmetric" diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 240acb2cfd6..263d3df085c 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -1257,6 +1257,36 @@ metricbeat.modules: # Path to server status. Default nginx_status server_status_path: "nginx_status" +#-------------------------------- Openai Module -------------------------------- +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30 + #----------------------------- Openmetrics Module ----------------------------- - module: openmetrics metricsets: ['collector'] diff --git a/x-pack/metricbeat/module/openai/_meta/config.yml b/x-pack/metricbeat/module/openai/_meta/config.yml new file mode 100644 index 00000000000..1290889640b --- /dev/null +++ b/x-pack/metricbeat/module/openai/_meta/config.yml @@ -0,0 +1,28 @@ +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # 
burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30 diff --git a/x-pack/metricbeat/module/openai/_meta/docs.asciidoc b/x-pack/metricbeat/module/openai/_meta/docs.asciidoc new file mode 100644 index 00000000000..744909c7a1a --- /dev/null +++ b/x-pack/metricbeat/module/openai/_meta/docs.asciidoc @@ -0,0 +1,2 @@ +This is the openai module. + diff --git a/x-pack/metricbeat/module/openai/_meta/fields.yml b/x-pack/metricbeat/module/openai/_meta/fields.yml new file mode 100644 index 00000000000..d514eb010f1 --- /dev/null +++ b/x-pack/metricbeat/module/openai/_meta/fields.yml @@ -0,0 +1,10 @@ +- key: openai + title: "openai" + release: beta + description: > + openai module + fields: + - name: openai + type: group + description: > + fields: diff --git a/x-pack/metricbeat/module/openai/doc.go b/x-pack/metricbeat/module/openai/doc.go new file mode 100644 index 00000000000..5f2f07fb0bc --- /dev/null +++ b/x-pack/metricbeat/module/openai/doc.go @@ -0,0 +1,6 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +// Package openai is a Metricbeat module that contains MetricSets. +package openai diff --git a/x-pack/metricbeat/module/openai/fields.go b/x-pack/metricbeat/module/openai/fields.go new file mode 100644 index 00000000000..fa62e586ec5 --- /dev/null +++ b/x-pack/metricbeat/module/openai/fields.go @@ -0,0 +1,23 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +// Code generated by beats/dev-tools/cmd/asset/asset.go - DO NOT EDIT. + +package openai + +import ( + "github.com/elastic/beats/v7/libbeat/asset" +) + +func init() { + if err := asset.SetFields("metricbeat", "openai", asset.ModuleFieldsPri, AssetOpenai); err != nil { + panic(err) + } +} + +// AssetOpenai returns asset data. +// This is the base64 encoded zlib format compressed contents of module/openai. 
+func AssetOpenai() string { + return "eJzsms9uozwUxfd5iqvu8wJZfFL0/VOlzrRqM5olcvEN8QRsxr5Mmz79yAEnQIBA4qbtFC8hnN/h+uJjRKawxs0MVIqSiQkACYpxBlf5gasJgMYYmcEZPCKxCQBHE2qRklByBn9NAKC4GhLFsxgnAEuBMTez7bkpSJZgiWAHbVKcQaRVlhZHGlSrOmWtzLAId0eb5OyoG3ejEZWP2xTl/Brmd9c5AxIkLUIDTHIwxEgYEqEpXVO3WLbJWYXb7vSIKzv+R4maxYUtq+y81X7aZKgyDzpiUrwwCwoEP/id87jGzZPSTecrTm9LciA4ShJLgbof3h7yaqBR0KFZFGmMcjKJBA2xpD4Lezxn1OStwl44FVDLfE5KjFYfMtD4M0ND9anbw2Mlo2Pwr1nyiNqSnRwkjLffvkpRNxsbWnEntL2slWckS81KkYcmeyik+jSYDEIlCZ8pILVGaQJSxOKzKr2wCiB39S4AkAMgM3h4B3s70fbJJeSvZ2iHKCx1FYeFK+QXqNGWUytVqy9MmGjn9+ySbwZ1i9JuCUhFsMaNh5a0EbHGTZ+OdFAfq53Ddi90BVAjZyE1NOdA6H2h4+hHwY3rwkDoYpOibaRjzFSrHxj6WGXucqU+U+qgPqbUYTuntFjivVa20Nz9vGUPE8d14smbmH/mNzfTfw+3VwO3MK8Q3vVa1JEySwKRsAh9ZXYutl+5O8mvtF9oZW7NBUa8nN1rD+IFqwnVUsW32qVkBrWHtWObPUO3v5fffY/5dwr0XeffG75QJYpj7KGXvlidIbH7Z2V9Y+4+rYRJD1ydnLzfc7mPGL0X7zKbuAZDJbmvwC3UbCuFaDpfF98g7ccIPI08RmAH+JNE4CdKJDpYQk5OowU+05TU9CFFDFdjKPUMpXDFtH0Kta9o2AuO0TRG0xhNYzQ1Mt99NC0p8Pmt9T8hcUqZFDI670uriITs+K6iHu39NZzOTwTDHgP2BMua8e6qMWOEISYpCBXHQEhCnWokf2+dc0eAvxVHuN4Tzsr7yxd2VyqwpYJSqXrUWdubxF8sDgwpXf3nwlnlvXfCUAh/sKrqA//bYv4OAAD//+T56FM=" +} diff --git a/x-pack/metricbeat/module/openai/usage/_meta/data.json b/x-pack/metricbeat/module/openai/usage/_meta/data.json new file mode 100644 index 00000000000..7a6355842f0 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/_meta/data.json @@ -0,0 +1,38 @@ +{ + "@timestamp": "2017-10-12T08:05:34.853Z", + "event": { + "dataset": "openai.usage", + "duration": 115000, + "module": "openai" + }, + "metricset": { + "name": "usage", + "period": 10000 + }, + "openai": { + "usage": { + "data": { + "aggregation_timestamp": 1730696460, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "email": null, + "n_cached_context_tokens_total": 0, + "n_context_tokens_total": 118, + "n_generated_tokens_total": 35, + "n_requests": 1, + "operation": "completion-realtime", + "organization_id": "org-dummy", + "organization_name": "Personal", + "project_id": null, + "project_name": null, + "request_type": "", + "snapshot_id": "gpt-4o-realtime-preview-2024-10-01" + } + } + }, + "service": { + "type": "openai" + } +} \ No newline at end of file diff --git a/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc b/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc new file mode 100644 index 00000000000..dc88baf82a0 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/_meta/docs.asciidoc @@ -0,0 +1 @@ +This is the usage metricset of the module openai. 
diff --git a/x-pack/metricbeat/module/openai/usage/_meta/fields.yml b/x-pack/metricbeat/module/openai/usage/_meta/fields.yml new file mode 100644 index 00000000000..203dacaced0 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/_meta/fields.yml @@ -0,0 +1,233 @@ +- name: usage + type: group + release: beta + description: > + OpenAI API usage metrics and statistics + fields: + - name: data + type: group + description: > + General usage data metrics + fields: + - name: organization_id + type: keyword + description: Organization identifier + - name: organization_name + type: keyword + description: Organization name + - name: aggregation_timestamp + type: date + description: Timestamp of data aggregation + - name: n_requests + type: long + description: Number of requests made + - name: operation + type: keyword + description: Operation type + - name: snapshot_id + type: keyword + description: Snapshot identifier + - name: n_context_tokens_total + type: long + description: Total number of context tokens used + - name: n_generated_tokens_total + type: long + description: Total number of generated tokens + - name: n_cached_context_tokens_total + type: long + description: Total number of cached context tokens + - name: email + type: keyword + description: User email + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + - name: request_type + type: keyword + description: Type of request + + - name: dalle + type: group + description: > + DALL-E API usage metrics + fields: + - name: timestamp + type: date + description: Timestamp of request + - name: num_images + type: long + description: Number of images generated + - name: num_requests + type: long + description: Number of requests + - name: image_size + type: keyword + description: Size of generated images + - name: operation + type: keyword + description: Operation type + - name: user_id + type: keyword + description: User identifier + - name: organization_id + type: keyword + description: Organization identifier + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: organization_name + type: keyword + description: Organization name + - name: model_id + type: keyword + description: Model identifier + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + + - name: whisper + type: group + description: > + Whisper API usage metrics + fields: + - name: timestamp + type: date + description: Timestamp of request + - name: model_id + type: keyword + description: Model identifier + - name: num_seconds + type: long + description: Number of seconds processed + - name: num_requests + type: long + description: Number of requests + - name: user_id + type: keyword + description: User identifier + - name: organization_id + type: keyword + description: Organization identifier + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: 
keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: organization_name + type: keyword + description: Organization name + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + + - name: tts + type: group + description: > + Text-to-Speech API usage metrics + fields: + - name: timestamp + type: date + description: Timestamp of request + - name: model_id + type: keyword + description: Model identifier + - name: num_characters + type: long + description: Number of characters processed + - name: num_requests + type: long + description: Number of requests + - name: user_id + type: keyword + description: User identifier + - name: organization_id + type: keyword + description: Organization identifier + - name: api_key_id + type: keyword + description: API key identifier + - name: api_key_name + type: keyword + description: API key name + - name: api_key_redacted + type: keyword + description: Redacted API key + - name: api_key_type + type: keyword + description: Type of API key + - name: organization_name + type: keyword + description: Organization name + - name: project_id + type: keyword + description: Project identifier + - name: project_name + type: keyword + description: Project name + + - name: ft_data + type: group + description: > + Fine-tuning data metrics + fields: + - name: original + type: object + object_type: keyword + description: Raw fine-tuning data + + - name: assistant_code_interpreter + type: group + description: > + Assistant Code Interpreter usage metrics + fields: + - name: original + type: object + object_type: keyword + description: Raw assistant code interpreter data + + - name: retrieval_storage + type: group + description: > + Retrieval storage usage metrics + fields: + - name: original + type: object + object_type: keyword + description: Raw retrieval storage data diff --git a/x-pack/metricbeat/module/openai/usage/client.go b/x-pack/metricbeat/module/openai/usage/client.go new file mode 100644 index 00000000000..c8f24aa8a50 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/client.go @@ -0,0 +1,49 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package usage + +import ( + "context" + "net/http" + "time" + + "golang.org/x/time/rate" + + "github.com/elastic/elastic-agent-libs/logp" +) + +// RLHTTPClient implements a rate-limited HTTP client that wraps the standard http.Client +// with a rate limiter to control API request frequency. +type RLHTTPClient struct { + ctx context.Context + client *http.Client + logger *logp.Logger + Ratelimiter *rate.Limiter +} + +// Do executes an HTTP request while respecting rate limits. +// It waits for rate limit token before proceeding with the request. +// Returns the HTTP response and any error encountered. +func (c *RLHTTPClient) Do(req *http.Request) (*http.Response, error) { + c.logger.Warn("Waiting for rate limit token") + err := c.Ratelimiter.Wait(context.TODO()) + if err != nil { + return nil, err + } + c.logger.Warn("Rate limit token acquired") + return c.client.Do(req) +} + +// newClient creates a new rate-limited HTTP client with specified rate limiter and timeout. 
+func newClient(ctx context.Context, logger *logp.Logger, rl *rate.Limiter, timeout time.Duration) *RLHTTPClient { + var client = http.DefaultClient + client.Timeout = timeout + return &RLHTTPClient{ + ctx: ctx, + client: client, + logger: logger, + Ratelimiter: rl, + } +} diff --git a/x-pack/metricbeat/module/openai/usage/config.go b/x-pack/metricbeat/module/openai/usage/config.go new file mode 100644 index 00000000000..5908bdd57f9 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/config.go @@ -0,0 +1,80 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package usage + +import ( + "fmt" + "time" +) + +type Config struct { + APIKeys []apiKeyConfig `config:"api_keys" validate:"required"` + APIURL string `config:"api_url"` + Headers []string `config:"headers"` + RateLimit *rateLimitConfig `config:"rate_limit"` + Timeout time.Duration `config:"timeout"` + Collection collectionConfig `config:"collection"` +} + +type rateLimitConfig struct { + Limit *int `config:"limit"` + Burst *int `config:"burst"` +} + +type apiKeyConfig struct { + Key string `config:"key"` +} + +type collectionConfig struct { + LookbackDays int `config:"lookback_days"` +} + +func defaultConfig() Config { + return Config{ + APIURL: "https://api.openai.com/v1/usage", + Timeout: 30 * time.Second, + RateLimit: &rateLimitConfig{ + Limit: ptr(60), + Burst: ptr(5), + }, + Collection: collectionConfig{ + LookbackDays: 5, // 5 days + }, + } +} + +func (c *Config) Validate() error { + switch { + case len(c.APIKeys) == 0: + return fmt.Errorf("at least one API key must be configured") + + case c.APIURL == "": + return fmt.Errorf("api_url cannot be empty") + + case c.RateLimit == nil: + return fmt.Errorf("rate_limit must be configured") + + case c.RateLimit.Limit == nil: + return fmt.Errorf("rate_limit.limit must be configured") + + case c.RateLimit.Burst == nil: + return fmt.Errorf("rate_limit.burst must be configured") + + case c.Timeout <= 0: + return fmt.Errorf("timeout must be greater than 0") + + case c.Collection.LookbackDays < 0: + return fmt.Errorf("lookback_days must be >= 0") + } + + // API keys validation in a separate loop since it needs iteration + for i, apiKey := range c.APIKeys { + if apiKey.Key == "" { + return fmt.Errorf("API key at position %d cannot be empty", i) + } + } + + return nil +} diff --git a/x-pack/metricbeat/module/openai/usage/helper.go b/x-pack/metricbeat/module/openai/usage/helper.go new file mode 100644 index 00000000000..f2d3adadd01 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/helper.go @@ -0,0 +1,23 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package usage + +import "strings" + +func ptr[T any](value T) *T { + return &value +} + +func processHeaders(headers []string) map[string]string { + headersMap := make(map[string]string, len(headers)) + for _, header := range headers { + parts := strings.Split(header, ":") + if len(parts) != 2 { + continue + } + headersMap[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) + } + return headersMap +} diff --git a/x-pack/metricbeat/module/openai/usage/persistcache.go b/x-pack/metricbeat/module/openai/usage/persistcache.go new file mode 100644 index 00000000000..9b7d725461a --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/persistcache.go @@ -0,0 +1,79 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package usage + +import ( + "fmt" + "os" + "path" + "sync" +) + +// stateStore handles persistence of state markers using the filesystem +type stateStore struct { + Dir string // Base directory for storing state files + sync.RWMutex // Protects access to the state store +} + +// newStateStore creates a new state store instance at the specified path +func newStateStore(path string) (*stateStore, error) { + if err := os.MkdirAll(path, 0o755); err != nil { + return nil, fmt.Errorf("creating state directory: %w", err) + } + return &stateStore{ + Dir: path, + }, nil +} + +// getStatePath builds the full file path for a given state key +func (s *stateStore) getStatePath(name string) string { + return path.Join(s.Dir, name) +} + +// Put creates a state marker file for the given key +func (s *stateStore) Put(key string) error { + s.Lock() + defer s.Unlock() + + filePath := s.getStatePath(key) + f, err := os.Create(filePath) + if err != nil { + return fmt.Errorf("creating state file: %w", err) + } + return f.Close() +} + +// Has checks if a state exists for the given key +func (s *stateStore) Has(key string) bool { + s.RLock() + defer s.RUnlock() + + filePath := s.getStatePath(key) + _, err := os.Stat(filePath) + return err == nil +} + +// Remove deletes the state marker file for the given key +func (s *stateStore) Remove(key string) error { + s.Lock() + defer s.Unlock() + + filePath := s.getStatePath(key) + if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("removing state file: %w", err) + } + return nil +} + +// Clear removes all state markers by deleting and recreating the state directory +func (s *stateStore) Clear() error { + s.Lock() + defer s.Unlock() + + if err := os.RemoveAll(s.Dir); err != nil { + return fmt.Errorf("clearing state directory: %w", err) + } + return os.MkdirAll(s.Dir, 0o755) +} diff --git a/x-pack/metricbeat/module/openai/usage/schema.go b/x-pack/metricbeat/module/openai/usage/schema.go new file mode 100644 index 00000000000..668d5b03370 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/schema.go @@ -0,0 +1,86 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package usage + +type UsageResponse struct { + Object string `json:"object"` + Data []UsageData `json:"data"` + FtData []interface{} `json:"ft_data"` + DalleApiData []DalleData `json:"dalle_api_data"` + WhisperApiData []WhisperData `json:"whisper_api_data"` + TtsApiData []TtsData `json:"tts_api_data"` + AssistantCodeInterpreterData []interface{} `json:"assistant_code_interpreter_data"` + RetrievalStorageData []interface{} `json:"retrieval_storage_data"` +} + +type UsageData struct { + OrganizationID string `json:"organization_id"` + OrganizationName string `json:"organization_name"` + AggregationTimestamp int64 `json:"aggregation_timestamp"` + NRequests int `json:"n_requests"` + Operation string `json:"operation"` + SnapshotID string `json:"snapshot_id"` + NContextTokensTotal int `json:"n_context_tokens_total"` + NGeneratedTokensTotal int `json:"n_generated_tokens_total"` + Email *string `json:"email"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` + RequestType string `json:"request_type"` + NCachedContextTokensTotal int `json:"n_cached_context_tokens_total"` +} + +type DalleData struct { + Timestamp int64 `json:"timestamp"` + NumImages int `json:"num_images"` + NumRequests int `json:"num_requests"` + ImageSize string `json:"image_size"` + Operation string `json:"operation"` + UserID *string `json:"user_id"` + OrganizationID string `json:"organization_id"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + OrganizationName string `json:"organization_name"` + ModelID string `json:"model_id"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` +} + +type WhisperData struct { + Timestamp int64 `json:"timestamp"` + ModelID string `json:"model_id"` + NumSeconds int `json:"num_seconds"` + NumRequests int `json:"num_requests"` + UserID *string `json:"user_id"` + OrganizationID string `json:"organization_id"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + OrganizationName string `json:"organization_name"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` +} + +type TtsData struct { + Timestamp int64 `json:"timestamp"` + ModelID string `json:"model_id"` + NumCharacters int `json:"num_characters"` + NumRequests int `json:"num_requests"` + UserID *string `json:"user_id"` + OrganizationID string `json:"organization_id"` + ApiKeyID *string `json:"api_key_id"` + ApiKeyName *string `json:"api_key_name"` + ApiKeyRedacted *string `json:"api_key_redacted"` + ApiKeyType *string `json:"api_key_type"` + OrganizationName string `json:"organization_name"` + ProjectID *string `json:"project_id"` + ProjectName *string `json:"project_name"` +} diff --git a/x-pack/metricbeat/module/openai/usage/usage.go b/x-pack/metricbeat/module/openai/usage/usage.go new file mode 100644 index 00000000000..4e9b8e88e5b --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/usage.go @@ -0,0 +1,363 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package usage + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "path" + "time" + + "golang.org/x/time/rate" + + "github.com/elastic/beats/v7/libbeat/common/cfgwarn" + "github.com/elastic/beats/v7/metricbeat/mb" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/mapstr" + "github.com/elastic/elastic-agent-libs/paths" +) + +// init registers the MetricSet with the central registry as soon as the program +// starts. The New function will be called later to instantiate an instance of +// the MetricSet for each host is defined in the module's configuration. After the +// MetricSet has been created then Fetch will begin to be called periodically. +func init() { + mb.Registry.MustAddMetricSet("openai", "usage", New) +} + +// MetricSet holds any configuration or state information. It must implement +// the mb.MetricSet interface. And this is best achieved by embedding +// mb.BaseMetricSet because it implements all of the required mb.MetricSet +// interface methods except for Fetch. +type MetricSet struct { + mb.BaseMetricSet + logger *logp.Logger + config Config + report mb.ReporterV2 + stateStore *stateStore +} + +// New creates a new instance of the MetricSet. New is responsible for unpacking +// any MetricSet specific configuration options if there are any. +func New(base mb.BaseMetricSet) (mb.MetricSet, error) { + cfgwarn.Beta("The openai usage metricset is beta.") + + config := defaultConfig() + if err := base.Module().UnpackConfig(&config); err != nil { + return nil, err + } + + if err := config.Validate(); err != nil { + return nil, err + } + + st, err := newStateStore(paths.Resolve(paths.Data, path.Join(base.Module().Name(), base.Name()))) + if err != nil { + return nil, fmt.Errorf("creating state store: %w", err) + } + + return &MetricSet{ + BaseMetricSet: base, + logger: logp.NewLogger("openai.usage"), + config: config, + stateStore: st, + }, nil +} + +// Fetch method implements the data gathering and data conversion to the right +// format. It publishes the event which is then forwarded to the output. In case +// of an error set the Error field of mb.Event or simply call report.Error(). +func (m *MetricSet) Fetch(report mb.ReporterV2) error { + httpClient := newClient( + context.TODO(), + m.logger, + rate.NewLimiter( + rate.Every(time.Duration(*m.config.RateLimit.Limit)*time.Second), + *m.config.RateLimit.Burst, + ), + m.config.Timeout, + ) + + m.report = report + + endDate := time.Now().UTC() + startDate := endDate.AddDate(0, 0, -m.config.Collection.LookbackDays) + + return m.fetchDateRange(startDate, endDate, httpClient) +} + +func (m *MetricSet) fetchDateRange(startDate, endDate time.Time, httpClient *RLHTTPClient) error { + for _, apiKey := range m.config.APIKeys { + // SHA-256 produces a fixed-length (64 characters) hexadecimal string + // that is safe for filenames across all major platforms. Hex encoding + // ensures that the hash is safe for use in file paths as it uses only + // alphanumeric characters. + // + // Also, SHA-256 is a strong cryptographic hash function that is + // deterministic, meaning that the same input will always produce + // the same output and it is an one-way function, meaning that it is + // computationally infeasible to reverse the hash to obtain the + // original. 
+ hasher := sha256.New() + hasher.Write([]byte(apiKey.Key)) + hashedKey := hex.EncodeToString(hasher.Sum(nil)) + stateKey := "state_" + hashedKey + + // If state exists, only fetch current day + if m.stateStore.Has(stateKey) { + currentDay := endDate.Format("2006-01-02") + if err := m.fetchSingleDay(currentDay, apiKey.Key, httpClient); err != nil { + m.logger.Errorf("Error fetching data for date %s: %v", currentDay, err) + } + continue + } + + // First run for this API key - fetch historical data + for d := startDate; !d.After(endDate); d = d.AddDate(0, 0, 1) { + dateStr := d.Format("2006-01-02") + if err := m.fetchSingleDay(dateStr, apiKey.Key, httpClient); err != nil { + m.logger.Errorf("Error fetching data for date %s: %v", dateStr, err) + continue + } + } + + // Mark this API key as processed + if err := m.stateStore.Put(stateKey); err != nil { + m.logger.Errorf("Error storing state for API key: %v", err) + } + } + return nil +} + +func (m *MetricSet) fetchSingleDay(dateStr, apiKey string, httpClient *RLHTTPClient) error { + req, err := m.createRequest(dateStr, apiKey) + if err != nil { + return fmt.Errorf("error creating request: %w", err) + } + + resp, err := httpClient.Do(req) + if err != nil { + return fmt.Errorf("error making request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("error response from API: %s", resp.Status) + } + + return m.processResponse(resp, dateStr) +} + +func (m *MetricSet) createRequest(dateStr, apiKey string) (*http.Request, error) { + req, err := http.NewRequestWithContext(context.TODO(), http.MethodGet, m.config.APIURL, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + q := req.URL.Query() + q.Add("date", dateStr) + req.URL.RawQuery = q.Encode() + + req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", apiKey)) + for key, value := range processHeaders(m.config.Headers) { + req.Header.Add(key, value) + } + + return req, nil +} + +func (m *MetricSet) processResponse(resp *http.Response, dateStr string) error { + var usageResponse UsageResponse + if err := json.NewDecoder(resp.Body).Decode(&usageResponse); err != nil { + return fmt.Errorf("error decoding response: %w", err) + } + + m.logger.Info("Fetched usage metrics for date:", dateStr) + + events := make([]mb.Event, 0, len(usageResponse.Data)) + + m.processUsageData(events, usageResponse.Data) + m.processDalleData(events, usageResponse.DalleApiData) + m.processWhisperData(events, usageResponse.WhisperApiData) + m.processTTSData(events, usageResponse.TtsApiData) + + // Process additional data. + // + // NOTE(shmsr): During testing, could not get the usage data for the following + // and found no documentation, example responses, etc. That's why let's store them + // as it is so that we can use processors later on to process them as needed. 
+ m.processFTData(events, usageResponse.FtData) + m.processAssistantCodeInterpreterData(events, usageResponse.AssistantCodeInterpreterData) + m.processRetrievalStorageData(events, usageResponse.RetrievalStorageData) + + return nil +} + +func (m *MetricSet) processUsageData(events []mb.Event, data []UsageData) { + + for _, usage := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "data": mapstr.M{ + "organization_id": usage.OrganizationID, + "organization_name": usage.OrganizationName, + "aggregation_timestamp": time.Unix(usage.AggregationTimestamp, 0), + "n_requests": usage.NRequests, + "operation": usage.Operation, + "snapshot_id": usage.SnapshotID, + "n_context_tokens_total": usage.NContextTokensTotal, + "n_generated_tokens_total": usage.NGeneratedTokensTotal, + "email": usage.Email, + "api_key_id": usage.ApiKeyID, + "api_key_name": usage.ApiKeyName, + "api_key_redacted": usage.ApiKeyRedacted, + "api_key_type": usage.ApiKeyType, + "project_id": usage.ProjectID, + "project_name": usage.ProjectName, + "request_type": usage.RequestType, + "n_cached_context_tokens_total": usage.NCachedContextTokensTotal, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processDalleData(events []mb.Event, data []DalleData) { + for _, dalle := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "dalle": mapstr.M{ + "timestamp": time.Unix(dalle.Timestamp, 0), + "num_images": dalle.NumImages, + "num_requests": dalle.NumRequests, + "image_size": dalle.ImageSize, + "operation": dalle.Operation, + "user_id": dalle.UserID, + "organization_id": dalle.OrganizationID, + "api_key_id": dalle.ApiKeyID, + "api_key_name": dalle.ApiKeyName, + "api_key_redacted": dalle.ApiKeyRedacted, + "api_key_type": dalle.ApiKeyType, + "organization_name": dalle.OrganizationName, + "model_id": dalle.ModelID, + "project_id": dalle.ProjectID, + "project_name": dalle.ProjectName, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processWhisperData(events []mb.Event, data []WhisperData) { + for _, whisper := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "whisper": mapstr.M{ + "timestamp": time.Unix(whisper.Timestamp, 0), + "model_id": whisper.ModelID, + "num_seconds": whisper.NumSeconds, + "num_requests": whisper.NumRequests, + "user_id": whisper.UserID, + "organization_id": whisper.OrganizationID, + "api_key_id": whisper.ApiKeyID, + "api_key_name": whisper.ApiKeyName, + "api_key_redacted": whisper.ApiKeyRedacted, + "api_key_type": whisper.ApiKeyType, + "organization_name": whisper.OrganizationName, + "project_id": whisper.ProjectID, + "project_name": whisper.ProjectName, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processTTSData(events []mb.Event, data []TtsData) { + for _, tts := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "tts": mapstr.M{ + "timestamp": time.Unix(tts.Timestamp, 0), + "model_id": tts.ModelID, + "num_characters": tts.NumCharacters, + "num_requests": tts.NumRequests, + "user_id": tts.UserID, + "organization_id": tts.OrganizationID, + "api_key_id": tts.ApiKeyID, + "api_key_name": tts.ApiKeyName, + "api_key_redacted": tts.ApiKeyRedacted, + "api_key_type": tts.ApiKeyType, + "organization_name": tts.OrganizationName, + "project_id": tts.ProjectID, + "project_name": tts.ProjectName, + }, + }, + } + events = append(events, event) + } + + m.processEvents(events) +} + +func (m *MetricSet) 
processFTData(events []mb.Event, data []interface{}) { + for _, ft := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "ft_data": mapstr.M{ + "original": ft, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processAssistantCodeInterpreterData(events []mb.Event, data []interface{}) { + for _, aci := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "assistant_code_interpreter": mapstr.M{ + "original": aci, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processRetrievalStorageData(events []mb.Event, data []interface{}) { + for _, rs := range data { + event := mb.Event{ + MetricSetFields: mapstr.M{ + "retrieval_storage": mapstr.M{ + "original": rs, + }, + }, + } + events = append(events, event) + } + m.processEvents(events) +} + +func (m *MetricSet) processEvents(events []mb.Event) { + if len(events) > 0 { + for i := range events { + m.report.Event(events[i]) + } + } + clear(events) +} diff --git a/x-pack/metricbeat/module/openai/usage/usage_integration_test.go b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go new file mode 100644 index 00000000000..8a0c9b70090 --- /dev/null +++ b/x-pack/metricbeat/module/openai/usage/usage_integration_test.go @@ -0,0 +1,176 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package usage + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + + mbtest "github.com/elastic/beats/v7/metricbeat/mb/testing" +) + +func TestFetch(t *testing.T) { + apiKey := "most_secure_token" + usagePath := "/usage" + server := initServer(usagePath, apiKey) + defer server.Close() + + f := mbtest.NewReportingMetricSetV2Error(t, getConfig(server.URL+"/usage", apiKey)) + + events, errs := mbtest.ReportingFetchV2Error(f) + if len(errs) > 0 { + t.Fatalf("Expected 0 error, has %d: %v", len(errs), errs) + } + + assert.NotEmpty(t, events) + +} + +func TestData(t *testing.T) { + apiKey := "most_secure_token" + usagePath := "/usage" + server := initServer(usagePath, apiKey) + defer server.Close() + + f := mbtest.NewReportingMetricSetV2Error(t, getConfig(server.URL+"/usage", apiKey)) + + err := mbtest.WriteEventsReporterV2Error(f, t, "") + if !assert.NoError(t, err) { + t.FailNow() + } +} + +func getConfig(url, apiKey string) map[string]interface{} { + return map[string]interface{}{ + "module": "openai", + "metricsets": []string{"usage"}, + "enabled": true, + "period": "1h", + "api_url": url, + "api_keys": []map[string]interface{}{ + {"key": apiKey}, + }, + "rate_limit": map[string]interface{}{ + "limit": 60, + "burst": 5, + }, + "collection": map[string]interface{}{ + "lookback_days": 1, + }, + } +} + +func initServer(endpoint string, api_key string) *httptest.Server { + data := []byte(`{ + "object": "list", + "data": [ + { + "organization_id": "org-dummy", + "organization_name": "Personal", + "aggregation_timestamp": 1730696460, + "n_requests": 1, + "operation": "completion-realtime", + "snapshot_id": "gpt-4o-realtime-preview-2024-10-01", + "n_context_tokens_total": 118, + "n_generated_tokens_total": 35, + "email": null, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "project_id": null, + "project_name": null, + 
"request_type": "", + "n_cached_context_tokens_total": 0 + }, + { + "organization_id": "org-dummy", + "organization_name": "Personal", + "aggregation_timestamp": 1730696460, + "n_requests": 1, + "operation": "completion", + "snapshot_id": "gpt-4o-2024-08-06", + "n_context_tokens_total": 31, + "n_generated_tokens_total": 12, + "email": null, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "project_id": null, + "project_name": null, + "request_type": "", + "n_cached_context_tokens_total": 0 + }, + { + "organization_id": "org-dummy", + "organization_name": "Personal", + "aggregation_timestamp": 1730697540, + "n_requests": 1, + "operation": "completion", + "snapshot_id": "ft:gpt-3.5-turbo-0125:personal:yay-renew:APjjyG8E:ckpt-step-84", + "n_context_tokens_total": 13, + "n_generated_tokens_total": 9, + "email": null, + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "project_id": null, + "project_name": null, + "request_type": "", + "n_cached_context_tokens_total": 0 + } + ], + "ft_data": [], + "dalle_api_data": [], + "whisper_api_data": [ + { + "timestamp": 1730696460, + "model_id": "whisper-1", + "num_seconds": 2, + "num_requests": 1, + "user_id": null, + "organization_id": "org-dummy", + "api_key_id": null, + "api_key_name": null, + "api_key_redacted": null, + "api_key_type": null, + "organization_name": "Personal", + "project_id": null, + "project_name": null + } + ], + "tts_api_data": [], + "assistant_code_interpreter_data": [], + "retrieval_storage_data": [] +}`) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Validate Bearer token + authHeader := r.Header.Get("Authorization") + expectedToken := fmt.Sprintf("Bearer %s", api_key) + + if authHeader != expectedToken { + w.WriteHeader(http.StatusUnauthorized) + return + } + + // Validate the endpoint + if r.URL.Path == endpoint { + w.WriteHeader(http.StatusOK) + _, _ = w.Write(data) + } else { + w.WriteHeader(http.StatusNotFound) + } + })) + return server +} diff --git a/x-pack/metricbeat/modules.d/openai.yml.disabled b/x-pack/metricbeat/modules.d/openai.yml.disabled new file mode 100644 index 00000000000..10a9fb48577 --- /dev/null +++ b/x-pack/metricbeat/modules.d/openai.yml.disabled @@ -0,0 +1,31 @@ +# Module: openai +# Docs: https://www.elastic.co/guide/en/beats/metricbeat/main/metricbeat-module-openai.html + +- module: openai + metricsets: ["usage"] + enabled: false + period: 1h + + # # Project API Keys - Multiple API keys can be specified for different projects + # api_keys: + # - key: "api_key1" + # - key: "api_key2" + + # # API Configuration + # ## Base URL for the OpenAI usage API endpoint + # api_url: "https://api.openai.com/v1/usage" + # ## Custom headers to be included in API requests + # headers: + # - "k1: v1" + # - "k2: v2" + ## Rate Limiting Configuration + # rate_limit: + # limit: 60 # requests per second + # burst: 5 # burst size + # ## Request timeout duration + # timeout: 30s + + # # Data Collection Configuration + # collection: + # ## Number of days to look back when collecting usage data + # lookback_days: 30