From 287898f7518573902c31261fa122f792ff41f34f Mon Sep 17 00:00:00 2001 From: Sam Brenner <106700075+sabrenner@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:12:51 -0400 Subject: [PATCH] [MLOB-1524] feat(llmobs): Introduce LLM Observability SDK (#4742) * [MLOB-1540] add llmobs configuration to global tracer config (#4696) add llmobs config * [MLOB-1555] LLM Observability writers (#4699) LLM Observability writers * [MLOB-1556] LLM Observability tagger (#4718) LLM Observability tagger * [MLOB-1560] LLMObs Span Processor (#4738) * span processor * tests * remove agent exporter log and do not stringify tags * remove llmobs from exporter tests * add in default unserializable value * review comments * warning log for metric * todo-ify * remove some duplicate logic * decouple llmobs span processing with a channel * use a static weakmap to store llmobs tags/annotations instead of span tags * do not register span in map if it does not have an llmobs span kind * span is passed on an object from sp publisher * re-clarify TODOs * only send span in publish * log multiple warnings and return conditional undefined * update error logic * [MLOB-1561] LLM Observability SDK API (#4773) * wip * type definitions * active + try/catch eval metric writer append * test ts * use tagger map and processor as a channel subscriber * change decorate and add in dev changes * try some api changes * add decorate to noop * fix breaking proxy tests * experimental decorators for TS docs * api changes, fix unit + e2e tests * try removing global log mocks * add some util tests * remove logger mocks * add module tests + do not enable when not specified * fix eval metric integration test * wip * memoize getFunctionArguments * move any subscriber and global writer to the module enablement level instead of sdk * should fix TS tests * add ts integration test and fix decorator * devex for ts versions * add noop typescript test * remove startSpan * remove unneeded change * dedup decorator code * Update index.d.ts Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> * map metrics names * change validKind to validateKind and throw * tagger for metrics follow-up * review feedback * add some tests for not auto-annotating in certain cases --------- Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> * hard fail instead of soft fail, except for `wrap` span name * add ml-observability codeowners * resolve ts test * update auto-annotation check * tagger can soft fail * using custom ASL instance and scope activation * fix test comments and remove * address review comments * remove llmobs.apiKey config, only rely on global * fix evaulations test * make llmobs storage accessible --------- Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com> --- .github/workflows/llmobs.yml | 30 + docs/package.json | 2 +- docs/test.ts | 90 ++ docs/yarn.lock | 8 +- index.d.ts | 335 ++++++ integration-tests/helpers/fake-agent.js | 48 + package.json | 2 + packages/dd-trace/src/config.js | 20 + .../dd-trace/src/llmobs/constants/tags.js | 34 + .../dd-trace/src/llmobs/constants/text.js | 6 + .../dd-trace/src/llmobs/constants/writers.js | 13 + packages/dd-trace/src/llmobs/index.js | 103 ++ packages/dd-trace/src/llmobs/noop.js | 82 ++ packages/dd-trace/src/llmobs/sdk.js | 377 ++++++ .../dd-trace/src/llmobs/span_processor.js | 195 ++++ packages/dd-trace/src/llmobs/storage.js | 7 + packages/dd-trace/src/llmobs/tagger.js | 322 ++++++ packages/dd-trace/src/llmobs/util.js | 176 +++ packages/dd-trace/src/llmobs/writers/base.js | 111 ++ .../src/llmobs/writers/evaluations.js | 29 + .../src/llmobs/writers/spans/agentProxy.js | 23 + .../src/llmobs/writers/spans/agentless.js | 17 + .../dd-trace/src/llmobs/writers/spans/base.js | 49 + packages/dd-trace/src/noop/proxy.js | 3 + packages/dd-trace/src/proxy.js | 9 +- packages/dd-trace/src/span_processor.js | 5 + packages/dd-trace/test/config.spec.js | 91 +- packages/dd-trace/test/llmobs/index.spec.js | 137 +++ packages/dd-trace/test/llmobs/noop.spec.js | 58 + .../dd-trace/test/llmobs/sdk/index.spec.js | 1027 +++++++++++++++++ .../test/llmobs/sdk/integration.spec.js | 256 ++++ .../test/llmobs/sdk/typescript/index.spec.js | 133 +++ .../test/llmobs/sdk/typescript/index.ts | 23 + .../test/llmobs/sdk/typescript/noop.ts | 19 + .../test/llmobs/span_processor.spec.js | 360 ++++++ packages/dd-trace/test/llmobs/tagger.spec.js | 576 +++++++++ packages/dd-trace/test/llmobs/util.js | 201 ++++ packages/dd-trace/test/llmobs/util.spec.js | 142 +++ .../dd-trace/test/llmobs/writers/base.spec.js | 179 +++ .../test/llmobs/writers/evaluations.spec.js | 46 + .../llmobs/writers/spans/agentProxy.spec.js | 28 + .../llmobs/writers/spans/agentless.spec.js | 21 + .../test/llmobs/writers/spans/base.spec.js | 99 ++ packages/dd-trace/test/proxy.spec.js | 3 +- 44 files changed, 5484 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/llmobs.yml create mode 100644 packages/dd-trace/src/llmobs/constants/tags.js create mode 100644 packages/dd-trace/src/llmobs/constants/text.js create mode 100644 packages/dd-trace/src/llmobs/constants/writers.js create mode 100644 packages/dd-trace/src/llmobs/index.js create mode 100644 packages/dd-trace/src/llmobs/noop.js create mode 100644 packages/dd-trace/src/llmobs/sdk.js create mode 100644 packages/dd-trace/src/llmobs/span_processor.js create mode 100644 packages/dd-trace/src/llmobs/storage.js create mode 100644 packages/dd-trace/src/llmobs/tagger.js create mode 100644 packages/dd-trace/src/llmobs/util.js create mode 100644 packages/dd-trace/src/llmobs/writers/base.js create mode 100644 packages/dd-trace/src/llmobs/writers/evaluations.js create mode 100644 packages/dd-trace/src/llmobs/writers/spans/agentProxy.js create mode 100644 packages/dd-trace/src/llmobs/writers/spans/agentless.js create mode 100644 packages/dd-trace/src/llmobs/writers/spans/base.js create mode 100644 packages/dd-trace/test/llmobs/index.spec.js create mode 100644 packages/dd-trace/test/llmobs/noop.spec.js create mode 100644 packages/dd-trace/test/llmobs/sdk/index.spec.js create mode 100644 packages/dd-trace/test/llmobs/sdk/integration.spec.js create mode 100644 packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js create mode 100644 packages/dd-trace/test/llmobs/sdk/typescript/index.ts create mode 100644 packages/dd-trace/test/llmobs/sdk/typescript/noop.ts create mode 100644 packages/dd-trace/test/llmobs/span_processor.spec.js create mode 100644 packages/dd-trace/test/llmobs/tagger.spec.js create mode 100644 packages/dd-trace/test/llmobs/util.js create mode 100644 packages/dd-trace/test/llmobs/util.spec.js create mode 100644 packages/dd-trace/test/llmobs/writers/base.spec.js create mode 100644 packages/dd-trace/test/llmobs/writers/evaluations.spec.js create mode 100644 packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js create mode 100644 packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js create mode 100644 packages/dd-trace/test/llmobs/writers/spans/base.spec.js diff --git a/.github/workflows/llmobs.yml b/.github/workflows/llmobs.yml new file mode 100644 index 00000000000..df7754dca81 --- /dev/null +++ b/.github/workflows/llmobs.yml @@ -0,0 +1,30 @@ +name: LLMObs + +on: + pull_request: + push: + branches: [master] + schedule: + - cron: '0 4 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + ubuntu: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/testagent/start + - uses: ./.github/actions/node/setup + - uses: ./.github/actions/install + - uses: ./.github/actions/node/18 + - run: yarn test:llmobs:ci + - uses: ./.github/actions/node/20 + - run: yarn test:llmobs:ci + - uses: ./.github/actions/node/latest + - run: yarn test:llmobs:ci + - if: always() + uses: ./.github/actions/testagent/logs + - uses: codecov/codecov-action@v3 diff --git a/docs/package.json b/docs/package.json index 30cb5dd848a..e551a25e948 100644 --- a/docs/package.json +++ b/docs/package.json @@ -11,6 +11,6 @@ "private": true, "devDependencies": { "typedoc": "^0.25.8", - "typescript": "^4.6" + "typescript": "^5.0" } } diff --git a/docs/test.ts b/docs/test.ts index 9c6c7df6211..c2a198d7d98 100644 --- a/docs/test.ts +++ b/docs/test.ts @@ -536,3 +536,93 @@ const otelTraceId: string = spanContext.traceId const otelSpanId: string = spanContext.spanId const otelTraceFlags: number = spanContext.traceFlags const otelTraceState: opentelemetry.TraceState = spanContext.traceState! + +// -- LLM Observability -- +const llmobsEnableOptions = { + mlApp: 'mlApp', + agentlessEnabled: true +} +tracer.init({ + llmobs: llmobsEnableOptions, +}) +const llmobs = tracer.llmobs +const enabled = llmobs.enabled + +// manually enable +llmobs.enable({ + mlApp: 'mlApp', + agentlessEnabled: true +}) + +// manually disable +llmobs.disable() + +// trace block of code +llmobs.trace({ name: 'name', kind: 'llm' }, () => {}) +llmobs.trace({ kind: 'llm', name: 'myLLM', modelName: 'myModel', modelProvider: 'myProvider' }, () => {}) +llmobs.trace({ name: 'name', kind: 'llm' }, (span, cb) => { + llmobs.annotate(span, {}) + span.setTag('foo', 'bar') + cb(new Error('boom')) +}) + +// wrap a function +llmobs.wrap({ kind: 'llm' }, function myLLM () {})() +llmobs.wrap({ kind: 'llm', name: 'myLLM', modelName: 'myModel', modelProvider: 'myProvider' }, function myFunction () {})() + +// decorate a function +class MyClass { + @llmobs.decorate({ kind: 'llm' }) + myLLM () {} + + @llmobs.decorate({ kind: 'llm', name: 'myOtherLLM', modelName: 'myModel', modelProvider: 'myProvider' }) + myOtherLLM () {} +} + +const cls = new MyClass() +cls.myLLM() +cls.myOtherLLM() + +// export a span +llmobs.enable({ mlApp: 'myApp' }) +llmobs.trace({ kind: 'llm', name: 'myLLM' }, (span) => { + const llmobsSpanCtx = llmobs.exportSpan(span) + llmobsSpanCtx.traceId; + llmobsSpanCtx.spanId; + + // submit evaluation + llmobs.disable() + llmobs.submitEvaluation(llmobsSpanCtx, { + label: 'my-eval-metric', + metricType: 'categorical', + value: 'good', + mlApp: 'myApp', + tags: {}, + timestampMs: Date.now() + }) +}) + +// annotate a span +llmobs.annotate({ + inputData: 'input', + outputData: 'output', + metadata: {}, + metrics: { + inputTokens: 10, + outputTokens: 5, + totalTokens: 15 + }, + tags: {} +}) +llmobs.annotate(span, { + inputData: 'input', + outputData: 'output', + metadata: {}, + metrics: {}, + tags: {} +}) + + + +// flush +llmobs.flush() diff --git a/docs/yarn.lock b/docs/yarn.lock index 4b011ed3db2..be52dcbd364 100644 --- a/docs/yarn.lock +++ b/docs/yarn.lock @@ -61,10 +61,10 @@ typedoc@^0.25.8: minimatch "^9.0.3" shiki "^0.14.7" -typescript@^4.6: - version "4.9.5" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.5.tgz#095979f9bcc0d09da324d58d03ce8f8374cbe65a" - integrity sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g== +typescript@^5.0: + version "5.6.3" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.6.3.tgz#5f3449e31c9d94febb17de03cc081dd56d81db5b" + integrity sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw== vscode-oniguruma@^1.7.0: version "1.7.0" diff --git a/index.d.ts b/index.d.ts index 2e5aa4c57a8..3987c581c58 100644 --- a/index.d.ts +++ b/index.d.ts @@ -137,6 +137,11 @@ interface Tracer extends opentracing.Tracer { TracerProvider: tracer.opentelemetry.TracerProvider; dogstatsd: tracer.DogStatsD; + + /** + * LLM Observability SDK + */ + llmobs: tracer.llmobs.LLMObs; } // left out of the namespace, so it @@ -752,6 +757,11 @@ declare namespace tracer { */ maxDepth?: number } + + /** + * Configuration enabling LLM Observability. Enablement is superceded by the DD_LLMOBS_ENABLED environment variable. + */ + llmobs?: llmobs.LLMObsEnableOptions } /** @@ -2198,6 +2208,331 @@ declare namespace tracer { */ telemetryVerbosity?: string } + + export namespace llmobs { + export interface LLMObs { + + /** + * Whether or not LLM Observability is enabled. + */ + enabled: boolean, + + /** + * Enable LLM Observability tracing. + */ + enable (options: LLMObsEnableOptions): void, + + /** + * Disable LLM Observability tracing. + */ + disable (): void, + + /** + * Instruments a function by automatically creating a span activated on its + * scope. + * + * The span will automatically be finished when one of these conditions is + * met: + * + * * The function returns a promise, in which case the span will finish when + * the promise is resolved or rejected. + * * The function takes a callback as its second parameter, in which case the + * span will finish when that callback is called. + * * The function doesn't accept a callback and doesn't return a promise, in + * which case the span will finish at the end of the function execution. + * @param fn The function to instrument. + * @param options Optional LLM Observability span options. + * @returns The return value of the function. + */ + trace (options: LLMObsNamedSpanOptions, fn: (span: tracer.Span, done: (error?: Error) => void) => T): T + + /** + * Wrap a function to automatically create a span activated on its + * scope when it's called. + * + * The span will automatically be finished when one of these conditions is + * met: + * + * * The function returns a promise, in which case the span will finish when + * the promise is resolved or rejected. + * * The function takes a callback as its last parameter, in which case the + * span will finish when that callback is called. + * * The function doesn't accept a callback and doesn't return a promise, in + * which case the span will finish at the end of the function execution. + * @param fn The function to instrument. + * @param options Optional LLM Observability span options. + * @returns A new function that wraps the provided function with span creation. + */ + wrap any> (options: LLMObsNamelessSpanOptions, fn: T): T + + /** + * Decorate a function in a javascript runtime that supports function decorators. + * Note that this is **not** supported in the Node.js runtime, but is in TypeScript. + * + * In TypeScript, this decorator is only supported in contexts where general TypeScript + * function decorators are supported. + * + * @param options Optional LLM Observability span options. + */ + decorate (options: llmobs.LLMObsNamelessSpanOptions): any + + /** + * Returns a representation of a span to export its span and trace IDs. + * If no span is provided, the current LLMObs-type span will be used. + * @param span Optional span to export. + * @returns An object containing the span and trace IDs. + */ + exportSpan (span?: tracer.Span): llmobs.ExportedLLMObsSpan + + + /** + * Sets inputs, outputs, tags, metadata, and metrics as provided for a given LLM Observability span. + * Note that with the exception of tags, this method will override any existing values for the provided fields. + * + * For example: + * ```javascript + * llmobs.trace({ kind: 'llm', name: 'myLLM', modelName: 'gpt-4o', modelProvider: 'openai' }, () => { + * llmobs.annotate({ + * inputData: [{ content: 'system prompt, role: 'system' }, { content: 'user prompt', role: 'user' }], + * outputData: { content: 'response', role: 'ai' }, + * metadata: { temperature: 0.7 }, + * tags: { host: 'localhost' }, + * metrics: { inputTokens: 10, outputTokens: 20, totalTokens: 30 } + * }) + * }) + * ``` + * + * @param span The span to annotate (defaults to the current LLM Observability span if not provided) + * @param options An object containing the inputs, outputs, tags, metadata, and metrics to set on the span. + */ + annotate (options: llmobs.AnnotationOptions): void + annotate (span: tracer.Span | undefined, options: llmobs.AnnotationOptions): void + + /** + * Submits a custom evalutation metric for a given span ID and trace ID. + * @param spanContext The span context of the span to submit the evaluation metric for. + * @param options An object containing the label, metric type, value, and tags of the evaluation metric. + */ + submitEvaluation (spanContext: llmobs.ExportedLLMObsSpan, options: llmobs.EvaluationOptions): void + + /** + * Flushes any remaining spans and evaluation metrics to LLM Observability. + */ + flush (): void + } + + interface EvaluationOptions { + /** + * The name of the evalutation metric + */ + label: string, + + /** + * The type of evaluation metric, one of 'categorical' or 'score' + */ + metricType: 'categorical' | 'score', + + /** + * The value of the evaluation metric. + * Must be string for 'categorical' metrics and number for 'score' metrics. + */ + value: string | number, + + /** + * An object of string key-value pairs to tag the evaluation metric with. + */ + tags?: { [key: string]: any }, + + /** + * The name of the ML application + */ + mlApp?: string, + + /** + * The timestamp in milliseconds when the evaluation metric result was generated. + */ + timestampMs?: number + } + + interface Document { + /** + * Document text + */ + text?: string, + + /** + * Document name + */ + name?: string, + + /** + * Document ID + */ + id?: string, + + /** + * Score of the document retrieval as a source of ground truth + */ + score?: number + } + + /** + * Represents a single LLM chat model message + */ + interface Message { + /** + * Content of the message. + */ + content: string, + + /** + * Role of the message (ie system, user, ai) + */ + role?: string, + + /** + * Tool calls of the message + */ + toolCalls?: ToolCall[], + } + + /** + * Represents a single tool call for an LLM chat model message + */ + interface ToolCall { + /** + * Name of the tool + */ + name?: string, + + /** + * Arguments passed to the tool + */ + arguments?: { [key: string]: any }, + + /** + * The tool ID + */ + toolId?: string, + + /** + * The tool type + */ + type?: string + } + + /** + * Annotation options for LLM Observability spans. + */ + interface AnnotationOptions { + /** + * A single input string, object, or a list of objects based on the span kind: + * 1. LLM spans: accepts a string, or an object of the form {content: "...", role: "..."}, or a list of objects with the same signature. + * 2. Embedding spans: accepts a string, list of strings, or an object of the form {text: "...", ...}, or a list of objects with the same signature. + * 3. Other: any JSON serializable type + */ + inputData?: string | Message | Message[] | Document | Document[] | { [key: string]: any }, + + /** + * A single output string, object, or a list of objects based on the span kind: + * 1. LLM spans: accepts a string, or an object of the form {content: "...", role: "..."}, or a list of objects with the same signature. + * 2. Retrieval spans: An object containing any of the key value pairs {name: str, id: str, text: str, source: number} or a list of dictionaries with the same signature. + * 3. Other: any JSON serializable type + */ + outputData?: string | Message | Message[] | Document | Document[] | { [key: string]: any }, + + /** + * Object of JSON serializable key-value metadata pairs relevant to the input/output operation described by the LLM Observability span. + */ + metadata?: { [key: string]: any }, + + /** + * Object of JSON seraliazable key-value metrics (number) pairs, such as `{input,output,total}Tokens` + */ + metrics?: { [key: string]: number }, + + /** + * Object of JSON serializable key-value tag pairs to set or update on the LLM Observability span regarding the span's context. + */ + tags?: { [key: string]: any } + } + + /** + * An object containing the span ID and trace ID of interest + */ + interface ExportedLLMObsSpan { + /** + * Trace ID associated with the span of interest + */ + traceId: string, + + /** + * Span ID associated with the span of interest + */ + spanId: string, + } + + interface LLMObsSpanOptions extends SpanOptions { + /** + * LLM Observability span kind. One of `agent`, `workflow`, `task`, `tool`, `retrieval`, `embedding`, or `llm`. + */ + kind: llmobs.spanKind, + + /** + * The ID of the underlying user session. Required for tracking sessions. + */ + sessionId?: string, + + /** + * The name of the ML application that the agent is orchestrating. + * If not provided, the default value will be set to mlApp provided during initalization, or `DD_LLMOBS_ML_APP`. + */ + mlApp?: string, + + /** + * The name of the invoked LLM or embedding model. Only used on `llm` and `embedding` spans. + */ + modelName?: string, + + /** + * The name of the invoked LLM or embedding model provider. Only used on `llm` and `embedding` spans. + * If not provided for LLM or embedding spans, a default value of 'custom' will be set. + */ + modelProvider?: string, + } + + interface LLMObsNamedSpanOptions extends LLMObsSpanOptions { + /** + * The name of the traced operation. This is a required option. + */ + name: string, + } + + interface LLMObsNamelessSpanOptions extends LLMObsSpanOptions { + /** + * The name of the traced operation. + */ + name?: string, + } + + /** + * Options for enabling LLM Observability tracing. + */ + interface LLMObsEnableOptions { + /** + * The name of your ML application. + */ + mlApp?: string, + + /** + * Set to `true` to disbale sending data that requires a Datadog Agent. + */ + agentlessEnabled?: boolean, + } + + /** @hidden */ + type spanKind = 'agent' | 'workflow' | 'task' | 'tool' | 'retrieval' | 'embedding' | 'llm' + } } /** diff --git a/integration-tests/helpers/fake-agent.js b/integration-tests/helpers/fake-agent.js index 70aff2ecfa8..f1054720d92 100644 --- a/integration-tests/helpers/fake-agent.js +++ b/integration-tests/helpers/fake-agent.js @@ -188,6 +188,46 @@ module.exports = class FakeAgent extends EventEmitter { return resultPromise } + + assertLlmObsPayloadReceived (fn, timeout, expectedMessageCount = 1, resolveAtFirstSuccess) { + timeout = timeout || 30000 + let resultResolve + let resultReject + let msgCount = 0 + const errors = [] + + const timeoutObj = setTimeout(() => { + const errorsMsg = errors.length === 0 ? '' : `, additionally:\n${errors.map(e => e.stack).join('\n')}\n===\n` + resultReject(new Error(`timeout${errorsMsg}`, { cause: { errors } })) + }, timeout) + + const resultPromise = new Promise((resolve, reject) => { + resultResolve = () => { + clearTimeout(timeoutObj) + resolve() + } + resultReject = (e) => { + clearTimeout(timeoutObj) + reject(e) + } + }) + + const messageHandler = msg => { + try { + msgCount += 1 + fn(msg) + if (resolveAtFirstSuccess || msgCount === expectedMessageCount) { + resultResolve() + this.removeListener('llmobs', messageHandler) + } + } catch (e) { + errors.push(e) + } + } + this.on('llmobs', messageHandler) + + return resultPromise + } } function buildExpressServer (agent) { @@ -315,6 +355,14 @@ function buildExpressServer (agent) { }) }) + app.post('/evp_proxy/v2/api/v2/llmobs', (req, res) => { + res.status(200).send() + agent.emit('llmobs', { + headers: req.headers, + payload: req.body + }) + }) + return app } diff --git a/package.json b/package.json index 279949d36d8..b8eeced19e5 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,8 @@ "test:core:ci": "npm run test:core -- --coverage --nyc-arg=--include=\"packages/datadog-core/src/**/*.js\"", "test:lambda": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/dd-trace/test/lambda/**/*.spec.js\"", "test:lambda:ci": "nyc --no-clean --include \"packages/dd-trace/src/lambda/**/*.js\" -- npm run test:lambda", + "test:llmobs": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/dd-trace/test/llmobs/**/*.spec.js\"", + "test:llmobs:ci": "nyc --no-clean --include \"packages/dd-trace/src/llmobs/**/*.js\" -- npm run test:llmobs", "test:plugins": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/datadog-instrumentations/test/@($(echo $PLUGINS)).spec.js\" \"packages/datadog-plugin-@($(echo $PLUGINS))/test/**/*.spec.js\"", "test:plugins:ci": "yarn services && nyc --no-clean --include \"packages/datadog-instrumentations/src/@($(echo $PLUGINS)).js\" --include \"packages/datadog-instrumentations/src/@($(echo $PLUGINS))/**/*.js\" --include \"packages/datadog-plugin-@($(echo $PLUGINS))/src/**/*.js\" -- npm run test:plugins", "test:plugins:upstream": "node ./packages/dd-trace/test/plugins/suite.js", diff --git a/packages/dd-trace/src/config.js b/packages/dd-trace/src/config.js index defa10bffa0..5a9ec19f4a2 100644 --- a/packages/dd-trace/src/config.js +++ b/packages/dd-trace/src/config.js @@ -501,6 +501,9 @@ class Config { this._setValue(defaults, 'isGitUploadEnabled', false) this._setValue(defaults, 'isIntelligentTestRunnerEnabled', false) this._setValue(defaults, 'isManualApiEnabled', false) + this._setValue(defaults, 'llmobs.agentlessEnabled', false) + this._setValue(defaults, 'llmobs.enabled', false) + this._setValue(defaults, 'llmobs.mlApp', undefined) this._setValue(defaults, 'ciVisibilityTestSessionName', '') this._setValue(defaults, 'ciVisAgentlessLogSubmissionEnabled', false) this._setValue(defaults, 'isTestDynamicInstrumentationEnabled', false) @@ -605,6 +608,9 @@ class Config { DD_INSTRUMENTATION_TELEMETRY_ENABLED, DD_INSTRUMENTATION_CONFIG_ID, DD_LOGS_INJECTION, + DD_LLMOBS_AGENTLESS_ENABLED, + DD_LLMOBS_ENABLED, + DD_LLMOBS_ML_APP, DD_OPENAI_LOGS_ENABLED, DD_OPENAI_SPAN_CHAR_LIMIT, DD_PROFILING_ENABLED, @@ -751,6 +757,9 @@ class Config { this._setArray(env, 'injectionEnabled', DD_INJECTION_ENABLED) this._setBoolean(env, 'isAzureFunction', getIsAzureFunction()) this._setBoolean(env, 'isGCPFunction', getIsGCPFunction()) + this._setBoolean(env, 'llmobs.agentlessEnabled', DD_LLMOBS_AGENTLESS_ENABLED) + this._setBoolean(env, 'llmobs.enabled', DD_LLMOBS_ENABLED) + this._setString(env, 'llmobs.mlApp', DD_LLMOBS_ML_APP) this._setBoolean(env, 'logInjection', DD_LOGS_INJECTION) // Requires an accompanying DD_APM_OBFUSCATION_MEMCACHED_KEEP_COMMAND=true in the agent this._setBoolean(env, 'memcachedCommandEnabled', DD_TRACE_MEMCACHED_COMMAND_ENABLED) @@ -921,6 +930,8 @@ class Config { } this._setString(opts, 'iast.telemetryVerbosity', options.iast && options.iast.telemetryVerbosity) this._setBoolean(opts, 'isCiVisibility', options.isCiVisibility) + this._setBoolean(opts, 'llmobs.agentlessEnabled', options.llmobs?.agentlessEnabled) + this._setString(opts, 'llmobs.mlApp', options.llmobs?.mlApp) this._setBoolean(opts, 'logInjection', options.logInjection) this._setString(opts, 'lookup', options.lookup) this._setBoolean(opts, 'openAiLogsEnabled', options.openAiLogsEnabled) @@ -956,6 +967,15 @@ class Config { this._setBoolean(opts, 'traceId128BitGenerationEnabled', options.traceId128BitGenerationEnabled) this._setBoolean(opts, 'traceId128BitLoggingEnabled', options.traceId128BitLoggingEnabled) this._setString(opts, 'version', options.version || tags.version) + + // For LLMObs, we want the environment variable to take precedence over the options. + // This is reliant on environment config being set before options. + // This is to make sure the origins of each value are tracked appropriately for telemetry. + // We'll only set `llmobs.enabled` on the opts when it's not set on the environment, and options.llmobs is provided. + const llmobsEnabledEnv = this._env['llmobs.enabled'] + if (llmobsEnabledEnv == null && options.llmobs) { + this._setBoolean(opts, 'llmobs.enabled', !!options.llmobs) + } } _isCiVisibility () { diff --git a/packages/dd-trace/src/llmobs/constants/tags.js b/packages/dd-trace/src/llmobs/constants/tags.js new file mode 100644 index 00000000000..eee9a6b9890 --- /dev/null +++ b/packages/dd-trace/src/llmobs/constants/tags.js @@ -0,0 +1,34 @@ +'use strict' + +module.exports = { + SPAN_KINDS: ['llm', 'agent', 'workflow', 'task', 'tool', 'embedding', 'retrieval'], + SPAN_KIND: '_ml_obs.meta.span.kind', + SESSION_ID: '_ml_obs.session_id', + METADATA: '_ml_obs.meta.metadata', + METRICS: '_ml_obs.metrics', + ML_APP: '_ml_obs.meta.ml_app', + PROPAGATED_PARENT_ID_KEY: '_dd.p.llmobs_parent_id', + PARENT_ID_KEY: '_ml_obs.llmobs_parent_id', + TAGS: '_ml_obs.tags', + NAME: '_ml_obs.name', + TRACE_ID: '_ml_obs.trace_id', + PROPAGATED_TRACE_ID_KEY: '_dd.p.llmobs_trace_id', + ROOT_PARENT_ID: 'undefined', + + MODEL_NAME: '_ml_obs.meta.model_name', + MODEL_PROVIDER: '_ml_obs.meta.model_provider', + + INPUT_DOCUMENTS: '_ml_obs.meta.input.documents', + INPUT_MESSAGES: '_ml_obs.meta.input.messages', + INPUT_VALUE: '_ml_obs.meta.input.value', + + OUTPUT_DOCUMENTS: '_ml_obs.meta.output.documents', + OUTPUT_MESSAGES: '_ml_obs.meta.output.messages', + OUTPUT_VALUE: '_ml_obs.meta.output.value', + + INPUT_TOKENS_METRIC_KEY: 'input_tokens', + OUTPUT_TOKENS_METRIC_KEY: 'output_tokens', + TOTAL_TOKENS_METRIC_KEY: 'total_tokens', + + DROPPED_IO_COLLECTION_ERROR: 'dropped_io' +} diff --git a/packages/dd-trace/src/llmobs/constants/text.js b/packages/dd-trace/src/llmobs/constants/text.js new file mode 100644 index 00000000000..3c19b9febb6 --- /dev/null +++ b/packages/dd-trace/src/llmobs/constants/text.js @@ -0,0 +1,6 @@ +'use strict' + +module.exports = { + DROPPED_VALUE_TEXT: "[This value has been dropped because this span's size exceeds the 1MB size limit.]", + UNSERIALIZABLE_VALUE_TEXT: 'Unserializable value' +} diff --git a/packages/dd-trace/src/llmobs/constants/writers.js b/packages/dd-trace/src/llmobs/constants/writers.js new file mode 100644 index 00000000000..3726c33c7c0 --- /dev/null +++ b/packages/dd-trace/src/llmobs/constants/writers.js @@ -0,0 +1,13 @@ +'use strict' + +module.exports = { + EVP_PROXY_AGENT_BASE_PATH: 'evp_proxy/v2', + EVP_PROXY_AGENT_ENDPOINT: 'evp_proxy/v2/api/v2/llmobs', + EVP_SUBDOMAIN_HEADER_NAME: 'X-Datadog-EVP-Subdomain', + EVP_SUBDOMAIN_HEADER_VALUE: 'llmobs-intake', + AGENTLESS_SPANS_ENDPOINT: '/api/v2/llmobs', + AGENTLESS_EVALULATIONS_ENDPOINT: '/api/intake/llm-obs/v1/eval-metric', + + EVP_PAYLOAD_SIZE_LIMIT: 5 << 20, // 5MB (actual limit is 5.1MB) + EVP_EVENT_SIZE_LIMIT: (1 << 20) - 1024 // 999KB (actual limit is 1MB) +} diff --git a/packages/dd-trace/src/llmobs/index.js b/packages/dd-trace/src/llmobs/index.js new file mode 100644 index 00000000000..5d33ecb4c5d --- /dev/null +++ b/packages/dd-trace/src/llmobs/index.js @@ -0,0 +1,103 @@ +'use strict' + +const log = require('../log') +const { PROPAGATED_PARENT_ID_KEY } = require('./constants/tags') +const { storage } = require('./storage') + +const LLMObsSpanProcessor = require('./span_processor') + +const { channel } = require('dc-polyfill') +const spanProcessCh = channel('dd-trace:span:process') +const evalMetricAppendCh = channel('llmobs:eval-metric:append') +const flushCh = channel('llmobs:writers:flush') +const injectCh = channel('dd-trace:span:inject') + +const LLMObsAgentlessSpanWriter = require('./writers/spans/agentless') +const LLMObsAgentProxySpanWriter = require('./writers/spans/agentProxy') +const LLMObsEvalMetricsWriter = require('./writers/evaluations') + +/** + * Setting writers and processor globally when LLMObs is enabled + * We're setting these in this module instead of on the SDK. + * This is to isolate any subscribers and periodic tasks to this module, + * and not conditionally instantiate in the SDK, since the SDK is always instantiated + * if the tracer is `init`ed. But, in those cases, we don't want to start writers or subscribe + * to channels. + */ +let spanProcessor +let spanWriter +let evalWriter + +function enable (config) { + // create writers and eval writer append and flush channels + // span writer append is handled by the span processor + evalWriter = new LLMObsEvalMetricsWriter(config) + spanWriter = createSpanWriter(config) + + evalMetricAppendCh.subscribe(handleEvalMetricAppend) + flushCh.subscribe(handleFlush) + + // span processing + spanProcessor = new LLMObsSpanProcessor(config) + spanProcessor.setWriter(spanWriter) + spanProcessCh.subscribe(handleSpanProcess) + + // distributed tracing for llmobs + injectCh.subscribe(handleLLMObsParentIdInjection) +} + +function disable () { + if (evalMetricAppendCh.hasSubscribers) evalMetricAppendCh.unsubscribe(handleEvalMetricAppend) + if (flushCh.hasSubscribers) flushCh.unsubscribe(handleFlush) + if (spanProcessCh.hasSubscribers) spanProcessCh.unsubscribe(handleSpanProcess) + if (injectCh.hasSubscribers) injectCh.unsubscribe(handleLLMObsParentIdInjection) + + spanWriter?.destroy() + evalWriter?.destroy() + spanProcessor?.setWriter(null) + + spanWriter = null + evalWriter = null +} + +// since LLMObs traces can extend between services and be the same trace, +// we need to propogate the parent id. +function handleLLMObsParentIdInjection ({ carrier }) { + const parent = storage.getStore()?.span + if (!parent) return + + const parentId = parent?.context().toSpanId() + + carrier['x-datadog-tags'] += `,${PROPAGATED_PARENT_ID_KEY}=${parentId}` +} + +function createSpanWriter (config) { + const SpanWriter = config.llmobs.agentlessEnabled ? LLMObsAgentlessSpanWriter : LLMObsAgentProxySpanWriter + return new SpanWriter(config) +} + +function handleFlush () { + try { + spanWriter.flush() + evalWriter.flush() + } catch (e) { + log.warn(`Failed to flush LLMObs spans and evaluation metrics: ${e.message}`) + } +} + +function handleSpanProcess (data) { + spanProcessor.process(data) +} + +function handleEvalMetricAppend (payload) { + try { + evalWriter.append(payload) + } catch (e) { + log.warn(` + Failed to append evaluation metric to LLM Observability writer, likely due to an unserializable property. + Evaluation metrics won't be sent to LLM Observability: ${e.message} + `) + } +} + +module.exports = { enable, disable } diff --git a/packages/dd-trace/src/llmobs/noop.js b/packages/dd-trace/src/llmobs/noop.js new file mode 100644 index 00000000000..4eba48cd51c --- /dev/null +++ b/packages/dd-trace/src/llmobs/noop.js @@ -0,0 +1,82 @@ +'use strict' + +class NoopLLMObs { + constructor (noopTracer) { + this._tracer = noopTracer + } + + get enabled () { + return false + } + + enable (options) {} + + disable () {} + + trace (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const name = options.name || options.kind || fn.name + + return this._tracer.trace(name, options, fn) + } + + wrap (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const name = options.name || options.kind || fn.name + + return this._tracer.wrap(name, options, fn) + } + + decorate (options = {}) { + const llmobs = this + return function (target, ctxOrPropertyKey, descriptor) { + if (!ctxOrPropertyKey) return target + if (typeof ctxOrPropertyKey === 'object') { + const ctx = ctxOrPropertyKey + if (ctx.kind !== 'method') return target + + return llmobs.wrap({ name: ctx.name, ...options }, target) + } else { + const propertyKey = ctxOrPropertyKey + if (descriptor) { + if (typeof descriptor.value !== 'function') return descriptor + + const original = descriptor.value + descriptor.value = llmobs.wrap({ name: propertyKey, ...options }, original) + + return descriptor + } else { + if (typeof target[propertyKey] !== 'function') return target[propertyKey] + + const original = target[propertyKey] + Object.defineProperty(target, propertyKey, { + ...Object.getOwnPropertyDescriptor(target, propertyKey), + value: llmobs.wrap({ name: propertyKey, ...options }, original) + }) + + return target + } + } + } + } + + annotate (span, options) {} + + exportSpan (span) { + return {} + } + + submitEvaluation (llmobsSpanContext, options) {} + + flush () {} +} + +module.exports = NoopLLMObs diff --git a/packages/dd-trace/src/llmobs/sdk.js b/packages/dd-trace/src/llmobs/sdk.js new file mode 100644 index 00000000000..5717a8a0f19 --- /dev/null +++ b/packages/dd-trace/src/llmobs/sdk.js @@ -0,0 +1,377 @@ +'use strict' + +const { SPAN_KIND, OUTPUT_VALUE } = require('./constants/tags') + +const { + getFunctionArguments, + validateKind +} = require('./util') +const { isTrue } = require('../util') + +const { storage } = require('./storage') + +const Span = require('../opentracing/span') + +const tracerVersion = require('../../../../package.json').version +const logger = require('../log') + +const LLMObsTagger = require('./tagger') + +// communicating with writer +const { channel } = require('dc-polyfill') +const evalMetricAppendCh = channel('llmobs:eval-metric:append') +const flushCh = channel('llmobs:writers:flush') +const NoopLLMObs = require('./noop') + +class LLMObs extends NoopLLMObs { + constructor (tracer, llmobsModule, config) { + super(tracer) + + this._config = config + this._llmobsModule = llmobsModule + this._tagger = new LLMObsTagger(config) + } + + get enabled () { + return this._config.llmobs.enabled + } + + enable (options = {}) { + if (this.enabled) { + logger.debug('LLMObs is already enabled.') + return + } + + logger.debug('Enabling LLMObs') + + const { mlApp, agentlessEnabled } = options + + const { DD_LLMOBS_ENABLED } = process.env + + const llmobsConfig = { + mlApp, + agentlessEnabled + } + + const enabled = DD_LLMOBS_ENABLED == null || isTrue(DD_LLMOBS_ENABLED) + if (!enabled) { + logger.debug('LLMObs.enable() called when DD_LLMOBS_ENABLED is false. No action taken.') + return + } + + this._config.llmobs.enabled = true + this._config.configure({ ...this._config, llmobs: llmobsConfig }) + + // configure writers and channel subscribers + this._llmobsModule.enable(this._config) + } + + disable () { + if (!this.enabled) { + logger.debug('LLMObs is already disabled.') + return + } + + logger.debug('Disabling LLMObs') + + this._config.llmobs.enabled = false + + // disable writers and channel subscribers + this._llmobsModule.disable() + } + + trace (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const kind = validateKind(options.kind) // will throw if kind is undefined or not an expected kind + + // name is required for spans generated with `trace` + // while `kind` is required, this should never throw (as otherwise it would have thrown above) + const name = options.name || kind + if (!name) { + throw new Error('No span name provided for `trace`.') + } + + const { + spanOptions, + ...llmobsOptions + } = this._extractOptions(options) + + if (fn.length > 1) { + return this._tracer.trace(name, spanOptions, (span, cb) => + this._activate(span, { kind, options: llmobsOptions }, () => fn(span, cb)) + ) + } + + return this._tracer.trace(name, spanOptions, span => + this._activate(span, { kind, options: llmobsOptions }, () => fn(span)) + ) + } + + wrap (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const kind = validateKind(options.kind) // will throw if kind is undefined or not an expected kind + let name = options.name || (fn?.name ? fn.name : undefined) || kind + + if (!name) { + logger.warn('No span name provided for `wrap`. Defaulting to "unnamed-anonymous-function".') + name = 'unnamed-anonymous-function' + } + + const { + spanOptions, + ...llmobsOptions + } = this._extractOptions(options) + + const llmobs = this + + function wrapped () { + const span = llmobs._tracer.scope().active() + + const result = llmobs._activate(span, { kind, options: llmobsOptions }, () => { + if (!['llm', 'embedding'].includes(kind)) { + llmobs.annotate(span, { inputData: getFunctionArguments(fn, arguments) }) + } + + return fn.apply(this, arguments) + }) + + if (result && typeof result.then === 'function') { + return result.then(value => { + if (value && !['llm', 'retrieval'].includes(kind) && !LLMObsTagger.tagMap.get(span)?.[OUTPUT_VALUE]) { + llmobs.annotate(span, { outputData: value }) + } + return value + }) + } + + if (result && !['llm', 'retrieval'].includes(kind) && !LLMObsTagger.tagMap.get(span)?.[OUTPUT_VALUE]) { + llmobs.annotate(span, { outputData: result }) + } + + return result + } + + return this._tracer.wrap(name, spanOptions, wrapped) + } + + annotate (span, options) { + if (!this.enabled) return + + if (!span) { + span = this._active() + } + + if ((span && !options) && !(span instanceof Span)) { + options = span + span = this._active() + } + + if (!span) { + throw new Error('No span provided and no active LLMObs-generated span found') + } + if (!options) { + throw new Error('No options provided for annotation.') + } + + if (!LLMObsTagger.tagMap.has(span)) { + throw new Error('Span must be an LLMObs-generated span') + } + if (span._duration !== undefined) { + throw new Error('Cannot annotate a finished span') + } + + const spanKind = LLMObsTagger.tagMap.get(span)[SPAN_KIND] + if (!spanKind) { + throw new Error('LLMObs span must have a span kind specified') + } + + const { inputData, outputData, metadata, metrics, tags } = options + + if (inputData || outputData) { + if (spanKind === 'llm') { + this._tagger.tagLLMIO(span, inputData, outputData) + } else if (spanKind === 'embedding') { + this._tagger.tagEmbeddingIO(span, inputData, outputData) + } else if (spanKind === 'retrieval') { + this._tagger.tagRetrievalIO(span, inputData, outputData) + } else { + this._tagger.tagTextIO(span, inputData, outputData) + } + } + + if (metadata) { + this._tagger.tagMetadata(span, metadata) + } + + if (metrics) { + this._tagger.tagMetrics(span, metrics) + } + + if (tags) { + this._tagger.tagSpanTags(span, tags) + } + } + + exportSpan (span) { + span = span || this._active() + + if (!span) { + throw new Error('No span provided and no active LLMObs-generated span found') + } + + if (!(span instanceof Span)) { + throw new Error('Span must be a valid Span object.') + } + + if (!LLMObsTagger.tagMap.has(span)) { + throw new Error('Span must be an LLMObs-generated span') + } + + try { + return { + traceId: span.context().toTraceId(true), + spanId: span.context().toSpanId() + } + } catch { + logger.warn('Faild to export span. Span must be a valid Span object.') + } + } + + submitEvaluation (llmobsSpanContext, options = {}) { + if (!this.enabled) return + + if (!this._config.apiKey) { + throw new Error( + 'DD_API_KEY is required for sending evaluation metrics. Evaluation metric data will not be sent.\n' + + 'Ensure this configuration is set before running your application.' + ) + } + + const { traceId, spanId } = llmobsSpanContext + if (!traceId || !spanId) { + throw new Error( + 'spanId and traceId must both be specified for the given evaluation metric to be submitted.' + ) + } + + const mlApp = options.mlApp || this._config.llmobs.mlApp + if (!mlApp) { + throw new Error( + 'ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent.' + ) + } + + const timestampMs = options.timestampMs || Date.now() + if (typeof timestampMs !== 'number' || timestampMs < 0) { + throw new Error('timestampMs must be a non-negative integer. Evaluation metric data will not be sent') + } + + const { label, value, tags } = options + const metricType = options.metricType?.toLowerCase() + if (!label) { + throw new Error('label must be the specified name of the evaluation metric') + } + if (!metricType || !['categorical', 'score'].includes(metricType)) { + throw new Error('metricType must be one of "categorical" or "score"') + } + + if (metricType === 'categorical' && typeof value !== 'string') { + throw new Error('value must be a string for a categorical metric.') + } + if (metricType === 'score' && typeof value !== 'number') { + throw new Error('value must be a number for a score metric.') + } + + const evaluationTags = { + 'dd-trace.version': tracerVersion, + ml_app: mlApp + } + + if (tags) { + for (const key in tags) { + const tag = tags[key] + if (typeof tag === 'string') { + evaluationTags[key] = tag + } else if (typeof tag.toString === 'function') { + evaluationTags[key] = tag.toString() + } else if (tag == null) { + evaluationTags[key] = Object.prototype.toString.call(tag) + } else { + // should be a rare case + // every object in JS has a toString, otherwise every primitive has its own toString + // null and undefined are handled above + throw new Error('Failed to parse tags. Tags for evaluation metrics must be strings') + } + } + } + + const payload = { + span_id: spanId, + trace_id: traceId, + label, + metric_type: metricType, + ml_app: mlApp, + [`${metricType}_value`]: value, + timestamp_ms: timestampMs, + tags: Object.entries(evaluationTags).map(([key, value]) => `${key}:${value}`) + } + + evalMetricAppendCh.publish(payload) + } + + flush () { + if (!this.enabled) return + + flushCh.publish() + } + + _active () { + const store = storage.getStore() + return store?.span + } + + _activate (span, { kind, options } = {}, fn) { + const parent = this._active() + if (this.enabled) storage.enterWith({ span }) + + this._tagger.registerLLMObsSpan(span, { + ...options, + parent, + kind + }) + + try { + return fn() + } finally { + if (this.enabled) storage.enterWith({ span: parent }) + } + } + + _extractOptions (options) { + const { + modelName, + modelProvider, + sessionId, + mlApp, + ...spanOptions + } = options + + return { + mlApp, + modelName, + modelProvider, + sessionId, + spanOptions + } + } +} + +module.exports = LLMObs diff --git a/packages/dd-trace/src/llmobs/span_processor.js b/packages/dd-trace/src/llmobs/span_processor.js new file mode 100644 index 00000000000..bc8eeda06b7 --- /dev/null +++ b/packages/dd-trace/src/llmobs/span_processor.js @@ -0,0 +1,195 @@ +'use strict' + +const { + SPAN_KIND, + MODEL_NAME, + MODEL_PROVIDER, + METADATA, + INPUT_MESSAGES, + INPUT_VALUE, + OUTPUT_MESSAGES, + INPUT_DOCUMENTS, + OUTPUT_DOCUMENTS, + OUTPUT_VALUE, + METRICS, + ML_APP, + TAGS, + PARENT_ID_KEY, + SESSION_ID, + NAME +} = require('./constants/tags') +const { UNSERIALIZABLE_VALUE_TEXT } = require('./constants/text') + +const { + ERROR_MESSAGE, + ERROR_TYPE, + ERROR_STACK +} = require('../constants') + +const LLMObsTagger = require('./tagger') + +const tracerVersion = require('../../../../package.json').version +const logger = require('../log') + +class LLMObsSpanProcessor { + constructor (config) { + this._config = config + } + + setWriter (writer) { + this._writer = writer + } + + // TODO: instead of relying on the tagger's weakmap registry, can we use some namespaced storage correlation? + process ({ span }) { + if (!this._config.llmobs.enabled) return + // if the span is not in our private tagger map, it is not an llmobs span + if (!LLMObsTagger.tagMap.has(span)) return + + try { + const formattedEvent = this.format(span) + this._writer.append(formattedEvent) + } catch (e) { + // this should be a rare case + // we protect against unserializable properties in the format function, and in + // safeguards in the tagger + logger.warn(` + Failed to append span to LLM Observability writer, likely due to an unserializable property. + Span won't be sent to LLM Observability: ${e.message} + `) + } + } + + format (span) { + const spanTags = span.context()._tags + const mlObsTags = LLMObsTagger.tagMap.get(span) + + const spanKind = mlObsTags[SPAN_KIND] + + const meta = { 'span.kind': spanKind, input: {}, output: {} } + const input = {} + const output = {} + + if (['llm', 'embedding'].includes(spanKind)) { + meta.model_name = mlObsTags[MODEL_NAME] || 'custom' + meta.model_provider = (mlObsTags[MODEL_PROVIDER] || 'custom').toLowerCase() + } + if (mlObsTags[METADATA]) { + this._addObject(mlObsTags[METADATA], meta.metadata = {}) + } + if (spanKind === 'llm' && mlObsTags[INPUT_MESSAGES]) { + input.messages = mlObsTags[INPUT_MESSAGES] + } + if (mlObsTags[INPUT_VALUE]) { + input.value = mlObsTags[INPUT_VALUE] + } + if (spanKind === 'llm' && mlObsTags[OUTPUT_MESSAGES]) { + output.messages = mlObsTags[OUTPUT_MESSAGES] + } + if (spanKind === 'embedding' && mlObsTags[INPUT_DOCUMENTS]) { + input.documents = mlObsTags[INPUT_DOCUMENTS] + } + if (mlObsTags[OUTPUT_VALUE]) { + output.value = mlObsTags[OUTPUT_VALUE] + } + if (spanKind === 'retrieval' && mlObsTags[OUTPUT_DOCUMENTS]) { + output.documents = mlObsTags[OUTPUT_DOCUMENTS] + } + + const error = spanTags.error || spanTags[ERROR_TYPE] + if (error) { + meta[ERROR_MESSAGE] = spanTags[ERROR_MESSAGE] || error.message || error.code + meta[ERROR_TYPE] = spanTags[ERROR_TYPE] || error.name + meta[ERROR_STACK] = spanTags[ERROR_STACK] || error.stack + } + + if (input) meta.input = input + if (output) meta.output = output + + const metrics = mlObsTags[METRICS] || {} + + const mlApp = mlObsTags[ML_APP] + const sessionId = mlObsTags[SESSION_ID] + const parentId = mlObsTags[PARENT_ID_KEY] + + const name = mlObsTags[NAME] || span._name + + const llmObsSpanEvent = { + trace_id: span.context().toTraceId(true), + span_id: span.context().toSpanId(), + parent_id: parentId, + name, + tags: this._processTags(span, mlApp, sessionId, error), + start_ns: Math.round(span._startTime * 1e6), + duration: Math.round(span._duration * 1e6), + status: error ? 'error' : 'ok', + meta, + metrics, + _dd: { + span_id: span.context().toSpanId(), + trace_id: span.context().toTraceId(true) + } + } + + if (sessionId) llmObsSpanEvent.session_id = sessionId + + return llmObsSpanEvent + } + + // For now, this only applies to metadata, as we let users annotate this field with any object + // However, we want to protect against circular references or BigInts (unserializable) + // This function can be reused for other fields if needed + // Messages, Documents, and Metrics are safeguarded in `llmobs/tagger.js` + _addObject (obj, carrier) { + const seenObjects = new WeakSet() + seenObjects.add(obj) // capture root object + + const isCircular = value => { + if (typeof value !== 'object') return false + if (seenObjects.has(value)) return true + seenObjects.add(value) + return false + } + + const add = (obj, carrier) => { + for (const key in obj) { + const value = obj[key] + if (!Object.prototype.hasOwnProperty.call(obj, key)) continue + if (typeof value === 'bigint' || isCircular(value)) { + // mark as unserializable instead of dropping + logger.warn(`Unserializable property found in metadata: ${key}`) + carrier[key] = UNSERIALIZABLE_VALUE_TEXT + continue + } + if (typeof value === 'object') { + add(value, carrier[key] = {}) + } else { + carrier[key] = value + } + } + } + + add(obj, carrier) + } + + _processTags (span, mlApp, sessionId, error) { + let tags = { + version: this._config.version, + env: this._config.env, + service: this._config.service, + source: 'integration', + ml_app: mlApp, + 'dd-trace.version': tracerVersion, + error: Number(!!error) || 0, + language: 'javascript' + } + const errType = span.context()._tags[ERROR_TYPE] || error?.name + if (errType) tags.error_type = errType + if (sessionId) tags.session_id = sessionId + const existingTags = LLMObsTagger.tagMap.get(span)?.[TAGS] || {} + if (existingTags) tags = { ...tags, ...existingTags } + return Object.entries(tags).map(([key, value]) => `${key}:${value ?? ''}`) + } +} + +module.exports = LLMObsSpanProcessor diff --git a/packages/dd-trace/src/llmobs/storage.js b/packages/dd-trace/src/llmobs/storage.js new file mode 100644 index 00000000000..1362aaf966e --- /dev/null +++ b/packages/dd-trace/src/llmobs/storage.js @@ -0,0 +1,7 @@ +'use strict' + +// TODO: remove this and use namespaced storage once available +const { AsyncLocalStorage } = require('async_hooks') +const storage = new AsyncLocalStorage() + +module.exports = { storage } diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js new file mode 100644 index 00000000000..9f1728e5d7b --- /dev/null +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -0,0 +1,322 @@ +'use strict' + +const log = require('../log') +const { + MODEL_NAME, + MODEL_PROVIDER, + SESSION_ID, + ML_APP, + SPAN_KIND, + INPUT_VALUE, + OUTPUT_DOCUMENTS, + INPUT_DOCUMENTS, + OUTPUT_VALUE, + METADATA, + METRICS, + PARENT_ID_KEY, + INPUT_MESSAGES, + OUTPUT_MESSAGES, + TAGS, + NAME, + PROPAGATED_PARENT_ID_KEY, + ROOT_PARENT_ID, + INPUT_TOKENS_METRIC_KEY, + OUTPUT_TOKENS_METRIC_KEY, + TOTAL_TOKENS_METRIC_KEY +} = require('./constants/tags') + +// global registry of LLMObs spans +// maps LLMObs spans to their annotations +const registry = new WeakMap() + +class LLMObsTagger { + constructor (config, softFail = false) { + this._config = config + + this.softFail = softFail + } + + static get tagMap () { + return registry + } + + registerLLMObsSpan (span, { + modelName, + modelProvider, + sessionId, + mlApp, + parent, + kind, + name + } = {}) { + if (!this._config.llmobs.enabled) return + if (!kind) return // do not register it in the map if it doesn't have an llmobs span kind + + this._register(span) + + if (name) this._setTag(span, NAME, name) + + this._setTag(span, SPAN_KIND, kind) + if (modelName) this._setTag(span, MODEL_NAME, modelName) + if (modelProvider) this._setTag(span, MODEL_PROVIDER, modelProvider) + + sessionId = sessionId || parent?.context()._tags[SESSION_ID] + if (sessionId) this._setTag(span, SESSION_ID, sessionId) + + if (!mlApp) mlApp = parent?.context()._tags[ML_APP] || this._config.llmobs.mlApp + this._setTag(span, ML_APP, mlApp) + + const parentId = + parent?.context().toSpanId() || + span.context()._trace.tags[PROPAGATED_PARENT_ID_KEY] || + ROOT_PARENT_ID + this._setTag(span, PARENT_ID_KEY, parentId) + } + + // TODO: similarly for the following `tag` methods, + // how can we transition from a span weakmap to core API functionality + tagLLMIO (span, inputData, outputData) { + this._tagMessages(span, inputData, INPUT_MESSAGES) + this._tagMessages(span, outputData, OUTPUT_MESSAGES) + } + + tagEmbeddingIO (span, inputData, outputData) { + this._tagDocuments(span, inputData, INPUT_DOCUMENTS) + this._tagText(span, outputData, OUTPUT_VALUE) + } + + tagRetrievalIO (span, inputData, outputData) { + this._tagText(span, inputData, INPUT_VALUE) + this._tagDocuments(span, outputData, OUTPUT_DOCUMENTS) + } + + tagTextIO (span, inputData, outputData) { + this._tagText(span, inputData, INPUT_VALUE) + this._tagText(span, outputData, OUTPUT_VALUE) + } + + tagMetadata (span, metadata) { + this._setTag(span, METADATA, metadata) + } + + tagMetrics (span, metrics) { + const filterdMetrics = {} + for (const [key, value] of Object.entries(metrics)) { + let processedKey = key + + // processing these specifically for our metrics ingestion + switch (key) { + case 'inputTokens': + processedKey = INPUT_TOKENS_METRIC_KEY + break + case 'outputTokens': + processedKey = OUTPUT_TOKENS_METRIC_KEY + break + case 'totalTokens': + processedKey = TOTAL_TOKENS_METRIC_KEY + break + } + + if (typeof value === 'number') { + filterdMetrics[processedKey] = value + } else { + this._handleFailure(`Value for metric '${key}' must be a number, instead got ${value}`) + } + } + + this._setTag(span, METRICS, filterdMetrics) + } + + tagSpanTags (span, tags) { + // new tags will be merged with existing tags + const currentTags = registry.get(span)?.[TAGS] + if (currentTags) { + Object.assign(tags, currentTags) + } + this._setTag(span, TAGS, tags) + } + + _tagText (span, data, key) { + if (data) { + if (typeof data === 'string') { + this._setTag(span, key, data) + } else { + try { + this._setTag(span, key, JSON.stringify(data)) + } catch { + const type = key === INPUT_VALUE ? 'input' : 'output' + this._handleFailure(`Failed to parse ${type} value, must be JSON serializable.`) + } + } + } + } + + _tagDocuments (span, data, key) { + if (data) { + if (!Array.isArray(data)) { + data = [data] + } + + const documents = data.map(document => { + if (typeof document === 'string') { + return { text: document } + } + + if (document == null || typeof document !== 'object') { + this._handleFailure('Documents must be a string, object, or list of objects.') + return undefined + } + + const { text, name, id, score } = document + let validDocument = true + + if (typeof text !== 'string') { + this._handleFailure('Document text must be a string.') + validDocument = false + } + + const documentObj = { text } + + validDocument = this._tagConditionalString(name, 'Document name', documentObj, 'name') && validDocument + validDocument = this._tagConditionalString(id, 'Document ID', documentObj, 'id') && validDocument + validDocument = this._tagConditionalNumber(score, 'Document score', documentObj, 'score') && validDocument + + return validDocument ? documentObj : undefined + }).filter(doc => !!doc) + + if (documents.length) { + this._setTag(span, key, documents) + } + } + } + + _tagMessages (span, data, key) { + if (data) { + if (!Array.isArray(data)) { + data = [data] + } + + const messages = data.map(message => { + if (typeof message === 'string') { + return { content: message } + } + + if (message == null || typeof message !== 'object') { + this._handleFailure('Messages must be a string, object, or list of objects') + return undefined + } + + let validMessage = true + + const { content = '', role } = message + let toolCalls = message.toolCalls + const messageObj = { content } + + if (typeof content !== 'string') { + this._handleFailure('Message content must be a string.') + validMessage = false + } + + validMessage = this._tagConditionalString(role, 'Message role', messageObj, 'role') && validMessage + + if (toolCalls) { + if (!Array.isArray(toolCalls)) { + toolCalls = [toolCalls] + } + + const filteredToolCalls = toolCalls.map(toolCall => { + if (typeof toolCall !== 'object') { + this._handleFailure('Tool call must be an object.') + return undefined + } + + let validTool = true + + const { name, arguments: args, toolId, type } = toolCall + const toolCallObj = {} + + validTool = this._tagConditionalString(name, 'Tool name', toolCallObj, 'name') && validTool + validTool = this._tagConditionalObject(args, 'Tool arguments', toolCallObj, 'arguments') && validTool + validTool = this._tagConditionalString(toolId, 'Tool ID', toolCallObj, 'tool_id') && validTool + validTool = this._tagConditionalString(type, 'Tool type', toolCallObj, 'type') && validTool + + return validTool ? toolCallObj : undefined + }).filter(toolCall => !!toolCall) + + if (filteredToolCalls.length) { + messageObj.tool_calls = filteredToolCalls + } + } + + return validMessage ? messageObj : undefined + }).filter(msg => !!msg) + + if (messages.length) { + this._setTag(span, key, messages) + } + } + } + + _tagConditionalString (data, type, carrier, key) { + if (!data) return true + if (typeof data !== 'string') { + this._handleFailure(`"${type}" must be a string.`) + return false + } + carrier[key] = data + return true + } + + _tagConditionalNumber (data, type, carrier, key) { + if (!data) return true + if (typeof data !== 'number') { + this._handleFailure(`"${type}" must be a number.`) + return false + } + carrier[key] = data + return true + } + + _tagConditionalObject (data, type, carrier, key) { + if (!data) return true + if (typeof data !== 'object') { + this._handleFailure(`"${type}" must be an object.`) + return false + } + carrier[key] = data + return true + } + + // any public-facing LLMObs APIs using this tagger should not soft fail + // auto-instrumentation should soft fail + _handleFailure (msg) { + if (this.softFail) { + log.warn(msg) + } else { + throw new Error(msg) + } + } + + _register (span) { + if (!this._config.llmobs.enabled) return + if (registry.has(span)) { + this._handleFailure(`LLMObs Span "${span._name}" already registered.`) + return + } + + registry.set(span, {}) + } + + _setTag (span, key, value) { + if (!this._config.llmobs.enabled) return + if (!registry.has(span)) { + this._handleFailure('Span must be an LLMObs generated span.') + return + } + + const tagsCarrier = registry.get(span) + Object.assign(tagsCarrier, { [key]: value }) + } +} + +module.exports = LLMObsTagger diff --git a/packages/dd-trace/src/llmobs/util.js b/packages/dd-trace/src/llmobs/util.js new file mode 100644 index 00000000000..feba656f952 --- /dev/null +++ b/packages/dd-trace/src/llmobs/util.js @@ -0,0 +1,176 @@ +'use strict' + +const { SPAN_KINDS } = require('./constants/tags') + +function encodeUnicode (str) { + if (!str) return str + return str.split('').map(char => { + const code = char.charCodeAt(0) + if (code > 127) { + return `\\u${code.toString(16).padStart(4, '0')}` + } + return char + }).join('') +} + +function validateKind (kind) { + if (!SPAN_KINDS.includes(kind)) { + throw new Error(` + Invalid span kind specified: "${kind}" + Must be one of: ${SPAN_KINDS.join(', ')} + `) + } + + return kind +} + +// extracts the argument names from a function string +function parseArgumentNames (str) { + const result = [] + let current = '' + let closerCount = 0 + let recording = true + let inSingleLineComment = false + let inMultiLineComment = false + + for (let i = 0; i < str.length; i++) { + const char = str[i] + const nextChar = str[i + 1] + + // Handle single-line comments + if (!inMultiLineComment && char === '/' && nextChar === '/') { + inSingleLineComment = true + i++ // Skip the next character + continue + } + + // Handle multi-line comments + if (!inSingleLineComment && char === '/' && nextChar === '*') { + inMultiLineComment = true + i++ // Skip the next character + continue + } + + // End of single-line comment + if (inSingleLineComment && char === '\n') { + inSingleLineComment = false + continue + } + + // End of multi-line comment + if (inMultiLineComment && char === '*' && nextChar === '/') { + inMultiLineComment = false + i++ // Skip the next character + continue + } + + // Skip characters inside comments + if (inSingleLineComment || inMultiLineComment) { + continue + } + + if (['{', '[', '('].includes(char)) { + closerCount++ + } else if (['}', ']', ')'].includes(char)) { + closerCount-- + } else if (char === '=' && nextChar !== '>' && closerCount === 0) { + recording = false + // record the variable name early, and stop counting characters until we reach the next comma + result.push(current.trim()) + current = '' + continue + } else if (char === ',' && closerCount === 0) { + if (recording) { + result.push(current.trim()) + current = '' + } + + recording = true + continue + } + + if (recording) { + current += char + } + } + + if (current && recording) { + result.push(current.trim()) + } + + return result +} + +// finds the bounds of the arguments in a function string +function findArgumentsBounds (str) { + let start = -1 + let end = -1 + let closerCount = 0 + + for (let i = 0; i < str.length; i++) { + const char = str[i] + + if (char === '(') { + if (closerCount === 0) { + start = i + } + + closerCount++ + } else if (char === ')') { + closerCount-- + + if (closerCount === 0) { + end = i + break + } + } + } + + return { start, end } +} + +const memo = new WeakMap() +function getFunctionArguments (fn, args = []) { + if (!fn) return + if (!args.length) return + if (args.length === 1) return args[0] + + try { + let names + if (memo.has(fn)) { + names = memo.get(fn) + } else { + const fnString = fn.toString() + const { start, end } = findArgumentsBounds(fnString) + names = parseArgumentNames(fnString.slice(start + 1, end)) + memo.set(fn, names) + } + + const argsObject = {} + + for (const argIdx in args) { + const name = names[argIdx] + const arg = args[argIdx] + + const spread = name?.startsWith('...') + + // this can only be the last argument + if (spread) { + argsObject[name.slice(3)] = args.slice(argIdx) + break + } + + argsObject[name] = arg + } + + return argsObject + } catch { + return args + } +} + +module.exports = { + encodeUnicode, + validateKind, + getFunctionArguments +} diff --git a/packages/dd-trace/src/llmobs/writers/base.js b/packages/dd-trace/src/llmobs/writers/base.js new file mode 100644 index 00000000000..8a6cdae9c2f --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/base.js @@ -0,0 +1,111 @@ +'use strict' + +const request = require('../../exporters/common/request') +const { URL, format } = require('url') + +const logger = require('../../log') + +const { encodeUnicode } = require('../util') +const log = require('../../log') + +class BaseLLMObsWriter { + constructor ({ interval, timeout, endpoint, intake, eventType, protocol, port }) { + this._interval = interval || 1000 // 1s + this._timeout = timeout || 5000 // 5s + this._eventType = eventType + + this._buffer = [] + this._bufferLimit = 1000 + this._bufferSize = 0 + + this._url = new URL(format({ + protocol: protocol || 'https:', + hostname: intake, + port: port || 443, + pathname: endpoint + })) + + this._headers = { + 'Content-Type': 'application/json' + } + + this._periodic = setInterval(() => { + this.flush() + }, this._interval).unref() + + process.once('beforeExit', () => { + this.destroy() + }) + + this._destroyed = false + + logger.debug(`Started ${this.constructor.name} to ${this._url}`) + } + + append (event, byteLength) { + if (this._buffer.length >= this._bufferLimit) { + logger.warn(`${this.constructor.name} event buffer full (limit is ${this._bufferLimit}), dropping event`) + return + } + + this._bufferSize += byteLength || Buffer.from(JSON.stringify(event)).byteLength + this._buffer.push(event) + } + + flush () { + if (this._buffer.length === 0) { + return + } + + const events = this._buffer + this._buffer = [] + this._bufferSize = 0 + const payload = this._encode(this.makePayload(events)) + + const options = { + headers: this._headers, + method: 'POST', + url: this._url, + timeout: this._timeout + } + + log.debug(`Encoded LLMObs payload: ${payload}`) + + request(payload, options, (err, resp, code) => { + if (err) { + logger.error( + `Error sending ${events.length} LLMObs ${this._eventType} events to ${this._url}: ${err.message}` + ) + } else if (code >= 300) { + logger.error( + `Error sending ${events.length} LLMObs ${this._eventType} events to ${this._url}: ${code}` + ) + } else { + logger.debug(`Sent ${events.length} LLMObs ${this._eventType} events to ${this._url}`) + } + }) + } + + makePayload (events) {} + + destroy () { + if (!this._destroyed) { + logger.debug(`Stopping ${this.constructor.name}`) + clearInterval(this._periodic) + process.removeListener('beforeExit', this.destroy) + this.flush() + this._destroyed = true + } + } + + _encode (payload) { + return JSON.stringify(payload, (key, value) => { + if (typeof value === 'string') { + return encodeUnicode(value) // serialize unicode characters + } + return value + }).replace(/\\\\u/g, '\\u') // remove double escaping + } +} + +module.exports = BaseLLMObsWriter diff --git a/packages/dd-trace/src/llmobs/writers/evaluations.js b/packages/dd-trace/src/llmobs/writers/evaluations.js new file mode 100644 index 00000000000..d737f68c82c --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/evaluations.js @@ -0,0 +1,29 @@ +'use strict' + +const { AGENTLESS_EVALULATIONS_ENDPOINT } = require('../constants/writers') +const BaseWriter = require('./base') + +class LLMObsEvalMetricsWriter extends BaseWriter { + constructor (config) { + super({ + endpoint: AGENTLESS_EVALULATIONS_ENDPOINT, + intake: `api.${config.site}`, + eventType: 'evaluation_metric' + }) + + this._headers['DD-API-KEY'] = config.apiKey + } + + makePayload (events) { + return { + data: { + type: this._eventType, + attributes: { + metrics: events + } + } + } + } +} + +module.exports = LLMObsEvalMetricsWriter diff --git a/packages/dd-trace/src/llmobs/writers/spans/agentProxy.js b/packages/dd-trace/src/llmobs/writers/spans/agentProxy.js new file mode 100644 index 00000000000..6274f6117e0 --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/spans/agentProxy.js @@ -0,0 +1,23 @@ +'use strict' + +const { + EVP_SUBDOMAIN_HEADER_NAME, + EVP_SUBDOMAIN_HEADER_VALUE, + EVP_PROXY_AGENT_ENDPOINT +} = require('../../constants/writers') +const LLMObsBaseSpanWriter = require('./base') + +class LLMObsAgentProxySpanWriter extends LLMObsBaseSpanWriter { + constructor (config) { + super({ + intake: config.hostname || 'localhost', + protocol: 'http:', + endpoint: EVP_PROXY_AGENT_ENDPOINT, + port: config.port + }) + + this._headers[EVP_SUBDOMAIN_HEADER_NAME] = EVP_SUBDOMAIN_HEADER_VALUE + } +} + +module.exports = LLMObsAgentProxySpanWriter diff --git a/packages/dd-trace/src/llmobs/writers/spans/agentless.js b/packages/dd-trace/src/llmobs/writers/spans/agentless.js new file mode 100644 index 00000000000..452f41d541a --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/spans/agentless.js @@ -0,0 +1,17 @@ +'use strict' + +const { AGENTLESS_SPANS_ENDPOINT } = require('../../constants/writers') +const LLMObsBaseSpanWriter = require('./base') + +class LLMObsAgentlessSpanWriter extends LLMObsBaseSpanWriter { + constructor (config) { + super({ + intake: `llmobs-intake.${config.site}`, + endpoint: AGENTLESS_SPANS_ENDPOINT + }) + + this._headers['DD-API-KEY'] = config.apiKey + } +} + +module.exports = LLMObsAgentlessSpanWriter diff --git a/packages/dd-trace/src/llmobs/writers/spans/base.js b/packages/dd-trace/src/llmobs/writers/spans/base.js new file mode 100644 index 00000000000..f5fe3443f2d --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/spans/base.js @@ -0,0 +1,49 @@ +'use strict' + +const { EVP_EVENT_SIZE_LIMIT, EVP_PAYLOAD_SIZE_LIMIT } = require('../../constants/writers') +const { DROPPED_VALUE_TEXT } = require('../../constants/text') +const { DROPPED_IO_COLLECTION_ERROR } = require('../../constants/tags') +const BaseWriter = require('../base') +const logger = require('../../../log') + +class LLMObsSpanWriter extends BaseWriter { + constructor (options) { + super({ + ...options, + eventType: 'span' + }) + } + + append (event) { + const eventSizeBytes = Buffer.from(JSON.stringify(event)).byteLength + if (eventSizeBytes > EVP_EVENT_SIZE_LIMIT) { + logger.warn(`Dropping event input/output because its size (${eventSizeBytes}) exceeds the 1MB event size limit`) + event = this._truncateSpanEvent(event) + } + + if (this._bufferSize + eventSizeBytes > EVP_PAYLOAD_SIZE_LIMIT) { + logger.debug('Flusing queue because queing next event will exceed EvP payload limit') + this.flush() + } + + super.append(event, eventSizeBytes) + } + + makePayload (events) { + return { + '_dd.stage': 'raw', + event_type: this._eventType, + spans: events + } + } + + _truncateSpanEvent (event) { + event.meta.input = { value: DROPPED_VALUE_TEXT } + event.meta.output = { value: DROPPED_VALUE_TEXT } + + event.collection_errors = [DROPPED_IO_COLLECTION_ERROR] + return event + } +} + +module.exports = LLMObsSpanWriter diff --git a/packages/dd-trace/src/noop/proxy.js b/packages/dd-trace/src/noop/proxy.js index 417cb846f8d..ec8671a371e 100644 --- a/packages/dd-trace/src/noop/proxy.js +++ b/packages/dd-trace/src/noop/proxy.js @@ -3,16 +3,19 @@ const NoopTracer = require('./tracer') const NoopAppsecSdk = require('../appsec/sdk/noop') const NoopDogStatsDClient = require('./dogstatsd') +const NoopLLMObsSDK = require('../llmobs/noop') const noop = new NoopTracer() const noopAppsec = new NoopAppsecSdk() const noopDogStatsDClient = new NoopDogStatsDClient() +const noopLLMObs = new NoopLLMObsSDK(noop) class Tracer { constructor () { this._tracer = noop this.appsec = noopAppsec this.dogstatsd = noopDogStatsDClient + this.llmobs = noopLLMObs } init () { diff --git a/packages/dd-trace/src/proxy.js b/packages/dd-trace/src/proxy.js index b8916b205d4..32a7dcee10a 100644 --- a/packages/dd-trace/src/proxy.js +++ b/packages/dd-trace/src/proxy.js @@ -16,6 +16,7 @@ const NoopDogStatsDClient = require('./noop/dogstatsd') const spanleak = require('./spanleak') const { SSIHeuristics } = require('./profiling/ssi-heuristics') const appsecStandalone = require('./appsec/standalone') +const LLMObsSDK = require('./llmobs/sdk') class LazyModule { constructor (provider) { @@ -46,7 +47,8 @@ class Tracer extends NoopProxy { // these requires must work with esm bundler this._modules = { appsec: new LazyModule(() => require('./appsec')), - iast: new LazyModule(() => require('./appsec/iast')) + iast: new LazyModule(() => require('./appsec/iast')), + llmobs: new LazyModule(() => require('./llmobs')) } } @@ -195,11 +197,15 @@ class Tracer extends NoopProxy { if (config.appsec.enabled) { this._modules.appsec.enable(config) } + if (config.llmobs.enabled) { + this._modules.llmobs.enable(config) + } if (!this._tracingInitialized) { const prioritySampler = appsecStandalone.configure(config) this._tracer = new DatadogTracer(config, prioritySampler) this.dataStreamsCheckpointer = this._tracer.dataStreamsCheckpointer this.appsec = new AppsecSdk(this._tracer, config) + this.llmobs = new LLMObsSDK(this._tracer, this._modules.llmobs, config) this._tracingInitialized = true } if (config.iast.enabled) { @@ -208,6 +214,7 @@ class Tracer extends NoopProxy { } else if (this._tracingInitialized) { this._modules.appsec.disable() this._modules.iast.disable() + this._modules.llmobs.disable() } if (this._tracingInitialized) { diff --git a/packages/dd-trace/src/span_processor.js b/packages/dd-trace/src/span_processor.js index 6dc19407d56..deb92c02f34 100644 --- a/packages/dd-trace/src/span_processor.js +++ b/packages/dd-trace/src/span_processor.js @@ -10,6 +10,9 @@ const { SpanStatsProcessor } = require('./span_stats') const startedSpans = new WeakSet() const finishedSpans = new WeakSet() +const { channel } = require('dc-polyfill') +const spanProcessCh = channel('dd-trace:span:process') + class SpanProcessor { constructor (exporter, prioritySampler, config) { this._exporter = exporter @@ -45,6 +48,8 @@ class SpanProcessor { const formattedSpan = format(span) this._stats.onSpanFinished(formattedSpan) formatted.push(formattedSpan) + + spanProcessCh.publish({ span }) } else { active.push(span) } diff --git a/packages/dd-trace/test/config.spec.js b/packages/dd-trace/test/config.spec.js index f083fa4b07d..804476a87c9 100644 --- a/packages/dd-trace/test/config.spec.js +++ b/packages/dd-trace/test/config.spec.js @@ -266,6 +266,9 @@ describe('Config', () => { expect(config).to.have.nested.property('installSignature.id', null) expect(config).to.have.nested.property('installSignature.time', null) expect(config).to.have.nested.property('installSignature.type', null) + expect(config).to.have.nested.property('llmobs.mlApp', undefined) + expect(config).to.have.nested.property('llmobs.agentlessEnabled', false) + expect(config).to.have.nested.property('llmobs.enabled', false) expect(updateConfig).to.be.calledOnce @@ -330,6 +333,8 @@ describe('Config', () => { { name: 'isGitUploadEnabled', value: false, origin: 'default' }, { name: 'isIntelligentTestRunnerEnabled', value: false, origin: 'default' }, { name: 'isManualApiEnabled', value: false, origin: 'default' }, + { name: 'llmobs.agentlessEnabled', value: false, origin: 'default' }, + { name: 'llmobs.mlApp', value: undefined, origin: 'default' }, { name: 'ciVisibilityTestSessionName', value: '', origin: 'default' }, { name: 'ciVisAgentlessLogSubmissionEnabled', value: false, origin: 'default' }, { name: 'isTestDynamicInstrumentationEnabled', value: false, origin: 'default' }, @@ -502,6 +507,8 @@ describe('Config', () => { process.env.DD_INSTRUMENTATION_INSTALL_TYPE = 'k8s_single_step' process.env.DD_INSTRUMENTATION_INSTALL_TIME = '1703188212' process.env.DD_INSTRUMENTATION_CONFIG_ID = 'abcdef123' + process.env.DD_LLMOBS_AGENTLESS_ENABLED = 'true' + process.env.DD_LLMOBS_ML_APP = 'myMlApp' process.env.DD_TRACE_ENABLED = 'true' process.env.DD_GRPC_CLIENT_ERROR_STATUSES = '3,13,400-403' process.env.DD_GRPC_SERVER_ERROR_STATUSES = '3,13,400-403' @@ -604,6 +611,8 @@ describe('Config', () => { type: 'k8s_single_step', time: '1703188212' }) + expect(config).to.have.nested.property('llmobs.mlApp', 'myMlApp') + expect(config).to.have.nested.property('llmobs.agentlessEnabled', true) expect(updateConfig).to.be.calledOnce @@ -669,7 +678,9 @@ describe('Config', () => { { name: 'traceId128BitGenerationEnabled', value: true, origin: 'env_var' }, { name: 'traceId128BitLoggingEnabled', value: true, origin: 'env_var' }, { name: 'tracing', value: false, origin: 'env_var' }, - { name: 'version', value: '1.0.0', origin: 'env_var' } + { name: 'version', value: '1.0.0', origin: 'env_var' }, + { name: 'llmobs.mlApp', value: 'myMlApp', origin: 'env_var' }, + { name: 'llmobs.agentlessEnabled', value: true, origin: 'env_var' } ]) }) @@ -819,7 +830,12 @@ describe('Config', () => { pollInterval: 42 }, traceId128BitGenerationEnabled: true, - traceId128BitLoggingEnabled: true + traceId128BitLoggingEnabled: true, + llmobs: { + mlApp: 'myMlApp', + agentlessEnabled: true, + apiKey: 'myApiKey' + } }) expect(config).to.have.property('protocolVersion', '0.5') @@ -894,6 +910,8 @@ describe('Config', () => { a: 'aa', b: 'bb' }) + expect(config).to.have.nested.property('llmobs.mlApp', 'myMlApp') + expect(config).to.have.nested.property('llmobs.agentlessEnabled', true) expect(updateConfig).to.be.calledOnce @@ -941,7 +959,9 @@ describe('Config', () => { { name: 'stats.enabled', value: false, origin: 'calculated' }, { name: 'traceId128BitGenerationEnabled', value: true, origin: 'code' }, { name: 'traceId128BitLoggingEnabled', value: true, origin: 'code' }, - { name: 'version', value: '0.1.0', origin: 'code' } + { name: 'version', value: '0.1.0', origin: 'code' }, + { name: 'llmobs.mlApp', value: 'myMlApp', origin: 'code' }, + { name: 'llmobs.agentlessEnabled', value: true, origin: 'code' } ]) }) @@ -1142,6 +1162,8 @@ describe('Config', () => { process.env.DD_IAST_REDACTION_VALUE_PATTERN = 'value_pattern_to_be_overriden_by_options' process.env.DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED = 'true' process.env.DD_TRACE_128_BIT_TRACEID_LOGGING_ENABLED = 'true' + process.env.DD_LLMOBS_ML_APP = 'myMlApp' + process.env.DD_LLMOBS_AGENTLESS_ENABLED = 'true' const config = new Config({ protocolVersion: '0.5', @@ -1223,7 +1245,11 @@ describe('Config', () => { enabled: false }, traceId128BitGenerationEnabled: false, - traceId128BitLoggingEnabled: false + traceId128BitLoggingEnabled: false, + llmobs: { + mlApp: 'myOtherMlApp', + agentlessEnabled: false + } }) expect(config).to.have.property('protocolVersion', '0.5') @@ -1284,6 +1310,8 @@ describe('Config', () => { expect(config).to.have.nested.property('iast.redactionEnabled', true) expect(config).to.have.nested.property('iast.redactionNamePattern', 'REDACTION_NAME_PATTERN') expect(config).to.have.nested.property('iast.redactionValuePattern', 'REDACTION_VALUE_PATTERN') + expect(config).to.have.nested.property('llmobs.mlApp', 'myOtherMlApp') + expect(config).to.have.nested.property('llmobs.agentlessEnabled', false) }) it('should give priority to non-experimental options', () => { @@ -2076,6 +2104,61 @@ describe('Config', () => { }) }) + context('llmobs config', () => { + it('should disable llmobs by default', () => { + const config = new Config() + expect(config.llmobs.enabled).to.be.false + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: false, origin: 'default' + }) + }) + + it('should enable llmobs if DD_LLMOBS_ENABLED is set to true', () => { + process.env.DD_LLMOBS_ENABLED = 'true' + const config = new Config() + expect(config.llmobs.enabled).to.be.true + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: true, origin: 'env_var' + }) + }) + + it('should disable llmobs if DD_LLMOBS_ENABLED is set to false', () => { + process.env.DD_LLMOBS_ENABLED = 'false' + const config = new Config() + expect(config.llmobs.enabled).to.be.false + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: false, origin: 'env_var' + }) + }) + + it('should enable llmobs with options and DD_LLMOBS_ENABLED is not set', () => { + const config = new Config({ llmobs: {} }) + expect(config.llmobs.enabled).to.be.true + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: true, origin: 'code' + }) + }) + + it('should have DD_LLMOBS_ENABLED take priority over options', () => { + process.env.DD_LLMOBS_ENABLED = 'false' + const config = new Config({ llmobs: {} }) + expect(config.llmobs.enabled).to.be.false + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: false, origin: 'env_var' + }) + }) + }) + it('should sanitize values for API Security sampling between 0 and 1', () => { expect(new Config({ appsec: { diff --git a/packages/dd-trace/test/llmobs/index.spec.js b/packages/dd-trace/test/llmobs/index.spec.js new file mode 100644 index 00000000000..cdceeab64ab --- /dev/null +++ b/packages/dd-trace/test/llmobs/index.spec.js @@ -0,0 +1,137 @@ +'use strict' + +const proxyquire = require('proxyquire') + +const { channel } = require('dc-polyfill') +const spanProcessCh = channel('dd-trace:span:process') +const evalMetricAppendCh = channel('llmobs:eval-metric:append') +const flushCh = channel('llmobs:writers:flush') +const injectCh = channel('dd-trace:span:inject') + +const LLMObsEvalMetricsWriter = require('../../src/llmobs/writers/evaluations') + +const config = { + llmobs: { + mlApp: 'test' + } +} + +describe('module', () => { + let llmobsModule + let store + let logger + + let LLMObsAgentlessSpanWriter + let LLMObsAgentProxySpanWriter + + before(() => { + sinon.stub(LLMObsEvalMetricsWriter.prototype, 'append') + }) + + beforeEach(() => { + store = {} + logger = { debug: sinon.stub() } + + LLMObsAgentlessSpanWriter = sinon.stub().returns({ + destroy: sinon.stub() + }) + LLMObsAgentProxySpanWriter = sinon.stub().returns({ + destroy: sinon.stub() + }) + + llmobsModule = proxyquire('../../../dd-trace/src/llmobs', { + '../log': logger, + './writers/spans/agentless': LLMObsAgentlessSpanWriter, + './writers/spans/agentProxy': LLMObsAgentProxySpanWriter, + './storage': { + storage: { + getStore () { + return store + } + } + } + }) + + process.removeAllListeners('beforeExit') + }) + + afterEach(() => { + LLMObsAgentProxySpanWriter.resetHistory() + LLMObsAgentlessSpanWriter.resetHistory() + LLMObsEvalMetricsWriter.prototype.append.resetHistory() + llmobsModule.disable() + }) + + after(() => { + LLMObsEvalMetricsWriter.prototype.append.restore() + sinon.restore() + + // get rid of mock stubs for writers + delete require.cache[require.resolve('../../../dd-trace/src/llmobs')] + }) + + describe('handle llmobs info injection', () => { + it('injects LLMObs parent ID when there is a parent LLMObs span', () => { + llmobsModule.enable(config) + store.span = { + context () { + return { + toSpanId () { + return 'parent-id' + } + } + } + } + + const carrier = { + 'x-datadog-tags': '' + } + injectCh.publish({ carrier }) + + expect(carrier['x-datadog-tags']).to.equal(',_dd.p.llmobs_parent_id=parent-id') + }) + + it('does not inject LLMObs parent ID when there is no parent LLMObs span', () => { + llmobsModule.enable(config) + + const carrier = { + 'x-datadog-tags': '' + } + injectCh.publish({ carrier }) + expect(carrier['x-datadog-tags']).to.equal('') + }) + }) + + it('uses the agent proxy span writer', () => { + llmobsModule.enable(config) + expect(LLMObsAgentProxySpanWriter).to.have.been.called + }) + + it('uses the agentless span writer', () => { + config.llmobs.agentlessEnabled = true + llmobsModule.enable(config) + expect(LLMObsAgentlessSpanWriter).to.have.been.called + delete config.llmobs.agentlessEnabled + }) + + it('appends to the eval metric writer', () => { + llmobsModule.enable(config) + + const payload = {} + + evalMetricAppendCh.publish(payload) + + expect(LLMObsEvalMetricsWriter.prototype.append).to.have.been.calledWith(payload) + }) + + it('removes all subscribers when disabling', () => { + llmobsModule.enable(config) + + llmobsModule.disable() + + expect(injectCh.hasSubscribers).to.be.false + expect(evalMetricAppendCh.hasSubscribers).to.be.false + expect(spanProcessCh.hasSubscribers).to.be.false + expect(flushCh.hasSubscribers).to.be.false + }) +}) diff --git a/packages/dd-trace/test/llmobs/noop.spec.js b/packages/dd-trace/test/llmobs/noop.spec.js new file mode 100644 index 00000000000..36dd2279390 --- /dev/null +++ b/packages/dd-trace/test/llmobs/noop.spec.js @@ -0,0 +1,58 @@ +'use strict' + +describe('noop', () => { + let tracer + let llmobs + + before(() => { + tracer = new (require('../../../dd-trace/src/noop/proxy'))() + llmobs = tracer.llmobs + }) + + const nonTracingOps = ['enable', 'disable', 'annotate', 'exportSpan', 'submitEvaluation', 'flush'] + for (const op of nonTracingOps) { + it(`using "${op}" should not throw`, () => { + llmobs[op]() + }) + } + + describe('trace', () => { + it('should not throw with just a span', () => { + const res = llmobs.trace({}, (span) => { + expect(() => span.setTag('foo', 'bar')).does.not.throw + return 1 + }) + + expect(res).to.equal(1) + }) + + it('should not throw with a span and a callback', async () => { + const prom = llmobs.trace({}, (span, cb) => { + expect(() => span.setTag('foo', 'bar')).does.not.throw + expect(() => cb()).does.not.throw + return Promise.resolve(5) + }) + + expect(await prom).to.equal(5) + }) + }) + + describe('wrap', () => { + it('should not throw with just a span', () => { + function fn () { + return 1 + } + + const wrapped = llmobs.wrap({}, fn) + expect(wrapped()).to.equal(1) + }) + + it('should not throw with a span and a callback', async () => { + function fn () { + return Promise.resolve(5) + } + const wrapped = llmobs.wrap({}, fn) + expect(await wrapped()).to.equal(5) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/sdk/index.spec.js b/packages/dd-trace/test/llmobs/sdk/index.spec.js new file mode 100644 index 00000000000..90415f9bd0b --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/index.spec.js @@ -0,0 +1,1027 @@ +'use strict' + +const { expect } = require('chai') +const Config = require('../../../src/config') + +const LLMObsTagger = require('../../../src/llmobs/tagger') +const LLMObsEvalMetricsWriter = require('../../../src/llmobs/writers/evaluations') +const LLMObsAgentProxySpanWriter = require('../../../src/llmobs/writers/spans/agentProxy') +const LLMObsSpanProcessor = require('../../../src/llmobs/span_processor') + +const tracerVersion = require('../../../../../package.json').version + +const { channel } = require('dc-polyfill') +const injectCh = channel('dd-trace:span:inject') + +describe('sdk', () => { + let LLMObsSDK + let llmobs + let tracer + + before(() => { + tracer = require('../../../../dd-trace') + tracer.init({ + service: 'service', + llmobs: { + mlApp: 'mlApp' + } + }) + llmobs = tracer.llmobs + + // spy on properties + sinon.spy(LLMObsSpanProcessor.prototype, 'process') + sinon.spy(LLMObsSpanProcessor.prototype, 'format') + sinon.spy(tracer._tracer._processor, 'process') + + // stub writer functionality + sinon.stub(LLMObsEvalMetricsWriter.prototype, 'append') + sinon.stub(LLMObsEvalMetricsWriter.prototype, 'flush') + sinon.stub(LLMObsAgentProxySpanWriter.prototype, 'append') + sinon.stub(LLMObsAgentProxySpanWriter.prototype, 'flush') + + LLMObsSDK = require('../../../src/llmobs/sdk') + + // remove max listener warnings, we don't care about the writer anyways + process.removeAllListeners('beforeExit') + }) + + afterEach(() => { + LLMObsSpanProcessor.prototype.process.resetHistory() + LLMObsSpanProcessor.prototype.format.resetHistory() + tracer._tracer._processor.process.resetHistory() + + LLMObsEvalMetricsWriter.prototype.append.resetHistory() + LLMObsEvalMetricsWriter.prototype.flush.resetHistory() + + LLMObsAgentProxySpanWriter.prototype.append.resetHistory() + LLMObsAgentProxySpanWriter.prototype.flush.resetHistory() + + process.removeAllListeners('beforeExit') + }) + + after(() => { + sinon.restore() + llmobs.disable() + }) + + describe('enabled', () => { + for (const [value, label] of [ + [true, 'enabled'], + [false, 'disabled'] + ]) { + it(`returns ${value} when llmobs is ${label}`, () => { + const enabledOrDisabledLLMObs = new LLMObsSDK(null, { disable () {} }, { llmobs: { enabled: value } }) + + expect(enabledOrDisabledLLMObs.enabled).to.equal(value) + enabledOrDisabledLLMObs.disable() // unsubscribe + }) + } + }) + + describe('enable', () => { + it('enables llmobs if it is disabled', () => { + const config = new Config({}) + const llmobsModule = { + enable: sinon.stub(), + disable () {} + } + + // do not fully enable a disabled llmobs + const disabledLLMObs = new LLMObsSDK(tracer._tracer, llmobsModule, config) + + disabledLLMObs.enable({ + mlApp: 'mlApp' + }) + + expect(disabledLLMObs.enabled).to.be.true + expect(disabledLLMObs._config.llmobs.mlApp).to.equal('mlApp') + expect(disabledLLMObs._config.llmobs.agentlessEnabled).to.be.false + + expect(llmobsModule.enable).to.have.been.called + + disabledLLMObs.disable() // unsubscribe + }) + + it('does not enable llmobs if it is already enabled', () => { + sinon.spy(llmobs._llmobsModule, 'enable') + llmobs.enable({}) + + expect(llmobs.enabled).to.be.true + expect(llmobs._llmobsModule.enable).to.not.have.been.called + llmobs._llmobsModule.enable.restore() + }) + + it('does not enable llmobs if env var conflicts', () => { + const config = new Config({}) + const llmobsModule = { + enable: sinon.stub() + } + + // do not fully enable a disabled llmobs + const disabledLLMObs = new LLMObsSDK(tracer._tracer, llmobsModule, config) + process.env.DD_LLMOBS_ENABLED = 'false' + + disabledLLMObs.enable({}) + + expect(disabledLLMObs.enabled).to.be.false + delete process.env.DD_LLMOBS_ENABLED + disabledLLMObs.disable() // unsubscribe + }) + }) + + describe('disable', () => { + it('disables llmobs if it is enabled', () => { + const llmobsModule = { + disable: sinon.stub() + } + + const config = new Config({ + llmobs: {} + }) + + const enabledLLMObs = new LLMObsSDK(tracer._tracer, llmobsModule, config) + + expect(enabledLLMObs.enabled).to.be.true + enabledLLMObs.disable() + + expect(enabledLLMObs.enabled).to.be.false + expect(llmobsModule.disable).to.have.been.called + }) + + it('does not disable llmobs if it is already disabled', () => { + // do not fully enable a disabled llmobs + const disabledLLMObs = new LLMObsSDK(null, { disable () {} }, { llmobs: { enabled: false } }) + sinon.spy(disabledLLMObs._llmobsModule, 'disable') + + disabledLLMObs.disable() + + expect(disabledLLMObs.enabled).to.be.false + expect(disabledLLMObs._llmobsModule.disable).to.not.have.been.called + }) + }) + + describe('tracing', () => { + describe('trace', () => { + describe('tracing behavior', () => { + it('starts a span if llmobs is disabled but does not process it in the LLMObs span processor', () => { + tracer._tracer._config.llmobs.enabled = false + + llmobs.trace({ kind: 'workflow', name: 'myWorkflow' }, (span, cb) => { + expect(LLMObsTagger.tagMap.get(span)).to.not.exist + expect(() => span.setTag('k', 'v')).to.not.throw() + expect(() => cb()).to.not.throw() + }) + + expect(llmobs._tracer._processor.process).to.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws if the kind is invalid', () => { + expect(() => llmobs.trace({ kind: 'invalid' }, () => {})).to.throw() + + expect(llmobs._tracer._processor.process).to.not.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + }) + + // TODO: need span kind optional for this + it.skip('throws if no name is provided', () => { + expect(() => llmobs.trace({ kind: 'workflow' }, () => {})).to.throw() + + expect(llmobs._tracer._processor.process).to.not.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + }) + + it('traces a block', () => { + let span + + llmobs.trace({ kind: 'workflow' }, _span => { + span = _span + sinon.spy(span, 'finish') + }) + + expect(span.finish).to.have.been.called + }) + + it('traces a block with a callback', () => { + let span + let done + + llmobs.trace({ kind: 'workflow' }, (_span, _done) => { + span = _span + sinon.spy(span, 'finish') + done = _done + }) + + expect(span.finish).to.not.have.been.called + + done() + + expect(span.finish).to.have.been.called + }) + + it('traces a promise', done => { + const deferred = {} + const promise = new Promise(resolve => { + deferred.resolve = resolve + }) + + let span + + llmobs + .trace({ kind: 'workflow' }, _span => { + span = _span + sinon.spy(span, 'finish') + return promise + }) + .then(() => { + expect(span.finish).to.have.been.called + done() + }) + .catch(done) + + expect(span.finish).to.not.have.been.called + + deferred.resolve() + }) + }) + + describe('parentage', () => { + // TODO: need to implement custom trace IDs + it.skip('starts a span with a distinct trace id', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + expect(LLMObsTagger.tagMap.get(span)['_ml_obs.trace_id']) + .to.exist.and.to.not.equal(span.context().toTraceId(true)) + }) + }) + + it('sets span parentage correctly', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, outerLLMSpan => { + llmobs.trace({ kind: 'task', name: 'test' }, innerLLMSpan => { + expect(LLMObsTagger.tagMap.get(innerLLMSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMSpan.context().toSpanId()) + // TODO: need to implement custom trace IDs + // expect(innerLLMSpan.context()._tags['_ml_obs.trace_id']) + // .to.equal(outerLLMSpan.context()._tags['_ml_obs.trace_id']) + }) + }) + }) + + it('maintains llmobs parentage separately from apm spans', () => { + llmobs.trace({ kind: 'workflow', name: 'outer-llm' }, outerLLMSpan => { + expect(llmobs._active()).to.equal(outerLLMSpan) + tracer.trace('apmSpan', apmSpan => { + expect(llmobs._active()).to.equal(outerLLMSpan) + llmobs.trace({ kind: 'workflow', name: 'inner-llm' }, innerLLMSpan => { + expect(llmobs._active()).to.equal(innerLLMSpan) + + // llmobs span linkage + expect(LLMObsTagger.tagMap.get(innerLLMSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMSpan.context().toSpanId()) + + // apm span linkage + expect(innerLLMSpan.context()._parentId.toString(10)).to.equal(apmSpan.context().toSpanId()) + expect(apmSpan.context()._parentId.toString(10)).to.equal(outerLLMSpan.context().toSpanId()) + }) + }) + }) + }) + + // TODO: need to implement custom trace IDs + it.skip('starts different traces for llmobs spans as child spans of an apm root span', () => { + let apmTraceId, traceId1, traceId2 + tracer.trace('apmRootSpan', apmRootSpan => { + apmTraceId = apmRootSpan.context().toTraceId(true) + llmobs.trace('workflow', llmobsSpan1 => { + traceId1 = llmobsSpan1.context()._tags['_ml_obs.trace_id'] + }) + + llmobs.trace('workflow', llmobsSpan2 => { + traceId2 = llmobsSpan2.context()._tags['_ml_obs.trace_id'] + }) + }) + + expect(traceId1).to.not.equal(traceId2) + expect(traceId1).to.not.equal(apmTraceId) + expect(traceId2).to.not.equal(apmTraceId) + }) + + it('maintains the llmobs parentage when error callbacks are used', () => { + llmobs.trace({ kind: 'workflow' }, outer => { + llmobs.trace({ kind: 'task' }, (inner, cb) => { + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outer.context().toSpanId()) + cb() // finish the span + }) + + expect(llmobs._active()).to.equal(outer) + + llmobs.trace({ kind: 'task' }, (inner) => { + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outer.context().toSpanId()) + }) + }) + }) + }) + }) + + describe('wrap', () => { + describe('tracing behavior', () => { + it('starts a span if llmobs is disabled but does not process it in the LLMObs span processor', () => { + tracer._tracer._config.llmobs.enabled = false + + const fn = llmobs.wrap({ kind: 'workflow' }, (a) => { + expect(a).to.equal(1) + expect(LLMObsTagger.tagMap.get(llmobs._active())).to.not.exist + }) + + expect(() => fn(1)).to.not.throw() + + expect(llmobs._tracer._processor.process).to.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws if the kind is invalid', () => { + expect(() => llmobs.wrap({ kind: 'invalid' }, () => {})).to.throw() + }) + + it('wraps a function', () => { + let span + const fn = llmobs.wrap({ kind: 'workflow' }, () => { + span = tracer.scope().active() + sinon.spy(span, 'finish') + }) + + fn() + + expect(span.finish).to.have.been.called + }) + + it('wraps a function with a callback', () => { + let span + let next + + const fn = llmobs.wrap({ kind: 'workflow' }, (_next) => { + span = tracer.scope().active() + sinon.spy(span, 'finish') + next = _next + }) + + fn(() => {}) + + expect(span.finish).to.not.have.been.called + + next() + + expect(span.finish).to.have.been.called + }) + + it('does not auto-annotate llm spans', () => { + let span + function myLLM (input) { + span = llmobs._active() + return '' + } + + const wrappedMyLLM = llmobs.wrap({ kind: 'llm' }, myLLM) + + wrappedMyLLM('input') + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('does not auto-annotate embedding spans input', () => { + let span + function myEmbedding (input) { + span = llmobs._active() + return 'output' + } + + const wrappedMyEmbedding = llmobs.wrap({ kind: 'embedding' }, myEmbedding) + + wrappedMyEmbedding('input') + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'embedding', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.output.value': 'output' + }) + }) + + it('does not auto-annotate retrieval spans output', () => { + let span + function myRetrieval (input) { + span = llmobs._active() + return 'output' + } + + const wrappedMyRetrieval = llmobs.wrap({ kind: 'retrieval' }, myRetrieval) + + wrappedMyRetrieval('input') + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'retrieval', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.value': 'input' + }) + }) + + // TODO: need span kind optional for this test + it.skip('sets the span name to "unnamed-anonymous-function" if no name is provided', () => { + let span + const fn = llmobs.wrap({ kind: 'workflow' }, () => { + span = llmobs._active() + }) + + fn() + + expect(span.context()._name).to.equal('unnamed-anonymous-function') + }) + }) + + describe('parentage', () => { + // TODO: need to implement custom trace IDs + it.skip('starts a span with a distinct trace id', () => { + const fn = llmobs.wrap('workflow', { name: 'test' }, () => { + const span = llmobs._active() + expect(span.context()._tags['_ml_obs.trace_id']) + .to.exist.and.to.not.equal(span.context().toTraceId(true)) + }) + + fn() + }) + + it('sets span parentage correctly', () => { + let outerLLMSpan, innerLLMSpan + + function outer () { + outerLLMSpan = llmobs._active() + innerWrapped() + } + + function inner () { + innerLLMSpan = llmobs._active() + expect(LLMObsTagger.tagMap.get(innerLLMSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMSpan.context().toSpanId()) + // TODO: need to implement custom trace IDs + // expect(innerLLMSpan.context()._tags['_ml_obs.trace_id']) + // .to.equal(outerLLMSpan.context()._tags['_ml_obs.trace_id']) + } + + const outerWrapped = llmobs.wrap({ kind: 'workflow' }, outer) + const innerWrapped = llmobs.wrap({ kind: 'task' }, inner) + + outerWrapped() + }) + + it('maintains llmobs parentage separately from apm spans', () => { + let outerLLMObsSpan, innerLLMObsSpan + + function outerLLMObs () { + outerLLMObsSpan = llmobs._active() + expect(outerLLMObsSpan).to.equal(tracer.scope().active()) + + apmWrapped() + } + function apm () { + expect(llmobs._active()).to.equal(outerLLMObsSpan) + innerWrapped() + } + function innerLLMObs () { + innerLLMObsSpan = llmobs._active() + expect(innerLLMObsSpan).to.equal(tracer.scope().active()) + expect(LLMObsTagger.tagMap.get(innerLLMObsSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMObsSpan.context().toSpanId()) + // TODO: need to implement custom trace IDs + // expect(innerLLMObsSpan.context()._tags['_ml_obs.trace_id']) + // .to.equal(outerLLMObsSpan.context()._tags['_ml_obs.trace_id']) + } + + const outerWrapped = llmobs.wrap({ kind: 'workflow' }, outerLLMObs) + const apmWrapped = tracer.wrap('workflow', apm) + const innerWrapped = llmobs.wrap({ kind: 'workflow' }, innerLLMObs) + + outerWrapped() + }) + + // TODO: need to implement custom trace IDs + it.skip('starts different traces for llmobs spans as child spans of an apm root span', () => { + let traceId1, traceId2, apmTraceId + function apm () { + apmTraceId = tracer.scope().active().context().toTraceId(true) + llmObsWrapped1() + llmObsWrapped2() + } + function llmObs1 () { + traceId1 = LLMObsTagger.tagMap.get(llmobs._active())['_ml_obs.trace_id'] + } + function llmObs2 () { + traceId2 = LLMObsTagger.tagMap.get(llmobs._active())['_ml_obs.trace_id'] + } + + const apmWrapped = tracer.wrap('workflow', apm) + const llmObsWrapped1 = llmobs.wrap({ kind: 'workflow' }, llmObs1) + const llmObsWrapped2 = llmobs.wrap({ kind: 'workflow' }, llmObs2) + + apmWrapped() + + expect(traceId1).to.not.equal(traceId2) + expect(traceId1).to.not.equal(apmTraceId) + expect(traceId2).to.not.equal(apmTraceId) + }) + + it('maintains the llmobs parentage when callbacks are used', () => { + let outerSpan + function outer () { + outerSpan = llmobs._active() + wrappedInner1(() => {}) + expect(outerSpan).to.equal(tracer.scope().active()) + wrappedInner2() + } + + function inner1 (cb) { + const inner = tracer.scope().active() + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outerSpan.context().toSpanId()) + cb() + } + + function inner2 () { + const inner = tracer.scope().active() + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outerSpan.context().toSpanId()) + } + + const wrappedOuter = llmobs.wrap({ kind: 'workflow' }, outer) + const wrappedInner1 = llmobs.wrap({ kind: 'task' }, inner1) + const wrappedInner2 = llmobs.wrap({ kind: 'task' }, inner2) + + wrappedOuter() + }) + }) + }) + }) + + describe('annotate', () => { + it('returns if llmobs is disabled', () => { + tracer._tracer._config.llmobs.enabled = false + sinon.spy(llmobs, '_active') + llmobs.annotate() + + expect(llmobs._active).to.not.have.been.called + llmobs._active.restore() + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws if no arguments are provided', () => { + expect(() => llmobs.annotate()).to.throw() + }) + + it('throws if there are no options given', () => { + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + expect(() => llmobs.annotate(span)).to.throw() + + // span should still exist in the registry, just with no annotations + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + }) + + it('throws if the provided span is not an LLMObs span', () => { + tracer.trace('test', span => { + expect(() => llmobs.annotate(span, {})).to.throw() + + // no span in registry, should not throw + expect(LLMObsTagger.tagMap.get(span)).to.not.exist + }) + }) + + it('throws if the span is finished', () => { + sinon.spy(llmobs._tagger, 'tagTextIO') + llmobs.trace({ kind: 'workflow', name: 'outer' }, () => { + let innerLLMSpan + llmobs.trace({ kind: 'task', name: 'inner' }, _span => { + innerLLMSpan = _span + }) + + expect(() => llmobs.annotate(innerLLMSpan, {})).to.throw() + expect(llmobs._tagger.tagTextIO).to.not.have.been.called + }) + llmobs._tagger.tagTextIO.restore() + }) + + it('throws for an llmobs span with an invalid kind', () => { + // TODO this might end up being obsolete with llmobs span kind as optional + sinon.spy(llmobs._tagger, 'tagLLMIO') + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + LLMObsTagger.tagMap.get(span)['_ml_obs.meta.span.kind'] = undefined // somehow this is set + expect(() => llmobs.annotate(span, {})).to.throw() + }) + + expect(llmobs._tagger.tagLLMIO).to.not.have.been.called + llmobs._tagger.tagLLMIO.restore() + }) + + it('annotates the current active llmobs span in an llmobs scope', () => { + sinon.spy(llmobs._tagger, 'tagTextIO') + + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + const inputData = {} + llmobs.annotate({ inputData }) + + expect(llmobs._tagger.tagTextIO).to.have.been.calledWith(span, inputData, undefined) + }) + + llmobs._tagger.tagTextIO.restore() + }) + + it('annotates the current active llmobs span in an apm scope', () => { + sinon.spy(llmobs._tagger, 'tagTextIO') + + llmobs.trace({ kind: 'workflow', name: 'test' }, llmobsSpan => { + tracer.trace('apmSpan', () => { + const inputData = {} + llmobs.annotate({ inputData }) + + expect(llmobs._tagger.tagTextIO).to.have.been.calledWith(llmobsSpan, inputData, undefined) + }) + }) + + llmobs._tagger.tagTextIO.restore() + }) + + it('annotates llm io for an llm span', () => { + const inputData = [{ role: 'system', content: 'system prompt' }] + const outputData = [{ role: 'ai', content: 'no question was asked' }] + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ inputData, outputData }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.messages': inputData, + '_ml_obs.meta.output.messages': outputData + }) + }) + }) + + it('annotates embedding io for an embedding span', () => { + const inputData = [{ text: 'input text' }] + const outputData = 'documents embedded' + + llmobs.trace({ kind: 'embedding', name: 'test' }, span => { + llmobs.annotate({ inputData, outputData }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'embedding', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.documents': inputData, + '_ml_obs.meta.output.value': outputData + }) + }) + }) + + it('annotates retrieval io for a retrieval span', () => { + const inputData = 'input text' + const outputData = [{ text: 'output text' }] + + llmobs.trace({ kind: 'retrieval', name: 'test' }, span => { + llmobs.annotate({ inputData, outputData }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'retrieval', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.value': inputData, + '_ml_obs.meta.output.documents': outputData + }) + }) + }) + + it('annotates metadata if present', () => { + const metadata = { response_type: 'json' } + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ metadata }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.metadata': metadata + }) + }) + }) + + it('annotates metrics if present', () => { + const metrics = { score: 0.6 } + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ metrics }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.metrics': metrics + }) + }) + }) + + it('annotates tags if present', () => { + const tags = { 'custom.tag': 'value' } + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ tags }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.tags': tags + }) + }) + }) + }) + + describe('exportSpan', () => { + it('throws if no span is provided', () => { + expect(() => llmobs.exportSpan()).to.throw() + }) + + it('throws if the provided span is not an LLMObs span', () => { + tracer.trace('test', span => { + expect(() => llmobs.exportSpan(span)).to.throw() + }) + }) + + it('uses the provided span', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + const spanCtx = llmobs.exportSpan(span) + + const traceId = span.context().toTraceId(true) + const spanId = span.context().toSpanId() + + expect(spanCtx).to.deep.equal({ traceId, spanId }) + }) + }) + + it('uses the active span in an llmobs scope', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + const spanCtx = llmobs.exportSpan() + + const traceId = span.context().toTraceId(true) + const spanId = span.context().toSpanId() + + expect(spanCtx).to.deep.equal({ traceId, spanId }) + }) + }) + + it('uses the active span in an apm scope', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, llmobsSpan => { + tracer.trace('apmSpan', () => { + const spanCtx = llmobs.exportSpan() + + const traceId = llmobsSpan.context().toTraceId(true) + const spanId = llmobsSpan.context().toSpanId() + + expect(spanCtx).to.deep.equal({ traceId, spanId }) + }) + }) + }) + + it('returns undefined if the provided span is not a span', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, fakeSpan => { + fakeSpan.context().toTraceId = undefined // something that would throw + LLMObsTagger.tagMap.set(fakeSpan, {}) + const spanCtx = llmobs.exportSpan(fakeSpan) + + expect(spanCtx).to.be.undefined + }) + }) + }) + + describe('submitEvaluation', () => { + let spanCtx + let originalApiKey + + before(() => { + originalApiKey = tracer._tracer._config.apiKey + tracer._tracer._config.apiKey = 'test' + }) + + beforeEach(() => { + spanCtx = { + traceId: '1234', + spanId: '5678' + } + }) + + after(() => { + tracer._tracer._config.apiKey = originalApiKey + }) + + it('does not submit an evaluation if llmobs is disabled', () => { + tracer._tracer._config.llmobs.enabled = false + llmobs.submitEvaluation() + + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws for a missing API key', () => { + const apiKey = tracer._tracer._config.apiKey + delete tracer._tracer._config.apiKey + + expect(() => llmobs.submitEvaluation(spanCtx)).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + + tracer._tracer._config.apiKey = apiKey + }) + + it('throws for an invalid span context', () => { + const invalid = {} + + expect(() => llmobs.submitEvaluation(invalid, {})).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a missing mlApp', () => { + const mlApp = tracer._tracer._config.llmobs.mlApp + delete tracer._tracer._config.llmobs.mlApp + + expect(() => llmobs.submitEvaluation(spanCtx)).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + + tracer._tracer._config.llmobs.mlApp = mlApp + }) + + it('throws for an invalid timestamp', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 'invalid' + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a missing label', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234 + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for an invalid metric type', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'invalid' + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a mismatched value for a categorical metric', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'categorical', + value: 1 + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a mismatched value for a score metric', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'score', + value: 'string' + }) + }).to.throw() + + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('submits an evaluation metric', () => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'score', + value: 0.6, + tags: { + host: 'localhost' + } + }) + + expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.deep.equal({ + trace_id: spanCtx.traceId, + span_id: spanCtx.spanId, + ml_app: 'test', + timestamp_ms: 1234, + label: 'test', + metric_type: 'score', + score_value: 0.6, + tags: [`dd-trace.version:${tracerVersion}`, 'ml_app:test', 'host:localhost'] + }) + }) + + it('sets `categorical_value` for categorical metrics', () => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'categorical', + value: 'foo', + tags: { + host: 'localhost' + } + }) + + expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.have.property('categorical_value', 'foo') + }) + + it('defaults to the current time if no timestamp is provided', () => { + sinon.stub(Date, 'now').returns(1234) + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + label: 'test', + metricType: 'score', + value: 0.6 + }) + + expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.have.property('timestamp_ms', 1234) + Date.now.restore() + }) + }) + + describe('flush', () => { + it('does not flush if llmobs is disabled', () => { + tracer._tracer._config.llmobs.enabled = false + llmobs.flush() + + expect(LLMObsEvalMetricsWriter.prototype.flush).to.not.have.been.called + expect(LLMObsAgentProxySpanWriter.prototype.flush).to.not.have.been.called + tracer._tracer._config.llmobs.enabled = true + }) + + it('flushes the evaluation writer and span writer', () => { + llmobs.flush() + + expect(LLMObsEvalMetricsWriter.prototype.flush).to.have.been.called + expect(LLMObsAgentProxySpanWriter.prototype.flush).to.have.been.called + }) + + it('logs if there was an error flushing', () => { + LLMObsEvalMetricsWriter.prototype.flush.throws(new Error('boom')) + + expect(() => llmobs.flush()).to.not.throw() + }) + }) + + describe('distributed', () => { + it('adds the current llmobs span id to the injection context', () => { + const carrier = { 'x-datadog-tags': '' } + let parentId + llmobs.trace({ kind: 'workflow', name: 'myWorkflow' }, span => { + parentId = span.context().toSpanId() + + // simulate injection from http integration or from tracer + // something that triggers the text_map injection + injectCh.publish({ carrier }) + }) + + expect(carrier['x-datadog-tags']).to.equal(`,_dd.p.llmobs_parent_id=${parentId}`) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/sdk/integration.spec.js b/packages/dd-trace/test/llmobs/sdk/integration.spec.js new file mode 100644 index 00000000000..acba94d8f71 --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/integration.spec.js @@ -0,0 +1,256 @@ +'use strict' + +const { expectedLLMObsNonLLMSpanEvent, deepEqualWithMockValues } = require('../util') +const chai = require('chai') + +chai.Assertion.addMethod('deepEqualWithMockValues', deepEqualWithMockValues) + +const tags = { + ml_app: 'test', + language: 'javascript' +} + +const AgentProxyWriter = require('../../../src/llmobs/writers/spans/agentProxy') +const EvalMetricsWriter = require('../../../src/llmobs/writers/evaluations') + +const tracerVersion = require('../../../../../package.json').version + +describe('end to end sdk integration tests', () => { + let tracer + let llmobs + let payloadGenerator + + function run (payloadGenerator) { + payloadGenerator() + return { + spans: tracer._tracer._processor.process.args.map(args => args[0]).reverse(), // spans finish in reverse order + llmobsSpans: AgentProxyWriter.prototype.append.args?.map(args => args[0]), + evaluationMetrics: EvalMetricsWriter.prototype.append.args?.map(args => args[0]) + } + } + + function check (expected, actual) { + for (const expectedLLMObsSpanIdx in expected) { + const expectedLLMObsSpan = expected[expectedLLMObsSpanIdx] + const actualLLMObsSpan = actual[expectedLLMObsSpanIdx] + expect(actualLLMObsSpan).to.deep.deepEqualWithMockValues(expectedLLMObsSpan) + } + } + + before(() => { + tracer = require('../../../../dd-trace') + tracer.init({ + llmobs: { + mlApp: 'test' + } + }) + + // another test suite may have disabled LLMObs + // to clear the intervals and unsubscribe + // in that case, the `init` call above won't have re-enabled it + // we'll re-enable it here + llmobs = tracer.llmobs + if (!llmobs.enabled) { + llmobs.enable({ + mlApp: 'test' + }) + } + + tracer._tracer._config.apiKey = 'test' + + sinon.spy(tracer._tracer._processor, 'process') + sinon.stub(AgentProxyWriter.prototype, 'append') + sinon.stub(EvalMetricsWriter.prototype, 'append') + }) + + afterEach(() => { + tracer._tracer._processor.process.resetHistory() + AgentProxyWriter.prototype.append.resetHistory() + EvalMetricsWriter.prototype.append.resetHistory() + + process.removeAllListeners('beforeExit') + + llmobs.disable() + llmobs.enable({ mlApp: 'test', apiKey: 'test' }) + }) + + after(() => { + sinon.restore() + llmobs.disable() + delete global._ddtrace + delete require.cache[require.resolve('../../../../dd-trace')] + }) + + it('uses trace correctly', () => { + payloadGenerator = function () { + const result = llmobs.trace({ kind: 'agent' }, () => { + llmobs.annotate({ inputData: 'hello', outputData: 'world', metadata: { foo: 'bar' } }) + return tracer.trace('apmSpan', () => { + llmobs.annotate({ tags: { bar: 'baz' } }) // should use the current active llmobs span + return llmobs.trace({ kind: 'workflow', name: 'myWorkflow' }, () => { + llmobs.annotate({ inputData: 'world', outputData: 'hello' }) + return 'boom' + }) + }) + }) + + expect(result).to.equal('boom') + } + + const { spans, llmobsSpans } = run(payloadGenerator) + expect(spans).to.have.lengthOf(3) + expect(llmobsSpans).to.have.lengthOf(2) + + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: spans[0], + spanKind: 'agent', + tags: { ...tags, bar: 'baz' }, + metadata: { foo: 'bar' }, + inputValue: 'hello', + outputValue: 'world' + }), + expectedLLMObsNonLLMSpanEvent({ + span: spans[2], + spanKind: 'workflow', + parentId: spans[0].context().toSpanId(), + tags, + name: 'myWorkflow', + inputValue: 'world', + outputValue: 'hello' + }) + ] + + check(expected, llmobsSpans) + }) + + it('uses wrap correctly', () => { + payloadGenerator = function () { + function agent (input) { + llmobs.annotate({ inputData: 'hello' }) + return apm(input) + } + // eslint-disable-next-line no-func-assign + agent = llmobs.wrap({ kind: 'agent' }, agent) + + function apm (input) { + llmobs.annotate({ metadata: { foo: 'bar' } }) // should annotate the agent span + return workflow(input) + } + // eslint-disable-next-line no-func-assign + apm = tracer.wrap('apm', apm) + + function workflow () { + llmobs.annotate({ outputData: 'custom' }) + return 'world' + } + // eslint-disable-next-line no-func-assign + workflow = llmobs.wrap({ kind: 'workflow', name: 'myWorkflow' }, workflow) + + agent('my custom input') + } + + const { spans, llmobsSpans } = run(payloadGenerator) + expect(spans).to.have.lengthOf(3) + expect(llmobsSpans).to.have.lengthOf(2) + + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: spans[0], + spanKind: 'agent', + tags, + inputValue: 'hello', + outputValue: 'world', + metadata: { foo: 'bar' } + }), + expectedLLMObsNonLLMSpanEvent({ + span: spans[2], + spanKind: 'workflow', + parentId: spans[0].context().toSpanId(), + tags, + name: 'myWorkflow', + inputValue: 'my custom input', + outputValue: 'custom' + }) + ] + + check(expected, llmobsSpans) + }) + + it('instruments and uninstruments as needed', () => { + payloadGenerator = function () { + llmobs.disable() + llmobs.trace({ kind: 'agent', name: 'llmobsParent' }, () => { + llmobs.annotate({ inputData: 'hello', outputData: 'world' }) + llmobs.enable({ mlApp: 'test1' }) + llmobs.trace({ kind: 'workflow', name: 'child1' }, () => { + llmobs.disable() + llmobs.trace({ kind: 'workflow', name: 'child2' }, () => { + llmobs.enable({ mlApp: 'test2' }) + llmobs.trace({ kind: 'workflow', name: 'child3' }, () => {}) + }) + }) + }) + } + + const { spans, llmobsSpans } = run(payloadGenerator) + expect(spans).to.have.lengthOf(4) + expect(llmobsSpans).to.have.lengthOf(2) + + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: spans[1], + spanKind: 'workflow', + tags: { ...tags, ml_app: 'test1' }, + name: 'child1' + }), + expectedLLMObsNonLLMSpanEvent({ + span: spans[3], + spanKind: 'workflow', + tags: { ...tags, ml_app: 'test2' }, + name: 'child3', + parentId: spans[1].context().toSpanId() + }) + ] + + check(expected, llmobsSpans) + }) + + it('submits evaluations', () => { + sinon.stub(Date, 'now').returns(1234567890) + payloadGenerator = function () { + llmobs.trace({ kind: 'agent', name: 'myAgent' }, () => { + llmobs.annotate({ inputData: 'hello', outputData: 'world' }) + const spanCtx = llmobs.exportSpan() + llmobs.submitEvaluation(spanCtx, { + label: 'foo', + metricType: 'categorical', + value: 'bar' + }) + }) + } + + const { spans, llmobsSpans, evaluationMetrics } = run(payloadGenerator) + expect(spans).to.have.lengthOf(1) + expect(llmobsSpans).to.have.lengthOf(1) + expect(evaluationMetrics).to.have.lengthOf(1) + + // check eval metrics content + const exptected = [ + { + trace_id: spans[0].context().toTraceId(true), + span_id: spans[0].context().toSpanId(), + label: 'foo', + metric_type: 'categorical', + categorical_value: 'bar', + ml_app: 'test', + timestamp_ms: 1234567890, + tags: [`dd-trace.version:${tracerVersion}`, 'ml_app:test'] + } + ] + + check(exptected, evaluationMetrics) + + Date.now.restore() + }) +}) diff --git a/packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js b/packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js new file mode 100644 index 00000000000..b792a4fbdb7 --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js @@ -0,0 +1,133 @@ +'use strict' + +const { execSync } = require('child_process') +const { + FakeAgent, + createSandbox, + spawnProc +} = require('../../../../../../integration-tests/helpers') +const chai = require('chai') +const path = require('path') +const { expectedLLMObsNonLLMSpanEvent, deepEqualWithMockValues } = require('../../util') + +chai.Assertion.addMethod('deepEqualWithMockValues', deepEqualWithMockValues) + +function check (expected, actual) { + for (const expectedLLMObsSpanIdx in expected) { + const expectedLLMObsSpan = expected[expectedLLMObsSpanIdx] + const actualLLMObsSpan = actual[expectedLLMObsSpanIdx] + expect(actualLLMObsSpan).to.deep.deepEqualWithMockValues(expectedLLMObsSpan) + } +} + +const testVersions = [ + '^1', + '^2', + '^3', + '^4', + '^5' +] + +const testCases = [ + { + name: 'not initialized', + file: 'noop' + }, + { + name: 'instruments an application with decorators', + file: 'index', + setup: (agent, results = {}) => { + const llmobsRes = agent.assertLlmObsPayloadReceived(({ payload }) => { + results.llmobsSpans = payload.spans + }) + + const apmRes = agent.assertMessageReceived(({ payload }) => { + results.apmSpans = payload + }) + + return [llmobsRes, apmRes] + }, + runTest: ({ llmobsSpans, apmSpans }) => { + const actual = llmobsSpans + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: apmSpans[0][0], + spanKind: 'agent', + tags: { + ml_app: 'test', + language: 'javascript' + }, + inputValue: 'this is a', + outputValue: 'test' + }) + ] + + check(expected, actual) + } + } +] + +// a bit of devex to show the version we're actually testing +// so we don't need to know ahead of time +function getLatestVersion (range) { + const command = `npm show typescript@${range} version` + const output = execSync(command, { encoding: 'utf-8' }).trim() + const versions = output.split('\n').map(line => line.split(' ')[1].replace(/'/g, '')) + return versions[versions.length - 1] +} + +describe('typescript', () => { + let agent + let proc + let sandbox + + for (const version of testVersions) { + context(`with version ${getLatestVersion(version)}`, () => { + before(async function () { + this.timeout(20000) + sandbox = await createSandbox( + [`typescript@${version}`], false, ['./packages/dd-trace/test/llmobs/sdk/typescript/*'] + ) + }) + + after(async () => { + await sandbox.remove() + }) + + beforeEach(async () => { + agent = await new FakeAgent().start() + }) + + afterEach(async () => { + proc && proc.kill() + await agent.stop() + }) + + for (const test of testCases) { + const { name, file } = test + it(name, async () => { + const cwd = sandbox.folder + + const results = {} + const waiters = test.setup ? test.setup(agent, results) : [] + + // compile typescript + execSync( + `tsc --target ES6 --experimentalDecorators --module commonjs --sourceMap ${file}.ts`, + { cwd, stdio: 'inherit' } + ) + + proc = await spawnProc( + path.join(cwd, `${file}.js`), + { cwd, env: { DD_TRACE_AGENT_PORT: agent.port } } + ) + + await Promise.all(waiters) + + // some tests just need the file to run, not assert payloads + test.runTest && test.runTest(results) + }) + } + }) + } +}) diff --git a/packages/dd-trace/test/llmobs/sdk/typescript/index.ts b/packages/dd-trace/test/llmobs/sdk/typescript/index.ts new file mode 100644 index 00000000000..9aa320fd92c --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/typescript/index.ts @@ -0,0 +1,23 @@ +// @ts-ignore +import tracer from 'dd-trace'; + +const llmobs = tracer.init({ + llmobs: { + mlApp: 'test', + } +}).llmobs; + +class Test { + @llmobs.decorate({ kind: 'agent' }) + runChain (input: string) { + llmobs.annotate({ + inputData: 'this is a', + outputData: 'test' + }) + + return 'world' + } +} + +const test: Test = new Test(); +test.runChain('hello'); diff --git a/packages/dd-trace/test/llmobs/sdk/typescript/noop.ts b/packages/dd-trace/test/llmobs/sdk/typescript/noop.ts new file mode 100644 index 00000000000..e1b7c00837b --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/typescript/noop.ts @@ -0,0 +1,19 @@ +// @ts-ignore +import tracer from 'dd-trace'; +import * as assert from 'assert'; +const llmobs = tracer.llmobs; + +class Test { + @llmobs.decorate({ kind: 'agent' }) + runChain (input: string) { + llmobs.annotate({ + inputData: 'this is a', + outputData: 'test' + }) + + return 'world' + } +} + +const test: Test = new Test(); +assert.equal(test.runChain('hello'), 'world') \ No newline at end of file diff --git a/packages/dd-trace/test/llmobs/span_processor.spec.js b/packages/dd-trace/test/llmobs/span_processor.spec.js new file mode 100644 index 00000000000..ae73c4a9677 --- /dev/null +++ b/packages/dd-trace/test/llmobs/span_processor.spec.js @@ -0,0 +1,360 @@ +'use strict' + +const { expect } = require('chai') +const proxyquire = require('proxyquire') + +// we will use this to populate the span-tags map +const LLMObsTagger = require('../../src/llmobs/tagger') + +describe('span processor', () => { + let LLMObsSpanProcessor + let processor + let writer + let log + + beforeEach(() => { + writer = { + append: sinon.stub() + } + + log = { + warn: sinon.stub() + } + + LLMObsSpanProcessor = proxyquire('../../src/llmobs/span_processor', { + '../../../../package.json': { version: 'x.y.z' }, + '../log': log + }) + + processor = new LLMObsSpanProcessor({ llmobs: { enabled: true } }) + processor.setWriter(writer) + }) + + describe('process', () => { + let span + + it('should do nothing if llmobs is not enabled', () => { + processor = new LLMObsSpanProcessor({ llmobs: { enabled: false } }) + + expect(() => processor.process({ span })).not.to.throw() + }) + + it('should do nothing if the span is not an llm obs span', () => { + span = { context: () => ({ _tags: {} }) } + + expect(processor._writer.append).to.not.have.been.called + }) + + it('should format the span event for the writer', () => { + span = { + _name: 'test', + _startTime: 0, // this is in ms, will be converted to ns + _duration: 1, // this is in ms, will be converted to ns + context () { + return { + _tags: {}, + toTraceId () { return '123' }, // should not use this + toSpanId () { return '456' } + } + } + } + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.model_name': 'myModel', + '_ml_obs.meta.model_provider': 'myProvider', + '_ml_obs.meta.metadata': { foo: 'bar' }, + '_ml_obs.meta.ml_app': 'myApp', + '_ml_obs.meta.input.value': 'input-value', + '_ml_obs.meta.output.value': 'output-value', + '_ml_obs.meta.input.messages': [{ role: 'user', content: 'hello' }], + '_ml_obs.meta.output.messages': [{ role: 'assistant', content: 'world' }], + '_ml_obs.llmobs_parent_id': '1234' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload).to.deep.equal({ + trace_id: '123', + span_id: '456', + parent_id: '1234', + name: 'test', + tags: [ + 'version:', + 'env:', + 'service:', + 'source:integration', + 'ml_app:myApp', + 'dd-trace.version:x.y.z', + 'error:0', + 'language:javascript' + ], + start_ns: 0, + duration: 1000000, + status: 'ok', + meta: { + 'span.kind': 'llm', + model_name: 'myModel', + model_provider: 'myprovider', // should be lowercase + input: { + value: 'input-value', + messages: [{ role: 'user', content: 'hello' }] + }, + output: { + value: 'output-value', + messages: [{ role: 'assistant', content: 'world' }] + }, + metadata: { foo: 'bar' } + }, + metrics: {}, + _dd: { + trace_id: '123', + span_id: '456' + } + }) + + expect(writer.append).to.have.been.calledOnce + }) + + it('removes problematic fields from the metadata', () => { + // problematic fields are circular references or bigints + const metadata = { + bigint: BigInt(1), + deep: { + foo: 'bar' + }, + bar: 'baz' + } + metadata.circular = metadata + metadata.deep.circular = metadata.deep + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.metadata': metadata + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.metadata).to.deep.equal({ + bar: 'baz', + bigint: 'Unserializable value', + circular: 'Unserializable value', + deep: { foo: 'bar', circular: 'Unserializable value' } + }) + }) + + it('tags output documents for a retrieval span', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'retrieval', + '_ml_obs.meta.output.documents': [{ text: 'hello', name: 'myDoc', id: '1', score: 0.6 }] + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.output.documents).to.deep.equal([{ + text: 'hello', + name: 'myDoc', + id: '1', + score: 0.6 + }]) + }) + + it('tags input documents for an embedding span', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'embedding', + '_ml_obs.meta.input.documents': [{ text: 'hello', name: 'myDoc', id: '1', score: 0.6 }] + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.input.documents).to.deep.equal([{ + text: 'hello', + name: 'myDoc', + id: '1', + score: 0.6 + }]) + }) + + it('defaults model provider to custom', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.model_name': 'myModel' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.model_provider).to.equal('custom') + }) + + it('sets an error appropriately', () => { + span = { + context () { + return { + _tags: { + 'error.message': 'error message', + 'error.type': 'error type', + 'error.stack': 'error stack' + }, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta['error.message']).to.equal('error message') + expect(payload.meta['error.type']).to.equal('error type') + expect(payload.meta['error.stack']).to.equal('error stack') + expect(payload.status).to.equal('error') + + expect(payload.tags).to.include('error_type:error type') + }) + + it('uses the error itself if the span does not have specific error fields', () => { + span = { + context () { + return { + _tags: { + error: new Error('error message') + }, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta['error.message']).to.equal('error message') + expect(payload.meta['error.type']).to.equal('Error') + expect(payload.meta['error.stack']).to.exist + expect(payload.status).to.equal('error') + + expect(payload.tags).to.include('error_type:Error') + }) + + it('uses the span name from the tag if provided', () => { + span = { + _name: 'test', + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.name': 'mySpan' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.name).to.equal('mySpan') + }) + + it('attaches session id if provided', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.session_id': '1234' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.session_id).to.equal('1234') + expect(payload.tags).to.include('session_id:1234') + }) + + it('sets span tags appropriately', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.tags': { hostname: 'localhost', foo: 'bar', source: 'mySource' } + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.tags).to.include('foo:bar') + expect(payload.tags).to.include('source:mySource') + expect(payload.tags).to.include('hostname:localhost') + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/tagger.spec.js b/packages/dd-trace/test/llmobs/tagger.spec.js new file mode 100644 index 00000000000..783ce91bdae --- /dev/null +++ b/packages/dd-trace/test/llmobs/tagger.spec.js @@ -0,0 +1,576 @@ +'use strict' + +const { expect } = require('chai') +const proxyquire = require('proxyquire') + +function unserializbleObject () { + const obj = {} + obj.obj = obj + return obj +} + +describe('tagger', () => { + let span + let spanContext + let Tagger + let tagger + let logger + let util + + beforeEach(() => { + spanContext = { + _tags: {}, + _trace: { tags: {} } + } + + span = { + context () { return spanContext }, + setTag (k, v) { + this.context()._tags[k] = v + } + } + + util = { + generateTraceId: sinon.stub().returns('0123') + } + + logger = { + warn: sinon.stub() + } + + Tagger = proxyquire('../../src/llmobs/tagger', { + '../log': logger, + './util': util + }) + }) + + describe('without softFail', () => { + beforeEach(() => { + tagger = new Tagger({ llmobs: { enabled: true, mlApp: 'my-default-ml-app' } }) + }) + + describe('registerLLMObsSpan', () => { + it('will not set tags if llmobs is not enabled', () => { + tagger = new Tagger({ llmobs: { enabled: false } }) + tagger.registerLLMObsSpan(span, 'llm') + + expect(Tagger.tagMap.get(span)).to.deep.equal(undefined) + }) + + it('tags an llm obs span with basic and default properties', () => { + tagger.registerLLMObsSpan(span, { kind: 'workflow' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'workflow', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined' // no parent id provided + }) + }) + + it('uses options passed in to set tags', () => { + tagger.registerLLMObsSpan(span, { + kind: 'llm', + modelName: 'my-model', + modelProvider: 'my-provider', + sessionId: 'my-session', + mlApp: 'my-app' + }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.model_name': 'my-model', + '_ml_obs.meta.model_provider': 'my-provider', + '_ml_obs.session_id': 'my-session', + '_ml_obs.meta.ml_app': 'my-app', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('uses the name if provided', () => { + tagger.registerLLMObsSpan(span, { kind: 'llm', name: 'my-span-name' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.name': 'my-span-name' + }) + }) + + it('defaults parent id to undefined', () => { + tagger.registerLLMObsSpan(span, { kind: 'llm' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('uses the parent span if provided to populate fields', () => { + const parentSpan = { + context () { + return { + _tags: { + '_ml_obs.meta.ml_app': 'my-ml-app', + '_ml_obs.session_id': 'my-session' + }, + toSpanId () { return '5678' } + } + } + } + tagger.registerLLMObsSpan(span, { kind: 'llm', parent: parentSpan }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-ml-app', + '_ml_obs.session_id': 'my-session', + '_ml_obs.llmobs_parent_id': '5678' + }) + }) + + it('uses the propagated trace id if provided', () => { + tagger.registerLLMObsSpan(span, { kind: 'llm' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('uses the propagated parent id if provided', () => { + spanContext._trace.tags['_dd.p.llmobs_parent_id'] = '-567' + + tagger.registerLLMObsSpan(span, { kind: 'llm' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': '-567' + }) + }) + + it('does not set span type if the LLMObs span kind is falsy', () => { + tagger.registerLLMObsSpan(span, { kind: false }) + + expect(Tagger.tagMap.get(span)).to.be.undefined + }) + }) + + describe('tagMetadata', () => { + it('tags a span with metadata', () => { + tagger._register(span) + tagger.tagMetadata(span, { a: 'foo', b: 'bar' }) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.metadata': { a: 'foo', b: 'bar' } + }) + }) + }) + + describe('tagMetrics', () => { + it('tags a span with metrics', () => { + tagger._register(span) + tagger.tagMetrics(span, { a: 1, b: 2 }) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.metrics': { a: 1, b: 2 } + }) + }) + + it('tags maps token metric names appropriately', () => { + tagger._register(span) + tagger.tagMetrics(span, { + inputTokens: 1, + outputTokens: 2, + totalTokens: 3, + foo: 10 + }) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.metrics': { input_tokens: 1, output_tokens: 2, total_tokens: 3, foo: 10 } + }) + }) + + it('throws for non-number entries', () => { + const metrics = { + a: 1, + b: 'foo', + c: { depth: 1 }, + d: undefined + } + tagger._register(span) + expect(() => tagger.tagMetrics(span, metrics)).to.throw() + }) + }) + + describe('tagSpanTags', () => { + it('sets tags on a span', () => { + const tags = { foo: 'bar' } + tagger._register(span) + tagger.tagSpanTags(span, tags) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.tags': { foo: 'bar' } + }) + }) + + it('merges tags so they do not overwrite', () => { + Tagger.tagMap.set(span, { '_ml_obs.tags': { a: 1 } }) + const tags = { a: 2, b: 1 } + tagger.tagSpanTags(span, tags) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.tags': { a: 1, b: 1 } + }) + }) + }) + + describe('tagLLMIO', () => { + it('tags a span with llm io', () => { + const inputData = [ + 'you are an amazing assistant', + { content: 'hello! my name is foobar' }, + { content: 'I am a robot', role: 'assistant' }, + { content: 'I am a human', role: 'user' }, + {} + ] + + const outputData = 'Nice to meet you, human!' + + tagger._register(span) + tagger.tagLLMIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.messages': [ + { content: 'you are an amazing assistant' }, + { content: 'hello! my name is foobar' }, + { content: 'I am a robot', role: 'assistant' }, + { content: 'I am a human', role: 'user' }, + { content: '' } + ], + '_ml_obs.meta.output.messages': [{ content: 'Nice to meet you, human!' }] + }) + }) + + it('throws for a non-object message', () => { + const messages = [ + 5 + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string message content', () => { + const messages = [ + { content: 5 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string message role', () => { + const messages = [ + { content: 'a', role: 5 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + describe('tagging tool calls appropriately', () => { + it('tags a span with tool calls', () => { + const inputData = [ + { content: 'hello', toolCalls: [{ name: 'tool1' }, { name: 'tool2', arguments: { a: 1, b: 2 } }] }, + { content: 'goodbye', toolCalls: [{ name: 'tool3' }] } + ] + const outputData = [ + { content: 'hi', toolCalls: [{ name: 'tool4' }] } + ] + + tagger._register(span) + tagger.tagLLMIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.messages': [ + { + content: 'hello', + tool_calls: [{ name: 'tool1' }, { name: 'tool2', arguments: { a: 1, b: 2 } }] + }, { + content: 'goodbye', + tool_calls: [{ name: 'tool3' }] + }], + '_ml_obs.meta.output.messages': [{ content: 'hi', tool_calls: [{ name: 'tool4' }] }] + }) + }) + + it('throws for a non-object tool call', () => { + const messages = [ + { content: 'a', toolCalls: 5 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string tool name', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-object tool arguments', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 'tool1', arguments: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string tool id', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 'tool1', toolId: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string tool type', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 'tool1', type: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('logs multiple errors if there are multiple errors for a message and filters it out', () => { + const messages = [ + { content: 'a', toolCalls: [5, { name: 5, type: 7 }], role: 7 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + }) + }) + + describe('tagEmbeddingIO', () => { + it('tags a span with embedding io', () => { + const inputData = [ + 'my string document', + { text: 'my object document' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 } + ] + const outputData = 'embedded documents' + tagger._register(span) + tagger.tagEmbeddingIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.documents': [ + { text: 'my string document' }, + { text: 'my object document' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 }], + '_ml_obs.meta.output.value': 'embedded documents' + }) + }) + + it('throws for a non-object document', () => { + const documents = [ + 5 + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-string document text', () => { + const documents = [ + { text: 5 } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-string document name', () => { + const documents = [ + { text: 'a', name: 5 } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-string document id', () => { + const documents = [ + { text: 'a', id: 5 } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-number document score', () => { + const documents = [ + { text: 'a', score: '5' } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + }) + + describe('tagRetrievalIO', () => { + it('tags a span with retrieval io', () => { + const inputData = 'some query' + const outputData = [ + 'result 1', + { text: 'result 2' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 } + ] + + tagger._register(span) + tagger.tagRetrievalIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.value': 'some query', + '_ml_obs.meta.output.documents': [ + { text: 'result 1' }, + { text: 'result 2' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 }] + }) + }) + + it('throws for malformed properties on documents', () => { + const inputData = 'some query' + const outputData = [ + true, + { text: 5 }, + { text: 'foo', name: 5 }, + 'hi', + null, + undefined + ] + + // specific cases of throwing tested with embedding inputs + expect(() => tagger.tagRetrievalIO(span, inputData, outputData)).to.throw() + }) + }) + + describe('tagTextIO', () => { + it('tags a span with text io', () => { + const inputData = { some: 'object' } + const outputData = 'some text' + tagger._register(span) + tagger.tagTextIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.value': '{"some":"object"}', + '_ml_obs.meta.output.value': 'some text' + }) + }) + + it('throws when the value is not JSON serializable', () => { + const data = unserializbleObject() + expect(() => tagger.tagTextIO(span, data, 'output')).to.throw() + }) + }) + }) + + describe('with softFail', () => { + beforeEach(() => { + tagger = new Tagger({ llmobs: { enabled: true, mlApp: 'my-default-ml-app' } }, true) + }) + + it('logs a warning when an unexpected value is encountered for text tagging', () => { + const data = unserializbleObject() + tagger._register(span) + tagger.tagTextIO(span, data, 'input') + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs a warning when an unexpected value is encountered for metrics tagging', () => { + const metrics = { + a: 1, + b: 'foo' + } + + tagger._register(span) + tagger.tagMetrics(span, metrics) + expect(logger.warn).to.have.been.calledOnce + }) + + describe('tagDocuments', () => { + it('logs a warning when a document is not an object', () => { + const data = [undefined] + tagger._register(span) + tagger.tagEmbeddingIO(span, data, undefined) + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs multiple warnings otherwise', () => { + const documents = [ + { + text: 'a', + name: 5, + id: 7, + score: '5' + } + ] + + tagger._register(span) + tagger.tagEmbeddingIO(span, documents, undefined) + expect(logger.warn.callCount).to.equal(3) + }) + }) + + describe('tagMessages', () => { + it('logs a warning when a message is not an object', () => { + const messages = [5] + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs multiple warnings otherwise', () => { + const messages = [ + { content: 5, role: 5 } + ] + + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn.callCount).to.equal(2) + }) + + describe('tool call tagging', () => { + it('logs a warning when a message tool call is not an object', () => { + const messages = [ + { content: 'a', toolCalls: 5 } + ] + + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs multiple warnings otherwise', () => { + const messages = [ + { + content: 'a', + toolCalls: [ + { + name: 5, + arguments: 'not an object', + toolId: 5, + type: 5 + } + ], + role: 7 + } + ] + + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn.callCount).to.equal(5) // 4 for tool call + 1 for role + }) + }) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/util.js b/packages/dd-trace/test/llmobs/util.js new file mode 100644 index 00000000000..4c3b76da090 --- /dev/null +++ b/packages/dd-trace/test/llmobs/util.js @@ -0,0 +1,201 @@ +'use strict' + +const chai = require('chai') + +const tracerVersion = require('../../../../package.json').version + +const MOCK_STRING = Symbol('string') +const MOCK_NUMBER = Symbol('number') +const MOCK_ANY = Symbol('any') + +function deepEqualWithMockValues (expected) { + const actual = this._obj + + for (const key in actual) { + if (expected[key] === MOCK_STRING) { + new chai.Assertion(typeof actual[key], `key ${key}`).to.equal('string') + } else if (expected[key] === MOCK_NUMBER) { + new chai.Assertion(typeof actual[key], `key ${key}`).to.equal('number') + } else if (expected[key] === MOCK_ANY) { + new chai.Assertion(actual[key], `key ${key}`).to.exist + } else if (Array.isArray(expected[key])) { + const sortedExpected = [...expected[key].sort()] + const sortedActual = [...actual[key].sort()] + new chai.Assertion(sortedActual, `key: ${key}`).to.deep.equal(sortedExpected) + } else if (typeof expected[key] === 'object') { + new chai.Assertion(actual[key], `key: ${key}`).to.deepEqualWithMockValues(expected[key]) + } else { + new chai.Assertion(actual[key], `key: ${key}`).to.equal(expected[key]) + } + } +} + +function expectedLLMObsLLMSpanEvent (options) { + const spanEvent = expectedLLMObsBaseEvent(options) + + const meta = { input: {}, output: {} } + const { + spanKind, + modelName, + modelProvider, + inputMessages, + inputDocuments, + outputMessages, + outputValue, + metadata, + tokenMetrics + } = options + + if (spanKind === 'llm') { + if (inputMessages) meta.input.messages = inputMessages + if (outputMessages) meta.output.messages = outputMessages + } else if (spanKind === 'embedding') { + if (inputDocuments) meta.input.documents = inputDocuments + if (outputValue) meta.output.value = outputValue + } + + if (!spanEvent.meta.input) delete spanEvent.meta.input + if (!spanEvent.meta.output) delete spanEvent.meta.output + + if (modelName) meta.model_name = modelName + if (modelProvider) meta.model_provider = modelProvider + if (metadata) meta.metadata = metadata + + Object.assign(spanEvent.meta, meta) + + if (tokenMetrics) spanEvent.metrics = tokenMetrics + + return spanEvent +} + +function expectedLLMObsNonLLMSpanEvent (options) { + const spanEvent = expectedLLMObsBaseEvent(options) + const { + spanKind, + inputValue, + outputValue, + outputDocuments, + metadata, + tokenMetrics + } = options + + const meta = { input: {}, output: {} } + if (spanKind === 'retrieval') { + if (inputValue) meta.input.value = inputValue + if (outputDocuments) meta.output.documents = outputDocuments + if (outputValue) meta.output.value = outputValue + } + if (inputValue) meta.input.value = inputValue + if (metadata) meta.metadata = metadata + if (outputValue) meta.output.value = outputValue + + if (!spanEvent.meta.input) delete spanEvent.meta.input + if (!spanEvent.meta.output) delete spanEvent.meta.output + + Object.assign(spanEvent.meta, meta) + + if (tokenMetrics) spanEvent.metrics = tokenMetrics + + return spanEvent +} + +function expectedLLMObsBaseEvent ({ + span, + parentId, + name, + spanKind, + tags, + sessionId, + error, + errorType, + errorMessage, + errorStack +} = {}) { + // the `span` could be a raw DatadogSpan or formatted span + const spanName = name || span.name || span._name + const spanId = span.span_id ? fromBuffer(span.span_id) : span.context().toSpanId() + const startNs = span.start ? fromBuffer(span.start, true) : Math.round(span._startTime * 1e6) + const duration = span.duration ? fromBuffer(span.duration, true) : Math.round(span._duration * 1e6) + + const spanEvent = { + trace_id: MOCK_STRING, + span_id: spanId, + parent_id: parentId || 'undefined', + name: spanName, + tags: expectedLLMObsTags({ span, tags, error, errorType, sessionId }), + start_ns: startNs, + duration, + status: error ? 'error' : 'ok', + meta: { 'span.kind': spanKind }, + metrics: {}, + _dd: { + trace_id: MOCK_STRING, + span_id: spanId + } + } + + if (sessionId) spanEvent.session_id = sessionId + + if (error) { + spanEvent.meta['error.type'] = errorType + spanEvent.meta['error.message'] = errorMessage + spanEvent.meta['error.stack'] = errorStack + } + + return spanEvent +} + +function expectedLLMObsTags ({ + span, + error, + errorType, + tags, + sessionId +}) { + tags = tags || {} + + const version = span.meta?.version || span._parentTracer?._version + const env = span.meta?.env || span._parentTracer?._env + const service = span.meta?.service || span._parentTracer?._service + + const spanTags = [ + `version:${version ?? ''}`, + `env:${env ?? ''}`, + `service:${service ?? ''}`, + 'source:integration', + `ml_app:${tags.ml_app}`, + `dd-trace.version:${tracerVersion}` + ] + + if (sessionId) spanTags.push(`session_id:${sessionId}`) + + if (error) { + spanTags.push('error:1') + if (errorType) spanTags.push(`error_type:${errorType}`) + } else { + spanTags.push('error:0') + } + + for (const [key, value] of Object.entries(tags)) { + if (!['version', 'env', 'service', 'ml_app'].includes(key)) { + spanTags.push(`${key}:${value}`) + } + } + + return spanTags +} + +function fromBuffer (spanProperty, isNumber = false) { + const { buffer, offset } = spanProperty + const strVal = buffer.readBigInt64BE(offset).toString() + return isNumber ? Number(strVal) : strVal +} + +module.exports = { + expectedLLMObsLLMSpanEvent, + expectedLLMObsNonLLMSpanEvent, + deepEqualWithMockValues, + MOCK_ANY, + MOCK_NUMBER, + MOCK_STRING +} diff --git a/packages/dd-trace/test/llmobs/util.spec.js b/packages/dd-trace/test/llmobs/util.spec.js new file mode 100644 index 00000000000..063e618c1ef --- /dev/null +++ b/packages/dd-trace/test/llmobs/util.spec.js @@ -0,0 +1,142 @@ +'use strict' + +const { + encodeUnicode, + getFunctionArguments, + validateKind +} = require('../../src/llmobs/util') + +describe('util', () => { + describe('encodeUnicode', () => { + it('should encode unicode characters', () => { + expect(encodeUnicode('πŸ˜€')).to.equal('\\ud83d\\ude00') + }) + + it('should encode only unicode characters in a string', () => { + expect(encodeUnicode('test πŸ˜€')).to.equal('test \\ud83d\\ude00') + }) + }) + + describe('validateKind', () => { + for (const kind of ['llm', 'agent', 'task', 'tool', 'workflow', 'retrieval', 'embedding']) { + it(`should return true for valid kind: ${kind}`, () => { + expect(validateKind(kind)).to.equal(kind) + }) + } + + it('should throw for an empty string', () => { + expect(() => validateKind('')).to.throw() + }) + + it('should throw for an invalid kind', () => { + expect(() => validateKind('invalid')).to.throw() + }) + + it('should throw for an undefined kind', () => { + expect(() => validateKind()).to.throw() + }) + }) + + describe('getFunctionArguments', () => { + describe('functionality', () => { + it('should return undefined for a function without arguments', () => { + expect(getFunctionArguments(() => {})).to.deep.equal(undefined) + }) + + it('should capture a single argument only by its value', () => { + expect(getFunctionArguments((arg) => {}, ['bar'])).to.deep.equal('bar') + }) + + it('should capture multiple arguments by name', () => { + expect(getFunctionArguments((foo, bar) => {}, ['foo', 'bar'])).to.deep.equal({ foo: 'foo', bar: 'bar' }) + }) + + it('should ignore arguments not passed in', () => { + expect(getFunctionArguments((foo, bar, baz) => {}, ['foo', 'bar'])).to.deep.equal({ foo: 'foo', bar: 'bar' }) + }) + + it('should capture spread arguments', () => { + expect( + getFunctionArguments((foo, bar, ...args) => {}, ['foo', 'bar', 1, 2, 3]) + ).to.deep.equal({ foo: 'foo', bar: 'bar', args: [1, 2, 3] }) + }) + }) + + describe('parsing configurations', () => { + it('should parse multiple arguments with single-line comments', () => { + function foo ( + bar, // bar comment + baz // baz comment + ) {} + + expect(getFunctionArguments(foo, ['bar', 'baz'])).to.deep.equal({ bar: 'bar', baz: 'baz' }) + }) + + it('should parse multiple arguments with multi-line comments', () => { + function foo ( + bar, /* bar comment */ + baz /* baz comment */ + ) {} + + expect(getFunctionArguments(foo, ['bar', 'baz'])).to.deep.equal({ bar: 'bar', baz: 'baz' }) + }) + + it('should parse multiple arguments with stacked multi-line comments', () => { + function foo ( + /** + * hello + */ + bar, + /** + * world + */ + baz + ) {} + + expect(getFunctionArguments(foo, ['bar', 'baz'])).to.deep.equal({ bar: 'bar', baz: 'baz' }) + }) + + it('parses when simple default values are present', () => { + function foo (bar = 'baz') {} + + expect(getFunctionArguments(foo, ['bar'])).to.deep.equal('bar') + }) + + it('should ignore the default value when no argument is passed', () => { + function foo (bar = 'baz') {} + + expect(getFunctionArguments(foo, [])).to.deep.equal(undefined) + }) + + it('parses when a default value is a function', () => { + function foo (bar = () => {}, baz = 4) {} + + expect(getFunctionArguments(foo, ['bar'])).to.deep.equal('bar') + }) + + it('parses when a simple object is passed in', () => { + function foo (bar = { baz: 4 }) {} + + expect(getFunctionArguments(foo, ['bar'])).to.deep.equal('bar') + }) + + it('parses when a complex object is passed in', () => { + function foo (bar = { baz: { a: 5, b: { c: 4 } }, bat: 0 }, baz) {} + + expect(getFunctionArguments(foo, [{ bar: 'baz' }, 'baz'])).to.deep.equal({ bar: { bar: 'baz' }, baz: 'baz' }) + }) + + it('parses when one of the arguments is an arrow function', () => { + function foo (fn = (a, b, c) => {}, ctx) {} + + expect(getFunctionArguments(foo, ['fn', 'ctx'])).to.deep.equal({ fn: 'fn', ctx: 'ctx' }) + }) + + it('parses when one of the arguments is a function', () => { + function foo (fn = function (a, b, c) {}, ctx) {} + + expect(getFunctionArguments(foo, ['fn', 'ctx'])).to.deep.equal({ fn: 'fn', ctx: 'ctx' }) + }) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/base.spec.js b/packages/dd-trace/test/llmobs/writers/base.spec.js new file mode 100644 index 00000000000..8b971b2748a --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/base.spec.js @@ -0,0 +1,179 @@ +'use strict' +const { expect } = require('chai') +const proxyquire = require('proxyquire') + +describe('BaseLLMObsWriter', () => { + let BaseLLMObsWriter + let writer + let request + let clock + let options + let logger + + beforeEach(() => { + request = sinon.stub() + logger = { + debug: sinon.stub(), + warn: sinon.stub(), + error: sinon.stub() + } + BaseLLMObsWriter = proxyquire('../../../src/llmobs/writers/base', { + '../../exporters/common/request': request, + '../../log': logger + }) + + clock = sinon.useFakeTimers() + + options = { + endpoint: '/api/v2/llmobs', + intake: 'llmobs-intake.datadoghq.com' + } + }) + + afterEach(() => { + clock.restore() + process.removeAllListeners('beforeExit') + }) + + it('constructs a writer with a url', () => { + writer = new BaseLLMObsWriter(options) + + expect(writer._url.href).to.equal('https://llmobs-intake.datadoghq.com/api/v2/llmobs') + expect(logger.debug).to.have.been.calledWith( + 'Started BaseLLMObsWriter to https://llmobs-intake.datadoghq.com/api/v2/llmobs' + ) + }) + + it('calls flush before the process exits', () => { + writer = new BaseLLMObsWriter(options) + writer.flush = sinon.spy() + + process.emit('beforeExit') + + expect(writer.flush).to.have.been.calledOnce + }) + + it('calls flush at the correct interval', async () => { + writer = new BaseLLMObsWriter(options) + + writer.flush = sinon.spy() + + clock.tick(1000) + + expect(writer.flush).to.have.been.calledOnce + }) + + it('appends an event to the buffer', () => { + writer = new BaseLLMObsWriter(options) + const event = { foo: 'bar–' } + writer.append(event) + + expect(writer._buffer).to.have.lengthOf(1) + expect(writer._buffer[0]).to.deep.equal(event) + expect(writer._bufferSize).to.equal(16) + }) + + it('does not append an event if the buffer is full', () => { + writer = new BaseLLMObsWriter(options) + + for (let i = 0; i < 1000; i++) { + writer.append({ foo: 'bar' }) + } + + writer.append({ foo: 'bar' }) + expect(writer._buffer).to.have.lengthOf(1000) + expect(logger.warn).to.have.been.calledWith('BaseLLMObsWriter event buffer full (limit is 1000), dropping event') + }) + + it('flushes the buffer', () => { + writer = new BaseLLMObsWriter(options) + + const event1 = { foo: 'bar' } + const event2 = { foo: 'baz' } + + writer.append(event1) + writer.append(event2) + + writer.makePayload = (events) => ({ events }) + + // Stub the request function to call its third argument + request.callsFake((url, options, callback) => { + callback(null, null, 202) + }) + + writer.flush() + + expect(request).to.have.been.calledOnce + const calledArgs = request.getCall(0).args + + expect(calledArgs[0]).to.deep.equal(JSON.stringify({ events: [event1, event2] })) + expect(calledArgs[1]).to.deep.equal({ + headers: { + 'Content-Type': 'application/json' + }, + method: 'POST', + url: writer._url, + timeout: 5000 + }) + + expect(logger.debug).to.have.been.calledWith( + 'Sent 2 LLMObs undefined events to https://llmobs-intake.datadoghq.com/api/v2/llmobs' + ) + + expect(writer._buffer).to.have.lengthOf(0) + expect(writer._bufferSize).to.equal(0) + }) + + it('does not flush an empty buffer', () => { + writer = new BaseLLMObsWriter(options) + writer.flush() + + expect(request).to.not.have.been.called + }) + + it('logs errors from the request', () => { + writer = new BaseLLMObsWriter(options) + writer.makePayload = (events) => ({ events }) + + writer.append({ foo: 'bar' }) + + const error = new Error('boom') + request.callsFake((url, options, callback) => { + callback(error) + }) + + writer.flush() + + expect(logger.error).to.have.been.calledWith( + 'Error sending 1 LLMObs undefined events to https://llmobs-intake.datadoghq.com/api/v2/llmobs: boom' + ) + }) + + describe('destroy', () => { + it('destroys the writer', () => { + sinon.spy(global, 'clearInterval') + sinon.spy(process, 'removeListener') + writer = new BaseLLMObsWriter(options) + writer.flush = sinon.stub() + + writer.destroy() + + expect(writer._destroyed).to.be.true + expect(clearInterval).to.have.been.calledWith(writer._periodic) + expect(process.removeListener).to.have.been.calledWith('beforeExit', writer.destroy) + expect(writer.flush).to.have.been.calledOnce + expect(logger.debug) + .to.have.been.calledWith('Stopping BaseLLMObsWriter') + }) + + it('does not destroy more than once', () => { + writer = new BaseLLMObsWriter(options) + + logger.debug.reset() // ignore log from constructor + writer.destroy() + writer.destroy() + + expect(logger.debug).to.have.been.calledOnce + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/evaluations.spec.js b/packages/dd-trace/test/llmobs/writers/evaluations.spec.js new file mode 100644 index 00000000000..e81955450c4 --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/evaluations.spec.js @@ -0,0 +1,46 @@ +'use strict' + +describe('LLMObsEvalMetricsWriter', () => { + let LLMObsEvalMetricsWriter + let writer + let flush + + beforeEach(() => { + LLMObsEvalMetricsWriter = require('../../../src/llmobs/writers/evaluations') + flush = sinon.stub() + }) + + afterEach(() => { + process.removeAllListeners('beforeExit') + }) + + it('constructs the writer with the correct values', () => { + writer = new LLMObsEvalMetricsWriter({ + site: 'datadoghq.com', + llmobs: {}, + apiKey: '1234' + }) + + writer.flush = flush // just to stop the beforeExit flush call + + expect(writer._url.href).to.equal('https://api.datadoghq.com/api/intake/llm-obs/v1/eval-metric') + expect(writer._headers['DD-API-KEY']).to.equal('1234') + expect(writer._eventType).to.equal('evaluation_metric') + }) + + it('builds the payload correctly', () => { + writer = new LLMObsEvalMetricsWriter({ + site: 'datadoghq.com', + apiKey: 'test' + }) + + const events = [ + { name: 'test', value: 1 } + ] + + const payload = writer.makePayload(events) + + expect(payload.data.type).to.equal('evaluation_metric') + expect(payload.data.attributes.metrics).to.deep.equal(events) + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js b/packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js new file mode 100644 index 00000000000..6ed0f150885 --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js @@ -0,0 +1,28 @@ +'use stict' + +describe('LLMObsAgentProxySpanWriter', () => { + let LLMObsAgentProxySpanWriter + let writer + + beforeEach(() => { + LLMObsAgentProxySpanWriter = require('../../../../src/llmobs/writers/spans/agentProxy') + }) + + it('is initialized correctly', () => { + writer = new LLMObsAgentProxySpanWriter({ + hostname: '127.0.0.1', + port: 8126 + }) + + expect(writer._url.href).to.equal('http://127.0.0.1:8126/evp_proxy/v2/api/v2/llmobs') + expect(writer._headers['X-Datadog-EVP-Subdomain']).to.equal('llmobs-intake') + }) + + it('is initialized correctly with default hostname', () => { + writer = new LLMObsAgentProxySpanWriter({ + port: 8126 // port will always be defaulted by config + }) + + expect(writer._url.href).to.equal('http://localhost:8126/evp_proxy/v2/api/v2/llmobs') + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js b/packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js new file mode 100644 index 00000000000..e3cf421a3ed --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js @@ -0,0 +1,21 @@ +'use stict' + +describe('LLMObsAgentlessSpanWriter', () => { + let LLMObsAgentlessSpanWriter + let writer + + beforeEach(() => { + LLMObsAgentlessSpanWriter = require('../../../../src/llmobs/writers/spans/agentless') + }) + + it('is initialized correctly', () => { + writer = new LLMObsAgentlessSpanWriter({ + site: 'datadoghq.com', + llmobs: {}, + apiKey: '1234' + }) + + expect(writer._url.href).to.equal('https://llmobs-intake.datadoghq.com/api/v2/llmobs') + expect(writer._headers['DD-API-KEY']).to.equal('1234') + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/spans/base.spec.js b/packages/dd-trace/test/llmobs/writers/spans/base.spec.js new file mode 100644 index 00000000000..1c9965cd9c2 --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/spans/base.spec.js @@ -0,0 +1,99 @@ +'use strict' + +const proxyquire = require('proxyquire') + +describe('LLMObsSpanWriter', () => { + let LLMObsSpanWriter + let writer + let options + let logger + + beforeEach(() => { + logger = { + warn: sinon.stub(), + debug: sinon.stub() + } + LLMObsSpanWriter = proxyquire('../../../../src/llmobs/writers/spans/base', { + '../../../log': logger + }) + options = { + endpoint: '/api/v2/llmobs', + intake: 'llmobs-intake.datadoghq.com' + } + }) + + afterEach(() => { + process.removeAllListeners('beforeExit') + }) + + it('is initialized correctly', () => { + writer = new LLMObsSpanWriter(options) + + expect(writer._eventType).to.equal('span') + }) + + it('computes the number of bytes of the appended event', () => { + writer = new LLMObsSpanWriter(options) + + const event = { name: 'test', value: 1 } + const eventSizeBytes = Buffer.from(JSON.stringify(event)).byteLength + + writer.append(event) + + expect(writer._bufferSize).to.equal(eventSizeBytes) + }) + + it('truncates the event if it exceeds the size limit', () => { + writer = new LLMObsSpanWriter(options) + + const event = { + name: 'test', + meta: { + input: { value: 'a'.repeat(1024 * 1024) }, + output: { value: 'a'.repeat(1024 * 1024) } + } + } + + writer.append(event) + + const bufferEvent = writer._buffer[0] + expect(bufferEvent).to.deep.equal({ + name: 'test', + meta: { + input: { value: "[This value has been dropped because this span's size exceeds the 1MB size limit.]" }, + output: { value: "[This value has been dropped because this span's size exceeds the 1MB size limit.]" } + }, + collection_errors: ['dropped_io'] + }) + }) + + it('flushes the queue if the next event will exceed the payload limit', () => { + writer = new LLMObsSpanWriter(options) + writer.flush = sinon.stub() + + writer._bufferSize = (5 << 20) - 1 + writer._buffer = Array.from({ length: 10 }) + const event = { name: 'test', value: 'a'.repeat(1024) } + + writer.append(event) + + expect(writer.flush).to.have.been.calledOnce + expect(logger.debug).to.have.been.calledWith( + 'Flusing queue because queing next event will exceed EvP payload limit' + ) + }) + + it('creates the payload correctly', () => { + writer = new LLMObsSpanWriter(options) + + const events = [ + { name: 'test', value: 1 } + ] + + const payload = writer.makePayload(events) + + expect(payload['_dd.stage']).to.equal('raw') + expect(payload.event_type).to.equal('span') + expect(payload.spans).to.deep.equal(events) + }) +}) diff --git a/packages/dd-trace/test/proxy.spec.js b/packages/dd-trace/test/proxy.spec.js index a21e2f4226a..3d7ebbc5a2a 100644 --- a/packages/dd-trace/test/proxy.spec.js +++ b/packages/dd-trace/test/proxy.spec.js @@ -131,7 +131,8 @@ describe('TracerProxy', () => { remoteConfig: { enabled: true }, - configure: sinon.spy() + configure: sinon.spy(), + llmobs: {} } Config = sinon.stub().returns(config)