From 5f6857d4526e51e6f4a0d6b6ba8a2b0bbe88190a Mon Sep 17 00:00:00 2001 From: Farzad Abdolhosseini <farzad@fixie.ai> Date: Fri, 24 Nov 2023 11:20:55 -0800 Subject: [PATCH 1/5] Bugfix: removing partial unicode when streaming --- packages/ai-jsx/src/lib/openai.tsx | 13 ++++++++++--- packages/ai-jsx/src/lib/util.ts | 7 ++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/packages/ai-jsx/src/lib/openai.tsx b/packages/ai-jsx/src/lib/openai.tsx index 48871a5ca..bec8d6334 100644 --- a/packages/ai-jsx/src/lib/openai.tsx +++ b/packages/ai-jsx/src/lib/openai.tsx @@ -648,9 +648,16 @@ export async function* OpenAIChatModel( argsJson += toolCall.function.arguments; } - yield ( - <FunctionCall id={id} partial name={name} args={JSON.parse(patchedUntruncateJson(argsJson || '{}'))} /> - ); + try { + yield ( + <FunctionCall id={id} partial name={name} args={JSON.parse(patchedUntruncateJson(argsJson || '{}'))} /> + ); + } catch (e: any) { + // If the JSON is incomplete and we get an error, we can ignore it. + if (!('Unexpected string in JSON' in e.message || 'Unexpected end of JSON input' in e.message)) { + throw e; + } + } delta = await advance(); } diff --git a/packages/ai-jsx/src/lib/util.ts b/packages/ai-jsx/src/lib/util.ts index f383cd99a..dc3b89efd 100644 --- a/packages/ai-jsx/src/lib/util.ts +++ b/packages/ai-jsx/src/lib/util.ts @@ -36,4 +36,9 @@ export function getEnvVar(name: string, shouldThrow: boolean = true) { * There's an ESM issue with untruncate-json, so we need to do this to support running on both client & server. */ /** @hidden */ -export const patchedUntruncateJson = 'default' in untruncateJson ? untruncateJson.default : untruncateJson; +const _patchedUntruncateJson = 'default' in untruncateJson ? untruncateJson.default : untruncateJson; + +export function patchedUntruncateJson(str: string) { + // Remove partial unicode characters: e.g. "\\u5728\\u5fA" -> "\\u5728" + return _patchedUntruncateJson(str).replace(/\u[\dA-F]{0,3}[^\dA-F]/gi, ''); +} From 6505e5e672da722593a7babc60805efa067f58e0 Mon Sep 17 00:00:00 2001 From: Farzad Abdolhosseini <farzad@fixie.ai> Date: Fri, 24 Nov 2023 11:29:25 -0800 Subject: [PATCH 2/5] update version and changelog --- packages/ai-jsx/package.json | 4 +++- packages/docs/docs/changelog.md | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/ai-jsx/package.json b/packages/ai-jsx/package.json index 718bb56da..ccd9c3bbc 100644 --- a/packages/ai-jsx/package.json +++ b/packages/ai-jsx/package.json @@ -4,7 +4,7 @@ "repository": "fixie-ai/ai-jsx", "bugs": "https://github.com/fixie-ai/ai-jsx/issues", "homepage": "https://ai-jsx.com", - "version": "0.28.1", + "version": "0.28.2", "volta": { "extends": "../../package.json" }, @@ -334,6 +334,7 @@ "@types/eslint": "^8", "@types/jest": "^29.5.2", "@types/js-yaml": "^4.0.5", + "@types/jsesc": "^3.0.3", "@types/lodash": "^4.14.194", "@types/node": "^20.2.1", "@types/react": "^18.2.7", @@ -387,6 +388,7 @@ "ink": "^4.2.0", "js-tiktoken": "^1.0.8", "js-yaml": "^4.1.0", + "jsesc": "^3.0.2", "langchain": "^0.0.143", "lodash": "^4.17.21", "ml-distance": "^4.0.1", diff --git a/packages/docs/docs/changelog.md b/packages/docs/docs/changelog.md index e09495104..2511ad966 100644 --- a/packages/docs/docs/changelog.md +++ b/packages/docs/docs/changelog.md @@ -1,6 +1,10 @@ # Changelog -## 0.28.1 +## 0.28.2 + +- Fix bug where partially streamed unicode characters (e.g. Chinese) would cause an error in OpenAI function calls. + +## [0.28.1](https://github.com/fixie-ai/ai-jsx/tree/4c67d845f48585dc3f26e90a9a656471f40c82ed) - Add `openai.finish_reason` span attribute for `OpenAIChatModel` From 384e0f903e0563b0220a2e122c4403eb0256aa1d Mon Sep 17 00:00:00 2001 From: Farzad Abdolhosseini <farzad@fixie.ai> Date: Fri, 24 Nov 2023 12:19:06 -0800 Subject: [PATCH 3/5] adding tests and fixing corner cases --- packages/ai-jsx/package.json | 2 -- packages/ai-jsx/src/lib/openai.tsx | 5 +++-- packages/ai-jsx/src/lib/util.ts | 2 +- packages/ai-jsx/test/lib/util.test.ts | 19 ++++++++++++++++++- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/packages/ai-jsx/package.json b/packages/ai-jsx/package.json index ccd9c3bbc..af4070c28 100644 --- a/packages/ai-jsx/package.json +++ b/packages/ai-jsx/package.json @@ -334,7 +334,6 @@ "@types/eslint": "^8", "@types/jest": "^29.5.2", "@types/js-yaml": "^4.0.5", - "@types/jsesc": "^3.0.3", "@types/lodash": "^4.14.194", "@types/node": "^20.2.1", "@types/react": "^18.2.7", @@ -388,7 +387,6 @@ "ink": "^4.2.0", "js-tiktoken": "^1.0.8", "js-yaml": "^4.1.0", - "jsesc": "^3.0.2", "langchain": "^0.0.143", "lodash": "^4.17.21", "ml-distance": "^4.0.1", diff --git a/packages/ai-jsx/src/lib/openai.tsx b/packages/ai-jsx/src/lib/openai.tsx index bec8d6334..8e2c67b1e 100644 --- a/packages/ai-jsx/src/lib/openai.tsx +++ b/packages/ai-jsx/src/lib/openai.tsx @@ -653,8 +653,9 @@ export async function* OpenAIChatModel( <FunctionCall id={id} partial name={name} args={JSON.parse(patchedUntruncateJson(argsJson || '{}'))} /> ); } catch (e: any) { - // If the JSON is incomplete and we get an error, we can ignore it. - if (!('Unexpected string in JSON' in e.message || 'Unexpected end of JSON input' in e.message)) { + // If the JSON is incomplete and we get an error, e can ignore it. + const acceptedErrorPattern = /Unexpected .* JSON/; + if (!acceptedErrorPattern.test(e.message)) { throw e; } } diff --git a/packages/ai-jsx/src/lib/util.ts b/packages/ai-jsx/src/lib/util.ts index dc3b89efd..72502265c 100644 --- a/packages/ai-jsx/src/lib/util.ts +++ b/packages/ai-jsx/src/lib/util.ts @@ -40,5 +40,5 @@ const _patchedUntruncateJson = 'default' in untruncateJson ? untruncateJson.defa export function patchedUntruncateJson(str: string) { // Remove partial unicode characters: e.g. "\\u5728\\u5fA" -> "\\u5728" - return _patchedUntruncateJson(str).replace(/\u[\dA-F]{0,3}[^\dA-F]/gi, ''); + return _patchedUntruncateJson(str).replace(/\\u[\dA-F]{0,3}([^\dA-F]|$)/gi, '$1'); } diff --git a/packages/ai-jsx/test/lib/util.test.ts b/packages/ai-jsx/test/lib/util.test.ts index b78f71eb1..909bf844a 100644 --- a/packages/ai-jsx/test/lib/util.test.ts +++ b/packages/ai-jsx/test/lib/util.test.ts @@ -1,4 +1,4 @@ -import { getEnvVar } from '../../dist/cjs/lib/util.cjs'; +import { getEnvVar, patchedUntruncateJson } from '../../dist/cjs/lib/util.cjs'; process.env.EXISTS = 'exists'; process.env.REACT_APP_ONLY = 'react-value'; @@ -42,3 +42,20 @@ test('env is not defined', () => { globalThis.process.env = originalEnv; }); + +test('Basic untrucation of JSON', () => { + expect(patchedUntruncateJson('{"a":')).toEqual('{}'); + expect(patchedUntruncateJson('{"a":"b')).toEqual('{"a":"b"}'); + expect(patchedUntruncateJson('{"a":"b"')).toEqual('{"a":"b"}'); +}); + +test('Partial unicode characters are removed', () => { + expect(patchedUntruncateJson('{"a":"\\u5728\\u5fA"}')).toEqual('{"a":"\\u5728"}'); + expect(patchedUntruncateJson('\\u5728\\u')).toEqual('\\u5728'); + expect(patchedUntruncateJson('\\u5728\\u0')).toEqual('\\u5728'); + expect(patchedUntruncateJson('\\u5728\\u5fA')).toEqual('\\u5728'); +}); + +test('Unicode characters are allowed', () => { + expect(patchedUntruncateJson('{"a":"\\u5728什么是"}')).toEqual('{"a":"\\u5728什么是"}'); +}); From 572644637f076f665c30cc5e64756a3bce073a3c Mon Sep 17 00:00:00 2001 From: Farzad Abdolhosseini <farzad@fixie.ai> Date: Fri, 24 Nov 2023 12:23:14 -0800 Subject: [PATCH 4/5] improve test cases --- packages/ai-jsx/test/lib/util.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/ai-jsx/test/lib/util.test.ts b/packages/ai-jsx/test/lib/util.test.ts index 909bf844a..5c5c95f86 100644 --- a/packages/ai-jsx/test/lib/util.test.ts +++ b/packages/ai-jsx/test/lib/util.test.ts @@ -51,9 +51,9 @@ test('Basic untrucation of JSON', () => { test('Partial unicode characters are removed', () => { expect(patchedUntruncateJson('{"a":"\\u5728\\u5fA"}')).toEqual('{"a":"\\u5728"}'); - expect(patchedUntruncateJson('\\u5728\\u')).toEqual('\\u5728'); - expect(patchedUntruncateJson('\\u5728\\u0')).toEqual('\\u5728'); - expect(patchedUntruncateJson('\\u5728\\u5fA')).toEqual('\\u5728'); + expect(patchedUntruncateJson('"\\u5728\\u')).toEqual('"\\u5728"'); + expect(patchedUntruncateJson('"\\u5728\\u0')).toEqual('"\\u5728"'); + expect(patchedUntruncateJson('"\\u5728\\u5fA"')).toEqual('"\\u5728"'); }); test('Unicode characters are allowed', () => { From 456ad406b1c767a40808f64506dd09bc4677a434 Mon Sep 17 00:00:00 2001 From: Farzad Abdolhosseini <farzad@fixie.ai> Date: Fri, 24 Nov 2023 15:26:40 -0800 Subject: [PATCH 5/5] applying Peter's comments --- packages/ai-jsx/src/lib/openai.tsx | 10 ++++++---- packages/ai-jsx/src/lib/util.ts | 2 +- packages/ai-jsx/test/lib/util.test.ts | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/packages/ai-jsx/src/lib/openai.tsx b/packages/ai-jsx/src/lib/openai.tsx index 8e2c67b1e..ccf7131ba 100644 --- a/packages/ai-jsx/src/lib/openai.tsx +++ b/packages/ai-jsx/src/lib/openai.tsx @@ -648,17 +648,19 @@ export async function* OpenAIChatModel( argsJson += toolCall.function.arguments; } + let partialArgs: Record<string, string | number | boolean | null> | undefined = undefined; try { - yield ( - <FunctionCall id={id} partial name={name} args={JSON.parse(patchedUntruncateJson(argsJson || '{}'))} /> - ); + partialArgs = JSON.parse(patchedUntruncateJson(argsJson || '{}')); } catch (e: any) { - // If the JSON is incomplete and we get an error, e can ignore it. + // If the JSON is incomplete and we get an error, we can ignore it. const acceptedErrorPattern = /Unexpected .* JSON/; if (!acceptedErrorPattern.test(e.message)) { throw e; } } + if (partialArgs !== undefined) { + yield <FunctionCall id={id} partial name={name} args={partialArgs} />; + } delta = await advance(); } diff --git a/packages/ai-jsx/src/lib/util.ts b/packages/ai-jsx/src/lib/util.ts index 72502265c..056b2b302 100644 --- a/packages/ai-jsx/src/lib/util.ts +++ b/packages/ai-jsx/src/lib/util.ts @@ -40,5 +40,5 @@ const _patchedUntruncateJson = 'default' in untruncateJson ? untruncateJson.defa export function patchedUntruncateJson(str: string) { // Remove partial unicode characters: e.g. "\\u5728\\u5fA" -> "\\u5728" - return _patchedUntruncateJson(str).replace(/\\u[\dA-F]{0,3}([^\dA-F]|$)/gi, '$1'); + return _patchedUntruncateJson(str.replace(/\\u[\dA-F]{0,3}$/gi, '')); } diff --git a/packages/ai-jsx/test/lib/util.test.ts b/packages/ai-jsx/test/lib/util.test.ts index 5c5c95f86..9c104c7fb 100644 --- a/packages/ai-jsx/test/lib/util.test.ts +++ b/packages/ai-jsx/test/lib/util.test.ts @@ -50,10 +50,10 @@ test('Basic untrucation of JSON', () => { }); test('Partial unicode characters are removed', () => { - expect(patchedUntruncateJson('{"a":"\\u5728\\u5fA"}')).toEqual('{"a":"\\u5728"}'); + expect(patchedUntruncateJson('{"a":"\\u5728\\u5fA')).toEqual('{"a":"\\u5728"}'); expect(patchedUntruncateJson('"\\u5728\\u')).toEqual('"\\u5728"'); expect(patchedUntruncateJson('"\\u5728\\u0')).toEqual('"\\u5728"'); - expect(patchedUntruncateJson('"\\u5728\\u5fA"')).toEqual('"\\u5728"'); + expect(patchedUntruncateJson('"\\u5728\\u5fA')).toEqual('"\\u5728"'); }); test('Unicode characters are allowed', () => {