From d1beb9d8c507183f166972c946e8a41a93ce21e5 Mon Sep 17 00:00:00 2001 From: Peter Salas Date: Wed, 2 Aug 2023 14:18:35 -0700 Subject: [PATCH 1/2] Add ability to shrink conversation based on cost/budget/importance --- packages/ai-jsx/package.json | 4 +- .../src/batteries/natural-language-router.tsx | 2 +- packages/ai-jsx/src/core/conversation.tsx | 219 +++++++++++++++++- packages/ai-jsx/src/core/node.ts | 31 ++- packages/ai-jsx/src/lib/openai.tsx | 103 +++++++- packages/docs/docs/changelog.md | 6 +- .../examples/src/conversation-shrinking.tsx | 44 ++++ yarn.lock | 20 +- 8 files changed, 386 insertions(+), 43 deletions(-) create mode 100644 packages/examples/src/conversation-shrinking.tsx diff --git a/packages/ai-jsx/package.json b/packages/ai-jsx/package.json index 0ea866478..e89662f95 100644 --- a/packages/ai-jsx/package.json +++ b/packages/ai-jsx/package.json @@ -4,7 +4,7 @@ "repository": "fixie-ai/ai-jsx", "bugs": "https://github.com/fixie-ai/ai-jsx/issues", "homepage": "https://ai-jsx.com", - "version": "0.8.0", + "version": "0.8.1", "volta": { "extends": "../../package.json" }, @@ -342,8 +342,8 @@ "axios": "^1.4.0", "cli-highlight": "^2.1.11", "cli-spinners": "^2.9.0", - "gpt3-tokenizer": "^1.1.5", "ink": "^4.2.0", + "js-tiktoken": "^1.0.7", "js-yaml": "^4.1.0", "langchain": "^0.0.81", "lodash": "^4.17.21", diff --git a/packages/ai-jsx/src/batteries/natural-language-router.tsx b/packages/ai-jsx/src/batteries/natural-language-router.tsx index 23953d822..4492ff0ad 100644 --- a/packages/ai-jsx/src/batteries/natural-language-router.tsx +++ b/packages/ai-jsx/src/batteries/natural-language-router.tsx @@ -112,7 +112,7 @@ export async function* NaturalLanguageRouter(props: { children: Node; query: Nod } const props = e.props as RouteProps; - return props.unmatched ? choiceIndex === 0 : props.when === whenOptions[choiceIndex]; + return props.unmatched ? choiceIndex === whenOptions.length - 1 : props.when === whenOptions[choiceIndex]; }); } diff --git a/packages/ai-jsx/src/core/conversation.tsx b/packages/ai-jsx/src/core/conversation.tsx index 0c621a5c4..8802bf3be 100644 --- a/packages/ai-jsx/src/core/conversation.tsx +++ b/packages/ai-jsx/src/core/conversation.tsx @@ -2,6 +2,7 @@ import { ChatCompletionResponseMessage } from 'openai'; import * as AI from '../index.js'; import { Node } from '../index.js'; import { AIJSXError, ErrorCode } from '../core/errors.js'; +import { debug } from './debug.js'; /** * Provide a System Message to the LLM, for use within a {@link ChatCompletion}. @@ -183,8 +184,21 @@ function toConversationMessages(partialRendering: AI.PartiallyRendered[]): Conve } /** @hidden */ -export async function renderToConversation(conversation: AI.Node, render: AI.ComponentContext['render']) { - return toConversationMessages(await render(conversation, { stop: isConversationalComponent })); +export async function renderToConversation( + conversation: AI.Node, + render: AI.ComponentContext['render'], + cost?: (message: ConversationMessage, render: AI.ComponentContext['render']) => Promise, + budget?: number +) { + const conversationToUse = + cost && budget ? ( + + {conversation} + + ) : ( + conversation + ); + return toConversationMessages(await render(conversationToUse, { stop: isConversationalComponent })); } /** @@ -309,3 +323,204 @@ export async function* ShowConversation( // we can indicate that we've already yielded the final frame. return AI.AppendOnlyStream; } + +/** + * @hidden + * "Shrinks" a conversation messages according to a cost function (i.e. 
token length), + * a budget (i.e. context window size), and the `importance` prop set on any `` + * components within the conversation. + * + * Currently, `` components must wrap conversational messages and have no + * effect within the messages themselves. + */ +export async function ShrinkConversation( + { + cost: costFn, + budget: budget, + children, + }: { + cost: (message: ConversationMessage, render: AI.RenderContext['render']) => Promise; + budget: number; + children: Node; + }, + { render, memo, logger }: AI.ComponentContext +) { + /** + * We construct a tree of immutable and shrinkable nodes such that shrinkable nodes + * can contain other nodes. + */ + type TreeNode = ImmutableTreeNode | ShrinkableTreeNode; + + interface ImmutableTreeNode { + type: 'immutable'; + element: AI.Element; + cost: number; + } + + interface ShrinkableTreeNode { + type: 'shrinkable'; + element: AI.Element>; + cost: number; + children: TreeNode[]; + } + + /** Converts a conversational `AI.Node` into a shrinkable tree. */ + async function conversationToTreeRoots(conversation: AI.Node): Promise { + const rendered = await render(conversation, { + stop: (e) => isConversationalComponent(e) || e.tag === InternalShrinkable, + }); + + const asTreeNodes = await Promise.all( + rendered.map>(async (value) => { + if (typeof value === 'string') { + return null; + } + + if (value.tag === InternalShrinkable) { + const children = await conversationToTreeRoots(value.props.children); + return { type: 'shrinkable', element: value, cost: aggregateCost(children), children }; + } + + return { + type: 'immutable', + element: value, + cost: await costFn(toConversationMessages([value])[0], render), + }; + }) + ); + + return asTreeNodes.filter((n): n is TreeNode => n !== null); + } + + /** Finds the least important node in the tree, considering cost as a second factor. */ + function leastImportantNode(roots: TreeNode[]): ShrinkableTreeNode | undefined { + function compareImportance(nodeA: ShrinkableTreeNode, nodeB: ShrinkableTreeNode) { + // If the two nodes are of the same importance, consider the higher cost node less important. + return nodeA.element.props.importance - nodeB.element.props.importance || nodeB.cost - nodeA.cost; + } + + let current = undefined as ShrinkableTreeNode | undefined; + roots.forEach((node) => { + if (node.type !== 'shrinkable') { + return; + } + + if (current === undefined || compareImportance(node, current) < 0) { + current = node; + } + + const leastImportantDescendant = leastImportantNode(node.children); + if (leastImportantDescendant !== undefined && compareImportance(leastImportantDescendant, current) < 0) { + current = leastImportantDescendant; + } + }); + + return current; + } + + function aggregateCost(roots: TreeNode[]): number { + return roots.reduce((cost, node) => cost + node.cost, 0); + } + + /** Replaces a single ShrinkableTreeNode in the tree. */ + async function replaceNode(roots: TreeNode[], nodeToReplace: ShrinkableTreeNode): Promise { + const newRoots = await Promise.all( + roots.map>(async (root) => { + if (root === nodeToReplace) { + return conversationToTreeRoots(root.element.props.replacement); + } + + if (root.type !== 'shrinkable') { + return [root]; + } + + // Look for a replacement among the children and recalculate the cost. 
+ const replacementChildren = await replaceNode(root.children, nodeToReplace); + return [ + { + type: 'shrinkable', + element: root.element, + cost: aggregateCost(replacementChildren), + children: replacementChildren, + }, + ]; + }) + ); + + return newRoots.flat(1); + } + + /** Converts the shrinkable tree into a single AI.Node for rendering. */ + function treeRootsToNode(roots: TreeNode[]): AI.Node { + return roots.map((root) => (root.type === 'immutable' ? root.element : treeRootsToNode(root.children))); + } + + const memoized = memo(children); + + // If there are no shrinkable elements, there's no need to evaluate the cost. + const shrinkableOrConversationElements = ( + await render(memoized, { + stop: (e) => isConversationalComponent(e) || e.tag === InternalShrinkable, + }) + ).filter(AI.isElement); + if (!shrinkableOrConversationElements.find((value) => value.tag === InternalShrinkable)) { + return shrinkableOrConversationElements; + } + + let roots = await conversationToTreeRoots(shrinkableOrConversationElements); + while (aggregateCost(roots) > budget) { + const nodeToReplace = leastImportantNode(roots); + if (nodeToReplace === undefined) { + // Nothing left to replace. + break; + } + + logger.debug( + { + node: debug(nodeToReplace.element.props.children, true), + importance: nodeToReplace.element.props.importance, + replacement: debug(nodeToReplace.element.props.replacement, true), + nodeCost: nodeToReplace.cost, + totalCost: aggregateCost(roots), + budget, + }, + 'Replacing shrinkable content' + ); + + // N.B. This currently quadratic in that each time we replace a node we search the entire + // tree for the least important node, and then search _again_ to replace it. If we end up + // doing many replacements we should be smarter about this. + roots = await replaceNode(roots, nodeToReplace); + } + + return treeRootsToNode(roots); +} + +/** + * @hidden + * Indicates that a portion of a conversation is "shrinkable". + */ +export function Shrinkable( + { children, importance, replacement }: { children: Node; importance: number; replacement?: Node }, + { memo }: AI.ComponentContext +) { + // We renders to a separate component so that: + // + // a) The memoization happens in the expected context (that of the ) + // b) The memoization can be applied directly to the replacement and children + // + // This allows them to be taken off the props correctly memoized. + return ( + + {children && memo(children)} + + ); +} + +/** + * @hidden + * An internal component to facilitate prop memoization. See comment in {@link Shrinkable}. + */ +function InternalShrinkable({ children }: { children: Node; importance: number; replacement: Node }) { + return children; +} diff --git a/packages/ai-jsx/src/core/node.ts b/packages/ai-jsx/src/core/node.ts index 28f16cad7..92b1d0a2d 100644 --- a/packages/ai-jsx/src/core/node.ts +++ b/packages/ai-jsx/src/core/node.ts @@ -77,17 +77,30 @@ export function makeIndirectNode(value: T, node: Node): T & In /** @hidden */ export function withContext(renderable: Renderable, context: RenderContext): Element { - function SwitchContext() { - return renderable; + if (isElement(renderable)) { + if (renderable[attachedContextSymbol]) { + // It's already been bound to a context; don't replace it. + return renderable; + } + + const elementWithContext = { + ...renderable, + [attachedContextSymbol]: context, + }; + Object.freeze(elementWithContext); + return elementWithContext; } - const elementWithContext = { - ...(isElement(renderable) ? 
renderable : createElement(SwitchContext, null)), - [attachedContextSymbol]: context, - }; - - Object.freeze(elementWithContext); - return elementWithContext; + // Wrap it in an element and bind to that. + return withContext( + createElement( + function SwitchContext({ children }) { + return children; + }, + { children: renderable } + ), + context + ); } /** @hidden */ diff --git a/packages/ai-jsx/src/lib/openai.tsx b/packages/ai-jsx/src/lib/openai.tsx index a884ff425..ba91789b0 100644 --- a/packages/ai-jsx/src/lib/openai.tsx +++ b/packages/ai-jsx/src/lib/openai.tsx @@ -2,8 +2,6 @@ * This module provides interfaces to OpenAI's various models. * @packageDocumentation */ - -import GPT3Tokenizer from 'gpt3-tokenizer'; import { ChatCompletionFunctions, ChatCompletionRequestMessage, @@ -26,7 +24,7 @@ import { ModelPropsWithChildren, getParametersSchema, } from '../core/completion.js'; -import { AssistantMessage, FunctionCall, renderToConversation } from '../core/conversation.js'; +import { AssistantMessage, ConversationMessage, FunctionCall, renderToConversation } from '../core/conversation.js'; import { AIJSXError, ErrorCode, HttpError } from '../core/errors.js'; import { Image, ImageGenPropsWithChildren } from '../core/image-gen.js'; import { Logger } from '../core/log.js'; @@ -36,6 +34,8 @@ import { ChatOrCompletionModelOrBoth } from './model.js'; import { getEnvVar, patchedUntruncateJson } from './util.js'; import { CreateChatCompletionRequest } from 'openai'; import { debug } from '../core/debug.js'; +import { getEncoding } from 'js-tiktoken'; +import _ from 'lodash'; // https://platform.openai.com/docs/models/model-endpoint-compatibility type ValidCompletionModel = @@ -151,20 +151,21 @@ async function* openAiEventsToJson(iterable: AsyncIterable): AsyncGen } } +const getEncoder = _.once(() => getEncoding('cl100k_base')); + function logitBiasOfTokens(tokens: Record) { - // N.B. We're using GPT3Tokenizer which per https://platform.openai.com/tokenizer "works for most GPT-3 models". - const tokenizer = new GPT3Tokenizer.default({ type: 'gpt3' }); + const tokenizer = getEncoder(); return Object.fromEntries( Object.entries(tokens).map(([token, bias]) => { - const encoded = tokenizer.encode(token) as { bpe: number[]; text: string[] }; - if (encoded.bpe.length > 1) { + const encoded = tokenizer.encode(token); + if (encoded.length > 1) { throw new AIJSXError( - `You can only set logit_bias for a single token, but "${bias}" is ${encoded.bpe.length} tokens.`, + `You can only set logit_bias for a single token, but "${bias}" is ${encoded.length} tokens.`, ErrorCode.LogitBiasBadInput, 'user' ); } - return [encoded.bpe[0], bias]; + return [encoded[0], bias]; }) ); } @@ -282,6 +283,76 @@ export async function* OpenAICompletionModel( return AI.AppendOnlyStream; } +function estimateFunctionTokenCount(functions: Record): number { + // According to https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573 + // function definitions are serialized as TypeScript. We'll use JSON-serialization as an approximation (which + // is almost certainly an overestimate). + return getEncoder().encode(JSON.stringify(functions)).length; +} + +function tokenLimitForChatModel( + model: ValidChatModel, + functionDefinitions?: Record +): number | undefined { + const TOKENS_CONSUMED_BY_REPLY_PREFIX = 3; + const functionEstimate = + chatModelSupportsFunctions(model) && functionDefinitions ? 
estimateFunctionTokenCount(functionDefinitions) : 0; + + switch (model) { + case 'gpt-4': + case 'gpt-4-0314': + case 'gpt-4-0613': + return 8192 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + case 'gpt-4-32k': + case 'gpt-4-32k-0314': + case 'gpt-4-32k-0613': + return 32768 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + case 'gpt-3.5-turbo': + case 'gpt-3.5-turbo-0301': + case 'gpt-3.5-turbo-0613': + return 4096 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + case 'gpt-3.5-turbo-16k': + case 'gpt-3.5-turbo-16k-0613': + return 16384 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + default: + return undefined; + } +} + +async function tokenCountForConversationMessage( + message: ConversationMessage, + render: AI.RenderContext['render'] +): Promise { + const TOKENS_PER_MESSAGE = 3; + const TOKENS_PER_NAME = 1; + const encoder = getEncoder(); + switch (message.type) { + case 'user': + return ( + TOKENS_PER_MESSAGE + + encoder.encode(await render(message.element)).length + + (message.element.props.name ? encoder.encode(message.element.props.name).length + TOKENS_PER_NAME : 0) + ); + case 'assistant': + case 'system': + return TOKENS_PER_MESSAGE + encoder.encode(await render(message.element)).length; + case 'functionCall': + return ( + TOKENS_PER_MESSAGE + + TOKENS_PER_NAME + + encoder.encode(message.element.props.name).length + + encoder.encode(JSON.stringify(message.element.props.args)).length + ); + case 'functionResponse': + return ( + TOKENS_PER_MESSAGE + + TOKENS_PER_NAME + + encoder.encode(await render(message.element.props.children)).length + + encoder.encode(message.element.props.name).length + ); + } +} + /** * Represents an OpenAI text chat model (e.g., `gpt-4`). */ @@ -321,7 +392,19 @@ export async function* OpenAIChatModel( yield AI.AppendOnlyStream; - const conversationMessages = await renderToConversation(props.children, render); + let promptTokenLimit = tokenLimitForChatModel(props.model, props.functionDefinitions); + + // If maxTokens is set, reserve that many tokens for the reply. + if (promptTokenLimit !== undefined && props.maxTokens) { + promptTokenLimit -= props.maxTokens; + } + + const conversationMessages = await renderToConversation( + props.children, + render, + tokenCountForConversationMessage, + promptTokenLimit + ); logger.debug({ messages: conversationMessages.map((m) => debug(m.element, true)) }, 'Got input conversation'); const messages: ChatCompletionRequestMessage[] = await Promise.all( conversationMessages.map(async (message) => { diff --git a/packages/docs/docs/changelog.md b/packages/docs/docs/changelog.md index 54e640dc9..3798b1ecf 100644 --- a/packages/docs/docs/changelog.md +++ b/packages/docs/docs/changelog.md @@ -1,6 +1,10 @@ # Changelog -## 0.8.0 +## 0.8.1 + +- Add support for token-based conversation shrinking via ``. + +## [0.8.0](https://github.com/fixie-ai/ai-jsx/commit/58062b9e42b2ccecd467de90ee1dedf7ec70dfbf) - Move `MdxChatCompletion` to be `MdxSystemMessage`. You can now put this `SystemMessage` in any `ChatCompletion` to prompt the model to give MDX output. 
diff --git a/packages/examples/src/conversation-shrinking.tsx b/packages/examples/src/conversation-shrinking.tsx new file mode 100644 index 000000000..d30473c89 --- /dev/null +++ b/packages/examples/src/conversation-shrinking.tsx @@ -0,0 +1,44 @@ +import * as AI from 'ai-jsx'; +import { ChatCompletion } from 'ai-jsx/core/completion'; +import { ShowConversation, UserMessage, Shrinkable } from 'ai-jsx/core/conversation'; +import { showInspector } from 'ai-jsx/core/inspector'; + +function App() { + const messages = [] as AI.Node[]; + let lastValue = 0; + while (messages.length < 16) { + const counting = []; + for (let i = 0; i < 2 ** messages.length; ++i) { + counting.push(++lastValue); + } + + messages.push( + + {counting.join(' ')} + + ); + } + + return ( + <> + We counted to {lastValue} but messages will be evicted such that the AI won't see the messages that overflowed the + context window.{'\n\n'} + ( + <> + {m.type}: {m.element} + {'\n'} + + )} + > + + I'm going to count now: + {messages} + How high did I count? + + + + ); +} + +showInspector(); diff --git a/yarn.lock b/yarn.lock index 70e9115c5..aef875099 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7791,9 +7791,9 @@ __metadata: eslint-config-nth: ^2.0.1 eslint-plugin-jest: ^27.2.2 globby: ^13.1.4 - gpt3-tokenizer: ^1.1.5 ink: ^4.2.0 jest: ^29.5.0 + js-tiktoken: ^1.0.7 js-yaml: ^4.1.0 langchain: ^0.0.81 load-json-file: ^7.0.1 @@ -8184,13 +8184,6 @@ __metadata: languageName: node linkType: hard -"array-keyed-map@npm:^2.1.3": - version: 2.1.3 - resolution: "array-keyed-map@npm:2.1.3" - checksum: 53b45671922bbe7a7eb34950887fd4b1ba4154b0e0002523efa8fb352b320aaa6f798e4d7af151fc8dd4f3365996974f40f141e201d9aae90ea8b3383daf98f8 - languageName: node - linkType: hard - "array-union@npm:^2.1.0": version: 2.1.0 resolution: "array-union@npm:2.1.0" @@ -13690,15 +13683,6 @@ __metadata: languageName: node linkType: hard -"gpt3-tokenizer@npm:^1.1.5": - version: 1.1.5 - resolution: "gpt3-tokenizer@npm:1.1.5" - dependencies: - array-keyed-map: ^2.1.3 - checksum: 9d458f1d57fc381f7e348780c90aa25758a224b2b06424ea255c6f0693b16d06eaaa4f4a26635d916cd6131ed47eba7f66bde81190a5f5bc606aa9a97bacb51a - languageName: node - linkType: hard - "graceful-fs@npm:^4.1.11, graceful-fs@npm:^4.1.2, graceful-fs@npm:^4.1.6, graceful-fs@npm:^4.2.0, graceful-fs@npm:^4.2.4, graceful-fs@npm:^4.2.6, graceful-fs@npm:^4.2.9": version: 4.2.11 resolution: "graceful-fs@npm:4.2.11" @@ -16359,7 +16343,7 @@ __metadata: languageName: node linkType: hard -"js-tiktoken@npm:^1.0.6": +"js-tiktoken@npm:^1.0.6, js-tiktoken@npm:^1.0.7": version: 1.0.7 resolution: "js-tiktoken@npm:1.0.7" dependencies: From 9246387b45761350bcd34449a7d618de304c1904 Mon Sep 17 00:00:00 2001 From: Peter Salas Date: Thu, 3 Aug 2023 09:58:11 -0700 Subject: [PATCH 2/2] Address PR feedback --- packages/ai-jsx/src/core/conversation.tsx | 48 +++++++++++++++----- packages/ai-jsx/src/lib/anthropic.tsx | 1 + packages/ai-jsx/src/lib/replicate-llama2.tsx | 1 + packages/examples/package.json | 1 + 4 files changed, 40 insertions(+), 11 deletions(-) diff --git a/packages/ai-jsx/src/core/conversation.tsx b/packages/ai-jsx/src/core/conversation.tsx index 8802bf3be..89d5d3ff0 100644 --- a/packages/ai-jsx/src/core/conversation.tsx +++ b/packages/ai-jsx/src/core/conversation.tsx @@ -3,6 +3,7 @@ import * as AI from '../index.js'; import { Node } from '../index.js'; import { AIJSXError, ErrorCode } from '../core/errors.js'; import { debug } from './debug.js'; +import _ from 'lodash'; /** * Provide a System Message to the LLM, for use within a 
{@link ChatCompletion}. @@ -326,17 +327,40 @@ export async function* ShowConversation( /** * @hidden - * "Shrinks" a conversation messages according to a cost function (i.e. token length), - * a budget (i.e. context window size), and the `importance` prop set on any `` + * "Shrinks" a conversation messages according to a cost function (e.g. token length), + * a budget (e.g. context window size), and the `importance` prop set on any `` * components within the conversation. * - * Currently, `` components must wrap conversational messages and have no - * effect within the messages themselves. + * Currently, `` components must wrap conversational components and do not allow + * content to shrink _within_ conversational components. For example this: + * + * @example + * ```tsx + * // Do not do this! + * + * Content + * Not shrinkable! + * Content + * + * ``` + * + * is not shrinkable. Instead, do this: + * + * * @example + * ```tsx + * Content Content}> + * + * Content + * Shrinkable! + * Content + * + * ``` */ export async function ShrinkConversation( { cost: costFn, - budget: budget, + budget, children, }: { cost: (message: ConversationMessage, render: AI.RenderContext['render']) => Promise; @@ -392,7 +416,7 @@ export async function ShrinkConversation( return asTreeNodes.filter((n): n is TreeNode => n !== null); } - /** Finds the least important node in the tree, considering cost as a second factor. */ + /** Finds the least important node in any of the trees, considering cost as a second factor. */ function leastImportantNode(roots: TreeNode[]): ShrinkableTreeNode | undefined { function compareImportance(nodeA: ShrinkableTreeNode, nodeB: ShrinkableTreeNode) { // If the two nodes are of the same importance, consider the higher cost node less important. @@ -419,13 +443,13 @@ export async function ShrinkConversation( } function aggregateCost(roots: TreeNode[]): number { - return roots.reduce((cost, node) => cost + node.cost, 0); + return _.sumBy(roots, (node) => node.cost); } /** Replaces a single ShrinkableTreeNode in the tree. */ async function replaceNode(roots: TreeNode[], nodeToReplace: ShrinkableTreeNode): Promise { const newRoots = await Promise.all( - roots.map>(async (root) => { + roots.flatMap>(async (root) => { if (root === nodeToReplace) { return conversationToTreeRoots(root.element.props.replacement); } @@ -488,7 +512,7 @@ export async function ShrinkConversation( ); // N.B. This currently quadratic in that each time we replace a node we search the entire - // tree for the least important node, and then search _again_ to replace it. If we end up + // tree for the least important node (and then search again to replace it). If we end up // doing many replacements we should be smarter about this. roots = await replaceNode(roots, nodeToReplace); } @@ -504,12 +528,14 @@ export function Shrinkable( { children, importance, replacement }: { children: Node; importance: number; replacement?: Node }, { memo }: AI.ComponentContext ) { - // We renders to a separate component so that: + // We render to a separate component so that: // // a) The memoization happens in the expected context (that of the ) // b) The memoization can be applied directly to the replacement and children // - // This allows them to be taken off the props correctly memoized. + // This allows `children` and `replacement` to be taken off the props of + // and be correctly memoized, which would not otherwise be the case even if the + // or were memoized. 
   return (
     <InternalShrinkable importance={importance} replacement={replacement && memo(replacement)}>
       {children && memo(children)}
     </InternalShrinkable>
   );
 }
diff --git a/packages/ai-jsx/src/lib/anthropic.tsx b/packages/ai-jsx/src/lib/anthropic.tsx
index f062f530e..f82dd94ae 100644
--- a/packages/ai-jsx/src/lib/anthropic.tsx
+++ b/packages/ai-jsx/src/lib/anthropic.tsx
@@ -96,6 +96,7 @@ export async function* AnthropicChatModel(
   }
   yield AI.AppendOnlyStream;
   const messages = await Promise.all(
+    // TODO: Support token budget/conversation shrinking
     (
       await renderToConversation(props.children, render)
     )
diff --git a/packages/ai-jsx/src/lib/replicate-llama2.tsx b/packages/ai-jsx/src/lib/replicate-llama2.tsx
index f947324bb..2e047ac76 100644
--- a/packages/ai-jsx/src/lib/replicate-llama2.tsx
+++ b/packages/ai-jsx/src/lib/replicate-llama2.tsx
@@ -77,6 +77,7 @@ export async function* Llama2ChatModel(
 ): AI.RenderableStream {
   yield AI.AppendOnlyStream;
 
+  // TODO: Support token budget/conversation shrinking
   const messageElements = await renderToConversation(props.children, render);
   const systemMessage = messageElements.filter((e) => e.type == 'system');
   const userMessages = messageElements.filter((e) => e.type == 'user');
diff --git a/packages/examples/package.json b/packages/examples/package.json
index 356c83987..61f0e59ec 100644
--- a/packages/examples/package.json
+++ b/packages/examples/package.json
@@ -51,6 +51,7 @@
     "demo:prompting-basics": "yarn build && node dist/prompting-basics.js",
     "demo:validated-generation": "yarn build && node dist/validated-generation.js",
     "demo:image-generation": "yarn build && node dist/image-generation.js",
+    "demo:shrink": "yarn build && node dist/conversation-shrinking.js",
    "view-logs": "cat ai-jsx.log | pino-pretty",
     "lint": "eslint . --max-warnings 0",
     "lint:fix": "eslint . --fix",
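
A minimal usage sketch of the conversation shrinking introduced by this patch. It assumes `ChatCompletion` resolves to an OpenAI chat model (so the token-based cost function and context-window budget from `openai.tsx` apply) and the same ESM/JSX setup as the `packages/examples` workspace; the `App` component, its props, and the "[earlier message omitted]" placeholder are illustrative only.

```tsx
import * as AI from 'ai-jsx';
import { ChatCompletion } from 'ai-jsx/core/completion';
import { Shrinkable, SystemMessage, UserMessage } from 'ai-jsx/core/conversation';

function App({ oldTurns, question }: { oldTurns: string[]; question: string }) {
  return (
    <ChatCompletion>
      <SystemMessage>You are a helpful assistant.</SystemMessage>
      {/* Older turns get lower importance, so they are replaced first if the prompt
          would exceed the model's context window (minus any reserved reply tokens). */}
      {oldTurns.map((turn, i) => (
        <Shrinkable importance={i} replacement={<UserMessage>[earlier message omitted]</UserMessage>}>
          <UserMessage>{turn}</UserMessage>
        </Shrinkable>
      ))}
      {/* The latest question is not wrapped in <Shrinkable>, so it is never dropped. */}
      <UserMessage>{question}</UserMessage>
    </ChatCompletion>
  );
}

console.log(
  await AI.createRenderContext().render(
    <App oldTurns={['My favorite color is blue.', 'I live in Seattle.']} question="What is my favorite color?" />
  )
);
```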