From d1beb9d8c507183f166972c946e8a41a93ce21e5 Mon Sep 17 00:00:00 2001 From: Peter Salas Date: Wed, 2 Aug 2023 14:18:35 -0700 Subject: [PATCH 1/2] Add ability to shrink conversation based on cost/budget/importance --- packages/ai-jsx/package.json | 4 +- .../src/batteries/natural-language-router.tsx | 2 +- packages/ai-jsx/src/core/conversation.tsx | 219 +++++++++++++++++- packages/ai-jsx/src/core/node.ts | 31 ++- packages/ai-jsx/src/lib/openai.tsx | 103 +++++++- packages/docs/docs/changelog.md | 6 +- .../examples/src/conversation-shrinking.tsx | 44 ++++ yarn.lock | 20 +- 8 files changed, 386 insertions(+), 43 deletions(-) create mode 100644 packages/examples/src/conversation-shrinking.tsx diff --git a/packages/ai-jsx/package.json b/packages/ai-jsx/package.json index 0ea866478..e89662f95 100644 --- a/packages/ai-jsx/package.json +++ b/packages/ai-jsx/package.json @@ -4,7 +4,7 @@ "repository": "fixie-ai/ai-jsx", "bugs": "https://github.com/fixie-ai/ai-jsx/issues", "homepage": "https://ai-jsx.com", - "version": "0.8.0", + "version": "0.8.1", "volta": { "extends": "../../package.json" }, @@ -342,8 +342,8 @@ "axios": "^1.4.0", "cli-highlight": "^2.1.11", "cli-spinners": "^2.9.0", - "gpt3-tokenizer": "^1.1.5", "ink": "^4.2.0", + "js-tiktoken": "^1.0.7", "js-yaml": "^4.1.0", "langchain": "^0.0.81", "lodash": "^4.17.21", diff --git a/packages/ai-jsx/src/batteries/natural-language-router.tsx b/packages/ai-jsx/src/batteries/natural-language-router.tsx index 23953d822..4492ff0ad 100644 --- a/packages/ai-jsx/src/batteries/natural-language-router.tsx +++ b/packages/ai-jsx/src/batteries/natural-language-router.tsx @@ -112,7 +112,7 @@ export async function* NaturalLanguageRouter(props: { children: Node; query: Nod } const props = e.props as RouteProps; - return props.unmatched ? choiceIndex === 0 : props.when === whenOptions[choiceIndex]; + return props.unmatched ? choiceIndex === whenOptions.length - 1 : props.when === whenOptions[choiceIndex]; }); } diff --git a/packages/ai-jsx/src/core/conversation.tsx b/packages/ai-jsx/src/core/conversation.tsx index 0c621a5c4..8802bf3be 100644 --- a/packages/ai-jsx/src/core/conversation.tsx +++ b/packages/ai-jsx/src/core/conversation.tsx @@ -2,6 +2,7 @@ import { ChatCompletionResponseMessage } from 'openai'; import * as AI from '../index.js'; import { Node } from '../index.js'; import { AIJSXError, ErrorCode } from '../core/errors.js'; +import { debug } from './debug.js'; /** * Provide a System Message to the LLM, for use within a {@link ChatCompletion}. @@ -183,8 +184,21 @@ function toConversationMessages(partialRendering: AI.PartiallyRendered[]): Conve } /** @hidden */ -export async function renderToConversation(conversation: AI.Node, render: AI.ComponentContext['render']) { - return toConversationMessages(await render(conversation, { stop: isConversationalComponent })); +export async function renderToConversation( + conversation: AI.Node, + render: AI.ComponentContext['render'], + cost?: (message: ConversationMessage, render: AI.ComponentContext['render']) => Promise, + budget?: number +) { + const conversationToUse = + cost && budget ? ( + + {conversation} + + ) : ( + conversation + ); + return toConversationMessages(await render(conversationToUse, { stop: isConversationalComponent })); } /** @@ -309,3 +323,204 @@ export async function* ShowConversation( // we can indicate that we've already yielded the final frame. return AI.AppendOnlyStream; } + +/** + * @hidden + * "Shrinks" a conversation messages according to a cost function (i.e. 
token length), + * a budget (i.e. context window size), and the `importance` prop set on any `` + * components within the conversation. + * + * Currently, `` components must wrap conversational messages and have no + * effect within the messages themselves. + */ +export async function ShrinkConversation( + { + cost: costFn, + budget: budget, + children, + }: { + cost: (message: ConversationMessage, render: AI.RenderContext['render']) => Promise; + budget: number; + children: Node; + }, + { render, memo, logger }: AI.ComponentContext +) { + /** + * We construct a tree of immutable and shrinkable nodes such that shrinkable nodes + * can contain other nodes. + */ + type TreeNode = ImmutableTreeNode | ShrinkableTreeNode; + + interface ImmutableTreeNode { + type: 'immutable'; + element: AI.Element; + cost: number; + } + + interface ShrinkableTreeNode { + type: 'shrinkable'; + element: AI.Element>; + cost: number; + children: TreeNode[]; + } + + /** Converts a conversational `AI.Node` into a shrinkable tree. */ + async function conversationToTreeRoots(conversation: AI.Node): Promise { + const rendered = await render(conversation, { + stop: (e) => isConversationalComponent(e) || e.tag === InternalShrinkable, + }); + + const asTreeNodes = await Promise.all( + rendered.map>(async (value) => { + if (typeof value === 'string') { + return null; + } + + if (value.tag === InternalShrinkable) { + const children = await conversationToTreeRoots(value.props.children); + return { type: 'shrinkable', element: value, cost: aggregateCost(children), children }; + } + + return { + type: 'immutable', + element: value, + cost: await costFn(toConversationMessages([value])[0], render), + }; + }) + ); + + return asTreeNodes.filter((n): n is TreeNode => n !== null); + } + + /** Finds the least important node in the tree, considering cost as a second factor. */ + function leastImportantNode(roots: TreeNode[]): ShrinkableTreeNode | undefined { + function compareImportance(nodeA: ShrinkableTreeNode, nodeB: ShrinkableTreeNode) { + // If the two nodes are of the same importance, consider the higher cost node less important. + return nodeA.element.props.importance - nodeB.element.props.importance || nodeB.cost - nodeA.cost; + } + + let current = undefined as ShrinkableTreeNode | undefined; + roots.forEach((node) => { + if (node.type !== 'shrinkable') { + return; + } + + if (current === undefined || compareImportance(node, current) < 0) { + current = node; + } + + const leastImportantDescendant = leastImportantNode(node.children); + if (leastImportantDescendant !== undefined && compareImportance(leastImportantDescendant, current) < 0) { + current = leastImportantDescendant; + } + }); + + return current; + } + + function aggregateCost(roots: TreeNode[]): number { + return roots.reduce((cost, node) => cost + node.cost, 0); + } + + /** Replaces a single ShrinkableTreeNode in the tree. */ + async function replaceNode(roots: TreeNode[], nodeToReplace: ShrinkableTreeNode): Promise { + const newRoots = await Promise.all( + roots.map>(async (root) => { + if (root === nodeToReplace) { + return conversationToTreeRoots(root.element.props.replacement); + } + + if (root.type !== 'shrinkable') { + return [root]; + } + + // Look for a replacement among the children and recalculate the cost. 
+ const replacementChildren = await replaceNode(root.children, nodeToReplace); + return [ + { + type: 'shrinkable', + element: root.element, + cost: aggregateCost(replacementChildren), + children: replacementChildren, + }, + ]; + }) + ); + + return newRoots.flat(1); + } + + /** Converts the shrinkable tree into a single AI.Node for rendering. */ + function treeRootsToNode(roots: TreeNode[]): AI.Node { + return roots.map((root) => (root.type === 'immutable' ? root.element : treeRootsToNode(root.children))); + } + + const memoized = memo(children); + + // If there are no shrinkable elements, there's no need to evaluate the cost. + const shrinkableOrConversationElements = ( + await render(memoized, { + stop: (e) => isConversationalComponent(e) || e.tag === InternalShrinkable, + }) + ).filter(AI.isElement); + if (!shrinkableOrConversationElements.find((value) => value.tag === InternalShrinkable)) { + return shrinkableOrConversationElements; + } + + let roots = await conversationToTreeRoots(shrinkableOrConversationElements); + while (aggregateCost(roots) > budget) { + const nodeToReplace = leastImportantNode(roots); + if (nodeToReplace === undefined) { + // Nothing left to replace. + break; + } + + logger.debug( + { + node: debug(nodeToReplace.element.props.children, true), + importance: nodeToReplace.element.props.importance, + replacement: debug(nodeToReplace.element.props.replacement, true), + nodeCost: nodeToReplace.cost, + totalCost: aggregateCost(roots), + budget, + }, + 'Replacing shrinkable content' + ); + + // N.B. This currently quadratic in that each time we replace a node we search the entire + // tree for the least important node, and then search _again_ to replace it. If we end up + // doing many replacements we should be smarter about this. + roots = await replaceNode(roots, nodeToReplace); + } + + return treeRootsToNode(roots); +} + +/** + * @hidden + * Indicates that a portion of a conversation is "shrinkable". + */ +export function Shrinkable( + { children, importance, replacement }: { children: Node; importance: number; replacement?: Node }, + { memo }: AI.ComponentContext +) { + // We renders to a separate component so that: + // + // a) The memoization happens in the expected context (that of the ) + // b) The memoization can be applied directly to the replacement and children + // + // This allows them to be taken off the props correctly memoized. + return ( + + {children && memo(children)} + + ); +} + +/** + * @hidden + * An internal component to facilitate prop memoization. See comment in {@link Shrinkable}. + */ +function InternalShrinkable({ children }: { children: Node; importance: number; replacement: Node }) { + return children; +} diff --git a/packages/ai-jsx/src/core/node.ts b/packages/ai-jsx/src/core/node.ts index 28f16cad7..92b1d0a2d 100644 --- a/packages/ai-jsx/src/core/node.ts +++ b/packages/ai-jsx/src/core/node.ts @@ -77,17 +77,30 @@ export function makeIndirectNode(value: T, node: Node): T & In /** @hidden */ export function withContext(renderable: Renderable, context: RenderContext): Element { - function SwitchContext() { - return renderable; + if (isElement(renderable)) { + if (renderable[attachedContextSymbol]) { + // It's already been bound to a context; don't replace it. + return renderable; + } + + const elementWithContext = { + ...renderable, + [attachedContextSymbol]: context, + }; + Object.freeze(elementWithContext); + return elementWithContext; } - const elementWithContext = { - ...(isElement(renderable) ? 
renderable : createElement(SwitchContext, null)), - [attachedContextSymbol]: context, - }; - - Object.freeze(elementWithContext); - return elementWithContext; + // Wrap it in an element and bind to that. + return withContext( + createElement( + function SwitchContext({ children }) { + return children; + }, + { children: renderable } + ), + context + ); } /** @hidden */ diff --git a/packages/ai-jsx/src/lib/openai.tsx b/packages/ai-jsx/src/lib/openai.tsx index a884ff425..ba91789b0 100644 --- a/packages/ai-jsx/src/lib/openai.tsx +++ b/packages/ai-jsx/src/lib/openai.tsx @@ -2,8 +2,6 @@ * This module provides interfaces to OpenAI's various models. * @packageDocumentation */ - -import GPT3Tokenizer from 'gpt3-tokenizer'; import { ChatCompletionFunctions, ChatCompletionRequestMessage, @@ -26,7 +24,7 @@ import { ModelPropsWithChildren, getParametersSchema, } from '../core/completion.js'; -import { AssistantMessage, FunctionCall, renderToConversation } from '../core/conversation.js'; +import { AssistantMessage, ConversationMessage, FunctionCall, renderToConversation } from '../core/conversation.js'; import { AIJSXError, ErrorCode, HttpError } from '../core/errors.js'; import { Image, ImageGenPropsWithChildren } from '../core/image-gen.js'; import { Logger } from '../core/log.js'; @@ -36,6 +34,8 @@ import { ChatOrCompletionModelOrBoth } from './model.js'; import { getEnvVar, patchedUntruncateJson } from './util.js'; import { CreateChatCompletionRequest } from 'openai'; import { debug } from '../core/debug.js'; +import { getEncoding } from 'js-tiktoken'; +import _ from 'lodash'; // https://platform.openai.com/docs/models/model-endpoint-compatibility type ValidCompletionModel = @@ -151,20 +151,21 @@ async function* openAiEventsToJson(iterable: AsyncIterable): AsyncGen } } +const getEncoder = _.once(() => getEncoding('cl100k_base')); + function logitBiasOfTokens(tokens: Record) { - // N.B. We're using GPT3Tokenizer which per https://platform.openai.com/tokenizer "works for most GPT-3 models". - const tokenizer = new GPT3Tokenizer.default({ type: 'gpt3' }); + const tokenizer = getEncoder(); return Object.fromEntries( Object.entries(tokens).map(([token, bias]) => { - const encoded = tokenizer.encode(token) as { bpe: number[]; text: string[] }; - if (encoded.bpe.length > 1) { + const encoded = tokenizer.encode(token); + if (encoded.length > 1) { throw new AIJSXError( - `You can only set logit_bias for a single token, but "${bias}" is ${encoded.bpe.length} tokens.`, + `You can only set logit_bias for a single token, but "${bias}" is ${encoded.length} tokens.`, ErrorCode.LogitBiasBadInput, 'user' ); } - return [encoded.bpe[0], bias]; + return [encoded[0], bias]; }) ); } @@ -282,6 +283,76 @@ export async function* OpenAICompletionModel( return AI.AppendOnlyStream; } +function estimateFunctionTokenCount(functions: Record): number { + // According to https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573 + // function definitions are serialized as TypeScript. We'll use JSON-serialization as an approximation (which + // is almost certainly an overestimate). + return getEncoder().encode(JSON.stringify(functions)).length; +} + +function tokenLimitForChatModel( + model: ValidChatModel, + functionDefinitions?: Record +): number | undefined { + const TOKENS_CONSUMED_BY_REPLY_PREFIX = 3; + const functionEstimate = + chatModelSupportsFunctions(model) && functionDefinitions ? 
estimateFunctionTokenCount(functionDefinitions) : 0; + + switch (model) { + case 'gpt-4': + case 'gpt-4-0314': + case 'gpt-4-0613': + return 8192 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + case 'gpt-4-32k': + case 'gpt-4-32k-0314': + case 'gpt-4-32k-0613': + return 32768 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + case 'gpt-3.5-turbo': + case 'gpt-3.5-turbo-0301': + case 'gpt-3.5-turbo-0613': + return 4096 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + case 'gpt-3.5-turbo-16k': + case 'gpt-3.5-turbo-16k-0613': + return 16384 - functionEstimate - TOKENS_CONSUMED_BY_REPLY_PREFIX; + default: + return undefined; + } +} + +async function tokenCountForConversationMessage( + message: ConversationMessage, + render: AI.RenderContext['render'] +): Promise { + const TOKENS_PER_MESSAGE = 3; + const TOKENS_PER_NAME = 1; + const encoder = getEncoder(); + switch (message.type) { + case 'user': + return ( + TOKENS_PER_MESSAGE + + encoder.encode(await render(message.element)).length + + (message.element.props.name ? encoder.encode(message.element.props.name).length + TOKENS_PER_NAME : 0) + ); + case 'assistant': + case 'system': + return TOKENS_PER_MESSAGE + encoder.encode(await render(message.element)).length; + case 'functionCall': + return ( + TOKENS_PER_MESSAGE + + TOKENS_PER_NAME + + encoder.encode(message.element.props.name).length + + encoder.encode(JSON.stringify(message.element.props.args)).length + ); + case 'functionResponse': + return ( + TOKENS_PER_MESSAGE + + TOKENS_PER_NAME + + encoder.encode(await render(message.element.props.children)).length + + encoder.encode(message.element.props.name).length + ); + } +} + /** * Represents an OpenAI text chat model (e.g., `gpt-4`). */ @@ -321,7 +392,19 @@ export async function* OpenAIChatModel( yield AI.AppendOnlyStream; - const conversationMessages = await renderToConversation(props.children, render); + let promptTokenLimit = tokenLimitForChatModel(props.model, props.functionDefinitions); + + // If maxTokens is set, reserve that many tokens for the reply. + if (promptTokenLimit !== undefined && props.maxTokens) { + promptTokenLimit -= props.maxTokens; + } + + const conversationMessages = await renderToConversation( + props.children, + render, + tokenCountForConversationMessage, + promptTokenLimit + ); logger.debug({ messages: conversationMessages.map((m) => debug(m.element, true)) }, 'Got input conversation'); const messages: ChatCompletionRequestMessage[] = await Promise.all( conversationMessages.map(async (message) => { diff --git a/packages/docs/docs/changelog.md b/packages/docs/docs/changelog.md index 54e640dc9..3798b1ecf 100644 --- a/packages/docs/docs/changelog.md +++ b/packages/docs/docs/changelog.md @@ -1,6 +1,10 @@ # Changelog -## 0.8.0 +## 0.8.1 + +- Add support for token-based conversation shrinking via ``. + +## [0.8.0](https://github.com/fixie-ai/ai-jsx/commit/58062b9e42b2ccecd467de90ee1dedf7ec70dfbf) - Move `MdxChatCompletion` to be `MdxSystemMessage`. You can now put this `SystemMessage` in any `ChatCompletion` to prompt the model to give MDX output. 
diff --git a/packages/examples/src/conversation-shrinking.tsx b/packages/examples/src/conversation-shrinking.tsx new file mode 100644 index 000000000..d30473c89 --- /dev/null +++ b/packages/examples/src/conversation-shrinking.tsx @@ -0,0 +1,44 @@ +import * as AI from 'ai-jsx'; +import { ChatCompletion } from 'ai-jsx/core/completion'; +import { ShowConversation, UserMessage, Shrinkable } from 'ai-jsx/core/conversation'; +import { showInspector } from 'ai-jsx/core/inspector'; + +function App() { + const messages = [] as AI.Node[]; + let lastValue = 0; + while (messages.length < 16) { + const counting = []; + for (let i = 0; i < 2 ** messages.length; ++i) { + counting.push(++lastValue); + } + + messages.push( + + {counting.join(' ')} + + ); + } + + return ( + <> + We counted to {lastValue} but messages will be evicted such that the AI won't see the messages that overflowed the + context window.{'\n\n'} + ( + <> + {m.type}: {m.element} + {'\n'} + + )} + > + + I'm going to count now: + {messages} + How high did I count? + + + + ); +} + +showInspector(); diff --git a/yarn.lock b/yarn.lock index 70e9115c5..aef875099 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7791,9 +7791,9 @@ __metadata: eslint-config-nth: ^2.0.1 eslint-plugin-jest: ^27.2.2 globby: ^13.1.4 - gpt3-tokenizer: ^1.1.5 ink: ^4.2.0 jest: ^29.5.0 + js-tiktoken: ^1.0.7 js-yaml: ^4.1.0 langchain: ^0.0.81 load-json-file: ^7.0.1 @@ -8184,13 +8184,6 @@ __metadata: languageName: node linkType: hard -"array-keyed-map@npm:^2.1.3": - version: 2.1.3 - resolution: "array-keyed-map@npm:2.1.3" - checksum: 53b45671922bbe7a7eb34950887fd4b1ba4154b0e0002523efa8fb352b320aaa6f798e4d7af151fc8dd4f3365996974f40f141e201d9aae90ea8b3383daf98f8 - languageName: node - linkType: hard - "array-union@npm:^2.1.0": version: 2.1.0 resolution: "array-union@npm:2.1.0" @@ -13690,15 +13683,6 @@ __metadata: languageName: node linkType: hard -"gpt3-tokenizer@npm:^1.1.5": - version: 1.1.5 - resolution: "gpt3-tokenizer@npm:1.1.5" - dependencies: - array-keyed-map: ^2.1.3 - checksum: 9d458f1d57fc381f7e348780c90aa25758a224b2b06424ea255c6f0693b16d06eaaa4f4a26635d916cd6131ed47eba7f66bde81190a5f5bc606aa9a97bacb51a - languageName: node - linkType: hard - "graceful-fs@npm:^4.1.11, graceful-fs@npm:^4.1.2, graceful-fs@npm:^4.1.6, graceful-fs@npm:^4.2.0, graceful-fs@npm:^4.2.4, graceful-fs@npm:^4.2.6, graceful-fs@npm:^4.2.9": version: 4.2.11 resolution: "graceful-fs@npm:4.2.11" @@ -16359,7 +16343,7 @@ __metadata: languageName: node linkType: hard -"js-tiktoken@npm:^1.0.6": +"js-tiktoken@npm:^1.0.6, js-tiktoken@npm:^1.0.7": version: 1.0.7 resolution: "js-tiktoken@npm:1.0.7" dependencies: From 9246387b45761350bcd34449a7d618de304c1904 Mon Sep 17 00:00:00 2001 From: Peter Salas Date: Thu, 3 Aug 2023 09:58:11 -0700 Subject: [PATCH 2/2] Address PR feedback --- packages/ai-jsx/src/core/conversation.tsx | 48 +++++++++++++++----- packages/ai-jsx/src/lib/anthropic.tsx | 1 + packages/ai-jsx/src/lib/replicate-llama2.tsx | 1 + packages/examples/package.json | 1 + 4 files changed, 40 insertions(+), 11 deletions(-) diff --git a/packages/ai-jsx/src/core/conversation.tsx b/packages/ai-jsx/src/core/conversation.tsx index 8802bf3be..89d5d3ff0 100644 --- a/packages/ai-jsx/src/core/conversation.tsx +++ b/packages/ai-jsx/src/core/conversation.tsx @@ -3,6 +3,7 @@ import * as AI from '../index.js'; import { Node } from '../index.js'; import { AIJSXError, ErrorCode } from '../core/errors.js'; import { debug } from './debug.js'; +import _ from 'lodash'; /** * Provide a System Message to the LLM, for use within a 
{@link ChatCompletion}. @@ -326,17 +327,40 @@ export async function* ShowConversation( /** * @hidden - * "Shrinks" a conversation messages according to a cost function (i.e. token length), - * a budget (i.e. context window size), and the `importance` prop set on any `` + * "Shrinks" a conversation messages according to a cost function (e.g. token length), + * a budget (e.g. context window size), and the `importance` prop set on any `` * components within the conversation. * - * Currently, `` components must wrap conversational messages and have no - * effect within the messages themselves. + * Currently, `` components must wrap conversational components and do not allow + * content to shrink _within_ conversational components. For example this: + * + * @example + * ```tsx + * // Do not do this! + * + * Content + * Not shrinkable! + * Content + * + * ``` + * + * is not shrinkable. Instead, do this: + * + * * @example + * ```tsx + * Content Content}> + * + * Content + * Shrinkable! + * Content + * + * ``` */ export async function ShrinkConversation( { cost: costFn, - budget: budget, + budget, children, }: { cost: (message: ConversationMessage, render: AI.RenderContext['render']) => Promise; @@ -392,7 +416,7 @@ export async function ShrinkConversation( return asTreeNodes.filter((n): n is TreeNode => n !== null); } - /** Finds the least important node in the tree, considering cost as a second factor. */ + /** Finds the least important node in any of the trees, considering cost as a second factor. */ function leastImportantNode(roots: TreeNode[]): ShrinkableTreeNode | undefined { function compareImportance(nodeA: ShrinkableTreeNode, nodeB: ShrinkableTreeNode) { // If the two nodes are of the same importance, consider the higher cost node less important. @@ -419,13 +443,13 @@ export async function ShrinkConversation( } function aggregateCost(roots: TreeNode[]): number { - return roots.reduce((cost, node) => cost + node.cost, 0); + return _.sumBy(roots, (node) => node.cost); } /** Replaces a single ShrinkableTreeNode in the tree. */ async function replaceNode(roots: TreeNode[], nodeToReplace: ShrinkableTreeNode): Promise { const newRoots = await Promise.all( - roots.map>(async (root) => { + roots.flatMap>(async (root) => { if (root === nodeToReplace) { return conversationToTreeRoots(root.element.props.replacement); } @@ -488,7 +512,7 @@ export async function ShrinkConversation( ); // N.B. This currently quadratic in that each time we replace a node we search the entire - // tree for the least important node, and then search _again_ to replace it. If we end up + // tree for the least important node (and then search again to replace it). If we end up // doing many replacements we should be smarter about this. roots = await replaceNode(roots, nodeToReplace); } @@ -504,12 +528,14 @@ export function Shrinkable( { children, importance, replacement }: { children: Node; importance: number; replacement?: Node }, { memo }: AI.ComponentContext ) { - // We renders to a separate component so that: + // We render to a separate component so that: // // a) The memoization happens in the expected context (that of the ) // b) The memoization can be applied directly to the replacement and children // - // This allows them to be taken off the props correctly memoized. + // This allows `children` and `replacement` to be taken off the props of + // and be correctly memoized, which would not otherwise be the case even if the + // or were memoized. 
   return (
     <InternalShrinkable importance={importance} replacement={replacement && memo(replacement)}>
       {children && memo(children)}
     </InternalShrinkable>
   );
 }
diff --git a/packages/ai-jsx/src/lib/anthropic.tsx b/packages/ai-jsx/src/lib/anthropic.tsx
index f062f530e..f82dd94ae 100644
--- a/packages/ai-jsx/src/lib/anthropic.tsx
+++ b/packages/ai-jsx/src/lib/anthropic.tsx
@@ -96,6 +96,7 @@ export async function* AnthropicChatModel(
   }
   yield AI.AppendOnlyStream;
   const messages = await Promise.all(
+    // TODO: Support token budget/conversation shrinking
     (
       await renderToConversation(props.children, render)
     )
diff --git a/packages/ai-jsx/src/lib/replicate-llama2.tsx b/packages/ai-jsx/src/lib/replicate-llama2.tsx
index f947324bb..2e047ac76 100644
--- a/packages/ai-jsx/src/lib/replicate-llama2.tsx
+++ b/packages/ai-jsx/src/lib/replicate-llama2.tsx
@@ -77,6 +77,7 @@ export async function* Llama2ChatModel(
 ): AI.RenderableStream {
   yield AI.AppendOnlyStream;
 
+  // TODO: Support token budget/conversation shrinking
   const messageElements = await renderToConversation(props.children, render);
   const systemMessage = messageElements.filter((e) => e.type == 'system');
   const userMessages = messageElements.filter((e) => e.type == 'user');
diff --git a/packages/examples/package.json b/packages/examples/package.json
index 356c83987..61f0e59ec 100644
--- a/packages/examples/package.json
+++ b/packages/examples/package.json
@@ -51,6 +51,7 @@
     "demo:prompting-basics": "yarn build && node dist/prompting-basics.js",
     "demo:validated-generation": "yarn build && node dist/validated-generation.js",
     "demo:image-generation": "yarn build && node dist/image-generation.js",
+    "demo:shrink": "yarn build && node dist/conversation-shrinking.js",
    "view-logs": "cat ai-jsx.log | pino-pretty",
     "lint": "eslint . --max-warnings 0",
     "lint:fix": "eslint . --fix",
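
A minimal usage sketch of the conversation shrinking introduced by this patch. It assumes `ChatCompletion` resolves to an OpenAI chat model (so the token-based cost function and context-window budget from `openai.tsx` apply) and the same ESM/JSX setup as the `packages/examples` workspace; the `App` component, its props, and the "[earlier message omitted]" placeholder are illustrative only.

```tsx
import * as AI from 'ai-jsx';
import { ChatCompletion } from 'ai-jsx/core/completion';
import { Shrinkable, SystemMessage, UserMessage } from 'ai-jsx/core/conversation';

function App({ oldTurns, question }: { oldTurns: string[]; question: string }) {
  return (
    <ChatCompletion>
      <SystemMessage>You are a helpful assistant.</SystemMessage>
      {/* Older turns get lower importance, so they are replaced first if the prompt
          would exceed the model's context window (minus any reserved reply tokens). */}
      {oldTurns.map((turn, i) => (
        <Shrinkable importance={i} replacement={<UserMessage>[earlier message omitted]</UserMessage>}>
          <UserMessage>{turn}</UserMessage>
        </Shrinkable>
      ))}
      {/* The latest question is not wrapped in <Shrinkable>, so it is never dropped. */}
      <UserMessage>{question}</UserMessage>
    </ChatCompletion>
  );
}

console.log(
  await AI.createRenderContext().render(
    <App oldTurns={['My favorite color is blue.', 'I live in Seattle.']} question="What is my favorite color?" />
  )
);
```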