From 0f75f5d36e7c8c0d0f7b34887afa99497f4f9a3c Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 14 Dec 2024 19:15:41 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=8E=89=20add=20custom=20weights?= =?UTF-8?q?=20and=20pre-encoded=20tokens=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../content/docs/reference/scripts/choices.md | 31 ++++++++++++++++--- packages/core/src/chat.ts | 13 +++++--- packages/core/src/types/prompt_template.d.ts | 2 +- packages/sample/genaisrc/choices.genai.mjs | 2 +- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/docs/src/content/docs/reference/scripts/choices.md b/docs/src/content/docs/reference/scripts/choices.md index a4f7e1ea46..eed6d15085 100644 --- a/docs/src/content/docs/reference/scripts/choices.md +++ b/docs/src/content/docs/reference/scripts/choices.md @@ -8,8 +8,8 @@ sidebar: You can specify a list of preferred words (choices) in the script metadata. It will increase the probability of the model generating the specified words. -- Each word should match a single token for the desired model! -- For some models, GenAIScript does not have a token encoder so it won't be able to compute the logit bias for the choices +- Each word should match a single token for the desired model! +- For some models, GenAIScript does not have a token encoder so it won't be able to compute the logit bias for the choices ```js script({ @@ -22,12 +22,33 @@ script({ ERR ``` +## Custom weights + +You can tune the probability of each choice by providing a weight for each choice. +The default weight is `5`. + +```js '{ token: "ERR", weight: 10 }' +script({ + choices: ["OK", { token: "ERR", weight: 10 }], +}) +``` + +## Pre-encoded tokens + +For models where GenAIScript does not have a token encoder, you can provide the pre-encoded tokens. + +```js +script({ + choices: [{ token: 12345, weight: 10 }], +}) +``` + ## Logit Bias Internally, GenAIScript tokenizes the word and build the [logit_bias](https://help.openai.com/en/articles/5247780-using-logit-bias-to-alter-token-probability-with-the-openai-api) for each token. -- choices: `OK`, `ERR` -- logit bias: `{"5175":5,"5392":5}` +- choices: `OK`, `ERR` +- logit bias: `{"5175":5,"5392":5}` ## Logprobs @@ -38,4 +59,4 @@ You can enable [logprobs](/genaiscript/reference/scripts/logprobs) to visualize ERR . ---- \ No newline at end of file +--- diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index c1d92433e9..e673f891e8 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -748,7 +748,9 @@ export function mergeGenerationOptions( async function choicesToLogitBias( trace: MarkdownTrace, model: string, - choices: ElementOrArray + choices: ElementOrArray< + string | { token: string | number; weight?: number } + > ) { choices = arrayify(choices) if (!choices?.length) return undefined @@ -764,12 +766,13 @@ async function choicesToLogitBias( } const res = Object.fromEntries( choices.map((c) => { - const tokens = encode(c) - if (tokens.length !== 1) + const { token, weight } = typeof c === "string" ? { token: c } : c + const encoded = typeof token === "number" ? [token] : encode(token) + if (encoded.length !== 1) trace.warn( - `choice ${c} tokenizes to ${tokens.join(", ")} (expected one token)` + `choice ${c} tokenizes to ${encoded.join(", ")} (expected one token)` ) - return [tokens[0], CHOICE_LOGIT_BIAS] + return [encoded[0], isNaN(weight) ? CHOICE_LOGIT_BIAS : weight] }) ) trace.itemValue("choices", choices.join(", ")) diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 1e0c6805c5..fbc4b4a8b1 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -219,7 +219,7 @@ interface ModelOptions extends ModelConnectionOptions, ModelTemplateOptions { /** * A list of keywords that should be found in the output. */ - choices?: ElementOrArray + choices?: ElementOrArray /** * Returns the log probabilities of the each tokens. Not supported in all models. diff --git a/packages/sample/genaisrc/choices.genai.mjs b/packages/sample/genaisrc/choices.genai.mjs index c992a480f9..d0e26313a1 100644 --- a/packages/sample/genaisrc/choices.genai.mjs +++ b/packages/sample/genaisrc/choices.genai.mjs @@ -1,5 +1,5 @@ script({ - choices: ["OK", "ERR"], + choices: ["OK", { token: "ERR", weight: 0.2 }], }) // tests logit_bias const res = await runPrompt(