diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
index 948e92a7..4061c7d2 100644
--- a/apps/workers/openaiWorker.ts
+++ b/apps/workers/openaiWorker.ts
@@ -1,5 +1,6 @@
 import { and, Column, eq, inArray, sql } from "drizzle-orm";
 import { DequeuedJob, Runner } from "liteque";
+import { buildImpersonatingTRPCClient } from "trpc";
 import { z } from "zod";
 
 import type { InferenceClient } from "@hoarder/shared/inference";
@@ -200,7 +201,47 @@ async function fetchCustomPrompts(
     },
   });
 
-  return prompts.map((p) => p.text);
+  let promptTexts = prompts.map((p) => p.text);
+  if (containsTagsPlaceholder(prompts)) {
+    promptTexts = await replaceTagsPlaceholders(promptTexts, userId);
+  }
+
+  return promptTexts;
+}
+
+async function replaceTagsPlaceholders(
+  prompts: string[],
+  userId: string,
+): Promise<string[]> {
+  const api = await buildImpersonatingTRPCClient(userId);
+  const tags = (await api.tags.list()).tags;
+  const tagsString = `[${tags.map((tag) => tag.name).join(",")}]`;
+  const aiTagsString = `[${tags
+    .filter((tag) => (tag.numBookmarksByAttachedType.human ?? 0) === 0)
+    .map((tag) => tag.name)
+    .join(",")}]`;
+  const userTagsString = `[${tags
+    .filter((tag) => (tag.numBookmarksByAttachedType.human ?? 0) > 0)
+    .map((tag) => tag.name)
+    .join(",")}]`;
+
+  return prompts.map((p) =>
+    p
+      .replaceAll("$tags", tagsString)
+      .replaceAll("$aiTags", aiTagsString)
+      .replaceAll("$userTags", userTagsString),
+  );
+}
+
+function containsTagsPlaceholder(prompts: { text: string }[]): boolean {
+  return (
+    prompts.filter(
+      (p) =>
+        p.text.includes("$tags") ||
+        p.text.includes("$aiTags") ||
+        p.text.includes("$userTags"),
+    ).length > 0
+  );
 }
 
 async function inferTagsFromPDF(
diff --git a/docs/docs/03-configuration.md b/docs/docs/03-configuration.md
index 49971ec4..a5720092 100644
--- a/docs/docs/03-configuration.md
+++ b/docs/docs/03-configuration.md
@@ -60,6 +60,11 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 | INFERENCE_LANG | No | english | The language in which the tags will be generated. |
 | INFERENCE_JOB_TIMEOUT_SEC | No | 30 | How long to wait for the inference job to finish before timing out. If you're running ollama without powerful GPUs, you might want to increase the timeout a bit. |
 
+:::info
+- You can append additional instructions to the prompt used for automatic tagging in the `AI Settings` section of the `User Settings` screen.
+- You can use the placeholders `$tags`, `$aiTags`, and `$userTags` in the prompt. When automatic tagging runs, they are replaced with the list of all tags, AI-generated tags, or human-created tags respectively (e.g. `[hoarder, computer, ai]`).
+:::
+
 ## Crawler Configs
 
 | Name | Required | Default | Description |
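
A minimal, self-contained sketch (for review, not part of the diff) of the substitution behaviour the new docs bullets describe. The `Tag` shape, the sample tag counts, and the example prompt are hypothetical stand-ins for the real `tags.list()` output returned through the impersonating tRPC client:

```typescript
// Illustrative only: mirrors the placeholder expansion added in this PR,
// using an inlined tag list instead of buildImpersonatingTRPCClient.
interface Tag {
  name: string;
  numBookmarksByAttachedType: { ai?: number; human?: number };
}

// Hypothetical sample data.
const tags: Tag[] = [
  { name: "hoarder", numBookmarksByAttachedType: { human: 3 } },
  { name: "computer", numBookmarksByAttachedType: { ai: 2 } },
  { name: "ai", numBookmarksByAttachedType: { ai: 5, human: 1 } },
];

// Render a tag list the same way the worker does: "[a,b,c]".
const list = (ts: Tag[]) => `[${ts.map((t) => t.name).join(",")}]`;
const tagsString = list(tags);
const aiTagsString = list(
  tags.filter((t) => (t.numBookmarksByAttachedType.human ?? 0) === 0),
);
const userTagsString = list(
  tags.filter((t) => (t.numBookmarksByAttachedType.human ?? 0) > 0),
);

const prompt =
  "Prefer reusing existing tags: $tags. Avoid purely AI tags: $aiTags.";
console.log(
  prompt
    .replaceAll("$tags", tagsString)
    .replaceAll("$aiTags", aiTagsString)
    .replaceAll("$userTags", userTagsString),
);
// -> Prefer reusing existing tags: [hoarder,computer,ai]. Avoid purely AI tags: [computer].
```

Note that `$aiTags` resolves to tags with no human-attached bookmarks, matching the `(human ?? 0) === 0` filter in `replaceTagsPlaceholders`, while `$userTags` covers tags attached by a user at least once.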