From bce4ddbb7392c60dcbcad2493afac0e0fce24558 Mon Sep 17 00:00:00 2001
From: kamtschatka
Date: Sun, 3 Nov 2024 12:33:09 +0100
Subject: [PATCH 1/2] PR for #111: add $tags, $aiTags and $userTags placeholders that will be replaced with all tags, AI tags, or user tags during inference

---
 apps/workers/openaiWorker.ts  | 68 +++++++++++++++++++++++++++++++++++
 docs/docs/03-configuration.md |  5 +++
 2 files changed, 73 insertions(+)

diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
index 5a4fd69d..f1108eab 100644
--- a/apps/workers/openaiWorker.ts
+++ b/apps/workers/openaiWorker.ts
@@ -198,9 +198,77 @@ async function fetchCustomPrompts(
     },
   });
 
+  if (containsTagsPlaceholder(prompts)) {
+    return replaceTagsPlaceholders(prompts, userId);
+  }
+
   return prompts.map((p) => p.text);
 }
 
+async function replaceTagsPlaceholders(
+  prompts: { text: string }[],
+  userId: string,
+): Promise<string[]> {
+  const tags = await loadTagsForUser(userId);
+  const tagsString = `[${tags.map((tag) => tag.name).join(",")}]`;
+  const aiTagsString = `[${tags
+    .filter((tag) => tag.aiCount > 0)
+    .map((tag) => tag.name)
+    .join(",")}]`;
+  const userTagsString = `[${tags
+    .filter((tag) => tag.humanCount > 0)
+    .map((tag) => tag.name)
+    .join(",")}]`;
+
+  return prompts.map((p) =>
+    p.text
+      .replaceAll("$tags", tagsString)
+      .replaceAll("$aiTags", aiTagsString)
+      .replaceAll("$userTags", userTagsString),
+  );
+}
+
+async function loadTagsForUser(userId: string) {
+  const tagsWithCounts = await db.query.bookmarkTags.findMany({
+    where: eq(bookmarkTags.userId, userId),
+    columns: {
+      name: true,
+    },
+    with: {
+      tagsOnBookmarks: {
+        columns: {
+          attachedBy: true,
+        },
+      },
+    },
+  });
+
+  return tagsWithCounts.map((tag) => {
+    const aiCount = tag.tagsOnBookmarks.filter(
+      (tob) => tob.attachedBy === "ai",
+    ).length;
+    const humanCount = tag.tagsOnBookmarks.filter(
+      (tob) => tob.attachedBy === "human",
+    ).length;
+    return {
+      name: tag.name,
+      aiCount,
+      humanCount,
+    };
+  });
+}
+
+function containsTagsPlaceholder(prompts: { text: string }[]): boolean {
+  return (
+    prompts.filter(
+      (p) =>
+        p.text.includes("$tags") ||
+        p.text.includes("$aiTags") ||
+        p.text.includes("$userTags"),
+    ).length > 0
+  );
+}
+
 async function inferTagsFromPDF(
   jobId: string,
   bookmark: NonNullable<Awaited<ReturnType<typeof fetchBookmark>>>,
diff --git a/docs/docs/03-configuration.md b/docs/docs/03-configuration.md
index c8fb4cc2..b8932ee5 100644
--- a/docs/docs/03-configuration.md
+++ b/docs/docs/03-configuration.md
@@ -60,6 +60,11 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin
 | INFERENCE_LANG | No | english | The language in which the tags will be generated. |
 | INFERENCE_JOB_TIMEOUT_SEC | No | 30 | How long to wait for the inference job to finish before timing out. If you're running ollama without powerful GPUs, you might want to increase the timeout a bit. |
 
+:::info
+- You can append additional instructions to the prompt used for automatic tagging in `AI Settings` (in the `User Settings` screen).
+- You can use the placeholders `$tags`, `$aiTags` and `$userTags` in the prompt. These placeholders will be replaced with all tags, AI-generated tags, or human-created tags when automatic tagging is performed (e.g. `[hoarder, computer, ai]`).
+:::
+
 ## Crawler Configs
 
 | Name | Required | Default | Description |
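Editor's note: the standalone TypeScript sketch below is illustrative only and not part of either patch. It shows how a custom prompt is expanded under the first patch's semantics (`aiCount > 0` for `$aiTags`, `humanCount > 0` for `$userTags`); the `TagSummary` type, the `expandPlaceholders` helper, and the sample tag names and counts are hypothetical.

```ts
// Illustrative sketch of the placeholder expansion (mirrors patch 1/2).
// Tag names and counts are made up for the example.
interface TagSummary {
  name: string;
  aiCount: number; // bookmarks where the tag was attached by AI
  humanCount: number; // bookmarks where the tag was attached by a human
}

const tags: TagSummary[] = [
  { name: "hoarder", aiCount: 2, humanCount: 1 },
  { name: "computer", aiCount: 0, humanCount: 3 },
  { name: "ai", aiCount: 5, humanCount: 0 },
];

function expandPlaceholders(prompt: string, tags: TagSummary[]): string {
  const asList = (names: string[]) => `[${names.join(",")}]`;
  const tagsString = asList(tags.map((t) => t.name));
  const aiTagsString = asList(tags.filter((t) => t.aiCount > 0).map((t) => t.name));
  const userTagsString = asList(tags.filter((t) => t.humanCount > 0).map((t) => t.name));
  return prompt
    .replaceAll("$tags", tagsString)
    .replaceAll("$aiTags", aiTagsString)
    .replaceAll("$userTags", userTagsString);
}

// Prints: "Prefer my existing tags: [hoarder,computer,ai]. Tags I added myself: [hoarder,computer]."
console.log(
  expandPlaceholders(
    "Prefer my existing tags: $tags. Tags I added myself: $userTags.",
    tags,
  ),
);
```

The second patch (below) keeps the same expansion logic but sources the tag list through the impersonating tRPC client instead of a direct database query.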
From 02ddedd2bdb95bd315f73d6601ca296841101adb Mon Sep 17 00:00:00 2001
From: Mohamed Bassem
Date: Sun, 24 Nov 2024 18:47:20 +0000
Subject: [PATCH 2/2] Use the new buildImpersonatingTRPCClient util

---
 apps/workers/openaiWorker.ts | 47 ++++++++----------------------------
 1 file changed, 10 insertions(+), 37 deletions(-)

diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts
index 0bfb43a3..4061c7d2 100644
--- a/apps/workers/openaiWorker.ts
+++ b/apps/workers/openaiWorker.ts
@@ -1,5 +1,6 @@
 import { and, Column, eq, inArray, sql } from "drizzle-orm";
 import { DequeuedJob, Runner } from "liteque";
+import { buildImpersonatingTRPCClient } from "trpc";
 import { z } from "zod";
 
 import type { InferenceClient } from "@hoarder/shared/inference";
@@ -200,66 +201,38 @@ async function fetchCustomPrompts(
     },
   });
 
+  let promptTexts = prompts.map((p) => p.text);
   if (containsTagsPlaceholder(prompts)) {
-    return replaceTagsPlaceholders(prompts, userId);
+    promptTexts = await replaceTagsPlaceholders(promptTexts, userId);
   }
 
-  return prompts.map((p) => p.text);
+  return promptTexts;
 }
 
 async function replaceTagsPlaceholders(
-  prompts: { text: string }[],
+  prompts: string[],
   userId: string,
 ): Promise<string[]> {
-  const tags = await loadTagsForUser(userId);
+  const api = await buildImpersonatingTRPCClient(userId);
+  const tags = (await api.tags.list()).tags;
   const tagsString = `[${tags.map((tag) => tag.name).join(",")}]`;
   const aiTagsString = `[${tags
-    .filter((tag) => tag.aiCount > 0)
+    .filter((tag) => (tag.numBookmarksByAttachedType.human ?? 0) == 0)
     .map((tag) => tag.name)
     .join(",")}]`;
   const userTagsString = `[${tags
-    .filter((tag) => tag.humanCount > 0)
+    .filter((tag) => (tag.numBookmarksByAttachedType.human ?? 0) > 0)
     .map((tag) => tag.name)
     .join(",")}]`;
 
   return prompts.map((p) =>
-    p.text
+    p
       .replaceAll("$tags", tagsString)
       .replaceAll("$aiTags", aiTagsString)
       .replaceAll("$userTags", userTagsString),
   );
 }
 
-async function loadTagsForUser(userId: string) {
-  const tagsWithCounts = await db.query.bookmarkTags.findMany({
-    where: eq(bookmarkTags.userId, userId),
-    columns: {
-      name: true,
-    },
-    with: {
-      tagsOnBookmarks: {
-        columns: {
-          attachedBy: true,
-        },
-      },
-    },
-  });
-
-  return tagsWithCounts.map((tag) => {
-    const aiCount = tag.tagsOnBookmarks.filter(
-      (tob) => tob.attachedBy === "ai",
-    ).length;
-    const humanCount = tag.tagsOnBookmarks.filter(
-      (tob) => tob.attachedBy === "human",
-    ).length;
-    return {
-      name: tag.name,
-      aiCount,
-      humanCount,
-    };
-  });
-}
-
 function containsTagsPlaceholder(prompts: { text: string }[]): boolean {
   return (
     prompts.filter(