From f263f9e48a38d6b6f0132d8c6ea2a38a6d19d1b6 Mon Sep 17 00:00:00 2001 From: MohamedBassem Date: Sun, 15 Sep 2024 00:03:39 +0000 Subject: [PATCH] feature(worker): Allow configuring inference job timeout and ollama keep alive. Fixes #389 #224 --- apps/workers/inference.ts | 1 + apps/workers/openaiWorker.ts | 2 +- docs/docs/03-configuration.md | 38 ++++++++++++++++++----------------- packages/shared/config.ts | 4 ++++ 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/apps/workers/inference.ts b/apps/workers/inference.ts index fa83140f..071f4742 100644 --- a/apps/workers/inference.ts +++ b/apps/workers/inference.ts @@ -103,6 +103,7 @@ class OllamaInferenceClient implements InferenceClient { model: model, format: "json", stream: true, + keep_alive: serverConfig.inference.ollamaKeepAlive, messages: [ { role: "user", content: prompt, images: image ? [image] : undefined }, ], diff --git a/apps/workers/openaiWorker.ts b/apps/workers/openaiWorker.ts index 55695938..8bd2cf4a 100644 --- a/apps/workers/openaiWorker.ts +++ b/apps/workers/openaiWorker.ts @@ -81,7 +81,7 @@ export class OpenAiWorker { { concurrency: 1, pollIntervalMs: 1000, - timeoutSecs: 30, + timeoutSecs: serverConfig.inference.jobTimeoutSec, }, ); diff --git a/docs/docs/03-configuration.md b/docs/docs/03-configuration.md index ad94ff9b..4237e294 100644 --- a/docs/docs/03-configuration.md +++ b/docs/docs/03-configuration.md @@ -2,16 +2,16 @@ The app is mainly configured by environment variables. All the used environment variables are listed in [packages/shared/config.ts](https://github.com/hoarder-app/hoarder/blob/main/packages/shared/config.ts). The most important ones are: -| Name | Required | Default | Description | -| ------------------------- | ------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | -| DATA_DIR | Yes | Not set | The path for the persistent data directory. This is where the db and the uploaded assets live. | -| NEXTAUTH_URL | Yes | Not set | Should point to the address of your server. The app will function without it, but will redirect you to wrong addresses on signout for example. | -| NEXTAUTH_SECRET | Yes | Not set | Random string used to sign the JWT tokens. Generate one with `openssl rand -base64 36`. | -| MEILI_ADDR | No | Not set | The address of meilisearch. If not set, Search will be disabled. E.g. (`http://meilisearch:7700`) | -| MEILI_MASTER_KEY | Only in Prod and if search is enabled | Not set | The master key configured for meilisearch. Not needed in development environment. Generate one with `openssl rand -base64 36` | -| DISABLE_SIGNUPS | No | false | If enabled, no new signups will be allowed and the signup button will be disabled in the UI | -| MAX_ASSET_SIZE_MB | No | 4 | Sets the maximum allowed asset size (in MB) to be uploaded | -| DISABLE_NEW_RELEASE_CHECK | No | false | If set to true, latest release check will be disabled in the admin panel. | +| Name | Required | Default | Description | +| ------------------------- | ------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| DATA_DIR | Yes | Not set | The path for the persistent data directory. This is where the db and the uploaded assets live. | +| NEXTAUTH_URL | Yes | Not set | Should point to the address of your server. The app will function without it, but will redirect you to wrong addresses on signout for example. | +| NEXTAUTH_SECRET | Yes | Not set | Random string used to sign the JWT tokens. Generate one with `openssl rand -base64 36`. | +| MEILI_ADDR | No | Not set | The address of meilisearch. If not set, Search will be disabled. E.g. (`http://meilisearch:7700`) | +| MEILI_MASTER_KEY | Only in Prod and if search is enabled | Not set | The master key configured for meilisearch. Not needed in development environment. Generate one with `openssl rand -base64 36` | +| DISABLE_SIGNUPS | No | false | If enabled, no new signups will be allowed and the signup button will be disabled in the UI | +| MAX_ASSET_SIZE_MB | No | 4 | Sets the maximum allowed asset size (in MB) to be uploaded | +| DISABLE_NEW_RELEASE_CHECK | No | false | If set to true, latest release check will be disabled in the admin panel. | ## Inference Configs (For automatic tagging) @@ -23,14 +23,16 @@ Either `OPENAI_API_KEY` or `OLLAMA_BASE_URL` need to be set for automatic taggin - Running local models is a recent addition and not as battle tested as using OpenAI, so proceed with care (and potentially expect a bunch of inference failures). ::: -| Name | Required | Default | Description | -| --------------------- | -------- | ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| OPENAI_API_KEY | No | Not set | The OpenAI key used for automatic tagging. More on that in [here](/openai). | -| OPENAI_BASE_URL | No | Not set | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. | -| OLLAMA_BASE_URL | No | Not set | If you want to use ollama for local inference, set the address of ollama API here. | -| INFERENCE_TEXT_MODEL | No | gpt-4o-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama. | -| INFERENCE_IMAGE_MODEL | No | gpt-4o-mini | The model to use for image inference. You'll need to change this to some other model if you're using ollama and that model needs to support vision APIs (e.g. llava). | -| INFERENCE_LANG | No | english | The language in which the tags will be generated. | +| Name | Required | Default | Description | +| ------------------------- | -------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OPENAI_API_KEY | No | Not set | The OpenAI key used for automatic tagging. More on that in [here](/openai). | +| OPENAI_BASE_URL | No | Not set | If you just want to use OpenAI you don't need to pass this variable. If, however, you want to use some other openai compatible API (e.g. azure openai service), set this to the url of the API. | +| OLLAMA_BASE_URL | No | Not set | If you want to use ollama for local inference, set the address of ollama API here. | +| OLLAMA_KEEP_ALIVE | No | Not set | Controls how long the model will stay loaded into memory following the request (example value: "5m"). | +| INFERENCE_TEXT_MODEL | No | gpt-4o-mini | The model to use for text inference. You'll need to change this to some other model if you're using ollama. | +| INFERENCE_IMAGE_MODEL | No | gpt-4o-mini | The model to use for image inference. You'll need to change this to some other model if you're using ollama and that model needs to support vision APIs (e.g. llava). | +| INFERENCE_LANG | No | english | The language in which the tags will be generated. | +| INFERENCE_JOB_TIMEOUT_SEC | No | 30 | How long to wait for the inference job to finish before timing out. If you're running ollama without powerful GPUs, you might want to increase the timeout a bit. | ## Crawler Configs diff --git a/packages/shared/config.ts b/packages/shared/config.ts index 3cd20eff..b2de8677 100644 --- a/packages/shared/config.ts +++ b/packages/shared/config.ts @@ -13,6 +13,8 @@ const allEnv = z.object({ OPENAI_API_KEY: z.string().optional(), OPENAI_BASE_URL: z.string().url().optional(), OLLAMA_BASE_URL: z.string().url().optional(), + OLLAMA_KEEP_ALIVE: z.string().optional(), + INFERENCE_JOB_TIMEOUT_SEC: z.coerce.number().default(30), INFERENCE_TEXT_MODEL: z.string().default("gpt-4o-mini"), INFERENCE_IMAGE_MODEL: z.string().default("gpt-4o-mini"), CRAWLER_HEADLESS_BROWSER: stringBool("true"), @@ -47,9 +49,11 @@ const serverConfigSchema = allEnv.transform((val) => { disableSignups: val.DISABLE_SIGNUPS, }, inference: { + jobTimeoutSec: val.INFERENCE_JOB_TIMEOUT_SEC, openAIApiKey: val.OPENAI_API_KEY, openAIBaseUrl: val.OPENAI_BASE_URL, ollamaBaseUrl: val.OLLAMA_BASE_URL, + ollamaKeepAlive: val.OLLAMA_KEEP_ALIVE, textModel: val.INFERENCE_TEXT_MODEL, imageModel: val.INFERENCE_IMAGE_MODEL, inferredTagLang: val.INFERENCE_LANG,