From 11d960b2a6cca4028d6b3aff64e1bb5e09a3bb2f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 30 Aug 2024 00:10:17 +0200
Subject: [PATCH] chore(cli): be consistent between workers and expose
 ExtraLLamaCPPArgs to both (#3428)

* chore(cli): be consistent between workers and expose ExtraLLamaCPPArgs to both

Fixes: https://github.com/mudler/LocalAI/issues/3427

Signed-off-by: Ettore Di Giacinto

* bump grpcio

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
---
 backend/python/autogptq/requirements.txt              | 2 +-
 backend/python/bark/requirements.txt                  | 2 +-
 backend/python/common/template/requirements.txt       | 2 +-
 backend/python/coqui/requirements.txt                 | 2 +-
 backend/python/diffusers/requirements.txt             | 2 +-
 backend/python/exllama/requirements.txt               | 2 +-
 backend/python/exllama2/requirements.txt              | 2 +-
 backend/python/mamba/requirements.txt                 | 2 +-
 backend/python/openvoice/requirements-intel.txt       | 2 +-
 backend/python/openvoice/requirements.txt             | 2 +-
 backend/python/parler-tts/requirements.txt            | 2 +-
 backend/python/rerankers/requirements.txt             | 2 +-
 backend/python/sentencetransformers/requirements.txt  | 2 +-
 backend/python/transformers-musicgen/requirements.txt | 2 +-
 backend/python/transformers/requirements.txt          | 2 +-
 backend/python/vall-e-x/requirements.txt              | 2 +-
 backend/python/vllm/requirements.txt                  | 2 +-
 core/cli/worker/worker.go                             | 1 +
 core/cli/worker/worker_llamacpp.go                    | 5 ++---
 core/cli/worker/worker_p2p.go                         | 1 -
 docs/content/docs/features/distributed_inferencing.md | 4 ++--
 21 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt
index 174ccc940d4c..150fcc1bc7dc 100644
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
index ed15b67805f9..6404b98ebd60 100644
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
index 047ef7d5d7e2..21610c1c97ff 100644
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
\ No newline at end of file
diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
index 8fb684c05b30..d77083638dfd 100644
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
 TTS==0.22.0
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
index 2f85b4e38d56..043c7aba8f59 100644
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.66.0
+grpcio==1.66.1
 pillow
 protobuf
 certifi
diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt
index 3e227c2c8254..b9c192d5d304 100644
--- a/backend/python/exllama/requirements.txt
+++ b/backend/python/exllama/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 setuptools
\ No newline at end of file
diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
index d5c2cc5c6f23..6fb018a0c391 100644
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 wheel
diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
index 75184a33f6d5..a9a4cc2043d0 100644
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
index 71991dc01d54..b38805be42ca 100644
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 librosa
 faster-whisper
diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
index b843981e00b9..0da3da13bb31 100644
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 llvmlite==0.43.0
\ No newline at end of file
diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
index f4512663cc04..fb1119a90b2c 100644
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 scipy==1.14.0
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index 9e056af6dcc5..b19c59c00ae5 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
index 3e227c2c8254..b9c192d5d304 100644
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 setuptools
\ No newline at end of file
diff --git a/core/cli/worker/worker.go b/core/cli/worker/worker.go
index da1b0288aae1..a5d065773bad 100644
--- a/core/cli/worker/worker.go
+++ b/core/cli/worker/worker.go
@@ -2,6 +2,7 @@ package worker
 
 type WorkerFlags struct {
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 }
 
 type Worker struct {
diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go
index 2baf51ec4c1b..9fb69ca1cb64 100644
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -3,6 +3,7 @@ package worker
 import (
 	"fmt"
 	"os"
+	"strings"
 	"syscall"
 
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -12,7 +13,6 @@ import (
 )
 
 type LLamaCPP struct {
-	Args        []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
 	WorkerFlags `embed:""`
 }
 
@@ -34,9 +34,8 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 		"llama-cpp-rpc-server",
 	)
 
-	args := os.Args[4:]
+	args := strings.Split(r.ExtraLLamaCPPArgs, " ")
 	args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
-
 	args = append([]string{grpcProcess}, args...)
 	return syscall.Exec(
 		grpcProcess,
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
index adfd28190db9..6275481b8fac 100644
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -25,7 +25,6 @@ type P2P struct {
 	NoRunner           bool   `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
 	RunnerAddress      string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
 	RunnerPort         string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
-	ExtraLLamaCPPArgs  string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 	Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
 }
 
diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index c8c60f8d0b71..8c7790c6b4a2 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -68,7 +68,7 @@ And navigate the WebUI to the "Swarm" section to see the instructions to connect
 To start workers for distributing the computational load, run:
 
 ```bash
-local-ai worker llama-cpp-rpc
+local-ai worker llama-cpp-rpc --llama-cpp-args="-H <host> -p <port> -m <memory>"
 ```
 
 And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:
@@ -98,7 +98,7 @@ To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKE
 2. Start the workers. Copy the `local-ai` binary to other hosts and run as many workers as needed using the token:
 
 ```bash
-TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc
+TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc --llama-cpp-args="-m <memory>"
 # 1:06AM INF loading environment variables from file envFile=.env
 # 1:06AM INF Setting logging to info
 # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"}
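
A minimal usage sketch of the now-shared flag; the host, port, and memory values below are illustrative assumptions, not taken from the patch:

```bash
# Start a standalone RPC worker. The value of --llama-cpp-args is split on
# spaces and forwarded as-is to llama-cpp-rpc-server.
local-ai worker llama-cpp-rpc --llama-cpp-args="-H 0.0.0.0 -p 50052 -m 2048"

# On the main instance, point LocalAI at the worker(s) via
# LLAMACPP_GRPC_SERVERS, as the docs hunk above describes.
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052" local-ai run
```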