From 11d960b2a6cca4028d6b3aff64e1bb5e09a3bb2f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 30 Aug 2024 00:10:17 +0200
Subject: [PATCH] chore(cli): be consistent between workers and expose
 ExtraLLamaCPPArgs to both (#3428)

* chore(cli): be consistent between workers and expose ExtraLLamaCPPArgs to both

Fixes: https://github.com/mudler/LocalAI/issues/3427

Signed-off-by: Ettore Di Giacinto

* bump grpcio

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
---
 backend/python/autogptq/requirements.txt              | 2 +-
 backend/python/bark/requirements.txt                  | 2 +-
 backend/python/common/template/requirements.txt       | 2 +-
 backend/python/coqui/requirements.txt                 | 2 +-
 backend/python/diffusers/requirements.txt             | 2 +-
 backend/python/exllama/requirements.txt               | 2 +-
 backend/python/exllama2/requirements.txt              | 2 +-
 backend/python/mamba/requirements.txt                 | 2 +-
 backend/python/openvoice/requirements-intel.txt       | 2 +-
 backend/python/openvoice/requirements.txt             | 2 +-
 backend/python/parler-tts/requirements.txt            | 2 +-
 backend/python/rerankers/requirements.txt             | 2 +-
 backend/python/sentencetransformers/requirements.txt  | 2 +-
 backend/python/transformers-musicgen/requirements.txt | 2 +-
 backend/python/transformers/requirements.txt          | 2 +-
 backend/python/vall-e-x/requirements.txt              | 2 +-
 backend/python/vllm/requirements.txt                  | 2 +-
 core/cli/worker/worker.go                             | 1 +
 core/cli/worker/worker_llamacpp.go                    | 5 ++---
 core/cli/worker/worker_p2p.go                         | 1 -
 docs/content/docs/features/distributed_inferencing.md | 4 ++--
 21 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt
index 174ccc940d4c..150fcc1bc7dc 100644
--- a/backend/python/autogptq/requirements.txt
+++ b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 transformers
\ No newline at end of file
diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
index ed15b67805f9..6404b98ebd60 100644
--- a/backend/python/bark/requirements.txt
+++ b/backend/python/bark/requirements.txt
@@ -1,4 +1,4 @@
 bark==0.1.5
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
index 047ef7d5d7e2..21610c1c97ff 100644
--- a/backend/python/common/template/requirements.txt
+++ b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
\ No newline at end of file
diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
index 8fb684c05b30..d77083638dfd 100644
--- a/backend/python/coqui/requirements.txt
+++ b/backend/python/coqui/requirements.txt
@@ -1,4 +1,4 @@
 TTS==0.22.0
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
index 2f85b4e38d56..043c7aba8f59 100644
--- a/backend/python/diffusers/requirements.txt
+++ b/backend/python/diffusers/requirements.txt
@@ -1,5 +1,5 @@
 setuptools
-grpcio==1.66.0
+grpcio==1.66.1
 pillow
 protobuf
 certifi
diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt
index 3e227c2c8254..b9c192d5d304 100644
--- a/backend/python/exllama/requirements.txt
+++ b/backend/python/exllama/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 setuptools
\ No newline at end of file
diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
index d5c2cc5c6f23..6fb018a0c391 100644
--- a/backend/python/exllama2/requirements.txt
+++ b/backend/python/exllama2/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 wheel
diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/mamba/requirements.txt
+++ b/backend/python/mamba/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
index 75184a33f6d5..a9a4cc2043d0 100644
--- a/backend/python/openvoice/requirements-intel.txt
+++ b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3
diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
index 71991dc01d54..b38805be42ca 100644
--- a/backend/python/openvoice/requirements.txt
+++ b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 librosa
 faster-whisper
diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
index b843981e00b9..0da3da13bb31 100644
--- a/backend/python/parler-tts/requirements.txt
+++ b/backend/python/parler-tts/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 llvmlite==0.43.0
\ No newline at end of file
diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/rerankers/requirements.txt
+++ b/backend/python/rerankers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/sentencetransformers/requirements.txt
+++ b/backend/python/sentencetransformers/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
index f4512663cc04..fb1119a90b2c 100644
--- a/backend/python/transformers-musicgen/requirements.txt
+++ b/backend/python/transformers-musicgen/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 scipy==1.14.0
 certifi
\ No newline at end of file
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
index 9e056af6dcc5..b19c59c00ae5 100644
--- a/backend/python/transformers/requirements.txt
+++ b/backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
\ No newline at end of file
diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt
index 9b4dd772b2ca..8e1b01957c75 100644
--- a/backend/python/vall-e-x/requirements.txt
+++ b/backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,3 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
\ No newline at end of file
diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
index 3e227c2c8254..b9c192d5d304 100644
--- a/backend/python/vllm/requirements.txt
+++ b/backend/python/vllm/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.66.0
+grpcio==1.66.1
 protobuf
 certifi
 setuptools
\ No newline at end of file
diff --git a/core/cli/worker/worker.go b/core/cli/worker/worker.go
index da1b0288aae1..a5d065773bad 100644
--- a/core/cli/worker/worker.go
+++ b/core/cli/worker/worker.go
@@ -2,6 +2,7 @@ package worker
 
 type WorkerFlags struct {
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
+	ExtraLLamaCPPArgs string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 }
 
 type Worker struct {
diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go
index 2baf51ec4c1b..9fb69ca1cb64 100644
--- a/core/cli/worker/worker_llamacpp.go
+++ b/core/cli/worker/worker_llamacpp.go
@@ -3,6 +3,7 @@ package worker
 import (
 	"fmt"
 	"os"
+	"strings"
 	"syscall"
 
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
@@ -12,7 +13,6 @@ import (
 )
 
 type LLamaCPP struct {
-	Args        []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
 	WorkerFlags `embed:""`
 }
 
@@ -34,9 +34,8 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
 		"llama-cpp-rpc-server",
 	)
 
-	args := os.Args[4:]
+	args := strings.Split(r.ExtraLLamaCPPArgs, " ")
 	args, grpcProcess = library.LoadLDSO(r.BackendAssetsPath, args, grpcProcess)
-
 	args = append([]string{grpcProcess}, args...)
 	return syscall.Exec(
 		grpcProcess,
diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go
index adfd28190db9..6275481b8fac 100644
--- a/core/cli/worker/worker_p2p.go
+++ b/core/cli/worker/worker_p2p.go
@@ -25,7 +25,6 @@ type P2P struct {
 	NoRunner           bool   `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
 	RunnerAddress      string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
 	RunnerPort         string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
-	ExtraLLamaCPPArgs  string `name:"llama-cpp-args" env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
 	Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
 }
 
diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index c8c60f8d0b71..8c7790c6b4a2 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -68,7 +68,7 @@ And navigate the WebUI to the "Swarm" section to see the instructions to connect
 To start workers for distributing the computational load, run:
 
 ```bash
-local-ai worker llama-cpp-rpc
+local-ai worker llama-cpp-rpc --llama-cpp-args="-H <host> -p <port> -m <memory>"
 ```
 
 And you can specify the address of the workers when starting LocalAI with the `LLAMACPP_GRPC_SERVERS` environment variable:
@@ -98,7 +98,7 @@ To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKE
 2. Start the workers. Copy the `local-ai` binary to other hosts and run as many workers as needed using the token:
 
 ```bash
-TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc
+TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc --llama-cpp-args="-m <memory>"
 # 1:06AM INF loading environment variables from file envFile=.env
 # 1:06AM INF Setting logging to info
 # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"}
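
A minimal usage sketch of the now-shared flag; the host, port, and memory values below are illustrative assumptions, not taken from the patch:

```bash
# Start a standalone RPC worker. The value of --llama-cpp-args is split on
# spaces and forwarded as-is to llama-cpp-rpc-server.
local-ai worker llama-cpp-rpc --llama-cpp-args="-H 0.0.0.0 -p 50052 -m 2048"

# On the main instance, point LocalAI at the worker(s) via
# LLAMACPP_GRPC_SERVERS, as the docs hunk above describes.
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052" local-ai run
```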