Commit

Merge branch 'master' into feat-request-middleware

Signed-off-by: Dave Lee <dave@gray101.com>
dave-gray101 committed Dec 16, 2024
2 parents 347cf9b + 708cba0 commit 158dc03
Showing 19 changed files with 295 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=dafae66cc242eb766797194d3c85c5e502625623
+CPPLLAMA_VERSION?=08ea539df211e46bb4d0dd275e541cb591d5ebc8

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
40 changes: 32 additions & 8 deletions backend/cpp/llama/grpc-server.cpp
@@ -681,7 +681,6 @@ struct llama_server_context
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
-slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->sparams.seed = json_value(data, "seed", default_sparams.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -1213,13 +1212,12 @@ struct llama_server_context
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
{"penalize_nl", slot.sparams.penalize_nl},
{"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict},
{"n_keep", params.n_keep},
{"ignore_eos", slot.sparams.ignore_eos},
{"stream", slot.params.stream},
// {"logit_bias", slot.sparams.logit_bias},
// {"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs},
{"min_keep", slot.sparams.min_keep},
{"grammar", slot.sparams.grammar},
@@ -2112,7 +2110,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
// slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
// slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
-// slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
// slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
// slot->params.seed = json_value(data, "seed", default_params.seed);
// slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
@@ -2135,7 +2132,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
data["mirostat"] = predict->mirostat();
data["mirostat_tau"] = predict->mirostattau();
data["mirostat_eta"] = predict->mirostateta();
data["penalize_nl"] = predict->penalizenl();
data["n_keep"] = predict->nkeep();
data["seed"] = predict->seed();
data["grammar"] = predict->grammar();
@@ -2181,7 +2177,6 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// llama.params.sparams.mirostat = predict->mirostat();
// llama.params.sparams.mirostat_tau = predict->mirostattau();
// llama.params.sparams.mirostat_eta = predict->mirostateta();
-// llama.params.sparams.penalize_nl = predict->penalizenl();
// llama.params.n_keep = predict->nkeep();
// llama.params.seed = predict->seed();
// llama.params.sparams.grammar = predict->grammar();
@@ -2228,6 +2223,35 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
// }
// }

+const std::vector<ggml_type> kv_cache_types = {
+    GGML_TYPE_F32,
+    GGML_TYPE_F16,
+    GGML_TYPE_BF16,
+    GGML_TYPE_Q8_0,
+    GGML_TYPE_Q4_0,
+    GGML_TYPE_Q4_1,
+    GGML_TYPE_IQ4_NL,
+    GGML_TYPE_Q5_0,
+    GGML_TYPE_Q5_1,
+};
+
+static ggml_type kv_cache_type_from_str(const std::string & s) {
+    for (const auto & type : kv_cache_types) {
+        if (ggml_type_name(type) == s) {
+            return type;
+        }
+    }
+    throw std::runtime_error("Unsupported cache type: " + s);
+}
+
+static std::string get_all_kv_cache_types() {
+    std::ostringstream msg;
+    for (const auto & type : kv_cache_types) {
+        msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? "" : ", ");
+    }
+    return msg.str();
+}
+
static void params_parse(const backend::ModelOptions* request,
common_params & params) {

@@ -2242,10 +2266,10 @@ static void params_parse(const backend::ModelOptions* request,
// params.model_alias ??
params.model_alias = request->modelfile();
if (!request->cachetypekey().empty()) {
-params.cache_type_k = request->cachetypekey();
+params.cache_type_k = kv_cache_type_from_str(request->cachetypekey());
}
if (!request->cachetypevalue().empty()) {
-params.cache_type_v = request->cachetypevalue();
+params.cache_type_v = kv_cache_type_from_str(request->cachetypevalue());
}
params.n_ctx = request->contextsize();
//params.memory_f16 = request->f16memory();
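Note on the change above: params.cache_type_k / params.cache_type_v now receive a ggml_type enum via kv_cache_type_from_str rather than a raw string, presumably because the CPPLLAMA_VERSION bump in the Makefile re-types those fields upstream. Below is a minimal, hypothetical sketch of how the new helpers behave — not part of the commit; it assumes the two helpers above and ggml.h's ggml_type / ggml_type_name are in scope, and that ggml type names are lowercase strings such as "q8_0".

// Hypothetical driver, not from the commit: exercises kv_cache_type_from_str
// and get_all_kv_cache_types as defined in grpc-server.cpp above.
#include <iostream>
#include <stdexcept>

int main() {
    // "q8_0" matches ggml_type_name(GGML_TYPE_Q8_0), so the lookup succeeds;
    // params_parse would store this enum in params.cache_type_k.
    ggml_type k = kv_cache_type_from_str("q8_0");
    std::cout << "resolved cache type: " << ggml_type_name(k) << "\n";

    // An unknown name now fails fast at model-load time, instead of an
    // unsupported string silently reaching the KV cache setup.
    try {
        kv_cache_type_from_str("q3_k");
    } catch (const std::runtime_error & e) {
        std::cerr << e.what() << " (supported: " << get_all_kv_cache_types() << ")\n";
    }
    return 0;
}

Keeping the whitelist in the single kv_cache_types vector means the validation in kv_cache_type_from_str and the list printed by get_all_kv_cache_types cannot drift apart.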
2 changes: 1 addition & 1 deletion backend/python/autogptq/requirements-intel.txt
@@ -2,4 +2,4 @@
intel-extension-for-pytorch
torch
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
2 changes: 1 addition & 1 deletion backend/python/bark/requirements-intel.txt
@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
transformers
accelerate
5 changes: 4 additions & 1 deletion backend/python/common/libbackend.sh
@@ -17,6 +17,9 @@
# LIMIT_TARGETS="cublas12"
# source $(dirname $0)/../common/libbackend.sh
#

+PYTHON_VERSION="3.10"

function init() {
# Name of the backend (directory name)
BACKEND_NAME=${PWD##*/}
@@ -88,7 +91,7 @@ function getBuildProfile() {
# always result in an activated virtual environment
function ensureVenv() {
if [ ! -d "${EDIR}/venv" ]; then
-uv venv ${EDIR}/venv
+uv venv --python ${PYTHON_VERSION} ${EDIR}/venv
echo "virtualenv created"
fi

2 changes: 1 addition & 1 deletion backend/python/coqui/requirements-intel.txt
@@ -3,7 +3,7 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
transformers
accelerate
coqui-tts
2 changes: 1 addition & 1 deletion backend/python/diffusers/requirements-intel.txt
@@ -3,7 +3,7 @@ intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
diffusers
opencv-python
transformers
1 change: 1 addition & 0 deletions backend/python/openvoice/requirements.txt
@@ -18,3 +18,4 @@ jieba==0.42.1
gradio==3.48.0
langid==1.1.6
llvmlite==0.43.0
+setuptools
1 change: 0 additions & 1 deletion backend/python/parler-tts/requirements-intel.txt
@@ -3,6 +3,5 @@ intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers
accelerate
1 change: 1 addition & 0 deletions backend/python/parler-tts/requirements.txt
@@ -1,3 +1,4 @@
grpcio==1.68.1
certifi
llvmlite==0.43.0
+setuptools
2 changes: 1 addition & 1 deletion backend/python/rerankers/requirements-intel.txt
@@ -5,4 +5,4 @@ accelerate
torch
rerankers[transformers]
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
2 changes: 1 addition & 1 deletion backend/python/sentencetransformers/requirements-intel.txt
@@ -2,7 +2,7 @@
intel-extension-for-pytorch
torch
optimum[openvino]
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
accelerate
sentence-transformers==3.3.1
transformers
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements-intel.txt
@@ -4,4 +4,4 @@ transformers
accelerate
torch
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
2 changes: 1 addition & 1 deletion backend/python/transformers/requirements.txt
@@ -1,4 +1,4 @@
grpcio==1.68.1
protobuf
certifi
-setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
3 changes: 1 addition & 2 deletions backend/python/vall-e-x/requirements-intel.txt
@@ -3,5 +3,4 @@ intel-extension-for-pytorch
accelerate
torch
torchaudio
-optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+optimum[openvino]
3 changes: 2 additions & 1 deletion backend/python/vall-e-x/requirements.txt
@@ -1,3 +1,4 @@
grpcio==1.68.1
protobuf
-certifi
+certifi
+setuptools
2 changes: 1 addition & 1 deletion backend/python/vllm/requirements-intel.txt
@@ -4,5 +4,5 @@ accelerate
torch
transformers
optimum[openvino]
-setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools
bitsandbytes
2 changes: 1 addition & 1 deletion core/http/app_test.go
@@ -704,7 +704,7 @@ var _ = Describe("API test", func() {
Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))

Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
-Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
+Expect(resp.Header.Get("Content-Type")).To(Or(Equal("audio/x-wav"), Equal("audio/vnd.wave")))
})
It("installs and is capable to generate images", Label("stablediffusion"), func() {
if runtime.GOOS != "linux" {