diff --git a/README.md b/README.md
index b2fd0e5..7fca81e 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,12 @@ cmp_ai:setup({
   provider = 'Ollama',
   provider_options = {
     model = 'codellama:7b-code',
+    prompt = function(lines_before, lines_after)
+      -- the prompt depends on the model you use; here is an example for codellama
+      return '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>' -- for codellama
+    end,
   },
+  debounce_delay = 600, -- ms; llama may be GPU hungry, so wait this many ms after the last keystroke before sending a request
   notify = true,
   notify_callback = function(msg)
     vim.notify(msg)
@@ -168,6 +173,54 @@ cmp_ai:setup({
   },
 })
 ```
+Models for Ollama are available [here](https://ollama.ai/library). For code completion, use a model that supports it, e.g. [DeepSeek Base 6.7b](https://ollama.ai/library/deepseek-coder).
+
+To use with [LlamaCpp](https://github.com/ggerganov/llama.cpp):
+
+```lua
+local cmp_ai = require('cmp_ai.config')
+
+cmp_ai:setup {
+  max_lines = 30,
+  provider = "LlamaCpp",
+  provider_options = {
+    options = {
+      n_predict = 20, -- number of tokens to predict
+      min_p = 0.05, -- default 0.05; cut off predictions with probability below max_prob * min_p
+      -- repeat_last_n = 64, -- default 64
+      -- repeat_penalty = 1.100, -- default 1.1
+      -- see the llama.cpp server README (linked below) for other options
+    },
+    prompt = function(lines_before, lines_after)
+      -- the prompt depends on the model you use; here is an example for deepseek coder
+      return " <|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>" -- for deepseek coder
+    end,
+  },
+  debounce_delay = 600, -- ms; llama may be GPU hungry, so wait this many ms after the last keystroke before sending a request
+  notify = true,
+  notify_callback = function(msg)
+    vim.notify(msg)
+  end,
+  run_on_every_keystroke = false,
+  ignored_file_types = {
+    -- default is not to ignore
+    -- uncomment to ignore in lua:
+    -- lua = true
+  },
+}
+```
+
+The [LlamaCpp server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) has to be started manually, for example:
+
+```bash
+./server -m ./models/deepseek-coder-6.7b-base.Q4_K_M.gguf -ngl 50 -c 2048 --log-disable
+```
+
+LlamaCpp requires a model in GGUF format. Here is the model I currently use for coding:
+ - [DeepSeek Base 6.7b](https://huggingface.co/TheBloke/deepseek-coder-6.7B-base-GGUF/blob/main/deepseek-coder-6.7b-base.Q4_K_M.gguf)
+   It is good to have at least 12 GB of VRAM to run it (works best with NVIDIA GPUs, thanks to CUDA acceleration). You can also grab a smaller model (faster to run, but lower-quality completions).
+
 ### `notify`
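Since the prompt template has to match the model, the same Ollama setup can also target DeepSeek Coder instead of codellama. A minimal sketch, assuming the `deepseek-coder:6.7b-base` tag from the Ollama library (pull it first with `ollama pull deepseek-coder:6.7b-base`) and reusing the DeepSeek FIM template from the LlamaCpp example above:

```lua
local cmp_ai = require('cmp_ai.config')

cmp_ai:setup({
  provider = 'Ollama',
  provider_options = {
    model = 'deepseek-coder:6.7b-base', -- assumed model tag; check the Ollama library page
    prompt = function(lines_before, lines_after)
      -- DeepSeek Coder fill-in-the-middle template, same as the LlamaCpp example
      return '<|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
  debounce_delay = 600, -- ms
})
```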
diff --git a/lua/cmp_ai/backends/docilellamacpp.lua b/lua/cmp_ai/backends/docilellamacpp.lua
new file mode 100644
index 0000000..27d4191
--- /dev/null
+++ b/lua/cmp_ai/backends/docilellamacpp.lua
@@ -0,0 +1,58 @@
+local requests = require('cmp_ai.requests')
+
+DocileLlamaCpp = requests:new(nil)
+
+
+function DocileLlamaCpp:new(o, params)
+  o = o or {}
+  setmetatable(o, self)
+  self.__index = self
+  self.params = vim.tbl_deep_extend('keep', o or {}, {
+    base_url = 'http://localhost:5000/forward',
+    -- model = 'codellama:7b-code',
+    options = {
+      temperature = 0.2,
+    },
+  })
+  return o
+end
+
+function DocileLlamaCpp:complete(lines_before, lines_after, cb)
+  local data = {
+    -- model = self.params.model,
+    -- prompt = '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>', -- for codellama
+    prompt = " <|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>", -- for deepseek coder
+    stream = false,
+  }
+  data = vim.tbl_extend('keep', data, self.params.options)
+  data.prompt = self.params.prompt(lines_before, lines_after)
+
+  self:Get(self.params.base_url, {}, data, function(answer)
+    local new_data = {}
+    -- vim.print('answer', answer)
+    if answer.error ~= nil then
+      vim.notify('DocileLlamaCpp error: ' .. answer.error)
+      return
+    end
+    if answer.stop then
+      local result = answer.content:gsub('<EOT>', '')
+
+      -- detect if 'CodeQwen' appears in answer.generation_settings.model
+      if string.find(answer.generation_settings.model, 'CodeQwen') then
+        -- also drop the first character, which is always the same leading space, but only for CodeQwen
+        result = result:gsub('^.', '')
+      end
+      -- vim.print('results', result)
+      table.insert(new_data, result)
+    end
+    cb(new_data)
+  end)
+end
+
+function DocileLlamaCpp:test()
+  self:complete('def factorial(n)\n    if', '        return ans\n', function(data)
+    dump(data)
+  end)
+end
+
+return DocileLlamaCpp
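The new `DocileLlamaCpp` backend is not covered by the README changes above. A minimal setup sketch, assuming the `'DocileLlamaCpp'` provider string resolves to `backends/docilellamacpp.lua` the same way `'LlamaCpp'` does, and that a forwarding service listens on the backend's default URL; note that `complete()` always routes through `self.params.prompt`, so a prompt function has to be supplied:

```lua
local cmp_ai = require('cmp_ai.config')

cmp_ai:setup({
  provider = 'DocileLlamaCpp',
  provider_options = {
    base_url = 'http://localhost:5000/forward', -- default from the backend above
    options = {
      n_predict = 20, -- number of tokens to predict
    },
    -- complete() always calls self.params.prompt, so this is required
    prompt = function(lines_before, lines_after)
      return '<|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
  debounce_delay = 600, -- ms
})
```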
diff --git a/lua/cmp_ai/backends/llamacpp.lua b/lua/cmp_ai/backends/llamacpp.lua
new file mode 100644
index 0000000..2f62ef0
--- /dev/null
+++ b/lua/cmp_ai/backends/llamacpp.lua
@@ -0,0 +1,51 @@
+local requests = require('cmp_ai.requests')
+
+LlamaCpp = requests:new(nil)
+
+function LlamaCpp:new(o, params)
+  o = o or {}
+  setmetatable(o, self)
+  self.__index = self
+  self.params = vim.tbl_deep_extend('keep', o or {}, {
+    base_url = 'http://localhost:8080/completion',
+    -- model = 'codellama:7b-code',
+    options = {
+      temperature = 0.2,
+    },
+  })
+  return o
+end
+
+function LlamaCpp:complete(lines_before, lines_after, cb)
+  local data = {
+    -- model = self.params.model,
+    -- prompt = '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>', -- for codellama
+    prompt = " <|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>", -- for deepseek coder
+    stream = false,
+  }
+  data = vim.tbl_extend('keep', data, self.params.options)
+  data.prompt = self.params.prompt(lines_before, lines_after)
+
+  self:Get(self.params.base_url, {}, data, function(answer)
+    local new_data = {}
+    -- vim.print('answer', answer)
+    if answer.error ~= nil then
+      vim.notify('LlamaCpp error: ' .. answer.error)
+      return
+    end
+    if answer.stop then
+      local result = answer.content:gsub('<EOT>', '')
+      -- vim.print('results', result)
+      table.insert(new_data, result)
+    end
+    cb(new_data)
+  end)
+end
+
+function LlamaCpp:test()
+  self:complete('def factorial(n)\n    if', '        return ans\n', function(data)
+    dump(data)
+  end)
+end
+
+return LlamaCpp
diff --git a/lua/cmp_ai/backends/ollama.lua b/lua/cmp_ai/backends/ollama.lua
index 60f282e..ccdefa0 100644
--- a/lua/cmp_ai/backends/ollama.lua
+++ b/lua/cmp_ai/backends/ollama.lua
@@ -6,6 +6,7 @@ function Ollama:new(o)
   o = o or {}
   setmetatable(o, self)
   self.__index = self
+  self.params = vim.tbl_deep_extend('keep', o or {}, {
   self.params = vim.tbl_deep_extend('keep', o or {}, {
     base_url = 'http://127.0.0.1:11434/api/generate',
     model = 'codellama:7b-code',
@@ -13,7 +14,6 @@ function Ollama:new(o)
       temperature = 0.2,
     },
   })
-  return o
 end
diff --git a/lua/cmp_ai/config.lua b/lua/cmp_ai/config.lua
index d53d568..87474c1 100644
--- a/lua/cmp_ai/config.lua
+++ b/lua/cmp_ai/config.lua
@@ -5,6 +5,7 @@ local conf = {
   run_on_every_keystroke = true,
   provider = 'HF',
   provider_options = {},
+  debounce_delay = 200, -- ms
   notify = true,
   notify_callback = function(msg)
     vim.notify(msg)
diff --git a/lua/cmp_ai/requests.lua b/lua/cmp_ai/requests.lua
index 5b49dfc..36f0a78 100644
--- a/lua/cmp_ai/requests.lua
+++ b/lua/cmp_ai/requests.lua
@@ -35,6 +35,7 @@ function Service:Get(url, headers, data, cb)
     vim.notify('Cannot open temporary message file: ' .. tmpfname, vim.log.levels.ERROR)
     return
   end
+  -- vim.print("Request Data: ", vim.fn.json_encode(data))
   f:write(vim.fn.json_encode(data))
   f:close()
@@ -57,6 +58,7 @@ function Service:Get(url, headers, data, cb)
   local result = table.concat(response:result(), '\n')
   local json = self:json_decode(result)
+  -- vim.print("Response: ", json )
   if json == nil then
     cb({ { error = 'No Response.' } })
   else
diff --git a/lua/cmp_ai/source.lua b/lua/cmp_ai/source.lua
index ce63038..7b80892 100644
--- a/lua/cmp_ai/source.lua
+++ b/lua/cmp_ai/source.lua
@@ -40,19 +40,70 @@ function Source:_do_complete(ctx, cb)
   local service = conf:get('provider')
   service:complete(before, after, function(data)
     self:end_complete(data, ctx, cb)
-    if conf:get('notify') then
-      conf:get('notify_callback')('Completion started')
-    end
+    -- why 2x ?
+    -- if conf:get('notify') then
+    --   conf:get('notify_callback')('Completion started')
+    -- end
   end)
 end
 
+function Source:trigger(ctx, callback)
+  if vim.fn.mode() == 'i' then
+    self:_do_complete(ctx, callback)
+  end
+end
+
+-- based on https://github.com/runiq/neovim-throttle-debounce/blob/main/lua/throttle-debounce/init.lua (MIT)
+local function debounce_trailing(fn, ms)
+  local timer = vim.loop.new_timer()
+  local wrapped_fn
+
+  function wrapped_fn(...)
+    local argv = {...}
+    local argc = select('#', ...)
+    -- timer:stop() -- seems not needed?
+    timer:start(ms, 0, function()
+      pcall(vim.schedule_wrap(fn), unpack(argv, 1, argc))
+    end)
+  end
+  return wrapped_fn, timer
+end
+
+local bounce_complete, ret_tim = debounce_trailing(
+  Source.trigger,
+  conf:get('debounce_delay')
+)
+
+local self_cp, ctx_cp, call_cp -- variables to store last completion context
+
+local bounce_autogroup = vim.api.nvim_create_augroup("BounceCompletion", { clear = true })
+vim.api.nvim_create_autocmd({"TextChangedI", "InsertEnter", "TextChangedP"}, {
+  pattern = "*",
+  callback = function()
+    if self_cp ~= nil then
+      bounce_complete(self_cp, ctx_cp, call_cp)
+    end
+  end,
+  group = bounce_autogroup
+})
+
+vim.api.nvim_create_autocmd({"InsertLeave"}, {
+  pattern = "*",
+  callback = function()
+    ret_tim:stop()
+  end,
+  group = bounce_autogroup
+})
+
+
 --- complete
 function Source:complete(ctx, callback)
   if conf:get('ignored_file_types')[vim.bo.filetype] then
     callback()
     return
   end
-  self:_do_complete(ctx, callback)
+  self_cp, ctx_cp, call_cp = self, ctx, callback
+  bounce_complete(self_cp, ctx, callback)
 end
 
 function Source:end_complete(data, ctx, cb)
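The `debounce_trailing` helper above restarts its timer on every call, so only the last call in a burst of keystrokes actually reaches the provider, `debounce_delay` milliseconds after typing pauses. A standalone sketch of that trailing-edge behavior, using the same `vim.loop` timer pattern (the `print` target is only for illustration):

```lua
local function debounce_trailing(fn, ms)
  local timer = vim.loop.new_timer()
  return function(...)
    local argv = {...}
    -- restarting an active timer cancels the pending callback,
    -- so rapid successive calls collapse into a single trailing one
    timer:start(ms, 0, vim.schedule_wrap(function()
      fn(unpack(argv))
    end))
  end, timer
end

local debounced_print, timer = debounce_trailing(print, 600)
debounced_print('first')  -- cancelled by the call below
debounced_print('second') -- only 'second' prints, ~600 ms after the last call
```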