Debounce, LlamaCpp support, expose prompt as setup option, fix passing parameters to model (ollama) #11

Open · wants to merge 17 commits into base: main
53 changes: 53 additions & 0 deletions README.md
@@ -155,7 +155,12 @@ cmp_ai:setup({
provider = 'Ollama',
provider_options = {
model = 'codellama:7b-code',
prompt = function(lines_before, lines_after)
-- prompt depends on the model you use. Here is an example for codellama
return '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>' -- for codellama
end,
},
debounce_delay = 600, -- ms; llama can be GPU hungry, so wait this many ms after the last key press before sending a request
notify = true,
notify_callback = function(msg)
vim.notify(msg)
@@ -168,6 +173,54 @@ cmp_ai:setup({
},
})
```
Models for Ollama are available [here](https://ollama.ai/library). For code completion, use a model that supports it, e.g. [DeepSeek Base 6.7b](https://ollama.ai/library/deepseek-coder).
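
If you want to use a DeepSeek Coder model with the Ollama provider, a prompt function along these lines should work (a sketch: the model tag is a placeholder, and the FIM tokens are the DeepSeek Coder ones used in the LlamaCpp example below):

```lua
-- a sketch: Ollama provider with a DeepSeek Coder style FIM prompt
-- (the model tag is a placeholder; pick an actual tag from the Ollama library)
local cmp_ai = require('cmp_ai.config')

cmp_ai:setup({
  provider = 'Ollama',
  provider_options = {
    model = 'deepseek-coder:6.7b-base',
    prompt = function(lines_before, lines_after)
      return '<s><|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
})
```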

To use with [LlamaCpp](https://github.com/ggerganov/llama.cpp):

```lua
local cmp_ai = require('cmp_ai.config')

cmp_ai:setup {
max_lines = 30,
provider = "LlamaCpp",
provider_options = {
options = {
n_predict = 20, -- number of tokens to generate per completion
min_p = 0.05, -- default 0.05; cut off tokens with probability below max_prob * min_p
-- repeat_last_n = 64, -- default 64
-- repeat_penalty = 1.100, -- default 1.1
-- see the llama.cpp server README (linked below) for other options
},
prompt = function(lines_before, lines_after)
-- prompt depends on the model you use. Here is an example for deepseek coder
return "<s><|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>" -- for deepseek coder
end,
},
debounce_delay = 600, -- ms; llama can be GPU hungry, so wait this many ms after the last key press before sending a request
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = false,
ignored_file_types = {
-- default is not to ignore
-- uncomment to ignore in lua:
-- lua = true
},
}
```


[LlamaCpp Server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) has to be started manually with:

```bash
./server -m ./models/deepseek-coder-6.7b-base.Q4_K_M.gguf -ngl 50 -c 2048 --log-disable
```

LlamaCpp requires a model in GGUF format. Here is the model I currently use for coding:

- [DeepSeek Base 6.7b](https://huggingface.co/TheBloke/deepseek-coder-6.7B-base-GGUF/blob/main/deepseek-coder-6.7b-base.Q4_K_M.gguf)

It is good to have at least 12 GB of VRAM to run it (it works best with NVIDIA GPUs thanks to CUDA acceleration). You can also grab smaller models (faster to run, but lower-quality completions).
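
If your llama.cpp server does not run on the default `http://localhost:8080/completion` endpoint, the URL can be overridden; a sketch, assuming `provider_options` is merged into the backend defaults with `vim.tbl_deep_extend('keep', ...)` as in the backend code of this PR:

```lua
-- a sketch: point the LlamaCpp provider at a non-default server address
-- (the host and port below are placeholders)
require('cmp_ai.config'):setup {
  provider = 'LlamaCpp',
  provider_options = {
    base_url = 'http://192.168.1.10:8080/completion',
    prompt = function(lines_before, lines_after)
      return '<s><|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
}
```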


### `notify`

58 changes: 58 additions & 0 deletions lua/cmp_ai/backends/docilellamacpp.lua
@@ -0,0 +1,58 @@
local requests = require('cmp_ai.requests')

DocileLlamaCpp = requests:new(nil)


function DocileLlamaCpp:new(o, params)
o = o or {}
setmetatable(o, self)
self.__index = self
self.params = vim.tbl_deep_extend('keep', o or {}, {
base_url = 'http://localhost:5000/forward',
-- model = 'codellama:7b-code',
options = {
temperature = 0.2,
},
})
return o
end

function DocileLlamaCpp:complete(lines_before, lines_after, cb)
local data = {
-- model = self.params.model,
-- prompt = '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>', -- for codellama
prompt = "<s><|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>", -- for deepseek coder
stream = false,
}
data = vim.tbl_extend('keep', data, self.params.options)
data.prompt = self.params.prompt(lines_before, lines_after)

self:Get(self.params.base_url, {}, data, function(answer)
local new_data = {}
-- vim.print('answer', answer)
if answer.error ~= nil then
vim.notify('Docile error: ' .. answer.error)
return
end
if answer.stop then
local result = answer.content:gsub('<EOT>', '')

-- detect whether the model name in answer.generation_settings.model contains 'CodeQwen'
if string.find(answer.generation_settings.model, 'CodeQwen') then
-- also drop the first character, which for CodeQwen is always the same leading space
result = result:gsub('^.', '')
end
-- vim.print('results', result)
table.insert(new_data, result)
end
cb(new_data)
end)
end

function DocileLlamaCpp:test()
self:complete('def factorial(n)\n if', ' return ans\n', function(data)
dump(data)
end)
end

return DocileLlamaCpp
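
For reference, a sketch of how this backend might be selected in the setup call, assuming the `provider` string resolves to this backend module the same way it does for the other providers (the `DocileLlamaCpp` name and the override shown are assumptions, not part of this diff):

```lua
-- a sketch, assuming provider names map to backend modules as for the other providers
require('cmp_ai.config'):setup {
  provider = 'DocileLlamaCpp',
  provider_options = {
    -- base_url defaults to http://localhost:5000/forward (see above); override if your server differs
    prompt = function(lines_before, lines_after)
      -- DeepSeek Coder FIM format, matching the default prompt in this backend
      return '<s><|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
}
```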
51 changes: 51 additions & 0 deletions lua/cmp_ai/backends/llamacpp.lua
@@ -0,0 +1,51 @@
local requests = require('cmp_ai.requests')

LlamaCpp = requests:new(nil)

function LlamaCpp:new(o, params)
o = o or {}
setmetatable(o, self)
self.__index = self
self.params = vim.tbl_deep_extend('keep', o or {}, {
base_url = 'http://localhost:8080/completion',
-- model = 'codellama:7b-code',
options = {
temperature = 0.2,
},
})
return o
end

function LlamaCpp:complete(lines_before, lines_after, cb)
local data = {
-- model = self.params.model,
-- prompt = '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>', -- for codellama
prompt = "<s><|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>", -- for deepseek coder
stream = false,
}
data = vim.tbl_extend('keep', data, self.params.options)
data.prompt = self.params.prompt(lines_before, lines_after)

self:Get(self.params.base_url, {}, data, function(answer)
local new_data = {}
-- vim.print('answer', answer)
if answer.error ~= nil then
vim.notify('LlamaCpp error: ' .. answer.error)
return
end
if answer.stop then
local result = answer.content:gsub('<EOT>', '')
-- vim.print('results', result)
table.insert(new_data, result)
end
cb(new_data)
end)
end

function LlamaCpp:test()
self:complete('def factorial(n)\n if', ' return ans\n', function(data)
dump(data)
end)
end

return LlamaCpp
2 changes: 1 addition & 1 deletion lua/cmp_ai/backends/ollama.lua
@@ -6,14 +6,14 @@ function Ollama:new(o)
o = o or {}
setmetatable(o, self)
self.__index = self
self.params = vim.tbl_deep_extend('keep', params or {}, {
Review comment: Does this bug apply to the openai or bard backends as well? They similarly use `params` instead of `o`.

Author: I do not know, since I have not used Bard or OpenAI. I can only assume that if they work fine, then this line is not needed there.
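
For illustration only (not code from this repo): with `vim.tbl_deep_extend('keep', ...)` the left-most table wins, so merging the defaults into an undefined `params` instead of the caller's `o` silently drops the user's provider options.

```lua
-- illustration of the merge bug discussed above (hypothetical helpers, not code from this PR)
local defaults = { model = 'codellama:7b-code', options = { temperature = 0.2 } }

local function new_broken(o, params)
  -- `params` is nil here, so the user's `o` never reaches the result
  return vim.tbl_deep_extend('keep', params or {}, defaults)
end

local function new_fixed(o, params)
  -- merging from `o` keeps the user's overrides and falls back to defaults
  return vim.tbl_deep_extend('keep', o or {}, defaults)
end

print(vim.inspect(new_broken({ model = 'deepseek-coder' }))) -- model stays 'codellama:7b-code'
print(vim.inspect(new_fixed({ model = 'deepseek-coder' })))  -- model becomes 'deepseek-coder'
```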

self.params = vim.tbl_deep_extend('keep', o or {}, {
base_url = 'http://127.0.0.1:11434/api/generate',
model = 'codellama:7b-code',
options = {
temperature = 0.2,
},
})

return o
end

1 change: 1 addition & 0 deletions lua/cmp_ai/config.lua
@@ -5,6 +5,7 @@ local conf = {
run_on_every_keystroke = true,
provider = 'HF',
provider_options = {},
debounce_delay = 200, -- ms
notify = true,
notify_callback = function(msg)
vim.notify(msg)
2 changes: 2 additions & 0 deletions lua/cmp_ai/requests.lua
@@ -35,6 +35,7 @@ function Service:Get(url, headers, data, cb)
vim.notify('Cannot open temporary message file: ' .. tmpfname, vim.log.levels.ERROR)
return
end
-- vim.print("Request Data: ", vim.fn.json_encode(data))
f:write(vim.fn.json_encode(data))
f:close()

@@ -57,6 +58,7 @@ function Service:Get(url, headers, data, cb)

local result = table.concat(response:result(), '\n')
local json = self:json_decode(result)
-- vim.print("Response: ", json )
if json == nil then
cb({ { error = 'No Response.' } })
else
59 changes: 55 additions & 4 deletions lua/cmp_ai/source.lua
@@ -40,19 +40,70 @@ function Source:_do_complete(ctx, cb)
local service = conf:get('provider')
service:complete(before, after, function(data)
self:end_complete(data, ctx, cb)
if conf:get('notify') then
conf:get('notify_callback')('Completion started')
end
-- why 2x ?
-- if conf:get('notify') then
-- conf:get('notify_callback')('Completion started')
-- end
end)
end

function Source:trigger(ctx, callback)
Owner: I am not comfortable with this entire debounce concept. First, I don't think it is needed here; cmp already has a debounce implementation. Second, I think this implementation is wrong; there should not be a global autocommand which handles the debounce.

Author: The built-in debounce in cmp has an issue where it does not work as it should:

  • To my understanding, it should show the completion popup x milliseconds after the last key press in insert mode.
  • What it actually does is show the completion x ms after the first letter is typed in insert mode.
    I tested this with a 2-second delay, and cmp would not wait for the last key press.
    My implementation waits until the last key is pressed, so it won't spin my GPU fans as much. I'm not sure the implementation is OK; I just copied someone else's code. I know that the copilot cmp extension also uses its own debounce code.

Owner: OK, looking at the cmp sources I can only agree.

if vim.fn.mode() == 'i' then
self:_do_complete(ctx, callback)
end
end

-- based on https://github.com/runiq/neovim-throttle-debounce/blob/main/lua/throttle-debounce/init.lua (MIT)
local function debounce_trailing(fn, ms)
local timer = vim.loop.new_timer()
local wrapped_fn

function wrapped_fn(...)
local argv = {...}
local argc = select('#', ...)
-- timer:stop() -- seems not needed?
timer:start(ms, 0, function()
pcall(vim.schedule_wrap(fn), unpack(argv, 1, argc))
end)
end
return wrapped_fn, timer
end

local bounce_complete, ret_tim = debounce_trailing(
Source.trigger,
conf:get('debounce_delay')
)

local self_cp, ctx_cp, call_cp -- variables to store last completion context

local bounce_autogroup = vim.api.nvim_create_augroup("BounceCompletion", { clear = true })
vim.api.nvim_create_autocmd({"TextChangedI","InsertEnter","TextChangedP"},{
pattern = "*",
callback = function()
if self_cp ~= nil then
bounce_complete(self_cp, ctx_cp, call_cp)
end
end,
group = bounce_autogroup
})

vim.api.nvim_create_autocmd({"InsertLeave"},{
pattern = "*",
callback = function()
ret_tim:stop()
end,
group = bounce_autogroup
})


--- complete
function Source:complete(ctx, callback)
if conf:get('ignored_file_types')[vim.bo.filetype] then
callback()
return
end
self:_do_complete(ctx, callback)
self_cp, ctx_cp, call_cp = self, ctx, callback
bounce_complete(self_cp, ctx, callback)
end

function Source:end_complete(data, ctx, cb)