Debounce, LlamaCpp support, expose prompt as setup option, fix passing parameters to model (ollama) #11

Open · wants to merge 17 commits into base: main
53 changes: 53 additions & 0 deletions README.md
@@ -155,7 +155,12 @@ cmp_ai:setup({
provider = 'Ollama',
provider_options = {
model = 'codellama:7b-code',
prompt = function(lines_before, lines_after)
-- prompt depends on the model you use. Here is an example for codellama
return '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>' -- for codellama
end,
},
debounce_delay = 600, -- ms; llama can be GPU hungry, so wait this many ms after the last key press before sending a request
notify = true,
notify_callback = function(msg)
vim.notify(msg)
@@ -168,6 +173,54 @@ cmp_ai:setup({
},
})
```
Models for Ollama are available [here](https://ollama.ai/library). For code completion, use a model that supports it, e.g. [DeepSeek Base 6.7b](https://ollama.ai/library/deepseek-coder).
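
If you want to use a DeepSeek Coder model with the Ollama provider, a prompt function along these lines should work (a sketch: the model tag is a placeholder, and the FIM tokens are the DeepSeek Coder ones used in the LlamaCpp example below):

```lua
-- a sketch: Ollama provider with a DeepSeek Coder style FIM prompt
-- (the model tag is a placeholder; pick an actual tag from the Ollama library)
local cmp_ai = require('cmp_ai.config')

cmp_ai:setup({
  provider = 'Ollama',
  provider_options = {
    model = 'deepseek-coder:6.7b-base',
    prompt = function(lines_before, lines_after)
      return '<s><|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
})
```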

To use with [LlamaCpp](https://github.com/ggerganov/llama.cpp):

```lua
local cmp_ai = require('cmp_ai.config')

cmp_ai:setup {
max_lines = 30,
provider = "LlamaCpp",
provider_options = {
options = {
n_predict = 20, -- number of tokens to generate per completion
min_p = 0.05, -- default 0.05; cut off tokens with probability below max_prob * min_p
-- repeat_last_n = 64, -- default 64
-- repeat_penalty = 1.100, -- default 1.1
-- see the llama.cpp server README (linked below) for other options
},
prompt = function(lines_before, lines_after)
-- prompt depends on the model you use. Here is an example for deepseek coder
return "<s><|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>" -- for deepseek coder
end,
},
debounce_delay = 600, -- ms; llama can be GPU hungry, so wait this many ms after the last key press before sending a request
notify = true,
notify_callback = function(msg)
vim.notify(msg)
end,
run_on_every_keystroke = false,
ignored_file_types = {
-- default is not to ignore
-- uncomment to ignore in lua:
-- lua = true
},
}
```


[LlamaCpp Server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) has to be started manually with:

```bash
./server -m ./models/deepseek-coder-6.7b-base.Q4_K_M.gguf -ngl 50 -c 2048 --log-disable
```

LlamaCpp requires a model in GGUF format. Here is the model I currently use for coding:

- [DeepSeek Base 6.7b](https://huggingface.co/TheBloke/deepseek-coder-6.7B-base-GGUF/blob/main/deepseek-coder-6.7b-base.Q4_K_M.gguf)

It is good to have at least 12 GB of VRAM to run it (it works best with NVIDIA GPUs thanks to CUDA acceleration). You can also grab smaller models (faster to run, but lower-quality completions).
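
If your llama.cpp server does not run on the default `http://localhost:8080/completion` endpoint, the URL can be overridden; a sketch, assuming `provider_options` is merged into the backend defaults with `vim.tbl_deep_extend('keep', ...)` as in the backend code of this PR:

```lua
-- a sketch: point the LlamaCpp provider at a non-default server address
-- (the host and port below are placeholders)
require('cmp_ai.config'):setup {
  provider = 'LlamaCpp',
  provider_options = {
    base_url = 'http://192.168.1.10:8080/completion',
    prompt = function(lines_before, lines_after)
      return '<s><|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
}
```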


### `notify`

58 changes: 58 additions & 0 deletions lua/cmp_ai/backends/docilellamacpp.lua
@@ -0,0 +1,58 @@
local requests = require('cmp_ai.requests')

DocileLlamaCpp = requests:new(nil)


function DocileLlamaCpp:new(o, params)
o = o or {}
setmetatable(o, self)
self.__index = self
self.params = vim.tbl_deep_extend('keep', o or {}, {
base_url = 'http://localhost:5000/forward',
-- model = 'codellama:7b-code',
options = {
temperature = 0.2,
},
})
return o
end

function DocileLlamaCpp:complete(lines_before, lines_after, cb)
local data = {
-- model = self.params.model,
-- prompt = '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>', -- for codellama
prompt = "<s><|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>", -- for deepseek coder
stream = false,
}
data = vim.tbl_extend('keep', data, self.params.options)
data.prompt = self.params.prompt(lines_before, lines_after)

self:Get(self.params.base_url, {}, data, function(answer)
local new_data = {}
-- vim.print('answer', answer)
if answer.error ~= nil then
vim.notify('Docile error: ' .. answer.error)
return
end
if answer.stop then
local result = answer.content:gsub('<EOT>', '')

-- detect whether the model name in answer.generation_settings.model contains 'CodeQwen'
if string.find(answer.generation_settings.model, 'CodeQwen') then
-- also drop the first character, which for CodeQwen is always the same leading space
result = result:gsub('^.', '')
end
-- vim.print('results', result)
table.insert(new_data, result)
end
cb(new_data)
end)
end

function DocileLlamaCpp:test()
self:complete('def factorial(n)\n if', ' return ans\n', function(data)
dump(data)
end)
end

return DocileLlamaCpp
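
For reference, a sketch of how this backend might be selected in the setup call, assuming the `provider` string resolves to this backend module the same way it does for the other providers (the `DocileLlamaCpp` name and the override shown are assumptions, not part of this diff):

```lua
-- a sketch, assuming provider names map to backend modules as for the other providers
require('cmp_ai.config'):setup {
  provider = 'DocileLlamaCpp',
  provider_options = {
    -- base_url defaults to http://localhost:5000/forward (see above); override if your server differs
    prompt = function(lines_before, lines_after)
      -- DeepSeek Coder FIM format, matching the default prompt in this backend
      return '<s><|fim▁begin|>' .. lines_before .. '<|fim▁hole|>' .. lines_after .. '<|fim▁end|>'
    end,
  },
}
```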
51 changes: 51 additions & 0 deletions lua/cmp_ai/backends/llamacpp.lua
@@ -0,0 +1,51 @@
local requests = require('cmp_ai.requests')

LlamaCpp = requests:new(nil)

function LlamaCpp:new(o, params)
o = o or {}
setmetatable(o, self)
self.__index = self
self.params = vim.tbl_deep_extend('keep', o or {}, {
base_url = 'http://localhost:8080/completion',
-- model = 'codellama:7b-code',
options = {
temperature = 0.2,
},
})
return o
end

function LlamaCpp:complete(lines_before, lines_after, cb)
local data = {
-- model = self.params.model,
-- prompt = '<PRE> ' .. lines_before .. ' <SUF>' .. lines_after .. ' <MID>', -- for codellama
prompt = "<s><|fim▁begin|>" .. lines_before .. "<|fim▁hole|>" .. lines_after .. "<|fim▁end|>", -- for deepseek coder
stream = false,
}
data = vim.tbl_extend('keep', data, self.params.options)
data.prompt = self.params.prompt(lines_before, lines_after)

self:Get(self.params.base_url, {}, data, function(answer)
local new_data = {}
-- vim.print('answer', answer)
if answer.error ~= nil then
vim.notify('LlamaCpp error: ' .. answer.error)
return
end
if answer.stop then
local result = answer.content:gsub('<EOT>', '')
-- vim.print('results', result)
table.insert(new_data, result)
end
cb(new_data)
end)
end

function LlamaCpp:test()
self:complete('def factorial(n)\n if', ' return ans\n', function(data)
dump(data)
end)
end

return LlamaCpp
2 changes: 1 addition & 1 deletion lua/cmp_ai/backends/ollama.lua
@@ -6,14 +6,14 @@ function Ollama:new(o)
o = o or {}
setmetatable(o, self)
self.__index = self
self.params = vim.tbl_deep_extend('keep', params or {}, {
Review comment: Does this bug apply to the openai or bard backends as well? They similarly use `params` instead of `o`.

Author: I do not know, since I have not used Bard or OpenAI. I can only assume that if they work fine, then this line is not needed there.
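
For illustration only (not code from this repo): with `vim.tbl_deep_extend('keep', ...)` the left-most table wins, so merging the defaults into an undefined `params` instead of the caller's `o` silently drops the user's provider options.

```lua
-- illustration of the merge bug discussed above (hypothetical helpers, not code from this PR)
local defaults = { model = 'codellama:7b-code', options = { temperature = 0.2 } }

local function new_broken(o, params)
  -- `params` is nil here, so the user's `o` never reaches the result
  return vim.tbl_deep_extend('keep', params or {}, defaults)
end

local function new_fixed(o, params)
  -- merging from `o` keeps the user's overrides and falls back to defaults
  return vim.tbl_deep_extend('keep', o or {}, defaults)
end

print(vim.inspect(new_broken({ model = 'deepseek-coder' }))) -- model stays 'codellama:7b-code'
print(vim.inspect(new_fixed({ model = 'deepseek-coder' })))  -- model becomes 'deepseek-coder'
```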

self.params = vim.tbl_deep_extend('keep', o or {}, {
base_url = 'http://127.0.0.1:11434/api/generate',
model = 'codellama:7b-code',
options = {
temperature = 0.2,
},
})

return o
end

1 change: 1 addition & 0 deletions lua/cmp_ai/config.lua
@@ -5,6 +5,7 @@ local conf = {
run_on_every_keystroke = true,
provider = 'HF',
provider_options = {},
debounce_delay = 200, -- ms
notify = true,
notify_callback = function(msg)
vim.notify(msg)
2 changes: 2 additions & 0 deletions lua/cmp_ai/requests.lua
@@ -35,6 +35,7 @@ function Service:Get(url, headers, data, cb)
vim.notify('Cannot open temporary message file: ' .. tmpfname, vim.log.levels.ERROR)
return
end
-- vim.print("Request Data: ", vim.fn.json_encode(data))
f:write(vim.fn.json_encode(data))
f:close()

@@ -57,6 +58,7 @@ function Service:Get(url, headers, data, cb)

local result = table.concat(response:result(), '\n')
local json = self:json_decode(result)
-- vim.print("Response: ", json )
if json == nil then
cb({ { error = 'No Response.' } })
else
59 changes: 55 additions & 4 deletions lua/cmp_ai/source.lua
@@ -40,19 +40,70 @@ function Source:_do_complete(ctx, cb)
local service = conf:get('provider')
service:complete(before, after, function(data)
self:end_complete(data, ctx, cb)
if conf:get('notify') then
conf:get('notify_callback')('Completion started')
end
-- why 2x ?
-- if conf:get('notify') then
-- conf:get('notify_callback')('Completion started')
-- end
end)
end

function Source:trigger(ctx, callback)
Owner: I am not comfortable with this entire debounce concept. First, I don't think it is needed here; cmp already has a debounce implementation. Second, I think this implementation is wrong; there should not be a global autocommand which handles the debounce.

Author: The built-in debounce in cmp has an issue where it does not work as it should:

  • To my understanding, it should show the completion popup x milliseconds after the last key press in insert mode.
  • What it actually does is show the completion x ms after the first letter is typed in insert mode.
    I tested this with a 2-second delay, and cmp would not wait for the last key press.
    My implementation waits until the last key is pressed, so it won't spin my GPU fans as much. I'm not sure the implementation is OK; I just copied someone else's code. I know that the copilot cmp extension also uses its own debounce code.

Owner: OK, looking at the cmp sources I can only agree.

if vim.fn.mode() == 'i' then
self:_do_complete(ctx, callback)
end
end

-- based on https://github.com/runiq/neovim-throttle-debounce/blob/main/lua/throttle-debounce/init.lua (MIT)
local function debounce_trailing(fn, ms)
local timer = vim.loop.new_timer()
local wrapped_fn

function wrapped_fn(...)
local argv = {...}
local argc = select('#', ...)
-- timer:stop() -- seems not needed?
timer:start(ms, 0, function()
pcall(vim.schedule_wrap(fn), unpack(argv, 1, argc))
end)
end
return wrapped_fn, timer
end

local bounce_complete, ret_tim = debounce_trailing(
Source.trigger,
conf:get('debounce_delay')
)

local self_cp, ctx_cp, call_cp -- variables to store last completion context

local bounce_autogroup = vim.api.nvim_create_augroup("BounceCompletion", { clear = true })
vim.api.nvim_create_autocmd({"TextChangedI","InsertEnter","TextChangedP"},{
pattern = "*",
callback = function()
if self_cp ~= nil then
bounce_complete(self_cp, ctx_cp, call_cp)
end
end,
group = bounce_autogroup
})

vim.api.nvim_create_autocmd({"InsertLeave"},{
pattern = "*",
callback = function()
ret_tim:stop()
end,
group = bounce_autogroup
})


--- complete
function Source:complete(ctx, callback)
if conf:get('ignored_file_types')[vim.bo.filetype] then
callback()
return
end
self:_do_complete(ctx, callback)
self_cp, ctx_cp, call_cp = self, ctx, callback
bounce_complete(self_cp, ctx, callback)
end

function Source:end_complete(data, ctx, cb)