use llama binary instead of extending ruby
Using the binary simplifies the code, but the biggest reason for this
change is that wrapping the binary is significantly faster. I'm not
sure why this is, but the same C++ code compiled as a Ruby extension
takes about 4x as long to run.

I'm keeping the code in ext/ because it's sort of like a native
extension in that it compiles code on the user's machine. This seems
like the least confusing way to manage that. (E.g. if the compilation
fails, the user gets an error saying "ERROR: Failed to build gem native
extension" rather than "some random script failed.")
zfletch committed Apr 6, 2023
1 parent aa36953 commit 71aaa2c
Showing 11 changed files with 90 additions and 111 deletions.
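
For orientation, the new public API introduced in lib/llama/model.rb (diff below) can be exercised roughly like this — a sketch, assuming a quantized model has already been downloaded to the illustrative path:

require 'llama'

# Sketch of the new binary-wrapping API; the model path and prompt are
# illustrative, and the defaults (seed: Time.now.to_i, n_predict: 128)
# can be omitted.
model = Llama::Model.new(
  'models/7B/ggml-model-q4_0.bin',
  seed: 42,
  n_predict: 16
)

puts model.predict('The capital of France is') # shells out to bin/llama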
2 changes: 1 addition & 1 deletion .gitignore
@@ -10,7 +10,7 @@
 /tmp/
 
 # ext files
-/ext/llama/llama
+/bin/llama
 
 # rspec failure tracking
 .rspec_status
2 changes: 1 addition & 1 deletion Gemfile.lock
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    llama-rb (0.1.0)
+    llama-rb (0.2.0)
 
 GEM
   remote: https://rubygems.org/
7 changes: 0 additions & 7 deletions bin/console
@@ -3,12 +3,5 @@
 require 'bundler/setup'
 require 'llama'
 
-# You can add fixtures and/or initialization code here to make experimenting
-# with your gem easier. You can also use a different console, if you like.
-
-# (If you use this, don't forget to add pry to your Gemfile!)
-# require "pry"
-# Pry.start
-
 require 'irb'
 IRB.start(__FILE__)
3 changes: 1 addition & 2 deletions bin/setup
@@ -5,5 +5,4 @@ set -vx
 
 bundle install
 
-cd ext/llama
-ruby extconf.rb
+ruby ext/extconf.rb
4 changes: 4 additions & 0 deletions ext/Makefile
@@ -0,0 +1,4 @@
+# dummy file to make gem installer happy
+all:
+clean:
+install:
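
Why the dummy Makefile works: RubyGems treats anything listed in spec.extensions as a native extension, so after running extconf.rb it also invokes make in the extension directory. The no-op targets let that step succeed even though extconf.rb has already done the real work. Roughly — a simplified sketch of the installer's behavior, not the actual RubyGems code:

Dir.chdir('ext') do
  system('ruby extconf.rb', exception: true) # does the real build (see below)
  system('make clean', exception: true)      # satisfied by the dummy targets
  system('make', exception: true)            # no-op
  system('make install', exception: true)    # no-op
end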
10 changes: 10 additions & 0 deletions ext/extconf.rb
@@ -0,0 +1,10 @@
+require 'fileutils'
+
+root = File.dirname(__FILE__)
+llama_root = File.join(root, '..', 'llama.cpp')
+
+main = File.join(root, '..', 'bin', 'llama')
+llama_main = File.join(llama_root, 'main')
+
+Dir.chdir(llama_root) { system('make main', exception: true) }
+FileUtils.cp(llama_main, main)
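
Note that system is called with exception: true, so a failing `make main` raises instead of returning false, and the install aborts with the "ERROR: Failed to build gem native extension" message mentioned in the commit message. A minimal illustration (the exact error text varies by Ruby and make version):

system('make no-such-target', exception: true)
# raises RuntimeError (Command failed with exit 2: make no-such-target)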
10 changes: 0 additions & 10 deletions ext/llama/extconf.rb

This file was deleted.

134 changes: 54 additions & 80 deletions lib/llama/model.rb
@@ -1,84 +1,58 @@
+require 'open3'
+require 'shellwords'
+
 module Llama
   class Model
-    # move methods defined in `model.cpp` from public to private
-    private :initialize_cpp, :predict_cpp
-
-    # rubocop:disable Metrics/MethodLength
-    def self.new(
-      model, # path to model file, e.g. "models/7B/ggml-model-q4_0.bin"
-      n_ctx: 512, # context size
-      n_parts: -1, # amount of model parts (-1 = determine from model dimensions)
-      seed: Time.now.to_i, # RNG seed
-      memory_f16: true, # use f16 instead of f32 for memory kv
-      use_mlock: false # use mlock to keep model in memory
-    )
-      instance = allocate
-
-      instance.instance_eval do
-        initialize
-
-        @model = model
-        @n_ctx = n_ctx
-        @n_parts = n_parts
-        @seed = seed
-        @memory_f16 = memory_f16
-        @use_mlock = use_mlock
-
-        capture_stderr do
-          initialize_cpp(
-            model,
-            n_ctx,
-            n_parts,
-            seed,
-            memory_f16,
-            use_mlock,
-          )
-        end
-      end
-
-      instance
-    end
-    # rubocop:enable Metrics/MethodLength
-
-    def predict(
-      prompt, # string used as prompt
-      n_predict: 128 # number of tokens to predict
-    )
-      text = ''
-
-      capture_stderr { text = predict_cpp(prompt, n_predict) }
-
-      process_text(text)
-    end
-
-    attr_reader :model, :n_ctx, :n_parts, :seed, :memory_f16, :use_mlock, :stderr
-
-    private
-
-    def capture_stderr
-      previous = $stderr.dup
-      tmp = Tempfile.open('llama-rb-stderr')
-
-      begin
-        $stderr.reopen(tmp)
-
-        yield
-
-        tmp.rewind
-        @stderr = tmp.read
-      ensure
-        tmp.close(true)
-        $stderr.reopen(previous)
-      end
-    end
-
-    def process_text(text)
-      text = text.force_encoding(Encoding.default_external)
-
-      # remove the space that was added as a tokenizer hack in model.cpp
-      text[0] = '' if text.size.positive?
-
-      text
-    end
+    class ModelError < StandardError
+    end
+
+    def initialize(
+      model,
+      seed: Time.now.to_i,
+      n_predict: 128,
+      binary: default_binary
+    )
+      @model = model
+      @seed = seed
+      @n_predict = n_predict
+      @binary = binary
+    end
+
+    def predict(prompt)
+      stdout, @stderr, @status = Open3.capture3(command(prompt))
+
+      raise ModelError, "Error #{status.to_i}" unless status.success?
+
+      # remove the space that is added as a tokenizer hack in examples/main/main.cpp
+      stdout[0] = ''
+      stdout
+    end
+
+    attr_reader :model, :seed, :n_predict, :binary
+
+    private
+
+    attr_reader :stderr, :status
+
+    def default_binary
+      File.join(File.dirname(__FILE__), '..', '..', 'bin', 'llama')
+    end
+
+    def command(prompt)
+      escape_command(binary,
+                     model: model,
+                     prompt: prompt,
+                     seed: seed,
+                     n_predict: n_predict)
+    end
+
+    def escape_command(command, **flags)
+      flags_string = flags.map do |key, value|
+        "--#{Shellwords.escape(key)} #{Shellwords.escape(value)}"
+      end.join(' ')
+      command_string = Shellwords.escape(command)
+
+      "#{command_string} #{flags_string}"
+    end
   end
 end
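
The Shellwords escaping above matters because Open3.capture3 is handed a single command string, which can be interpreted by a shell; escaping keeps the model path and prompt from being parsed as shell syntax. A quick illustration of the kind of command string #command builds (binary path shortened for readability):

require 'shellwords'

prompt = 'hello; rm -rf /'
"bin/llama --prompt #{Shellwords.escape(prompt)}"
# => bin/llama --prompt hello\;\ rm\ -rf\ /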
2 changes: 1 addition & 1 deletion lib/llama/version.rb
@@ -1,3 +1,3 @@
 module Llama
-  VERSION = '0.1.0'.freeze
+  VERSION = '0.2.0'.freeze
 end
21 changes: 15 additions & 6 deletions llama-rb.gemspec
@@ -5,7 +5,7 @@ Gem::Specification.new do |spec|
   spec.version = Llama::VERSION
   spec.licenses = ['MIT']
   spec.authors = ['zfletch']
-  spec.email = ['zfletch2@gmail.com']
+  spec.email = ['zf.rubygems@gmail.com']
 
   spec.summary = 'Ruby interface for Llama'
   spec.description = 'ggerganov/llama.cpp with Ruby hooks'
@@ -16,26 +16,35 @@ Gem::Specification.new do |spec|
   spec.metadata['source_code_uri'] = spec.homepage
   spec.metadata['changelog_uri'] = "#{spec.homepage}/releases"
 
-  # Specify which files should be added to the gem when it is released.
-  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
   spec.files = [
     'Gemfile',
     'Gemfile.lock',
     'LICENSE',
     'README.md',
     'Rakefile',
-    'ext/llama/extconf.rb',
+    'bin/console',
+    'ext/extconf.rb',
+    'ext/Makefile',
     'lib/llama.rb',
     'lib/llama/model.rb',
     'lib/llama/version.rb',
     'llama-rb.gemspec',
-    'llama.cpp',
+    'llama.cpp/LICENSE',
+    'llama.cpp/Makefile',
+    'llama.cpp/README.md',
+    'llama.cpp/examples/common.cpp',
+    'llama.cpp/examples/common.h',
+    'llama.cpp/examples/main/main.cpp',
+    'llama.cpp/ggml.c',
+    'llama.cpp/ggml.h',
+    'llama.cpp/llama.cpp',
+    'llama.cpp/llama.h',
     'models/.gitkeep',
   ]
   spec.bindir = 'exe'
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
 
-  spec.extensions = %w[ext/llama/extconf.rb]
+  spec.extensions = %w[ext/extconf.rb]
   spec.metadata['rubygems_mfa_required'] = 'true'
 end
6 changes: 3 additions & 3 deletions spec/llama/model_spec.rb
@@ -1,11 +1,11 @@
 RSpec.describe Llama::Model do
-  subject(:model) { described_class.new('models/7B/ggml-model-q4_0.bin', seed: 2) }
+  subject(:model) { described_class.new('models/7B/ggml-model-q4_0.bin', seed: 10, n_predict: 1) }
 
   it 'predicts text' do
     expect(
-      model.predict('The most common words for testing a new programming language are: h', n_predict: 2),
+      model.predict('hello, wo'),
     ).to eq(
-      'The most common words for testing a new programming language are: hmmm',
+      'hello, woof',
     )
   end
 end
