Skip to content

Commit

Permalink
Local III
Browse files Browse the repository at this point in the history
  • Loading branch information
KillianLucas committed Jun 18, 2024
1 parent a46a27a commit 144d8d3
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 67 deletions.
8 changes: 3 additions & 5 deletions interpreter/core/computer/utils/computer_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@


def pytesseract_get_text(img):
    """Return all text recognized in *img* via Tesseract OCR.

    Parameters:
        img: an image object accepted by ``pytesseract.image_to_string``
            (presumably a PIL Image or a file path — confirm with callers).

    Returns:
        str: the OCR'd text (may be empty if nothing is recognized).

    Raises:
        ImportError: if the optional ``pytesseract`` dependency is not
            installed; callers are expected to catch this and prompt the
            user to run ``pip install 'open-interpreter[local]'``.
    """
    # Imported lazily so the module loads even when the optional local-vision
    # extras are absent; the ImportError propagates to the caller by design.
    import pytesseract

    return pytesseract.image_to_string(img)


def pytesseract_get_text_bounding_boxes(img):
Expand Down
59 changes: 30 additions & 29 deletions interpreter/core/computer/vision/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,32 @@ def __init__(self, computer):

def load(self):
    """Load the Moondream tiny vision model and its tokenizer.

    Downloads/initializes ``vikhyatk/moondream2`` (pinned revision) via
    ``transformers`` and stores the results on ``self.model`` and
    ``self.tokenizer``.

    Returns:
        bool: True once the model and tokenizer are loaded.

    Raises:
        ImportError: if ``transformers`` is not installed; callers handle
            this and suggest ``pip install 'open-interpreter[local]'``.
    """
    print("\nLoading Moondream (vision)...\n")

    # Silence the noisy transformers/model-download output. Holding the
    # devnull handle in the `with` ensures it is closed afterwards
    # (the previous code opened os.devnull twice and never closed it).
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(
        devnull
    ), contextlib.redirect_stderr(devnull):
        import transformers  # Wait until we use it. Transformers can't be lazy loaded for some reason!

        # Avoid tokenizer fork-related warnings/deadlocks.
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        if self.computer.debug:
            print(
                "Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
            )
            print(
                "Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
            )
        model_id = "vikhyatk/moondream2"
        # Pinned revision so remote code changes upstream can't break us.
        revision = "2024-04-02"
        print("loading model")

        self.model = transformers.AutoModelForCausalLM.from_pretrained(
            model_id, trust_remote_code=True, revision=revision
        )
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            model_id, revision=revision
        )
        return True

def ocr(
self,
Expand Down Expand Up @@ -103,7 +98,13 @@ def ocr(
# Set path to the path of the temporary file
path = temp_file_path

return pytesseract_get_text(path)
try:
return pytesseract_get_text(path)
except ImportError:
print(
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
)
return ""

def query(
self,
Expand Down
35 changes: 22 additions & 13 deletions interpreter/core/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,19 +134,28 @@ def run(self, messages):
precursor = "Imagine I have just shown you an image with this description: "
postcursor = ""

image_description = self.vision_renderer(lmc=img_msg)

# It would be nice to format this as a message to the user and display it like: "I see: image_description"

img_msg["content"] = (
precursor
+ image_description
+ "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
+ self.interpreter.computer.vision.ocr(lmc=img_msg)
+ "\n'''"
+ postcursor
)
img_msg["format"] = "description"
try:
image_description = self.vision_renderer(lmc=img_msg)
ocr = self.interpreter.computer.vision.ocr(lmc=img_msg)

# It would be nice to format this as a message to the user and display it like: "I see: image_description"

img_msg["content"] = (
precursor
+ image_description
+ "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
+ ocr
+ "\n'''"
+ postcursor
)
img_msg["format"] = "description"

except ImportError:
print(
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
)
img_msg["format"] = "description"
img_msg["content"] = ""

# Convert to OpenAI messages format
messages = convert_to_openai_messages(
Expand Down
19 changes: 0 additions & 19 deletions interpreter/core/llm/vision_for_text_llms.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
EXPERIMENTAL
"""

print("Remember to `pip install open-interpreter[local]`.")

import subprocess

from interpreter import interpreter
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "open-interpreter"
packages = [
{include = "interpreter"},
]
version = "0.2.6" # Use "-rc1", "-rc2", etc. for pre-release versions
version = "0.3.0" # Use "-rc1", "-rc2", etc. for pre-release versions
description = "Let language models run code"
authors = ["Killian Lucas <killian@openinterpreter.com>"]
readme = "README.md"
Expand Down

0 comments on commit 144d8d3

Please sign in to comment.