Skip to content

Commit

Permalink
Local III
Browse files Browse the repository at this point in the history
  • Loading branch information
KillianLucas committed Jun 18, 2024
1 parent a46a27a commit 144d8d3
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 67 deletions.
8 changes: 3 additions & 5 deletions interpreter/core/computer/utils/computer_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@


def pytesseract_get_text(img):
    """Return all text recognized in *img* via Tesseract OCR.

    Parameters:
        img: an image object accepted by ``pytesseract.image_to_string``
            (presumably a PIL Image or a file path — confirm with callers).

    Returns:
        str: the OCR'd text (may be empty if nothing is recognized).

    Raises:
        ImportError: if the optional ``pytesseract`` dependency is not
            installed; callers are expected to catch this and prompt the
            user to run ``pip install 'open-interpreter[local]'``.
    """
    # Imported lazily so the module loads even when the optional local-vision
    # extras are absent; the ImportError propagates to the caller by design.
    import pytesseract

    return pytesseract.image_to_string(img)


def pytesseract_get_text_bounding_boxes(img):
Expand Down
59 changes: 30 additions & 29 deletions interpreter/core/computer/vision/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,32 @@ def __init__(self, computer):

def load(self):
    """Load the Moondream tiny vision model and its tokenizer.

    Downloads/initializes ``vikhyatk/moondream2`` (pinned revision) via
    ``transformers`` and stores the results on ``self.model`` and
    ``self.tokenizer``.

    Returns:
        bool: True once the model and tokenizer are loaded.

    Raises:
        ImportError: if ``transformers`` is not installed; callers handle
            this and suggest ``pip install 'open-interpreter[local]'``.
    """
    print("\nLoading Moondream (vision)...\n")

    # Silence the noisy transformers/model-download output. Holding the
    # devnull handle in the `with` ensures it is closed afterwards
    # (the previous code opened os.devnull twice and never closed it).
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(
        devnull
    ), contextlib.redirect_stderr(devnull):
        import transformers  # Wait until we use it. Transformers can't be lazy loaded for some reason!

        # Avoid tokenizer fork-related warnings/deadlocks.
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        if self.computer.debug:
            print(
                "Open Interpreter will use Moondream (tiny vision model) to describe images to the language model. Set `interpreter.llm.vision_renderer = None` to disable this behavior."
            )
            print(
                "Alternatively, you can use a vision-supporting LLM and set `interpreter.llm.supports_vision = True`."
            )
        model_id = "vikhyatk/moondream2"
        # Pinned revision so remote code changes upstream can't break us.
        revision = "2024-04-02"
        print("loading model")

        self.model = transformers.AutoModelForCausalLM.from_pretrained(
            model_id, trust_remote_code=True, revision=revision
        )
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            model_id, revision=revision
        )
        return True

def ocr(
self,
Expand Down Expand Up @@ -103,7 +98,13 @@ def ocr(
# Set path to the path of the temporary file
path = temp_file_path

return pytesseract_get_text(path)
try:
return pytesseract_get_text(path)
except ImportError:
print(
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
)
return ""

def query(
self,
Expand Down
35 changes: 22 additions & 13 deletions interpreter/core/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,19 +134,28 @@ def run(self, messages):
precursor = "Imagine I have just shown you an image with this description: "
postcursor = ""

image_description = self.vision_renderer(lmc=img_msg)

# It would be nice to format this as a message to the user and display it like: "I see: image_description"

img_msg["content"] = (
precursor
+ image_description
+ "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
+ self.interpreter.computer.vision.ocr(lmc=img_msg)
+ "\n'''"
+ postcursor
)
img_msg["format"] = "description"
try:
image_description = self.vision_renderer(lmc=img_msg)
ocr = self.interpreter.computer.vision.ocr(lmc=img_msg)

# It would be nice to format this as a message to the user and display it like: "I see: image_description"

img_msg["content"] = (
precursor
+ image_description
+ "\n---\nThe image contains the following text exactly, which may or may not be relevant (if it's not relevant, ignore this): '''\n"
+ ocr
+ "\n'''"
+ postcursor
)
img_msg["format"] = "description"

except ImportError:
print(
"\nTo use local vision, run `pip install 'open-interpreter[local]'`.\n"
)
img_msg["format"] = "description"
img_msg["content"] = ""

# Convert to OpenAI messages format
messages = convert_to_openai_messages(
Expand Down
19 changes: 0 additions & 19 deletions interpreter/core/llm/vision_for_text_llms.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
EXPERIMENTAL
"""

print("Remember to `pip install open-interpreter[local]`.")

import subprocess

from interpreter import interpreter
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "open-interpreter"
packages = [
{include = "interpreter"},
]
version = "0.2.6" # Use "-rc1", "-rc2", etc. for pre-release versions
version = "0.3.0" # Use "-rc1", "-rc2", etc. for pre-release versions
description = "Let language models run code"
authors = ["Killian Lucas <killian@openinterpreter.com>"]
readme = "README.md"
Expand Down

0 comments on commit 144d8d3

Please sign in to comment.