Merge pull request #1455 from OpenInterpreter/development

Development
OpenInterpreter · Sep 13, 2024 · c81d910
2 parents dbc5259 + 874cb4b
commit c81d910
Show file tree

Hide file tree

Showing 7 changed files with 1,249 additions and 1,347 deletions.
diff --git a/interpreter/core/async_core.py b/interpreter/core/async_core.py
@@ -60,7 +60,7 @@ def __init__(self, *args, **kwargs):
         self.server = Server(self)
 
         # For the 01. This lets the OAI compatible server accumulate context before responding.
-        self.context_mode = False
+        self.context_mode = True
 
     async def input(self, chunk):
         """
@@ -737,6 +737,7 @@ async def openai_compatible_generator():
             for i, chunk in enumerate(
                 async_interpreter.chat(message=message, stream=True, display=True)
             ):
+                await asyncio.sleep(0)  # Yield control to the event loop
                 made_chunk = True
 
                 if async_interpreter.stop_event.is_set():
@@ -832,6 +833,11 @@ async def chat_completion(request: ChatCompletionRequest):
                     # Remove that {START} message that would have just been added
                     async_interpreter.messages = async_interpreter.messages[:-1]
                 last_start_time = time.time()
+                if (
+                    async_interpreter.messages
+                    and async_interpreter.messages[-1].get("role") != "user"
+                ):
+                    return
             else:
                 # Check if we're within 6 seconds of last_start_time
                 current_time = time.time()

diff --git a/interpreter/core/computer/browser/browser.py b/interpreter/core/computer/browser/browser.py
@@ -1,3 +1,4 @@
+import threading
 import time
 
 import html2text
@@ -34,6 +35,33 @@ def search(self, query):
         )
         return response.json()["result"]
 
+    def fast_search(self, query):
+        """
+        Searches the web for the specified query and returns the results.
+        """
+
+        # Start the request in a separate thread
+        response_thread = threading.Thread(
+            target=lambda: setattr(
+                threading.current_thread(),
+                "response",
+                requests.get(
+                    f'{self.computer.api_base.strip("/")}/browser/search',
+                    params={"query": query},
+                ),
+            )
+        )
+        response_thread.start()
+
+        # Perform the Google search
+        self.search_google(query, delays=False)
+
+        # Wait for the request to complete and get the result
+        response_thread.join()
+        response = response_thread.response
+
+        return response.json()["result"]
+
     def setup(self):
         self.service = Service(ChromeDriverManager().install())
         self.options = webdriver.ChromeOptions()
@@ -42,9 +70,9 @@ def setup(self):
     def go_to_url(self, url):
         """Navigate to a URL"""
         self.driver.get(url)
-        time.sleep(3)
+        time.sleep(1)
 
-    def search_google(self, query):
+    def search_google(self, query, delays=True):
         """Perform a Google search"""
         self.driver.get("https://www.perplexity.ai")
         # search_box = self.driver.find_element(By.NAME, 'q')
@@ -56,13 +84,16 @@ def search_google(self, query):
         active_element = self.driver.switch_to.active_element
         active_element.send_keys(query)
         active_element.send_keys(Keys.RETURN)
-        time.sleep(5)
+        if delays:
+            time.sleep(3)
 
     def analyze_page(self, intent):
         """Extract HTML, list interactive elements, and analyze with AI"""
         html_content = self.driver.page_source
         text_content = html2text.html2text(html_content)
 
+        # text_content = text_content[:len(text_content)//2]
+
         elements = (
             self.driver.find_elements(By.TAG_NAME, "a")
             + self.driver.find_elements(By.TAG_NAME, "button")

diff --git a/interpreter/core/computer/skills/skills.py b/interpreter/core/computer/skills/skills.py
@@ -24,8 +24,7 @@ class Skills:
     def __init__(self, computer):
         self.computer = computer
         self.path = str(Path(oi_dir) / "skills")
-        self.new_skill = NewSkill()
-        self.new_skill.path = self.path
+        self.new_skill = NewSkill(self)
 
     def list(self):
         return [
@@ -97,8 +96,9 @@ def import_skills(self):
 
 
 class NewSkill:
-    def __init__(self):
+    def __init__(self, skills):
         self.path = ""
+        self.skills = skills
 
     def create(self):
         self.steps = []
@@ -180,18 +180,27 @@ def {normalized_name}(step=0):
         if step + 1 < len(steps):
             print("After completing the above, I need you to run {normalized_name}(step=" + str(step + 1) + ") immediatly.")
         else:
-            print("You have completed all the steps, the task/skill has been run!")
+            print("After executing the code, you have completed all the steps, the task/skill has been run!")
     else:
         print("The specified step number exceeds the available steps. Please run with a valid step number.")
 '''.strip()
 
-        if not os.path.exists(self.path):
-            os.makedirs(self.path)
-        with open(self.path + "/" + normalized_name + ".py", "w") as file:
+        skill_file_path = os.path.join(self.skills.path, f"{normalized_name}.py")
+
+        if not os.path.exists(self.skills.path):
+            os.makedirs(self.skills.path)
+
+        with open(skill_file_path, "w") as file:
             file.write(skill_string)
 
-        print("SKILL SAVED:", self.name.upper())
+        # Execute the code in skill_string to define the function
+        exec(skill_string)
 
-        print(
-            "Teaching session finished. Tell the user that the skill above has been saved. Great work!"
-        )
+        # Verify that the file was written
+        if os.path.exists(skill_file_path):
+            print("SKILL SAVED:", self.name.upper())
+            print(
+                "Teaching session finished. Tell the user that the skill above has been saved. Great work!"
+            )
+        else:
+            print(f"Error: Failed to write skill file to {skill_file_path}")
diff --git a/interpreter/core/computer/terminal/languages/jupyter_language.py b/interpreter/core/computer/terminal/languages/jupyter_language.py
@@ -13,6 +13,8 @@
 import time
 import traceback
 
+os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+import litellm
 from jupyter_client import KernelManager
 
 from ..base_language import BaseLanguage
@@ -88,6 +90,9 @@ def run(self, code):
         while not self.kc.is_alive():
             time.sleep(0.1)
 
+        self.last_output_time = time.time()
+        self.last_output_message_time = time.time()
+
         ################################################################
         ### OFFICIAL OPEN INTERPRETER GOVERNMENT ISSUE SKILL LIBRARY ###
         ################################################################
@@ -144,7 +149,52 @@ def iopub_message_listener():
                     self.finish_flag = True
                     return
                 try:
+                    if (
+                        time.time() - self.last_output_time > 15
+                        and time.time() - self.last_output_message_time > 15
+                    ):
+                        self.last_output_message_time = time.time()
+
+                        text = f"{self.computer.interpreter.messages}\n\nThe program above has been running for over 15 seconds. It might require user input. Are there keystrokes that the user should type in, to proceed after the last command?"
+                        if time.time() - self.last_output_time > 500:
+                            text += f" If you think the process is frozen, or that the user wasn't expect it to run for this long (it has been {time.time() - self.last_output_time} seconds since last output) then say <input>CTRL-C</input>."
+
+                        messages = [
+                            {
+                                "role": "system",
+                                "type": "message",
+                                "content": "You are an expert programming assistant. You will help the user determine if they should enter input into the terminal, per the user's requests. If you think the user would want you to type something into stdin, enclose it in <input></input> XML tags, like <input>y</input> to type 'y'.",
+                            },
+                            {"role": "user", "type": "message", "content": text},
+                        ]
+                        params = {
+                            "messages": messages,
+                            "model": self.computer.interpreter.llm.model,
+                            "stream": True,
+                            "temperature": 0,
+                        }
+                        if self.computer.interpreter.llm.api_key:
+                            params["api_key"] = self.computer.interpreter.llm.api_key
+
+                        response = ""
+                        for chunk in litellm.completion(**params):
+                            content = chunk.choices[0].delta.content
+                            if type(content) == str:
+                                response += content
+
+                        # Parse the response for input tags
+                        input_match = re.search(r"<input>(.*?)</input>", response)
+                        if input_match:
+                            user_input = input_match.group(1)
+                            # Check if the user input is CTRL-C
+                            self.finish_flag = True
+                            if user_input.upper() == "CTRL-C":
+                                self.finish_flag = True
+                            else:
+                                self.kc.input(user_input)
+
                     msg = self.kc.iopub_channel.get_msg(timeout=0.05)
+                    self.last_output_time = time.time()
                 except queue.Empty:
                     continue
                 except Exception as e:
@@ -255,7 +305,10 @@ def detect_active_line(self, line):
             # Split the line by "##active_line" and grab the last element
             last_active_line = line.split("##active_line")[-1]
             # Split the last active line by "##" and grab the first element
-            active_line = int(last_active_line.split("##")[0])
+            try:
+                active_line = int(last_active_line.split("##")[0])
+            except:
+                active_line = 0
             # Remove all ##active_line{number}##\n
             line = re.sub(r"##active_line\d+##\n", "", line)
             return line, active_line
@@ -279,6 +332,7 @@ def _capture_output(self, message_queue):
                     if DEBUG_MODE:
                         print(output)
                     yield output
+
                 except queue.Empty:
                     if self.finish_flag:
                         time.sleep(0.1)