From e8f19159965ea6b755cb090b2de6022564647bc2 Mon Sep 17 00:00:00 2001
From: Matt Zhou <mattzh1314@gmail.com>
Date: Tue, 17 Dec 2024 10:35:10 -0800
Subject: [PATCH 1/4] wip: move core_memory_replace sandbox tests into tests/integration_test_base_tools.py

---
 tests/integration_test_base_tools.py          | 170 ++++++++++++++++++
 ...integration_test_tool_execution_sandbox.py |  68 -------
 2 files changed, 170 insertions(+), 68 deletions(-)
 create mode 100644 tests/integration_test_base_tools.py

diff --git a/tests/integration_test_base_tools.py b/tests/integration_test_base_tools.py
new file mode 100644
index 0000000000..9ee244ba51
--- /dev/null
+++ b/tests/integration_test_base_tools.py
@@ -0,0 +1,170 @@
+import uuid
+from pathlib import Path
+
+import pytest
+from sqlalchemy import delete
+
+from letta import create_client
+from letta.functions.function_sets.base import core_memory_replace
+from letta.orm import SandboxConfig, SandboxEnvironmentVariable
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.memory import ChatMemory
+from letta.schemas.organization import Organization
+from letta.schemas.sandbox_config import LocalSandboxConfig, SandboxConfigCreate
+from letta.schemas.user import User
+from letta.services.organization_manager import OrganizationManager
+from letta.services.sandbox_config_manager import SandboxConfigManager
+from letta.services.tool_execution_sandbox import ToolExecutionSandbox
+from letta.services.tool_manager import ToolManager
+from letta.services.user_manager import UserManager
+from letta.settings import tool_settings
+from tests.helpers.utils import create_tool_from_func
+
+# Constants
+namespace = uuid.NAMESPACE_DNS
+org_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-org"))
+user_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-user"))
+
+
+# Fixtures
+@pytest.fixture(autouse=True)
+def clear_tables():
+    """Fixture to clear the organization table before each test."""
+    from letta.server.server import db_context
+
+    with db_context() as session:
+        session.execute(delete(SandboxEnvironmentVariable))
+        session.execute(delete(SandboxConfig))
+        session.commit()  # Commit the deletion
+
+    # Kill all sandboxes
+    from e2b_code_interpreter import Sandbox
+
+    for sandbox in Sandbox.list():
+        Sandbox.connect(sandbox.sandbox_id).kill()
+
+
+@pytest.fixture
+def check_e2b_key_is_set():
+    original_api_key = tool_settings.e2b_api_key
+    assert original_api_key is not None, "Missing e2b key! Cannot execute these tests."
+    yield
+
+
+@pytest.fixture
+def check_composio_key_set():
+    original_api_key = tool_settings.composio_api_key
+    assert original_api_key is not None, "Missing composio key! Cannot execute this test."
+    yield
+
+
+@pytest.fixture
+def test_organization():
+    """Fixture to create and return the default organization."""
+    org = OrganizationManager().create_organization(Organization(name=org_name))
+    yield org
+
+
+@pytest.fixture
+def test_user(test_organization):
+    """Fixture to create and return the default user within the default organization."""
+    user = UserManager().create_user(User(name=user_name, organization_id=test_organization.id))
+    yield user
+
+
+@pytest.fixture
+def core_memory_replace_tool(test_user):
+    tool = create_tool_from_func(core_memory_replace)
+    tool = ToolManager().create_or_update_tool(tool, test_user)
+    yield tool
+
+
+@pytest.fixture
+def agent_state():
+    client = create_client()
+    agent_state = client.create_agent(
+        memory=ChatMemory(persona="This is the persona", human="My name is Chad"),
+        embedding_config=EmbeddingConfig.default_config(provider="openai"),
+        llm_config=LLMConfig.default_config(model_name="gpt-4"),
+    )
+    yield agent_state
+
+
+@pytest.fixture
+def custom_test_sandbox_config(test_user):
+    """
+    Fixture to create a consistent local sandbox configuration for tests.
+
+    Args:
+        test_user: The test user to be used for creating the sandbox configuration.
+
+    Returns:
+        A tuple containing the SandboxConfigManager and the created sandbox configuration.
+    """
+    # Create the SandboxConfigManager
+    manager = SandboxConfigManager(tool_settings)
+
+    # Set the sandbox to be within the external codebase path and use a venv
+    external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system")
+    local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True)
+
+    # Create the sandbox configuration
+    config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump())
+
+    # Create or update the sandbox configuration
+    manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user)
+
+    return manager, local_sandbox_config
+
+
+@pytest.mark.local_sandbox
+def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
+    new_name = "Matt"
+    args = {"label": "human", "old_content": "Chad", "new_content": new_name}
+    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
+
+    # run the sandbox
+    result = sandbox.run(agent_state=agent_state)
+    assert new_name in result.agent_state.memory.get_block("human").value
+    assert result.func_return is None
+
+
+@pytest.mark.local_sandbox
+def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
+    nonexistent_name = "Alexander Wang"
+    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
+    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
+
+    # run the sandbox
+    result = sandbox.run(agent_state=agent_state)
+    assert len(result.stderr) != 0, "stderr not empty"
+    assert (
+        f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0]
+    ), "stderr contains expected error"
+
+
+@pytest.mark.e2b_sandbox
+def test_e2b_sandbox_escape_strings_in_args(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
+    new_name = "Matt"
+    args = {"label": "human", "old_content": "Chad", "new_content": new_name + "\n"}
+    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
+
+    # run the sandbox
+    result = sandbox.run(agent_state=agent_state)
+    assert new_name in result.agent_state.memory.get_block("human").value
+    assert result.func_return is None
+
+
+@pytest.mark.e2b_sandbox
+def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
+    nonexistent_name = "Alexander Wang"
+    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
+    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
+
+    # run the sandbox
+    result = sandbox.run(agent_state=agent_state)
+    assert len(result.stderr) != 0, "stderr not empty"
+    assert (
+        f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0]
+    ), "stderr contains expected error"
diff --git a/tests/integration_test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py
index 37597106a6..0934e0f42b 100644
--- a/tests/integration_test_tool_execution_sandbox.py
+++ b/tests/integration_test_tool_execution_sandbox.py
@@ -8,7 +8,6 @@
 from sqlalchemy import delete
 
 from letta import create_client
-from letta.functions.function_sets.base import core_memory_replace
 from letta.orm import SandboxConfig, SandboxEnvironmentVariable
 from letta.schemas.agent import AgentState
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -217,13 +216,6 @@ def clear_memory(agent_state: AgentState):
     yield tool
 
 
-@pytest.fixture
-def core_memory_replace_tool(test_user):
-    tool = create_tool_from_func(core_memory_replace)
-    tool = ToolManager().create_or_update_tool(tool, test_user)
-    yield tool
-
-
 @pytest.fixture
 def external_codebase_tool(test_user):
     from tests.test_tool_sandbox.restaurant_management_system.adjust_menu_prices import (
@@ -303,30 +295,6 @@ def test_local_sandbox_stateful_tool(mock_e2b_api_key_none, clear_core_memory_to
     assert result.func_return is None
 
 
-@pytest.mark.local_sandbox
-def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
-    new_name = "Matt"
-    args = {"label": "human", "old_content": "Chad", "new_content": new_name}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert new_name in result.agent_state.memory.get_block("human").value
-    assert result.func_return is None
-
-
-@pytest.mark.local_sandbox
-def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
-    nonexistent_name = "Alexander Wang"
-    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert len(result.stderr) != 0, "stderr not empty"
-    assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0], "stderr contains expected error"
-
-
 @pytest.mark.local_sandbox
 def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user):
     sandbox = ToolExecutionSandbox(list_tool.name, {}, user_id=test_user.id)
@@ -474,42 +442,6 @@ def test_e2b_sandbox_stateful_tool(check_e2b_key_is_set, clear_core_memory_tool,
     assert result.func_return is None
 
 
-@pytest.mark.e2b_sandbox
-def test_e2b_sandbox_core_memory_replace(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
-    new_name = "Matt"
-    args = {"label": "human", "old_content": "Chad", "new_content": new_name}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert new_name in result.agent_state.memory.get_block("human").value
-    assert result.func_return is None
-
-
-@pytest.mark.e2b_sandbox
-def test_e2b_sandbox_escape_strings_in_args(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
-    new_name = "Matt"
-    args = {"label": "human", "old_content": "Chad", "new_content": new_name + "\n"}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert new_name in result.agent_state.memory.get_block("human").value
-    assert result.func_return is None
-
-
-@pytest.mark.e2b_sandbox
-def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
-    nonexistent_name = "Alexander Wang"
-    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert len(result.stderr) != 0, "stderr not empty"
-    assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0], "stderr contains expected error"
-
-
 @pytest.mark.e2b_sandbox
 def test_e2b_sandbox_inject_env_var_existing_sandbox(check_e2b_key_is_set, get_env_tool, test_user):
     manager = SandboxConfigManager(tool_settings)

From 67234d641dff8ff284425ec190948fcf143365e1 Mon Sep 17 00:00:00 2001
From: Matt Zhou <mattzh1314@gmail.com>
Date: Tue, 17 Dec 2024 14:26:43 -0800
Subject: [PATCH 2/4] Finish testing, fix conversation search, and remove pause_heartbeats

---
 letta/agent.py                                |  15 +-
 letta/constants.py                            |   3 +-
 letta/functions/function_sets/base.py         |  66 +------
 letta/functions/schema_generator.py           |   2 +-
 letta/local_llm/function_parser.py            |   2 +-
 scripts/migrate_tools.py                      |  10 --
 tests/conftest.py                             |   9 +
 tests/integration_test_base_tools.py          | 170 ------------------
 ...integration_test_tool_execution_sandbox.py |  99 +++++++++-
 tests/test_base_functions.py                  |  33 ++--
 10 files changed, 133 insertions(+), 276 deletions(-)
 delete mode 100644 tests/integration_test_base_tools.py

diff --git a/letta/agent.py b/letta/agent.py
index 3e4d244323..f50520300d 100644
--- a/letta/agent.py
+++ b/letta/agent.py
@@ -295,8 +295,6 @@ def __init__(
         self.agent_manager = AgentManager()
 
         # State needed for heartbeat pausing
-        self.pause_heartbeats_start = None
-        self.pause_heartbeats_minutes = 0
 
         self.first_message_verify_mono = first_message_verify_mono
 
@@ -1235,17 +1233,6 @@ def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True,
 
         printd(f"Ran summarizer, messages length {prior_len} -> {len(self.messages)}")
 
-    def heartbeat_is_paused(self):
-        """Check if there's a requested pause on timed heartbeats"""
-
-        # Check if the pause has been initiated
-        if self.pause_heartbeats_start is None:
-            return False
-
-        # Check if it's been more than pause_heartbeats_minutes since pause_heartbeats_start
-        elapsed_time = get_utc_time() - self.pause_heartbeats_start
-        return elapsed_time.total_seconds() < self.pause_heartbeats_minutes * 60
-
     def _swap_system_message_in_buffer(self, new_system_message: str):
         """Update the system message (NOT prompt) of the Agent (requires updating the internal buffer)"""
         assert isinstance(new_system_message, str)
@@ -1370,7 +1357,7 @@ def attach_source(
         agent_manager: AgentManager,
     ):
         """Attach a source to the agent using the SourcesAgents ORM relationship.
- 
+
         Args:
             user: User performing the action
             source_id: ID of the source to attach
diff --git a/letta/constants.py b/letta/constants.py
index 5e9ac9b268..4b0175134b 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -37,7 +37,8 @@
 DEFAULT_PRESET = "memgpt_chat"
 
 # Base tools that cannot be edited, as they access agent state directly
-BASE_TOOLS = ["send_message", "conversation_search", "conversation_search_date", "archival_memory_insert", "archival_memory_search"]
+# Note that we don't include "conversation_search_date" for now
+BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "archival_memory_search"]
 O1_BASE_TOOLS = ["send_thinking_message", "send_final_message"]
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
diff --git a/letta/functions/function_sets/base.py b/letta/functions/function_sets/base.py
index e35739dd64..f559bf4a7d 100644
--- a/letta/functions/function_sets/base.py
+++ b/letta/functions/function_sets/base.py
@@ -1,16 +1,6 @@
-from datetime import datetime
 from typing import Optional
 
 from letta.agent import Agent
-from letta.constants import MAX_PAUSE_HEARTBEATS
-from letta.services.agent_manager import AgentManager
-
-# import math
-# from letta.utils import json_dumps
-
-### Functions / tools the agent can use
-# All functions should return a response string (or None)
-# If the function fails, throw an exception
 
 
 def send_message(self: "Agent", message: str) -> Optional[str]:
@@ -28,36 +18,6 @@ def send_message(self: "Agent", message: str) -> Optional[str]:
     return None
 
 
-# Construct the docstring dynamically (since it should use the external constants)
-pause_heartbeats_docstring = f"""
-Temporarily ignore timed heartbeats. You may still receive messages from manual heartbeats and other events.
-
-Args:
-    minutes (int): Number of minutes to ignore heartbeats for. Max value of {MAX_PAUSE_HEARTBEATS} minutes ({MAX_PAUSE_HEARTBEATS // 60} hours).
-
-Returns:
-    str: Function status response
-"""
-
-
-def pause_heartbeats(self: "Agent", minutes: int) -> Optional[str]:
-    import datetime
-
-    from letta.constants import MAX_PAUSE_HEARTBEATS
-
-    minutes = min(MAX_PAUSE_HEARTBEATS, minutes)
-
-    # Record the current time
-    self.pause_heartbeats_start = datetime.datetime.now(datetime.timezone.utc)
-    # And record how long the pause should go for
-    self.pause_heartbeats_minutes = int(minutes)
-
-    return f"Pausing timed heartbeats for {minutes} min"
-
-
-pause_heartbeats.__doc__ = pause_heartbeats_docstring
-
-
 def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> Optional[str]:
     """
     Search prior conversation history using case-insensitive string matching.
@@ -84,19 +44,19 @@ def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> O
     count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
     # TODO: add paging by page number. currently cursor only works with strings.
     # original: start=page * count
-    results = self.message_manager.list_user_messages_for_agent(
+    messages = self.message_manager.list_user_messages_for_agent(
         agent_id=self.agent_state.id,
         actor=self.user,
         query_text=query,
         limit=count,
     )
-    total = len(results)
+    total = len(messages)
     num_pages = math.ceil(total / count) - 1  # 0 index
-    if len(results) == 0:
+    if len(messages) == 0:
         results_str = f"No results found."
     else:
-        results_pref = f"Showing {len(results)} of {total} results (page {page}/{num_pages}):"
-        results_formatted = [f"timestamp: {d['timestamp']}, {d['message']['role']} - {d['message']['content']}" for d in results]
+        results_pref = f"Showing {len(messages)} of {total} results (page {page}/{num_pages}):"
+        results_formatted = [message.text for message in messages]
         results_str = f"{results_pref} {json_dumps(results_formatted)}"
     return results_str
 
@@ -114,6 +74,7 @@ def conversation_search_date(self: "Agent", start_date: str, end_date: str, page
         str: Query result string
     """
     import math
+    from datetime import datetime
 
     from letta.constants import RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
     from letta.utils import json_dumps
@@ -142,7 +103,6 @@ def conversation_search_date(self: "Agent", start_date: str, end_date: str, page
         start_date=start_datetime,
         end_date=end_datetime,
         limit=count,
-        # start_date=start_date, end_date=end_date, limit=count, start=page * count
     )
     total = len(results)
     num_pages = math.ceil(total / count) - 1  # 0 index
@@ -186,10 +146,8 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
     Returns:
         str: Query result string
     """
-    import math
 
     from letta.constants import RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
-    from letta.utils import json_dumps
 
     if page is None or (isinstance(page, str) and page.lower().strip() == "none"):
         page = 0
@@ -198,7 +156,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
     except:
         raise ValueError(f"'page' argument must be an integer")
     count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE
-    
+
     try:
         # Get results using passage manager
         all_results = self.agent_manager.list_passages(
@@ -207,7 +165,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
             query_text=query,
             limit=count + start,  # Request enough results to handle offset
             embedding_config=self.agent_state.embedding_config,
-            embed_query=True
+            embed_query=True,
         )
 
         # Apply pagination
@@ -215,13 +173,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s
         paged_results = all_results[start:end]
 
         # Format results to match previous implementation
-        formatted_results = [
-            {
-                "timestamp": str(result.created_at),
-                "content": result.text
-            }
-            for result in paged_results
-        ]
+        formatted_results = [{"timestamp": str(result.created_at), "content": result.text} for result in paged_results]
 
         return formatted_results, len(formatted_results)
 
diff --git a/letta/functions/schema_generator.py b/letta/functions/schema_generator.py
index 170bea3015..89409cb2be 100644
--- a/letta/functions/schema_generator.py
+++ b/letta/functions/schema_generator.py
@@ -386,7 +386,7 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
     # append the heartbeat
     # TODO: don't hard-code
     # TODO: if terminal, don't include this
-    if function.__name__ not in ["send_message", "pause_heartbeats"]:
+    if function.__name__ not in ["send_message"]:
         schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
             "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",
diff --git a/letta/local_llm/function_parser.py b/letta/local_llm/function_parser.py
index 6dd788da2a..0cb79edd24 100644
--- a/letta/local_llm/function_parser.py
+++ b/letta/local_llm/function_parser.py
@@ -3,7 +3,7 @@
 
 from letta.utils import json_dumps, json_loads
 
-NO_HEARTBEAT_FUNCS = ["send_message", "pause_heartbeats"]
+NO_HEARTBEAT_FUNCS = ["send_message"]
 
 
 def insert_heartbeat(message):
diff --git a/scripts/migrate_tools.py b/scripts/migrate_tools.py
index 6ab9ed9e97..7ea6bac1d6 100644
--- a/scripts/migrate_tools.py
+++ b/scripts/migrate_tools.py
@@ -40,13 +40,3 @@ def deprecated_tool():
             ),
             actor=fake_user,
         )
-
-        ToolManager().create_or_update_tool(
-            Tool(
-                name="pause_heartbeats",
-                source_code=source_code,
-                source_type=source_type,
-                description=description,
-            ),
-            actor=fake_user,
-        )
diff --git a/tests/conftest.py b/tests/conftest.py
index 17ae8ef9c0..eb261d0fcb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,3 +22,12 @@ def mock_e2b_api_key_none():
 
     # Restore the original value of e2b_api_key
     tool_settings.e2b_api_key = original_api_key
+
+
+@pytest.fixture
+def check_e2b_key_is_set():
+    from letta.settings import tool_settings
+
+    original_api_key = tool_settings.e2b_api_key
+    assert original_api_key is not None, "Missing e2b key! Cannot execute these tests."
+    yield
diff --git a/tests/integration_test_base_tools.py b/tests/integration_test_base_tools.py
deleted file mode 100644
index 9ee244ba51..0000000000
--- a/tests/integration_test_base_tools.py
+++ /dev/null
@@ -1,170 +0,0 @@
-import uuid
-from pathlib import Path
-
-import pytest
-from sqlalchemy import delete
-
-from letta import create_client
-from letta.functions.function_sets.base import core_memory_replace
-from letta.orm import SandboxConfig, SandboxEnvironmentVariable
-from letta.schemas.embedding_config import EmbeddingConfig
-from letta.schemas.llm_config import LLMConfig
-from letta.schemas.memory import ChatMemory
-from letta.schemas.organization import Organization
-from letta.schemas.sandbox_config import LocalSandboxConfig, SandboxConfigCreate
-from letta.schemas.user import User
-from letta.services.organization_manager import OrganizationManager
-from letta.services.sandbox_config_manager import SandboxConfigManager
-from letta.services.tool_execution_sandbox import ToolExecutionSandbox
-from letta.services.tool_manager import ToolManager
-from letta.services.user_manager import UserManager
-from letta.settings import tool_settings
-from tests.helpers.utils import create_tool_from_func
-
-# Constants
-namespace = uuid.NAMESPACE_DNS
-org_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-org"))
-user_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-user"))
-
-
-# Fixtures
-@pytest.fixture(autouse=True)
-def clear_tables():
-    """Fixture to clear the organization table before each test."""
-    from letta.server.server import db_context
-
-    with db_context() as session:
-        session.execute(delete(SandboxEnvironmentVariable))
-        session.execute(delete(SandboxConfig))
-        session.commit()  # Commit the deletion
-
-    # Kill all sandboxes
-    from e2b_code_interpreter import Sandbox
-
-    for sandbox in Sandbox.list():
-        Sandbox.connect(sandbox.sandbox_id).kill()
-
-
-@pytest.fixture
-def check_e2b_key_is_set():
-    original_api_key = tool_settings.e2b_api_key
-    assert original_api_key is not None, "Missing e2b key! Cannot execute these tests."
-    yield
-
-
-@pytest.fixture
-def check_composio_key_set():
-    original_api_key = tool_settings.composio_api_key
-    assert original_api_key is not None, "Missing composio key! Cannot execute this test."
-    yield
-
-
-@pytest.fixture
-def test_organization():
-    """Fixture to create and return the default organization."""
-    org = OrganizationManager().create_organization(Organization(name=org_name))
-    yield org
-
-
-@pytest.fixture
-def test_user(test_organization):
-    """Fixture to create and return the default user within the default organization."""
-    user = UserManager().create_user(User(name=user_name, organization_id=test_organization.id))
-    yield user
-
-
-@pytest.fixture
-def core_memory_replace_tool(test_user):
-    tool = create_tool_from_func(core_memory_replace)
-    tool = ToolManager().create_or_update_tool(tool, test_user)
-    yield tool
-
-
-@pytest.fixture
-def agent_state():
-    client = create_client()
-    agent_state = client.create_agent(
-        memory=ChatMemory(persona="This is the persona", human="My name is Chad"),
-        embedding_config=EmbeddingConfig.default_config(provider="openai"),
-        llm_config=LLMConfig.default_config(model_name="gpt-4"),
-    )
-    yield agent_state
-
-
-@pytest.fixture
-def custom_test_sandbox_config(test_user):
-    """
-    Fixture to create a consistent local sandbox configuration for tests.
-
-    Args:
-        test_user: The test user to be used for creating the sandbox configuration.
-
-    Returns:
-        A tuple containing the SandboxConfigManager and the created sandbox configuration.
-    """
-    # Create the SandboxConfigManager
-    manager = SandboxConfigManager(tool_settings)
-
-    # Set the sandbox to be within the external codebase path and use a venv
-    external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system")
-    local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True)
-
-    # Create the sandbox configuration
-    config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump())
-
-    # Create or update the sandbox configuration
-    manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user)
-
-    return manager, local_sandbox_config
-
-
-@pytest.mark.local_sandbox
-def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
-    new_name = "Matt"
-    args = {"label": "human", "old_content": "Chad", "new_content": new_name}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert new_name in result.agent_state.memory.get_block("human").value
-    assert result.func_return is None
-
-
-@pytest.mark.local_sandbox
-def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
-    nonexistent_name = "Alexander Wang"
-    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert len(result.stderr) != 0, "stderr not empty"
-    assert (
-        f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0]
-    ), "stderr contains expected error"
-
-
-@pytest.mark.e2b_sandbox
-def test_e2b_sandbox_escape_strings_in_args(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
-    new_name = "Matt"
-    args = {"label": "human", "old_content": "Chad", "new_content": new_name + "\n"}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert new_name in result.agent_state.memory.get_block("human").value
-    assert result.func_return is None
-
-
-@pytest.mark.e2b_sandbox
-def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
-    nonexistent_name = "Alexander Wang"
-    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
-    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
-
-    # run the sandbox
-    result = sandbox.run(agent_state=agent_state)
-    assert len(result.stderr) != 0, "stderr not empty"
-    assert (
-        f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0]
-    ), "stderr contains expected error"
diff --git a/tests/integration_test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py
index 0934e0f42b..1b29073f43 100644
--- a/tests/integration_test_tool_execution_sandbox.py
+++ b/tests/integration_test_tool_execution_sandbox.py
@@ -8,6 +8,7 @@
 from sqlalchemy import delete
 
 from letta import create_client
+from letta.functions.function_sets.base import core_memory_append, core_memory_replace
 from letta.orm import SandboxConfig, SandboxEnvironmentVariable
 from letta.schemas.agent import AgentState
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -56,13 +57,6 @@ def clear_tables():
         Sandbox.connect(sandbox.sandbox_id).kill()
 
 
-@pytest.fixture
-def check_e2b_key_is_set():
-    original_api_key = tool_settings.e2b_api_key
-    assert original_api_key is not None, "Missing e2b key! Cannot execute these tests."
-    yield
-
-
 @pytest.fixture
 def check_composio_key_set():
     original_api_key = tool_settings.composio_api_key
@@ -265,6 +259,21 @@ def custom_test_sandbox_config(test_user):
     return manager, local_sandbox_config
 
 
+# Tool-specific fixtures
+@pytest.fixture
+def core_memory_tools(test_user):
+    """Create all base tools for testing."""
+    tools = {}
+    for func in [
+        core_memory_replace,
+        core_memory_append,
+    ]:
+        tool = create_tool_from_func(func)
+        tool = ToolManager().create_or_update_tool(tool, test_user)
+        tools[func.__name__] = tool
+    yield tools
+
+
 # Local sandbox tests
 
 
@@ -517,3 +526,79 @@ def test_e2b_e2e_composio_star_github(check_e2b_key_is_set, check_composio_key_s
 
     result = ToolExecutionSandbox(composio_github_star_tool.name, {"owner": "letta-ai", "repo": "letta"}, user_id=test_user.id).run()
     assert result.func_return["details"] == "Action executed successfully"
+
+
+# Core memory integration tests
+class TestCoreMemoryTools:
+    """
+    Tests for the core memory tools (core_memory_replace, core_memory_append) executed through ToolExecutionSandbox.
+    Each scenario has a local-sandbox and an e2b variant, distinguished by pytest marker and the e2b-key fixture used.
+    """
+
+    # Local sandbox tests
+    @pytest.mark.local_sandbox
+    def test_core_memory_replace_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state):
+        """Replace 'Chad' with 'Charles' in the 'human' block and check the returned agent state - local sandbox."""
+        new_name = "Charles"
+        args = {"label": "human", "old_content": "Chad", "new_content": new_name}  # assumes the human block contains "Chad" - TODO confirm
+        sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id)
+
+        result = sandbox.run(agent_state=agent_state)
+        assert new_name in result.agent_state.memory.get_block("human").value
+        assert result.func_return is None  # no return value is expected from the tool
+
+    @pytest.mark.local_sandbox
+    def test_core_memory_append_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state):
+        """Append a line to the 'human' block and check it appears in the returned agent state - local sandbox."""
+        append_text = "\nLikes coffee"
+        args = {"label": "human", "content": append_text}
+        sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_append"].name, args, user_id=test_user.id)
+
+        result = sandbox.run(agent_state=agent_state)
+        assert append_text in result.agent_state.memory.get_block("human").value
+        assert result.func_return is None  # no return value is expected from the tool
+
+    @pytest.mark.local_sandbox
+    def test_core_memory_replace_error_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state):
+        """Replacing content that is absent from the 'human' block must surface a ValueError in stderr - local sandbox."""
+        nonexistent_name = "Alexander Wang"
+        args = {"label": "human", "old_content": nonexistent_name, "new_content": "Charles"}
+        sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id)
+
+        result = sandbox.run(agent_state=agent_state)
+        assert len(result.stderr) != 0
+        assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0]  # only the first stderr entry is checked
+
+    # E2B sandbox tests
+    @pytest.mark.e2b_sandbox
+    def test_core_memory_replace_e2b(self, check_e2b_key_is_set, core_memory_tools, test_user, agent_state):
+        """Replace 'Chad' with 'Charles' in the 'human' block and check the returned agent state - e2b sandbox."""
+        new_name = "Charles"
+        args = {"label": "human", "old_content": "Chad", "new_content": new_name}  # assumes the human block contains "Chad" - TODO confirm
+        sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id)
+
+        result = sandbox.run(agent_state=agent_state)
+        assert new_name in result.agent_state.memory.get_block("human").value
+        assert result.func_return is None  # no return value is expected from the tool
+
+    @pytest.mark.e2b_sandbox
+    def test_core_memory_append_e2b(self, check_e2b_key_is_set, core_memory_tools, test_user, agent_state):
+        """Append a line to the 'human' block and check it appears in the returned agent state - e2b sandbox."""
+        append_text = "\nLikes coffee"
+        args = {"label": "human", "content": append_text}
+        sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_append"].name, args, user_id=test_user.id)
+
+        result = sandbox.run(agent_state=agent_state)
+        assert append_text in result.agent_state.memory.get_block("human").value
+        assert result.func_return is None  # no return value is expected from the tool
+
+    @pytest.mark.e2b_sandbox
+    def test_core_memory_replace_error_e2b(self, check_e2b_key_is_set, core_memory_tools, test_user, agent_state):
+        """Replacing content that is absent from the 'human' block must surface a ValueError in stderr - e2b sandbox."""
+        nonexistent_name = "Alexander Wang"
+        args = {"label": "human", "old_content": nonexistent_name, "new_content": "Charles"}
+        sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id)
+
+        result = sandbox.run(agent_state=agent_state)
+        assert len(result.stderr) != 0
+        assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0]  # only the first stderr entry is checked
diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py
index 8144671969..c4f034b118 100644
--- a/tests/test_base_functions.py
+++ b/tests/test_base_functions.py
@@ -1,28 +1,25 @@
 import pytest
 
 import letta.functions.function_sets.base as base_functions
-from letta import create_client
+from letta import LocalClient, create_client
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
 
-from .utils import wipe_config
-
-# test_agent_id = "test_agent"
-client = None
-
 
 @pytest.fixture(scope="module")
-def agent_obj():
-    """Create a test agent that we can call functions on"""
-    wipe_config()
-    global client
+def client():  # fixture: client with default LLM and embedding configs set
     client = create_client()
     client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini"))
     client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai"))
 
+    yield client  # fixture scope is "module", so one client is shared by the module's tests
+
+
+@pytest.fixture(scope="module")
+def agent_obj(client: LocalClient):
+    """Create an agent with the client fixture and load it from the server so base functions can be called on it"""
     agent_state = client.create_agent()
 
-    global agent_obj
     agent_obj = client.server.load_agent(agent_id=agent_state.id, actor=client.user)
     yield agent_obj
 
@@ -88,7 +85,13 @@ def test_archival(agent_obj):
         pass
 
 
-def test_recall(agent_obj):
-    base_functions.conversation_search(agent_obj, "banana")
-    base_functions.conversation_search(agent_obj, "banana", page=0)
-    base_functions.conversation_search_date(agent_obj, start_date="2022-01-01", end_date="2022-01-02")
+def test_recall(client, agent_obj):
+    # keyword
+    keyword = "banana"
+
+    # Send message to agent
+    response = client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword)
+
+    # Conversation search
+    result = base_functions.conversation_search(agent_obj, "banana")
+    assert keyword in result  # the search output must contain the keyword that was just sent

From f6620f1a060e1b103edd02deba7b1566253ae910 Mon Sep 17 00:00:00 2001
From: Matt Zhou <mattzh1314@gmail.com>
Date: Tue, 17 Dec 2024 14:29:32 -0800
Subject: [PATCH 3/4] Remove unused response variable in test_recall

---
 tests/test_base_functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py
index c4f034b118..c425065f56 100644
--- a/tests/test_base_functions.py
+++ b/tests/test_base_functions.py
@@ -90,7 +90,7 @@ def test_recall(client, agent_obj):
     keyword = "banana"
 
     # Send message to agent
-    response = client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword)
+    client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword)
 
     # Conversation search
     result = base_functions.conversation_search(agent_obj, "banana")

From 2cb7d0af42184338f75b882d6e5baef0976ab490 Mon Sep 17 00:00:00 2001
From: Matt Zhou <mattzh1314@gmail.com>
Date: Tue, 17 Dec 2024 14:52:47 -0800
Subject: [PATCH 4/4] Send multiple messages in test_recall before searching

---
 tests/test_base_functions.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py
index c425065f56..5b5bec6f4f 100644
--- a/tests/test_base_functions.py
+++ b/tests/test_base_functions.py
@@ -89,8 +89,10 @@ def test_recall(client, agent_obj):
     # keyword
     keyword = "banana"
 
-    # Send message to agent
+    # Send messages to agent
+    client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="hello")
     client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword)
+    client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="tell me a fun fact")
 
     # Conversation search
     result = base_functions.conversation_search(agent_obj, "banana")