From e8f19159965ea6b755cb090b2de6022564647bc2 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Tue, 17 Dec 2024 10:35:10 -0800 Subject: [PATCH 1/4] wip --- tests/integration_test_base_tools.py | 170 ++++++++++++++++++ ...integration_test_tool_execution_sandbox.py | 68 ------- 2 files changed, 170 insertions(+), 68 deletions(-) create mode 100644 tests/integration_test_base_tools.py diff --git a/tests/integration_test_base_tools.py b/tests/integration_test_base_tools.py new file mode 100644 index 0000000000..9ee244ba51 --- /dev/null +++ b/tests/integration_test_base_tools.py @@ -0,0 +1,170 @@ +import uuid +from pathlib import Path + +import pytest +from sqlalchemy import delete + +from letta import create_client +from letta.functions.function_sets.base import core_memory_replace +from letta.orm import SandboxConfig, SandboxEnvironmentVariable +from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.llm_config import LLMConfig +from letta.schemas.memory import ChatMemory +from letta.schemas.organization import Organization +from letta.schemas.sandbox_config import LocalSandboxConfig, SandboxConfigCreate +from letta.schemas.user import User +from letta.services.organization_manager import OrganizationManager +from letta.services.sandbox_config_manager import SandboxConfigManager +from letta.services.tool_execution_sandbox import ToolExecutionSandbox +from letta.services.tool_manager import ToolManager +from letta.services.user_manager import UserManager +from letta.settings import tool_settings +from tests.helpers.utils import create_tool_from_func + +# Constants +namespace = uuid.NAMESPACE_DNS +org_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-org")) +user_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-user")) + + +# Fixtures +@pytest.fixture(autouse=True) +def clear_tables(): + """Fixture to clear the organization table before each test.""" + from letta.server.server import db_context + + with db_context() as session: + session.execute(delete(SandboxEnvironmentVariable)) + session.execute(delete(SandboxConfig)) + session.commit() # Commit the deletion + + # Kill all sandboxes + from e2b_code_interpreter import Sandbox + + for sandbox in Sandbox.list(): + Sandbox.connect(sandbox.sandbox_id).kill() + + +@pytest.fixture +def check_e2b_key_is_set(): + original_api_key = tool_settings.e2b_api_key + assert original_api_key is not None, "Missing e2b key! Cannot execute these tests." + yield + + +@pytest.fixture +def check_composio_key_set(): + original_api_key = tool_settings.composio_api_key + assert original_api_key is not None, "Missing composio key! Cannot execute this test." + yield + + +@pytest.fixture +def test_organization(): + """Fixture to create and return the default organization.""" + org = OrganizationManager().create_organization(Organization(name=org_name)) + yield org + + +@pytest.fixture +def test_user(test_organization): + """Fixture to create and return the default user within the default organization.""" + user = UserManager().create_user(User(name=user_name, organization_id=test_organization.id)) + yield user + + +@pytest.fixture +def core_memory_replace_tool(test_user): + tool = create_tool_from_func(core_memory_replace) + tool = ToolManager().create_or_update_tool(tool, test_user) + yield tool + + +@pytest.fixture +def agent_state(): + client = create_client() + agent_state = client.create_agent( + memory=ChatMemory(persona="This is the persona", human="My name is Chad"), + embedding_config=EmbeddingConfig.default_config(provider="openai"), + llm_config=LLMConfig.default_config(model_name="gpt-4"), + ) + yield agent_state + + +@pytest.fixture +def custom_test_sandbox_config(test_user): + """ + Fixture to create a consistent local sandbox configuration for tests. + + Args: + test_user: The test user to be used for creating the sandbox configuration. + + Returns: + A tuple containing the SandboxConfigManager and the created sandbox configuration. + """ + # Create the SandboxConfigManager + manager = SandboxConfigManager(tool_settings) + + # Set the sandbox to be within the external codebase path and use a venv + external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system") + local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True) + + # Create the sandbox configuration + config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump()) + + # Create or update the sandbox configuration + manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user) + + return manager, local_sandbox_config + + +@pytest.mark.local_sandbox +def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): + new_name = "Matt" + args = {"label": "human", "old_content": "Chad", "new_content": new_name} + sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) + + # run the sandbox + result = sandbox.run(agent_state=agent_state) + assert new_name in result.agent_state.memory.get_block("human").value + assert result.func_return is None + + +@pytest.mark.local_sandbox +def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): + nonexistent_name = "Alexander Wang" + args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} + sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) + + # run the sandbox + result = sandbox.run(agent_state=agent_state) + assert len(result.stderr) != 0, "stderr not empty" + assert ( + f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0] + ), "stderr contains expected error" + + +@pytest.mark.e2b_sandbox +def test_e2b_sandbox_escape_strings_in_args(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): + new_name = "Matt" + args = {"label": "human", "old_content": "Chad", "new_content": new_name + "\n"} + sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) + + # run the sandbox + result = sandbox.run(agent_state=agent_state) + assert new_name in result.agent_state.memory.get_block("human").value + assert result.func_return is None + + +@pytest.mark.e2b_sandbox +def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): + nonexistent_name = "Alexander Wang" + args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} + sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) + + # run the sandbox + result = sandbox.run(agent_state=agent_state) + assert len(result.stderr) != 0, "stderr not empty" + assert ( + f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0] + ), "stderr contains expected error" diff --git a/tests/integration_test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py index 37597106a6..0934e0f42b 100644 --- a/tests/integration_test_tool_execution_sandbox.py +++ b/tests/integration_test_tool_execution_sandbox.py @@ -8,7 +8,6 @@ from sqlalchemy import delete from letta import create_client -from letta.functions.function_sets.base import core_memory_replace from letta.orm import SandboxConfig, SandboxEnvironmentVariable from letta.schemas.agent import AgentState from letta.schemas.embedding_config import EmbeddingConfig @@ -217,13 +216,6 @@ def clear_memory(agent_state: AgentState): yield tool -@pytest.fixture -def core_memory_replace_tool(test_user): - tool = create_tool_from_func(core_memory_replace) - tool = ToolManager().create_or_update_tool(tool, test_user) - yield tool - - @pytest.fixture def external_codebase_tool(test_user): from tests.test_tool_sandbox.restaurant_management_system.adjust_menu_prices import ( @@ -303,30 +295,6 @@ def test_local_sandbox_stateful_tool(mock_e2b_api_key_none, clear_core_memory_to assert result.func_return is None -@pytest.mark.local_sandbox -def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): - new_name = "Matt" - args = {"label": "human", "old_content": "Chad", "new_content": new_name} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert new_name in result.agent_state.memory.get_block("human").value - assert result.func_return is None - - -@pytest.mark.local_sandbox -def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): - nonexistent_name = "Alexander Wang" - args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert len(result.stderr) != 0, "stderr not empty" - assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0], "stderr contains expected error" - - @pytest.mark.local_sandbox def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user): sandbox = ToolExecutionSandbox(list_tool.name, {}, user_id=test_user.id) @@ -474,42 +442,6 @@ def test_e2b_sandbox_stateful_tool(check_e2b_key_is_set, clear_core_memory_tool, assert result.func_return is None -@pytest.mark.e2b_sandbox -def test_e2b_sandbox_core_memory_replace(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): - new_name = "Matt" - args = {"label": "human", "old_content": "Chad", "new_content": new_name} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert new_name in result.agent_state.memory.get_block("human").value - assert result.func_return is None - - -@pytest.mark.e2b_sandbox -def test_e2b_sandbox_escape_strings_in_args(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): - new_name = "Matt" - args = {"label": "human", "old_content": "Chad", "new_content": new_name + "\n"} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert new_name in result.agent_state.memory.get_block("human").value - assert result.func_return is None - - -@pytest.mark.e2b_sandbox -def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): - nonexistent_name = "Alexander Wang" - args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert len(result.stderr) != 0, "stderr not empty" - assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0], "stderr contains expected error" - - @pytest.mark.e2b_sandbox def test_e2b_sandbox_inject_env_var_existing_sandbox(check_e2b_key_is_set, get_env_tool, test_user): manager = SandboxConfigManager(tool_settings) From 67234d641dff8ff284425ec190948fcf143365e1 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Tue, 17 Dec 2024 14:26:43 -0800 Subject: [PATCH 2/4] Finish testing and fix conversation search --- letta/agent.py | 15 +- letta/constants.py | 3 +- letta/functions/function_sets/base.py | 66 +------ letta/functions/schema_generator.py | 2 +- letta/local_llm/function_parser.py | 2 +- scripts/migrate_tools.py | 10 -- tests/conftest.py | 9 + tests/integration_test_base_tools.py | 170 ------------------ ...integration_test_tool_execution_sandbox.py | 99 +++++++++- tests/test_base_functions.py | 33 ++-- 10 files changed, 133 insertions(+), 276 deletions(-) delete mode 100644 tests/integration_test_base_tools.py diff --git a/letta/agent.py b/letta/agent.py index 3e4d244323..f50520300d 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -295,8 +295,6 @@ def __init__( self.agent_manager = AgentManager() # State needed for heartbeat pausing - self.pause_heartbeats_start = None - self.pause_heartbeats_minutes = 0 self.first_message_verify_mono = first_message_verify_mono @@ -1235,17 +1233,6 @@ def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True, printd(f"Ran summarizer, messages length {prior_len} -> {len(self.messages)}") - def heartbeat_is_paused(self): - """Check if there's a requested pause on timed heartbeats""" - - # Check if the pause has been initiated - if self.pause_heartbeats_start is None: - return False - - # Check if it's been more than pause_heartbeats_minutes since pause_heartbeats_start - elapsed_time = get_utc_time() - self.pause_heartbeats_start - return elapsed_time.total_seconds() < self.pause_heartbeats_minutes * 60 - def _swap_system_message_in_buffer(self, new_system_message: str): """Update the system message (NOT prompt) of the Agent (requires updating the internal buffer)""" assert isinstance(new_system_message, str) @@ -1370,7 +1357,7 @@ def attach_source( agent_manager: AgentManager, ): """Attach a source to the agent using the SourcesAgents ORM relationship. - + Args: user: User performing the action source_id: ID of the source to attach diff --git a/letta/constants.py b/letta/constants.py index 5e9ac9b268..4b0175134b 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -37,7 +37,8 @@ DEFAULT_PRESET = "memgpt_chat" # Base tools that cannot be edited, as they access agent state directly -BASE_TOOLS = ["send_message", "conversation_search", "conversation_search_date", "archival_memory_insert", "archival_memory_search"] +# Note that we don't include "conversation_search_date" for now +BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "archival_memory_search"] O1_BASE_TOOLS = ["send_thinking_message", "send_final_message"] # Base memory tools CAN be edited, and are added by default by the server BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"] diff --git a/letta/functions/function_sets/base.py b/letta/functions/function_sets/base.py index e35739dd64..f559bf4a7d 100644 --- a/letta/functions/function_sets/base.py +++ b/letta/functions/function_sets/base.py @@ -1,16 +1,6 @@ -from datetime import datetime from typing import Optional from letta.agent import Agent -from letta.constants import MAX_PAUSE_HEARTBEATS -from letta.services.agent_manager import AgentManager - -# import math -# from letta.utils import json_dumps - -### Functions / tools the agent can use -# All functions should return a response string (or None) -# If the function fails, throw an exception def send_message(self: "Agent", message: str) -> Optional[str]: @@ -28,36 +18,6 @@ def send_message(self: "Agent", message: str) -> Optional[str]: return None -# Construct the docstring dynamically (since it should use the external constants) -pause_heartbeats_docstring = f""" -Temporarily ignore timed heartbeats. You may still receive messages from manual heartbeats and other events. - -Args: - minutes (int): Number of minutes to ignore heartbeats for. Max value of {MAX_PAUSE_HEARTBEATS} minutes ({MAX_PAUSE_HEARTBEATS // 60} hours). - -Returns: - str: Function status response -""" - - -def pause_heartbeats(self: "Agent", minutes: int) -> Optional[str]: - import datetime - - from letta.constants import MAX_PAUSE_HEARTBEATS - - minutes = min(MAX_PAUSE_HEARTBEATS, minutes) - - # Record the current time - self.pause_heartbeats_start = datetime.datetime.now(datetime.timezone.utc) - # And record how long the pause should go for - self.pause_heartbeats_minutes = int(minutes) - - return f"Pausing timed heartbeats for {minutes} min" - - -pause_heartbeats.__doc__ = pause_heartbeats_docstring - - def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> Optional[str]: """ Search prior conversation history using case-insensitive string matching. @@ -84,19 +44,19 @@ def conversation_search(self: "Agent", query: str, page: Optional[int] = 0) -> O count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE # TODO: add paging by page number. currently cursor only works with strings. # original: start=page * count - results = self.message_manager.list_user_messages_for_agent( + messages = self.message_manager.list_user_messages_for_agent( agent_id=self.agent_state.id, actor=self.user, query_text=query, limit=count, ) - total = len(results) + total = len(messages) num_pages = math.ceil(total / count) - 1 # 0 index - if len(results) == 0: + if len(messages) == 0: results_str = f"No results found." else: - results_pref = f"Showing {len(results)} of {total} results (page {page}/{num_pages}):" - results_formatted = [f"timestamp: {d['timestamp']}, {d['message']['role']} - {d['message']['content']}" for d in results] + results_pref = f"Showing {len(messages)} of {total} results (page {page}/{num_pages}):" + results_formatted = [message.text for message in messages] results_str = f"{results_pref} {json_dumps(results_formatted)}" return results_str @@ -114,6 +74,7 @@ def conversation_search_date(self: "Agent", start_date: str, end_date: str, page str: Query result string """ import math + from datetime import datetime from letta.constants import RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE from letta.utils import json_dumps @@ -142,7 +103,6 @@ def conversation_search_date(self: "Agent", start_date: str, end_date: str, page start_date=start_datetime, end_date=end_datetime, limit=count, - # start_date=start_date, end_date=end_date, limit=count, start=page * count ) total = len(results) num_pages = math.ceil(total / count) - 1 # 0 index @@ -186,10 +146,8 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s Returns: str: Query result string """ - import math from letta.constants import RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE - from letta.utils import json_dumps if page is None or (isinstance(page, str) and page.lower().strip() == "none"): page = 0 @@ -198,7 +156,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s except: raise ValueError(f"'page' argument must be an integer") count = RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE - + try: # Get results using passage manager all_results = self.agent_manager.list_passages( @@ -207,7 +165,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s query_text=query, limit=count + start, # Request enough results to handle offset embedding_config=self.agent_state.embedding_config, - embed_query=True + embed_query=True, ) # Apply pagination @@ -215,13 +173,7 @@ def archival_memory_search(self: "Agent", query: str, page: Optional[int] = 0, s paged_results = all_results[start:end] # Format results to match previous implementation - formatted_results = [ - { - "timestamp": str(result.created_at), - "content": result.text - } - for result in paged_results - ] + formatted_results = [{"timestamp": str(result.created_at), "content": result.text} for result in paged_results] return formatted_results, len(formatted_results) diff --git a/letta/functions/schema_generator.py b/letta/functions/schema_generator.py index 170bea3015..89409cb2be 100644 --- a/letta/functions/schema_generator.py +++ b/letta/functions/schema_generator.py @@ -386,7 +386,7 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[ # append the heartbeat # TODO: don't hard-code # TODO: if terminal, don't include this - if function.__name__ not in ["send_message", "pause_heartbeats"]: + if function.__name__ not in ["send_message"]: schema["parameters"]["properties"]["request_heartbeat"] = { "type": "boolean", "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.", diff --git a/letta/local_llm/function_parser.py b/letta/local_llm/function_parser.py index 6dd788da2a..0cb79edd24 100644 --- a/letta/local_llm/function_parser.py +++ b/letta/local_llm/function_parser.py @@ -3,7 +3,7 @@ from letta.utils import json_dumps, json_loads -NO_HEARTBEAT_FUNCS = ["send_message", "pause_heartbeats"] +NO_HEARTBEAT_FUNCS = ["send_message"] def insert_heartbeat(message): diff --git a/scripts/migrate_tools.py b/scripts/migrate_tools.py index 6ab9ed9e97..7ea6bac1d6 100644 --- a/scripts/migrate_tools.py +++ b/scripts/migrate_tools.py @@ -40,13 +40,3 @@ def deprecated_tool(): ), actor=fake_user, ) - - ToolManager().create_or_update_tool( - Tool( - name="pause_heartbeats", - source_code=source_code, - source_type=source_type, - description=description, - ), - actor=fake_user, - ) diff --git a/tests/conftest.py b/tests/conftest.py index 17ae8ef9c0..eb261d0fcb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,3 +22,12 @@ def mock_e2b_api_key_none(): # Restore the original value of e2b_api_key tool_settings.e2b_api_key = original_api_key + + +@pytest.fixture +def check_e2b_key_is_set(): + from letta.settings import tool_settings + + original_api_key = tool_settings.e2b_api_key + assert original_api_key is not None, "Missing e2b key! Cannot execute these tests." + yield diff --git a/tests/integration_test_base_tools.py b/tests/integration_test_base_tools.py deleted file mode 100644 index 9ee244ba51..0000000000 --- a/tests/integration_test_base_tools.py +++ /dev/null @@ -1,170 +0,0 @@ -import uuid -from pathlib import Path - -import pytest -from sqlalchemy import delete - -from letta import create_client -from letta.functions.function_sets.base import core_memory_replace -from letta.orm import SandboxConfig, SandboxEnvironmentVariable -from letta.schemas.embedding_config import EmbeddingConfig -from letta.schemas.llm_config import LLMConfig -from letta.schemas.memory import ChatMemory -from letta.schemas.organization import Organization -from letta.schemas.sandbox_config import LocalSandboxConfig, SandboxConfigCreate -from letta.schemas.user import User -from letta.services.organization_manager import OrganizationManager -from letta.services.sandbox_config_manager import SandboxConfigManager -from letta.services.tool_execution_sandbox import ToolExecutionSandbox -from letta.services.tool_manager import ToolManager -from letta.services.user_manager import UserManager -from letta.settings import tool_settings -from tests.helpers.utils import create_tool_from_func - -# Constants -namespace = uuid.NAMESPACE_DNS -org_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-org")) -user_name = str(uuid.uuid5(namespace, "test-tool-execution-sandbox-user")) - - -# Fixtures -@pytest.fixture(autouse=True) -def clear_tables(): - """Fixture to clear the organization table before each test.""" - from letta.server.server import db_context - - with db_context() as session: - session.execute(delete(SandboxEnvironmentVariable)) - session.execute(delete(SandboxConfig)) - session.commit() # Commit the deletion - - # Kill all sandboxes - from e2b_code_interpreter import Sandbox - - for sandbox in Sandbox.list(): - Sandbox.connect(sandbox.sandbox_id).kill() - - -@pytest.fixture -def check_e2b_key_is_set(): - original_api_key = tool_settings.e2b_api_key - assert original_api_key is not None, "Missing e2b key! Cannot execute these tests." - yield - - -@pytest.fixture -def check_composio_key_set(): - original_api_key = tool_settings.composio_api_key - assert original_api_key is not None, "Missing composio key! Cannot execute this test." - yield - - -@pytest.fixture -def test_organization(): - """Fixture to create and return the default organization.""" - org = OrganizationManager().create_organization(Organization(name=org_name)) - yield org - - -@pytest.fixture -def test_user(test_organization): - """Fixture to create and return the default user within the default organization.""" - user = UserManager().create_user(User(name=user_name, organization_id=test_organization.id)) - yield user - - -@pytest.fixture -def core_memory_replace_tool(test_user): - tool = create_tool_from_func(core_memory_replace) - tool = ToolManager().create_or_update_tool(tool, test_user) - yield tool - - -@pytest.fixture -def agent_state(): - client = create_client() - agent_state = client.create_agent( - memory=ChatMemory(persona="This is the persona", human="My name is Chad"), - embedding_config=EmbeddingConfig.default_config(provider="openai"), - llm_config=LLMConfig.default_config(model_name="gpt-4"), - ) - yield agent_state - - -@pytest.fixture -def custom_test_sandbox_config(test_user): - """ - Fixture to create a consistent local sandbox configuration for tests. - - Args: - test_user: The test user to be used for creating the sandbox configuration. - - Returns: - A tuple containing the SandboxConfigManager and the created sandbox configuration. - """ - # Create the SandboxConfigManager - manager = SandboxConfigManager(tool_settings) - - # Set the sandbox to be within the external codebase path and use a venv - external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system") - local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True) - - # Create the sandbox configuration - config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump()) - - # Create or update the sandbox configuration - manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user) - - return manager, local_sandbox_config - - -@pytest.mark.local_sandbox -def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): - new_name = "Matt" - args = {"label": "human", "old_content": "Chad", "new_content": new_name} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert new_name in result.agent_state.memory.get_block("human").value - assert result.func_return is None - - -@pytest.mark.local_sandbox -def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): - nonexistent_name = "Alexander Wang" - args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert len(result.stderr) != 0, "stderr not empty" - assert ( - f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0] - ), "stderr contains expected error" - - -@pytest.mark.e2b_sandbox -def test_e2b_sandbox_escape_strings_in_args(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): - new_name = "Matt" - args = {"label": "human", "old_content": "Chad", "new_content": new_name + "\n"} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert new_name in result.agent_state.memory.get_block("human").value - assert result.func_return is None - - -@pytest.mark.e2b_sandbox -def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): - nonexistent_name = "Alexander Wang" - args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} - sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) - - # run the sandbox - result = sandbox.run(agent_state=agent_state) - assert len(result.stderr) != 0, "stderr not empty" - assert ( - f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0] - ), "stderr contains expected error" diff --git a/tests/integration_test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py index 0934e0f42b..1b29073f43 100644 --- a/tests/integration_test_tool_execution_sandbox.py +++ b/tests/integration_test_tool_execution_sandbox.py @@ -8,6 +8,7 @@ from sqlalchemy import delete from letta import create_client +from letta.functions.function_sets.base import core_memory_append, core_memory_replace from letta.orm import SandboxConfig, SandboxEnvironmentVariable from letta.schemas.agent import AgentState from letta.schemas.embedding_config import EmbeddingConfig @@ -56,13 +57,6 @@ def clear_tables(): Sandbox.connect(sandbox.sandbox_id).kill() -@pytest.fixture -def check_e2b_key_is_set(): - original_api_key = tool_settings.e2b_api_key - assert original_api_key is not None, "Missing e2b key! Cannot execute these tests." - yield - - @pytest.fixture def check_composio_key_set(): original_api_key = tool_settings.composio_api_key @@ -265,6 +259,21 @@ def custom_test_sandbox_config(test_user): return manager, local_sandbox_config +# Tool-specific fixtures +@pytest.fixture +def core_memory_tools(test_user): + """Create all base tools for testing.""" + tools = {} + for func in [ + core_memory_replace, + core_memory_append, + ]: + tool = create_tool_from_func(func) + tool = ToolManager().create_or_update_tool(tool, test_user) + tools[func.__name__] = tool + yield tools + + # Local sandbox tests @@ -517,3 +526,79 @@ def test_e2b_e2e_composio_star_github(check_e2b_key_is_set, check_composio_key_s result = ToolExecutionSandbox(composio_github_star_tool.name, {"owner": "letta-ai", "repo": "letta"}, user_id=test_user.id).run() assert result.func_return["details"] == "Action executed successfully" + + +# Core memory integration tests +class TestCoreMemoryTools: + """ + Tests for core memory manipulation tools. + Tests run in both local sandbox and e2b environments. + """ + + # Local sandbox tests + @pytest.mark.local_sandbox + def test_core_memory_replace_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state): + """Test successful replacement of content in core memory - local sandbox.""" + new_name = "Charles" + args = {"label": "human", "old_content": "Chad", "new_content": new_name} + sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id) + + result = sandbox.run(agent_state=agent_state) + assert new_name in result.agent_state.memory.get_block("human").value + assert result.func_return is None + + @pytest.mark.local_sandbox + def test_core_memory_append_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state): + """Test successful appending of content to core memory - local sandbox.""" + append_text = "\nLikes coffee" + args = {"label": "human", "content": append_text} + sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_append"].name, args, user_id=test_user.id) + + result = sandbox.run(agent_state=agent_state) + assert append_text in result.agent_state.memory.get_block("human").value + assert result.func_return is None + + @pytest.mark.local_sandbox + def test_core_memory_replace_error_local(self, mock_e2b_api_key_none, core_memory_tools, test_user, agent_state): + """Test error handling when trying to replace non-existent content - local sandbox.""" + nonexistent_name = "Alexander Wang" + args = {"label": "human", "old_content": nonexistent_name, "new_content": "Charles"} + sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id) + + result = sandbox.run(agent_state=agent_state) + assert len(result.stderr) != 0 + assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0] + + # E2B sandbox tests + @pytest.mark.e2b_sandbox + def test_core_memory_replace_e2b(self, check_e2b_key_is_set, core_memory_tools, test_user, agent_state): + """Test successful replacement of content in core memory - e2b sandbox.""" + new_name = "Charles" + args = {"label": "human", "old_content": "Chad", "new_content": new_name} + sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id) + + result = sandbox.run(agent_state=agent_state) + assert new_name in result.agent_state.memory.get_block("human").value + assert result.func_return is None + + @pytest.mark.e2b_sandbox + def test_core_memory_append_e2b(self, check_e2b_key_is_set, core_memory_tools, test_user, agent_state): + """Test successful appending of content to core memory - e2b sandbox.""" + append_text = "\nLikes coffee" + args = {"label": "human", "content": append_text} + sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_append"].name, args, user_id=test_user.id) + + result = sandbox.run(agent_state=agent_state) + assert append_text in result.agent_state.memory.get_block("human").value + assert result.func_return is None + + @pytest.mark.e2b_sandbox + def test_core_memory_replace_error_e2b(self, check_e2b_key_is_set, core_memory_tools, test_user, agent_state): + """Test error handling when trying to replace non-existent content - e2b sandbox.""" + nonexistent_name = "Alexander Wang" + args = {"label": "human", "old_content": nonexistent_name, "new_content": "Charles"} + sandbox = ToolExecutionSandbox(core_memory_tools["core_memory_replace"].name, args, user_id=test_user.id) + + result = sandbox.run(agent_state=agent_state) + assert len(result.stderr) != 0 + assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0] diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py index 8144671969..c4f034b118 100644 --- a/tests/test_base_functions.py +++ b/tests/test_base_functions.py @@ -1,28 +1,25 @@ import pytest import letta.functions.function_sets.base as base_functions -from letta import create_client +from letta import LocalClient, create_client from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.llm_config import LLMConfig -from .utils import wipe_config - -# test_agent_id = "test_agent" -client = None - @pytest.fixture(scope="module") -def agent_obj(): - """Create a test agent that we can call functions on""" - wipe_config() - global client +def client(): client = create_client() client.set_default_llm_config(LLMConfig.default_config("gpt-4o-mini")) client.set_default_embedding_config(EmbeddingConfig.default_config(provider="openai")) + yield client + + +@pytest.fixture(scope="module") +def agent_obj(client: LocalClient): + """Create a test agent that we can call functions on""" agent_state = client.create_agent() - global agent_obj agent_obj = client.server.load_agent(agent_id=agent_state.id, actor=client.user) yield agent_obj @@ -88,7 +85,13 @@ def test_archival(agent_obj): pass -def test_recall(agent_obj): - base_functions.conversation_search(agent_obj, "banana") - base_functions.conversation_search(agent_obj, "banana", page=0) - base_functions.conversation_search_date(agent_obj, start_date="2022-01-01", end_date="2022-01-02") +def test_recall(client, agent_obj): + # keyword + keyword = "banana" + + # Send message to agent + response = client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword) + + # Conversation search + result = base_functions.conversation_search(agent_obj, "banana") + assert keyword in result From f6620f1a060e1b103edd02deba7b1566253ae910 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Tue, 17 Dec 2024 14:29:32 -0800 Subject: [PATCH 3/4] Fix typo --- tests/test_base_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py index c4f034b118..c425065f56 100644 --- a/tests/test_base_functions.py +++ b/tests/test_base_functions.py @@ -90,7 +90,7 @@ def test_recall(client, agent_obj): keyword = "banana" # Send message to agent - response = client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword) + client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword) # Conversation search result = base_functions.conversation_search(agent_obj, "banana") From 2cb7d0af42184338f75b882d6e5baef0976ab490 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Tue, 17 Dec 2024 14:52:47 -0800 Subject: [PATCH 4/4] Finish --- tests/test_base_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_base_functions.py b/tests/test_base_functions.py index c425065f56..5b5bec6f4f 100644 --- a/tests/test_base_functions.py +++ b/tests/test_base_functions.py @@ -89,8 +89,10 @@ def test_recall(client, agent_obj): # keyword keyword = "banana" - # Send message to agent + # Send messages to agent + client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="hello") client.send_message(agent_id=agent_obj.agent_state.id, role="user", message=keyword) + client.send_message(agent_id=agent_obj.agent_state.id, role="user", message="tell me a fun fact") # Conversation search result = base_functions.conversation_search(agent_obj, "banana")