From 759c47c5490c29a028a777c32bf85f01049b3d1d Mon Sep 17 00:00:00 2001 From: Mindy Long Date: Mon, 16 Dec 2024 17:50:41 -0800 Subject: [PATCH 1/6] Adding init tool rule for Anthropic endpoint * Fixed first message flag * Added support for Claude-3-sonnet configs * New test case for init tool rule --- letta/agent.py | 13 +++++- letta/llm_api/anthropic.py | 20 ++++---- letta/llm_api/llm_api_tools.py | 13 +++++- .../claude-3-sonnet-20240229.json | 9 ++++ tests/integration_test_agent_tool_graph.py | 46 ++++++++++++++++++- 5 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 tests/configs/llm_model_configs/claude-3-sonnet-20240229.json diff --git a/letta/agent.py b/letta/agent.py index 341b25fda3..cb15c61e52 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -596,6 +596,12 @@ def _get_ai_reply( self.functions if not allowed_tool_names else [func for func in self.functions if func["name"] in allowed_tool_names] ) + # For the first message, force the initial tool if one is specified + force_tool_call = None + if first_message and self.tool_rules_solver.init_tool_rules: + assert len(self.tool_rules_solver.init_tool_rules) == 1, "Multiple initial tools not supported" + force_tool_call = self.tool_rules_solver.init_tool_rules[0].tool_name + for attempt in range(1, empty_response_retry_limit + 1): try: response = create( @@ -606,6 +612,7 @@ def _get_ai_reply( functions_python=self.functions_python, function_call=function_call, first_message=first_message, + force_tool_call=force_tool_call, stream=stream, stream_interface=self.interface, ) @@ -896,7 +903,10 @@ def step( total_usage = UsageStatistics() step_count = 0 while True: - kwargs["first_message"] = False + if step_count > 0: + kwargs["first_message"] = False + else: + kwargs["first_message"] = True step_response = self.inner_step( messages=next_input_message, **kwargs, @@ -1014,6 +1024,7 @@ def inner_step( else: response = self._get_ai_reply( message_sequence=input_message_sequence, + first_message=first_message, stream=stream, ) diff --git a/letta/llm_api/anthropic.py b/letta/llm_api/anthropic.py index 9df4cec248..912ac4567f 100644 --- a/letta/llm_api/anthropic.py +++ b/letta/llm_api/anthropic.py @@ -99,16 +99,20 @@ def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]: - 1 level less of nesting - "parameters" -> "input_schema" """ - tools_dict_list = [] + formatted_tools = [] for tool in tools: - tools_dict_list.append( - { - "name": tool.function.name, - "description": tool.function.description, - "input_schema": tool.function.parameters, + formatted_tool = { + "name" : tool.function.name, + "description" : tool.function.description, + "input_schema" : tool.function.parameters or { + "type": "object", + "properties": {}, + "required": [] } - ) - return tools_dict_list + } + formatted_tools.append(formatted_tool) + + return formatted_tools def merge_tool_results_into_user_messages(messages: List[dict]): diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 163c4e1868..dadd128aa9 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -113,6 +113,7 @@ def create( function_call: str = "auto", # hint first_message: bool = False, + force_tool_call: Optional[str] = None, # Force a specific tool to be called # use tool naming? # if false, will use deprecated 'functions' style use_tool_naming: bool = True, @@ -252,6 +253,16 @@ def create( if not use_tool_naming: raise NotImplementedError("Only tool calling supported on Anthropic API requests") + tool_call = None + if force_tool_call is not None: + tool_call = { + "type": "function", + "function": { + "name": force_tool_call + } + } + assert functions is not None + return anthropic_chat_completions_request( url=llm_config.model_endpoint, api_key=model_settings.anthropic_api_key, @@ -259,7 +270,7 @@ def create( model=llm_config.model, messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages], tools=[{"type": "function", "function": f} for f in functions] if functions else None, - # tool_choice=function_call, + tool_choice=tool_call, # user=str(user_id), # NOTE: max_tokens is required for Anthropic API max_tokens=1024, # TODO make dynamic diff --git a/tests/configs/llm_model_configs/claude-3-sonnet-20240229.json b/tests/configs/llm_model_configs/claude-3-sonnet-20240229.json new file mode 100644 index 0000000000..5eef194bea --- /dev/null +++ b/tests/configs/llm_model_configs/claude-3-sonnet-20240229.json @@ -0,0 +1,9 @@ +{ + "context_window": 200000, + "model": "claude-3-5-sonnet-20241022", + "model_endpoint_type": "anthropic", + "model_endpoint": "https://api.anthropic.com/v1", + "context_window": 200000, + "model_wrapper": null, + "put_inner_thoughts_in_kwargs": true +} diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py index ff8700c1c3..3e87e3aaea 100644 --- a/tests/integration_test_agent_tool_graph.py +++ b/tests/integration_test_agent_tool_graph.py @@ -1,9 +1,9 @@ import uuid import pytest - from letta import create_client from letta.schemas.letta_message import FunctionCallMessage +from letta.schemas.llm_config import LLMConfig from letta.schemas.tool_rule import ChildToolRule, InitToolRule, TerminalToolRule from tests.helpers.endpoints_helper import ( assert_invoked_function_call, @@ -127,3 +127,47 @@ def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none): print(f"Got successful response from client: \n\n{response}") cleanup(client=client, agent_uuid=agent_uuid) + + +def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): + """Test that the initial tool rule is enforced for the first message.""" + client = create_client() + client.set_default_llm_config( + LLMConfig( + model="claude-3-opus-20240229", + model_endpoint_type="anthropic", + model_endpoint="https://api.anthropic.com/v1", + context_window=200000, # NOTE: can be set to <= 200000 + ) + ) + cleanup(client=client, agent_uuid=agent_uuid) + + # Create tool rules that require tool_a to be called first + t1 = client.create_or_update_tool(first_secret_word) + t2 = client.create_or_update_tool(second_secret_word) + tool_rules = [ + InitToolRule(tool_name="first_secret_word"), + ChildToolRule(tool_name="first_secret_word", children=["second_secret_word"]), + ] + tools = [t1, t2] + + # Make agent state + anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json" + agent_state = setup_agent(client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) + response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?") + + assert_sanity_checks(response) + messages = response.messages + + assert_invoked_function_call(messages, "first_secret_word") + assert_invoked_function_call(messages, "second_secret_word") + + tool_names = [t.name for t in [t1, t2]] + tool_names += ["send_message"] + for m in messages: + if isinstance(m, FunctionCallMessage): + # Check that it's equal to the first one + assert m.function_call.name == tool_names[0] + + # Pop out first one + tool_names = tool_names[1:] From df97d1f41e24462e2feb49d793399daf6a5dd308 Mon Sep 17 00:00:00 2001 From: Mindy Long Date: Tue, 17 Dec 2024 10:32:29 -0800 Subject: [PATCH 2/6] more robust testing, structured output support defined at agent initialization --- letta/agent.py | 12 ++++++-- letta/constants.py | 3 ++ tests/integration_test_agent_tool_graph.py | 35 ++++++++++++++-------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/letta/agent.py b/letta/agent.py index cb15c61e52..f5dc279943 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -18,6 +18,7 @@ MESSAGE_SUMMARY_WARNING_FRAC, O1_BASE_TOOLS, REQ_HEARTBEAT_MESSAGE, + STRUCTURED_OUTPUT_MODELS ) from letta.errors import LLMError from letta.helpers import ToolRulesSolver @@ -276,6 +277,7 @@ def __init__( # gpt-4, gpt-3.5-turbo, ... self.model = self.agent_state.llm_config.model + self.check_tool_rules() # state managers self.block_manager = BlockManager() @@ -381,6 +383,13 @@ def __init__( # Create the agent in the DB self.update_state() + def check_tool_rules(self): + if self.model not in STRUCTURED_OUTPUT_MODELS: + assert len(self.tool_rules_solver.init_tool_rules) <= 1, "Multiple initial tools not supported for non-structured models" + self.supports_structured_output = False + else: + self.supports_structured_output = True + def update_memory_if_change(self, new_memory: Memory) -> bool: """ Update internal memory object and system prompt if there have been modifications. @@ -598,8 +607,7 @@ def _get_ai_reply( # For the first message, force the initial tool if one is specified force_tool_call = None - if first_message and self.tool_rules_solver.init_tool_rules: - assert len(self.tool_rules_solver.init_tool_rules) == 1, "Multiple initial tools not supported" + if first_message and not self.supports_structured_output and len(self.tool_rules_solver.init_tool_rules) > 0: force_tool_call = self.tool_rules_solver.init_tool_rules[0].tool_name for attempt in range(1, empty_response_retry_limit + 1): diff --git a/letta/constants.py b/letta/constants.py index 5e9ac9b268..22ebcee820 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -48,6 +48,9 @@ DEFAULT_MESSAGE_TOOL = "send_message" DEFAULT_MESSAGE_TOOL_KWARG = "message" +# Structured output models +STRUCTURED_OUTPUT_MODELS = {"gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06"} + # LOGGER_LOG_LEVEL is use to convert Text to Logging level value for logging mostly for Cli input to setting level LOGGER_LOG_LEVELS = {"CRITICAL": CRITICAL, "ERROR": ERROR, "WARN": WARN, "WARNING": WARNING, "INFO": INFO, "DEBUG": DEBUG, "NOTSET": NOTSET} diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py index 3e87e3aaea..d8bca58de7 100644 --- a/tests/integration_test_agent_tool_graph.py +++ b/tests/integration_test_agent_tool_graph.py @@ -1,3 +1,4 @@ +import time import uuid import pytest @@ -154,20 +155,28 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): # Make agent state anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json" agent_state = setup_agent(client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) - response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?") + for i in range(3): + response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?") - assert_sanity_checks(response) - messages = response.messages + assert_sanity_checks(response) + messages = response.messages - assert_invoked_function_call(messages, "first_secret_word") - assert_invoked_function_call(messages, "second_secret_word") + assert_invoked_function_call(messages, "first_secret_word") + assert_invoked_function_call(messages, "second_secret_word") - tool_names = [t.name for t in [t1, t2]] - tool_names += ["send_message"] - for m in messages: - if isinstance(m, FunctionCallMessage): - # Check that it's equal to the first one - assert m.function_call.name == tool_names[0] + tool_names = [t.name for t in [t1, t2]] + tool_names += ["send_message"] + for m in messages: + if isinstance(m, FunctionCallMessage): + # Check that it's equal to the first one + assert m.function_call.name == tool_names[0] - # Pop out first one - tool_names = tool_names[1:] + # Pop out first one + tool_names = tool_names[1:] + + print(f"Passed iteration {i}") + + # Implement exponential backoff with initial time of 10 seconds + if i < 2: + backoff_time = 10 * (2 ** i) + time.sleep(backoff_time) From 30a1d5450ea8f2afd343a51b431b6495a07587ac Mon Sep 17 00:00:00 2001 From: Mindy Long Date: Tue, 17 Dec 2024 11:21:44 -0800 Subject: [PATCH 3/6] fixed create agent bug on base tools --- letta/client/client.py | 1 + tests/integration_test_offline_memory_agent.py | 1 + 2 files changed, 2 insertions(+) diff --git a/letta/client/client.py b/letta/client/client.py index d3259214e4..af2edcca4a 100644 --- a/letta/client/client.py +++ b/letta/client/client.py @@ -2156,6 +2156,7 @@ def create_agent( "block_ids": [b.id for b in memory.get_blocks()] + block_ids, "tool_ids": tool_ids, "tool_rules": tool_rules, + "include_base_tools": include_base_tools, "system": system, "agent_type": agent_type, "llm_config": llm_config if llm_config else self._default_llm_config, diff --git a/tests/integration_test_offline_memory_agent.py b/tests/integration_test_offline_memory_agent.py index 8a4fb81c54..07b7c732b2 100644 --- a/tests/integration_test_offline_memory_agent.py +++ b/tests/integration_test_offline_memory_agent.py @@ -126,6 +126,7 @@ def test_chat_only_agent(client, mock_e2b_api_key_none): ) assert chat_only_agent is not None assert set(chat_only_agent.memory.list_block_labels()) == {"chat_agent_persona", "chat_agent_human"} + assert len(chat_only_agent.tools) == 1 for message in ["hello", "my name is not chad, my name is swoodily"]: client.send_message(agent_id=chat_only_agent.id, message=message, role="user") From 6c061f22e064b52c7940771328e789416c56a47b Mon Sep 17 00:00:00 2001 From: Mindy Long Date: Tue, 17 Dec 2024 14:24:42 -0800 Subject: [PATCH 4/6] test for structured output models --- letta/agent.py | 3 +- letta/constants.py | 2 +- .../openai-gpt-3.5-turbo.json | 7 ++ tests/integration_test_agent_tool_graph.py | 86 +++++++++++++++---- 4 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json diff --git a/letta/agent.py b/letta/agent.py index f5dc279943..268e8c8fb5 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -385,7 +385,8 @@ def __init__( def check_tool_rules(self): if self.model not in STRUCTURED_OUTPUT_MODELS: - assert len(self.tool_rules_solver.init_tool_rules) <= 1, "Multiple initial tools not supported for non-structured models" + if len(self.tool_rules_solver.init_tool_rules) > 1: + raise ValueError("Multiple initial tools are not supported for non-structured models. Please use only one initial tool rule.") self.supports_structured_output = False else: self.supports_structured_output = True diff --git a/letta/constants.py b/letta/constants.py index 22ebcee820..437d956c49 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -49,7 +49,7 @@ DEFAULT_MESSAGE_TOOL_KWARG = "message" # Structured output models -STRUCTURED_OUTPUT_MODELS = {"gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06"} +STRUCTURED_OUTPUT_MODELS = {"gpt-4o", "gpt-4o-mini"} # LOGGER_LOG_LEVEL is use to convert Text to Logging level value for logging mostly for Cli input to setting level LOGGER_LOG_LEVELS = {"CRITICAL": CRITICAL, "ERROR": ERROR, "WARN": WARN, "WARNING": WARNING, "INFO": INFO, "DEBUG": DEBUG, "NOTSET": NOTSET} diff --git a/tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json b/tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json new file mode 100644 index 0000000000..059d6ad82f --- /dev/null +++ b/tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json @@ -0,0 +1,7 @@ +{ + "context_window": 16385, + "model": "gpt-3.5-turbo", + "model_endpoint_type": "openai", + "model_endpoint": "https://api.openai.com/v1", + "model_wrapper": null +} diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py index d8bca58de7..a125eb22ed 100644 --- a/tests/integration_test_agent_tool_graph.py +++ b/tests/integration_test_agent_tool_graph.py @@ -2,7 +2,8 @@ import uuid import pytest -from letta import create_client +from sqlalchemy.engine import create +from letta import agent, create_client from letta.schemas.letta_message import FunctionCallMessage from letta.schemas.llm_config import LLMConfig from letta.schemas.tool_rule import ChildToolRule, InitToolRule, TerminalToolRule @@ -130,32 +131,86 @@ def test_single_path_agent_tool_call_graph(mock_e2b_api_key_none): cleanup(client=client, agent_uuid=agent_uuid) +def test_check_tool_rules_with_different_models(mock_e2b_api_key_none): + """Test that tool rules are properly checked for different model configurations.""" + client = create_client() + + config_files = [ + "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json", + "tests/configs/llm_model_configs/openai-gpt-3.5-turbo.json", + "tests/configs/llm_model_configs/openai-gpt-4o.json", + ] + + # Create two test tools + t1_name = "first_secret_word" + t2_name = "second_secret_word" + t1 = client.create_or_update_tool(first_secret_word, name=t1_name) + t2 = client.create_or_update_tool(second_secret_word, name=t2_name) + tool_rules = [ + InitToolRule(tool_name=t1_name), + InitToolRule(tool_name=t2_name) + ] + tools = [t1, t2] + + for config_file in config_files: + # Setup tools + agent_uuid = str(uuid.uuid4()) + + if "gpt-4o" in config_file: + # Structured output model (should work with multiple init tools) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, + tool_ids=[t.id for t in tools], + tool_rules=tool_rules) + assert agent_state is not None + else: + # Non-structured output model (should raise error with multiple init tools) + with pytest.raises(ValueError, match="Multiple initial tools are not supported for non-structured models"): + setup_agent(client, config_file, agent_uuid=agent_uuid, + tool_ids=[t.id for t in tools], + tool_rules=tool_rules) + + # Cleanup + cleanup(client=client, agent_uuid=agent_uuid) + + # Create tool rule with single initial tool + t3_name = "third_secret_word" + t3 = client.create_or_update_tool(third_secret_word, name=t3_name) + tool_rules = [ + InitToolRule(tool_name=t3_name) + ] + tools = [t3] + for config_file in config_files: + agent_uuid = str(uuid.uuid4()) + + # Structured output model (should work with single init tool) + agent_state = setup_agent(client, config_file, agent_uuid=agent_uuid, + tool_ids=[t.id for t in tools], + tool_rules=tool_rules) + assert agent_state is not None + + cleanup(client=client, agent_uuid=agent_uuid) + + def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): """Test that the initial tool rule is enforced for the first message.""" client = create_client() - client.set_default_llm_config( - LLMConfig( - model="claude-3-opus-20240229", - model_endpoint_type="anthropic", - model_endpoint="https://api.anthropic.com/v1", - context_window=200000, # NOTE: can be set to <= 200000 - ) - ) - cleanup(client=client, agent_uuid=agent_uuid) # Create tool rules that require tool_a to be called first - t1 = client.create_or_update_tool(first_secret_word) - t2 = client.create_or_update_tool(second_secret_word) + t1_name = "first_secret_word" + t2_name = "second_secret_word" + t1 = client.create_or_update_tool(first_secret_word, name=t1_name) + t2 = client.create_or_update_tool(second_secret_word, name=t2_name) tool_rules = [ - InitToolRule(tool_name="first_secret_word"), - ChildToolRule(tool_name="first_secret_word", children=["second_secret_word"]), + InitToolRule(tool_name=t1_name), + ChildToolRule(tool_name=t1_name, children=[t2_name]), ] tools = [t1, t2] # Make agent state anthropic_config_file = "tests/configs/llm_model_configs/claude-3-sonnet-20240229.json" - agent_state = setup_agent(client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) for i in range(3): + agent_uuid = str(uuid.uuid4()) + agent_state = setup_agent(client, anthropic_config_file, agent_uuid=agent_uuid, tool_ids=[t.id for t in tools], tool_rules=tool_rules) response = client.user_message(agent_id=agent_state.id, message="What is the second secret word?") assert_sanity_checks(response) @@ -175,6 +230,7 @@ def test_claude_initial_tool_rule_enforced(mock_e2b_api_key_none): tool_names = tool_names[1:] print(f"Passed iteration {i}") + cleanup(client=client, agent_uuid=agent_uuid) # Implement exponential backoff with initial time of 10 seconds if i < 2: From 02d951bc877392ffcb4d52a0bbd8922cecb42b67 Mon Sep 17 00:00:00 2001 From: Mindy Long Date: Tue, 17 Dec 2024 14:37:50 -0800 Subject: [PATCH 5/6] removed unused imports --- tests/integration_test_agent_tool_graph.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration_test_agent_tool_graph.py b/tests/integration_test_agent_tool_graph.py index a125eb22ed..19c7dbd6cb 100644 --- a/tests/integration_test_agent_tool_graph.py +++ b/tests/integration_test_agent_tool_graph.py @@ -2,10 +2,8 @@ import uuid import pytest -from sqlalchemy.engine import create -from letta import agent, create_client +from letta import create_client from letta.schemas.letta_message import FunctionCallMessage -from letta.schemas.llm_config import LLMConfig from letta.schemas.tool_rule import ChildToolRule, InitToolRule, TerminalToolRule from tests.helpers.endpoints_helper import ( assert_invoked_function_call, From 72ec8bf26dad1ebc6b59107f3de002948cd16602 Mon Sep 17 00:00:00 2001 From: Mindy Long Date: Tue, 17 Dec 2024 15:05:37 -0800 Subject: [PATCH 6/6] tracking first message in agent chain with step=0 --- letta/agent.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/letta/agent.py b/letta/agent.py index 268e8c8fb5..0fac370ad3 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -598,6 +598,7 @@ def _get_ai_reply( empty_response_retry_limit: int = 3, backoff_factor: float = 0.5, # delay multiplier for exponential backoff max_delay: float = 10.0, # max delay between retries + step_count: Optional[int] = None, ) -> ChatCompletionResponse: """Get response from LLM API with robust retry mechanism.""" @@ -608,7 +609,12 @@ def _get_ai_reply( # For the first message, force the initial tool if one is specified force_tool_call = None - if first_message and not self.supports_structured_output and len(self.tool_rules_solver.init_tool_rules) > 0: + if ( + step_count is not None + and step_count == 0 + and not self.supports_structured_output + and len(self.tool_rules_solver.init_tool_rules) > 0 + ): force_tool_call = self.tool_rules_solver.init_tool_rules[0].tool_name for attempt in range(1, empty_response_retry_limit + 1): @@ -912,10 +918,8 @@ def step( total_usage = UsageStatistics() step_count = 0 while True: - if step_count > 0: - kwargs["first_message"] = False - else: - kwargs["first_message"] = True + kwargs["first_message"] = False + kwargs["step_count"] = step_count step_response = self.inner_step( messages=next_input_message, **kwargs, @@ -991,6 +995,7 @@ def inner_step( first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS, skip_verify: bool = False, stream: bool = False, # TODO move to config? + step_count: Optional[int] = None, ) -> AgentStepResponse: """Runs a single step in the agent loop (generates at most one LLM call)""" @@ -1035,6 +1040,7 @@ def inner_step( message_sequence=input_message_sequence, first_message=first_message, stream=stream, + step_count=step_count, ) # Step 3: check if LLM wanted to call a function