refactor: reorganize script location

OpenBMB · Oct 10, 2023 · 6bdce1d
1 parent ccf4319
commit 6bdce1d
Show file tree

Hide file tree

Showing 20 changed files with 22 additions and 21 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -38,4 +38,4 @@ jobs:
       run: |
         python setup.py develop 
         python agentverse_command/benchmark.py --task tasksolving/mgsm/gpt-3.5 --dataset_path data/mgsm/test_sample.jsonl --overwrite --output_path ci_smoke_test_output --tasks_dir ./agentverse/tasks
-        python evaluate_math.py --path ci_smoke_test_output/results.jsonl --ci_smoke_test
+        python scripts/evaluate_math.py --path ci_smoke_test_output/results.jsonl --ci_smoke_test
diff --git a/.gitignore b/.gitignore
@@ -172,4 +172,5 @@ raw/
 results
 tmp/
 data/toolbench
-logs/
+logs/
+ci_smoke_test_output/
diff --git a/agentverse/environments/tasksolving_env/rules/executor/coverage_test.py b/agentverse/environments/tasksolving_env/rules/executor/coverage_test.py
@@ -29,16 +29,18 @@ def step(
         *args,
         **kwargs,
     ) -> Any:
-        from evaluate_commongen import scoring
+        from scripts.evaluate_commongen import scoring
 
-        coverage, missing_tokens = scoring([s.content for s in solution], [task_description])
+        coverage, missing_tokens = scoring(
+            [s.content for s in solution], [task_description]
+        )
         if len(missing_tokens[0]) == 0:
             missing_tokens = "No missing tokens."
         else:
             missing_tokens = ", ".join(missing_tokens[0])
         result = f"Coverage: {coverage*100:.2f}%\nMissing Tokens: {missing_tokens}"
         return [ExecutorMessage(content=result)]
-    
+
     async def astep(
         self,
         agent: ExecutorAgent,
@@ -47,9 +49,11 @@ async def astep(
         *args,
         **kwargs,
     ) -> Any:
-        from evaluate_commongen import scoring
+        from scripts.evaluate_commongen import scoring
 
-        coverage, missing_tokens = scoring([s.content for s in solution], [task_description])
+        coverage, missing_tokens = scoring(
+            [s.content for s in solution], [task_description]
+        )
         if len(missing_tokens[0]) == 0:
             missing_tokens = "No missing tokens."
         else:

diff --git a/agentverse/tasks/tasksolving/tool_using/24point/config.yaml b/agentverse/tasks/tasksolving/tool_using/24point/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3
 cnt_tool_agents: &cnt_tool_agents 2
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: Recently, it has become popular in the AI field to verify the mathematical reasoning abilities of large language models by observing if they can solve the "24-Point Game." What is this game? Does it have a code-based solution? If it does, provide a Python code along with test cases and test its functionality. What are some other similar games that can be used to test the models' mathematical reasoning abilities?
 

diff --git a/agentverse/tasks/tasksolving/tool_using/bmi/config.yaml b/agentverse/tasks/tasksolving/tool_using/bmi/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3
 cnt_tool_agents: &cnt_tool_agents 2
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I want to lose 5kg in the next 2 months. I weigh 70kg, am 170cm tall, and my age is 25. Calculate my BMI and based on that, suggest a workout routine and daily calorie intake to help me achieve my goal.
 

diff --git a/agentverse/tasks/tasksolving/tool_using/bookclub/config.yaml b/agentverse/tasks/tasksolving/tool_using/bookclub/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3
 cnt_tool_agents: &cnt_tool_agents 2
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I want to kick off a book club with my friends. Can you tell me the top 5 bestselling books this month, gather the content summary for each, and find online platforms where we can buy or borrow them?
 

diff --git a/agentverse/tasks/tasksolving/tool_using/car/config.yaml b/agentverse/tasks/tasksolving/tool_using/car/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4
 cnt_tool_agents: &cnt_tool_agents 3
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I am planning to buy a new car. Could you help me compare the features and prices of the latest models of Tesla, Ford, and Toyota? Include details about range, charging time, safety features, and after-sales service. Also, provide a brief analysis of the pros and cons of each car.
 

diff --git a/agentverse/tasks/tasksolving/tool_using/date/config.yaml b/agentverse/tasks/tasksolving/tool_using/date/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4
 cnt_tool_agents: &cnt_tool_agents 3
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I am planning a date with my girlfriend this week, please search for a good movie theater and a restaurant near Tsinghua University in Beijing and recommend a good movie to watch. Please search the web.
 

diff --git a/agentverse/tasks/tasksolving/tool_using/diy/config.yaml b/agentverse/tasks/tasksolving/tool_using/diy/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4
 cnt_tool_agents: &cnt_tool_agents 3
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I've recently taken an interest in DIY home projects. Search for beginner-friendly DIY projects that can be completed over the weekend. Also, provide a list of materials required and a step-by-step guide for each project.
 

diff --git a/agentverse/tasks/tasksolving/tool_using/party/config.yaml b/agentverse/tasks/tasksolving/tool_using/party/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4
 cnt_tool_agents: &cnt_tool_agents 3
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I want to hold a party at somewhere around Tsinghua University tomorrow. I need you to look for some best places for holding a party nearby, and tell me whether the weather is good for holding a party tomorrow. Also, I want to know what activities can be considered in my party. Help me search the web.
 

diff --git a/agentverse/tasks/tasksolving/tool_using/sudoku/config.yaml b/agentverse/tasks/tasksolving/tool_using/sudoku/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3
 cnt_tool_agents: &cnt_tool_agents 2
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I've just heard an interesting game called 'sudoku'. Can you search for the rules of this game and the solution to this game? Finally, write a python script to automatically solve this game if possible.
 

diff --git a/tools_simplified.json → agentverse/tasks/tasksolving/tool_using/tools_simplified.json b/tools_simplified.json → agentverse/tasks/tasksolving/tool_using/tools_simplified.json
diff --git a/agentverse/tasks/tasksolving/tool_using/trending/config.yaml b/agentverse/tasks/tasksolving/tool_using/trending/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4
 cnt_tool_agents: &cnt_tool_agents 3
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I'm currently analyzing what is popular on the website. Can you help me find the recent trending stuff. It could be anything, like trending news, products, books, movies, music, etc. Give a summarization for me.
 

diff --git a/agentverse/tasks/tasksolving/tool_using/vacation/config.yaml b/agentverse/tasks/tasksolving/tool_using/vacation/config.yaml
@@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4
 cnt_tool_agents: &cnt_tool_agents 3
 max_rounds: &max_rounds 5
 max_criticizing_rounds: 3
-tool_config: &tool_config tools_simplified.json
+tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json
 
 task_description: I'm planning a two-week vacation to Japan next month. Help me plan my itinerary. I want to visit Tokyo, Kyoto, and Osaka. Look for the top tourist attractions in each city, and also suggest the best mode of travel between these cities. Additionally, find out the weather forecast for the month I'll be visiting.
 

diff --git a/scripts/__init__.py b/scripts/__init__.py
diff --git a/evaluate_commongen.py → scripts/evaluate_commongen.py b/evaluate_commongen.py → scripts/evaluate_commongen.py
diff --git a/evaluate_logic.py → scripts/evaluate_logic.py b/evaluate_logic.py → scripts/evaluate_logic.py
diff --git a/evaluate_math.py → scripts/evaluate_math.py b/evaluate_math.py → scripts/evaluate_math.py
diff --git a/evaluate_responsegen.py → scripts/evaluate_responsegen.py b/evaluate_responsegen.py → scripts/evaluate_responsegen.py
diff --git a/test_pokemon_env.py b/test_pokemon_env.py