1

2024-02-28 09:05:11 +00:00 · 2024-02-28 09:05:11 +00:00 · f5d0c5a322
commit f5d0c5a322
parent b802e0bc44
11 changed files with 28 additions and 32669 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,45 +0,0 @@
 data/
 data
 data_0830/
 data*.zip
 *.zip
 *io.txt
 error.txt
 miss*.txt
 output.txt
 token_count_in.txt
 token_count_out.txt
 *local.sh
 *.DS_store
 openchat*/
 toolllama*/
 ws/
 .history/
 reproduction_data*/
 output/
 *result/
 result*/
 __MACOSX/
 api_test_results_with_docs.json
 customized_api_test_results_with_docs.json
 model_list.txt
 run.bash
 tool_data*
 api_test_results*
 api_details.json
 api_details*
 category_tool_details*
 config.py
 *dy.py
 *dy.json
 OAI_CONFIG_LIST
 repos/
 rapidapi_key_list.json
 openai_utils_dy.py
 .chroma/
 *.pyc
 **/__pycache__
 .vscode/
 .cache/42/
 retrieval_model/
--- a/README.md
+++ b/README.md
@ -1,9 +1,9 @@
 # AnyTool
 ![Static Badge](https://img.shields.io/badge/anytool-blue)
-<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a>  <a href='https://github.com/buaacyw/GaussianEditor/blob/master/LICENSE.txt'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
+<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a>  <a href='https://github.com/dyabel/AnyTool/blob/public/LICENSE'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
 This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253)
-![Figure](https://media.discordapp.net/attachments/1202909094470492163/1202909161755648010/image.png?ex=65d865f5&is=65c5f0f5&hm=a399dda2c4b1c6caf17d3a0d29bc7dc9c504012ba7a4cc856283ce9dc9a3ebd5&=&format=webp&quality=lossless&width=781&height=601)
+![Figure](./assets/anytool.png)
 # 🔧 Installation
 ## ✅ Dependencies
@ -14,6 +14,10 @@ Require Python 3.9+
 pip install -r requirements.txt
 ```
 # OpenAI GPT API config
 Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py). 
 # 🔆 Data Preparation
 **ToolBench**
@ -23,30 +27,30 @@ Refer to [ToolBench](https://github.com/OpenBMB/ToolBench).
 You should prepare the ToolBench data first. Make sure you have the directory of data/toolenv/tools
 ```
-python extract_api_details.py
+python scripts/extract_api_details.py
-python extract_category_tool_details.py
+python scripts/extract_category_tool_details.py
-python extract_tool_database.py
+python scripts/extract_tool_database.py
 ```
 **AnyToolBench**
 Generation script
 ```
-python data_generation_by_gpt4.py
+python scripts/data_generation_by_gpt4.py
 ```
-We provide sample data in anytoolbench.json file.
+We provide sample data in the [anytoolbench.json](./atb_data/anytoolbench.json) file.
 # 🚗 Run AnyTool
-Fill your OpenAI config and toolbench key into the config.py (see config_example.py). 
+Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py). 
-Run ToolBench
+Experiment on ToolBench
 ```
 python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64
 ```
-Run AnyToolBench
+Experiment on AnyToolBench
 ```
 python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json --max_api_number 64
 ```
--- a/assets/anytool.png
+++ b/assets/anytool.png
--- a/atb_data/anytoolbench.json
+++ b/atb_data/anytoolbench.json
--- a/misc/solved_dict.json
+++ b/misc/solved_dict.json
--- a/scripts/anytool.py
+++ b/scripts/anytool.py
--- a/scripts/data_generation_by_gpt4.py
+++ b/scripts/data_generation_by_gpt4.py
--- a/scripts/extract_api_details.py
+++ b/scripts/extract_api_details.py
@ -2,18 +2,10 @@ import zipfile
 import os
 import json
 from copy import deepcopy
 # Extract the new zip file
 # with zipfile.ZipFile(zip_file_path_small, 'r') as zip_ref:
 #     zip_ref.extractall(extracted_folder_path_small)
 extracted_folder_path_small = 'data/toolenv/tools'
 # api_test_results = json.load(open('api_test_results_with_docs2.json', 'r', encoding='utf-8'))
 # Walk through the extracted files and read the JSON data
-detailed_data_small = {}  # Initialize an empty dictionary to store the extracted data
+detailed_data = {}  # Initialize an empty dictionary to store the extracted data
 cnt = 0
 api_name_list = []
 data_for_retrieval = []
@ -34,26 +26,17 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
                    tool_name = json_data['tool_name']
                api_list = json_data.get('api_list', [])
                # Extract necessary data for each API and organize it in the dictionary
-                if category not in detailed_data_small:
+                if category not in detailed_data:
-                    detailed_data_small[category] = {}
+                    detailed_data[category] = {}
-                if tool_name not in detailed_data_small[category]:
+                if tool_name not in detailed_data[category]:
-                    detailed_data_small[category][tool_name] = {"api_list": []}
+                    detailed_data[category][tool_name] = {"api_list": []}
                else:
                    tool_name += '_new'
                    raise ValueError('duplicate tool name')
-                    detailed_data_small[category][tool_name] = {"api_list": []}
+                    detailed_data[category][tool_name] = {"api_list": []}
                for api in api_list:
                    cnt += 1
                    api_name = api.get('name', 'Unknown API')
                    # try:
                    #     if api_test_results[category][tool_name][api_name]["result"]['return_type'] == "inalive":
                    #         print('remove')
                    #         continue
                    # except:
                    #     print(category, tool_name, api_name)
                    #     pass
                    # if api_name in api_name_list:
                    #     raise Exception('duplicate api name')
                    api_name_list.append(api_name)
                    description = api.get('description', 'No description available.')
                    required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
@ -61,7 +44,6 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
                    test_endpoint = api.get('test_endpoint', '') 
                    tool_description = json_data.get('tool_description', 'No description available.'),
                    # Organizing the data
                    # print(len(detailed_data_small[category][tool_name]['api_list']))
                    if tool_description is not None:
                        tool_description = tool_description[:100]
                    if description is not None:
@ -75,36 +57,22 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
                        "required_parameters": required_parameters,
                        "optional_parameters": optional_parameters,
                    })
-                    detailed_data_small[category][tool_name]["api_list"].append({
+                    detailed_data[category][tool_name]["api_list"].append({
                        "name": api_name,
                        "description": description,
                        "required_parameters": required_parameters,
                        "optional_parameters": optional_parameters,
                        # "test_endpoint": test_endpoint
                    })
                # except Exception as e:
                    # Store the error message if we fail to process a file
                    # if category not in detailed_data_small:
                    #     detailed_data_small[category] = {}
                    # detailed_data_small[category][file] = {"error": str(e)}
 # Verifying the structure of the detailed_data_small by displaying a sample
 # sample_detailed_data_small = {
 #     category: {
 #         tool_name: detailed_data_small[category][tool_name] 
 #         for tool_name in list(detailed_data_small[category].keys())[:1]
 #     }
 #     for category in list(detailed_data_small.keys())[:3]
 # }
 cnt = 0 
-for category in detailed_data_small:
+for category in detailed_data:
-    for tool_name in detailed_data_small[category]:
+    for tool_name in detailed_data[category]:
-        cnt += len(detailed_data_small[category][tool_name]['api_list'])
+        cnt += len(detailed_data[category][tool_name]['api_list'])
 print('total api number:', cnt)
-# json.dump(detailed_data_small, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
+# json.dump(detailed_data, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
-print(len(data_for_retrieval))
+# print(len(data_for_retrieval))
-json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
+# json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
-json.dump(detailed_data_small, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
+json.dump(detailed_data, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
 print(cnt)
--- a/scripts/extract_category_tool_details.py
+++ b/scripts/extract_category_tool_details.py
--- a/scripts/extract_tool_database.py
+++ b/scripts/extract_tool_database.py
--- a/tool_details.json
+++ b/tool_details.json