1

2024-02-28 09:05:11 +00:00 · 2024-02-28 09:05:11 +00:00 · f5d0c5a322
commit f5d0c5a322
parent b802e0bc44
11 changed files with 28 additions and 32669 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,45 +0,0 @@
-data/
-data
-data_0830/
-data*.zip
-*.zip
-*io.txt
-error.txt
-miss*.txt
-output.txt
-token_count_in.txt
-token_count_out.txt
-*local.sh
-*.DS_store
-openchat*/
-toolllama*/
-ws/
-.history/
-reproduction_data*/
-output/
-*result/
-result*/
-__MACOSX/
-api_test_results_with_docs.json
-customized_api_test_results_with_docs.json
-model_list.txt
-run.bash
-tool_data*
-api_test_results*
-api_details.json
-api_details*
-category_tool_details*
-config.py
-*dy.py
-*dy.json
-OAI_CONFIG_LIST
-repos/
-rapidapi_key_list.json
-openai_utils_dy.py
-.chroma/
-
-*.pyc
-**/__pycache__
-.vscode/
-.cache/42/
-retrieval_model/
--- a/README.md
+++ b/README.md
@ -1,9 +1,9 @@
 # AnyTool
 ![Static Badge](https://img.shields.io/badge/anytool-blue)
-<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a>  <a href='https://github.com/buaacyw/GaussianEditor/blob/master/LICENSE.txt'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
+<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a>  <a href='https://github.com/dyabel/AnyTool/blob/public/LICENSE'><img src='https://img.shields.io/badge/License-Apache-blue'></a>

 This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253)
-![Figure](https://media.discordapp.net/attachments/1202909094470492163/1202909161755648010/image.png?ex=65d865f5&is=65c5f0f5&hm=a399dda2c4b1c6caf17d3a0d29bc7dc9c504012ba7a4cc856283ce9dc9a3ebd5&=&format=webp&quality=lossless&width=781&height=601)
+![Figure](./assets/anytool.png)

 # 🔧 Installation
 ## ✅ Dependencies
@ -14,6 +14,10 @@ Require Python 3.9+
 pip install -r requirements.txt
 ```

+# OpenAI GPT API config
+Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py). 
+
+
 # 🔆 Data Preparation
 **ToolBench**

@ -23,30 +27,30 @@ Refer to [ToolBench](https://github.com/OpenBMB/ToolBench).

 You should prepare the ToolBench data first. Make sure you have the directory of data/toolenv/tools
 ```
-python extract_api_details.py
-python extract_category_tool_details.py
-python extract_tool_database.py
+python scripts/extract_api_details.py
+python scripts/extract_category_tool_details.py
+python scripts/extract_tool_database.py
 ```

 **AnyToolBench**

 Generation script
 ```
-python data_generation_by_gpt4.py
+python scripts/data_generation_by_gpt4.py
 ```

-We provide sample data in anytoolbench.json file.
+We provide sample data in the [anytoolbench.json](./atb_data/anytoolbench.json) file.



 # 🚗 Run AnyTool
-Fill your OpenAI config and toolbench key into the config.py (see config_example.py). 
+Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py). 

-Run ToolBench
+Experiment on ToolBench
 ```
 python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64
 ```
-Run AnyToolBench
+Experiment on AnyToolBench
 ```
 python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json -max_api_number 64
 ```
--- a/assets/anytool.png
+++ b/assets/anytool.png
--- a/atb_data/anytoolbench.json
+++ b/atb_data/anytoolbench.json
--- a/misc/solved_dict.json
+++ b/misc/solved_dict.json
--- a/scripts/anytool.py
+++ b/scripts/anytool.py
--- a/scripts/data_generation_by_gpt4.py
+++ b/scripts/data_generation_by_gpt4.py
--- a/scripts/extract_api_details.py
+++ b/scripts/extract_api_details.py
@ -2,18 +2,10 @@ import zipfile
 import os
 import json
 from copy import deepcopy
-# Extract the new zip file
-# with zipfile.ZipFile(zip_file_path_small, 'r') as zip_ref:
-#     zip_ref.extractall(extracted_folder_path_small)
 extracted_folder_path_small = 'data/toolenv/tools'

-
-
-# api_test_results = json.load(open('api_test_results_with_docs2.json', 'r', encoding='utf-8'))
-
-
 # Walk through the extracted files and read the JSON data
-detailed_data_small = {}  # Initialize an empty dictionary to store the extracted data
+detailed_data = {}  # Initialize an empty dictionary to store the extracted data
 cnt = 0
 api_name_list = []
 data_for_retrieval = []
@ -34,26 +26,17 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
                    tool_name = json_data['tool_name']
                api_list = json_data.get('api_list', [])
                # Extract necessary data for each API and organize it in the dictionary
-                if category not in detailed_data_small:
-                    detailed_data_small[category] = {}
-                if tool_name not in detailed_data_small[category]:
-                    detailed_data_small[category][tool_name] = {"api_list": []}
+                if category not in detailed_data:
+                    detailed_data[category] = {}
+                if tool_name not in detailed_data[category]:
+                    detailed_data[category][tool_name] = {"api_list": []}
                else:
                    tool_name += '_new'
                    raise ValueError('duplicate tool name')
-                    detailed_data_small[category][tool_name] = {"api_list": []}
+                    detailed_data[category][tool_name] = {"api_list": []}
                for api in api_list:
                    cnt += 1
                    api_name = api.get('name', 'Unknown API')
-                    # try:
-                    #     if api_test_results[category][tool_name][api_name]["result"]['return_type'] == "inalive":
-                    #         print('remove')
-                    #         continue
-                    # except:
-                    #     print(category, tool_name, api_name)
-                    #     pass
-                    # if api_name in api_name_list:
-                    #     raise Exception('duplicate api name')
                    api_name_list.append(api_name)
                    description = api.get('description', 'No description available.')
                    required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
@ -61,7 +44,6 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
                    test_endpoint = api.get('test_endpoint', '') 
                    tool_description = json_data.get('tool_description', 'No description available.'),
                    # Organizing the data
-                    # print(len(detailed_data_small[category][tool_name]['api_list']))
                    if tool_description is not None:
                        tool_description = tool_description[:100]
                    if description is not None:
@ -75,36 +57,22 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
                        "required_parameters": required_parameters,
                        "optional_parameters": optional_parameters,
                    })
-                    detailed_data_small[category][tool_name]["api_list"].append({
+                    detailed_data[category][tool_name]["api_list"].append({
                        "name": api_name,
                        "description": description,
                        "required_parameters": required_parameters,
                        "optional_parameters": optional_parameters,
                        # "test_endpoint": test_endpoint
                    })
-                # except Exception as e:
-                    # Store the error message if we fail to process a file
-                    # if category not in detailed_data_small:
-                    #     detailed_data_small[category] = {}
-                    # detailed_data_small[category][file] = {"error": str(e)}
-
-# Verifying the structure of the detailed_data_small by displaying a sample
-# sample_detailed_data_small = {
-#     category: {
-#         tool_name: detailed_data_small[category][tool_name] 
-#         for tool_name in list(detailed_data_small[category].keys())[:1]
-#     }
-#     for category in list(detailed_data_small.keys())[:3]
-# }
 cnt = 0 
-for category in detailed_data_small:
-    for tool_name in detailed_data_small[category]:
-        cnt += len(detailed_data_small[category][tool_name]['api_list'])
+for category in detailed_data:
+    for tool_name in detailed_data[category]:
+        cnt += len(detailed_data[category][tool_name]['api_list'])
 print('total api number:', cnt)

-# json.dump(detailed_data_small, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
-print(len(data_for_retrieval))
-json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
-json.dump(detailed_data_small, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
+# json.dump(detailed_data, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
+# print(len(data_for_retrieval))
+# json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
+json.dump(detailed_data, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
 print(cnt)

--- a/scripts/extract_category_tool_details.py
+++ b/scripts/extract_category_tool_details.py
--- a/scripts/extract_tool_database.py
+++ b/scripts/extract_tool_database.py
--- a/tool_details.json
+++ b/tool_details.json