This commit is contained in:
dyabel 2024-02-28 09:05:11 +00:00
parent b802e0bc44
commit f5d0c5a322
11 changed files with 28 additions and 32669 deletions

45
.gitignore vendored
View file

@ -1,45 +0,0 @@
data/
data
data_0830/
data*.zip
*.zip
*io.txt
error.txt
miss*.txt
output.txt
token_count_in.txt
token_count_out.txt
*local.sh
*.DS_store
openchat*/
toolllama*/
ws/
.history/
reproduction_data*/
output/
*result/
result*/
__MACOSX/
api_test_results_with_docs.json
customized_api_test_results_with_docs.json
model_list.txt
run.bash
tool_data*
api_test_results*
api_details.json
api_details*
category_tool_details*
config.py
*dy.py
*dy.json
OAI_CONFIG_LIST
repos/
rapidapi_key_list.json
openai_utils_dy.py
.chroma/
*.pyc
**/__pycache__
.vscode/
.cache/42/
retrieval_model/

View file

@ -1,9 +1,9 @@
# AnyTool # AnyTool
![Static Badge](https://img.shields.io/badge/anytool-blue) ![Static Badge](https://img.shields.io/badge/anytool-blue)
<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a> <a href='https://github.com/buaacyw/GaussianEditor/blob/master/LICENSE.txt'><img src='https://img.shields.io/badge/License-Apache-blue'></a> <a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a> <a href='https://github.com/dyabel/AnyTool/blob/public/LICENSE'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253) This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253)
![Figure](https://media.discordapp.net/attachments/1202909094470492163/1202909161755648010/image.png?ex=65d865f5&is=65c5f0f5&hm=a399dda2c4b1c6caf17d3a0d29bc7dc9c504012ba7a4cc856283ce9dc9a3ebd5&=&format=webp&quality=lossless&width=781&height=601) ![Figure](./assets/anytool.png)
# 🔧 Installation # 🔧 Installation
## ✅ Dependencies ## ✅ Dependencies
@ -14,6 +14,10 @@ Require Python 3.9+
pip install -r requirements.txt pip install -r requirements.txt
``` ```
# OpenAI GPT API config
Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py).
# 🔆 Data Preparation # 🔆 Data Preparation
**ToolBench** **ToolBench**
@ -23,30 +27,30 @@ Refer to [ToolBench](https://github.com/OpenBMB/ToolBench).
You should prepare the ToolBench data first. Make sure you have the directory of data/toolenv/tools You should prepare the ToolBench data first. Make sure you have the directory of data/toolenv/tools
``` ```
python extract_api_details.py python scripts/extract_api_details.py
python extract_category_tool_details.py python scripts/extract_category_tool_details.py
python extract_tool_database.py python scripts/extract_tool_database.py
``` ```
**AnyToolBench** **AnyToolBench**
Generation script Generation script
``` ```
python data_generation_by_gpt4.py python scripts/data_generation_by_gpt4.py
``` ```
We provide sample data in anytoolbench.json file. We provide sample data in [anytoolbench.json](./anytoolbench.json) file.
# 🚗 Run AnyTool # 🚗 Run AnyTool
Fill your OpenAI config and toolbench key into the config.py (see config_example.py). Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py).
Run ToolBench Experiment on ToolBench
``` ```
python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64 python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64
``` ```
Run AnyToolBench Experiment on AnyToolBench
``` ```
python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json --max_api_number 64 python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json --max_api_number 64
``` ```

BIN
assets/anytool.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

View file

@ -2,18 +2,10 @@ import zipfile
import os import os
import json import json
from copy import deepcopy from copy import deepcopy
# Extract the new zip file
# with zipfile.ZipFile(zip_file_path_small, 'r') as zip_ref:
# zip_ref.extractall(extracted_folder_path_small)
extracted_folder_path_small = 'data/toolenv/tools' extracted_folder_path_small = 'data/toolenv/tools'
# api_test_results = json.load(open('api_test_results_with_docs2.json', 'r', encoding='utf-8'))
# Walk through the extracted files and read the JSON data # Walk through the extracted files and read the JSON data
detailed_data_small = {} # Initialize an empty dictionary to store the extracted data detailed_data = {} # Initialize an empty dictionary to store the extracted data
cnt = 0 cnt = 0
api_name_list = [] api_name_list = []
data_for_retrieval = [] data_for_retrieval = []
@ -34,26 +26,17 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
tool_name = json_data['tool_name'] tool_name = json_data['tool_name']
api_list = json_data.get('api_list', []) api_list = json_data.get('api_list', [])
# Extract necessary data for each API and organize it in the dictionary # Extract necessary data for each API and organize it in the dictionary
if category not in detailed_data_small: if category not in detailed_data:
detailed_data_small[category] = {} detailed_data[category] = {}
if tool_name not in detailed_data_small[category]: if tool_name not in detailed_data[category]:
detailed_data_small[category][tool_name] = {"api_list": []} detailed_data[category][tool_name] = {"api_list": []}
else: else:
tool_name += '_new' tool_name += '_new'
raise ValueError('duplicate tool name') raise ValueError('duplicate tool name')
detailed_data_small[category][tool_name] = {"api_list": []} detailed_data[category][tool_name] = {"api_list": []}
for api in api_list: for api in api_list:
cnt += 1 cnt += 1
api_name = api.get('name', 'Unknown API') api_name = api.get('name', 'Unknown API')
# try:
# if api_test_results[category][tool_name][api_name]["result"]['return_type'] == "inalive":
# print('remove')
# continue
# except:
# print(category, tool_name, api_name)
# pass
# if api_name in api_name_list:
# raise Exception('duplicate api name')
api_name_list.append(api_name) api_name_list.append(api_name)
description = api.get('description', 'No description available.') description = api.get('description', 'No description available.')
required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])] required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
@ -61,7 +44,6 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
test_endpoint = api.get('test_endpoint', '') test_endpoint = api.get('test_endpoint', '')
tool_description = json_data.get('tool_description', 'No description available.'), tool_description = json_data.get('tool_description', 'No description available.'),
# Organizing the data # Organizing the data
# print(len(detailed_data_small[category][tool_name]['api_list']))
if tool_description is not None: if tool_description is not None:
tool_description = tool_description[:100] tool_description = tool_description[:100]
if description is not None: if description is not None:
@ -75,36 +57,22 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
"required_parameters": required_parameters, "required_parameters": required_parameters,
"optional_parameters": optional_parameters, "optional_parameters": optional_parameters,
}) })
detailed_data_small[category][tool_name]["api_list"].append({ detailed_data[category][tool_name]["api_list"].append({
"name": api_name, "name": api_name,
"description": description, "description": description,
"required_parameters": required_parameters, "required_parameters": required_parameters,
"optional_parameters": optional_parameters, "optional_parameters": optional_parameters,
# "test_endpoint": test_endpoint # "test_endpoint": test_endpoint
}) })
# except Exception as e:
# Store the error message if we fail to process a file
# if category not in detailed_data_small:
# detailed_data_small[category] = {}
# detailed_data_small[category][file] = {"error": str(e)}
# Verifying the structure of the detailed_data_small by displaying a sample
# sample_detailed_data_small = {
# category: {
# tool_name: detailed_data_small[category][tool_name]
# for tool_name in list(detailed_data_small[category].keys())[:1]
# }
# for category in list(detailed_data_small.keys())[:3]
# }
cnt = 0 cnt = 0
for category in detailed_data_small: for category in detailed_data:
for tool_name in detailed_data_small[category]: for tool_name in detailed_data[category]:
cnt += len(detailed_data_small[category][tool_name]['api_list']) cnt += len(detailed_data[category][tool_name]['api_list'])
print('total api number:', cnt) print('total api number:', cnt)
# json.dump(detailed_data_small, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False) # json.dump(detailed_data, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
print(len(data_for_retrieval)) # print(len(data_for_retrieval))
json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False) # json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
json.dump(detailed_data_small, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False) json.dump(detailed_data, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
print(cnt) print(cnt)

File diff suppressed because it is too large Load diff