1
This commit is contained in:
parent
b802e0bc44
commit
f5d0c5a322
11 changed files with 28 additions and 32669 deletions
45
.gitignore
vendored
45
.gitignore
vendored
|
|
@ -1,45 +0,0 @@
|
|||
data/
|
||||
data
|
||||
data_0830/
|
||||
data*.zip
|
||||
*.zip
|
||||
*io.txt
|
||||
error.txt
|
||||
miss*.txt
|
||||
output.txt
|
||||
token_count_in.txt
|
||||
token_count_out.txt
|
||||
*local.sh
|
||||
*.DS_store
|
||||
openchat*/
|
||||
toolllama*/
|
||||
ws/
|
||||
.history/
|
||||
reproduction_data*/
|
||||
output/
|
||||
*result/
|
||||
result*/
|
||||
__MACOSX/
|
||||
api_test_results_with_docs.json
|
||||
customized_api_test_results_with_docs.json
|
||||
model_list.txt
|
||||
run.bash
|
||||
tool_data*
|
||||
api_test_results*
|
||||
api_details.json
|
||||
api_details*
|
||||
category_tool_details*
|
||||
config.py
|
||||
*dy.py
|
||||
*dy.json
|
||||
OAI_CONFIG_LIST
|
||||
repos/
|
||||
rapidapi_key_list.json
|
||||
openai_utils_dy.py
|
||||
.chroma/
|
||||
|
||||
*.pyc
|
||||
**/__pycache__
|
||||
.vscode/
|
||||
.cache/42/
|
||||
retrieval_model/
|
||||
24
README.md
24
README.md
|
|
@ -1,9 +1,9 @@
|
|||
# AnyTool
|
||||

|
||||
<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a> <a href='https://github.com/buaacyw/GaussianEditor/blob/master/LICENSE.txt'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
|
||||
<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a> <a href='https://github.com/dyabel/AnyTool/blob/public/LICENSE'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
|
||||
|
||||
This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253)
|
||||

|
||||

|
||||
|
||||
# 🔧 Installation
|
||||
## ✅ Dependencies
|
||||
|
|
@ -14,6 +14,10 @@ Require Python 3.9+
|
|||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
# OpenAI GPT API config
|
||||
Fill in your OpenAI GPT API config and ToolBench key in config.py (see config_example.py).
|
||||
|
||||
|
||||
# 🔆 Data Preparation
|
||||
**ToolBench**
|
||||
|
||||
|
|
@ -23,30 +27,30 @@ Refer to [ToolBench](https://github.com/OpenBMB/ToolBench).
|
|||
|
||||
You should prepare the ToolBench data first. Make sure the directory data/toolenv/tools exists.
|
||||
```
|
||||
python extract_api_details.py
|
||||
python extract_category_tool_details.py
|
||||
python extract_tool_database.py
|
||||
python scripts/extract_api_details.py
|
||||
python scripts/extract_category_tool_details.py
|
||||
python scripts/extract_tool_database.py
|
||||
```
|
||||
|
||||
**AnyToolBench**
|
||||
|
||||
Generation script
|
||||
```
|
||||
python data_generation_by_gpt4.py
|
||||
python scripts/data_generation_by_gpt4.py
|
||||
```
|
||||
|
||||
We provide sample data in anytoolbench.json file.
|
||||
We provide sample data in the [anytoolbench.json](anytoolbench.json) file.
|
||||
|
||||
|
||||
|
||||
# 🚗 Run AnyTool
|
||||
Fill your OpenAI config and toolbench key into the config.py (see config_example.py).
|
||||
Fill in your OpenAI GPT API config and ToolBench key in config.py (see config_example.py).
|
||||
|
||||
Run ToolBench
|
||||
Experiment on ToolBench
|
||||
```
|
||||
python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64
|
||||
```
|
||||
Run AnyToolBench
|
||||
Experiment on AnyToolBench
|
||||
```
|
||||
python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json --max_api_number 64
|
||||
```
|
||||
|
|
|
|||
BIN
assets/anytool.png
Normal file
BIN
assets/anytool.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 45 KiB |
|
|
@ -2,18 +2,10 @@ import zipfile
|
|||
import os
|
||||
import json
|
||||
from copy import deepcopy
|
||||
# Extract the new zip file
|
||||
# with zipfile.ZipFile(zip_file_path_small, 'r') as zip_ref:
|
||||
# zip_ref.extractall(extracted_folder_path_small)
|
||||
extracted_folder_path_small = 'data/toolenv/tools'
|
||||
|
||||
|
||||
|
||||
# api_test_results = json.load(open('api_test_results_with_docs2.json', 'r', encoding='utf-8'))
|
||||
|
||||
|
||||
# Walk through the extracted files and read the JSON data
|
||||
detailed_data_small = {} # Initialize an empty dictionary to store the extracted data
|
||||
detailed_data = {} # Initialize an empty dictionary to store the extracted data
|
||||
cnt = 0
|
||||
api_name_list = []
|
||||
data_for_retrieval = []
|
||||
|
|
@ -34,26 +26,17 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
|
|||
tool_name = json_data['tool_name']
|
||||
api_list = json_data.get('api_list', [])
|
||||
# Extract necessary data for each API and organize it in the dictionary
|
||||
if category not in detailed_data_small:
|
||||
detailed_data_small[category] = {}
|
||||
if tool_name not in detailed_data_small[category]:
|
||||
detailed_data_small[category][tool_name] = {"api_list": []}
|
||||
if category not in detailed_data:
|
||||
detailed_data[category] = {}
|
||||
if tool_name not in detailed_data[category]:
|
||||
detailed_data[category][tool_name] = {"api_list": []}
|
||||
else:
|
||||
tool_name += '_new'
|
||||
raise ValueError('duplicate tool name')
|
||||
detailed_data_small[category][tool_name] = {"api_list": []}
|
||||
detailed_data[category][tool_name] = {"api_list": []}
|
||||
for api in api_list:
|
||||
cnt += 1
|
||||
api_name = api.get('name', 'Unknown API')
|
||||
# try:
|
||||
# if api_test_results[category][tool_name][api_name]["result"]['return_type'] == "inalive":
|
||||
# print('remove')
|
||||
# continue
|
||||
# except:
|
||||
# print(category, tool_name, api_name)
|
||||
# pass
|
||||
# if api_name in api_name_list:
|
||||
# raise Exception('duplicate api name')
|
||||
api_name_list.append(api_name)
|
||||
description = api.get('description', 'No description available.')
|
||||
required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
|
||||
|
|
@ -61,7 +44,6 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
|
|||
test_endpoint = api.get('test_endpoint', '')
|
||||
tool_description = json_data.get('tool_description', 'No description available.'),
|
||||
# Organizing the data
|
||||
# print(len(detailed_data_small[category][tool_name]['api_list']))
|
||||
if tool_description is not None:
|
||||
tool_description = tool_description[:100]
|
||||
if description is not None:
|
||||
|
|
@ -75,36 +57,22 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
|
|||
"required_parameters": required_parameters,
|
||||
"optional_parameters": optional_parameters,
|
||||
})
|
||||
detailed_data_small[category][tool_name]["api_list"].append({
|
||||
detailed_data[category][tool_name]["api_list"].append({
|
||||
"name": api_name,
|
||||
"description": description,
|
||||
"required_parameters": required_parameters,
|
||||
"optional_parameters": optional_parameters,
|
||||
# "test_endpoint": test_endpoint
|
||||
})
|
||||
# except Exception as e:
|
||||
# Store the error message if we fail to process a file
|
||||
# if category not in detailed_data_small:
|
||||
# detailed_data_small[category] = {}
|
||||
# detailed_data_small[category][file] = {"error": str(e)}
|
||||
|
||||
# Verifying the structure of the detailed_data_small by displaying a sample
|
||||
# sample_detailed_data_small = {
|
||||
# category: {
|
||||
# tool_name: detailed_data_small[category][tool_name]
|
||||
# for tool_name in list(detailed_data_small[category].keys())[:1]
|
||||
# }
|
||||
# for category in list(detailed_data_small.keys())[:3]
|
||||
# }
|
||||
cnt = 0
|
||||
for category in detailed_data_small:
|
||||
for tool_name in detailed_data_small[category]:
|
||||
cnt += len(detailed_data_small[category][tool_name]['api_list'])
|
||||
for category in detailed_data:
|
||||
for tool_name in detailed_data[category]:
|
||||
cnt += len(detailed_data[category][tool_name]['api_list'])
|
||||
print('total api number:', cnt)
|
||||
|
||||
# json.dump(detailed_data_small, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||
print(len(data_for_retrieval))
|
||||
json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||
json.dump(detailed_data_small, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||
# json.dump(detailed_data, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||
# print(len(data_for_retrieval))
|
||||
# json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||
json.dump(detailed_data, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||
print(cnt)
|
||||
|
||||
32568
tool_details.json
32568
tool_details.json
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue