1
This commit is contained in:
parent
b802e0bc44
commit
f5d0c5a322
11 changed files with 28 additions and 32669 deletions
45
.gitignore
vendored
45
.gitignore
vendored
|
|
@ -1,45 +0,0 @@
|
||||||
data/
|
|
||||||
data
|
|
||||||
data_0830/
|
|
||||||
data*.zip
|
|
||||||
*.zip
|
|
||||||
*io.txt
|
|
||||||
error.txt
|
|
||||||
miss*.txt
|
|
||||||
output.txt
|
|
||||||
token_count_in.txt
|
|
||||||
token_count_out.txt
|
|
||||||
*local.sh
|
|
||||||
*.DS_store
|
|
||||||
openchat*/
|
|
||||||
toolllama*/
|
|
||||||
ws/
|
|
||||||
.history/
|
|
||||||
reproduction_data*/
|
|
||||||
output/
|
|
||||||
*result/
|
|
||||||
result*/
|
|
||||||
__MACOSX/
|
|
||||||
api_test_results_with_docs.json
|
|
||||||
customized_api_test_results_with_docs.json
|
|
||||||
model_list.txt
|
|
||||||
run.bash
|
|
||||||
tool_data*
|
|
||||||
api_test_results*
|
|
||||||
api_details.json
|
|
||||||
api_details*
|
|
||||||
category_tool_details*
|
|
||||||
config.py
|
|
||||||
*dy.py
|
|
||||||
*dy.json
|
|
||||||
OAI_CONFIG_LIST
|
|
||||||
repos/
|
|
||||||
rapidapi_key_list.json
|
|
||||||
openai_utils_dy.py
|
|
||||||
.chroma/
|
|
||||||
|
|
||||||
*.pyc
|
|
||||||
**/__pycache__
|
|
||||||
.vscode/
|
|
||||||
.cache/42/
|
|
||||||
retrieval_model/
|
|
||||||
24
README.md
24
README.md
|
|
@ -1,9 +1,9 @@
|
||||||
# AnyTool
|
# AnyTool
|
||||||

|

|
||||||
<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a> <a href='https://github.com/buaacyw/GaussianEditor/blob/master/LICENSE.txt'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
|
<a href='https://arxiv.org/abs/2402.04253'><img src='https://img.shields.io/badge/arXiv-2402.04253-b31b1b.svg'></a> <a href='https://github.com/dyabel/AnyTool/blob/public/LICENSE'><img src='https://img.shields.io/badge/License-Apache-blue'></a>
|
||||||
|
|
||||||
This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253)
|
This is the implementation of the paper [AnyTool: Self-Reflective, Hierarchical Agents for Large-Scale API Calls](https://arxiv.org/abs/2402.04253)
|
||||||

|

|
||||||
|
|
||||||
# 🔧 Installation
|
# 🔧 Installation
|
||||||
## ✅ Dependencies
|
## ✅ Dependencies
|
||||||
|
|
@ -14,6 +14,10 @@ Require Python 3.9+
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# OpenAI GPT API config
|
||||||
|
Fill your OpenAI GPT API config and ToolBench key into config.py (see config_example.py).
|
||||||
|
|
||||||
|
|
||||||
# 🔆 Data Preparation
|
# 🔆 Data Preparation
|
||||||
**ToolBench**
|
**ToolBench**
|
||||||
|
|
||||||
|
|
@ -23,30 +27,30 @@ Refer to [ToolBench](https://github.com/OpenBMB/ToolBench).
|
||||||
|
|
||||||
You should prepare the ToolBench data first. Make sure you have the directory of data/toolenv/tools
|
You should prepare the ToolBench data first. Make sure you have the directory of data/toolenv/tools
|
||||||
```
|
```
|
||||||
python extract_api_details.py
|
python scripts/extract_api_details.py
|
||||||
python extract_category_tool_details.py
|
python scripts/extract_category_tool_details.py
|
||||||
python extract_tool_database.py
|
python scripts/extract_tool_database.py
|
||||||
```
|
```
|
||||||
|
|
||||||
**AnyToolBench**
|
**AnyToolBench**
|
||||||
|
|
||||||
Generation script
|
Generation script
|
||||||
```
|
```
|
||||||
python data_generation_by_gpt4.py
|
python scripts/data_generation_by_gpt4.py
|
||||||
```
|
```
|
||||||
|
|
||||||
We provide sample data in anytoolbench.json file.
|
We provide sample data in the [anytoolbench.json](anytoolbench.json) file.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# 🚗 Run AnyTool
|
# 🚗 Run AnyTool
|
||||||
Fill your OpenAI config and toolbench key into the config.py (see config_example.py).
|
Fill your OpenAI GPT API config and toolbench key into the config.py (see config_example.py).
|
||||||
|
|
||||||
Run ToolBench
|
Experiment on ToolBench
|
||||||
```
|
```
|
||||||
python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64
|
python anytool.py --output_dir result/test_instruction/G1_instruction --query_path data/test_instruction/G1_instruction.json --max_api_number 64
|
||||||
```
|
```
|
||||||
Run AnyToolBench
|
Experiment on AnyToolBench
|
||||||
```
|
```
|
||||||
python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json -max_api_number 64
|
python anytool.py --output_dir result/anytoolbench --query_path anytoolbench.json --max_api_number 64
|
||||||
```
|
```
|
||||||
|
|
|
||||||
BIN
assets/anytool.png
Normal file
BIN
assets/anytool.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 45 KiB |
|
|
@ -2,18 +2,10 @@ import zipfile
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
# Extract the new zip file
|
|
||||||
# with zipfile.ZipFile(zip_file_path_small, 'r') as zip_ref:
|
|
||||||
# zip_ref.extractall(extracted_folder_path_small)
|
|
||||||
extracted_folder_path_small = 'data/toolenv/tools'
|
extracted_folder_path_small = 'data/toolenv/tools'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# api_test_results = json.load(open('api_test_results_with_docs2.json', 'r', encoding='utf-8'))
|
|
||||||
|
|
||||||
|
|
||||||
# Walk through the extracted files and read the JSON data
|
# Walk through the extracted files and read the JSON data
|
||||||
detailed_data_small = {} # Initialize an empty dictionary to store the extracted data
|
detailed_data = {} # Initialize an empty dictionary to store the extracted data
|
||||||
cnt = 0
|
cnt = 0
|
||||||
api_name_list = []
|
api_name_list = []
|
||||||
data_for_retrieval = []
|
data_for_retrieval = []
|
||||||
|
|
@ -34,26 +26,17 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
|
||||||
tool_name = json_data['tool_name']
|
tool_name = json_data['tool_name']
|
||||||
api_list = json_data.get('api_list', [])
|
api_list = json_data.get('api_list', [])
|
||||||
# Extract necessary data for each API and organize it in the dictionary
|
# Extract necessary data for each API and organize it in the dictionary
|
||||||
if category not in detailed_data_small:
|
if category not in detailed_data:
|
||||||
detailed_data_small[category] = {}
|
detailed_data[category] = {}
|
||||||
if tool_name not in detailed_data_small[category]:
|
if tool_name not in detailed_data[category]:
|
||||||
detailed_data_small[category][tool_name] = {"api_list": []}
|
detailed_data[category][tool_name] = {"api_list": []}
|
||||||
else:
|
else:
|
||||||
tool_name += '_new'
|
tool_name += '_new'
|
||||||
raise ValueError('duplicate tool name')
|
raise ValueError('duplicate tool name')
|
||||||
detailed_data_small[category][tool_name] = {"api_list": []}
|
detailed_data[category][tool_name] = {"api_list": []}
|
||||||
for api in api_list:
|
for api in api_list:
|
||||||
cnt += 1
|
cnt += 1
|
||||||
api_name = api.get('name', 'Unknown API')
|
api_name = api.get('name', 'Unknown API')
|
||||||
# try:
|
|
||||||
# if api_test_results[category][tool_name][api_name]["result"]['return_type'] == "inalive":
|
|
||||||
# print('remove')
|
|
||||||
# continue
|
|
||||||
# except:
|
|
||||||
# print(category, tool_name, api_name)
|
|
||||||
# pass
|
|
||||||
# if api_name in api_name_list:
|
|
||||||
# raise Exception('duplicate api name')
|
|
||||||
api_name_list.append(api_name)
|
api_name_list.append(api_name)
|
||||||
description = api.get('description', 'No description available.')
|
description = api.get('description', 'No description available.')
|
||||||
required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
|
required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
|
||||||
|
|
@ -61,7 +44,6 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
|
||||||
test_endpoint = api.get('test_endpoint', '')
|
test_endpoint = api.get('test_endpoint', '')
|
||||||
tool_description = json_data.get('tool_description', 'No description available.'),
|
tool_description = json_data.get('tool_description', 'No description available.'),
|
||||||
# Organizing the data
|
# Organizing the data
|
||||||
# print(len(detailed_data_small[category][tool_name]['api_list']))
|
|
||||||
if tool_description is not None:
|
if tool_description is not None:
|
||||||
tool_description = tool_description[:100]
|
tool_description = tool_description[:100]
|
||||||
if description is not None:
|
if description is not None:
|
||||||
|
|
@ -75,36 +57,22 @@ for root, dirs, files in os.walk(extracted_folder_path_small):
|
||||||
"required_parameters": required_parameters,
|
"required_parameters": required_parameters,
|
||||||
"optional_parameters": optional_parameters,
|
"optional_parameters": optional_parameters,
|
||||||
})
|
})
|
||||||
detailed_data_small[category][tool_name]["api_list"].append({
|
detailed_data[category][tool_name]["api_list"].append({
|
||||||
"name": api_name,
|
"name": api_name,
|
||||||
"description": description,
|
"description": description,
|
||||||
"required_parameters": required_parameters,
|
"required_parameters": required_parameters,
|
||||||
"optional_parameters": optional_parameters,
|
"optional_parameters": optional_parameters,
|
||||||
# "test_endpoint": test_endpoint
|
# "test_endpoint": test_endpoint
|
||||||
})
|
})
|
||||||
# except Exception as e:
|
|
||||||
# Store the error message if we fail to process a file
|
|
||||||
# if category not in detailed_data_small:
|
|
||||||
# detailed_data_small[category] = {}
|
|
||||||
# detailed_data_small[category][file] = {"error": str(e)}
|
|
||||||
|
|
||||||
# Verifying the structure of the detailed_data_small by displaying a sample
|
|
||||||
# sample_detailed_data_small = {
|
|
||||||
# category: {
|
|
||||||
# tool_name: detailed_data_small[category][tool_name]
|
|
||||||
# for tool_name in list(detailed_data_small[category].keys())[:1]
|
|
||||||
# }
|
|
||||||
# for category in list(detailed_data_small.keys())[:3]
|
|
||||||
# }
|
|
||||||
cnt = 0
|
cnt = 0
|
||||||
for category in detailed_data_small:
|
for category in detailed_data:
|
||||||
for tool_name in detailed_data_small[category]:
|
for tool_name in detailed_data[category]:
|
||||||
cnt += len(detailed_data_small[category][tool_name]['api_list'])
|
cnt += len(detailed_data[category][tool_name]['api_list'])
|
||||||
print('total api number:', cnt)
|
print('total api number:', cnt)
|
||||||
|
|
||||||
# json.dump(detailed_data_small, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
# json.dump(detailed_data, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||||
print(len(data_for_retrieval))
|
# print(len(data_for_retrieval))
|
||||||
json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
# json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||||
json.dump(detailed_data_small, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
json.dump(detailed_data, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
|
||||||
print(cnt)
|
print(cnt)
|
||||||
|
|
||||||
32568
tool_details.json
32568
tool_details.json
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue