AnyTool/scripts/extract_api_details.py
2024-02-28 09:05:11 +00:00

78 lines
3.8 KiB
Python

import zipfile
import os
import json
from copy import deepcopy
extracted_folder_path_small = 'data/toolenv/tools'
# Walk through the extracted files and read the JSON data
detailed_data = {} # Initialize an empty dictionary to store the extracted data
cnt = 0
api_name_list = []
data_for_retrieval = []
for root, dirs, files in os.walk(extracted_folder_path_small):
for file in files:
# Ensure we are only processing .json files
if file.endswith(".json"):
file_path = os.path.join(root, file)
# Extract the category name from the file path
print(file_path)
category = file_path.split('/')[-2]
with open(file_path, 'r', encoding='utf-8') as json_file:
# try:
json_data = json.load(json_file)
if 'name' in json_data:
tool_name = json_data['name']
else:
tool_name = json_data['tool_name']
api_list = json_data.get('api_list', [])
# Extract necessary data for each API and organize it in the dictionary
if category not in detailed_data:
detailed_data[category] = {}
if tool_name not in detailed_data[category]:
detailed_data[category][tool_name] = {"api_list": []}
else:
tool_name += '_new'
raise ValueError('duplicate tool name')
detailed_data[category][tool_name] = {"api_list": []}
for api in api_list:
cnt += 1
api_name = api.get('name', 'Unknown API')
api_name_list.append(api_name)
description = api.get('description', 'No description available.')
required_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('required_parameters', [])]
optional_parameters = [param.get('name', 'Unknown Parameter') for param in api.get('optional_parameters', [])]
test_endpoint = api.get('test_endpoint', '')
tool_description = json_data.get('tool_description', 'No description available.'),
# Organizing the data
if tool_description is not None:
tool_description = tool_description[:100]
if description is not None:
description = description[:100]
data_for_retrieval.append({
"category_name": category,
"tool_name": tool_name,
"api_name": api_name,
"tool_description": tool_description,
"api_description": description,
"required_parameters": required_parameters,
"optional_parameters": optional_parameters,
})
detailed_data[category][tool_name]["api_list"].append({
"name": api_name,
"description": description,
"required_parameters": required_parameters,
"optional_parameters": optional_parameters,
# "test_endpoint": test_endpoint
})
cnt = 0
for category in detailed_data:
for tool_name in detailed_data[category]:
cnt += len(detailed_data[category][tool_name]['api_list'])
print('total api number:', cnt)
# json.dump(detailed_data, open('api_details_compressed.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
# print(len(data_for_retrieval))
# json.dump(data_for_retrieval, open('data_for_retrieval.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
json.dump(detailed_data, open('api_details.json', 'w', encoding='utf-8'), indent=4, ensure_ascii=False)
print(cnt)