110 lines
5.3 KiB
Python
110 lines
5.3 KiB
Python
import zipfile
|
|
import os
|
|
import json
|
|
from copy import deepcopy
|
|
# Build two JSON summaries of the tool definitions found on disk:
#   * data_for_retrieval.json - one flat record per API (for retrieval/indexing)
#   * api_details.json        - the same APIs nested as category -> tool -> api_list
#
# The zip extraction that used to precede this step is disabled, so the folder
# below is expected to already contain the extracted tool JSON files.
extracted_folder_path_small = 'data/toolenv/tools'

# Tool and API descriptions are cut to this many characters in both outputs.
DESCRIPTION_MAX_CHARS = 100


def _shorten(text, limit=DESCRIPTION_MAX_CHARS):
    """Return at most *limit* leading characters of *text*; ``None`` stays ``None``."""
    if text is None:
        return None
    return text[:limit]


def _parameter_names(parameters):
    """Return the ``name`` of each parameter spec.

    A missing or ``null`` parameter list is treated as empty; a spec without a
    ``name`` key is reported as ``'Unknown Parameter'``.
    """
    return [param.get('name', 'Unknown Parameter') for param in parameters or []]


def collect_tool_apis(tools_root):
    """Read every ``.json`` tool file below *tools_root* and summarise its APIs.

    The category of a tool is the name of the directory that directly contains
    its JSON file. The tool name is read from the ``name`` key, falling back to
    ``tool_name``.

    Args:
        tools_root: Directory to walk recursively.

    Returns:
        A ``(detailed_data, retrieval_rows, api_names)`` tuple where
        ``detailed_data`` maps category -> tool name -> ``{"api_list": [...]}``,
        ``retrieval_rows`` is a flat list with one dict per API, and
        ``api_names`` lists every API name in the order encountered.

    Raises:
        ValueError: If two files in the same category declare the same tool name.
        KeyError: If a tool file has neither a ``name`` nor a ``tool_name`` key.
    """
    detailed_data = {}
    retrieval_rows = []
    api_names = []

    for root, _dirs, files in os.walk(tools_root):
        for file_name in files:
            # Only .json files describe tools; skip everything else.
            if not file_name.endswith('.json'):
                continue

            file_path = os.path.join(root, file_name)
            print(file_path)
            # Parent directory name, independent of the platform's path separator.
            category = os.path.basename(os.path.dirname(file_path))

            with open(file_path, 'r', encoding='utf-8') as json_file:
                json_data = json.load(json_file)

            if 'name' in json_data:
                tool_name = json_data['name']
            else:
                tool_name = json_data['tool_name']

            tools_in_category = detailed_data.setdefault(category, {})
            if tool_name in tools_in_category:
                raise ValueError('duplicate tool name')
            tool_entry = {"api_list": []}
            tools_in_category[tool_name] = tool_entry

            # The tool description is the same for every API of the tool, so it
            # is computed once. It must be a plain string: a stray trailing
            # comma previously turned it into a 1-tuple, which skipped the
            # truncation and was serialised to JSON as a one-element array.
            tool_description = _shorten(
                json_data.get('tool_description', 'No description available.')
            )

            for api in json_data.get('api_list') or []:
                api_name = api.get('name', 'Unknown API')
                api_names.append(api_name)

                description = _shorten(
                    api.get('description', 'No description available.')
                )
                required_parameters = _parameter_names(api.get('required_parameters'))
                optional_parameters = _parameter_names(api.get('optional_parameters'))

                retrieval_rows.append({
                    "category_name": category,
                    "tool_name": tool_name,
                    "api_name": api_name,
                    "tool_description": tool_description,
                    "api_description": description,
                    "required_parameters": required_parameters,
                    "optional_parameters": optional_parameters,
                })
                tool_entry["api_list"].append({
                    "name": api_name,
                    "description": description,
                    "required_parameters": required_parameters,
                    "optional_parameters": optional_parameters,
                })

    return detailed_data, retrieval_rows, api_names


detailed_data_small, data_for_retrieval, api_name_list = collect_tool_apis(
    extracted_folder_path_small
)

# Total number of APIs across all categories and tools.
cnt = sum(
    len(tool['api_list'])
    for tools in detailed_data_small.values()
    for tool in tools.values()
)
print('total api number:', cnt)

print(len(data_for_retrieval))
# Use context managers so both output files are flushed and closed.
with open('data_for_retrieval.json', 'w', encoding='utf-8') as retrieval_file:
    json.dump(data_for_retrieval, retrieval_file, indent=4, ensure_ascii=False)
with open('api_details.json', 'w', encoding='utf-8') as details_file:
    json.dump(detailed_data_small, details_file, indent=4, ensure_ascii=False)
print(cnt)
|
|
|