1

2024-02-29 01:41:48 +00:00 · 2024-02-29 01:41:48 +00:00 · 1182893a23
commit 1182893a23
parent 75c2379b6d
9 changed files with 187 additions and 5677 deletions
--- a/README.md
+++ b/README.md
@ -18,9 +18,9 @@ pip install -r requirements.txt
 **OPENAI API config and the ToolBench key**
-Fill your OpenAI GPT-4 API config and toolbench key into the config.py (see config_example.py). We use Azure OpenAI for all our experiments. You can modify it according to your own configuration. 
+Fill in your OpenAI GPT-4 API config and ToolBench key in config.py (see [config_example.py](./config_example.py) for an example). We use Azure OpenAI for all our experiments; you can modify the settings to match your own configuration.
-Fill out the [form](https://docs.google.com/forms/d/e/1FAIpQLSdqHypmYanWU8ZhuUcrEuM5eFB03WqaqYJzvKUxUe1HzUBB3A/viewform?usp=send_form) to get the toolbench key. If you want to use your own RapidAPI key, you can put your key in the rapidapi_key_list.json
+Fill out the [form](https://docs.google.com/forms/d/e/1FAIpQLSdqHypmYanWU8ZhuUcrEuM5eFB03WqaqYJzvKUxUe1HzUBB3A/viewform?usp=send_form) to get the ToolBench key. If you want to use your own RapidAPI key, you can put it in rapidapi_key_list.json (see [rapidapi_key_list_example.json](./rapidapi_key_list_example.json) for an example).
 **ToolBench**
@ -102,6 +102,32 @@ python scripts/main.py --output_dir result/anytoolbench --query_path anytoolbenc
 ```
 The pass rate can be found in the success_cnt.txt under the output directory.
 # 📏 Experiment Results
 Main results on the filtered ToolBench. We use the pass rate, defined in Eq. 2 and illustrated in Figure 4(b) of our paper, as the metric. All results are reproduced. *: OpenAI’s text-embedding-ada-002; Ref.: reference; Avg.: average; SR: self-reflective.
 | Model        | API Retriever        | Solver                     | Use Ref. APIs | G1 I (%) | G1 T (%) | G1 C (%) | G2 I (%) | G2 C (%) | G3 I (%) | Avg. (%) |
 |--------------|----------------------|----------------------------|---------------|----------|----------|----------|----------|----------|----------|----------|
 | ToolLLM      | OpenAI TE*           | ToolLLaMA w/ DFSDT         |               | 8.7      | 6.8      | 12.0     | 4.7      | 8.2      | 10.5     | 8.5      |
 | ToolLLM      | ToolLLM's            | ToolLLaMA w/ DFSDT         |               | 28.4     | 26.3     | 38.4     | 21.5     | 15.1     | 7.7      | 22.9     |
 | ToolLLM      | ToolLLM's            | GPT-4 w/ DFSDT             |               | 42.6     | 46.2     | 51.4     | 23.4     | 24.5     | 2.6      | 31.8     |
 | ToolLLM      | None                 | ToolLLaMA w/ DFSDT         | ✓             | 29.4     | 31.8     | 37.1     | 19.6     | 22.4     | 13.2     | 25.6     |
 | GPT-4        | None                 | GPT-4 w/ CoT               | ✓             | 31.3     | 34.8     | 47.1     | 27.1     | 34.7     | 2.6      | 29.6     |
 | GPT-4        | None                 | GPT-4 w/ DFSDT             | ✓             | 36.5     | 49.2     | 51.4     | 38.3     | 39.8     | 18.4     | 38.9     |
 | GPT-4        | Plain Agent          | GPT-4 w/ DFSDT             |               | 13.9     | 23.5     | 17.6     | 13.9     | 9.2      | 13.2     | 15.2     |
 | GPT-4        | AutoGen-RAG          | GPT-4 w/ DFSDT             |               | 14.8     | 19.7     | 19.7     | 7.4      | 9.2      | 7.9      | 13.1     |
 | GPT-3.5      | None                 | GPT-3.5 w/ CoT             | ✓             | 37.5     | 37.1     | 42.9     | 24.3     | 22.4     | 5.3      | 28.3     |
 | GPT-3.5      | None                 | GPT-3.5 w/ DFSDT           | ✓             | 39.1     | 40.2     | 48.6     | 31.8     | 25.5     | 15.8     | 33.5     |
 | AnyTool (Ours) | SR Agent           | SR GPT-4 w/ DFSDT          |               | 52.2     | 61.4     | 67.6     | 58.9     | 45.9     | 63.2     | 58.2     |
 Results on our AnyToolBench. All models use the
 DFSDT implementation in the solver. SR: self-reflective;
 PR: pass rate.
 | Method  | API Retriever | Solver   | PR (%) |
 |---------|---------------|----------|--------|
 | ToolLLM | ToolLLM’s     | ToolLLaMA | 18.9   |
 | ToolLLM | ToolLLM’s     | GPT-4    | 36.6   |
 | GPT-4   | Plain Agent   | GPT-4    | 14.0   |
 | AnyTool (Ours) | SR Agent | SR GPT-4 | 73.8   |
 # 👨‍🏫 Acknowledgement
 This repo is built on [ToolBench](https://github.com/OpenBMB/ToolBench).
--- a/anytool/api_database_function.py
+++ b/anytool/api_database_function.py
@ -122,7 +122,7 @@ def get_api_details(category_name: str=None, tool_name: str=None, api_name: str=
            for api in tool_data["api_list"]:
                if api["name"] == api_name:
                    return api
-    return {}
+    return 'api not found'
 def locate_api(api_name: str=None) -> dict:
    """query the details of a specific api"""
@ -161,6 +161,7 @@ def sample_apis(gt_apis, num=200):
            apis.extend(sampled_apis)
    return categories, tools, apis
 get_api_details_function = {
    'name': 'get_api_details',
    'description': 'get the details of a specific api',
@ -305,11 +306,11 @@ def get_tools_descriptions(category_name: str, tool_list: str) -> dict:
            return f'tool name {tool_name} not found'
    return {tool_name: category_tool_details_dict[category_name][tool_name]['tool_description'] for tool_name in tool_list}
-def get_response_example(api_name: str) -> str:
+# def get_response_example(api_name: str) -> str:
-    """get the response example of a specific api"""
+#     """get the response example of a specific api"""
-    api_details = get_api_details(api_name)
+#     api_details = get_api_details(api_name)
-    if api_details is None:
+#     if api_details is None:
-        return 'api name not found'
+#         return 'api name not found'
    # return api_details['response_example']
 split_function = lambda x: x.split("}")
 # # 1. create an RetrieveAssistantAgent instance named "assistant"
--- a/anytool/prompt_template.py
+++ b/anytool/prompt_template.py
@ -110,7 +110,7 @@ selected tool.
 3. If you need detailed information about some tools, gets_tools_descriptions will
 provide it.
 4. For in-depth understanding of an API's functionality, turn to
-get api details.
+get_api_details. Remember, do not make up the API names, use get_apis_in_tool to get the API list.
 Selection and Testing Functions:
 1. As you identify relevant functions, add them to your working list using
 add_apis_into_pool into api pool.
@ -145,6 +145,7 @@ different skill levels and cultural backgrounds. Ensure that the query is
 globally relevant and straightforward, serving a singular purpose without
 diverging into unrelated areas. The complexity of your query should stem from
 the synthesis of information from multiple APIs.
 4.You should finish in 20 steps.
 """.replace('{email}', "devon58425@trackden.com").replace('{phone number}', "+6285360071764").replace('{url}', "https://deepmind.google/")
--- a/anytool/rapidapi.py
+++ b/anytool/rapidapi.py
@ -454,7 +454,6 @@ You have access of the following tools:\n'''
                    # 11 message contains "error" field
                    # 12 error sending request
                    cnt = 0
                    print(111, response['error'], file=open('error.txt','a'))
                    while any([word in response["error"] for word in error_list]):
                        if cnt < len(self.rapidapi_key_list):
                            # if self.use_rapidapi_key or self.api_customization:
--- a/atb_data/anytoolbench.json
+++ b/atb_data/anytoolbench.json
--- a/config_example.py
+++ b/config_example.py
@ -1,7 +1,11 @@
 api_version = ""
 model_name = ""
 api_key = ""
 api_base = ""
 api_type = "azure" # leave it as blank if you do not use azure
 # your openai api key
 api_key = ""
 # the model name you want to use
 model_name = ""
 # your openai api version, leave it as blank if you do not use azure
 api_version = ""
 # your openai endpoint, leave it as blank if you do not use azure
 api_base = ""
 toolbench_key = ""
--- a/openai_utils.py
+++ b/openai_utils.py
@ -15,13 +15,16 @@ args = parse_args()
 output_dir = args.output_dir
 if api_type == "azure":
    from openai import AzureOpenAI as Client
    client = Client(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint = api_base
    )
 else:
    from openai import OpenAI as Client
-client = Client(
+    client = Client(
-api_key=api_key,
+    api_key=api_key,
-api_version=api_version,
+    )
 azure_endpoint = api_base
 )
 # turbo_client = Client(
 # api_key=api_key,
 # api_version=api_version,
--- a/scripts/anytoolbench_generation.py
+++ b/scripts/anytoolbench_generation.py
@ -25,50 +25,6 @@ assert enc.decode(enc.encode("hello world")) == "hello world"
 token_cnt = 0
 error_list = ['Too many requests error...', 'Rate limit...', 'Unsubscribed', 'Unauthorized', 'not working error...', 'Quota','quota', 'Blocked', 'Rate limit', 'Unauthorized error']
 FORMAT_INSTRUCTIONS_CONTINUAL_DATA_GENERATION = """
 You have access to a database of tools and functions (apis). Function is same to api in our context. 
 You need to help me extend a user query which can be answered by the apis in the database.
 The database has the categorites of {categories},
 You can use the meta functions to retrieve the relevant functions. For example, you can use the meta
 function query_tools_in_category to retrieve the available tools of a specific category. Then, you can use the meta
 function query_apis_in_tool to retrieve the api list of a specific tool. 
 If you are unsure about the functioinality of some tools, you can use the meta api query_tool_details to retrieve the details of a specific tool. 
 If you are unsure about the functioinality of some apis, you can use the meta api get_api_details to retrieve the details of a specific api.
 After you get some functions, use the add_apis function to add the functions you find to the available function list which you can call them later.
 Please note that the original function names will be transformed to a standard form, 
 so you should not use the original function names when calling.
 You must synthsize some parameters to test each of these functions!
 You can try multiple times with different parameters.
 Then, you should use the function responses for formulating the query.
 If you find that some functions are not valid now or cannot be used to form a query, use remove_apis to remove the apis from the available api list.
 Please make sure that the extended query contains all the information or parameters needed to call the apis. 
 Do not use ambiguous words like 'a specific', 'my friend'. 
 You should mention the detailed information. The query should be related to the test results of the functions 
 but it should not  mention the tool names or api names. Make sure that extended part can be answered by the current functions.
 If you finished extending the query, 
 call the Finish function with the final extened query and the corresponding extended answer. 
 You must include the original query in the extended query.
 The answer should directly answer the query instead of giving a plan.
 You should call the initial meta functions no more than 20 times.
 The extended part should consist of a minimum of thirty words.
 """
 FORMAT_INSTRUCTIONS_DATA_GENERATION_OPTIMIZED="""
 You are an advanced AutoGPT interface designed for dynamic interaction with a comprehensive database of tools and APIs. Your primary function is to assist in generating user queries that can be resolved using the appropriate APIs within the database. To navigate this task efficiently, you have access to five initial meta APIs: query_all_categories, query_tools_in_category, query_apis_in_tool, query_tool_details, and get_api_details. Additionally, you have the capability to test APIs with the add_apis function and can finalize a process with the Finish function.
 You must articulate your analytical process at each step, providing a rationale for your choices and outlining the next action, all in a succinct manner not exceeding five sentences. Use the initial meta APIs to identify relevant functions, remembering that your total calls to these APIs should not exceed 10. Ensure that each formulated query is comprehensive, containing all necessary information to be addressed by the selected APIs.
 Here is your task:
 Call query_all_categories to start identifying potential API categories relevant to common user queries.
 Select a category and use query_tools_in_category to find available tools within that category.
 Choose a tool and employ query_apis_in_tool to obtain a list of APIs associated with it.
 If clarification on tool functionality is needed, use query_tool_details; similarly, use get_api_details for specifics on an API.
 Test potential APIs with add_apis if you deem it necessary.
 Once the suitable APIs are determined, compose a user query that these API can answer and execute the Finish function with this query.
 Begin your task with an initial analysis based on common user query needs and proceed strategically, justifying your decisions at each step and ensuring efficient use of your limited meta API calls."""
 import os
 import json
 from flask import Flask, jsonify, request
@ -280,6 +236,7 @@ def add_apis(api_list):
        api_name_reflect[openai_function_json["name"]] = pure_api_name
        api2origin[openai_function_json["name"]] = {'category_name': origin_api_list[k]['category_name'], 'tool_name': origin_api_list[k]['tool_name'], 'api_name': origin_api_list[k]['api_name']} 
        print(openai_function_json["name"])
        tool_names.append(standard_tool_name)
        cate_names.append(cate_name)
@ -426,9 +383,8 @@ class CoT_Runner(object):
            # if True:
                result = api_mapping[action_name](**json.loads(action_input))
            except Exception as e:
-                print(e, file=open('output/generation_error.txt','a'))
+                # raise e
                result = 'input format error'
                # result = 'function args should be dict'
            return result, 2
        if action_name == "Finish":
            if len(call_cnt) > 0 and min(call_cnt.values()) == 0:
@ -439,7 +395,7 @@ class CoT_Runner(object):
                return json.dumps({"error": f"{function_never_called} have not been called. You should call them at least once before formulating the final query", "response": ""}), 15
                # return json.dumps({"error": "You should call the each new added function at least once before formulating the final query", "response": ""}), 15
            if len(functions) == 7:
-                return json.dumps({"error": "There must be apis successfully added using the add_apis function, and you should formulate your query based on the found apis", "response": ""}), 15
+                return json.dumps({"error": "There must be apis successfully added using the add_apis_into_api_pool function, and you should formulate your query based on the found apis", "response": ""}), 15
            try:
                json_data = json.loads(action_input,strict=False)
@ -484,7 +440,6 @@ class CoT_Runner(object):
                        except:
                            # return json.dumps({"error": action_name, "response": ""}), 13
                            os.makedirs('output', exist_ok=True)
                            print(payload, file=open('output/timeout.txt','a'))
                            return json.dumps({"error": "connection timeout", "response": ""}), 13
                        if response.status_code != 200:
                            return json.dumps({"error": f"request invalid, data error. status_code={response.status_code}", "response": ""}), 12
@ -505,16 +460,6 @@ class CoT_Runner(object):
                            cnt += 1    
                        else:
                            break
 # 12 error sending request
                    if response["error"] == "API not working error...":
@ -538,25 +483,20 @@ class CoT_Runner(object):
                    #     return json.dumps({"error": f"Timeout error...{e}", "response": ""}), 5
            return json.dumps({"error": f"No such function name: {action_name}", "response": ""}), 1
-    def run(self, query, answer):
+    def run(self):
        messages = [
                    {'role':'system',
                     'content': 'You are QueryGPT, a helpful assistant who can strictly follow my instructions to generate diverse real queries'},
                    #  'The query should be related to the category {random.sample(query_all_categories(), random.randint(2, 3))}
                    ]
-        if len(query) > 0:
+         
-            messages.append(
+        messages.append({'role':'user', 
                {'role':'user',
                'content': FORMAT_INSTRUCTIONS_CONTINUAL_DATA_GENERATION.replace('{categories}', str(random.sample(query_all_categories(),5))) + 'Here is the query generated at the previous step: ' + query + ' And the answer is: ' + answer + 'You should extend this query to involve more api calls and also extend the answer. The extended part should be related to the current query.'})
        else:            
            messages.append({'role':'user', 
                     'content': FORMAT_INSTRUCTIONS_DATA_GENERATION.replace('{generated_queries}', str(generated_query_list[-5:])).replace('{categories}', str(random.sample(query_all_categories(), 49)))})
        i = 0
-        while i < 20:
+        while i < 30:
            print('#'*100)
            print(len(functions), len(raw_api_list))
            # assert len(functions) == len(raw_api_list) + 7, (len(functions), len(raw_api_list))
            print(len(enc.encode(str(messages))), file=open('token_count_in.txt','a'))
            response = call_gpt(
                messages,
                functions
@ -566,12 +506,10 @@ class CoT_Runner(object):
                # messages = messages_old
            elif isinstance(response, str):
                continue
            print(messages)
            # messages_old = deepcopy(messages)
            i = i + 1
            tool_calls = response.choices[0].message.tool_calls
            print('Thought:', response.choices[0].message.content)
            print(len(enc.encode(str(response.choices[0].message.content))), file=open('token_count_out.txt','a'))
            print(response.choices[0].finish_reason)
            if tool_calls:
                messages.append(
@ -607,30 +545,33 @@ class CoT_Runner(object):
        return 'Exceed_max_iterations', messages    
 generated_query_list = ['What is the current weather in Seattle, and what is the weather forecast for the next five days?']       
-
+    
-def generate_main():
+def generate_return_api_main():
    data = {}
-    global functions, tool_names, cate_names, generated_query_list
+    global functions, tool_names, cate_names, generated_query_list, raw_api_list, call_cnt
    while True:
        raw_api_list = []
        call_cnt = {}
        functions = [
-            # {'name': 'query_all_categories', 'description': 'query all categories in the database', 'parameters': {'type': 'object', 'properties': {}}}, {'name': 'query_tools_in_category', 'description': 'query all tools in a specific category', 'parameters': {'type': 'object', 'properties': {'category': {'type': 'string'}}}}, 
+                    get_tools_in_category_function,
-                    get_tools_in_category_function.to_json_schema(),
+                    get_apis_in_tool_function,
-
+                    get_api_details_function,
-                     {'name': 'query_apis_in_tool', 'description': 'query all apis in a specific tool', 'parameters': {'type': 'object', 'properties': {'category': {'type': 'string'}, 'tool_name': {'type': 'string'}},'required': ['category','tool_name']}}, 
+                    get_tools_descriptions_function,
-                     {'name': 'query_tool_details', 'description': 'query the details of a specific tool', 'parameters': {'type': 'object', 'properties': {'tool_name': {'type': 'string'}},'required': ['tool_name']}},
+                    add_apis_into_api_pool_function,
-                     {'name': 'add_apis', 'description': 'add apis to the current available api list. required input to be list of dictionaries describing with the keys category_name, tool_name, api_name', 'parameters': {'type': 'object', 'properties': {'api_list': {'type': 'null'}},'required': ['api_list']}},
+                     remove_apis_function,
                    #  retrieve_context_function.to_json_schema()
                     ]
        finish_func = {
            "name": "Finish",
-            "description": "If you believe that you have obtained a query that can answered by the api database, please call this function to provide the final answer.",
+            "description": "If you believe that you have obtained a query that can answered by the api database, please call this function to provide the query, the corresponding answer and the plan of using the functions to answer the query.",
            "parameters": {
                "type": "object",
                "properties": {
-                    "answer":{"type":"string"}
+                    "query":{"type":"string"},
                    "answer":{"type":"string"},
                    "plan":{"type":"string"},
                },
-                "required": ["answer"]
+                "required": ["query", "answer", 'plan']
            }
            }
@ -640,63 +581,22 @@ def generate_main():
        runner = CoT_Runner()
        result, messages = runner.run()
        if isinstance(result, str):
            continue
        data['result'] = result
-        generated_query_list.append(result['answer'])
+        if 'openai' in result: 
-        return result['answer'], messages
+            return result, messages, raw_api_list
-    
+        if not any([word in result['answer'].lower() for word in exclusion_words]):
-exclusion_words = ["sorry", "apologize", "apology", "unfortunately", "couldn't"]
+            return result, messages, raw_api_list
 def generate_return_api_main():
    data = {}
    global functions, tool_names, cate_names, generated_query_list, raw_api_list, call_cnt
    while True:
        # try:
        if True:
            raw_api_list = []
            call_cnt = {}
            functions = [
                        get_tools_in_category_function,
                        get_apis_in_tool_function,
                        get_api_details_function,
                        get_tools_descriptions_function,
                        add_apis_into_api_pool_function,
                         remove_apis_function,
                         ]
            finish_func = {
                "name": "Finish",
                "description": "If you believe that you have obtained a query that can answered by the api database, please call this function to provide the query, the corresponding answer and the plan of using the functions to answer the query.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query":{"type":"string"},
                        "answer":{"type":"string"},
                        "plan":{"type":"string"},
                    },
                    "required": ["query", "answer", 'plan']
                }
                }
            functions.append(finish_func)
            cate_names = ['' for func in functions]
            tool_names = ['' for func in functions]
            runner = CoT_Runner()
            result, messages = runner.run('', '')
            if isinstance(result, str):
                continue
            data['result'] = result
            if 'openai' in result: 
                return result, messages, raw_api_list
            generated_query_list.append(result['query'])
            if not any([word in result['answer'].lower() for word in exclusion_words]):
                return result, messages, raw_api_list
 import time
 if __name__ == '__main__':
    exclusion_words = ["sorry", "apologize", "apology", "unfortunately", "couldn't"]
    output_path = args.output_path
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    os.makedirs('output', exist_ok=True)
    generated_query_list = []
    query_data = []
    query = ''
    answer = ''
    for i in range(1000):
@ -712,11 +612,9 @@ if __name__ == '__main__':
                    query = result['query']
                    answer = result['answer']
                    plan = result['plan']
                    print(query)
                else:
                    continue
                solved, reason = check_task_solved(query, answer)
                if solved != 'Solved':
                    continue
                break
        except Exception as e:
            raise e
@ -728,11 +626,12 @@ if __name__ == '__main__':
        #         if 'tool_calls' in message:
        #             message['tool_calls'] = [tool_call.json() for tool_call in message['tool_calls']]
        data['generate_messages'] = generate_messages
        generated_query_list.append(query)
-        generated_query_list.append({
+        query_data.append({
            'query': query,
            'final_answer': answer,
            'gt_api_list': api_list,
            'query_id': str(2000000+i)
        })
-        json.dump(generated_query_list, open(output_path, 'w'), indent=4)
+        json.dump(query_data, open(output_path, 'w'), indent=4)
--- a/scripts/main.py
+++ b/scripts/main.py
@ -14,7 +14,7 @@ args = parse_args()
 output_dir = args.output_dir
 raise_error = False
 max_api_number = args.max_api_number
-sem = Semaphore(16)  # 允许同时运行的最大线程数为3
+sem = Semaphore(16)  # maximum number of threads allowed to run concurrently is 16
 class DoNothingContextManager:
    def __enter__(self):
        pass
@ -897,6 +897,7 @@ output_dir = args.output_dir
 query_path = args.query_path
 if __name__ == "__main__":
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs('output', exist_ok=True)
    success_cnt = 0
    pass_cnt = 0
    unsolvable_task_cnt = 0