135 lines
No EOL
6.8 KiB
Python
135 lines
No EOL
6.8 KiB
Python
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
from typing import Optional, List, Mapping, Any
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
from termcolor import colored
|
|
import time
|
|
from typing import Optional
|
|
from transformers import (
|
|
AutoTokenizer,
|
|
AutoModelForCausalLM
|
|
)
|
|
from toolbench.utils import process_system_message
|
|
from toolbench.model.model_adapter import get_conversation_template
|
|
from toolbench.inference.utils import SimpleChatIO, generate_stream, react_parser
|
|
|
|
|
|
class LlamaModel:
|
|
def __init__(self, model_name_or_path: str, template:str="tool-llama-single-round", device: str="cuda", cpu_offloading: bool=False, max_sequence_length: int=2048) -> None:
|
|
super().__init__()
|
|
self.model_name = model_name_or_path
|
|
self.template = template
|
|
self.max_sequence_length = max_sequence_length
|
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False, model_max_length=self.max_sequence_length)
|
|
self.model = AutoModelForCausalLM.from_pretrained(
|
|
model_name_or_path, low_cpu_mem_usage=True
|
|
)
|
|
if self.tokenizer.pad_token_id == None:
|
|
self.tokenizer.add_special_tokens({"bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>"})
|
|
self.model.resize_token_embeddings(len(self.tokenizer))
|
|
self.use_gpu = (True if device == "cuda" else False)
|
|
if (device == "cuda" and not cpu_offloading) or device == "mps":
|
|
self.model.to(device)
|
|
self.chatio = SimpleChatIO()
|
|
|
|
def prediction(self, prompt: str, stop: Optional[List[str]] = None) -> str:
|
|
gen_params = {
|
|
"model": "",
|
|
"prompt": prompt,
|
|
"temperature": 0.5,
|
|
"max_new_tokens": 512,
|
|
"stop": "</s>",
|
|
"stop_token_ids": None,
|
|
"echo": False
|
|
}
|
|
generate_stream_func = generate_stream
|
|
output_stream = generate_stream_func(self.model, self.tokenizer, gen_params, "cuda", self.max_sequence_length, force_generate=True)
|
|
outputs = self.chatio.return_output(output_stream)
|
|
prediction = outputs.strip()
|
|
return prediction
|
|
|
|
def add_message(self, message):
|
|
self.conversation_history.append(message)
|
|
|
|
def change_messages(self,messages):
|
|
self.conversation_history = messages
|
|
|
|
def display_conversation(self, detailed=False):
|
|
role_to_color = {
|
|
"system": "red",
|
|
"user": "green",
|
|
"assistant": "blue",
|
|
"function": "magenta",
|
|
}
|
|
print("before_print"+"*"*50)
|
|
for message in self.conversation_history:
|
|
print_obj = f"{message['role']}: {message['content']} "
|
|
if "function_call" in message.keys():
|
|
print_obj = print_obj + f"function_call: {message['function_call']}"
|
|
print_obj += ""
|
|
print(
|
|
colored(
|
|
print_obj,
|
|
role_to_color[message["role"]],
|
|
)
|
|
)
|
|
print("end_print"+"*"*50)
|
|
|
|
def parse(self,functions,process_id,**args):
|
|
conv = get_conversation_template(self.template)
|
|
if self.template == "tool-llama":
|
|
roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
|
|
elif self.template == "tool-llama-single-round" or self.template == "tool-llama-multi-rounds":
|
|
roles = {"system": conv.roles[0], "user": conv.roles[1], "function": conv.roles[2], "assistant": conv.roles[3]}
|
|
|
|
self.time = time.time()
|
|
conversation_history = self.conversation_history
|
|
prompt = ''
|
|
for message in conversation_history:
|
|
role = roles[message['role']]
|
|
content = message['content']
|
|
if role == "System" and functions != []:
|
|
content = process_system_message(content, functions)
|
|
prompt += f"{role}: {content}\n"
|
|
prompt += "Assistant:\n"
|
|
if functions != []:
|
|
predictions = self.prediction(prompt)
|
|
else:
|
|
predictions = self.prediction(prompt)
|
|
|
|
decoded_token_len = len(self.tokenizer(predictions))
|
|
if process_id == 0:
|
|
print(f"[process({process_id})]total tokens: {decoded_token_len}")
|
|
|
|
thought, action, action_input = react_parser(predictions)
|
|
if len(thought.strip()) > 1:
|
|
print(thought)
|
|
# input()
|
|
message = {
|
|
"role": "assistant",
|
|
"content": thought,
|
|
"function_call": {
|
|
"name": action,
|
|
"arguments": action_input
|
|
}
|
|
}
|
|
return message, 0, decoded_token_len
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# can accept all huggingface LlamaModel family
|
|
llm = LlamaModel("decapoda-research/llama-7b-hf")
|
|
messages = [
|
|
{'role': 'system', 'content': '''You are AutoGPT, you can use many tools(functions) to do
|
|
the following task.\nFirst I will give you the task description, and your task start.\nAt each step, you need to give your thought to analyze the status now and what to do next, with a function call to actually excute your step.\nAfter the call, you will get the call result, and you are now in a new state.\nThen you will analyze your status now, then decide what to do next...\nAfter many (Thought-call) pairs, you finally perform the task, then you can give your finial answer.\nRemember: \n1.the state change is , you can\'t go
|
|
back to the former state, if you want to restart the task, say "I give up and restart".\n2.All the thought is short, at most in 5 sentence.\nLet\'s Begin!\nTask description: Use numbers and basic arithmetic operations (+ - * /) to obtain exactly one number=24. Each
|
|
step, you are only allowed to choose two of the left numbers to obtain a new number. For example, you can combine [3,13,9,7] as 7*9 - 3*13 = 24.\nRemember:\n1.all of the number must be used , and must be used ONCE. So Only when left numbers is exact 24, you will win. So you don\'t succeed when left number = [24, 5]. You succeed when left number = [24]. \n2.all the try takes exactly 3 steps, look
|
|
at the input format'''},
|
|
{'role': 'user', 'content': '\nThe real task input is: [1, 2, 4, 7]\nBegin!\n'}
|
|
]
|
|
functions = [{'name': 'play_24', 'description': '''make your current conbine with the format "x operation y = z (left: aaa) " like "1+2=3, (left: 3 5 7)", then I will tell you whether you win. This is the ONLY way
|
|
to interact with the game, and the total process of a input use 3 steps of call, each step you can only combine 2 of the left numbers, so the count of left numbers decrease from 4 to 1''','parameters':{'type': 'object', 'properties':{}}}]#, 'parameters': {'type': 'object', 'properties': {'input': {'type': 'string', 'description': 'describe what number you want to conbine, and how to conbine.'}}, 'required': ['input']}}]
|
|
|
|
llm.change_messages(messages)
|
|
output = llm.parse(functions=functions)
|
|
print(output) |