better model mgmt
This commit is contained in:
parent
8734b1803c
commit
3b262a63f4
6 changed files with 34 additions and 119 deletions
|
|
@ -10,11 +10,9 @@ from langgraph.graph.message import add_messages
|
|||
from typing_extensions import TypedDict
|
||||
|
||||
from open_notebook.config import LANGGRAPH_CHECKPOINT_FILE
|
||||
from open_notebook.domain.models import DefaultModels
|
||||
from open_notebook.domain.notebook import Notebook
|
||||
from open_notebook.graphs.utils import run_pattern
|
||||
|
||||
DEFAULT_MODELS = DefaultModels.load()
|
||||
from open_notebook.graphs.utils import provision_model
|
||||
from open_notebook.prompter import Prompter
|
||||
|
||||
|
||||
class ThreadState(TypedDict):
|
||||
|
|
@ -25,15 +23,11 @@ class ThreadState(TypedDict):
|
|||
|
||||
|
||||
def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict:
|
||||
model_id = config.get("configurable", {}).get(
|
||||
"model_id", DEFAULT_MODELS.default_chat_model
|
||||
)
|
||||
ai_message = run_pattern(
|
||||
"chat",
|
||||
model_id,
|
||||
messages=state["messages"],
|
||||
state=state,
|
||||
system_prompt = Prompter(prompt_template="chat").render(data=state)
|
||||
model = provision_model(
|
||||
str(system_prompt) + str(state.get("messages", [])), config, "chat"
|
||||
)
|
||||
ai_message = model.invoke([system_prompt] + state.get("messages", []))
|
||||
return {"messages": ai_message}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -7,11 +7,8 @@ from langchain_core.runnables import (
|
|||
from langgraph.graph import END, START, StateGraph
|
||||
from typing_extensions import Annotated, TypedDict
|
||||
|
||||
from open_notebook.domain.models import DefaultModels
|
||||
from open_notebook.graphs.utils import run_pattern
|
||||
|
||||
DEFAULT_MODELS = DefaultModels.load()
|
||||
|
||||
|
||||
class PatternChainState(TypedDict):
|
||||
content_stack: Annotated[Sequence[str], operator.add]
|
||||
|
|
@ -20,9 +17,6 @@ class PatternChainState(TypedDict):
|
|||
|
||||
|
||||
def call_model(state: dict, config: RunnableConfig) -> dict:
|
||||
model_id = config.get("configurable", {}).get(
|
||||
"model_id", DEFAULT_MODELS.default_transformation_model
|
||||
)
|
||||
patterns = state["patterns"]
|
||||
current_transformation = patterns.pop(0)
|
||||
if current_transformation.startswith("patterns/"):
|
||||
|
|
@ -36,7 +30,7 @@ def call_model(state: dict, config: RunnableConfig) -> dict:
|
|||
|
||||
transformation_result = run_pattern(
|
||||
pattern_name=current_transformation,
|
||||
model_id=model_id,
|
||||
config=config,
|
||||
state=input_args,
|
||||
)
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -1,39 +1,48 @@
|
|||
from langchain.output_parsers import OutputFixingParser
|
||||
from langchain_core.messages import AIMessage
|
||||
from loguru import logger
|
||||
|
||||
from open_notebook.domain.models import DefaultModels
|
||||
from open_notebook.models import model_manager
|
||||
from open_notebook.prompter import Prompter
|
||||
from open_notebook.utils import token_count
|
||||
|
||||
|
||||
def provision_model(content, config, default_type):
    """
    Pick the model that should handle ``content``.

    Selection order:
      1. When ``content`` counts more than 105_000 tokens, the default
         "large_context" model is returned, regardless of anything else.
      2. Otherwise, when ``config["configurable"]["model_id"]`` is set to a
         truthy value, the model with that id is returned.
      3. Otherwise, the default model registered for ``default_type`` is
         returned.

    ``content`` is whatever ``token_count`` accepts (callers in this module
    pass a string); ``config`` must support ``.get`` like a dict.
    """
    token_total = token_count(content)

    # Oversized inputs always win over an explicitly requested model.
    if token_total > 105_000:
        logger.debug(
            f"Using large context model because the content has {token_total} tokens"
        )
        return model_manager.get_default_model("large_context")

    # The config is only consulted once we know the content fits.
    requested_model_id = config.get("configurable", {}).get("model_id")
    if requested_model_id:
        return model_manager.get_model(requested_model_id)

    return model_manager.get_default_model(default_type)
|
||||
|
||||
|
||||
# todo: turn into a graph
|
||||
def run_pattern(
|
||||
pattern_name: str,
|
||||
model_id=None,
|
||||
config,
|
||||
messages=[],
|
||||
state: dict = {},
|
||||
parser=None,
|
||||
output_fixing_model_id=None,
|
||||
) -> dict:
|
||||
) -> AIMessage:
|
||||
system_prompt = Prompter(prompt_template=pattern_name, parser=parser).render(
|
||||
data=state
|
||||
)
|
||||
DEFAULT_MODELS = DefaultModels.load()
|
||||
tokens = token_count(str(system_prompt) + str(messages))
|
||||
|
||||
if tokens > 105_000:
|
||||
model_id = DEFAULT_MODELS.large_context_model
|
||||
logger.debug(
|
||||
f"Using large context model ({model_id}) because the content has {tokens} tokens"
|
||||
)
|
||||
|
||||
model_id = (
|
||||
model_id
|
||||
or DEFAULT_MODELS.default_transformation_model
|
||||
or DEFAULT_MODELS.default_chat_model
|
||||
chain = provision_model(
|
||||
str(system_prompt) + str(messages), config, "transformation"
|
||||
)
|
||||
|
||||
chain = model_manager.get_default_model("transformation")
|
||||
if parser:
|
||||
chain = chain | parser
|
||||
|
||||
|
|
@ -44,6 +53,7 @@ def run_pattern(
|
|||
llm=output_fix_model,
|
||||
)
|
||||
|
||||
# todo: is this if really needed?
|
||||
if len(messages) > 0:
|
||||
response = chain.invoke([system_prompt] + messages)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
|
||||
# BACKGROUND
|
||||
|
||||
You are a cognitive assistant that helps me study and research.
|
||||
|
||||
# OUR WORKING FRAMEWORK
|
||||
|
||||
You have access to some information about the project I am working on
|
||||
as well as the content of a specific item I am interested in.
|
||||
|
||||
Your goal is to respond to the question using purely the content in your CONTEXT.
|
||||
|
||||
If the content in CONTEXT is not enough to answer the question, do not make up any information and just reply that you can't answer that.
|
||||
Kindly tell the user what sort of things you'd be able to talk about.
|
||||
|
||||
# PROJECT INFO
|
||||
|
||||
{{ notebook }}
|
||||
|
||||
# CONTENT
|
||||
|
||||
{{ doc_content }}
|
||||
|
||||
# QUESTION
|
||||
|
||||
{{ question}}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
|
||||
# SYSTEM ROLE
|
||||
You are a content analysis assistant that reads through documents and provides a Table of Contents (ToC) to help users identify what the document covers more easily.
|
||||
Your ToC should capture all major topics and transitions in the content and should mention them in the order they appear.
|
||||
|
||||
# TASK
|
||||
Analyze the provided content and create a Table of Contents:
|
||||
- Captures the core topics included in the text
|
||||
- Gives a small description of what is covered
|
||||
|
||||
# INSTRUCTIONS FOR LARGE DOCUMENTS
|
||||
|
||||
If you see a PREVIOUS TOC section below, it means that this request is a continuation of a previous request, most likely to handle context length issues.
|
||||
Every time, you should replace the previous toc with the new one, and append the new content to the previous content.
|
||||
|
||||
{% if toc %}
|
||||
# PREVIOUS TOC
|
||||
|
||||
{{toc}}
|
||||
{% endif %}
|
||||
|
||||
# CONTENT
|
||||
|
||||
{{content}}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
|
||||
# SYSTEM ROLE
|
||||
You are a content summarization assistant that creates dense, information-rich summaries optimized for machine understanding. Your summaries should capture key concepts with minimal words while maintaining complete, clear sentences.
|
||||
|
||||
# TASK
|
||||
Analyze the provided content and create a summary that:
|
||||
- Captures the core concepts and key information
|
||||
- Uses clear, direct language
|
||||
- Maintains context from any previous summaries
|
||||
- Includes relevant topics/tags
|
||||
- Creates an appropriate title
|
||||
|
||||
# OUTPUT SCHEMA
|
||||
{'summary': {'type': 'string'},
|
||||
'topics': {'items': {'type': 'string'}, 'type': 'array'},
|
||||
'title': {'type': 'string'}}
|
||||
|
||||
# OUTPUT EXAMPLE
|
||||
{
|
||||
"title": "The title of the content",
|
||||
"topics": ["topic1", "topic2"],
|
||||
"summary": "The summary of the content"
|
||||
}
|
||||
|
||||
# CONTENT
|
||||
|
||||
{{content}}
|
||||
|
||||
{% if summary %}
|
||||
# PREVIOUS SUMMARY
|
||||
|
||||
{{summary}}
|
||||
{% endif %}
|
||||
Loading…
Reference in a new issue