better model mgmt

2024-11-01 21:11:23 -03:00 · 2024-11-01 21:11:23 -03:00 · 3b262a63f4
commit 3b262a63f4
parent 8734b1803c
6 changed files with 34 additions and 119 deletions
--- a/open_notebook/graphs/chat.py
+++ b/open_notebook/graphs/chat.py
@ -10,11 +10,9 @@ from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict

 from open_notebook.config import LANGGRAPH_CHECKPOINT_FILE
-from open_notebook.domain.models import DefaultModels
 from open_notebook.domain.notebook import Notebook
-from open_notebook.graphs.utils import run_pattern
-
-DEFAULT_MODELS = DefaultModels.load()
+from open_notebook.graphs.utils import provision_model
+from open_notebook.prompter import Prompter


 class ThreadState(TypedDict):
@ -25,15 +23,11 @@ class ThreadState(TypedDict):


 def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict:
-    model_id = config.get("configurable", {}).get(
-        "model_id", DEFAULT_MODELS.default_chat_model
-    )
-    ai_message = run_pattern(
-        "chat",
-        model_id,
-        messages=state["messages"],
-        state=state,
+    system_prompt = Prompter(prompt_template="chat").render(data=state)
+    model = provision_model(
+        str(system_prompt) + str(state.get("messages", [])), config, "chat"
    )
+    ai_message = model.invoke([system_prompt] + state.get("messages", []))
    return {"messages": ai_message}


--- a/open_notebook/graphs/multipattern.py
+++ b/open_notebook/graphs/multipattern.py
@ -7,11 +7,8 @@ from langchain_core.runnables import (
 from langgraph.graph import END, START, StateGraph
 from typing_extensions import Annotated, TypedDict

-from open_notebook.domain.models import DefaultModels
 from open_notebook.graphs.utils import run_pattern

-DEFAULT_MODELS = DefaultModels.load()
-

 class PatternChainState(TypedDict):
    content_stack: Annotated[Sequence[str], operator.add]
@ -20,9 +17,6 @@ class PatternChainState(TypedDict):


 def call_model(state: dict, config: RunnableConfig) -> dict:
-    model_id = config.get("configurable", {}).get(
-        "model_id", DEFAULT_MODELS.default_transformation_model
-    )
    patterns = state["patterns"]
    current_transformation = patterns.pop(0)
    if current_transformation.startswith("patterns/"):
@ -36,7 +30,7 @@ def call_model(state: dict, config: RunnableConfig) -> dict:

    transformation_result = run_pattern(
        pattern_name=current_transformation,
-        model_id=model_id,
+        config=config,
        state=input_args,
    )
    return {
--- a/open_notebook/graphs/utils.py
+++ b/open_notebook/graphs/utils.py
@ -1,39 +1,48 @@
 from langchain.output_parsers import OutputFixingParser
+from langchain_core.messages import AIMessage
 from loguru import logger

-from open_notebook.domain.models import DefaultModels
 from open_notebook.models import model_manager
 from open_notebook.prompter import Prompter
 from open_notebook.utils import token_count


+def provision_model(content, config, default_type):
+    """
+    Returns the best model to use based on the context size and on whether there is a specific model being requested in Config.
+    If context > 105_000, returns the large_context_model
+    If model_id is specified in Config, returns that model
+    Otherwise, returns the default model for the given type
+    """
+    tokens = token_count(content)
+
+    if tokens > 105_000:
+        logger.debug(
+            f"Using large context model because the content has {tokens} tokens"
+        )
+        return model_manager.get_default_model("large_context")
+    elif config.get("configurable", {}).get("model_id"):
+        return model_manager.get_model(config.get("configurable", {}).get("model_id"))
+    else:
+        return model_manager.get_default_model(default_type)
+
+
+# todo: turn into a graph
 def run_pattern(
    pattern_name: str,
-    model_id=None,
+    config,
    messages=[],
    state: dict = {},
    parser=None,
    output_fixing_model_id=None,
-) -> dict:
+) -> AIMessage:
    system_prompt = Prompter(prompt_template=pattern_name, parser=parser).render(
        data=state
    )
-    DEFAULT_MODELS = DefaultModels.load()
-    tokens = token_count(str(system_prompt) + str(messages))
-
-    if tokens > 105_000:
-        model_id = DEFAULT_MODELS.large_context_model
-        logger.debug(
-            f"Using large context model ({model_id}) because the content has {tokens} tokens"
-        )
-
-    model_id = (
-        model_id
-        or DEFAULT_MODELS.default_transformation_model
-        or DEFAULT_MODELS.default_chat_model
+    chain = provision_model(
+        str(system_prompt) + str(messages), config, "transformation"
    )

-    chain = model_manager.get_default_model("transformation")
    if parser:
        chain = chain | parser

@ -44,6 +53,7 @@ def run_pattern(
            llm=output_fix_model,
        )

+    # todo: is this if needed?
    if len(messages) > 0:
        response = chain.invoke([system_prompt] + messages)
    else:
--- a/prompts/doc_query.jinja
+++ b/prompts/doc_query.jinja
@ -1,26 +0,0 @@
-
-# BACKGROUND
-
-You are a cognitive assistant that helps me study and research.
-
-# OUR WORKING FRAMEWORK
-
-You have access to some information about the project I am working on
-as well as the content of a specific item I am interested in.
- 
-Your goal is to respond to the question using purely the content in your CONTEXT.
-
-If the content in CONTEXT is not enough to answer the question, do not make up any information and just reply that you can't answer that. 
-Kindly tell the user what sort of things you'd be able to talk about.
-
-# PROJECT INFO
-
-{{ notebook }}
-
-# CONTENT
-
-{{ doc_content }}
-
-# QUESTION
-
-{{ question}}
--- a/prompts/recursive_toc.jinja
+++ b/prompts/recursive_toc.jinja
@ -1,24 +0,0 @@
-
-# SYSTEM ROLE
-You are a content analysis assistant that reads through documents and provides a Table of Contents (ToC) to help users identify what the document covers more easily.
-Your ToC should capture all major topics and transitions in the content and should mention them in the order they appear. 
-
-# TASK
-Analyze the provided content and create a Table of Contents:
- Captures the core topics included in the text
- Gives a small description of what is covered
-
-# INSTRUCTIONS FOR LARGE DOCUMENTS
-
-If you see a PREVIOUS TOC section below, it means that this request is a continuation of a previous request. Most likely to handle context length issues.
-Every time, you should replace the previous toc with the new one, and append the new content to the previous content.
-
-{% if toc %}
-# PREVIOUS TOC
-
-{{toc}}
-{% endif %}
-
-# CONTENT
-
-{{content}}
--- a/prompts/summarize.jinja
+++ b/prompts/summarize.jinja
@ -1,33 +0,0 @@
-
-# SYSTEM ROLE
-You are a content summarization assistant that creates dense, information-rich summaries optimized for machine understanding. Your summaries should capture key concepts with minimal words while maintaining complete, clear sentences.
-
-# TASK
-Analyze the provided content and create a summary that:
- Captures the core concepts and key information
- Uses clear, direct language
- Maintains context from any previous summaries
- Includes relevant topics/tags
- Creates an appropriate title
-
-# OUTPUT SCHEMA
-{'summary': {'type': 'string'},
- 'topics': {'items': {'type': 'string'}, 'type': 'array'},
- 'title': {'type': 'string'}}
-
-# OUTPUT EXAMPLE
-{
-    "title": "The title of the content",
-    "topics": ["topic1", "topic2"],
-    "summary": "The summary of the content"
-}
-
-# CONTENT
-
-{{content}}
-
-{% if summary %}
-# PREVIOUS SUMMARY
-
-{{summary}}
-{% endif %}