commit
17b3ad010b
7 changed files with 130 additions and 15 deletions
|
|
@ -3,9 +3,7 @@ from typing import Annotated, List
|
|||
|
||||
from ai_prompter import Prompter
|
||||
from langchain_core.output_parsers.pydantic import PydanticOutputParser
|
||||
from langchain_core.runnables import (
|
||||
RunnableConfig,
|
||||
)
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.graph import END, START, StateGraph
|
||||
from langgraph.types import Send
|
||||
from pydantic import BaseModel, Field
|
||||
|
|
@ -13,6 +11,7 @@ from typing_extensions import TypedDict
|
|||
|
||||
from open_notebook.domain.notebook import vector_search
|
||||
from open_notebook.graphs.utils import provision_langchain_model
|
||||
from open_notebook.utils import clean_thinking_content
|
||||
|
||||
|
||||
class SubGraphState(TypedDict):
|
||||
|
|
@ -59,10 +58,19 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) -
|
|||
config.get("configurable", {}).get("strategy_model"),
|
||||
"tools",
|
||||
max_tokens=2000,
|
||||
structured=dict(type="json"),
|
||||
)
|
||||
# model = model.bind_tools(tools)
|
||||
ai_message = (model | parser).invoke(system_prompt)
|
||||
return {"strategy": ai_message}
|
||||
# First get the raw response from the model
|
||||
ai_message = model.invoke(system_prompt)
|
||||
|
||||
# Clean the thinking content from the response
|
||||
cleaned_content = clean_thinking_content(ai_message.content)
|
||||
|
||||
# Parse the cleaned JSON content
|
||||
strategy = parser.parse(cleaned_content)
|
||||
|
||||
return {"strategy": strategy}
|
||||
|
||||
|
||||
async def trigger_queries(state: ThreadState, config: RunnableConfig):
|
||||
|
|
@ -99,7 +107,7 @@ async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict:
|
|||
max_tokens=2000,
|
||||
)
|
||||
ai_message = model.invoke(system_prompt)
|
||||
return {"answers": [ai_message.content]}
|
||||
return {"answers": [clean_thinking_content(ai_message.content)]}
|
||||
|
||||
|
||||
async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict:
|
||||
|
|
@ -111,7 +119,7 @@ async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict
|
|||
max_tokens=2000,
|
||||
)
|
||||
ai_message = model.invoke(system_prompt)
|
||||
return {"final_answer": ai_message.content}
|
||||
return {"final_answer": clean_thinking_content(ai_message.content)}
|
||||
|
||||
|
||||
agent_state = StateGraph(ThreadState)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from typing_extensions import TypedDict
|
|||
from open_notebook.domain.notebook import Source
|
||||
from open_notebook.domain.transformation import DefaultPrompts, Transformation
|
||||
from open_notebook.graphs.utils import provision_langchain_model
|
||||
from open_notebook.utils import clean_thinking_content
|
||||
|
||||
|
||||
class TransformationState(TypedDict):
|
||||
|
|
@ -42,11 +43,15 @@ def run_transformation(state: dict, config: RunnableConfig) -> dict:
|
|||
)
|
||||
|
||||
response = chain.invoke(payload)
|
||||
|
||||
# Clean thinking content from the response
|
||||
cleaned_content = clean_thinking_content(response.content)
|
||||
|
||||
if source:
|
||||
source.add_insight(transformation.title, response.content)
|
||||
source.add_insight(transformation.title, cleaned_content)
|
||||
|
||||
return {
|
||||
"output": response.content,
|
||||
"output": cleaned_content,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import re
|
||||
import unicodedata
|
||||
from importlib.metadata import PackageNotFoundError, version
|
||||
from typing import Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
|
@ -217,3 +218,75 @@ def compare_versions(version1: str, version2: str) -> int:
|
|||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
# Compiled once at import time; matches one <think>...</think> block
# (non-greedy, DOTALL so the reasoning text may span multiple lines).
THINK_PATTERN = re.compile(r'<think>(.*?)</think>', re.DOTALL)

# Closing tag used to bound the regex scan (see parse_thinking_content).
_THINK_CLOSE_TAG = "</think>"


def parse_thinking_content(content: str) -> Tuple[str, str]:
    """
    Split message content into its <think> reasoning and the remaining text.

    Every complete ``<think>...</think>`` block is extracted. An opening tag
    without a matching closing tag is left in place untouched.

    Args:
        content (str): The original message content. Non-string values are
            tolerated: ``None`` becomes ``""`` and anything else is passed
            through ``str()``.

    Returns:
        Tuple[str, str]: (thinking_content, cleaned_content)
            - thinking_content: Text of all <think> blocks, each stripped and
              joined with a blank line; ``""`` when there are none
            - cleaned_content: Content with the <think> blocks removed. When
              at least one block was removed, runs of blank lines are
              collapsed and the result is stripped; when nothing was removed
              the content is returned unchanged.

    Example:
        >>> content = "<think>Let me analyze this</think>Here's my answer"
        >>> thinking, cleaned = parse_thinking_content(content)
        >>> thinking
        'Let me analyze this'
        >>> cleaned
        "Here's my answer"
    """
    # Input validation: never raise on unexpected payload types
    if not isinstance(content, str):
        return "", str(content) if content is not None else ""

    # No closing tag means no complete block can exist. Checking this first
    # (instead of skipping large inputs altogether) keeps the work linear:
    # the regex only ever runs on text where every "<think>" is guaranteed
    # to find a "</think>" after it, so it cannot rescan to the end of the
    # string once per unmatched opening tag.
    last_close = content.rfind(_THINK_CLOSE_TAG)
    if last_close == -1:
        return "", content

    boundary = last_close + len(_THINK_CLOSE_TAG)
    head, tail = content[:boundary], content[boundary:]

    # Find all thinking blocks (they can only live in the head)
    thinking_matches = THINK_PATTERN.findall(head)
    if not thinking_matches:
        return "", content

    # Join all thinking content with double newlines
    thinking_content = "\n\n".join(match.strip() for match in thinking_matches)

    # Remove all <think>...</think> blocks, then re-attach the unscanned tail
    cleaned_content = THINK_PATTERN.sub("", head) + tail

    # Clean up the extra blank lines left behind by the removed blocks
    cleaned_content = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_content).strip()

    return thinking_content, cleaned_content
|
||||
|
||||
|
||||
def clean_thinking_content(content: str) -> str:
    """
    Return only the non-reasoning part of an AI response.

    Thin wrapper around ``parse_thinking_content`` for callers that have no
    use for the extracted reasoning: the first element of the parsed pair is
    discarded and the second is returned as-is.

    Args:
        content (str): The original message content with potential <think> tags

    Returns:
        str: The cleaned content produced by ``parse_thinking_content``

    Example:
        >>> content = "<think>Let me think...</think>Here's the answer"
        >>> clean_thinking_content(content)
        "Here's the answer"
    """
    # Index 1 of the (thinking, cleaned) pair is the cleaned text.
    return parse_thinking_content(content)[1]
|
||||
|
|
|
|||
|
|
@ -40,10 +40,9 @@ async def process_ask_query(question, strategy_model, answer_model, final_answer
|
|||
|
||||
|
||||
def results_card(item):
|
||||
score = item.get("relevance", item.get("similarity", item.get("score", 0)))
|
||||
with st.container(border=True):
|
||||
st.markdown(
|
||||
f"[{score:.2f}] **[{item['title']}](/?object_id={item['parent_id']})**"
|
||||
f"[{item['final_score']:.2f}] **[{item['title']}](/?object_id={item['parent_id']})**"
|
||||
)
|
||||
if "matches" in item:
|
||||
with st.expander("Matches"):
|
||||
|
|
@ -160,5 +159,15 @@ with search_tab:
|
|||
st.session_state["search_results"] = vector_search(
|
||||
search_term, 100, search_sources, search_notes
|
||||
)
|
||||
for item in st.session_state["search_results"]:
|
||||
|
||||
search_results = st.session_state["search_results"].copy()
|
||||
for item in search_results:
|
||||
item["final_score"] = item.get(
|
||||
"relevance", item.get("similarity", item.get("score", 0))
|
||||
)
|
||||
|
||||
# Sort search results by final_score in descending order
|
||||
search_results.sort(key=lambda x: x["final_score"], reverse=True)
|
||||
|
||||
for item in search_results:
|
||||
results_card(item)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ from pages.stream_app.utils import (
|
|||
create_session_for_notebook,
|
||||
)
|
||||
|
||||
from open_notebook.utils import parse_thinking_content
|
||||
|
||||
from .note import make_note_from_chat
|
||||
|
||||
|
||||
|
|
@ -186,11 +188,28 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession):
|
|||
continue
|
||||
|
||||
with st.chat_message(name=msg.type):
|
||||
st.markdown(convert_source_references(msg.content))
|
||||
if msg.type == "ai":
|
||||
# Parse thinking content for AI messages
|
||||
thinking_content, cleaned_content = parse_thinking_content(msg.content)
|
||||
|
||||
# Show thinking content in expander if present
|
||||
if thinking_content:
|
||||
with st.expander("🤔 AI Reasoning", expanded=False):
|
||||
st.markdown(thinking_content)
|
||||
|
||||
# Show the cleaned regular content
|
||||
if cleaned_content:
|
||||
st.markdown(convert_source_references(cleaned_content))
|
||||
elif msg.content: # Fallback to original if cleaning resulted in empty content
|
||||
st.markdown(convert_source_references(msg.content))
|
||||
|
||||
# New Note button for AI messages
|
||||
if st.button("💾 New Note", key=f"render_save_{msg.id}"):
|
||||
make_note_from_chat(
|
||||
content=msg.content,
|
||||
notebook_id=current_notebook.id,
|
||||
)
|
||||
st.rerun()
|
||||
else:
|
||||
# Human messages - display normally
|
||||
st.markdown(convert_source_references(msg.content))
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "open-notebook"
|
||||
version = "0.2.2"
|
||||
version = "0.2.3"
|
||||
description = "An open source implementation of a research assistant, inspired by Google Notebook LM"
|
||||
authors = [
|
||||
{name = "Luis Novo", email = "lfnovo@gmail.com"}
|
||||
|
|
|
|||
3
uv.lock
3
uv.lock
|
|
@ -2716,7 +2716,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "open-notebook"
|
||||
version = "0.2.2"
|
||||
version = "0.2.3"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "ai-prompter" },
|
||||
|
|
@ -3460,6 +3460,7 @@ sdist = { url = "https://files.pythonhosted.org/packages/bd/62/d29612ca33b7844e7
|
|||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/32/5a/3399a2caf51c91db650de57464465b830c2d4ea15b23d24a98182202b704/pymupdf-1.26.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:32296f12a7c7f36febd59cee77823a54490313bcaba9879b17def6518186f94e", size = 23054640 },
|
||||
{ url = "https://files.pythonhosted.org/packages/64/e0/cc3ec6a4d5ada8992b8610f134565ceb517243f12736b50d795cb3459315/pymupdf-1.26.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aad7949eca62aca40854510cdb125cf873b181726dc9497a90834200f31faa63", size = 22402766 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e8/cf/d5b1cd775a17a7b83e25cbf4c46f64cf1352c962ca97646e3e01953cf0df/pymupdf-1.26.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3b62c4d443121ed9a2eb967c3a0e45f8dbabcc838db8604ece02c4e868808edc", size = 23448474 },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/9f/e7101bd24a0f5cbfa0310c8e5c3a8ec0dd9a86986812ff86ac2fbd273c92/pymupdf-1.26.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a65c411eb1cbb79e40c307e10fbad23658f19e9d7334ac4de21d24b58009a7b9", size = 24056183 },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/39/23ac15cf0edc2877ef366dc7ae041ac199d212433c2c3113661d1a1d5ad0/pymupdf-1.26.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:26cebdcc1b2b7a7445423599ce2e0000f2be0333cce0fa0e6846e5a7da46f965", size = 24258802 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e1/8c/56bd5951128d5c5c0b64d2942090c2cd7bc44302bac991b941ac736e3d63/pymupdf-1.26.1-cp39-abi3-win32.whl", hash = "sha256:82ed9e106cf564fc959c0691c374ba68443086ba1a1c9f26128eebbc3e6df9e5", size = 16927933 },
|
||||
|
|
|
|||
Loading…
Reference in a new issue