From 01dc2240a2a931769ba64909b32034f23f0848bd Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 26 Jun 2025 11:40:23 -0300 Subject: [PATCH 1/6] feat: sort search results --- pages/3_🔍_Ask_and_Search.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index 8c92f61..8930469 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -160,5 +160,16 @@ with search_tab: st.session_state["search_results"] = vector_search( search_term, 100, search_sources, search_notes ) + + for item in st.session_state["search_results"]: + item["final_score"] = item.get( + "relevance", item.get("similarity", item.get("score", 0)) + ) + + # Sort search results by final_score in descending order + st.session_state["search_results"].sort( + key=lambda x: x["final_score"], reverse=True + ) + for item in st.session_state["search_results"]: results_card(item) From 7eee271232c953edbb5921cf53bd412fc6057a5a Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 26 Jun 2025 11:41:15 -0300 Subject: [PATCH 2/6] feat: extract think tags from reasoning models --- open_notebook/graphs/ask.py | 22 ++++++--- open_notebook/graphs/transformation.py | 9 +++- open_notebook/utils.py | 64 ++++++++++++++++++++++++++ pages/stream_app/chat.py | 19 +++++++- 4 files changed, 104 insertions(+), 10 deletions(-) diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py index 3297627..9f639a6 100644 --- a/open_notebook/graphs/ask.py +++ b/open_notebook/graphs/ask.py @@ -3,9 +3,7 @@ from typing import Annotated, List from ai_prompter import Prompter from langchain_core.output_parsers.pydantic import PydanticOutputParser -from langchain_core.runnables import ( - RunnableConfig, -) +from langchain_core.runnables import RunnableConfig from langgraph.graph import END, START, StateGraph from langgraph.types import Send from pydantic import BaseModel, Field @@ -13,6 +11,7 @@ from typing_extensions import TypedDict from 
open_notebook.domain.notebook import vector_search from open_notebook.graphs.utils import provision_langchain_model +from open_notebook.utils import clean_thinking_content class SubGraphState(TypedDict): @@ -59,10 +58,19 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) - config.get("configurable", {}).get("strategy_model"), "tools", max_tokens=2000, + structured=dict(type="json"), ) # model = model.bind_tools(tools) - ai_message = (model | parser).invoke(system_prompt) - return {"strategy": ai_message} + # First get the raw response from the model + ai_message = model.invoke(system_prompt) + + # Clean the thinking content from the response + cleaned_content = clean_thinking_content(ai_message.content) + + # Parse the cleaned JSON content + strategy = parser.parse(cleaned_content) + + return {"strategy": strategy} async def trigger_queries(state: ThreadState, config: RunnableConfig): @@ -99,7 +107,7 @@ async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: max_tokens=2000, ) ai_message = model.invoke(system_prompt) - return {"answers": [ai_message.content]} + return {"answers": [clean_thinking_content(ai_message.content)]} async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict: @@ -111,7 +119,7 @@ async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict max_tokens=2000, ) ai_message = model.invoke(system_prompt) - return {"final_answer": ai_message.content} + return {"final_answer": clean_thinking_content(ai_message.content)} agent_state = StateGraph(ThreadState) diff --git a/open_notebook/graphs/transformation.py b/open_notebook/graphs/transformation.py index f610945..360ab4b 100644 --- a/open_notebook/graphs/transformation.py +++ b/open_notebook/graphs/transformation.py @@ -7,6 +7,7 @@ from typing_extensions import TypedDict from open_notebook.domain.notebook import Source from open_notebook.domain.transformation import DefaultPrompts, Transformation from 
open_notebook.graphs.utils import provision_langchain_model +from open_notebook.utils import clean_thinking_content class TransformationState(TypedDict): @@ -42,11 +43,15 @@ def run_transformation(state: dict, config: RunnableConfig) -> dict: ) response = chain.invoke(payload) + + # Clean thinking content from the response + cleaned_content = clean_thinking_content(response.content) + if source: - source.add_insight(transformation.title, response.content) + source.add_insight(transformation.title, cleaned_content) return { - "output": response.content, + "output": cleaned_content, } diff --git a/open_notebook/utils.py b/open_notebook/utils.py index e87690a..014ca9e 100644 --- a/open_notebook/utils.py +++ b/open_notebook/utils.py @@ -1,6 +1,7 @@ import re import unicodedata from importlib.metadata import PackageNotFoundError, version +from typing import Tuple from urllib.parse import urlparse import requests @@ -217,3 +218,66 @@ def compare_versions(version1: str, version2: str) -> int: return 1 else: return 0 + + +def parse_thinking_content(content: str) -> Tuple[str, str]: + """ + Parse message content to extract thinking content from tags. + + Args: + content (str): The original message content + + Returns: + Tuple[str, str]: (thinking_content, cleaned_content) + - thinking_content: Content from within tags + - cleaned_content: Original content with blocks removed + + Example: + >>> content = "Let me analyze thisHere's my answer" + >>> thinking, cleaned = parse_thinking_content(content) + >>> print(thinking) + "Let me analyze this" + >>> print(cleaned) + "Here's my answer" + """ + # Pattern to match ... blocks (including multiline) + think_pattern = r'(.*?)' + + # Find all thinking blocks + thinking_matches = re.findall(think_pattern, content, re.DOTALL) + + if not thinking_matches: + return "", content + + # Join all thinking content with double newlines + thinking_content = "\n\n".join(match.strip() for match in thinking_matches) + + # Remove all ... 
blocks from the original content + cleaned_content = re.sub(think_pattern, "", content, flags=re.DOTALL) + + # Clean up extra whitespace + cleaned_content = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_content).strip() + + return thinking_content, cleaned_content + + +def clean_thinking_content(content: str) -> str: + """ + Remove thinking content from AI responses, returning only the cleaned content. + + This is a convenience function for cases where you only need the cleaned + content and don't need access to the thinking process. + + Args: + content (str): The original message content with potential <think> tags + + Returns: + str: Content with <think> blocks removed and whitespace cleaned + + Example: + >>> content = "<think>Let me think...</think>Here's the answer" + >>> clean_thinking_content(content) + "Here's the answer" + """ + _, cleaned_content = parse_thinking_content(content) + return cleaned_content diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py index 6c121a5..f29e297 100644 --- a/pages/stream_app/chat.py +++ b/pages/stream_app/chat.py @@ -14,6 +14,8 @@ from pages.stream_app.utils import ( create_session_for_notebook, ) +from open_notebook.utils import parse_thinking_content + from .note import make_note_from_chat @@ -186,11 +188,26 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): continue with st.chat_message(name=msg.type): - st.markdown(convert_source_references(msg.content)) if msg.type == "ai": + # Parse thinking content for AI messages + thinking_content, cleaned_content = parse_thinking_content(msg.content) + + # Show thinking content in expander if present + if thinking_content: + with st.expander("🤔 AI Reasoning", expanded=False): + st.markdown(thinking_content) + + # Show the cleaned regular content + if cleaned_content: + st.markdown(convert_source_references(cleaned_content)) + + # New Note button for AI messages if st.button("💾 New Note", key=f"render_save_{msg.id}"): make_note_from_chat( content=msg.content, 
notebook_id=current_notebook.id, ) st.rerun() + else: + # Human messages - display normally + st.markdown(convert_source_references(msg.content)) From e3ee803a42fcc904a3d0c30a09eb96f88c178b90 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 26 Jun 2025 11:55:41 -0300 Subject: [PATCH 3/6] review: add validation and compile regex just once --- open_notebook/utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/open_notebook/utils.py b/open_notebook/utils.py index 014ca9e..a32d6eb 100644 --- a/open_notebook/utils.py +++ b/open_notebook/utils.py @@ -220,6 +220,10 @@ def compare_versions(version1: str, version2: str) -> int: return 0 +# Compile regex pattern once for better performance +THINK_PATTERN = re.compile(r'(.*?)', re.DOTALL) + + def parse_thinking_content(content: str) -> Tuple[str, str]: """ Parse message content to extract thinking content from tags. @@ -240,11 +244,16 @@ def parse_thinking_content(content: str) -> Tuple[str, str]: >>> print(cleaned) "Here's my answer" """ - # Pattern to match ... blocks (including multiline) - think_pattern = r'(.*?)' + # Input validation + if not isinstance(content, str): + return "", str(content) if content is not None else "" + + # Limit processing for very large content (100KB limit) + if len(content) > 100000: + return "", content # Find all thinking blocks - thinking_matches = re.findall(think_pattern, content, re.DOTALL) + thinking_matches = THINK_PATTERN.findall(content) if not thinking_matches: return "", content @@ -253,7 +262,7 @@ def parse_thinking_content(content: str) -> Tuple[str, str]: thinking_content = "\n\n".join(match.strip() for match in thinking_matches) # Remove all ... 
blocks from the original content - cleaned_content = re.sub(think_pattern, "", content, flags=re.DOTALL) + cleaned_content = THINK_PATTERN.sub("", content) # Clean up extra whitespace cleaned_content = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_content).strip() From 26da01935a7cf28fd2d6cc09b3fa2d02e1daab39 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 26 Jun 2025 11:56:01 -0300 Subject: [PATCH 4/6] review: prevent mutation and remove duplicate final_score calculation --- pages/3_🔍_Ask_and_Search.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index 8930469..def0b0c 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -40,10 +40,9 @@ async def process_ask_query(question, strategy_model, answer_model, final_answer def results_card(item): - score = item.get("relevance", item.get("similarity", item.get("score", 0))) with st.container(border=True): st.markdown( - f"[{score:.2f}] **[{item['title']}](/?object_id={item['parent_id']})**" + f"[{item['final_score']:.2f}] **[{item['title']}](/?object_id={item['parent_id']})**" ) if "matches" in item: with st.expander("Matches"): @@ -161,15 +160,14 @@ with search_tab: search_term, 100, search_sources, search_notes ) - for item in st.session_state["search_results"]: + search_results = st.session_state["search_results"].copy() + for item in search_results: item["final_score"] = item.get( "relevance", item.get("similarity", item.get("score", 0)) ) # Sort search results by final_score in descending order - st.session_state["search_results"].sort( - key=lambda x: x["final_score"], reverse=True - ) + search_results.sort(key=lambda x: x["final_score"], reverse=True) - for item in st.session_state["search_results"]: + for item in search_results: results_card(item) From 37fb92370f4d461c7c7fdc17cec572e5fcf48a6b Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 26 Jun 2025 11:56:12 -0300 Subject: [PATCH 5/6] review: 
fallback if content is empty --- pages/stream_app/chat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py index f29e297..b616d20 100644 --- a/pages/stream_app/chat.py +++ b/pages/stream_app/chat.py @@ -200,6 +200,8 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): # Show the cleaned regular content if cleaned_content: st.markdown(convert_source_references(cleaned_content)) + elif msg.content: # Fallback to original if cleaning resulted in empty content + st.markdown(convert_source_references(msg.content)) # New Note button for AI messages if st.button("💾 New Note", key=f"render_save_{msg.id}"): From f92b41e51084e4814ccf5e4b308f64324cdbfc15 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 26 Jun 2025 11:56:27 -0300 Subject: [PATCH 6/6] chore: bump version --- pyproject.toml | 2 +- uv.lock | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9a3bdef..598c3ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "open-notebook" -version = "0.2.2" +version = "0.2.3" description = "An open source implementation of a research assistant, inspired by Google Notebook LM" authors = [ {name = "Luis Novo", email = "lfnovo@gmail.com"} diff --git a/uv.lock b/uv.lock index aed4b60..cff08b5 100644 --- a/uv.lock +++ b/uv.lock @@ -2716,7 +2716,7 @@ wheels = [ [[package]] name = "open-notebook" -version = "0.2.2" +version = "0.2.3" source = { editable = "." 
} dependencies = [ { name = "ai-prompter" }, @@ -3460,6 +3460,7 @@ sdist = { url = "https://files.pythonhosted.org/packages/bd/62/d29612ca33b7844e7 wheels = [ { url = "https://files.pythonhosted.org/packages/32/5a/3399a2caf51c91db650de57464465b830c2d4ea15b23d24a98182202b704/pymupdf-1.26.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:32296f12a7c7f36febd59cee77823a54490313bcaba9879b17def6518186f94e", size = 23054640 }, { url = "https://files.pythonhosted.org/packages/64/e0/cc3ec6a4d5ada8992b8610f134565ceb517243f12736b50d795cb3459315/pymupdf-1.26.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aad7949eca62aca40854510cdb125cf873b181726dc9497a90834200f31faa63", size = 22402766 }, + { url = "https://files.pythonhosted.org/packages/e8/cf/d5b1cd775a17a7b83e25cbf4c46f64cf1352c962ca97646e3e01953cf0df/pymupdf-1.26.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3b62c4d443121ed9a2eb967c3a0e45f8dbabcc838db8604ece02c4e868808edc", size = 23448474 }, { url = "https://files.pythonhosted.org/packages/82/9f/e7101bd24a0f5cbfa0310c8e5c3a8ec0dd9a86986812ff86ac2fbd273c92/pymupdf-1.26.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a65c411eb1cbb79e40c307e10fbad23658f19e9d7334ac4de21d24b58009a7b9", size = 24056183 }, { url = "https://files.pythonhosted.org/packages/99/39/23ac15cf0edc2877ef366dc7ae041ac199d212433c2c3113661d1a1d5ad0/pymupdf-1.26.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:26cebdcc1b2b7a7445423599ce2e0000f2be0333cce0fa0e6846e5a7da46f965", size = 24258802 }, { url = "https://files.pythonhosted.org/packages/e1/8c/56bd5951128d5c5c0b64d2942090c2cd7bc44302bac991b941ac736e3d63/pymupdf-1.26.1-cp39-abi3-win32.whl", hash = "sha256:82ed9e106cf564fc959c0691c374ba68443086ba1a1c9f26128eebbc3e6df9e5", size = 16927933 },