From 1a6fe4723b31b327f9051b9a2994e121210b99c6 Mon Sep 17 00:00:00 2001 From: danrush777 Date: Sun, 8 Feb 2026 22:29:45 +0100 Subject: [PATCH 1/2] fix: handle structured content format in LLM response parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some LLM providers (notably Gemini, DeepSeek via OpenAI-compatible proxies) return ai_message.content as a list of content parts: [{'type': 'text', 'text': '...', 'extras': {...}}] The current code uses str() on non-string content, which produces the Python repr of the entire list — not valid JSON. This breaks PydanticOutputParser.parse() with OutputParserException. This adds extract_text_content() to properly unwrap text from both string and structured content formats, applied in ask.py, chat.py, and prompt.py. Fixes #329 --- open_notebook/graphs/ask.py | 19 ++++--------------- open_notebook/graphs/chat.py | 7 ++----- open_notebook/graphs/prompt.py | 4 ++-- open_notebook/utils/text_utils.py | 26 ++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py index 2bdfc20..6c14e62 100644 --- a/open_notebook/graphs/ask.py +++ b/open_notebook/graphs/ask.py @@ -12,6 +12,7 @@ from typing_extensions import TypedDict from open_notebook.ai.provision import provision_langchain_model from open_notebook.domain.notebook import vector_search from open_notebook.utils import clean_thinking_content +from open_notebook.utils.text_utils import extract_text_content class SubGraphState(TypedDict): @@ -62,11 +63,7 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) - ai_message = await model.ainvoke(system_prompt) # Clean the thinking content from the response - message_content = ( - ai_message.content - if isinstance(ai_message.content, str) - else str(ai_message.content) - ) + message_content = extract_text_content(ai_message.content) cleaned_content = 
clean_thinking_content(message_content) # Parse the cleaned JSON content @@ -109,11 +106,7 @@ async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: max_tokens=2000, ) ai_message = await model.ainvoke(system_prompt) - ai_content = ( - ai_message.content - if isinstance(ai_message.content, str) - else str(ai_message.content) - ) + ai_content = extract_text_content(ai_message.content) return {"answers": [clean_thinking_content(ai_content)]} @@ -126,11 +119,7 @@ async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict max_tokens=2000, ) ai_message = await model.ainvoke(system_prompt) - final_content = ( - ai_message.content - if isinstance(ai_message.content, str) - else str(ai_message.content) - ) + final_content = extract_text_content(ai_message.content) return {"final_answer": clean_thinking_content(final_content)} diff --git a/open_notebook/graphs/chat.py b/open_notebook/graphs/chat.py index 4c32570..070499f 100644 --- a/open_notebook/graphs/chat.py +++ b/open_notebook/graphs/chat.py @@ -14,6 +14,7 @@ from open_notebook.ai.provision import provision_langchain_model from open_notebook.config import LANGGRAPH_CHECKPOINT_FILE from open_notebook.domain.notebook import Notebook from open_notebook.utils import clean_thinking_content +from open_notebook.utils.text_utils import extract_text_content class ThreadState(TypedDict): @@ -69,11 +70,7 @@ def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict ai_message = model.invoke(payload) # Clean thinking content from AI response (e.g., <think>...</think> 
tags) - content = ( - ai_message.content - if isinstance(ai_message.content, str) - else str(ai_message.content) - ) + content = extract_text_content(ai_message.content) cleaned_content = clean_thinking_content(content) cleaned_message = ai_message.model_copy(update={"content": cleaned_content}) diff --git a/open_notebook/graphs/prompt.py b/open_notebook/graphs/prompt.py index 00bc356..cd80d10 100644 --- a/open_notebook/graphs/prompt.py +++ b/open_notebook/graphs/prompt.py @@ -7,7 +7,7 @@ from langgraph.graph import END, START, StateGraph from typing_extensions import TypedDict from open_notebook.ai.provision import provision_langchain_model -from open_notebook.utils.text_utils import clean_thinking_content +from open_notebook.utils.text_utils import clean_thinking_content, extract_text_content class PatternChainState(TypedDict): @@ -33,7 +33,7 @@ async def call_model(state: dict, config: RunnableConfig) -> dict: response = await chain.ainvoke(payload) # Clean thinking tags from response (handles extended thinking models) - output = clean_thinking_content(str(response.content)) + output = clean_thinking_content(extract_text_content(response.content)) return {"output": output} diff --git a/open_notebook/utils/text_utils.py b/open_notebook/utils/text_utils.py index 3846924..ff7ea14 100644 --- a/open_notebook/utils/text_utils.py +++ b/open_notebook/utils/text_utils.py @@ -117,3 +117,29 @@ def clean_thinking_content(content: str) -> str: """ _, cleaned_content = parse_thinking_content(content) return cleaned_content + + +def extract_text_content(content) -> str: + """Extract text from LLM response content. + + Handles both plain string responses and structured content formats + (e.g. Gemini's envelope format): + [{'type': 'text', 'text': '...', 'extras': {...}}] + + Args: + content: The content from an AI message, either a string or a list of parts. + + Returns: + The extracted text content as a string. 
+ """ + if isinstance(content, str): + return content + if isinstance(content, list): + text_parts = [] + for part in content: + if isinstance(part, dict) and "text" in part: + text_parts.append(part["text"]) + elif isinstance(part, str): + text_parts.append(part) + return "".join(text_parts) + return str(content) From 189a30c5705af1156f158d15a4f77e743a38e22a Mon Sep 17 00:00:00 2001 From: Luis Novo Date: Tue, 17 Feb 2026 17:32:34 -0300 Subject: [PATCH 2/2] fix: bump podcast-creator to >= 0.9.4 Fixes #211 --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index caa3ade..d41cd14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "ai-prompter>=0.3,<1", "esperanto>=2.19.3,<3", "surrealdb>=1.0.4", - "podcast-creator>=0.9.1,<1", + "podcast-creator>=0.9.4,<1", "surreal-commands>=1.3.1,<2", "numpy>=2.4.1", ] diff --git a/uv.lock b/uv.lock index ef336aa..375ad11 100644 --- a/uv.lock +++ b/uv.lock @@ -2168,7 +2168,7 @@ requires-dist = [ { name = "loguru", specifier = ">=0.7.2" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.11.1" }, { name = "numpy", specifier = ">=2.4.1" }, - { name = "podcast-creator", specifier = ">=0.9.1,<1" }, + { name = "podcast-creator", specifier = ">=0.9.4,<1" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.1" }, { name = "pydantic", specifier = ">=2.9.2" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, @@ -2519,7 +2519,7 @@ wheels = [ [[package]] name = "podcast-creator" -version = "0.9.1" +version = "0.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ai-prompter" }, @@ -2535,9 +2535,9 @@ dependencies = [ { name = "requests" }, { name = "tiktoken" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7d/de/f7ee60b502dad23b724d669be31fdeb6a790e306968c2cd6a079388262be/podcast_creator-0.9.1.tar.gz", hash = 
"sha256:177ae68b18c7efd815e555dcec3c644e541bd053e2c63669fd0a18a008b2f374", size = 470751, upload-time = "2026-02-16T17:58:44.275Z" } +sdist = { url = "https://files.pythonhosted.org/packages/97/4a/9f23b55659d7d236645593a4b75141837ed88568ba6a6a370b01d97827e6/podcast_creator-0.9.4.tar.gz", hash = "sha256:9e40a77c105d0b02f04a3eef7881a34454ef556fabd8297fe68d50307ca5f926", size = 472357, upload-time = "2026-02-17T20:21:57.257Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/d7/687284d059fc490a19d60af8f07a66b19895e15946e7ced143096d3c5ea0/podcast_creator-0.9.1-py3-none-any.whl", hash = "sha256:e3e513f2aacccd96c15bcab891216ff447568551c4392b3f12575aa0cf0cbeee", size = 74421, upload-time = "2026-02-16T17:58:42.818Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ac/b331aae683771964f0574189c8dbc1bc0c7b22aca9a376d61c3248180848/podcast_creator-0.9.4-py3-none-any.whl", hash = "sha256:2bd1138cbd1a4deda9da657e7e2b9c8a7d8c0cc43c649506af4837aeb708d46f", size = 74844, upload-time = "2026-02-17T20:21:58.271Z" }, ] [[package]]