From 0524eddb0b31ac2100b03f0de0b0c3c4b86633c2 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Thu, 7 Nov 2024 20:06:34 -0300 Subject: [PATCH 01/44] add citation links to notes --- pages/components/note_panel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pages/components/note_panel.py b/pages/components/note_panel.py index d1cba31..b1f1ef2 100644 --- a/pages/components/note_panel.py +++ b/pages/components/note_panel.py @@ -3,6 +3,7 @@ from loguru import logger from streamlit_monaco import st_monaco # type: ignore from open_notebook.domain.notebook import Note +from pages.stream_app.utils import convert_source_references def note_panel(note_id, notebook_id=None): @@ -12,7 +13,7 @@ def note_panel(note_id, notebook_id=None): t_preview, t_edit = st.tabs(["Preview", "Edit"]) with t_preview: st.subheader(note.title) - st.markdown(note.content) + st.markdown(convert_source_references(note.content)) with t_edit: note.title = st.text_input("Title", value=note.title) note.content = st_monaco( From 53da255801e68625936d0a6983f28df38840dc80 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Fri, 8 Nov 2024 16:07:51 -0300 Subject: [PATCH 02/44] improve search --- open_notebook/domain/notebook.py | 140 ++++--------------------------- pages/3_🔍_Ask_and_Search.py | 50 +++++++---- pages/stream_app/source.py | 2 +- 3 files changed, 54 insertions(+), 138 deletions(-) diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index e473193..21c9965 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -309,7 +309,8 @@ def text_search(keyword: str, results: int, source: bool = True, note: bool = Tr try: results = repo_query( """ - SELECT * FROM fn::text_search($keyword, $results, $source, $note); + select * + from fn::text_search($keyword, $results, $source, $note) """, {"keyword": keyword, "results": results, "source": source, "note": note}, ) @@ -320,7 +321,13 @@ def text_search(keyword: str, results: int, 
source: bool = True, note: bool = Tr raise DatabaseOperationError(e) -def vector_search(keyword: str, results: int, source: bool = True, note: bool = True): +def vector_search( + keyword: str, + results: int, + source: bool = True, + note: bool = True, + minimum_score=0.2, +): if not keyword: raise InvalidInputError("Search keyword cannot be empty") try: @@ -328,131 +335,18 @@ def vector_search(keyword: str, results: int, source: bool = True, note: bool = embed = EMBEDDING_MODEL.embed(keyword) results = repo_query( """ - SELECT * FROM fn::vector_search($embed, $results, $source, $note, 0.15); + SELECT * FROM fn::vector_search($embed, $results, $source, $note, $minimum_score); """, - {"embed": embed, "results": results, "source": source, "note": note}, + { + "embed": embed, + "results": results, + "source": source, + "note": note, + "minimum_score": minimum_score, + }, ) return results except Exception as e: logger.error(f"Error performing vector search: {str(e)}") logger.exception(e) raise DatabaseOperationError(e) - - -def hybrid_search( - keyword_search: List[str], - embed_search: List[str], - results: int = 50, - source: bool = True, - note: bool = True, - max_chunks_per_doc: int = 3, - min_results_per_query: int = 3, -) -> Dict[str, List[Dict]]: - if not keyword_search and not embed_search: - raise InvalidInputError("At least one search term required") - - # Process keyword searches - all_keyword_results = {} # Dictionary to store results per keyword - for keyword in keyword_search: - try: - search_results = text_search(keyword, results, source, note) - # Sort results by relevance - sorted_results = sorted( - search_results, key=lambda x: x.get("relevance", 0), reverse=True - ) - # Group by parent_id and limit chunks per document - seen_parent_ids = {} - filtered_results = [] - for result in sorted_results: - parent_id = result["parent_id"] - if parent_id not in seen_parent_ids: - seen_parent_ids[parent_id] = 1 - filtered_results.append(result) - elif 
seen_parent_ids[parent_id] < max_chunks_per_doc: - seen_parent_ids[parent_id] += 1 - filtered_results.append(result) - all_keyword_results[keyword] = filtered_results - except Exception as e: - logger.warning(f"Error in keyword search for term '{keyword}': {str(e)}") - continue - - # Ensure minimum results from each keyword query - keyword_results = [] - remaining_slots = results - - # First pass: add minimum results from each query - for keyword, query_results in all_keyword_results.items(): - keyword_results.extend(query_results[:min_results_per_query]) - remaining_slots -= min(len(query_results), min_results_per_query) - - # Second pass: fill remaining slots with best results - all_remaining = [] - for keyword, query_results in all_keyword_results.items(): - all_remaining.extend(query_results[min_results_per_query:]) - - # Sort remaining by relevance and add until we hit the limit - all_remaining = sorted( - all_remaining, key=lambda x: x.get("relevance", 0), reverse=True - ) - seen_ids = {r["id"] for r in keyword_results} - for result in all_remaining: - if remaining_slots <= 0: - break - if result["id"] not in seen_ids: - keyword_results.append(result) - seen_ids.add(result["id"]) - remaining_slots -= 1 - - # Process vector searches with the same approach - all_vector_results = {} # Dictionary to store results per embedding - for embed in embed_search: - try: - search_results = vector_search(embed, results, source, note) - # Sort results by similarity - sorted_results = sorted( - search_results, key=lambda x: x.get("similarity", 0), reverse=True - ) - # Group by parent_id and limit chunks per document - seen_parent_ids = {} - filtered_results = [] - for result in sorted_results: - parent_id = result["parent_id"] - if parent_id not in seen_parent_ids: - seen_parent_ids[parent_id] = 1 - filtered_results.append(result) - elif seen_parent_ids[parent_id] < max_chunks_per_doc: - seen_parent_ids[parent_id] += 1 - filtered_results.append(result) - 
all_vector_results[embed] = filtered_results - except Exception as e: - logger.warning(f"Error in vector search for term '{embed}': {str(e)}") - continue - - # Ensure minimum results from each vector query - vector_results = [] - remaining_slots = results - - # First pass: add minimum results from each query - for embed, query_results in all_vector_results.items(): - vector_results.extend(query_results[:min_results_per_query]) - remaining_slots -= min(len(query_results), min_results_per_query) - - # Second pass: fill remaining slots with best results - all_remaining = [] - for embed, query_results in all_vector_results.items(): - all_remaining.extend(query_results[min_results_per_query:]) - - # Sort remaining by similarity and add until we hit the limit - all_remaining = sorted( - all_remaining, key=lambda x: x.get("similarity", 0), reverse=True - ) - seen_ids = {r["id"] for r in vector_results} - for result in all_remaining: - if remaining_slots <= 0: - break - if result["id"] not in seen_ids: - vector_results.append(result) - seen_ids.add(result["id"]) - remaining_slots -= 1 - - return {"keyword_results": keyword_results, "vector_results": vector_results} diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index c085867..e2da0c2 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -2,7 +2,7 @@ import streamlit as st from open_notebook.domain.models import Model from open_notebook.domain.notebook import text_search, vector_search -from open_notebook.graphs.rag import graph as rag_graph +from open_notebook.graphs.ask import graph as ask_graph from pages.stream_app.utils import convert_source_references, setup_page setup_page("🔍 Search") @@ -15,10 +15,13 @@ if "search_results" not in st.session_state: def results_card(item): score = item.get("relevance", item.get("similarity", item.get("score", 0))) - with st.expander(f"[{score:.2f}] **{item['title']}**"): - st.markdown(f"**{item['content']}**") - st.write(item["id"]) - 
st.write(item["parent_id"]) + with st.container(border=True): + st.markdown( + f"[{score:.2f}] **[{item['title']}](/?object_id={item['parent_id']})**" + ) + with st.expander("Matches"): + for match in item["matches"]: + st.markdown(match) with ask_tab: @@ -26,22 +29,41 @@ with ask_tab: st.caption( "The LLM will answer your query based on the documents in your knowledge base. " ) - st.warning( - "This functionality requires the use of Tools and, at this moment, works well with Open AI and Anthropic models only." - ) question = st.text_input("Question", "") models = Model.get_models_by_type("language") - model: Model = st.selectbox("Model", models, format_func=lambda x: x.name) + strategy_model: Model = st.selectbox( + "Query Strategy Model", + models, + format_func=lambda x: x.name, + help="This is the LLM that will be responsible for strategizing the search", + ) + answer_model: Model = st.selectbox( + "Indivual Answer Model", + models, + format_func=lambda x: x.name, + help="This is the LLM that will be responsible for processing individual subqueries", + ) + final_answer_model: Model = st.selectbox( + "Final Answer Model", + models, + format_func=lambda x: x.name, + help="This is the LLM that will be responsible for processing the final answer", + ) if st.button("Ask"): st.write(f"Searching for {question}") - messages = [question] - rag_results = rag_graph.invoke( + rag_results = ask_graph.invoke( dict( - messages=messages, + question=question, + ), + config=dict( + configurable=dict( + strategy_model=strategy_model.id, + answer_model=answer_model.id, + final_answer_model=final_answer_model.id, + ) ), - config=dict(configurable=dict(model_id=model.id)), ) - st.markdown(convert_source_references(rag_results["messages"][-1].content)) + st.markdown(convert_source_references(rag_results["final_answer"])) with st.expander("Details (for debugging)"): st.json(rag_results) diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index fdc56c5..629e2d8 100644 
--- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -38,7 +38,7 @@ def generate_toc_and_title(source) -> "Source": @st.dialog("Source", width="large") def source_panel_dialog(source_id): - source_panel(source_id) + source_panel(source_id, modal=True) @st.dialog("Add a Source", width="large") From 99b8ada28062fb2ee2158ec813011625cfdd6689 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Fri, 8 Nov 2024 16:08:13 -0300 Subject: [PATCH 03/44] new ask model strategy --- open_notebook/graphs/ask.py | 124 ++++++++++++++++++++++++++++++++ open_notebook/graphs/rag.py | 44 ------------ prompts/ask/entry.jinja | 45 ++++++++++++ prompts/ask/final_answer.jinja | 40 +++++++++++ prompts/ask/query_process.jinja | 50 +++++++++++++ 5 files changed, 259 insertions(+), 44 deletions(-) create mode 100644 open_notebook/graphs/ask.py delete mode 100644 open_notebook/graphs/rag.py create mode 100644 prompts/ask/entry.jinja create mode 100644 prompts/ask/final_answer.jinja create mode 100644 prompts/ask/query_process.jinja diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py new file mode 100644 index 0000000..a2746db --- /dev/null +++ b/open_notebook/graphs/ask.py @@ -0,0 +1,124 @@ +import operator +from typing import Annotated, List, Literal + +from langchain_core.output_parsers.pydantic import PydanticOutputParser +from langchain_core.runnables import ( + RunnableConfig, +) +from langgraph.graph import END, START, StateGraph +from langgraph.types import Send +from pydantic import BaseModel, Field +from typing_extensions import TypedDict + +from open_notebook.domain.notebook import text_search, vector_search +from open_notebook.graphs.utils import provision_langchain_model +from open_notebook.prompter import Prompter + + +class SubGraphState(TypedDict): + question: str + term: str + type: Literal["text", "vector"] + instructions: str + results: dict + answer: str + + +class Search(BaseModel): + term: str + type: Literal["text", "vector"] = Field( + 
description="The type of search. Use 'text' for keyword search and 'vector' for semantic search. If you are using text, search always for a single word" + ) + instructions: str = Field( + description="Tell the answeting LLM what information you need extracted from this search" + ) + + +class Strategy(BaseModel): + reasoning: str + searches: List[Search] = Field( + default_factory=list, + description="You can add up to five searches to this strategy", + ) + + +class ThreadState(TypedDict): + question: str + strategy: Strategy + answers: Annotated[list, operator.add] + final_answer: str + + +def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict: + parser = PydanticOutputParser(pydantic_object=Strategy) + system_prompt = Prompter(prompt_template="ask/entry", parser=parser).render( + data=state + ) + model = provision_langchain_model( + system_prompt, + config.get("configurable", {}).get("strategy_model"), + "tools", + max_tokens=2000, + ) + # model = model.bind_tools(tools) + ai_message = (model | parser).invoke(system_prompt) + return {"strategy": ai_message} + + +def trigger_queries(state: ThreadState, config: RunnableConfig): + return [ + Send( + "provide_answer", + { + "question": state["question"], + "instructions": s.instructions, + "term": s.term, + "type": s.type, + }, + ) + for s in state["strategy"].searches + ] + + +def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: + payload = state + if state["type"] == "text": + results = text_search(state["term"], 10, True, True) + else: + results = vector_search(state["term"], 10, True, True) + if len(results) == 0: + return {"answers": []} + payload["results"] = results + system_prompt = Prompter(prompt_template="ask/query_process").render(data=payload) + model = provision_langchain_model( + system_prompt, + config.get("configurable", {}).get("answer_model"), + "tools", + max_tokens=2000, + ) + ai_message = model.invoke(system_prompt) + return {"answers": 
[ai_message.content]} + + +def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict: + system_prompt = Prompter(prompt_template="ask/final_answer").render(data=state) + model = provision_langchain_model( + system_prompt, + config.get("configurable", {}).get("final_answer_model"), + "tools", + max_tokens=2000, + ) + ai_message = model.invoke(system_prompt) + return {"final_answer": ai_message.content} + + +agent_state = StateGraph(ThreadState) +agent_state.add_node("agent", call_model_with_messages) +agent_state.add_node("provide_answer", provide_answer) +agent_state.add_node("write_final_answer", write_final_answer) +agent_state.add_edge(START, "agent") +agent_state.add_conditional_edges("agent", trigger_queries, ["provide_answer"]) +agent_state.add_edge("provide_answer", "write_final_answer") +agent_state.add_edge("write_final_answer", END) + +graph = agent_state.compile() diff --git a/open_notebook/graphs/rag.py b/open_notebook/graphs/rag.py deleted file mode 100644 index 24dc435..0000000 --- a/open_notebook/graphs/rag.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import Annotated - -from langchain_core.runnables import ( - RunnableConfig, -) -from langgraph.graph import START, StateGraph -from langgraph.graph.message import add_messages -from langgraph.prebuilt import ToolNode, tools_condition -from typing_extensions import TypedDict - -from open_notebook.graphs.tools import repository_search -from open_notebook.graphs.utils import provision_langchain_model -from open_notebook.prompter import Prompter - -tools = [repository_search] -tool_node = ToolNode(tools) - - -class ThreadState(TypedDict): - messages: Annotated[list, add_messages] - # notebook: Optional[Notebook] - # context: Optional[str] - # context_config: Optional[dict] - - -def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict: - system_prompt = Prompter(prompt_template="rag").render(data=state) - payload = [system_prompt] + state.get("messages", []) - 
model = provision_langchain_model(str(payload), config, "tools", max_tokens=2000) - model = model.bind_tools(tools) - ai_message = model.invoke(payload) - return {"messages": ai_message} - - -agent_state = StateGraph(ThreadState) -agent_state.add_node("agent", call_model_with_messages) -agent_state.add_node("tools", tool_node) -agent_state.add_edge(START, "agent") -agent_state.add_conditional_edges( - "agent", - tools_condition, -) -agent_state.add_edge("tools", "agent") -graph = agent_state.compile() diff --git a/prompts/ask/entry.jinja b/prompts/ask/entry.jinja new file mode 100644 index 0000000..8035bb2 --- /dev/null +++ b/prompts/ask/entry.jinja @@ -0,0 +1,45 @@ +# SYSTEM ROLE + +You are a cognitive study assistant that helps users research and learn by engaging in focused discussions about documents in their workspace. + +The first step in the process is receiving the user's question and formulating a research strategy to find the most relevant information. + +# YOUR JOB + +Based on the user question, you need to analyze the key concepts and terms to determine the appropriate search strategy. + +Step 1: develop your search strategy (reasoning) +Step 2: formulate your search queries (searches) + +Return both the reasoning and searches as a JSON object, like in the EXAMPLE below. + +# EXAMPLE + +User: Can you tell me more about the concept of "RAG" and how it can be applied to generate answers to user questions via LLM? + +Your answer could be something like: + +```json +{ + "reasoning": "The user is asking about the concept of RAG and its application in generating answers to user questions via LLM. I should search for documents related to RAG, retrieval augmented generation, and vector search to provide a comprehensive response.", + "searches": [ + { "type": "text", "term": "RAG", "instructions": "Describe the concept and utility of RAG." 
}, + { "type": "vector", "term": "Retrieval Augmented Generation", "instructions": "Describe the concept and utility of RAG." }, + { "type": "vector", "term": "Vector Search", "instructions": "Describe how RAG utilizes vector search." } + ] +} +``` + +# OUTPUT FORMATTING + +{{format_instructions}} + +- Do not include any text other than the JSON object +- Do not include ```json``` in the response + +# USER QUESTION + +{{question}} + +# ANSWER + diff --git a/prompts/ask/final_answer.jinja b/prompts/ask/final_answer.jinja new file mode 100644 index 0000000..9c8b2d0 --- /dev/null +++ b/prompts/ask/final_answer.jinja @@ -0,0 +1,40 @@ +# SYSTEM ROLE + +You are a cognitive study assistant that helps users research and learn by engaging in focused discussions about documents in their workspace. + +You are responsible for the last step of the process, which is to provide the final answer to the user's question. You should provide accurate, factual responses based on the available documents and knowledge, while avoiding speculation or making up information. If you are unsure about something, acknowledge the uncertainty rather than guessing. + +# QUESTION + +This is the question originally made by the user: + +{{question}} + +# REASONS + +Based on the question, you derived the following reasonsing and search strategies: + +{{strategy}} + +# RESULTS + +Here are the answers you received for each of your queries. + +{{answers}} + +# YOUR JOB + +Based on the user question, the context and the retrieved answers, please formulate a final response to the user. + +# CITING SOURCES + +It's very important that your response contains references to the searched documents so the user can follow-up and read more about the topic. The way you do that is by adding the id of the specific document in between brackets like this: [document_id]. The references will be present on all the answers you have been provided. + +## IMPORTANT + +- Do not make up documents or document ids. 
Only use the ids of the documents that you can see on the answers you received. +- The ID is composed of the type of document and a random string, such as "source:randomstring", "note:randomstring", or "insight:randomstring". There are various types of documents, including notes, insights, and sources. **Always use the complete ID exactly as it is provided, including its type prefix. Do not add, remove, or modify any part of the ID.** +- **Use document IDs exactly as they are returned in the answers. Do not add any prefixes or modify them in any way.** + +# YOUR ANSWER + diff --git a/prompts/ask/query_process.jinja b/prompts/ask/query_process.jinja new file mode 100644 index 0000000..17b0d4d --- /dev/null +++ b/prompts/ask/query_process.jinja @@ -0,0 +1,50 @@ +# SYSTEM ROLE + +You are a research assistant that helps users research and learn by engaging in focused discussions about documents in their workspace. + +# QUESTION + +This is the question originally made by the user: + +{{question}} + +# SEARCH STRATEGY + +The main answer agent has developed the following search strategy to find the most relevant information: + +{{term}} + +And provided you with the following instructions to formulate the answer: + +{{instructions}} + +# YOUR JOB + +Based on the user question, the context and the retrieved results, please formulate the appropriate answer. + +# RESULTS + +{{results}} + +# CITING SOURCES + +It's very important that your response contains references to the searched documents so the user can follow-up and read more about the topic. The way you do that is by adding the id of the specific document in between brackets like this: [document_id]. + +## EXAMPLE + +User: Can you tell me more about the concept of "Deep Learning"? + +Assistant: Deep learning is a subset of machine learning in artificial intelligence (AI) that enables networks to learn unsupervised from unstructured or unlabeled data. [note:iuiodadalknda]. 
It can also be categorized into three main types: supervised, unsupervised, and reinforcement learning. [insight:adadadadadadad]. + +Please note, "note:iuiodadalknda" and "insight:adadadadadadad" are examples of document IDs with different prefixes. You should not make up document IDs or copy the IDs from this example. You should use the IDs of the documents that you have access to through the search tool. + +## IMPORTANT + +- Do not make up documents or document ids. Only use the ids of the documents that you have access through the query you made. +- The ID is composed of the type of document and a random string, such as "source:randomstring", "note:randomstring", or "insight:randomstring". There are various types of documents, including notes, insights, and sources. **Always use the complete ID exactly as it is provided, including its type prefix. Do not add, remove, or modify any part of the ID.** +- Do not assume or change the type prefix of any document ID. If a document ID is "note:xyz", use it exactly as "note:xyz". Do not change it to "source:xyz" or any other variation. +- **Use document IDs exactly as they are returned from the search tool. 
Do not add any prefixes or modify them in any way.** + + +# YOUR ANSWER + From 183149014e372ac1dcf123f760a859771c10c724 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Fri, 8 Nov 2024 16:08:54 -0300 Subject: [PATCH 04/44] change model provisioning parameters --- open_notebook/graphs/chat.py | 7 ++++++- open_notebook/graphs/tools.py | 14 -------------- open_notebook/graphs/utils.py | 14 ++++++++------ 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/open_notebook/graphs/chat.py b/open_notebook/graphs/chat.py index 5e3b4ca..7342ca3 100644 --- a/open_notebook/graphs/chat.py +++ b/open_notebook/graphs/chat.py @@ -25,7 +25,12 @@ class ThreadState(TypedDict): def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict: system_prompt = Prompter(prompt_template="chat").render(data=state) payload = [system_prompt] + state.get("messages", []) - model = provision_langchain_model(str(payload), config, "chat", max_tokens=2000) + model = provision_langchain_model( + str(payload), + config.get("configurable", {}).get("model_id"), + "chat", + max_tokens=2000, + ) ai_message = model.invoke(payload) return {"messages": ai_message} diff --git a/open_notebook/graphs/tools.py b/open_notebook/graphs/tools.py index 620fac4..9c3df13 100644 --- a/open_notebook/graphs/tools.py +++ b/open_notebook/graphs/tools.py @@ -1,10 +1,7 @@ from datetime import datetime -from typing import List from langchain.tools import tool -from open_notebook.domain.notebook import hybrid_search - # todo: turn this into a system prompt variable @tool @@ -14,14 +11,3 @@ def get_current_timestamp() -> str: Returns the current timestamp in the format YYYYMMDDHHmmss. """ return datetime.now().strftime("%Y%m%d%H%M%S") - - -@tool -def repository_search(keyword_searches: List[str], vector_searches: List[str]) -> str: - """ - name: repository_search - Makes a search in the content repository for the given query. 
- keyword_searches: List[str] - A list of search terms to search for using keyword search. - vector_searches: List[str] - A list of search terms to search for using vector search. - """ - return hybrid_search(keyword_searches, vector_searches, 20) diff --git a/open_notebook/graphs/utils.py b/open_notebook/graphs/utils.py index 07365ea..3c79d85 100644 --- a/open_notebook/graphs/utils.py +++ b/open_notebook/graphs/utils.py @@ -8,7 +8,9 @@ from open_notebook.prompter import Prompter from open_notebook.utils import token_count -def provision_langchain_model(content, config, default_type, **kwargs) -> BaseChatModel: +def provision_langchain_model( + content, model_id, default_type, **kwargs +) -> BaseChatModel: """ Returns the best model to use based on the context size and on whether there is a specific model being requested in Config. If context > 105_000, returns the large_context_model @@ -22,10 +24,8 @@ def provision_langchain_model(content, config, default_type, **kwargs) -> BaseCh f"Using large context model because the content has {tokens} tokens" ) model = model_manager.get_default_model("large_context", **kwargs) - elif config.get("configurable", {}).get("model_id"): - model = model_manager.get_model( - config.get("configurable", {}).get("model_id"), **kwargs - ) + elif model_id: + model = model_manager.get_model(model_id, **kwargs) else: model = model_manager.get_default_model(default_type, **kwargs) @@ -45,7 +45,9 @@ def run_pattern( data=state ) payload = [system_prompt] + messages - chain = provision_langchain_model(str(payload), config, "transformation") + chain = provision_langchain_model( + str(payload), config.get("configurable", {}).get("model_id"), "transformation" + ) response = chain.invoke(payload) From dc76d6b8d85691b332e66be2dfcfb0252bb354a3 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Fri, 8 Nov 2024 18:29:51 -0300 Subject: [PATCH 05/44] enable different text models for podcast --- open_notebook/plugins/podcasts.py | 28 +++++++- 
pages/5_🎙️_Podcasts.py | 109 ++++++++++++++++++++++++------ 2 files changed, 114 insertions(+), 23 deletions(-) diff --git a/open_notebook/plugins/podcasts.py b/open_notebook/plugins/podcasts.py index 8ad6050..ce92ff3 100644 --- a/open_notebook/plugins/podcasts.py +++ b/open_notebook/plugins/podcasts.py @@ -4,6 +4,7 @@ from loguru import logger from podcastfy.client import generate_podcast from pydantic import Field, field_validator +from open_notebook.config import DATA_FOLDER from open_notebook.domain.notebook import ObjectModel @@ -27,6 +28,8 @@ class PodcastConfig(ObjectModel): conversation_style: List[str] engagement_technique: List[str] dialogue_structure: List[str] + transcript_model: Optional[str] = None + transcript_model_provider: Optional[str] = None user_instructions: Optional[str] = None ending_message: Optional[str] = None wordcount: int = Field(ge=400, le=10000) @@ -53,7 +56,11 @@ class PodcastConfig(ObjectModel): "engagement_techniques": self.engagement_technique, "creativity": self.creativity, "text_to_speech": { - # "temp_audio_dir": f"{PODCASTS_FOLDER}/tmp", + "output_directories": { + "transcripts": f"{DATA_FOLDER}/podcasts/transcripts", + "audio": f"{DATA_FOLDER}/podcasts/audio", + }, + "temp_audio_dir": f"{DATA_FOLDER}/podcasts/audio/tmp", "ending_message": "Thank you for listening to this episode. 
Don't forget to subscribe to our podcast for more interesting conversations.", "default_tts_model": self.provider, self.provider: { @@ -71,8 +78,25 @@ class PodcastConfig(ObjectModel): f"Generating episode {episode_name} with config {conversation_config}" ) + api_key_label = None + llm_model_name = None + if self.transcript_model_provider: + if self.transcript_model_provider == "openai": + api_key_label = "OPENAI_API_KEY" + llm_model_name = self.transcript_model + elif self.transcript_model_provider == "anthropic": + api_key_label = "ANTHROPIC_API_KEY" + llm_model_name = self.transcript_model + elif self.transcript_model_provider == "gemini": + api_key_label = "GEMINI_API_KEY" + llm_model_name = self.transcript_model + audio_file = generate_podcast( - conversation_config=conversation_config, text=text, tts_model=self.provider + conversation_config=conversation_config, + text=text, + tts_model=self.provider, + llm_model_name=llm_model_name, + api_key_label=api_key_label, ) episode = PodcastEpisode( name=episode_name, diff --git a/pages/5_🎙️_Podcasts.py b/pages/5_🎙️_Podcasts.py index 6e1122e..040d76d 100644 --- a/pages/5_🎙️_Podcasts.py +++ b/pages/5_🎙️_Podcasts.py @@ -18,7 +18,6 @@ setup_page("🎙️ Podcasts") text_to_speech_models = Model.get_models_by_type("text_to_speech") - provider_models: Dict[str, List[str]] = {} for model in text_to_speech_models: @@ -26,11 +25,28 @@ for model in text_to_speech_models: provider_models[model.provider] = [] provider_models[model.provider].append(model.name) +text_models = Model.get_models_by_type("language") + +transcript_provider_models: Dict[str, List[str]] = {} + +for model in text_models: + if model.provider not in ["gemini", "openai", "anthropic"]: + continue + if model.provider not in transcript_provider_models: + transcript_provider_models[model.provider] = [] + transcript_provider_models[model.provider].append(model.name) + if len(text_to_speech_models) == 0: st.error("No text to speech models found. 
Please set one up in the Settings page.") st.stop() +if len(text_models) == 0: + st.error( + "No language models found. Please set one up in the Settings page. Only Gemini, Open AI and Anthropic models supported for transcript generation." + ) + st.stop() + episodes_tab, templates_tab = st.tabs(["Episodes", "Templates"]) with episodes_tab: @@ -90,7 +106,23 @@ with templates_tab: pd_cfg["ending_message"] = st.text_input( "Ending Message", placeholder="Thank you for listening!" ) - pd_cfg["provider"] = st.selectbox("Provider", provider_models.keys()) + pd_cfg["transcript_model_provider"] = st.selectbox( + "Transcript Model Provider", transcript_provider_models.keys() + ) + pd_cfg["transcript_model"] = st.selectbox( + "Transcript Model", + transcript_provider_models[pd_cfg["transcript_model_provider"]], + ) + + pd_cfg["provider"] = st.selectbox( + "Audio Model Provider", provider_models.keys() + ) + pd_cfg["model"] = st.selectbox( + "Audio Model", provider_models[pd_cfg["provider"]] + ) + st.caption( + "OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5" + ) pd_cfg["voice1"] = st.text_input( "Voice 1", help="You can use Elevenlabs voice ID" ) @@ -105,10 +137,6 @@ with templates_tab: "Voice 2", help="You can use Elevenlabs voice ID" ) - pd_cfg["model"] = st.selectbox("Model", provider_models[pd_cfg["provider"]]) - st.caption( - "OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5" - ) if st.button("Save"): try: pd = PodcastConfig(**pd_cfg) @@ -200,12 +228,65 @@ with templates_tab: placeholder="Thank you for listening!", key=f"ending_message_{pd_config.id}", ) + + if pd_config.transcript_model_provider not in transcript_provider_models: + st.warning( + f"Transcript Model Provider {pd_config.transcript_model_provider} not setup. Changing to default." 
+ ) + index = 0 + else: + index = list(transcript_provider_models.keys()).index( + pd_config.transcript_model_provider + ) + + pd_config.transcript_model_provider = st.selectbox( + "Transcript Model Provider", + list(transcript_provider_models.keys()), + index=index, + key=f"transcript_provider_{pd_config.id}", + ) + if ( + not pd_config.transcript_model + or pd_config.transcript_model + not in transcript_provider_models[pd_config.transcript_model_provider] + ): + st.warning( + f"Transcript Model {pd_config.transcript_model} not setup. Changing to default." + ) + index = 0 + else: + index = transcript_provider_models[ + pd_config.transcript_model_provider + ].index(pd_config.transcript_model) + pd_config.transcript_model = st.selectbox( + "Transcript Model", + transcript_provider_models[pd_config.transcript_model_provider], + index=index, + key=f"transcript_model_{pd_config.id}", + ) + pd_config.provider = st.selectbox( - "Provider", + "Audio Model Provider", list(provider_models.keys()), index=list(provider_models.keys()).index(pd_config.provider), key=f"provider_{pd_config.id}", ) + if pd_config.model not in provider_models[pd_config.provider]: + st.warning( + f"Audio Model {pd_config.model} not setup. Changing to default." + ) + index = 0 + else: + index = provider_models[pd_config.provider].index(pd_config.model) + pd_config.model = st.selectbox( + "Model", + provider_models[pd_config.provider], + index=index, + key=f"model_{pd_config.id}", + ) + st.caption( + "OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5" + ) pd_config.voice1 = st.text_input( "Voice 1", value=pd_config.voice1, @@ -224,20 +305,6 @@ with templates_tab: key=f"voice2_{pd_config.id}", help="You can use Elevenlabs voice ID", ) - if pd_config.model not in provider_models[pd_config.provider]: - st.warning(f"Model {pd_config.model} not setup. 
Changing to default.") - index = 0 - else: - index = provider_models[pd_config.provider].index(pd_config.model) - pd_config.model = st.selectbox( - "Model", - provider_models[pd_config.provider], - index=index, - key=f"model_{pd_config.id}", - ) - st.caption( - "OpenAI: tts-1 or tts-1-hd, Elevenlabs: eleven_multilingual_v2, eleven_turbo_v2_5" - ) if st.button("Save Config", key=f"btn_save{pd_config.id}"): try: From ee67258beb8c3a0b4ee0286925763fd1ad0c450b Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Fri, 8 Nov 2024 18:30:34 -0300 Subject: [PATCH 06/44] add full docker image with db --- .dockerignore | 3 +++ .gitignore | 1 + Dockerfile | 5 +++-- Dockerfile_full | 36 ++++++++++++++++++++++++++++++++++++ Makefile | 6 ++++-- docker-compose.full.yml | 13 +++++++++++++ supervisord.conf | 21 +++++++++++++++++++++ 7 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 Dockerfile_full create mode 100644 docker-compose.full.yml create mode 100644 supervisord.conf diff --git a/.dockerignore b/.dockerignore index 4bea5cb..f6df8f5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -13,3 +13,6 @@ docker-compose* docs/ surreal-data/ temp/ +*.env +.mypy_cache/ +.ruff_cache/ diff --git a/.gitignore b/.gitignore index 11d4a4b..5bf6efc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.env prompts/patterns/user/ notebooks/ data/ diff --git a/Dockerfile b/Dockerfile index 052cadc..f3b9e00 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,10 @@ RUN pip install poetry --no-cache-dir RUN poetry self add poetry-plugin-dotenv RUN poetry config virtualenvs.create false -COPY . /app - +COPY pyproject.toml poetry.lock /app/ RUN poetry install --only main + +COPY . 
/app EXPOSE 8502 RUN mkdir -p /app/data diff --git a/Dockerfile_full b/Dockerfile_full new file mode 100644 index 0000000..769a973 --- /dev/null +++ b/Dockerfile_full @@ -0,0 +1,36 @@ +# Use an official Python runtime as a base image +FROM python:3.11.7-slim-bullseye + +# Install system dependencies required for building certain Python packages +RUN apt-get update && apt-get install -y \ + gcc \ + curl wget libmagic-dev ffmpeg supervisor \ + && rm -rf /var/lib/apt/lists/* + +# Install SurrealDB +RUN curl --proto '=https' --tlsv1.2 -sSf https://install.surrealdb.com | sh + +# Set the working directory in the container to /app +WORKDIR /app + +COPY pyproject.toml poetry.lock /app/ +RUN pip install poetry --no-cache-dir +RUN poetry self add poetry-plugin-dotenv +RUN poetry config virtualenvs.create false + +RUN poetry install --only main + +COPY . /app + +# Create supervisor configuration directory +RUN mkdir -p /etc/supervisor/conf.d + +# Copy supervisor configuration file +COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +EXPOSE 8502 + +RUN mkdir -p /app/data + +# Use supervisor as the main process +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] diff --git a/Makefile b/Makefile index f9f6152..5a689d9 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,8 @@ docker-update-latest: docker-buildx-prepare # Release with latest docker-release-all: docker-release docker-update-latest - dev: - docker compose -f docker-compose.dev.yml up --build \ No newline at end of file + docker compose -f docker-compose.dev.yml up --build + +full: + docker compose -f docker-compose.full.yml up --build \ No newline at end of file diff --git a/docker-compose.full.yml b/docker-compose.full.yml new file mode 100644 index 0000000..b6d2299 --- /dev/null +++ b/docker-compose.full.yml @@ -0,0 +1,13 @@ +version: '3' + +services: + open_notebook_full: + build: + context: . 
+ dockerfile: Dockerfile_full + ports: + - "8080:8502" + volumes: + - ./.docker_data/data:/app/data + - ./docker2.env:/app/.env + - ./google-credentials.json:/app/google-credentials.json diff --git a/supervisord.conf b/supervisord.conf new file mode 100644 index 0000000..ca59e8b --- /dev/null +++ b/supervisord.conf @@ -0,0 +1,21 @@ +[supervisord] +nodaemon=true +logfile=/dev/stdout +logfile_maxbytes=0 +pidfile=/tmp/supervisord.pid + +[program:surrealdb] +command=surreal start --log trace --user root --pass root rocksdb:/mydata/mydatabase.db +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=true + +[program:streamlit] +command=poetry run streamlit run app_home.py +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=true From e589c7b8aae956c355a5f74c28768c0f8cc0c34c Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Fri, 8 Nov 2024 18:30:56 -0300 Subject: [PATCH 07/44] cleanup --- open_notebook/config.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/open_notebook/config.py b/open_notebook/config.py index 096850c..6d2d655 100644 --- a/open_notebook/config.py +++ b/open_notebook/config.py @@ -27,7 +27,3 @@ LANGGRAPH_CHECKPOINT_FILE = f"{sqlite_folder}/checkpoints.sqlite" # UPLOADS FOLDER UPLOADS_FOLDER = f"{DATA_FOLDER}/uploads" os.makedirs(UPLOADS_FOLDER, exist_ok=True) - -# PODCASTS FOLDER -PODCASTS_FOLDER = f"{DATA_FOLDER}/podcasts" -os.makedirs(PODCASTS_FOLDER, exist_ok=True) From d5be2b0d5b8da7c25e79d9223778adefbddcc1e1 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Sat, 9 Nov 2024 16:03:41 -0300 Subject: [PATCH 08/44] make rag async --- open_notebook/graphs/ask.py | 8 +-- pages/3_🔍_Ask_and_Search.py | 104 +++++++++++++++++++++++++++++------ 2 files changed, 91 insertions(+), 21 deletions(-) diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py index a2746db..4586872 100644 --- 
a/open_notebook/graphs/ask.py +++ b/open_notebook/graphs/ask.py @@ -49,7 +49,7 @@ class ThreadState(TypedDict): final_answer: str -def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict: +async def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict: parser = PydanticOutputParser(pydantic_object=Strategy) system_prompt = Prompter(prompt_template="ask/entry", parser=parser).render( data=state @@ -65,7 +65,7 @@ def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict return {"strategy": ai_message} -def trigger_queries(state: ThreadState, config: RunnableConfig): +async def trigger_queries(state: ThreadState, config: RunnableConfig): return [ Send( "provide_answer", @@ -80,7 +80,7 @@ def trigger_queries(state: ThreadState, config: RunnableConfig): ] -def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: +async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: payload = state if state["type"] == "text": results = text_search(state["term"], 10, True, True) @@ -100,7 +100,7 @@ def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: return {"answers": [ai_message.content]} -def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict: +async def write_final_answer(state: ThreadState, config: RunnableConfig) -> dict: system_prompt = Prompter(prompt_template="ask/final_answer").render(data=state) model = provision_langchain_model( system_prompt, diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index e2da0c2..0b0869a 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -1,7 +1,9 @@ +import asyncio + import streamlit as st from open_notebook.domain.models import Model -from open_notebook.domain.notebook import text_search, vector_search +from open_notebook.domain.notebook import Note, Notebook, text_search, vector_search from open_notebook.graphs.ask import graph as ask_graph from 
pages.stream_app.utils import convert_source_references, setup_page @@ -12,6 +14,40 @@ ask_tab, search_tab = st.tabs(["Ask Your Knowledge Base (beta)", "Search"]) if "search_results" not in st.session_state: st.session_state["search_results"] = [] +if "ask_results" not in st.session_state: + st.session_state["ask_results"] = {} + + +async def process_ask_query(question, strategy_model, answer_model, final_answer_model): + async for chunk in ask_graph.astream( + input=dict( + question=question, + ), + config=dict( + configurable=dict( + strategy_model=strategy_model.id, + answer_model=answer_model.id, + final_answer_model=final_answer_model.id, + ) + ), + stream_mode="updates", + ): + yield (chunk) + + # result = await ask_graph.ainvoke( + # dict( + # question=question, + # ), + # config=dict( + # configurable=dict( + # strategy_model=strategy_model.id, + # answer_model=answer_model.id, + # final_answer_model=final_answer_model.id, + # ) + # ), + # ) + # return result + def results_card(item): score = item.get("relevance", item.get("similarity", item.get("score", 0))) @@ -49,23 +85,57 @@ with ask_tab: format_func=lambda x: x.name, help="This is the LLM that will be responsible for processing the final answer", ) - if st.button("Ask"): - st.write(f"Searching for {question}") - rag_results = ask_graph.invoke( - dict( - question=question, - ), - config=dict( - configurable=dict( - strategy_model=strategy_model.id, - answer_model=answer_model.id, - final_answer_model=final_answer_model.id, + ask_bt = st.button("Ask") + placeholder = st.container() + + async def stream_results(): + async for chunk in process_ask_query( + question, strategy_model, answer_model, final_answer_model + ): + if "agent" in chunk: + with placeholder.expander( + f"Agent Strategy: {chunk['agent']['strategy'].reasoning}" + ): + for search in chunk["agent"]["strategy"].searches: + st.markdown(f"**{search.type} - {search.term}**") + st.markdown(f"Instructions: {search.instructions}") + elif 
"provide_answer" in chunk: + for answer in chunk["provide_answer"]["answers"]: + with placeholder.expander("Answer"): + st.markdown(convert_source_references(answer)) + elif "write_final_answer" in chunk: + st.session_state["ask_results"]["answer"] = chunk["write_final_answer"][ + "final_answer" + ] + with placeholder.container(border=True): + st.markdown( + convert_source_references( + chunk["write_final_answer"]["final_answer"] + ) + ) + + if ask_bt: + placeholder.write(f"Searching for {question}") + st.session_state["ask_results"]["question"] = question + st.session_state["ask_results"]["answer"] = None + + asyncio.run(stream_results()) + + if st.session_state["ask_results"].get("answer"): + with st.container(border=True): + with st.form("save_note_form"): + notebook = st.selectbox( + "Notebook", Notebook.get_all(), format_func=lambda x: x.name ) - ), - ) - st.markdown(convert_source_references(rag_results["final_answer"])) - with st.expander("Details (for debugging)"): - st.json(rag_results) + if st.form_submit_button("Save Answer as Note"): + note = Note( + title=st.session_state["ask_results"]["question"], + content=st.session_state["ask_results"]["answer"], + ) + note.save() + note.add_to_notebook(notebook.id) + st.success("Note saved successfully") + with search_tab: with st.container(border=True): From 66edfc1e2b56c052fff28b4c05d9748ed0eebca7 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Sun, 10 Nov 2024 13:27:30 -0300 Subject: [PATCH 09/44] create record if doesnt exist --- open_notebook/domain/base.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/open_notebook/domain/base.py b/open_notebook/domain/base.py index 8514e89..80a624f 100644 --- a/open_notebook/domain/base.py +++ b/open_notebook/domain/base.py @@ -204,9 +204,13 @@ class RecordModel(BaseModel): result = repo_query(f"SELECT * FROM {self.record_id};") if result: result = result[0] - for key, value in result.items(): - if hasattr(self, key): - setattr(self, key, 
value) + else: + repo_create(self.record_id, {}) + result = {} + for key, value in result.items(): + if hasattr(self, key): + setattr(self, key, value) + return self def update(self, data): From b42a95b35f716d0aafad0223bf2684ba1b312aeb Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Sun, 10 Nov 2024 13:28:38 -0300 Subject: [PATCH 10/44] add transformation to domain --- pages/7_⚙️_Settings.py | 30 +++++++++++++++++++++++++++++- pages/components/source_panel.py | 25 +++++++++++-------------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/pages/7_⚙️_Settings.py b/pages/7_⚙️_Settings.py index 4d3939c..135e889 100644 --- a/pages/7_⚙️_Settings.py +++ b/pages/7_⚙️_Settings.py @@ -3,6 +3,7 @@ import os import streamlit as st from open_notebook.domain.models import DefaultModels, Model, model_manager +from open_notebook.domain.transformation import DefaultTransformations, Transformation from open_notebook.models import MODEL_CLASS_MAP from pages.stream_app.utils import setup_page @@ -11,7 +12,9 @@ setup_page("⚙️ Settings") st.title("⚙️ Settings") -model_tab, model_defaults_tab = st.tabs(["Models", "Model Defaults"]) +model_tab, model_defaults_tab, transformations_tab = st.tabs( + ["Models", "Model Defaults", "Transformations"] +) provider_status = {} @@ -231,3 +234,28 @@ with model_defaults_tab: defs[k] = v.id DefaultModels().update(defs) model_manager.refresh_defaults() + +with transformations_tab: + transformations = Transformation.get_all() + default_transformations = DefaultTransformations() + st.markdown("Please, select which transformations to apply by default on sources") + selected_transformations = {} + for transformation in transformations["source_insights"]: + with st.container(border=True): + selected_transformations[transformation["name"]] = st.checkbox( + f"**{transformation['name']}**", + value=( + transformation["name"] in default_transformations.source_insights + ), + ) + st.write(transformation["description"]) + p = ["- " + pattern for 
pattern in transformation["patterns"]] + st.markdown("\n".join(p)) + if st.button("Save Defaults", key="save_transformations"): + default_transformations.source_insights = [ + transformation + for transformation, selected in selected_transformations.items() + if selected + ] + default_transformations.update(default_transformations.model_dump()) + st.toast("Default Transformations saved successfully") diff --git a/pages/components/source_panel.py b/pages/components/source_panel.py index 8fa130d..6b2ed43 100644 --- a/pages/components/source_panel.py +++ b/pages/components/source_panel.py @@ -1,9 +1,9 @@ import streamlit as st import streamlit_scrollable_textbox as stx # type: ignore -import yaml from humanize import naturaltime from open_notebook.domain.notebook import Source +from open_notebook.domain.transformation import Transformation from open_notebook.utils import surreal_clean from pages.stream_app.utils import run_patterns @@ -43,19 +43,16 @@ def source_panel(source_id: str, modal=False): st.rerun(scope="fragment" if modal else "app") with c2: - with open("transformations.yaml", "r") as file: - transformations = yaml.safe_load(file) - for transformation in transformations["source_insights"]: - if st.button( - transformation["name"], help=transformation["description"] - ): - result = run_patterns( - source.full_text, transformation["patterns"] - ) - source.add_insight( - transformation["insight_type"], surreal_clean(result) - ) - st.rerun(scope="fragment" if modal else "app") + transformations = Transformation.get_all() + for transformation in transformations["source_insights"]: + if st.button( + transformation["name"], help=transformation["description"] + ): + result = run_patterns(source.full_text, transformation["patterns"]) + source.add_insight( + transformation["insight_type"], surreal_clean(result) + ) + st.rerun(scope="fragment" if modal else "app") if st.button( "Embed vectors", From 2e2a4947b3decabc16679788a9e3572a2a99c794 Mon Sep 17 00:00:00 2001 
From: LUIS NOVO Date: Sun, 10 Nov 2024 13:30:03 -0300 Subject: [PATCH 11/44] separate source and content graph --- open_notebook/domain/transformation.py | 19 ++++ .../graphs/content_processing/__init__.py | 13 ++- .../graphs/content_processing/audio.py | 4 +- .../graphs/content_processing/office.py | 4 +- .../graphs/content_processing/pdf.py | 4 +- .../graphs/content_processing/state.py | 2 +- .../graphs/content_processing/text.py | 4 +- .../graphs/content_processing/url.py | 6 +- .../graphs/content_processing/video.py | 4 +- .../graphs/content_processing/youtube.py | 4 +- open_notebook/graphs/source.py | 106 ++++++++++++++++++ pages/stream_app/source.py | 53 ++++----- 12 files changed, 167 insertions(+), 56 deletions(-) create mode 100644 open_notebook/domain/transformation.py create mode 100644 open_notebook/graphs/source.py diff --git a/open_notebook/domain/transformation.py b/open_notebook/domain/transformation.py new file mode 100644 index 0000000..6662365 --- /dev/null +++ b/open_notebook/domain/transformation.py @@ -0,0 +1,19 @@ +from typing import ClassVar, List, Optional + +import yaml +from pydantic import Field + +from open_notebook.domain.base import RecordModel + + +class Transformation: + @classmethod + def get_all(cls): + with open("transformations.yaml", "r") as file: + transformations = yaml.safe_load(file) + return transformations + + +class DefaultTransformations(RecordModel): + record_id: ClassVar[str] = "open_notebook:default_transformations" + source_insights: Optional[List[str]] = Field(default_factory=list) diff --git a/open_notebook/graphs/content_processing/__init__.py b/open_notebook/graphs/content_processing/__init__.py index 915da23..270bb4f 100644 --- a/open_notebook/graphs/content_processing/__init__.py +++ b/open_notebook/graphs/content_processing/__init__.py @@ -14,14 +14,14 @@ from open_notebook.graphs.content_processing.pdf import ( SUPPORTED_FITZ_TYPES, extract_pdf, ) -from open_notebook.graphs.content_processing.state import 
SourceState +from open_notebook.graphs.content_processing.state import ContentState from open_notebook.graphs.content_processing.text import extract_txt from open_notebook.graphs.content_processing.url import extract_url, url_provider from open_notebook.graphs.content_processing.video import extract_best_audio_from_video from open_notebook.graphs.content_processing.youtube import extract_youtube_transcript -def source_identification(state: SourceState): +def source_identification(state: ContentState): """ Identify the content source based on parameters """ @@ -37,7 +37,7 @@ def source_identification(state: SourceState): return {"source_type": doc_type} -def file_type(state: SourceState): +def file_type(state: ContentState): """ Identify the file using python-magic """ @@ -45,10 +45,11 @@ def file_type(state: SourceState): file_path = state.get("file_path") if file_path is not None: return_dict["identified_type"] = magic.from_file(file_path, mime=True) + return_dict["title"] = os.path.basename(file_path) return return_dict -def file_type_edge(data: SourceState): +def file_type_edge(data: ContentState): assert data.get("identified_type"), "Type not identified" identified_type = data["identified_type"] @@ -68,7 +69,7 @@ def file_type_edge(data: SourceState): ) -def delete_file(data: SourceState): +def delete_file(data: ContentState): if data.get("delete_source"): logger.debug(f"Deleting file: {data.get('file_path')}") file_path = data.get("file_path") @@ -82,7 +83,7 @@ def delete_file(data: SourceState): logger.debug("Not deleting file") -workflow = StateGraph(SourceState) +workflow = StateGraph(ContentState) workflow.add_node("source", source_identification) workflow.add_node("url_provider", url_provider) workflow.add_node("file_type", file_type) diff --git a/open_notebook/graphs/content_processing/audio.py b/open_notebook/graphs/content_processing/audio.py index 3f99277..b3d7617 100644 --- a/open_notebook/graphs/content_processing/audio.py +++ 
b/open_notebook/graphs/content_processing/audio.py @@ -5,7 +5,7 @@ from loguru import logger from pydub import AudioSegment from open_notebook.domain.models import model_manager -from open_notebook.graphs.content_processing.state import SourceState +from open_notebook.graphs.content_processing.state import ContentState # todo: remove reference to model_manager # future: parallelize the transcription process @@ -72,7 +72,7 @@ def split_audio(input_file, segment_length_minutes=15, output_prefix=None): return output_files -def extract_audio(data: SourceState): +def extract_audio(data: ContentState): SPEECH_TO_TEXT_MODEL = model_manager.speech_to_text input_audio_path = data.get("file_path") diff --git a/open_notebook/graphs/content_processing/office.py b/open_notebook/graphs/content_processing/office.py index 4736d8d..f7403a0 100644 --- a/open_notebook/graphs/content_processing/office.py +++ b/open_notebook/graphs/content_processing/office.py @@ -3,7 +3,7 @@ from loguru import logger from openpyxl import load_workbook from pptx import Presentation -from open_notebook.graphs.content_processing.state import SourceState +from open_notebook.graphs.content_processing.state import ContentState SUPPORTED_OFFICE_TYPES = [ "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -251,7 +251,7 @@ def get_xlsx_info(file_path): return None -def extract_office_content(state: SourceState): +def extract_office_content(state: ContentState): """Universal function to extract content from Office files""" assert state.get("file_path"), "No file path provided" assert ( diff --git a/open_notebook/graphs/content_processing/pdf.py b/open_notebook/graphs/content_processing/pdf.py index e842a67..610ee58 100644 --- a/open_notebook/graphs/content_processing/pdf.py +++ b/open_notebook/graphs/content_processing/pdf.py @@ -4,7 +4,7 @@ import unicodedata import fitz # type: ignore from loguru import logger -from open_notebook.graphs.content_processing.state import SourceState 
+from open_notebook.graphs.content_processing.state import ContentState # todo: find tables - https://pymupdf.readthedocs.io/en/latest/the-basics.html#extracting-tables-from-a-page # todo: what else can we do to make the text more readable? @@ -127,7 +127,7 @@ def _extract_text_from_pdf(pdf_path): doc.close() -def extract_pdf(state: SourceState): +def extract_pdf(state: ContentState): """ Parse the text file and print its content. """ diff --git a/open_notebook/graphs/content_processing/state.py b/open_notebook/graphs/content_processing/state.py index 37bffbf..586ee45 100644 --- a/open_notebook/graphs/content_processing/state.py +++ b/open_notebook/graphs/content_processing/state.py @@ -1,7 +1,7 @@ from typing_extensions import TypedDict -class SourceState(TypedDict): +class ContentState(TypedDict): content: str file_path: str url: str diff --git a/open_notebook/graphs/content_processing/text.py b/open_notebook/graphs/content_processing/text.py index e286e0f..b81ca6c 100644 --- a/open_notebook/graphs/content_processing/text.py +++ b/open_notebook/graphs/content_processing/text.py @@ -1,9 +1,9 @@ from loguru import logger -from open_notebook.graphs.content_processing.state import SourceState +from open_notebook.graphs.content_processing.state import ContentState -def extract_txt(state: SourceState): +def extract_txt(state: ContentState): """ Parse the text file and print its content. 
""" diff --git a/open_notebook/graphs/content_processing/url.py b/open_notebook/graphs/content_processing/url.py index 05a00fd..c06efbc 100644 --- a/open_notebook/graphs/content_processing/url.py +++ b/open_notebook/graphs/content_processing/url.py @@ -5,14 +5,14 @@ import requests # type: ignore from bs4 import BeautifulSoup, Comment from loguru import logger -from open_notebook.graphs.content_processing.state import SourceState +from open_notebook.graphs.content_processing.state import ContentState # future: better extraction methods # https://github.com/buriy/python-readability # also try readability: from readability import Document -def url_provider(state: SourceState): +def url_provider(state: ContentState): """ Identify the provider """ @@ -173,7 +173,7 @@ def extract_url_jina(url: str): return {"content": text} -def extract_url(state: SourceState): +def extract_url(state: ContentState): assert state.get("url"), "No URL provided" url = state["url"] try: diff --git a/open_notebook/graphs/content_processing/video.py b/open_notebook/graphs/content_processing/video.py index acd23e4..c48e540 100644 --- a/open_notebook/graphs/content_processing/video.py +++ b/open_notebook/graphs/content_processing/video.py @@ -4,7 +4,7 @@ import subprocess from loguru import logger -from open_notebook.graphs.content_processing.state import SourceState +from open_notebook.graphs.content_processing.state import ContentState def extract_audio_from_video(input_file, output_file, stream_index): @@ -102,7 +102,7 @@ def select_best_audio_stream(streams): return max(scored_streams, key=lambda x: x[0])[1] -def extract_best_audio_from_video(data: SourceState): +def extract_best_audio_from_video(data: ContentState): """ Main function to extract the best audio stream from a video file """ diff --git a/open_notebook/graphs/content_processing/youtube.py b/open_notebook/graphs/content_processing/youtube.py index 8e73c51..1e85192 100644 --- a/open_notebook/graphs/content_processing/youtube.py 
+++ b/open_notebook/graphs/content_processing/youtube.py @@ -9,7 +9,7 @@ from youtube_transcript_api.formatters import TextFormatter # type: ignore from open_notebook.config import CONFIG from open_notebook.exceptions import NoTranscriptFound -from open_notebook.graphs.content_processing.state import SourceState +from open_notebook.graphs.content_processing.state import ContentState ssl._create_default_https_context = ssl._create_unverified_context @@ -129,7 +129,7 @@ def get_best_transcript(video_id, preferred_langs=["en", "es", "pt"]): return None -def extract_youtube_transcript(state: SourceState): +def extract_youtube_transcript(state: ContentState): """ Parse the text file and print its content. """ diff --git a/open_notebook/graphs/source.py b/open_notebook/graphs/source.py new file mode 100644 index 0000000..39813ab --- /dev/null +++ b/open_notebook/graphs/source.py @@ -0,0 +1,106 @@ +import operator +from typing import List + +from langchain_core.runnables import ( + RunnableConfig, +) +from langgraph.graph import END, START, StateGraph +from langgraph.types import Send +from loguru import logger +from typing_extensions import Annotated, TypedDict + +from open_notebook.domain.notebook import Asset, Source +from open_notebook.domain.transformation import Transformation +from open_notebook.graphs.content_processing import ContentState +from open_notebook.graphs.content_processing import graph as content_graph +from open_notebook.graphs.multipattern import graph as transform_graph +from open_notebook.utils import surreal_clean + +# todo: we can make this more efficient + + +class SourceState(TypedDict): + content_state: ContentState + transformations: List[str] + notebook_id: str + source: Source + transformations: Annotated[list, operator.add] + + +class TransformationState(TypedDict): + source: Source + transformation: dict + + +def content_process(state: SourceState): + content_state = state["content_state"] + logger.debug("Content processing started for 
new content") + return {"content_state": content_graph.invoke(content_state)} + + +def run_patterns(input_text, patterns): + output = transform_graph.invoke(dict(content_stack=[input_text], patterns=patterns)) + return output["output"] + + +def save_source(state: SourceState): + logger.debug("Saving source") + content_state = state["content_state"] + source = Source( + asset=Asset( + url=content_state.get("url"), file_path=content_state.get("file_path") + ), + full_text=surreal_clean(content_state["content"]), + title=content_state.get("title"), + ) + source.save() + + if state["notebook_id"]: + logger.debug(f"Adding source to notebook {state['notebook_id']}") + source.add_to_notebook(state["notebook_id"]) + return {"source": source} + + +def trigger_transformations(state: SourceState, config: RunnableConfig): + if len(state["transformations"]) == 0: + return [] + transformations = Transformation.get_all() + to_apply = [ + t + for t in transformations["source_insights"] + if t["name"] in state["transformations"] + ] + logger.debug(f"Applying transformations {to_apply}") + return [ + Send( + "transform_content", + { + "source": state["source"], + "transformation": t, + }, + ) + for t in to_apply + ] + + +def transform_content(state: TransformationState): + source = state["source"] + content = source.full_text + transformation = state["transformation"] + logger.debug(f"Applying transformation {transformation['name']}") + result = run_patterns(content, patterns=transformation["patterns"]) + source.add_insight(transformation["name"], surreal_clean(result)) + return {"transformations": [{"name": transformation["name"], "content": result}]} + + +workflow = StateGraph(SourceState) +workflow.add_node("content_process", content_process) +workflow.add_node("save_source", save_source) +workflow.add_node("transform_content", transform_content) +workflow.add_edge(START, "content_process") +workflow.add_edge("content_process", "save_source") +workflow.add_conditional_edges( + 
"save_source", trigger_transformations, ["transform_content"] +) +workflow.add_edge("transform_content", END) +source_graph = workflow.compile() diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index 629e2d8..6114bd7 100644 --- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -6,36 +6,15 @@ from humanize import naturaltime from loguru import logger from open_notebook.config import UPLOADS_FOLDER -from open_notebook.domain.notebook import Asset, Source +from open_notebook.domain.notebook import Source +from open_notebook.domain.transformation import DefaultTransformations, Transformation from open_notebook.exceptions import UnsupportedTypeException -from open_notebook.graphs.content_processing import graph -from open_notebook.utils import surreal_clean +from open_notebook.graphs.source import source_graph from pages.components import source_panel -from pages.stream_app.utils import run_patterns from .consts import context_icons -# moved it here to replace it with the pipeline on 0.1.0 -def generate_toc_and_title(source) -> "Source": - try: - patterns = ["patterns/default/toc"] - result = run_patterns(source.full_text, patterns=patterns) - source.add_insight("Table of Contents", surreal_clean(result)) - if not source.title: - patterns = [ - "Based on the Table of Contents below, please provide a Title for this content, with max 15 words" - ] - output = run_patterns(result, patterns=patterns) - source.title = surreal_clean(output) - source.save() - return source - except Exception as e: - logger.error(f"Error summarizing source {source.id}: {str(e)}") - logger.exception(e) - raise - - @st.dialog("Source", width="large") def source_panel_dialog(source_id): source_panel(source_id, modal=True) @@ -48,6 +27,7 @@ def add_source(notebook_id): source_text = None source_type = st.radio("Type", ["Link", "Upload", "Text"]) req = {} + transformations = Transformation.get_all() if source_type == "Link": source_link = st.text_input("Link") 
req["url"] = source_link @@ -58,6 +38,14 @@ def add_source(notebook_id): else: source_text = st.text_area("Text") req["content"] = source_text + + default_transformations = [t for t in DefaultTransformations().source_insights] + available_transformations = [t["name"] for t in transformations["source_insights"]] + apply_transformations = st.multiselect( + "Apply transformations", + options=available_transformations, + default=default_transformations, + ) if st.button("Process", key="add_source"): logger.debug("Adding source") with st.status("Processing...", expanded=True): @@ -82,17 +70,14 @@ def add_source(notebook_id): with open(new_path, "wb") as f: f.write(source_file.getbuffer()) - result = graph.invoke(req) - st.write("Saving..") - source = Source( - asset=Asset(url=req.get("url"), file_path=req.get("file_path")), - full_text=surreal_clean(result["content"]), - title=result.get("title"), + st.write("Processing content..") + source_graph.invoke( + { + "content_state": req, + "notebook_id": notebook_id, + "transformations": apply_transformations, + } ) - source.save() - source.add_to_notebook(notebook_id) - st.write("Summarizing...") - generate_toc_and_title(source) except UnsupportedTypeException as e: st.warning( "This type of content is not supported yet. 
If you think it should be, let us know on the project Issues's page" From ac2ea9e55493db293abe6ab1a182aa23e2efeb2a Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 16:49:50 -0300 Subject: [PATCH 12/44] add x.ai support --- open_notebook/models/__init__.py | 2 ++ open_notebook/models/llms.py | 30 +++++++++++++++++++++++++++++- pages/7_⚙️_Settings.py | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/open_notebook/models/__init__.py b/open_notebook/models/__init__.py index fc431d7..3e3b72a 100644 --- a/open_notebook/models/__init__.py +++ b/open_notebook/models/__init__.py @@ -17,6 +17,7 @@ from open_notebook.models.llms import ( OpenRouterLanguageModel, VertexAILanguageModel, VertexAnthropicLanguageModel, + XAILanguageModel, ) from open_notebook.models.speech_to_text_models import ( OpenAISpeechToTextModel, @@ -44,6 +45,7 @@ MODEL_CLASS_MAP: Dict[str, ProviderMap] = { "anthropic": AnthropicLanguageModel, "openai": OpenAILanguageModel, "gemini": GeminiLanguageModel, + "xai": XAILanguageModel, }, "embedding": { "openai": OpenAIEmbeddingModel, diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py index 3c9046d..52ff94b 100644 --- a/open_notebook/models/llms.py +++ b/open_notebook/models/llms.py @@ -171,7 +171,7 @@ class OpenRouterLanguageModel(LanguageModel): def to_langchain(self) -> ChatOpenAI: """ - Convert the language model to a LangChain chat model. + Convert the language model to a LangChain chat model for Open Router. """ kwargs = self.kwargs if self.json: @@ -191,6 +191,34 @@ class OpenRouterLanguageModel(LanguageModel): ) +@dataclass +class XAILanguageModel(LanguageModel): + """ + Language model that uses the OpenAI chat model for X.AI. + """ + + model_name: str + + def to_langchain(self) -> ChatOpenAI: + """ + Convert the language model to a LangChain chat model. 
+ """ + kwargs = self.kwargs + if self.json: + kwargs["response_format"] = {"type": "json_object"} + + return ChatOpenAI( + model=self.model_name, + temperature=self.temperature or 0.5, + base_url=os.environ.get("XAI_BASE_URL", "https://api.x.ai/v1"), + max_tokens=self.max_tokens, + model_kwargs=kwargs, + streaming=self.streaming, + api_key=SecretStr(os.environ.get("XAI_API_KEY", "xai")), + top_p=self.top_p, + ) + + @dataclass class AnthropicLanguageModel(LanguageModel): """ diff --git a/pages/7_⚙️_Settings.py b/pages/7_⚙️_Settings.py index 135e889..e2b09f0 100644 --- a/pages/7_⚙️_Settings.py +++ b/pages/7_⚙️_Settings.py @@ -28,6 +28,7 @@ model_types = [ provider_status["ollama"] = os.environ.get("OLLAMA_API_BASE") is not None provider_status["openai"] = os.environ.get("OPENAI_API_KEY") is not None +provider_status["xai"] = os.environ.get("XAI_API_KEY") is not None provider_status["vertexai"] = ( os.environ.get("VERTEX_PROJECT") is not None and os.environ.get("VERTEX_LOCATION") is not None From 00f070a644e4bca2c660a0afc7367cee9dacbfaa Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 17:32:35 -0300 Subject: [PATCH 13/44] add async content processing --- .../graphs/content_processing/__init__.py | 29 +- .../graphs/content_processing/audio.py | 126 +++--- .../graphs/content_processing/office.py | 428 ++++++++++-------- .../graphs/content_processing/pdf.py | 28 +- .../graphs/content_processing/text.py | 24 +- .../graphs/content_processing/url.py | 53 +-- .../graphs/content_processing/video.py | 179 ++++---- .../graphs/content_processing/youtube.py | 20 +- open_notebook/graphs/source.py | 34 +- pages/stream_app/source.py | 15 +- 10 files changed, 541 insertions(+), 395 deletions(-) diff --git a/open_notebook/graphs/content_processing/__init__.py b/open_notebook/graphs/content_processing/__init__.py index 270bb4f..5e22311 100644 --- a/open_notebook/graphs/content_processing/__init__.py +++ b/open_notebook/graphs/content_processing/__init__.py @@ -1,4 +1,5 
@@ import os +from typing import Any, Dict import magic from langgraph.graph import END, START, StateGraph @@ -21,7 +22,7 @@ from open_notebook.graphs.content_processing.video import extract_best_audio_fro from open_notebook.graphs.content_processing.youtube import extract_youtube_transcript -def source_identification(state: ContentState): +async def source_identification(state: ContentState) -> Dict[str, str]: """ Identify the content source based on parameters """ @@ -37,7 +38,7 @@ def source_identification(state: ContentState): return {"source_type": doc_type} -def file_type(state: ContentState): +async def file_type(state: ContentState) -> Dict[str, Any]: """ Identify the file using python-magic """ @@ -49,7 +50,7 @@ def file_type(state: ContentState): return return_dict -def file_type_edge(data: ContentState): +async def file_type_edge(data: ContentState) -> str: assert data.get("identified_type"), "Type not identified" identified_type = data["identified_type"] @@ -69,7 +70,7 @@ def file_type_edge(data: ContentState): ) -def delete_file(data: ContentState): +async def delete_file(data: ContentState) -> Dict[str, Any]: if data.get("delete_source"): logger.debug(f"Deleting file: {data.get('file_path')}") file_path = data.get("file_path") @@ -81,9 +82,21 @@ def delete_file(data: ContentState): logger.warning(f"File not found while trying to delete: {file_path}") else: logger.debug("Not deleting file") + return {} +async def url_type_router(x: ContentState) -> str: + return x.get("identified_type", "") + + +async def source_type_router(x: ContentState) -> str: + return x.get("source_type", "") + + +# Create workflow workflow = StateGraph(ContentState) + +# Add nodes workflow.add_node("source", source_identification) workflow.add_node("url_provider", url_provider) workflow.add_node("file_type", file_type) @@ -95,10 +108,12 @@ workflow.add_node("extract_best_audio_from_video", extract_best_audio_from_video workflow.add_node("extract_audio", extract_audio) 
workflow.add_node("extract_youtube_transcript", extract_youtube_transcript) workflow.add_node("delete_file", delete_file) + +# Add edges workflow.add_edge(START, "source") workflow.add_conditional_edges( "source", - lambda x: x.get("source_type"), + source_type_router, { "url": "url_provider", "file": "file_type", @@ -111,7 +126,7 @@ workflow.add_conditional_edges( ) workflow.add_conditional_edges( "url_provider", - lambda x: x.get("identified_type"), + url_type_router, {"article": "extract_url", "youtube": "extract_youtube_transcript"}, ) workflow.add_edge("url_provider", END) @@ -125,4 +140,6 @@ workflow.add_edge("extract_office_content", "delete_file") workflow.add_edge("extract_best_audio_from_video", "extract_audio") workflow.add_edge("extract_audio", "delete_file") workflow.add_edge("delete_file", END) + +# Compile graph graph = workflow.compile() diff --git a/open_notebook/graphs/content_processing/audio.py b/open_notebook/graphs/content_processing/audio.py index b3d7617..6201788 100644 --- a/open_notebook/graphs/content_processing/audio.py +++ b/open_notebook/graphs/content_processing/audio.py @@ -1,4 +1,6 @@ +import asyncio import os +from functools import partial from math import ceil from loguru import logger @@ -11,90 +13,102 @@ from open_notebook.graphs.content_processing.state import ContentState # future: parallelize the transcription process -def split_audio(input_file, segment_length_minutes=15, output_prefix=None): +async def split_audio(input_file, segment_length_minutes=15, output_prefix=None): """ - Split an audio file into segments of specified length. 
- - Args: - input_file (str): Path to the input audio file - segment_length_minutes (int): Length of each segment in minutes - output_dir (str): Directory to save the segments (defaults to input file's directory) - output_prefix (str): Prefix for output files (defaults to input filename) - - Returns: - list: List of paths to the created segment files + Split an audio file into segments asynchronously. """ - # Convert input file to absolute path - input_file = os.path.abspath(input_file) - output_dir = os.path.dirname(input_file) - os.makedirs(output_dir, exist_ok=True) + def _split(input_file, segment_length_minutes, output_prefix): + # Convert input file to absolute path + input_file_abs = os.path.abspath(input_file) + output_dir = os.path.dirname(input_file_abs) + os.makedirs(output_dir, exist_ok=True) - # Set up output prefix - if output_prefix is None: - output_prefix = os.path.splitext(os.path.basename(input_file))[0] + # Set up output prefix + if output_prefix is None: + output_prefix = os.path.splitext(os.path.basename(input_file_abs))[0] - # Load the audio file - audio = AudioSegment.from_file(input_file) + # Load the audio file + audio = AudioSegment.from_file(input_file_abs) - # Calculate segment length in milliseconds - segment_length_ms = segment_length_minutes * 60 * 1000 + # Calculate segment length in milliseconds + segment_length_ms = segment_length_minutes * 60 * 1000 - # Calculate number of segments - total_segments = ceil(len(audio) / segment_length_ms) - logger.debug(f"Splitting file: {input_file} into {total_segments} segments") + # Calculate number of segments + total_segments = ceil(len(audio) / segment_length_ms) + logger.debug(f"Splitting file: {input_file_abs} into {total_segments} segments") - # List to store output file paths - output_files = [] + output_files = [] - # Split the audio into segments - for i in range(total_segments): - # Calculate start and end times for this segment - start_time = i * segment_length_ms - end_time = min((i 
+ 1) * segment_length_ms, len(audio)) + # Split the audio into segments + for i in range(total_segments): + start_time = i * segment_length_ms + end_time = min((i + 1) * segment_length_ms, len(audio)) - # Extract segment - segment = audio[start_time:end_time] + # Extract segment + segment = audio[start_time:end_time] - # Generate output filename - # Format: prefix_001.mp3 (padding with zeros ensures correct ordering) - output_filename = f"{output_prefix}_{str(i+1).zfill(3)}.mp3" - output_path = os.path.join(output_dir, output_filename) + # Generate output filename + output_filename = f"{output_prefix}_{str(i+1).zfill(3)}.mp3" + output_path = os.path.join(output_dir, output_filename) - # Export segment - segment.export(output_path, format="mp3") + # Export segment + segment.export(output_path, format="mp3") + output_files.append(output_path) - output_files.append(output_path) + logger.debug(f"Exported segment {i+1}/{total_segments}: {output_filename}") - # Optional progress indication - logger.debug(f"Exported segment {i+1}/{total_segments}: {output_filename}") + return output_files - return output_files + # Run CPU-bound audio processing in thread pool + return await asyncio.get_event_loop().run_in_executor( + None, partial(_split, input_file, segment_length_minutes, output_prefix) + ) -def extract_audio(data: ContentState): +async def transcribe_audio_segment(audio_file, model): + """Transcribe a single audio segment asynchronously""" + + def _transcribe(audio_file, model): + return model.transcribe(audio_file) + + return await asyncio.get_event_loop().run_in_executor( + None, partial(_transcribe, audio_file, model) + ) + + +async def extract_audio(data: ContentState): SPEECH_TO_TEXT_MODEL = model_manager.speech_to_text - input_audio_path = data.get("file_path") audio_files = [] try: - audio_files = split_audio(input_audio_path) - transcriptions = [] + # Split audio into segments + audio_files = await split_audio(input_audio_path) - for audio_file in audio_files: 
- transcriptions.append(SPEECH_TO_TEXT_MODEL.transcribe(audio_file)) + # Transcribe all segments concurrently + transcribe_tasks = [ + transcribe_audio_segment(audio_file, SPEECH_TO_TEXT_MODEL) + for audio_file in audio_files + ] + transcriptions = await asyncio.gather(*transcribe_tasks) return {"content": " ".join(transcriptions)} except Exception as e: logger.error(f"Error transcribing audio: {str(e)}") logger.exception(e) - raise # Re-raise the exception after logging + raise finally: - for file in audio_files: - try: - os.remove(file) - except OSError as e: - logger.error(f"Error removing temporary file {file}: {str(e)}") + # Clean up temporary files + def _cleanup(files): + for file in files: + try: + os.remove(file) + except OSError as e: + logger.error(f"Error removing temporary file {file}: {str(e)}") + + await asyncio.get_event_loop().run_in_executor( + None, partial(_cleanup, audio_files) + ) diff --git a/open_notebook/graphs/content_processing/office.py b/open_notebook/graphs/content_processing/office.py index f7403a0..98f8ea5 100644 --- a/open_notebook/graphs/content_processing/office.py +++ b/open_notebook/graphs/content_processing/office.py @@ -1,3 +1,6 @@ +import asyncio +from functools import partial + from docx import Document from loguru import logger from openpyxl import load_workbook @@ -12,252 +15,284 @@ SUPPORTED_OFFICE_TYPES = [ ] -def extract_docx_content_detailed(file_path): - try: - doc = Document(file_path) - content = [] +async def extract_docx_content_detailed(file_path): + """Extract content from DOCX file""" - for paragraph in doc.paragraphs: - if not paragraph.text.strip(): - continue + def _extract(): + try: + doc = Document(file_path) + content = [] - style = paragraph.style.name if paragraph.style else "Normal" - text = paragraph.text.strip() + for paragraph in doc.paragraphs: + if not paragraph.text.strip(): + continue - # Get paragraph formatting - p_format = paragraph.paragraph_format - indent = p_format.left_indent or 0 + 
style = paragraph.style.name if paragraph.style else "Normal" + text = paragraph.text.strip() - # Convert indent to spaces (1 level = 4 spaces) - indent_level = 0 - if hasattr(indent, "pt"): - indent_level = int(indent.pt / 72) # 72 points = 1 inch - indent_spaces = " " * (indent_level * 4) + # Get paragraph formatting + p_format = paragraph.paragraph_format + indent = p_format.left_indent or 0 - # Handle different types of formatting - if "Heading" in style: - level = style[-1] if style[-1].isdigit() else "1" - heading_marks = "#" * int(level) - content.append(f"\n{heading_marks} {text}\n") + # Convert indent to spaces (1 level = 4 spaces) + indent_level = 0 + if hasattr(indent, "pt"): + indent_level = int(indent.pt / 72) # 72 points = 1 inch + indent_spaces = " " * (indent_level * 4) - # Handle bullet points - elif ( - paragraph.style - and hasattr(paragraph.style, "name") - and paragraph.style.name.startswith("List") - ): - # Numbered list - if ( - hasattr(paragraph._p, "pPr") - and paragraph._p.pPr is not None - and hasattr(paragraph._p.pPr, "numPr") - and paragraph._p.pPr.numPr is not None + # Handle different types of formatting + if "Heading" in style: + level = style[-1] if style[-1].isdigit() else "1" + heading_marks = "#" * int(level) + content.append(f"\n{heading_marks} {text}\n") + + # Handle bullet points + elif ( + paragraph.style + and hasattr(paragraph.style, "name") + and paragraph.style.name.startswith("List") ): - # Try to get the actual number - try: - if ( - hasattr(paragraph._p.pPr.numPr, "numId") - and paragraph._p.pPr.numPr.numId is not None - and hasattr(paragraph._p.pPr.numPr.numId, "val") - ): - number = paragraph._p.pPr.numPr.numId.val - content.append(f"{indent_spaces}{number}. 
{text}") - else: + # Numbered list + if ( + hasattr(paragraph._p, "pPr") + and paragraph._p.pPr is not None + and hasattr(paragraph._p.pPr, "numPr") + and paragraph._p.pPr.numPr is not None + ): + # Try to get the actual number + try: + if ( + hasattr(paragraph._p.pPr.numPr, "numId") + and paragraph._p.pPr.numPr.numId is not None + and hasattr(paragraph._p.pPr.numPr.numId, "val") + ): + number = paragraph._p.pPr.numPr.numId.val + content.append(f"{indent_spaces}{number}. {text}") + else: + content.append(f"{indent_spaces}1. {text}") + except Exception: content.append(f"{indent_spaces}1. {text}") - except Exception: - content.append(f"{indent_spaces}1. {text}") - # Bullet list - else: - content.append(f"{indent_spaces}* {text}") - - else: - # Handle text formatting - formatted_text = [] - for run in paragraph.runs: - if run.bold: - formatted_text.append(f"**{run.text}**") - elif run.italic: - formatted_text.append(f"*{run.text}*") + # Bullet list else: - formatted_text.append(run.text) + content.append(f"{indent_spaces}* {text}") - content.append(f"{indent_spaces}{''.join(formatted_text)}") + else: + # Handle text formatting + formatted_text = [] + for run in paragraph.runs: + if run.bold: + formatted_text.append(f"**{run.text}**") + elif run.italic: + formatted_text.append(f"*{run.text}*") + else: + formatted_text.append(run.text) - return "\n\n".join(content) + content.append(f"{indent_spaces}{''.join(formatted_text)}") - except Exception as e: - logger.error(f"Failed to extract DOCX content: {e}") - return None + return "\n\n".join(content) + + except Exception as e: + logger.error(f"Failed to extract DOCX content: {e}") + return None + + return await asyncio.get_event_loop().run_in_executor(None, _extract) -# Example of usage with metadata -def get_docx_info(file_path): - try: - doc = Document(file_path) +async def get_docx_info(file_path): + """Get DOCX metadata and content""" - # Extract core properties if available - core_props = { - "author": 
doc.core_properties.author, - "created": doc.core_properties.created, - "modified": doc.core_properties.modified, - "title": doc.core_properties.title, - "subject": doc.core_properties.subject, - "keywords": doc.core_properties.keywords, - "category": doc.core_properties.category, - "comments": doc.core_properties.comments, - } + def _get_info(): + try: + doc = Document(file_path) - # Get document content - content = extract_docx_content_detailed(file_path) + # Extract core properties if available + core_props = { + "author": doc.core_properties.author, + "created": doc.core_properties.created, + "modified": doc.core_properties.modified, + "title": doc.core_properties.title, + "subject": doc.core_properties.subject, + "keywords": doc.core_properties.keywords, + "category": doc.core_properties.category, + "comments": doc.core_properties.comments, + } - # Get document statistics - stats = { - "paragraph_count": len(doc.paragraphs), - "word_count": sum( - len(p.text.split()) for p in doc.paragraphs if p.text.strip() - ), - "character_count": sum( - len(p.text) for p in doc.paragraphs if p.text.strip() - ), - } + # Get document content + content = extract_docx_content_detailed(file_path) - return {"metadata": core_props, "content": content, "statistics": stats} + # Get document statistics + stats = { + "paragraph_count": len(doc.paragraphs), + "word_count": sum( + len(p.text.split()) for p in doc.paragraphs if p.text.strip() + ), + "character_count": sum( + len(p.text) for p in doc.paragraphs if p.text.strip() + ), + } - except Exception as e: - logger.error(f"Failed to get DOCX info: {e}") - return None + return {"metadata": core_props, "content": content, "statistics": stats} + + except Exception as e: + logger.error(f"Failed to get DOCX info: {e}") + return None + + return await asyncio.get_event_loop().run_in_executor(None, _get_info) -def extract_pptx_content(file_path): - try: - prs = Presentation(file_path) - content = [] +async def 
extract_pptx_content(file_path): + """Extract content from PPTX file""" - for slide_number, slide in enumerate(prs.slides, 1): - content.append(f"\n# Slide {slide_number}\n") + def _extract(): + try: + prs = Presentation(file_path) + content = [] - # Extract title - if slide.shapes.title: - content.append(f"## {slide.shapes.title.text}\n") + for slide_number, slide in enumerate(prs.slides, 1): + content.append(f"\n# Slide {slide_number}\n") - # Extract text from all shapes - for shape in slide.shapes: - if hasattr(shape, "text") and shape.text.strip(): - if shape != slide.shapes.title: # Skip title as it's already added - content.append(shape.text.strip()) + # Extract title + if slide.shapes.title: + content.append(f"## {slide.shapes.title.text}\n") - return "\n\n".join(content) + # Extract text from all shapes + for shape in slide.shapes: + if hasattr(shape, "text") and shape.text.strip(): + if ( + shape != slide.shapes.title + ): # Skip title as it's already added + content.append(shape.text.strip()) - except Exception as e: - logger.error(f"Failed to extract PPTX content: {e}") - return None + return "\n\n".join(content) + + except Exception as e: + logger.error(f"Failed to extract PPTX content: {e}") + return None + + return await asyncio.get_event_loop().run_in_executor(None, _extract) -def extract_xlsx_content(file_path, max_rows=1000, max_cols=100): - try: - wb = load_workbook(file_path, data_only=True) - content = [] +async def extract_xlsx_content(file_path, max_rows=10000, max_cols=100): + """Extract content from XLSX file""" - for sheet in wb.sheetnames: - ws = wb[sheet] - content.append(f"\n# Sheet: {sheet}\n") + def _extract(): + try: + wb = load_workbook(file_path, data_only=True) + content = [] - # Get the maximum row and column with data - max_row = min(ws.max_row, max_rows) - max_col = min(ws.max_column, max_cols) + for sheet in wb.sheetnames: + ws = wb[sheet] + content.append(f"\n# Sheet: {sheet}\n") - # Create markdown table header - headers = [] 
- for col in range(1, max_col + 1): - cell_value = ws.cell(row=1, column=col).value - headers.append(str(cell_value) if cell_value is not None else "") + # Get the maximum row and column with data + max_row = min(ws.max_row, max_rows) + max_col = min(ws.max_column, max_cols) - content.append("| " + " | ".join(headers) + " |") - content.append("| " + " | ".join(["---"] * len(headers)) + " |") - - # Add table content - for row in range(2, max_row + 1): - row_data = [] + # Create markdown table header + headers = [] for col in range(1, max_col + 1): - cell_value = ws.cell(row=row, column=col).value - row_data.append(str(cell_value) if cell_value is not None else "") - content.append("| " + " | ".join(row_data) + " |") + cell_value = ws.cell(row=1, column=col).value + headers.append(str(cell_value) if cell_value is not None else "") - return "\n".join(content) + content.append("| " + " | ".join(headers) + " |") + content.append("| " + " | ".join(["---"] * len(headers)) + " |") - except Exception as e: - logger.error(f"Failed to extract XLSX content: {e}") - return None + # Add table content + for row in range(2, max_row + 1): + row_data = [] + for col in range(1, max_col + 1): + cell_value = ws.cell(row=row, column=col).value + row_data.append( + str(cell_value) if cell_value is not None else "" + ) + content.append("| " + " | ".join(row_data) + " |") + + return "\n".join(content) + + except Exception as e: + logger.error(f"Failed to extract XLSX content: {e}") + return None + + return await asyncio.get_event_loop().run_in_executor(None, partial(_extract)) -def get_pptx_info(file_path): - try: - prs = Presentation(file_path) +async def get_pptx_info(file_path): + """Get PPTX metadata and content""" - # Extract basic properties - props = { - "slide_count": len(prs.slides), - "title": "", # PowerPoint doesn't have built-in metadata like Word - } + def _get_info(): + try: + prs = Presentation(file_path) - # Get document content - content = extract_pptx_content(file_path) 
+ # Extract basic properties + props = { + "slide_count": len(prs.slides), + "title": "", # PowerPoint doesn't have built-in metadata like Word + } - # Get presentation statistics - stats = { - "slide_count": len(prs.slides), - "shape_count": sum(len(slide.shapes) for slide in prs.slides), - "text_frame_count": sum( - sum(1 for shape in slide.shapes if hasattr(shape, "text")) - for slide in prs.slides - ), - } + # Get document content + content = extract_pptx_content(file_path) - return {"metadata": props, "content": content, "statistics": stats} + # Get presentation statistics + stats = { + "slide_count": len(prs.slides), + "shape_count": sum(len(slide.shapes) for slide in prs.slides), + "text_frame_count": sum( + sum(1 for shape in slide.shapes if hasattr(shape, "text")) + for slide in prs.slides + ), + } - except Exception as e: - logger.error(f"Failed to get PPTX info: {e}") - return None + return {"metadata": props, "content": content, "statistics": stats} + + except Exception as e: + logger.error(f"Failed to get PPTX info: {e}") + return None + + return await asyncio.get_event_loop().run_in_executor(None, _get_info) -def get_xlsx_info(file_path): - try: - wb = load_workbook(file_path, data_only=True) +async def get_xlsx_info(file_path): + """Get XLSX metadata and content""" - # Extract basic properties - props = { - "sheet_count": len(wb.sheetnames), - "sheets": wb.sheetnames, - "title": wb.properties.title, - "creator": wb.properties.creator, - "created": wb.properties.created, - "modified": wb.properties.modified, - } + def _get_info(): + try: + wb = load_workbook(file_path, data_only=True) - # Get document content - content = extract_xlsx_content(file_path) + # Extract basic properties + props = { + "sheet_count": len(wb.sheetnames), + "sheets": wb.sheetnames, + "title": wb.properties.title, + "creator": wb.properties.creator, + "created": wb.properties.created, + "modified": wb.properties.modified, + } - # Get workbook statistics - stats = { - 
"sheet_count": len(wb.sheetnames), - "total_rows": sum(sheet.max_row for sheet in wb.worksheets), - "total_columns": sum(sheet.max_column for sheet in wb.worksheets), - } + # Get document content + content = extract_xlsx_content(file_path) - return {"metadata": props, "content": content, "statistics": stats} + # Get workbook statistics + stats = { + "sheet_count": len(wb.sheetnames), + "total_rows": sum(sheet.max_row for sheet in wb.worksheets), + "total_columns": sum(sheet.max_column for sheet in wb.worksheets), + } - except Exception as e: - logger.error(f"Failed to get XLSX info: {e}") - return None + return {"metadata": props, "content": content, "statistics": stats} + + except Exception as e: + logger.error(f"Failed to get XLSX info: {e}") + return None + + return await asyncio.get_event_loop().run_in_executor(None, _get_info) -def extract_office_content(state: ContentState): +async def extract_office_content(state: ContentState): """Universal function to extract content from Office files""" assert state.get("file_path"), "No file path provided" assert ( state.get("identified_type") in SUPPORTED_OFFICE_TYPES ), "Unsupported File Type" - file_path = state["file_path"] doc_type = state["identified_type"] @@ -266,24 +301,23 @@ def extract_office_content(state: ContentState): == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ): logger.debug("Extracting content from DOCX file") - content = extract_docx_content_detailed(file_path) - info = get_docx_info(file_path) + content = await extract_docx_content_detailed(file_path) + info = await get_docx_info(file_path) elif ( doc_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation" ): logger.debug("Extracting content from PPTX file") - content = extract_pptx_content(file_path) - info = get_pptx_info(file_path) + content = await extract_pptx_content(file_path) + info = await get_pptx_info(file_path) elif ( doc_type == 
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ): logger.debug("Extracting content from XLSX file") - content = extract_xlsx_content(file_path) - info = get_xlsx_info(file_path) + content = await extract_xlsx_content(file_path) + info = await get_xlsx_info(file_path) else: raise Exception(f"Unsupported file format: {doc_type}") del info["content"] - return {"content": content, "metadata": info} diff --git a/open_notebook/graphs/content_processing/pdf.py b/open_notebook/graphs/content_processing/pdf.py index 610ee58..a4d4209 100644 --- a/open_notebook/graphs/content_processing/pdf.py +++ b/open_notebook/graphs/content_processing/pdf.py @@ -1,3 +1,4 @@ +import asyncio import re import unicodedata @@ -114,7 +115,7 @@ def clean_pdf_text(text): return text.strip() -def _extract_text_from_pdf(pdf_path): +async def _extract_text_from_pdf(pdf_path): doc = fitz.open(pdf_path) try: text = "" @@ -127,20 +128,39 @@ def _extract_text_from_pdf(pdf_path): doc.close() -def extract_pdf(state: ContentState): +async def _extract_text_from_pdf(pdf_path): + """Extract text from PDF asynchronously""" + + def _extract(): + doc = fitz.open(pdf_path) + try: + text = "" + logger.debug(f"Found {len(doc)} pages in PDF") + for page in doc: + text += page.get_text() + return clean_pdf_text(text) + finally: + doc.close() + + # Run CPU-bound PDF processing in a thread pool + return await asyncio.get_event_loop().run_in_executor(None, _extract) + + +async def extract_pdf(state: ContentState): """ - Parse the text file and print its content. + Parse the PDF file and extract its content asynchronously. 
""" return_dict = {} assert state.get("file_path"), "No file path provided" assert state.get("identified_type") in SUPPORTED_FITZ_TYPES, "Unsupported File Type" + if ( state.get("file_path") is not None and state.get("identified_type") in SUPPORTED_FITZ_TYPES ): file_path = state.get("file_path") try: - text = _extract_text_from_pdf(file_path) + text = await _extract_text_from_pdf(file_path) return_dict["content"] = text except FileNotFoundError: raise FileNotFoundError(f"File not found at {file_path}") diff --git a/open_notebook/graphs/content_processing/text.py b/open_notebook/graphs/content_processing/text.py index b81ca6c..85c11aa 100644 --- a/open_notebook/graphs/content_processing/text.py +++ b/open_notebook/graphs/content_processing/text.py @@ -1,11 +1,13 @@ +import asyncio + from loguru import logger from open_notebook.graphs.content_processing.state import ContentState -def extract_txt(state: ContentState): +async def extract_txt(state: ContentState): """ - Parse the text file and print its content. + Parse the text file and extract its content asynchronously. 
""" return_dict = {} if ( @@ -14,12 +16,22 @@ def extract_txt(state: ContentState): ): logger.debug(f"Extracting text from {state.get('file_path')}") file_path = state.get("file_path") + if file_path is not None: try: - with open(file_path, "r", encoding="utf-8") as file: - content = file.read() - logger.debug(f"Extracted: {content[:100]}") - return_dict["content"] = content + + def _read_file(): + with open(file_path, "r", encoding="utf-8") as file: + return file.read() + + # Run file I/O in thread pool + content = await asyncio.get_event_loop().run_in_executor( + None, _read_file + ) + + logger.debug(f"Extracted: {content[:100]}") + return_dict["content"] = content + except FileNotFoundError: raise FileNotFoundError(f"File not found at {file_path}") except Exception as e: diff --git a/open_notebook/graphs/content_processing/url.py b/open_notebook/graphs/content_processing/url.py index c06efbc..66bb62e 100644 --- a/open_notebook/graphs/content_processing/url.py +++ b/open_notebook/graphs/content_processing/url.py @@ -1,7 +1,7 @@ import re from urllib.parse import urlparse -import requests # type: ignore +import aiohttp from bs4 import BeautifulSoup, Comment from loguru import logger @@ -29,7 +29,7 @@ def url_provider(state: ContentState): return return_dict -def extract_url_bs4(url: str): +async def extract_url_bs4(url: str): """ Get the title and content of a URL using bs4 """ @@ -42,9 +42,10 @@ def extract_url_bs4(url: str): if url.startswith("") or url.startswith("") else None, } - except requests.exceptions.RequestException as e: + except aiohttp.ClientError as e: logger.error(f"Failed to fetch URL {url}: {e}") return None except Exception as e: @@ -151,38 +152,38 @@ def extract_url_bs4(url: str): return None -def extract_url_jina(url: str): +async def extract_url_jina(url: str): """ Get the content of a URL using Jina """ - response = requests.get(f"https://r.jina.ai/{url}") - text = response.text - if text.startswith("Title:") and "\n" in text: - title_end = 
text.index("\n") - title = text[6:title_end].strip() - content = text[title_end + 1 :].strip() - logger.debug( - f"Processed url: {url}, found title: {title}, content: {content[:100]}..." - ) - return {"title": title, "content": content} - else: - content = text - logger.debug( - f"Processed url: {url}, does not have Title prefix, returning full content: {content[:100]}..." - ) - return {"content": text} + async with aiohttp.ClientSession() as session: + async with session.get(f"https://r.jina.ai/{url}") as response: + text = await response.text() + if text.startswith("Title:") and "\n" in text: + title_end = text.index("\n") + title = text[6:title_end].strip() + content = text[title_end + 1 :].strip() + logger.debug( + f"Processed url: {url}, found title: {title}, content: {content[:100]}..." + ) + return {"title": title, "content": content} + else: + logger.debug( + f"Processed url: {url}, does not have Title prefix, returning full content: {text[:100]}..." + ) + return {"content": text} -def extract_url(state: ContentState): +async def extract_url(state: ContentState): assert state.get("url"), "No URL provided" url = state["url"] try: - result = extract_url_bs4(url) + result = await extract_url_bs4(url) if not result or not result.get("content"): logger.debug( f"BS4 extraction failed for url {url}, falling back to Jina extractor" ) - result = extract_url_jina(url) + result = await extract_url_jina(url) return result except Exception as e: logger.error(f"URL extraction failed for URL: {url}") diff --git a/open_notebook/graphs/content_processing/video.py b/open_notebook/graphs/content_processing/video.py index c48e540..9fc5018 100644 --- a/open_notebook/graphs/content_processing/video.py +++ b/open_notebook/graphs/content_processing/video.py @@ -1,114 +1,141 @@ +import asyncio import json import os import subprocess +from functools import partial from loguru import logger from open_notebook.graphs.content_processing.state import ContentState -def 
extract_audio_from_video(input_file, output_file, stream_index): +async def extract_audio_from_video(input_file, output_file, stream_index): """ - Extract the specified audio stream to MP3 format + Extract the specified audio stream to MP3 format asynchronously """ - try: - cmd = [ - "ffmpeg", - "-i", - input_file, - "-map", - f"0:a:{stream_index}", # Select specific audio stream - "-codec:a", - "libmp3lame", # Use MP3 codec - "-q:a", - "2", # High quality setting - "-y", # Overwrite output file if exists - output_file, - ] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - raise Exception(f"FFmpeg failed: {result.stderr}") + def _extract(input_file, output_file, stream_index): + try: + cmd = [ + "ffmpeg", + "-i", + input_file, + "-map", + f"0:a:{stream_index}", # Select specific audio stream + "-codec:a", + "libmp3lame", # Use MP3 codec + "-q:a", + "2", # High quality setting + "-y", # Overwrite output file if exists + output_file, + ] - return True + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise Exception(f"FFmpeg failed: {result.stderr}") - except Exception as e: - print(f"Error extracting audio: {str(e)}") - return False + return True + + except Exception as e: + logger.error(f"Error extracting audio: {str(e)}") + return False + + return await asyncio.get_event_loop().run_in_executor( + None, partial(_extract, input_file, output_file, stream_index) + ) -def get_audio_streams(input_file): +async def get_audio_streams(input_file): """ - Analyze video file and return information about all audio streams + Analyze video file and return information about all audio streams asynchronously """ - logger.debug(f"Analyzing video file {input_file} for audio streams") - try: - # Get stream information in JSON format - cmd = [ - "ffprobe", - "-v", - "quiet", - "-print_format", - "json", - "-show_streams", - "-select_streams", - "a", - input_file, - ] - result = subprocess.run(cmd, 
capture_output=True, text=True) - if result.returncode != 0: - raise Exception(f"FFprobe failed: {result.stderr}") + def _analyze(input_file): + logger.debug(f"Analyzing video file {input_file} for audio streams") + try: + cmd = [ + "ffprobe", + "-v", + "quiet", + "-print_format", + "json", + "-show_streams", + "-select_streams", + "a", + input_file, + ] - data = json.loads(result.stdout) - return data.get("streams", []) + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise Exception(f"FFprobe failed: {result.stderr}") - except Exception as e: - print(f"Error analyzing file: {str(e)}") - return [] + data = json.loads(result.stdout) + return data.get("streams", []) + + except Exception as e: + logger.error(f"Error analyzing file: {str(e)}") + return [] + + return await asyncio.get_event_loop().run_in_executor( + None, partial(_analyze, input_file) + ) -def select_best_audio_stream(streams): +async def select_best_audio_stream(streams): """ Select the best audio stream based on various quality metrics """ - if not streams: - logger.debug("No audio streams found") - return None - else: - logger.debug(f"Found {len(streams)} audio streams") - # Score each stream based on various factors - scored_streams = [] - for stream in streams: - score = 0 + def _select(streams): + if not streams: + logger.debug("No audio streams found") + return None + else: + logger.debug(f"Found {len(streams)} audio streams") - # Prefer higher bit rates - bit_rate = stream.get("bit_rate") - if bit_rate: - score += int(int(bit_rate) / 1000000) # Convert to Mbps and ensure int + # Score each stream based on various factors + scored_streams = [] + for stream in streams: + score = 0 - # Prefer more channels (stereo over mono) - channels = stream.get("channels", 0) - score += channels * 10 + # Prefer higher bit rates + bit_rate = stream.get("bit_rate") + if bit_rate: + score += int(int(bit_rate) / 1000000) # Convert to Mbps and ensure int - # Prefer 
higher sample rates - sample_rate = stream.get("sample_rate", "0") - score += int(int(sample_rate) / 48000) + # Prefer more channels (stereo over mono) + channels = stream.get("channels", 0) + score += channels * 10 - scored_streams.append((score, stream)) + # Prefer higher sample rates + sample_rate = stream.get("sample_rate", "0") + score += int(int(sample_rate) / 48000) - # Return the stream with highest score - return max(scored_streams, key=lambda x: x[0])[1] + scored_streams.append((score, stream)) + + # Return the stream with highest score + return max(scored_streams, key=lambda x: x[0])[1] + + return await asyncio.get_event_loop().run_in_executor( + None, partial(_select, streams) + ) -def extract_best_audio_from_video(data: ContentState): +async def extract_best_audio_from_video(data: ContentState): """ - Main function to extract the best audio stream from a video file + Main function to extract the best audio stream from a video file asynchronously """ input_file = data.get("file_path") assert input_file is not None, "Input file path must be provided" - if not os.path.exists(input_file): + + def _check_file(path): + return os.path.exists(path) + + file_exists = await asyncio.get_event_loop().run_in_executor( + None, partial(_check_file, input_file) + ) + + if not file_exists: logger.critical(f"Input file not found: {input_file}") return False @@ -116,20 +143,20 @@ def extract_best_audio_from_video(data: ContentState): output_file = f"{base_name}_audio.mp3" # Get all audio streams - streams = get_audio_streams(input_file) + streams = await get_audio_streams(input_file) if not streams: logger.debug("No audio streams found in the file") return False # Select best stream - best_stream = select_best_audio_stream(streams) + best_stream = await select_best_audio_stream(streams) if not best_stream: logger.error("Could not determine best audio stream") return False # Extract the selected stream stream_index = streams.index(best_stream) - success = 
extract_audio_from_video(input_file, output_file, stream_index) + success = await extract_audio_from_video(input_file, output_file, stream_index) if success: logger.debug(f"Successfully extracted audio to: {output_file}") diff --git a/open_notebook/graphs/content_processing/youtube.py b/open_notebook/graphs/content_processing/youtube.py index 1e85192..e39f4ba 100644 --- a/open_notebook/graphs/content_processing/youtube.py +++ b/open_notebook/graphs/content_processing/youtube.py @@ -1,7 +1,7 @@ import re import ssl -import requests +import aiohttp from bs4 import BeautifulSoup from loguru import logger from youtube_transcript_api import YouTubeTranscriptApi # type: ignore @@ -14,11 +14,15 @@ from open_notebook.graphs.content_processing.state import ContentState ssl._create_default_https_context = ssl._create_unverified_context -def get_video_title(video_id): +async def get_video_title(video_id): try: url = f"https://www.youtube.com/watch?v={video_id}" - response = requests.get(url) - soup = BeautifulSoup(response.text, "html.parser") + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + html = await response.text() + + # BeautifulSoup doesn't support async operations + soup = BeautifulSoup(html, "html.parser") # YouTube stores title in a meta tag title = soup.find("meta", property="og:title")["content"] @@ -63,7 +67,7 @@ def _extract_youtube_id(url): return match.group(1) if match else None -def get_best_transcript(video_id, preferred_langs=["en", "es", "pt"]): +async def get_best_transcript(video_id, preferred_langs=["en", "es", "pt"]): try: transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) @@ -129,7 +133,7 @@ def get_best_transcript(video_id, preferred_langs=["en", "es", "pt"]): return None -def extract_youtube_transcript(state: ContentState): +async def extract_youtube_transcript(state: ContentState): """ Parse the text file and print its content. 
""" @@ -139,12 +143,12 @@ def extract_youtube_transcript(state: ContentState): ) video_id = _extract_youtube_id(state.get("url")) - transcript = get_best_transcript(video_id, languages) + transcript = await get_best_transcript(video_id, languages) logger.debug(f"Found transcript: {transcript}") formatter = TextFormatter() try: - title = get_video_title(video_id) + title = await get_video_title(video_id) except Exception as e: logger.critical(f"Failed to get video title for video_id: {video_id}") logger.exception(e) diff --git a/open_notebook/graphs/source.py b/open_notebook/graphs/source.py index 39813ab..14129e6 100644 --- a/open_notebook/graphs/source.py +++ b/open_notebook/graphs/source.py @@ -16,8 +16,6 @@ from open_notebook.graphs.content_processing import graph as content_graph from open_notebook.graphs.multipattern import graph as transform_graph from open_notebook.utils import surreal_clean -# todo: we can make this more efficient - class SourceState(TypedDict): content_state: ContentState @@ -32,20 +30,24 @@ class TransformationState(TypedDict): transformation: dict -def content_process(state: SourceState): +async def content_process(state: SourceState) -> dict: content_state = state["content_state"] logger.debug("Content processing started for new content") - return {"content_state": content_graph.invoke(content_state)} + processed_state = await content_graph.ainvoke(content_state) + return {"content_state": processed_state} -def run_patterns(input_text, patterns): - output = transform_graph.invoke(dict(content_stack=[input_text], patterns=patterns)) +async def run_patterns(input_text: str, patterns: List[dict]) -> str: + output = await transform_graph.ainvoke( + dict(content_stack=[input_text], patterns=patterns) + ) return output["output"] -def save_source(state: SourceState): +def save_source(state: SourceState) -> dict: logger.debug("Saving source") content_state = state["content_state"] + source = Source( asset=Asset( url=content_state.get("url"), 
file_path=content_state.get("file_path") @@ -61,9 +63,10 @@ def save_source(state: SourceState): return {"source": source} -def trigger_transformations(state: SourceState, config: RunnableConfig): +def trigger_transformations(state: SourceState, config: RunnableConfig) -> List[Send]: if len(state["transformations"]) == 0: return [] + transformations = Transformation.get_all() to_apply = [ t @@ -71,6 +74,7 @@ def trigger_transformations(state: SourceState, config: RunnableConfig): if t["name"] in state["transformations"] ] logger.debug(f"Applying transformations {to_apply}") + return [ Send( "transform_content", @@ -83,24 +87,34 @@ def trigger_transformations(state: SourceState, config: RunnableConfig): ] -def transform_content(state: TransformationState): +async def transform_content(state: TransformationState) -> dict: source = state["source"] content = source.full_text transformation = state["transformation"] + logger.debug(f"Applying transformation {transformation['name']}") - result = run_patterns(content, patterns=transformation["patterns"]) + result = await run_patterns(content, patterns=transformation["patterns"]) + source.add_insight(transformation["name"], surreal_clean(result)) + return {"transformations": [{"name": transformation["name"], "content": result}]} +# Create and compile the workflow workflow = StateGraph(SourceState) + +# Add nodes workflow.add_node("content_process", content_process) workflow.add_node("save_source", save_source) workflow.add_node("transform_content", transform_content) + +# Define the graph edges workflow.add_edge(START, "content_process") workflow.add_edge("content_process", "save_source") workflow.add_conditional_edges( "save_source", trigger_transformations, ["transform_content"] ) workflow.add_edge("transform_content", END) + +# Compile the graph source_graph = workflow.compile() diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index 6114bd7..550553a 100644 --- a/pages/stream_app/source.py +++ 
b/pages/stream_app/source.py @@ -1,3 +1,4 @@ +import asyncio import os from pathlib import Path @@ -71,12 +72,14 @@ def add_source(notebook_id): f.write(source_file.getbuffer()) st.write("Processing content..") - source_graph.invoke( - { - "content_state": req, - "notebook_id": notebook_id, - "transformations": apply_transformations, - } + asyncio.run( + source_graph.ainvoke( + { + "content_state": req, + "notebook_id": notebook_id, + "transformations": apply_transformations, + } + ) ) except UnsupportedTypeException as e: st.warning( From 01cf15e7d140f0e9a2aebbbefb23a6f18871cafd Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 17:33:28 -0300 Subject: [PATCH 14/44] add check --- open_notebook/graphs/source.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/open_notebook/graphs/source.py b/open_notebook/graphs/source.py index 14129e6..6582553 100644 --- a/open_notebook/graphs/source.py +++ b/open_notebook/graphs/source.py @@ -90,6 +90,8 @@ def trigger_transformations(state: SourceState, config: RunnableConfig) -> List[ async def transform_content(state: TransformationState) -> dict: source = state["source"] content = source.full_text + if not content: + return None transformation = state["transformation"] logger.debug(f"Applying transformation {transformation['name']}") From 817b1bc7f92cd3b2427f039f37595852a65543cb Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 17:47:50 -0300 Subject: [PATCH 15/44] add initial embedding to the content graph --- open_notebook/domain/notebook.py | 21 --------------------- open_notebook/graphs/source.py | 14 ++++++++++++-- pages/stream_app/source.py | 3 ++- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index 21c9965..2b8e422 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -213,27 +213,6 @@ class Source(ObjectModel): logger.exception(e) raise DatabaseOperationError(e) - # 
@classmethod - # def search(cls, query: str) -> List[Dict[str, Any]]: - # if not query: - # raise InvalidInputError("Search query cannot be empty") - # try: - # result = repo_query( - # """ - # SELECT * omit full_text - # FROM source - # WHERE string::lowercase(title) CONTAINS $query or title @@ $query - # OR string::lowercase(summary) CONTAINS $query or summary @@ $query - # OR string::lowercase(full_text) CONTAINS $query or full_text @@ $query - # """, - # {"query": query}, - # ) - # return result - # except Exception as e: - # logger.error(f"Error searching sources: {str(e)}") - # logger.exception(e) - # raise DatabaseOperationError("Failed to search sources") - def add_insight(self, insight_type: str, content: str) -> Any: EMBEDDING_MODEL = model_manager.embedding_model diff --git a/open_notebook/graphs/source.py b/open_notebook/graphs/source.py index 6582553..dbce6da 100644 --- a/open_notebook/graphs/source.py +++ b/open_notebook/graphs/source.py @@ -23,6 +23,7 @@ class SourceState(TypedDict): notebook_id: str source: Source transformations: Annotated[list, operator.add] + embed: bool = False class TransformationState(TypedDict): @@ -102,6 +103,14 @@ async def transform_content(state: TransformationState) -> dict: return {"transformations": [{"name": transformation["name"], "content": result}]} +async def embed_content(state: SourceState) -> dict: + source: Source = state["source"] + if state["embed"]: + logger.debug("Embedding content for vector search") + source.vectorize() + return {"source": source} + + # Create and compile the workflow workflow = StateGraph(SourceState) @@ -109,14 +118,15 @@ workflow = StateGraph(SourceState) workflow.add_node("content_process", content_process) workflow.add_node("save_source", save_source) workflow.add_node("transform_content", transform_content) - +workflow.add_node("embed_content", embed_content) # Define the graph edges workflow.add_edge(START, "content_process") workflow.add_edge("content_process", "save_source") 
workflow.add_conditional_edges( "save_source", trigger_transformations, ["transform_content"] ) -workflow.add_edge("transform_content", END) +workflow.add_edge("transform_content", "embed_content") +workflow.add_edge("embed_content", END) # Compile the graph source_graph = workflow.compile() diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index 550553a..d37efcc 100644 --- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -47,6 +47,7 @@ def add_source(notebook_id): options=available_transformations, default=default_transformations, ) + embed = st.checkbox("Embed content for vector search", value=False) if st.button("Process", key="add_source"): logger.debug("Adding source") with st.status("Processing...", expanded=True): @@ -71,13 +72,13 @@ def add_source(notebook_id): with open(new_path, "wb") as f: f.write(source_file.getbuffer()) - st.write("Processing content..") asyncio.run( source_graph.ainvoke( { "content_state": req, "notebook_id": notebook_id, "transformations": apply_transformations, + "embed": embed, } ) ) From bfd5efcc53bf1eb51e20acd23e0a103c8ed23f37 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 17:48:02 -0300 Subject: [PATCH 16/44] add selectbox for transformations --- pages/components/source_panel.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pages/components/source_panel.py b/pages/components/source_panel.py index 6b2ed43..83f2304 100644 --- a/pages/components/source_panel.py +++ b/pages/components/source_panel.py @@ -44,17 +44,21 @@ def source_panel(source_id: str, modal=False): with c2: transformations = Transformation.get_all() - for transformation in transformations["source_insights"]: - if st.button( - transformation["name"], help=transformation["description"] - ): - result = run_patterns(source.full_text, transformation["patterns"]) - source.add_insight( - transformation["insight_type"], surreal_clean(result) - ) - st.rerun(scope="fragment" if 
modal else "app") + transformation = st.selectbox( + "Run a transformation", + transformations["source_insights"], + key=f"transformation_{source.id}", + format_func=lambda x: x["name"], + ) + st.caption(transformation["description"]) + if st.button("Run"): + result = run_patterns(source.full_text, transformation["patterns"]) + source.add_insight( + transformation["insight_type"], surreal_clean(result) + ) + st.rerun(scope="fragment" if modal else "app") - if st.button( + if source.embedded_chunks == 0 and st.button( "Embed vectors", icon="🦾", disabled=source.embedded_chunks > 0, From 532e606a4962d642a4cdab46547b1201d2b1a493 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 18:16:42 -0300 Subject: [PATCH 17/44] add parallel processing to embed --- open_notebook/domain/notebook.py | 49 +++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index 2b8e422..be0dbc0 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -1,4 +1,5 @@ -from typing import Any, ClassVar, Dict, List, Literal, Optional +from concurrent.futures import ThreadPoolExecutor +from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple from loguru import logger from pydantic import BaseModel, Field, field_validator @@ -186,28 +187,62 @@ class Source(ObjectModel): return self.relate("reference", notebook_id) def vectorize(self) -> None: + logger.info(f"Starting vectorization for source {self.id}") EMBEDDING_MODEL = model_manager.embedding_model try: if not self.full_text: + logger.warning(f"No text to vectorize for source {self.id}") return + chunks = split_text( self.full_text, ) - logger.debug(f"Split into {len(chunks)} chunks") + chunk_count = len(chunks) + logger.info(f"Split into {chunk_count} chunks for source {self.id}") - # future: we can increase the batch size after surreal launches their new SDK - for i, chunk in 
enumerate(chunks): + if chunk_count == 0: + logger.warning("No chunks created after splitting") + return + + def process_chunk(args: Tuple[int, str]) -> Tuple[int, List[float], str]: + idx, chunk = args + logger.debug(f"Processing chunk {idx}/{chunk_count}") + try: + embedding = EMBEDDING_MODEL.embed(chunk) + cleaned_content = surreal_clean(chunk) + logger.debug(f"Successfully processed chunk {idx}") + return (idx, embedding, cleaned_content) + except Exception as e: + logger.error(f"Error processing chunk {idx}: {str(e)}") + raise + + # Process chunks in parallel while preserving order + logger.info("Starting parallel processing of chunks") + with ThreadPoolExecutor(max_workers=8) as executor: + # Create list of (index, chunk) tuples + chunk_tasks = list(enumerate(chunks)) + # Process all chunks in parallel and get results + results = list(executor.map(process_chunk, chunk_tasks)) + + logger.info(f"Parallel processing complete. Got {len(results)} results") + + # Insert results in order (they're already ordered by index) + for idx, embedding, content in results: + logger.debug(f"Inserting chunk {idx} into database") repo_query( f""" CREATE source_embedding CONTENT {{ "source": {self.id}, - "order": {i}, + "order": {idx}, "content": $content, - "embedding": {EMBEDDING_MODEL.embed(chunk)}, + "embedding": {embedding}, }};""", - {"content": surreal_clean(chunk)}, + {"content": content}, ) + + logger.info(f"Vectorization complete for source {self.id}") + except Exception as e: logger.error(f"Error vectorizing source {self.id}: {str(e)}") logger.exception(e) From 8cb6d835feab4e7afd15f908933e16d7312a33fc Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Mon, 11 Nov 2024 18:17:08 -0300 Subject: [PATCH 18/44] add ui improvements to embed and transformation dialogs --- open_notebook/graphs/source.py | 19 +++++++------------ pages/components/source_panel.py | 25 +++++++++++++------------ pages/stream_app/source.py | 7 +++++-- 3 files changed, 25 insertions(+), 26 deletions(-) 
diff --git a/open_notebook/graphs/source.py b/open_notebook/graphs/source.py index dbce6da..9456bd7 100644 --- a/open_notebook/graphs/source.py +++ b/open_notebook/graphs/source.py @@ -23,7 +23,7 @@ class SourceState(TypedDict): notebook_id: str source: Source transformations: Annotated[list, operator.add] - embed: bool = False + embed: bool class TransformationState(TypedDict): @@ -61,6 +61,11 @@ def save_source(state: SourceState) -> dict: if state["notebook_id"]: logger.debug(f"Adding source to notebook {state['notebook_id']}") source.add_to_notebook(state["notebook_id"]) + + if state["embed"]: + logger.debug("Embedding content for vector search") + source.vectorize() + return {"source": source} @@ -103,14 +108,6 @@ async def transform_content(state: TransformationState) -> dict: return {"transformations": [{"name": transformation["name"], "content": result}]} -async def embed_content(state: SourceState) -> dict: - source: Source = state["source"] - if state["embed"]: - logger.debug("Embedding content for vector search") - source.vectorize() - return {"source": source} - - # Create and compile the workflow workflow = StateGraph(SourceState) @@ -118,15 +115,13 @@ workflow = StateGraph(SourceState) workflow.add_node("content_process", content_process) workflow.add_node("save_source", save_source) workflow.add_node("transform_content", transform_content) -workflow.add_node("embed_content", embed_content) # Define the graph edges workflow.add_edge(START, "content_process") workflow.add_edge("content_process", "save_source") workflow.add_conditional_edges( "save_source", trigger_transformations, ["transform_content"] ) -workflow.add_edge("transform_content", "embed_content") -workflow.add_edge("embed_content", END) +workflow.add_edge("transform_content", END) # Compile the graph source_graph = workflow.compile() diff --git a/pages/components/source_panel.py b/pages/components/source_panel.py index 83f2304..55f09df 100644 --- a/pages/components/source_panel.py +++ 
b/pages/components/source_panel.py @@ -44,19 +44,20 @@ def source_panel(source_id: str, modal=False): with c2: transformations = Transformation.get_all() - transformation = st.selectbox( - "Run a transformation", - transformations["source_insights"], - key=f"transformation_{source.id}", - format_func=lambda x: x["name"], - ) - st.caption(transformation["description"]) - if st.button("Run"): - result = run_patterns(source.full_text, transformation["patterns"]) - source.add_insight( - transformation["insight_type"], surreal_clean(result) + with st.container(border=True): + transformation = st.selectbox( + "Run a transformation", + transformations["source_insights"], + key=f"transformation_{source.id}", + format_func=lambda x: x["name"], ) - st.rerun(scope="fragment" if modal else "app") + st.caption(transformation["description"]) + if st.button("Run"): + result = run_patterns(source.full_text, transformation["patterns"]) + source.add_insight( + transformation["insight_type"], surreal_clean(result) + ) + st.rerun(scope="fragment" if modal else "app") if source.embedded_chunks == 0 and st.button( "Embed vectors", diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index d37efcc..d8b04f5 100644 --- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -47,7 +47,10 @@ def add_source(notebook_id): options=available_transformations, default=default_transformations, ) - embed = st.checkbox("Embed content for vector search", value=False) + run_embed = st.checkbox( + "Embed content for vector search", + help="Creates an embedded content for vector search. Costs a little money and takes a little bit more time. 
You can do this later if you prefer.", + ) if st.button("Process", key="add_source"): logger.debug("Adding source") with st.status("Processing...", expanded=True): @@ -78,7 +81,7 @@ def add_source(notebook_id): "content_state": req, "notebook_id": notebook_id, "transformations": apply_transformations, - "embed": embed, + "embed": run_embed, } ) ) From a33228de5ab47f61929ee999eb09059119de4345 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Tue, 12 Nov 2024 12:56:03 -0300 Subject: [PATCH 19/44] split system and user message in patterns --- open_notebook/graphs/utils.py | 8 +-- prompts/patterns/default/analyze_paper.jinja | 7 +-- prompts/patterns/default/cleanup.jinja | 5 +- prompts/patterns/default/command.jinja | 7 +-- .../common_tranformation_instructions.jinja | 4 +- prompts/patterns/default/keyinsights.jinja | 7 +-- prompts/patterns/default/makeitdense.jinja | 7 +-- prompts/patterns/default/mermaid.jinja | 6 +- .../default/reflection_questions.jinja | 8 +-- prompts/patterns/default/summarize.jinja | 7 +-- prompts/patterns/default/toc.jinja | 6 +- prompts/rag.jinja | 59 ------------------- 12 files changed, 25 insertions(+), 106 deletions(-) delete mode 100644 prompts/rag.jinja diff --git a/open_notebook/graphs/utils.py b/open_notebook/graphs/utils.py index 3c79d85..e8be911 100644 --- a/open_notebook/graphs/utils.py +++ b/open_notebook/graphs/utils.py @@ -1,5 +1,5 @@ from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import BaseMessage +from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage from loguru import logger from open_notebook.domain.models import model_manager @@ -37,18 +37,18 @@ def provision_langchain_model( def run_pattern( pattern_name: str, config, - messages=[], state: dict = {}, parser=None, ) -> BaseMessage: system_prompt = Prompter(prompt_template=pattern_name, parser=parser).render( data=state ) - payload = [system_prompt] + messages + payload = 
[SystemMessage(content=system_prompt)] + [ + HumanMessage(content=state["input_text"]) + ] chain = provision_langchain_model( str(payload), config.get("configurable", {}).get("model_id"), "transformation" ) response = chain.invoke(payload) - return response diff --git a/prompts/patterns/default/analyze_paper.jinja b/prompts/patterns/default/analyze_paper.jinja index 9f0e1c7..6558834 100644 --- a/prompts/patterns/default/analyze_paper.jinja +++ b/prompts/patterns/default/analyze_paper.jinja @@ -1,4 +1,3 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} # IDENTITY and PURPOSE @@ -35,8 +34,6 @@ You are an insightful and analytical reader of academic papers, extracting the k - Do not include warnings, disclaimers, or personal opinions. - Output only the requested sections with their respective labels. +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT \ No newline at end of file diff --git a/prompts/patterns/default/cleanup.jinja b/prompts/patterns/default/cleanup.jinja index 5df76cc..640716e 100644 --- a/prompts/patterns/default/cleanup.jinja +++ b/prompts/patterns/default/cleanup.jinja @@ -1,6 +1,7 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} Please clean-up the following text, fixing the paragraphs, ponctuation, etc. If you find any word or name mispellings, feel free to correct. 
-{{input_text}} +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + +# INPUT diff --git a/prompts/patterns/default/command.jinja b/prompts/patterns/default/command.jinja index c5034c8..2d73874 100644 --- a/prompts/patterns/default/command.jinja +++ b/prompts/patterns/default/command.jinja @@ -1,9 +1,6 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} {{command}} +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT diff --git a/prompts/patterns/default/common_tranformation_instructions.jinja b/prompts/patterns/default/common_tranformation_instructions.jinja index 8b95af7..66f355b 100644 --- a/prompts/patterns/default/common_tranformation_instructions.jinja +++ b/prompts/patterns/default/common_tranformation_instructions.jinja @@ -1,4 +1,4 @@ # ADDITIONAL INSTRUCTIONS -- You are working on my editorial projects. The text below is my own. -- Please do not reply with any acknowledgements or greetings, just provide the content requested. +- You are working on my editorial projects. The text below is my own. Do not give me any warnings about copyright or plagiarism. +- Output ONLY the requested content, without acknowledgements of the task and additional chatting. Don't start with "Sure, I can help you with that." or "Here is the information you requested:". Just provide the content. \ No newline at end of file diff --git a/prompts/patterns/default/keyinsights.jinja b/prompts/patterns/default/keyinsights.jinja index 1cc756c..7cbfa3c 100644 --- a/prompts/patterns/default/keyinsights.jinja +++ b/prompts/patterns/default/keyinsights.jinja @@ -1,5 +1,4 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} # IDENTITY and PURPOSE @@ -23,8 +22,6 @@ Take a step back and think step-by-step about how to achieve the best possible r - Do not start items with the same opening words. - Ensure you follow ALL these instructions when creating your output. 
+{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT \ No newline at end of file diff --git a/prompts/patterns/default/makeitdense.jinja b/prompts/patterns/default/makeitdense.jinja index 6531aec..75acba8 100644 --- a/prompts/patterns/default/makeitdense.jinja +++ b/prompts/patterns/default/makeitdense.jinja @@ -1,4 +1,3 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} # MISSION You are a Sparse Priming Representation (SPR) writer. An SPR is a particular kind of use of language for advanced NLP, NLU, and NLG tasks, particularly useful for the latest generation of Large Language Models (LLMs). You will be given information by the USER which you are to render as an SPR. @@ -9,8 +8,6 @@ LLMs are a kind of deep neural network. They have been demonstrated to embed kno # METHODOLOGY Render the input as a distilled list of succinct statements, assertions, associations, concepts, analogies, and metaphors. The idea is to capture as much, conceptually, as possible but with as few words as possible. Write it in a way that makes sense to you, as the future audience will be another language model, not a human. Use complete sentences. +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT \ No newline at end of file diff --git a/prompts/patterns/default/mermaid.jinja b/prompts/patterns/default/mermaid.jinja index cc05ee0..1b1aa2f 100644 --- a/prompts/patterns/default/mermaid.jinja +++ b/prompts/patterns/default/mermaid.jinja @@ -22,8 +22,6 @@ You always output Markdown Mermaid syntax that can be rendered as a diagram. - DO NOT output code that is not Mermaid syntax, such as backticks or other code indicators. - Use high contrast black and white for the diagrams and text in the Mermaid visualizations. 
+{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT diff --git a/prompts/patterns/default/reflection_questions.jinja b/prompts/patterns/default/reflection_questions.jinja index aa1e304..3057894 100644 --- a/prompts/patterns/default/reflection_questions.jinja +++ b/prompts/patterns/default/reflection_questions.jinja @@ -1,5 +1,3 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} - # IDENTITY and PURPOSE You extract deep, thought-provoking, and meaningful reflections from text content. You are especially focused on themes related to the human experience, such as the purpose of life, personal growth, the intersection of technology and humanity, artificial intelligence's societal impact, human potential, collective evolution, and transformative learning. Your reflections aim to provoke new ways of thinking, challenge assumptions, and provide a thoughtful synthesis of the content. @@ -20,8 +18,6 @@ You extract deep, thought-provoking, and meaningful reflections from text conten - Every bullet should be formatted as a question that elicits contemplation or a statement that offers a profound insight. - Do not give warnings or notes; only output the requested section. +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT \ No newline at end of file diff --git a/prompts/patterns/default/summarize.jinja b/prompts/patterns/default/summarize.jinja index 2e6abab..90a60ba 100644 --- a/prompts/patterns/default/summarize.jinja +++ b/prompts/patterns/default/summarize.jinja @@ -1,4 +1,3 @@ -{% include 'patterns/default/common_tranformation_instructions.jinja' %} # SYSTEM ROLE You are a content summarization assistant that creates dense, information-rich summaries optimized for machine understanding. Your summaries should capture key concepts with minimal words while maintaining complete, clear sentences. 
@@ -9,8 +8,6 @@ Analyze the provided content and create a summary that: - Uses clear, direct language - Maintains context from any previous summaries +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT \ No newline at end of file diff --git a/prompts/patterns/default/toc.jinja b/prompts/patterns/default/toc.jinja index 23b84f0..f8fca4b 100644 --- a/prompts/patterns/default/toc.jinja +++ b/prompts/patterns/default/toc.jinja @@ -8,8 +8,6 @@ Analyze the provided content and create a Table of Contents: - Captures the core topics included in the text - Gives a small description of what is covered +{% include 'patterns/default/common_tranformation_instructions.jinja' %} + # INPUT - -{{input_text}} - -# OUTPUT \ No newline at end of file diff --git a/prompts/rag.jinja b/prompts/rag.jinja deleted file mode 100644 index 3d8d057..0000000 --- a/prompts/rag.jinja +++ /dev/null @@ -1,59 +0,0 @@ -# SYSTEM ROLE -You are a cognitive study assistant that helps users research and learn by engaging in focused discussions about documents in their workspace. - -You have access to a search tool that you can use in order to reply to the user query. - -The tool accepts 2 arrays as parameters: - -- keyword_searches: List[str] - A list of search terms to search for using keyword search. -- vector_searches: List[str] - A list of search terms to search for using vector search. - -It's very important that your response contains references to the searched documents so the user can follow-up and read more about the topic. The way you do that is by adding the id of the specific document in between brackets like this: [document_id]. - -# EXAMPLE - -User: Can you tell me more about the concept of "Deep Learning"? - -Assistant: Deep learning is a subset of machine learning in artificial intelligence (AI) that enables networks to learn unsupervised from unstructured or unlabeled data. [note:iuiodadalknda]. 
It can also be categorized into three main types: supervised, unsupervised, and reinforcement learning. [insight:adadadadadadad]. - -Please note, "note:iuiodadalknda" and "insight:adadadadadadad" are examples of document IDs with different prefixes. You should not make up document IDs or copy the IDs from this example. You should use the IDs of the documents that you have access to through the search tool. - -# IMPORTANT - -- Do not make up documents or document ids. Only use the ids of the documents that you have access through the query you made. -- The ID is composed of the type of document and a random string, such as "source:randomstring", "note:randomstring", or "insight:randomstring". There are various types of documents, including notes, insights, and sources. **Always use the complete ID exactly as it is provided, including its type prefix. Do not add, remove, or modify any part of the ID.** -- Do not assume or change the type prefix of any document ID. If a document ID is "note:xyz", use it exactly as "note:xyz". Do not change it to "source:xyz" or any other variation. -- **Use document IDs exactly as they are returned from the search tool. Do not add any prefixes or modify them in any way.** - - -{# -You are a cognitive study assistant designed to help users research and learn by engaging in focused discussions about documents in their workspace. Your primary goal is to provide informative, accurate responses to user queries while properly citing relevant documents from the available search tool. - -To answer this question effectively, you have access to a search tool with the following parameters: -- keyword_searches: List[str] - A list of search terms for keyword search -- vector_searches: List[str] - A list of search terms for vector search - -Follow these steps to formulate your response: - -1. Analyze the user's question and determine appropriate search terms. -2. Use the search tool to find relevant information. -3. 
Carefully review the search results, paying close attention to document IDs and content relevance. -4. Compose a clear, informative response that directly addresses the user's question. -5. Include relevant document citations using the exact document IDs provided by the search tool. -6. Review your response for accuracy and relevance before delivering it to the user. - -Important guidelines: -- Always use the complete document ID as provided by the search tool, including its type prefix (e.g., "note:", "insight:", "source:"). -- Do not make up or modify document IDs in any way. -- Ensure that each citation is directly relevant to the information it supports. -- Prioritize accuracy and relevance in your search strategy and response composition. - -Before composing your final response, wrap your thought process in tags to analyze the question, plan your search strategy, and evaluate the search results. This will help ensure that you retrieve the most relevant information and use the correct document IDs in your citations. Include the following steps: -a. Analyze the question and identify key concepts -b. Plan search strategy (both keyword and vector searches) -c. Evaluate search results and note relevant document IDs -d. Outline the main points for the response - -Your final response should be conversational in tone, directly addressing the user's question while seamlessly incorporating document citations. Use square brackets with the full document ID for each citation, like this: [document_id]. - -Remember, the quality and accuracy of your response, including proper document citations, are crucial for helping the user in their research and learning process. 
#} \ No newline at end of file From 8452b893c0299a651458b63c83c59d0ce31024ca Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Tue, 12 Nov 2024 12:56:47 -0300 Subject: [PATCH 20/44] poetry update --- poetry.lock | 166 ++++++++++++++++++++++++++-------------------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/poetry.lock b/poetry.lock index d4066b1..11dffc5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -345,13 +345,13 @@ css = ["tinycss2 (>=1.1.0,<1.5)"] [[package]] name = "blinker" -version = "1.8.2" +version = "1.9.0" description = "Fast, simple object-to-object and broadcast signaling" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "blinker-1.8.2-py3-none-any.whl", hash = "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01"}, - {file = "blinker-1.8.2.tar.gz", hash = "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83"}, + {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, + {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, ] [[package]] @@ -728,37 +728,37 @@ typing-inspect = ">=0.4.0,<1" [[package]] name = "debugpy" -version = "1.8.7" +version = "1.8.8" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" files = [ - {file = "debugpy-1.8.7-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:95fe04a573b8b22896c404365e03f4eda0ce0ba135b7667a1e57bd079793b96b"}, - {file = "debugpy-1.8.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:628a11f4b295ffb4141d8242a9bb52b77ad4a63a2ad19217a93be0f77f2c28c9"}, - {file = "debugpy-1.8.7-cp310-cp310-win32.whl", hash = "sha256:85ce9c1d0eebf622f86cc68618ad64bf66c4fc3197d88f74bb695a416837dd55"}, - {file = "debugpy-1.8.7-cp310-cp310-win_amd64.whl", hash = 
"sha256:29e1571c276d643757ea126d014abda081eb5ea4c851628b33de0c2b6245b037"}, - {file = "debugpy-1.8.7-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:caf528ff9e7308b74a1749c183d6808ffbedbb9fb6af78b033c28974d9b8831f"}, - {file = "debugpy-1.8.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cba1d078cf2e1e0b8402e6bda528bf8fda7ccd158c3dba6c012b7897747c41a0"}, - {file = "debugpy-1.8.7-cp311-cp311-win32.whl", hash = "sha256:171899588bcd412151e593bd40d9907133a7622cd6ecdbdb75f89d1551df13c2"}, - {file = "debugpy-1.8.7-cp311-cp311-win_amd64.whl", hash = "sha256:6e1c4ffb0c79f66e89dfd97944f335880f0d50ad29525dc792785384923e2211"}, - {file = "debugpy-1.8.7-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:4d27d842311353ede0ad572600c62e4bcd74f458ee01ab0dd3a1a4457e7e3706"}, - {file = "debugpy-1.8.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:703c1fd62ae0356e194f3e7b7a92acd931f71fe81c4b3be2c17a7b8a4b546ec2"}, - {file = "debugpy-1.8.7-cp312-cp312-win32.whl", hash = "sha256:2f729228430ef191c1e4df72a75ac94e9bf77413ce5f3f900018712c9da0aaca"}, - {file = "debugpy-1.8.7-cp312-cp312-win_amd64.whl", hash = "sha256:45c30aaefb3e1975e8a0258f5bbd26cd40cde9bfe71e9e5a7ac82e79bad64e39"}, - {file = "debugpy-1.8.7-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:d050a1ec7e925f514f0f6594a1e522580317da31fbda1af71d1530d6ea1f2b40"}, - {file = "debugpy-1.8.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f4349a28e3228a42958f8ddaa6333d6f8282d5edaea456070e48609c5983b7"}, - {file = "debugpy-1.8.7-cp313-cp313-win32.whl", hash = "sha256:11ad72eb9ddb436afb8337891a986302e14944f0f755fd94e90d0d71e9100bba"}, - {file = "debugpy-1.8.7-cp313-cp313-win_amd64.whl", hash = "sha256:2efb84d6789352d7950b03d7f866e6d180284bc02c7e12cb37b489b7083d81aa"}, - {file = 
"debugpy-1.8.7-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:4b908291a1d051ef3331484de8e959ef3e66f12b5e610c203b5b75d2725613a7"}, - {file = "debugpy-1.8.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da8df5b89a41f1fd31503b179d0a84a5fdb752dddd5b5388dbd1ae23cda31ce9"}, - {file = "debugpy-1.8.7-cp38-cp38-win32.whl", hash = "sha256:b12515e04720e9e5c2216cc7086d0edadf25d7ab7e3564ec8b4521cf111b4f8c"}, - {file = "debugpy-1.8.7-cp38-cp38-win_amd64.whl", hash = "sha256:93176e7672551cb5281577cdb62c63aadc87ec036f0c6a486f0ded337c504596"}, - {file = "debugpy-1.8.7-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:90d93e4f2db442f8222dec5ec55ccfc8005821028982f1968ebf551d32b28907"}, - {file = "debugpy-1.8.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6db2a370e2700557a976eaadb16243ec9c91bd46f1b3bb15376d7aaa7632c81"}, - {file = "debugpy-1.8.7-cp39-cp39-win32.whl", hash = "sha256:a6cf2510740e0c0b4a40330640e4b454f928c7b99b0c9dbf48b11efba08a8cda"}, - {file = "debugpy-1.8.7-cp39-cp39-win_amd64.whl", hash = "sha256:6a9d9d6d31846d8e34f52987ee0f1a904c7baa4912bf4843ab39dadf9b8f3e0d"}, - {file = "debugpy-1.8.7-py2.py3-none-any.whl", hash = "sha256:57b00de1c8d2c84a61b90880f7e5b6deaf4c312ecbde3a0e8912f2a56c4ac9ae"}, - {file = "debugpy-1.8.7.zip", hash = "sha256:18b8f731ed3e2e1df8e9cdaa23fb1fc9c24e570cd0081625308ec51c82efe42e"}, + {file = "debugpy-1.8.8-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:e59b1607c51b71545cb3496876544f7186a7a27c00b436a62f285603cc68d1c6"}, + {file = "debugpy-1.8.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6531d952b565b7cb2fbd1ef5df3d333cf160b44f37547a4e7cf73666aca5d8d"}, + {file = "debugpy-1.8.8-cp310-cp310-win32.whl", hash = "sha256:b01f4a5e5c5fb1d34f4ccba99a20ed01eabc45a4684f4948b5db17a319dfb23f"}, + {file = "debugpy-1.8.8-cp310-cp310-win_amd64.whl", 
hash = "sha256:535f4fb1c024ddca5913bb0eb17880c8f24ba28aa2c225059db145ee557035e9"}, + {file = "debugpy-1.8.8-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:c399023146e40ae373753a58d1be0a98bf6397fadc737b97ad612886b53df318"}, + {file = "debugpy-1.8.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:09cc7b162586ea2171eea055985da2702b0723f6f907a423c9b2da5996ad67ba"}, + {file = "debugpy-1.8.8-cp311-cp311-win32.whl", hash = "sha256:eea8821d998ebeb02f0625dd0d76839ddde8cbf8152ebbe289dd7acf2cdc6b98"}, + {file = "debugpy-1.8.8-cp311-cp311-win_amd64.whl", hash = "sha256:d4483836da2a533f4b1454dffc9f668096ac0433de855f0c22cdce8c9f7e10c4"}, + {file = "debugpy-1.8.8-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:0cc94186340be87b9ac5a707184ec8f36547fb66636d1029ff4f1cc020e53996"}, + {file = "debugpy-1.8.8-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64674e95916e53c2e9540a056e5f489e0ad4872645399d778f7c598eacb7b7f9"}, + {file = "debugpy-1.8.8-cp312-cp312-win32.whl", hash = "sha256:5c6e885dbf12015aed73770f29dec7023cb310d0dc2ba8bfbeb5c8e43f80edc9"}, + {file = "debugpy-1.8.8-cp312-cp312-win_amd64.whl", hash = "sha256:19ffbd84e757a6ca0113574d1bf5a2298b3947320a3e9d7d8dc3377f02d9f864"}, + {file = "debugpy-1.8.8-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:705cd123a773d184860ed8dae99becd879dfec361098edbefb5fc0d3683eb804"}, + {file = "debugpy-1.8.8-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:890fd16803f50aa9cb1a9b9b25b5ec321656dd6b78157c74283de241993d086f"}, + {file = "debugpy-1.8.8-cp313-cp313-win32.whl", hash = "sha256:90244598214bbe704aa47556ec591d2f9869ff9e042e301a2859c57106649add"}, + {file = "debugpy-1.8.8-cp313-cp313-win_amd64.whl", hash = "sha256:4b93e4832fd4a759a0c465c967214ed0c8a6e8914bced63a28ddb0dd8c5f078b"}, + {file = 
"debugpy-1.8.8-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:143ef07940aeb8e7316de48f5ed9447644da5203726fca378f3a6952a50a9eae"}, + {file = "debugpy-1.8.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f95651bdcbfd3b27a408869a53fbefcc2bcae13b694daee5f1365b1b83a00113"}, + {file = "debugpy-1.8.8-cp38-cp38-win32.whl", hash = "sha256:26b461123a030e82602a750fb24d7801776aa81cd78404e54ab60e8b5fecdad5"}, + {file = "debugpy-1.8.8-cp38-cp38-win_amd64.whl", hash = "sha256:f3cbf1833e644a3100eadb6120f25be8a532035e8245584c4f7532937edc652a"}, + {file = "debugpy-1.8.8-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:53709d4ec586b525724819dc6af1a7703502f7e06f34ded7157f7b1f963bb854"}, + {file = "debugpy-1.8.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a9c013077a3a0000e83d97cf9cc9328d2b0bbb31f56b0e99ea3662d29d7a6a2"}, + {file = "debugpy-1.8.8-cp39-cp39-win32.whl", hash = "sha256:ffe94dd5e9a6739a75f0b85316dc185560db3e97afa6b215628d1b6a17561cb2"}, + {file = "debugpy-1.8.8-cp39-cp39-win_amd64.whl", hash = "sha256:5c0e5a38c7f9b481bf31277d2f74d2109292179081f11108e668195ef926c0f9"}, + {file = "debugpy-1.8.8-py2.py3-none-any.whl", hash = "sha256:ec684553aba5b4066d4de510859922419febc710df7bba04fe9e7ef3de15d34f"}, + {file = "debugpy-1.8.8.zip", hash = "sha256:e6355385db85cbd666be703a96ab7351bc9e6c61d694893206f8001e22aee091"}, ] [[package]] @@ -829,13 +829,13 @@ files = [ [[package]] name = "edge-tts" -version = "6.1.15" +version = "6.1.18" description = "Microsoft Edge's TTS" optional = false python-versions = ">=3.7" files = [ - {file = "edge_tts-6.1.15-py3-none-any.whl", hash = "sha256:b9e68df19505237f4081eab41663d23d42c7deb59475f809081844d47e5cfacf"}, - {file = "edge_tts-6.1.15.tar.gz", hash = "sha256:9e8c60cd30e83db379151736a4aaee0a2de6b999bc4e4b312ee7895dc987806d"}, + {file = "edge_tts-6.1.18-py3-none-any.whl", hash = 
"sha256:05eca3efb81e730b2b466a992907d12587e08a665fc4e9ac4a8d595b7b93f7aa"}, + {file = "edge_tts-6.1.18.tar.gz", hash = "sha256:eece936df0e87cf7740848418b3b742536db889433402a954f41480b0bf62af4"}, ] [package.dependencies] @@ -1151,13 +1151,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "google-api-core" -version = "2.22.0" +version = "2.23.0" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google_api_core-2.22.0-py3-none-any.whl", hash = "sha256:a6652b6bd51303902494998626653671703c420f6f4c88cfd3f50ed723e9d021"}, - {file = "google_api_core-2.22.0.tar.gz", hash = "sha256:26f8d76b96477db42b55fd02a33aae4a42ec8b86b98b94969b7333a2c828bf35"}, + {file = "google_api_core-2.23.0-py3-none-any.whl", hash = "sha256:c20100d4c4c41070cf365f1d8ddf5365915291b5eb11b83829fbd1c999b5122f"}, + {file = "google_api_core-2.23.0.tar.gz", hash = "sha256:2ceb087315e6af43f256704b871d99326b1f12a9d6ce99beaedec99ba26a0ace"}, ] [package.dependencies] @@ -1283,13 +1283,13 @@ xai = ["tensorflow (>=2.3.0,<3.0.0dev)"] [[package]] name = "google-cloud-bigquery" -version = "3.26.0" +version = "3.27.0" description = "Google BigQuery API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_bigquery-3.26.0-py2.py3-none-any.whl", hash = "sha256:e0e9ad28afa67a18696e624cbccab284bf2c0a3f6eeb9eeb0426c69b943793a8"}, - {file = "google_cloud_bigquery-3.26.0.tar.gz", hash = "sha256:edbdc788beea659e04c0af7fe4dcd6d9155344b98951a0d5055bd2f15da4ba23"}, + {file = "google_cloud_bigquery-3.27.0-py2.py3-none-any.whl", hash = "sha256:b53b0431e5ba362976a4cd8acce72194b4116cdf8115030c7b339b884603fcc3"}, + {file = "google_cloud_bigquery-3.27.0.tar.gz", hash = "sha256:379c524054d7b090fa56d0c22662cc6e6458a6229b6754c0e7177e3a73421d2c"}, ] [package.dependencies] @@ -1826,13 +1826,13 @@ pygments = ">=2.2.0" [[package]] name = "identify" -version = "2.6.1" +version = "2.6.2" 
description = "File identification library for Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "identify-2.6.1-py2.py3-none-any.whl", hash = "sha256:53863bcac7caf8d2ed85bd20312ea5dcfc22226800f6d6881f232d861db5a8f0"}, - {file = "identify-2.6.1.tar.gz", hash = "sha256:91478c5fb7c3aac5ff7bf9b4344f803843dc586832d5f110d672b19aa1984c98"}, + {file = "identify-2.6.2-py2.py3-none-any.whl", hash = "sha256:c097384259f49e372f4ea00a19719d95ae27dd5ff0fd77ad630aa891306b82f3"}, + {file = "identify-2.6.2.tar.gz", hash = "sha256:fab5c716c24d7a789775228823797296a2994b075fb6080ac83a102772a98cbd"}, ] [package.extras] @@ -1990,22 +1990,22 @@ test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"] [[package]] name = "jedi" -version = "0.19.1" +version = "0.19.2" description = "An autocompletion tool for Python that can be used for text editors." optional = false python-versions = ">=3.6" files = [ - {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, - {file = "jedi-0.19.1.tar.gz", hash = "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd"}, + {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, + {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"}, ] [package.dependencies] -parso = ">=0.8.3,<0.9.0" +parso = ">=0.8.4,<0.9.0" [package.extras] docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer 
(==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] -testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] +testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"] [[package]] name = "jinja2" @@ -2489,13 +2489,13 @@ orjson = ">=3.10.1" [[package]] name = "langsmith" -version = "0.1.140" +version = "0.1.142" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.140-py3-none-any.whl", hash = "sha256:3de70183ae19a4ada4d77a8a9f336ff95ca0ead98215771033ee889a2889fe19"}, - {file = "langsmith-0.1.140.tar.gz", hash = "sha256:cb0a717d7b9e6d3145285d7ca0ab216e064cbe7a1ca4139fc04af57fb2315e70"}, + {file = "langsmith-0.1.142-py3-none-any.whl", hash = "sha256:f639ca23c9a0bb77af5fb881679b2f66ff1f21f19d0bebf4e51375e7585a8b38"}, + {file = "langsmith-0.1.142.tar.gz", hash = "sha256:f8a84d100f3052233ff0a1d66ae14c5dfc20b7e41a1601de011384f16ee6cb82"}, ] [package.dependencies] @@ -2610,13 +2610,13 @@ rapidfuzz = ">=3.9.0,<4.0.0" [[package]] name = "litellm" -version = "1.52.1" +version = "1.52.4" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.52.1-py3-none-any.whl", hash = "sha256:a76133fc6d14a3157275d9ae850b9f18312ae93ae313092d6cf9e9d35f2c72f2"}, - {file = "litellm-1.52.1.tar.gz", hash = "sha256:750056e0329c5c742193b8f2104133da1e69b2fcc534827e18f7b536af56315c"}, + {file = "litellm-1.52.4-py3-none-any.whl", hash = "sha256:bfb208c2fc2c960bea6db34dbb77cd3c8a63e76d13a4d9163815df982d7e2764"}, + {file = "litellm-1.52.4.tar.gz", hash = 
"sha256:aaf5de4da0fad31f8e3cb90d026660638adfb9d97fe7c2a63ac9e072d1690900"}, ] [package.dependencies] @@ -3196,13 +3196,13 @@ files = [ [[package]] name = "narwhals" -version = "1.13.2" +version = "1.13.3" description = "Extremely lightweight compatibility layer between dataframe libraries" optional = false python-versions = ">=3.8" files = [ - {file = "narwhals-1.13.2-py3-none-any.whl", hash = "sha256:d901ad3741ae39e87e9022cf605ec24a20c40812b6975814a04c031c3e4b55f7"}, - {file = "narwhals-1.13.2.tar.gz", hash = "sha256:e48958800688180c53a696dfc0fce864d5fb6e293925564f1b4a9a837ad3a19f"}, + {file = "narwhals-1.13.3-py3-none-any.whl", hash = "sha256:cde49b59b4540885d822777b747ed3fad65632b3d34648040308afcf08e62547"}, + {file = "narwhals-1.13.3.tar.gz", hash = "sha256:db95cb5b5a6b99bad9fe7f2e2dacf937d57dee1c76c4544d4354a324084e36b5"}, ] [package.extras] @@ -3500,13 +3500,13 @@ files = [ [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -3804,13 +3804,13 @@ files = [ [[package]] name = "podcastfy" -version = "0.3.2" +version = "0.3.3" description = "An Open Source alternative to NotebookLM's podcast feature: Transforming Multimodal Content into Captivating Multilingual Audio Conversations with GenAI" optional = false python-versions = "<4.0,>=3.11" files = [ - {file = "podcastfy-0.3.2-py3-none-any.whl", hash = 
"sha256:c46669a4d03e13b9230ceb2a9fc5fb9ae05dce081a4d9585cc43ef8c67f47d3a"}, - {file = "podcastfy-0.3.2.tar.gz", hash = "sha256:c8420e0445842d651a9add1918881712d98cc9f30657ef37ada65f76a0e0c5aa"}, + {file = "podcastfy-0.3.3-py3-none-any.whl", hash = "sha256:857813dea2b96da292a1f22226a2696066526abbba57289bd5cb5533eb12c041"}, + {file = "podcastfy-0.3.3.tar.gz", hash = "sha256:7ce62ba1ddaccc9d46c74cdafc5def90dfe2f9f26aac53c00f7b922476e80545"}, ] [package.dependencies] @@ -5163,23 +5163,23 @@ websockets = "13.1" [[package]] name = "setuptools" -version = "75.3.0" +version = "75.4.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, - {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, + {file = "setuptools-75.4.0-py3-none-any.whl", hash = "sha256:b3c5d862f98500b06ffdf7cc4499b48c46c317d8d56cb30b5c8bce4d88f5c216"}, + {file = "setuptools-75.4.0.tar.gz", hash = "sha256:1dc484f5cf56fd3fe7216d7b8df820802e7246cfb534a1db2aa64f14fcb9cdcb"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.7.0)"] +core = ["importlib-metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = 
["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] [[package]] name = "shapely" @@ -5601,13 +5601,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "streamlit" -version = "1.40.0" +version = "1.40.1" description = "A faster way to build and share data apps" optional = false python-versions = "!=3.9.7,>=3.8" files = [ - {file = "streamlit-1.40.0-py2.py3-none-any.whl", hash = "sha256:05d22bc111d682ef4deaf7ededeec2305051b99dd6d7d564788705e4ce6f8029"}, - {file = "streamlit-1.40.0.tar.gz", hash = 
"sha256:6e4d3b90c4934951f97d790daf7953df5beb2916e447ac9f78e1b76a9ef83327"}, + {file = "streamlit-1.40.1-py2.py3-none-any.whl", hash = "sha256:b9d7a317a0cc88edd7857c7e07dde9cf95647d3ae51cbfa8a3db82fbb8a2990d"}, + {file = "streamlit-1.40.1.tar.gz", hash = "sha256:1f2b09f04b6ad366a2c7b4d48104697d1c8bc33f48bdf7ed939cc04c12d3aec6"}, ] [package.dependencies] @@ -5629,7 +5629,7 @@ tenacity = ">=8.1.0,<10" toml = ">=0.10.1,<2" tornado = ">=6.0.3,<7" typing-extensions = ">=4.3.0,<5" -watchdog = {version = ">=2.1.5,<6", markers = "platform_system != \"Darwin\""} +watchdog = {version = ">=2.1.5,<7", markers = "platform_system != \"Darwin\""} [package.extras] snowflake = ["snowflake-connector-python (>=2.8.0)", "snowflake-snowpark-python[modin] (>=1.17.0)"] @@ -5898,13 +5898,13 @@ files = [ [[package]] name = "tomli" -version = "2.0.2" +version = "2.1.0" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" files = [ - {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, - {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, + {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"}, + {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"}, ] [[package]] @@ -6411,13 +6411,13 @@ requests = "*" [[package]] name = "zipp" -version = "3.20.2" +version = "3.21.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, - {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, + {file = "zipp-3.21.0-py3-none-any.whl", hash = 
"sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, + {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, ] [package.extras] From dfb6decc1c31c05ba5661822c19b0ce956f6efe0 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Tue, 12 Nov 2024 12:57:06 -0300 Subject: [PATCH 21/44] wip - prepare for o1 --- open_notebook/models/llms.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py index 52ff94b..c501886 100644 --- a/open_notebook/models/llms.py +++ b/open_notebook/models/llms.py @@ -254,15 +254,24 @@ class OpenAILanguageModel(LanguageModel): """ Convert the language model to a LangChain chat model. """ - kwargs = self.kwargs + kwargs = self.kwargs.copy() # Make a copy to avoid modifying the original if self.json: kwargs["response_format"] = {"type": "json_object"} + # Set the token limit in kwargs with the appropriate key + if self.model_name in ["o1-mini", "o1-preview"]: + kwargs["max_completion_tokens"] = self.max_tokens + top_p = 1 + streaming = False + else: + kwargs["max_tokens"] = self.max_tokens + top_p = self.top_p + streaming = self.streaming + return ChatOpenAI( model=self.model_name, temperature=self.temperature or 0.5, - max_tokens=self.max_tokens, model_kwargs=kwargs, - streaming=self.streaming, - top_p=self.top_p, + streaming=streaming, + top_p=top_p, ) From dacdabe6edaf0ba89602e2e77405223dd3c51d79 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Tue, 12 Nov 2024 12:57:36 -0300 Subject: [PATCH 22/44] ui design for delete button --- pages/components/source_panel.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pages/components/source_panel.py b/pages/components/source_panel.py index 55f09df..eac9e5f 100644 --- a/pages/components/source_panel.py +++ b/pages/components/source_panel.py @@ -68,12 +68,9 @@ def source_panel(source_id: str, modal=False): 
source.vectorize() st.success("Embedding complete") - chk_delete = st.checkbox( - "🗑️ Delete source", key=f"delete_source_{source.id}", value=False - ) - if chk_delete: - st.warning( - "Source will be deleted with all its insights and embeddings" + with st.container(border=True): + st.caption( + "Deleting the source will also delete all its insights and embeddings" ) if st.button( "Delete", type="primary", key=f"bt_delete_source_{source.id}" From a38fda44479a6eb1a92f68ebc48c76a109957ae8 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Tue, 12 Nov 2024 12:57:57 -0300 Subject: [PATCH 23/44] fix summary transformation --- transformations.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/transformations.yaml b/transformations.yaml index 435fb3c..d53756a 100644 --- a/transformations.yaml +++ b/transformations.yaml @@ -4,7 +4,6 @@ source_insights: insight_type: "Content Summary" description: "Summarize the content" patterns: - - patterns/default/makeitdense - patterns/default/summarize - name: "Key Insights" insight_type: "Key Insights" From 281abdf01b0701a30e702af772ae904551645f34 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 11:55:38 -0300 Subject: [PATCH 24/44] improve the accuracy of ids in the citations --- open_notebook/graphs/ask.py | 2 ++ open_notebook/graphs/chat.py | 3 ++- pages/3_🔍_Ask_and_Search.py | 14 -------------- prompts/ask/query_process.jinja | 4 ++++ 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py index 4586872..8201783 100644 --- a/open_notebook/graphs/ask.py +++ b/open_notebook/graphs/ask.py @@ -89,6 +89,8 @@ async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: if len(results) == 0: return {"answers": []} payload["results"] = results + ids = [r["id"] for r in results] + payload["ids"] = ids system_prompt = Prompter(prompt_template="ask/query_process").render(data=payload) model = provision_langchain_model( system_prompt, diff --git 
a/open_notebook/graphs/chat.py b/open_notebook/graphs/chat.py index 7342ca3..8d6835a 100644 --- a/open_notebook/graphs/chat.py +++ b/open_notebook/graphs/chat.py @@ -1,6 +1,7 @@ import sqlite3 from typing import Annotated, Optional +from langchain_core.messages import SystemMessage from langchain_core.runnables import ( RunnableConfig, ) @@ -24,7 +25,7 @@ class ThreadState(TypedDict): def call_model_with_messages(state: ThreadState, config: RunnableConfig) -> dict: system_prompt = Prompter(prompt_template="chat").render(data=state) - payload = [system_prompt] + state.get("messages", []) + payload = [SystemMessage(content=system_prompt)] + state.get("messages", []) model = provision_langchain_model( str(payload), config.get("configurable", {}).get("model_id"), diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index 0b0869a..b07e8e5 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -34,20 +34,6 @@ async def process_ask_query(question, strategy_model, answer_model, final_answer ): yield (chunk) - # result = await ask_graph.ainvoke( - # dict( - # question=question, - # ), - # config=dict( - # configurable=dict( - # strategy_model=strategy_model.id, - # answer_model=answer_model.id, - # final_answer_model=final_answer_model.id, - # ) - # ), - # ) - # return result - def results_card(item): score = item.get("relevance", item.get("similarity", item.get("score", 0))) diff --git a/prompts/ask/query_process.jinja b/prompts/ask/query_process.jinja index 17b0d4d..e787fab 100644 --- a/prompts/ask/query_process.jinja +++ b/prompts/ask/query_process.jinja @@ -45,6 +45,10 @@ Please note, "note:iuiodadalknda" and "insight:adadadadadadad" are examples of d - Do not assume or change the type prefix of any document ID. If a document ID is "note:xyz", use it exactly as "note:xyz". Do not change it to "source:xyz" or any other variation. - **Use document IDs exactly as they are returned from the search tool. 
Do not add any prefixes or modify them in any way.** +## IDs PROVIDED IN THIS QUERY + +You have been given the following content ids to work from: {{ids}} +So, if you are citing some document, it should be one of these. # YOUR ANSWER From 62957e80ca93ff4b6270964e2672d106bef4630f Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 11:55:51 -0300 Subject: [PATCH 25/44] wip - preparing for o1 models --- open_notebook/models/llms.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py index c501886..018a604 100644 --- a/open_notebook/models/llms.py +++ b/open_notebook/models/llms.py @@ -254,6 +254,13 @@ class OpenAILanguageModel(LanguageModel): """ Convert the language model to a LangChain chat model. """ + + data = { + "model": self.model_name, + "top_p": self.top_p, + "temperature": self.temperature, + } + kwargs = self.kwargs.copy() # Make a copy to avoid modifying the original if self.json: kwargs["response_format"] = {"type": "json_object"} @@ -261,17 +268,14 @@ class OpenAILanguageModel(LanguageModel): # Set the token limit in kwargs with the appropriate key if self.model_name in ["o1-mini", "o1-preview"]: kwargs["max_completion_tokens"] = self.max_tokens - top_p = 1 - streaming = False + data["top_p"] = 1 + data["streaming"] = False else: - kwargs["max_tokens"] = self.max_tokens - top_p = self.top_p - streaming = self.streaming + data["max_tokens"] = self.max_tokens + data["top_p"] = self.top_p + data["streaming"] = self.streaming return ChatOpenAI( - model=self.model_name, - temperature=self.temperature or 0.5, + **data, model_kwargs=kwargs, - streaming=streaming, - top_p=top_p, ) From e4b8fa8cc721a9bebd1f0a8838b05ccd1241bd41 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 12:17:57 -0300 Subject: [PATCH 26/44] cleanup logging --- open_notebook/database/migrate.py | 6 +++--- open_notebook/database/repository.py | 4 +--- 
open_notebook/domain/base.py | 1 - open_notebook/graphs/source.py | 3 +-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/open_notebook/database/migrate.py b/open_notebook/database/migrate.py index 085caf4..542888f 100644 --- a/open_notebook/database/migrate.py +++ b/open_notebook/database/migrate.py @@ -53,14 +53,14 @@ class MigrationManager: def run_migration_up(self): current_version = self.get_current_version() - logger.debug(f"Current version before migration: {current_version}") + logger.info(f"Current version before migration: {current_version}") if self.needs_migration: try: self.runner.run() new_version = self.get_current_version() - logger.debug(f"Migration successful. New version: {new_version}") + logger.info(f"Migration successful. New version: {new_version}") except Exception as e: logger.error(f"Migration failed: {str(e)}") else: - logger.debug("Database is already at the latest version") + logger.info("Database is already at the latest version") diff --git a/open_notebook/database/repository.py b/open_notebook/database/repository.py index 59f0cca..aa442b0 100644 --- a/open_notebook/database/repository.py +++ b/open_notebook/database/repository.py @@ -30,7 +30,7 @@ def repo_query(query_str: str, vars: Optional[Dict[str, Any]] = None): result = connection.query(query_str, vars) return result except Exception as e: - logger.critical(f"Query: {query_str}, Variables: {vars}") + logger.critical(f"Query: {query_str}") logger.exception(e) raise @@ -62,7 +62,5 @@ def repo_relate(source: str, relationship: str, target: str): # "target": target, # # "content": {}, # You can add properties to the relation here if needed # } - logger.debug(f"Executing RELATE query: {query}") result = repo_query(query) - logger.debug(f"RELATE query result: {result}") return result diff --git a/open_notebook/domain/base.py b/open_notebook/domain/base.py index 80a624f..1b46704 100644 --- a/open_notebook/domain/base.py +++ b/open_notebook/domain/base.py @@ -126,7 
+126,6 @@ class ObjectModel(BaseModel): if self.id is None: data["created"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - logger.debug("Creating new record") repo_result = repo_create(self.__class__.table_name, data) else: data["created"] = ( diff --git a/open_notebook/graphs/source.py b/open_notebook/graphs/source.py index 9456bd7..d3e3f23 100644 --- a/open_notebook/graphs/source.py +++ b/open_notebook/graphs/source.py @@ -33,7 +33,7 @@ class TransformationState(TypedDict): async def content_process(state: SourceState) -> dict: content_state = state["content_state"] - logger.debug("Content processing started for new content") + logger.info("Content processing started for new content") processed_state = await content_graph.ainvoke(content_state) return {"content_state": processed_state} @@ -46,7 +46,6 @@ async def run_patterns(input_text: str, patterns: List[dict]) -> str: def save_source(state: SourceState) -> dict: - logger.debug("Saving source") content_state = state["content_state"] source = Source( From 80353a97c98e61ef96b026ebdd49f4c703640c79 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 12:18:26 -0300 Subject: [PATCH 27/44] make model rag work with vector only --- open_notebook/graphs/ask.py | 24 +++++++++++++----------- pages/3_🔍_Ask_and_Search.py | 2 +- prompts/ask/entry.jinja | 6 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py index 8201783..995864e 100644 --- a/open_notebook/graphs/ask.py +++ b/open_notebook/graphs/ask.py @@ -1,5 +1,5 @@ import operator -from typing import Annotated, List, Literal +from typing import Annotated, List from langchain_core.output_parsers.pydantic import PydanticOutputParser from langchain_core.runnables import ( @@ -7,10 +7,11 @@ from langchain_core.runnables import ( ) from langgraph.graph import END, START, StateGraph from langgraph.types import Send +from loguru import logger from pydantic import BaseModel, Field from 
typing_extensions import TypedDict -from open_notebook.domain.notebook import text_search, vector_search +from open_notebook.domain.notebook import vector_search from open_notebook.graphs.utils import provision_langchain_model from open_notebook.prompter import Prompter @@ -18,7 +19,7 @@ from open_notebook.prompter import Prompter class SubGraphState(TypedDict): question: str term: str - type: Literal["text", "vector"] + # type: Literal["text", "vector"] instructions: str results: dict answer: str @@ -26,9 +27,9 @@ class SubGraphState(TypedDict): class Search(BaseModel): term: str - type: Literal["text", "vector"] = Field( - description="The type of search. Use 'text' for keyword search and 'vector' for semantic search. If you are using text, search always for a single word" - ) + # type: Literal["text", "vector"] = Field( + # description="The type of search. Use 'text' for keyword search and 'vector' for semantic search. If you are using text, search always for a single word" + # ) instructions: str = Field( description="Tell the answeting LLM what information you need extracted from this search" ) @@ -62,6 +63,7 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) - ) # model = model.bind_tools(tools) ai_message = (model | parser).invoke(system_prompt) + logger.debug(ai_message) return {"strategy": ai_message} @@ -73,7 +75,7 @@ async def trigger_queries(state: ThreadState, config: RunnableConfig): "question": state["question"], "instructions": s.instructions, "term": s.term, - "type": s.type, + # "type": s.type, }, ) for s in state["strategy"].searches @@ -82,10 +84,10 @@ async def trigger_queries(state: ThreadState, config: RunnableConfig): async def provide_answer(state: SubGraphState, config: RunnableConfig) -> dict: payload = state - if state["type"] == "text": - results = text_search(state["term"], 10, True, True) - else: - results = vector_search(state["term"], 10, True, True) + # if state["type"] == "text": + # results = 
text_search(state["term"], 10, True, True) + # else: + results = vector_search(state["term"], 10, True, True) if len(results) == 0: return {"answers": []} payload["results"] = results diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index b07e8e5..d84e404 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -83,7 +83,7 @@ with ask_tab: f"Agent Strategy: {chunk['agent']['strategy'].reasoning}" ): for search in chunk["agent"]["strategy"].searches: - st.markdown(f"**{search.type} - {search.term}**") + st.markdown(f"Searched for: **{search.term}**") st.markdown(f"Instructions: {search.instructions}") elif "provide_answer" in chunk: for answer in chunk["provide_answer"]["answers"]: diff --git a/prompts/ask/entry.jinja b/prompts/ask/entry.jinja index 8035bb2..a97f03a 100644 --- a/prompts/ask/entry.jinja +++ b/prompts/ask/entry.jinja @@ -23,9 +23,9 @@ Your answer could be something like: { "reasoning": "The user is asking about the concept of RAG and its application in generating answers to user questions via LLM. I should search for documents related to RAG, retrieval augmented generation, and vector search to provide a comprehensive response.", "searches": [ - { "type": "text", "term": "RAG", "instructions": "Describe the concept and utility of RAG." }, - { "type": "vector", "term": "Retrieval Augmented Generation", "instructions": "Describe the concept and utility of RAG." }, - { "type": "vector", "term": "Vector Search", "instructions": "Describe how RAG utilizes vector search." } + { "term": "RAG", "instructions": "Describe the concept and utility of RAG." }, + { "term": "Retrieval Augmented Generation", "instructions": "Describe the concept and utility of RAG." }, + { "term": "Vector Search", "instructions": "Describe how RAG utilizes vector search." 
} ] } ``` From 9ba5709a3cbe76f8f8d2989a4d2551d1bbba038f Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 14:48:00 -0300 Subject: [PATCH 28/44] model selector and model suggestions --- open_notebook/domain/base.py | 3 +- open_notebook/domain/models.py | 7 +- open_notebook_config.yaml | 26 ++++- pages/3_🔍_Ask_and_Search.py | 28 +++--- pages/7_⚙️_Settings.py | 153 ++++++++++++++++------------- pages/8_🛝_Playground.py | 11 ++- pages/components/model_selector.py | 35 +++++++ pages/stream_app/utils.py | 34 ++----- 8 files changed, 182 insertions(+), 115 deletions(-) create mode 100644 pages/components/model_selector.py diff --git a/open_notebook/domain/base.py b/open_notebook/domain/base.py index 1b46704..4757fdf 100644 --- a/open_notebook/domain/base.py +++ b/open_notebook/domain/base.py @@ -112,8 +112,6 @@ class ObjectModel(BaseModel): from open_notebook.domain.models import model_manager from open_notebook.models import EmbeddingModel - EMBEDDING_MODEL: EmbeddingModel = model_manager.embedding_model - try: self.model_validate(self.model_dump(), strict=True) data = self._prepare_save_data() @@ -122,6 +120,7 @@ class ObjectModel(BaseModel): if self.needs_embedding(): embedding_content = self.get_embedding_content() if embedding_content: + EMBEDDING_MODEL: EmbeddingModel = model_manager.embedding_model data["embedding"] = EMBEDDING_MODEL.embed(embedding_content) if self.id is None: diff --git a/open_notebook/domain/models.py b/open_notebook/domain/models.py index 9cb84ca..ff43ee7 100644 --- a/open_notebook/domain/models.py +++ b/open_notebook/domain/models.py @@ -68,7 +68,9 @@ class ModelManager: ) return cached_model - assert model_id, "Model ID cannot be empty" + if not model_id: + return None + model: Model = Model.get(model_id) if not model: @@ -160,9 +162,6 @@ class ModelManager: elif model_type == "large_context": model_id = self.defaults.large_context_model - if not model_id: - raise ValueError(f"No default model configured for type: {model_type}") 
- return self.get_model(model_id, **kwargs) def clear_cache(self): diff --git a/open_notebook_config.yaml b/open_notebook_config.yaml index 280c2a4..e6d6f72 100644 --- a/open_notebook_config.yaml +++ b/open_notebook_config.yaml @@ -10,4 +10,28 @@ youtube_transcripts: - fr - de - hi - - ja \ No newline at end of file + - ja +suggested_models: + openai: + language: + - gpt-4o-mini + embedding: + - text-embedding-3-small + text_to_speech: + - tts-1-hd + speech_to_text: + - whisper-1 + gemini: + language: + - gemini-1.5-flash + text_to_speech: + - default + xai: + language: + - grok-beta + anthropic: + language: + - claude-3-5-sonnet-20241022 + elevenlabs: + text_to_speech: + - eleven_turbo_v2_5 \ No newline at end of file diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index d84e404..7956737 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -2,9 +2,10 @@ import asyncio import streamlit as st -from open_notebook.domain.models import Model +from open_notebook.domain.models import DefaultModels from open_notebook.domain.notebook import Note, Notebook, text_search, vector_search from open_notebook.graphs.ask import graph as ask_graph +from pages.components.model_selector import model_selector from pages.stream_app.utils import convert_source_references, setup_page setup_page("🔍 Search") @@ -52,23 +53,26 @@ with ask_tab: "The LLM will answer your query based on the documents in your knowledge base. 
" ) question = st.text_input("Question", "") - models = Model.get_models_by_type("language") - strategy_model: Model = st.selectbox( + default_model = DefaultModels().load().default_chat_model + strategy_model = model_selector( "Query Strategy Model", - models, - format_func=lambda x: x.name, + "strategy_model", + selected_id=default_model, + model_type="language", help="This is the LLM that will be responsible for strategizing the search", ) - answer_model: Model = st.selectbox( - "Indivual Answer Model", - models, - format_func=lambda x: x.name, + answer_model = model_selector( + "Individual Answer Model", + "answer_model", + model_type="language", + selected_id=default_model, help="This is the LLM that will be responsible for processing individual subqueries", ) - final_answer_model: Model = st.selectbox( + final_answer_model = model_selector( "Final Answer Model", - models, - format_func=lambda x: x.name, + "final_answer_model", + model_type="language", + selected_id=default_model, help="This is the LLM that will be responsible for processing the final answer", ) ask_bt = st.button("Ask") diff --git a/pages/7_⚙️_Settings.py b/pages/7_⚙️_Settings.py index e2b09f0..955c995 100644 --- a/pages/7_⚙️_Settings.py +++ b/pages/7_⚙️_Settings.py @@ -2,9 +2,11 @@ import os import streamlit as st +from open_notebook.config import CONFIG from open_notebook.domain.models import DefaultModels, Model, model_manager from open_notebook.domain.transformation import DefaultTransformations, Transformation from open_notebook.models import MODEL_CLASS_MAP +from pages.components.model_selector import model_selector from pages.stream_app.utils import setup_page setup_page("⚙️ Settings") @@ -59,8 +61,41 @@ provider_status["litellm"] = ( available_providers = [k for k, v in provider_status.items() if v] unavailable_providers = [k for k, v in provider_status.items() if not v] + +def generate_new_models(models, suggested_models): + # Create a set of existing model keys for efficient lookup 
+ existing_model_keys = { + f"{model.provider}-{model.name}-{model.type}" for model in models + } + + new_models = [] + + # Iterate through suggested models by provider + for provider, types in suggested_models.items(): + # Iterate through each type (language, embedding, etc.) + for type_, model_list in types.items(): + for model_name in model_list: + model_key = f"{provider}-{model_name}-{type_}" + + # Check if model already exists + if model_key not in existing_model_keys: + new_models.append( + { + "name": model_name, + "type": type_, + "provider": provider, + } + ) + + return new_models + + +default_models = DefaultModels().model_dump() +all_models = Model.get_all() + with model_tab: st.subheader("Add Model") + provider = st.selectbox("Provider", available_providers) if len(unavailable_providers) > 0: st.caption( @@ -92,8 +127,20 @@ with model_tab: model = Model(name=model_name, provider=provider, type=model_type) model.save() st.success("Saved") + st.divider() - all_models = Model.get_all() + suggested_models = CONFIG.get("suggested_models", []) + recommendations = generate_new_models(all_models, suggested_models) + if len(recommendations) > 0: + with st.expander("💁‍♂️ Recommended models to get you started.."): + for recommendation in recommendations: + st.markdown( + f"**{recommendation['name']}** ({recommendation['provider']}, {recommendation['type']})" + ) + if st.button("Add", key=f"add_{recommendation['name']}"): + new_model = Model(**recommendation) + new_model.save() + st.rerun() st.subheader("Configured Models") model_types_available = { # "vision": False, @@ -114,20 +161,7 @@ with model_tab: if not available: st.warning(f"No models available for {model_type}") - -def get_selected_index(models, model_id, default=0): - """Returns the index of the selected model in the list of models""" - if not model_id or not models: - return default - for i, model in enumerate(models): - if model.id == model_id: - return i - return default - - with model_defaults_tab: 
- default_models = DefaultModels().model_dump() - all_models = Model.get_all() text_generation_models = [model for model in all_models if model.type == "language"] text_to_speech_models = [ @@ -143,93 +177,80 @@ with model_defaults_tab: "In this section, you can select the default models to be used on the various content operations done by Open Notebook. Some of these can be overriden in the different modules." ) defs = {} - defs["default_chat_model"] = st.selectbox( + defs["default_chat_model"] = model_selector( "Default Chat Model", - text_generation_models, - format_func=lambda x: x.name, + "default_chat_model", + selected_id=default_models.get("default_chat_model"), help="This model will be used for chat.", - index=get_selected_index( - text_generation_models, default_models.get("default_chat_model") - ), + model_type="language", ) st.divider() - defs["default_transformation_model"] = st.selectbox( + defs["default_transformation_model"] = model_selector( "Default Transformation Model", - text_generation_models, - format_func=lambda x: x.name, + "default_transformation_model", + selected_id=default_models.get("default_transformation_model"), help="This model will be used for text transformations such as summaries, insights, etc.", - index=get_selected_index( - text_generation_models, default_models.get("default_transformation_model") - ), + model_type="language", ) + st.caption("You can use a cheap model here like gpt-4o-mini, llama3, etc.") st.divider() - defs["default_tools_model"] = st.selectbox( + defs["default_tools_model"] = model_selector( "Default Tools Model", - text_generation_models, - format_func=lambda x: x.name, + "default_tools_model", + selected_id=default_models.get("default_tools_model"), help="This model will be used for calling tools. 
Currently, it's best to use Open AI and Anthropic for this.", - index=get_selected_index( - text_generation_models, default_models.get("default_tools_model") - ), + model_type="language", ) + st.caption("Recommended to use a capable model here, like gpt-4o, claude, etc.") st.divider() - defs["large_context_model"] = st.selectbox( + defs["large_context_model"] = model_selector( "Large Context Model", - text_generation_models, - format_func=lambda x: x.name, + "large_context_model", + selected_id=default_models.get("large_context_model"), help="This model will be used for larger context generation -- recommended: Gemini", - index=get_selected_index( - text_generation_models, default_models.get("large_context_model") - ), + model_type="language", ) st.caption("Recommended to use Gemini models for larger context processing") st.divider() - defs["default_text_to_speech_model"] = st.selectbox( + defs["default_text_to_speech_model"] = model_selector( "Default Text to Speech Model", - text_to_speech_models, - format_func=lambda x: x.name, + "default_text_to_speech_model", + selected_id=default_models.get("default_text_to_speech_model"), help="This is the default model for converting text to speech (podcasts, etc)", - index=get_selected_index( - text_to_speech_models, default_models.get("default_text_to_speech_model") - ), + model_type="text_to_speech", ) st.caption("You can override this model on different podcasts") st.divider() - defs["default_speech_to_text_model"] = st.selectbox( + defs["default_speech_to_text_model"] = model_selector( "Default Speech to Text Model", - speech_to_text_models, - format_func=lambda x: x.name, + "default_speech_to_text_model", + selected_id=default_models.get("default_speech_to_text_model"), help="This is the default model for converting speech to text (audio transcriptions, etc)", - index=get_selected_index( - speech_to_text_models, default_models.get("default_speech_to_text_model") - ), + model_type="speech_to_text", ) + st.divider() - # 
defs["default_vision_model"] = st.selectbox( - # "Default Vision Model", - # vision_models, - # format_func=lambda x: x.name, - # help="This is the default model for vision tasks (image recognition, PDF recognition, etc)", - # index=get_selected_index( - # vision_models, default_models.get("default_vision_model") + # defs["default_vision_model"] = ( + # model_selector( + # "Default Speech to Text Model", + # "default_vision_model", + # selected_id=default_models.get("default_vision_model"), + # help="This is the default model for vision tasks", + # model_type="vision", # ), # ) - # st.divider() - defs["default_embedding_model"] = st.selectbox( - "Default Embedding Model", - embedding_models, - format_func=lambda x: x.name, + defs["default_embedding_model"] = model_selector( + "Default Speech to Text Model", + "default_embedding_model", + selected_id=default_models.get("default_embedding_model"), help="This is the default model for embeddings (semantic search, etc)", - index=get_selected_index( - embedding_models, default_models.get("default_embedding_model") - ), + model_type="embedding", ) st.caption( "Caution: you cannot change the embedding model once there is embeddings or they will need to be regenerated" ) - # if st.button("Save Defaults", key="save_defaults"): for k, v in defs.items(): if v: defs[k] = v.id diff --git a/pages/8_🛝_Playground.py b/pages/8_🛝_Playground.py index 5bcac7a..355cf9b 100644 --- a/pages/8_🛝_Playground.py +++ b/pages/8_🛝_Playground.py @@ -1,8 +1,8 @@ import streamlit as st import yaml -from open_notebook.domain.models import Model from open_notebook.graphs.multipattern import graph as pattern_graph +from pages.components.model_selector import model_selector from pages.stream_app.utils import setup_page setup_page("🛝 Playground") @@ -22,12 +22,13 @@ transformation = st.selectbox( with st.expander("Details"): st.json(transformation) -models = Model.get_models_by_type("language") -model = st.selectbox( +model = model_selector( "Pick a 
pattern model", - models, - format_func=lambda x: x.name, + key="model", + help="This is the model that will be used to run the transformation", + model_type="language", ) + input_text = st.text_area("Enter some text", height=200) if st.button("Run"): diff --git a/pages/components/model_selector.py b/pages/components/model_selector.py new file mode 100644 index 0000000..832367f --- /dev/null +++ b/pages/components/model_selector.py @@ -0,0 +1,35 @@ +from typing import Literal + +import streamlit as st + +from open_notebook.domain.models import Model + + +def model_selector( + label, + key, + selected_id=None, + help=None, + model_type: Literal[ + "language", "embedding", "speech_to_text", "text_to_speech" + ] = "language", +) -> Model: + models = Model.get_models_by_type(model_type) + models.sort(key=lambda x: (x.provider, x.name)) + try: + index = ( + next((i for i, m in enumerate(models) if m.id == selected_id), 0) + if selected_id + else 0 + ) + except Exception: + index = 0 + + return st.selectbox( + label, + models, + format_func=lambda x: f"{x.provider} - {x.name}", + help=help, + index=index, + key=key, + ) diff --git a/pages/stream_app/utils.py b/pages/stream_app/utils.py index 2caae54..db5df6d 100644 --- a/pages/stream_app/utils.py +++ b/pages/stream_app/utils.py @@ -116,34 +116,18 @@ def check_migration(): def check_models(): default_models = model_manager.defaults - if ( - not default_models.default_chat_model - or not default_models.default_transformation_model + if not all( + [ + default_models.default_chat_model, + default_models.default_transformation_model, + default_models.default_embedding_model, + default_models.default_speech_to_text_model, + default_models.large_context_model, + ] ): st.warning( - "You don't have default chat and transformation models selected. Please, select them on the settings page." + "You are missing some default models and the app might not work as expected. Please, select them on the settings page." 
) - st.stop() - elif not default_models.default_embedding_model: - st.warning( - "You don't have a default embedding model selected. Vector search will not be possible and your assistant will be less able to answer your queries. Please, select one on the settings page." - ) - st.stop() - elif not default_models.default_speech_to_text_model: - st.warning( - "You don't have a default speech to text model selected. Your assistant will not be able to transcribe audio. Please, select one on the settings page." - ) - st.stop() - elif not default_models.default_text_to_speech_model: - st.warning( - "You don't have a default text to speech model selected. Your assistant will not be able to generate audio and podcasts. Please, select one on the settings page." - ) - st.stop() - elif not default_models.large_context_model: - st.warning( - "You don't have a large context model selected. Your assistant will not be able to process large documents. Please, select one on the settings page." - ) - st.stop() def handle_error(func): From 321234e48528e08427aea0cde4dea178b0878e2a Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 15:09:56 -0300 Subject: [PATCH 29/44] add support for GROQ models --- open_notebook/models/__init__.py | 4 ++ open_notebook/models/llms.py | 25 +++++++++ open_notebook/models/speech_to_text_models.py | 19 +++++++ pages/7_⚙️_Settings.py | 1 + poetry.lock | 54 +++++++++++++++---- pyproject.toml | 2 + 6 files changed, 95 insertions(+), 10 deletions(-) diff --git a/open_notebook/models/__init__.py b/open_notebook/models/__init__.py index 3e3b72a..c131abd 100644 --- a/open_notebook/models/__init__.py +++ b/open_notebook/models/__init__.py @@ -10,6 +10,7 @@ from open_notebook.models.embedding_models import ( from open_notebook.models.llms import ( AnthropicLanguageModel, GeminiLanguageModel, + GroqLanguageModel, LanguageModel, LiteLLMLanguageModel, OllamaLanguageModel, @@ -20,6 +21,7 @@ from open_notebook.models.llms import ( XAILanguageModel, ) from 
open_notebook.models.speech_to_text_models import ( + GroqSpeechToTextModel, OpenAISpeechToTextModel, SpeechToTextModel, ) @@ -46,6 +48,7 @@ MODEL_CLASS_MAP: Dict[str, ProviderMap] = { "openai": OpenAILanguageModel, "gemini": GeminiLanguageModel, "xai": XAILanguageModel, + "groq": GroqLanguageModel, }, "embedding": { "openai": OpenAIEmbeddingModel, @@ -55,6 +58,7 @@ MODEL_CLASS_MAP: Dict[str, ProviderMap] = { }, "speech_to_text": { "openai": OpenAISpeechToTextModel, + "groq": GroqSpeechToTextModel, }, "text_to_speech": { "openai": OpenAITextToSpeechModel, diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py index 018a604..03d8c67 100644 --- a/open_notebook/models/llms.py +++ b/open_notebook/models/llms.py @@ -13,6 +13,7 @@ from langchain_core.language_models.chat_models import BaseChatModel from langchain_google_genai import ChatGoogleGenerativeAI from langchain_google_vertexai import ChatVertexAI from langchain_google_vertexai.model_garden import ChatAnthropicVertex +from langchain_groq.chat_models import ChatGroq from langchain_ollama.chat_models import ChatOllama from langchain_openai.chat_models import ChatOpenAI from pydantic import SecretStr @@ -191,6 +192,30 @@ class OpenRouterLanguageModel(LanguageModel): ) +@dataclass +class GroqLanguageModel(LanguageModel): + """ + Language model that uses the Groq chat model. + """ + + model_name: str + + def to_langchain(self) -> ChatGroq: + """ + Convert the language model to a LangChain chat model for Groq. 
+ """ + kwargs = self.kwargs + kwargs["top_p"] = self.top_p + + return ChatGroq( + model=self.model_name, + temperature=self.temperature or 0.5, + max_tokens=self.max_tokens, + model_kwargs=kwargs, + stop_sequences=None, + ) + + @dataclass class XAILanguageModel(LanguageModel): """ diff --git a/open_notebook/models/speech_to_text_models.py b/open_notebook/models/speech_to_text_models.py index aa89d51..113339b 100644 --- a/open_notebook/models/speech_to_text_models.py +++ b/open_notebook/models/speech_to_text_models.py @@ -40,3 +40,22 @@ class OpenAISpeechToTextModel(SpeechToTextModel): model=self.model_name, file=audio ) return transcription.text + + +@dataclass +class GroqSpeechToTextModel(SpeechToTextModel): + model_name: str + + def transcribe(self, audio_file_path: str) -> str: + """ + Transcribes an audio file into text + """ + from groq import Groq + + # todo: make this Singleton + client = Groq() + with open(audio_file_path, "rb") as audio: + transcription = client.audio.transcriptions.create( + model=self.model_name, file=audio + ) + return transcription.text diff --git a/pages/7_⚙️_Settings.py b/pages/7_⚙️_Settings.py index 955c995..67108f2 100644 --- a/pages/7_⚙️_Settings.py +++ b/pages/7_⚙️_Settings.py @@ -30,6 +30,7 @@ model_types = [ provider_status["ollama"] = os.environ.get("OLLAMA_API_BASE") is not None provider_status["openai"] = os.environ.get("OPENAI_API_KEY") is not None +provider_status["groq"] = os.environ.get("GROQ_API_KEY") is not None provider_status["xai"] = os.environ.get("XAI_API_KEY") is not None provider_status["vertexai"] = ( os.environ.get("VERTEX_PROJECT") is not None diff --git a/poetry.lock b/poetry.lock index 11dffc5..0f80bfd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1166,8 +1166,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = 
"python_version >= \"3.11\" and extra == \"grpc\""} proto-plus = [ - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, + {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -1346,8 +1346,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extr google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, + {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" @@ -1389,8 +1389,8 @@ files = [ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" proto-plus = [ - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, + {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" @@ -1578,6 +1578,25 @@ files = [ docs = ["Sphinx", "furo"] test = ["objgraph", "psutil"] +[[package]] +name = "groq" +version = "0.12.0" +description = "The official Python library for the groq API" +optional = false +python-versions = ">=3.8" +files = [ + {file = 
"groq-0.12.0-py3-none-any.whl", hash = "sha256:e8aa1529f82a01b2d15394b7ea242af9ee9387f65bdd1b91ce9a10f5a911dac1"}, + {file = "groq-0.12.0.tar.gz", hash = "sha256:569229e2dadfc428b0df3d2987407691a4e3bc035b5849a65ef4909514a4605e"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +typing-extensions = ">=4.7,<5" + [[package]] name = "grpc-google-iam-v1" version = "0.13.1" @@ -2247,8 +2266,8 @@ langchain-core = ">=0.3.15,<0.4.0" langchain-text-splitters = ">=0.3.0,<0.4.0" langsmith = ">=0.1.17,<0.2.0" numpy = [ - {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, {version = ">=1,<2", markers = "python_version < \"3.12\""}, + {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, ] pydantic = ">=2.7.4,<3.0.0" PyYAML = ">=5.3" @@ -2292,8 +2311,8 @@ langchain = ">=0.3.6,<0.4.0" langchain-core = ">=0.3.14,<0.4.0" langsmith = ">=0.1.125,<0.2.0" numpy = [ - {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, {version = ">=1,<2", markers = "python_version < \"3.12\""}, + {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, ] pydantic-settings = ">=2.4.0,<3.0.0" PyYAML = ">=5.3" @@ -2317,8 +2336,8 @@ jsonpatch = ">=1.33,<2.0" langsmith = ">=0.1.125,<0.2.0" packaging = ">=23.2,<25" pydantic = [ - {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, {version = ">=2.5.2,<3.0.0", markers = "python_full_version < \"3.12.4\""}, + {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, ] PyYAML = ">=5.3" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10.0.0" @@ -2366,6 +2385,21 @@ pydantic = ">=2,<3" anthropic = ["anthropic[vertexai] (>=0.35.0,<1)"] mistral = ["langchain-mistralai (>=0.2.0,<1)"] +[[package]] +name = "langchain-groq" +version = "0.2.1" +description = "An integration package connecting Groq and LangChain" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = 
"langchain_groq-0.2.1-py3-none-any.whl", hash = "sha256:98d282fd9d7d99b0f55de0a1daea2d5d350ef697e3cb5e97de06aeba4eca8679"}, + {file = "langchain_groq-0.2.1.tar.gz", hash = "sha256:a59c81d1a15dc97abf4fdb4c2589f98109313eda147e6b378829222d4d929792"}, +] + +[package.dependencies] +groq = ">=0.4.1,<1" +langchain-core = ">=0.3.15,<0.4.0" + [[package]] name = "langchain-ollama" version = "0.2.0" @@ -2502,8 +2536,8 @@ files = [ httpx = ">=0.23.0,<1" orjson = ">=3.9.14,<4.0.0" pydantic = [ - {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, + {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, ] requests = ">=2,<3" requests-toolbelt = ">=1.0.0,<2.0.0" @@ -3562,8 +3596,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4186,8 +4220,8 @@ files = [ annotated-types = ">=0.6.0" pydantic-core = "2.23.4" typing-extensions = [ - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, {version = ">=4.6.1", markers = "python_version < \"3.13\""}, + {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, ] [package.extras] @@ -6431,4 +6465,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "d7a79be658b4a848e346be1958ce4ef50da966a00319ddb5b9edc24be96c5aba" +content-hash = "93b2d5c2ae9dd34b47c12f14b07b76d7d48c57c5eec78b09ae08a1d3a3e747dd" diff --git a/pyproject.toml b/pyproject.toml index a4a623f..9ae242f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,8 @@ python-docx = "^1.1.2" python-pptx = "^1.0.2" openpyxl = "^3.1.5" google-generativeai = "^0.8.3" +langchain-groq = "^0.2.1" +groq = "^0.12.0" [tool.poetry.group.dev.dependencies] ipykernel = 
"^6.29.5" From 666a4f85b942530ef717fd04cf7ee00216969453 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 15:21:01 -0300 Subject: [PATCH 30/44] update env sample --- .env.example | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.env.example b/.env.example index a964062..fc3fae6 100644 --- a/.env.example +++ b/.env.example @@ -7,6 +7,7 @@ OPENAI_API_KEY= # ANTHROPIC_API_KEY= # GEMINI +# this is the best model for long context and podcast generation # GEMINI_API_KEY= # VERTEXAI @@ -20,6 +21,12 @@ OPENAI_API_KEY= # OPENROUTER_BASE_URL="https://openrouter.ai/api/v1" # OPENROUTER_API_KEY= +# GROQ +# GROQ_API_KEY= + +# XAI +# XAI_API_KEY= + # ELEVENLABS # Used only by the podcast feature ELEVENLABS_API_KEY= From b04761affcdd3a0129a83d35e522d93e4a255b88 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 15:21:17 -0300 Subject: [PATCH 31/44] mypy fixes --- open_notebook/models/llms.py | 7 ++++++- pages/7_⚙️_Settings.py | 3 ++- pages/8_🛝_Playground.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py index 03d8c67..3d84bcf 100644 --- a/open_notebook/models/llms.py +++ b/open_notebook/models/llms.py @@ -295,12 +295,17 @@ class OpenAILanguageModel(LanguageModel): kwargs["max_completion_tokens"] = self.max_tokens data["top_p"] = 1 data["streaming"] = False + data["max_tokens"] = None else: data["max_tokens"] = self.max_tokens data["top_p"] = self.top_p data["streaming"] = self.streaming return ChatOpenAI( - **data, + model_name=data.get("model_name"), + temperature=data.get("temperature"), + streaming=data.get("streaming"), + max_tokens=data.get("max_tokens"), + top_p=data.get("top_p"), model_kwargs=kwargs, ) diff --git a/pages/7_⚙️_Settings.py b/pages/7_⚙️_Settings.py index 67108f2..449ed39 100644 --- a/pages/7_⚙️_Settings.py +++ b/pages/7_⚙️_Settings.py @@ -268,7 +268,8 @@ with transformations_tab: selected_transformations[transformation["name"]] = st.checkbox( 
f"**{transformation['name']}**", value=( - transformation["name"] in default_transformations.source_insights + transformation["name"] + in (default_transformations.source_insights or []) ), ) st.write(transformation["description"]) diff --git a/pages/8_🛝_Playground.py b/pages/8_🛝_Playground.py index 355cf9b..a094d19 100644 --- a/pages/8_🛝_Playground.py +++ b/pages/8_🛝_Playground.py @@ -13,7 +13,7 @@ with open("transformations.yaml", "r") as file: insight_transformations = transformations["source_insights"] -transformation = st.selectbox( +transformation: dict = st.selectbox( "Pick a transformation", insight_transformations, format_func=lambda x: x.get("name", "No Name"), From 066c7a06e24b498f13dc8a4e27bb69f8a95010cc Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 15:52:44 -0300 Subject: [PATCH 32/44] improve search functions --- migrations/4.surrealql | 134 ++++++++++++++++++++++++++++ migrations/4_down.surrealql | 139 ++++++++++++++++++++++++++++++ open_notebook/database/migrate.py | 2 + open_notebook/graphs/ask.py | 2 - open_notebook/models/llms.py | 28 +++--- poetry.lock | 19 +--- pyproject.toml | 1 - 7 files changed, 287 insertions(+), 38 deletions(-) create mode 100644 migrations/4.surrealql create mode 100644 migrations/4_down.surrealql diff --git a/migrations/4.surrealql b/migrations/4.surrealql new file mode 100644 index 0000000..f89531e --- /dev/null +++ b/migrations/4.surrealql @@ -0,0 +1,134 @@ + +REMOVE FUNCTION IF EXISTS fn::text_search; + + +DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) { + + let $source_title_search = + IF $sources {( + SELECT id, title, + search::highlight('`', '`', 1) as content, + id as parent_id, + math::max(search::score(1)) AS relevance + FROM source + WHERE title @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_embedding_search = + IF $sources {( + SELECT source.id as id, source.title as title, search::highlight('`', '`', 1) 
as content, source.id as parent_id, math::max(search::score(1)) AS relevance + FROM source_embedding + WHERE content @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_full_search = + IF $sources {( + SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance + FROM source + WHERE full_text @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_insight_search = + IF $sources {( + SELECT id, insight_type + " - " + (source.title OR '') as title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance + FROM source_insight + WHERE content @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $note_title_search = + IF $show_notes {( + SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance + FROM note + WHERE title @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $note_content_search = + IF $show_notes {( + SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance + FROM note + WHERE content @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_chunk_results = array::union($source_embedding_search, $source_full_search); + + let $source_asset_results = array::union($source_title_search, $source_insight_search); + + let $source_results = array::union($source_chunk_results, $source_asset_results ); + let $note_results = array::union($note_title_search, $note_content_search ); + let $final_results = array::union($source_results, $note_results ); + + RETURN (select id, parent_id, title, math::max(relevance) as relevance, + array::flatten(content) as matches + from $final_results where id is not None + group by id, parent_id, title ORDER BY relevance DESC LIMIT $match_count); + +}; + +REMOVE FUNCTION IF EXISTS fn::vector_search; + +DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_count: int, 
$sources: bool, $show_notes: bool, $min_similarity: float) { + let $source_embedding_search = + IF $sources {( + SELECT + source.id as id, + source.title as title, + content, + source.id as parent_id, + vector::similarity::cosine(embedding, $query) as similarity + FROM source_embedding + WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity + ORDER BY similarity DESC + LIMIT $match_count + )} + ELSE { [] }; + + let $source_insight_search = + IF $sources {( + SELECT + id, + insight_type + ' - ' + (source.title OR '') as title, + content, + source.id as parent_id, + vector::similarity::cosine(embedding, $query) as similarity + FROM source_insight + WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity + ORDER BY similarity DESC + LIMIT $match_count + )} + ELSE { [] }; + + + let $note_content_search = + IF $show_notes {( + SELECT + id, + title, + content, + id as parent_id, + vector::similarity::cosine(embedding, $query) as similarity + FROM note + WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity + ORDER BY similarity DESC + LIMIT $match_count + )} + ELSE { [] }; + + + let $all_results = array::union( + array::union($source_embedding_search, $source_insight_search), + $note_content_search + ); + + + RETURN (select id, parent_id, title, math::max(similarity) as similarity, + array::flatten(content) as matches + from $all_results where id is not None + group by id, parent_id, title ORDER BY similarity DESC LIMIT $match_count); + +}; \ No newline at end of file diff --git a/migrations/4_down.surrealql b/migrations/4_down.surrealql new file mode 100644 index 0000000..67acbd7 --- /dev/null +++ b/migrations/4_down.surrealql @@ -0,0 +1,139 @@ + +REMOVE FUNCTION IF EXISTS fn::vector_search; + +DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) { + let $source_embedding_search = + IF $sources {( + SELECT + id, + source.title as 
title, + content, + source.id as parent_id, + vector::similarity::cosine(embedding, $query) as similarity + FROM source_embedding + WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity + ORDER BY similarity DESC + LIMIT $match_count + )} + ELSE { [] }; + + let $source_insight_search = + IF $sources {( + SELECT + id, + insight_type + ' - ' + source.title as title, + content, + source.id as parent_id, + vector::similarity::cosine(embedding, $query) as similarity + FROM source_insight + WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity + ORDER BY similarity DESC + LIMIT $match_count + )} + ELSE { [] }; + + + let $note_content_search = + IF $show_notes {( + SELECT + id, + title, + content, + id as parent_id, + vector::similarity::cosine(embedding, $query) as similarity + FROM note + WHERE vector::similarity::cosine(embedding, $query) >= $min_similarity + ORDER BY similarity DESC + LIMIT $match_count + )} + ELSE { [] }; + + + let $all_results = array::union( + array::union($source_embedding_search, $source_insight_search), + $note_content_search + ); + + + RETURN ( + SELECT + id, title, content, parent_id, + math::max(similarity) as similarity + FROM $all_results + GROUP BY id + ORDER BY similarity DESC + LIMIT $match_count + ); +}; + + +REMOVE FUNCTION IF EXISTS fn::text_search; + + +DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) { + + let $source_title_search = + IF $sources {( + SELECT id, title, + search::highlight('`', '`', 1) as content, + id as parent_id, + math::max(search::score(1)) AS relevance + FROM source + WHERE title @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_embedding_search = + IF $sources {( + SELECT id as id, source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance + FROM source_embedding + WHERE content @1@ $query_text + GROUP BY id)} + ELSE { [] }; 
+ + let $source_full_search = + IF $sources {( + SELECT source.id as id, source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance + FROM source + WHERE full_text @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_insight_search = + IF $sources {( + SELECT id, insight_type + " - " + source.title as title, search::highlight('`', '`', 1) as content, source.id as parent_id, math::max(search::score(1)) AS relevance + FROM source_insight + WHERE content @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $note_title_search = + IF $show_notes {( + SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance + FROM note + WHERE title @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $note_content_search = + IF $show_notes {( + SELECT id, title, search::highlight('`', '`', 1) as content, id as parent_id, math::max(search::score(1)) AS relevance + FROM note + WHERE content @1@ $query_text + GROUP BY id)} + ELSE { [] }; + + let $source_chunk_results = array::union($source_embedding_search, $source_full_search); + + let $source_asset_results = array::union($source_title_search, $source_insight_search); + + let $source_results = array::union($source_chunk_results, $source_asset_results ); + let $note_results = array::union($note_title_search, $note_content_search ); + let $final_results = array::union($source_results, $note_results ); + + RETURN (SELECT id, title, content, parent_id, math::max(relevance) as relevance from $final_results + where id is not None +group by id, title, content, parent_id ORDER BY relevance DESC LIMIT $match_count); + + +}; diff --git a/open_notebook/database/migrate.py b/open_notebook/database/migrate.py index 542888f..ac40f60 100644 --- a/open_notebook/database/migrate.py +++ b/open_notebook/database/migrate.py @@ -22,6 +22,7 @@ class MigrationManager: Migration.from_file("migrations/1.surrealql"), 
Migration.from_file("migrations/2.surrealql"), Migration.from_file("migrations/3.surrealql"), + Migration.from_file("migrations/4.surrealql"), ] self.down_migrations = [ Migration.from_file( @@ -29,6 +30,7 @@ class MigrationManager: ), Migration.from_file("migrations/2_down.surrealql"), Migration.from_file("migrations/3_down.surrealql"), + Migration.from_file("migrations/4_down.surrealql"), ] self.runner = MigrationRunner( up_migrations=self.up_migrations, diff --git a/open_notebook/graphs/ask.py b/open_notebook/graphs/ask.py index 995864e..c320642 100644 --- a/open_notebook/graphs/ask.py +++ b/open_notebook/graphs/ask.py @@ -7,7 +7,6 @@ from langchain_core.runnables import ( ) from langgraph.graph import END, START, StateGraph from langgraph.types import Send -from loguru import logger from pydantic import BaseModel, Field from typing_extensions import TypedDict @@ -63,7 +62,6 @@ async def call_model_with_messages(state: ThreadState, config: RunnableConfig) - ) # model = model.bind_tools(tools) ai_message = (model | parser).invoke(system_prompt) - logger.debug(ai_message) return {"strategy": ai_message} diff --git a/open_notebook/models/llms.py b/open_notebook/models/llms.py index 3d84bcf..4a5ed9b 100644 --- a/open_notebook/models/llms.py +++ b/open_notebook/models/llms.py @@ -280,12 +280,6 @@ class OpenAILanguageModel(LanguageModel): Convert the language model to a LangChain chat model. 
""" - data = { - "model": self.model_name, - "top_p": self.top_p, - "temperature": self.temperature, - } - kwargs = self.kwargs.copy() # Make a copy to avoid modifying the original if self.json: kwargs["response_format"] = {"type": "json_object"} @@ -293,19 +287,19 @@ class OpenAILanguageModel(LanguageModel): # Set the token limit in kwargs with the appropriate key if self.model_name in ["o1-mini", "o1-preview"]: kwargs["max_completion_tokens"] = self.max_tokens - data["top_p"] = 1 - data["streaming"] = False - data["max_tokens"] = None + top_p = 1 + streaming = False + max_tokens = None else: - data["max_tokens"] = self.max_tokens - data["top_p"] = self.top_p - data["streaming"] = self.streaming + max_tokens = self.max_tokens + top_p = self.top_p + streaming = self.streaming return ChatOpenAI( - model_name=data.get("model_name"), - temperature=data.get("temperature"), - streaming=data.get("streaming"), - max_tokens=data.get("max_tokens"), - top_p=data.get("top_p"), + model=self.model_name, + temperature=self.temperature, + streaming=streaming, + max_tokens=max_tokens, + top_p=top_p, model_kwargs=kwargs, ) diff --git a/poetry.lock b/poetry.lock index 0f80bfd..2d3aa26 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1826,23 +1826,6 @@ files = [ [package.extras] tests = ["freezegun", "pytest", "pytest-cov"] -[[package]] -name = "icecream" -version = "2.1.3" -description = "Never use print() to debug again; inspect variables, expressions, and program execution with a single, simple function call." 
-optional = false -python-versions = "*" -files = [ - {file = "icecream-2.1.3-py2.py3-none-any.whl", hash = "sha256:757aec31ad4488b949bc4f499d18e6e5973c40cc4d4fc607229e78cfaec94c34"}, - {file = "icecream-2.1.3.tar.gz", hash = "sha256:0aa4a7c3374ec36153a1d08f81e3080e83d8ac1eefd97d2f4fe9544e8f9b49de"}, -] - -[package.dependencies] -asttokens = ">=2.0.1" -colorama = ">=0.3.9" -executing = ">=0.3.1" -pygments = ">=2.2.0" - [[package]] name = "identify" version = "2.6.2" @@ -6465,4 +6448,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "93b2d5c2ae9dd34b47c12f14b07b76d7d48c57c5eec78b09ae08a1d3a3e747dd" +content-hash = "b672f17cddbf990c0d05737cc796ae92835864702a2eeee34732152ca796a0c7" diff --git a/pyproject.toml b/pyproject.toml index 9ae242f..ed13bab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ streamlit = "^1.39.0" watchdog = "^5.0.3" pydantic = "^2.9.2" loguru = "^0.7.2" -icecream = "^2.1.3" langchain = "^0.3.3" langgraph = "^0.2.38" humanize = "^4.11.0" From 06c6842f11f32154ac0ee7a272dd318770d1323b Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:02:18 -0300 Subject: [PATCH 33/44] fix insight context to improve citations --- open_notebook/domain/notebook.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index be0dbc0..5ef3f14 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -141,15 +141,16 @@ class Source(ObjectModel): def get_context( self, context_size: Literal["short", "long"] = "short" ) -> Dict[str, Any]: + insights = [insight.model_dump() for insight in self.insights] if context_size == "long": return dict( id=self.id, title=self.title, - insights=[insight.model_dump() for insight in self.insights], + insights=insights, full_text=self.full_text, ) else: - return dict(id=self.id, title=self.title, insights=self.insights) + return 
dict(id=self.id, title=self.title, insights=insights) @property def embedded_chunks(self) -> int: From 182ae741d8b5ea096c639aa78bc613aa33231f3f Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:02:48 -0300 Subject: [PATCH 34/44] cleanup podcast --- open_notebook/plugins/podcasts.py | 102 ++++++------------------------ pages/5_🎙️_Podcasts.py | 41 +++++++----- 2 files changed, 45 insertions(+), 98 deletions(-) diff --git a/open_notebook/plugins/podcasts.py b/open_notebook/plugins/podcasts.py index ce92ff3..39adec9 100644 --- a/open_notebook/plugins/podcasts.py +++ b/open_notebook/plugins/podcasts.py @@ -2,7 +2,7 @@ from typing import ClassVar, List, Optional from loguru import logger from podcastfy.client import generate_podcast -from pydantic import Field, field_validator +from pydantic import Field, field_validator, model_validator from open_notebook.config import DATA_FOLDER from open_notebook.domain.notebook import ObjectModel @@ -23,8 +23,8 @@ class PodcastConfig(ObjectModel): podcast_name: str podcast_tagline: str output_language: str = Field(default="English") - person1_role: str - person2_role: str + person1_role: List[str] + person2_role: List[str] conversation_style: List[str] engagement_technique: List[str] dialogue_structure: List[str] @@ -35,10 +35,24 @@ class PodcastConfig(ObjectModel): wordcount: int = Field(ge=400, le=10000) creativity: float = Field(ge=0, le=1) provider: str = Field(default="openai") - voice1: Optional[str] = None - voice2: Optional[str] = None + voice1: str + voice2: str model: str + # Backwards compatibility + @field_validator("person1_role", "person2_role", mode="before") + @classmethod + def split_string_to_list(cls, value): + if isinstance(value, str): + return [item.strip() for item in value.split(",")] + return value + + @model_validator(mode="after") + def validate_voices(self) -> "PodcastConfig": + if not self.voice1 or not self.voice2: + raise ValueError("Both voice1 and voice2 must be provided") + return 
self + def generate_episode(self, episode_name, text, instructions=None): self.user_instructions = ( instructions if instructions else self.user_instructions @@ -140,13 +154,8 @@ conversation_styles = [ "Debate-style", "Interview-style", "Storytelling", - "Reflective", - "Narrative", "Satirical", "Educational", - "Conversational", - "Critical", - "Empathetic", "Philosophical", "Speculative", "Motivational", @@ -156,25 +165,15 @@ conversation_styles = [ "Serious", "Investigative", "Debunking", - "Collaborative", "Didactic", "Thought-provoking", "Controversial", - "Skeptical", - "Optimistic", - "Pessimistic", - "Objective", - "Subjective", "Sarcastic", "Emotional", "Exploratory", - "Friendly", "Fast-paced", "Slow-paced", "Introspective", - "Open-ended", - "Affirmative", - "Dissenting", ] # Dialogue Structures @@ -191,15 +190,10 @@ dialogue_structures = [ "Pro Arguments", "Con Arguments", "Cross-examination", - "Rebuttal", "Expert Interviews", - "Panel Discussion", "Case Studies", "Myth Busting", - "Debunking Misconceptions", - "Audience Questions", "Q&A Session", - "Listener Feedback", "Rapid-fire Questions", "Summary of Key Points", "Recap", @@ -207,29 +201,11 @@ dialogue_structures = [ "Actionable Tips", "Call to Action", "Future Outlook", - "Teaser for Next Episode", "Closing Remarks", - "Thank You and Credits", - "Outtakes or Bloopers", - "Sponsor Messages", - "Social Media Shout-outs", "Resource Recommendations", - "Feedback Request", - "Lightning Round", - "Behind-the-Scenes Insights", - "Ethical Considerations", - "Fact-checking Segment", "Trending Topics", "Closing Inspirational Quote", "Final Reflections", - "Debrief", - "Farewell Messages", - "Next Episode Preview", - "Live Reactions", - "Call-in Segment", - "Acknowledgements", - "Transition Segments", - "Break Segments", ] # Podcast Participant Roles @@ -265,15 +241,7 @@ participant_roles = [ "Researcher", "Reporter", "Advocate", - "Influencer", - "Observer", - "Listener", - "Facilitator", - "Innovator", 
"Debater", - "Educator", - "Motivator", - "Narrator", "Explorer", "Opponent", "Proponent", @@ -289,49 +257,17 @@ participant_roles = [ "Author", "Journalist", "Activist", - "Challenger", - "Supporter", - "Mentor", - "Mentee", "Panelist", - "Audience Representative", - "Case Study Presenter", "Data Analyst", - "Ethicist", - "Cultural Critic", - "Technologist", - "Environmentalist", - "Legal Expert", - "Healthcare Professional", - "Financial Advisor", - "Policy Maker", - "Sociologist", - "Anthropologist", "Myth Buster", "Trend Analyst", "Futurist", - "Negotiator", - "Community Leader", "Voice of Reason", - "Conflict Resolver", - "Emotional Support", "Pragmatist", "Idealist", "Realist", "Satirist", - "Story Analyst", - "Language Expert", - "Historical Witness", - "Survivor", - "Inspirational Figure", - "Cultural Ambassador", - "Digital Nomad", - "Remote Correspondent", "Field Reporter", - "Data Scientist", - "Gamer", - "Musician", - "Filmmaker", ] # Engagement Techniques diff --git a/pages/5_🎙️_Podcasts.py b/pages/5_🎙️_Podcasts.py index 040d76d..40ede27 100644 --- a/pages/5_🎙️_Podcasts.py +++ b/pages/5_🎙️_Podcasts.py @@ -82,19 +82,26 @@ with templates_tab: "User Instructions", help="Any additional intructions to pass to the LLM that will generate the transcript", ) - pd_cfg["person1_role"] = st.text_input("Person 1 role") + pd_cfg["person1_role"] = st_tags( + [], participant_roles, "Person 1 roles", key="person1_roles" + ) st.caption(f"Suggestions:{', '.join(participant_roles)}") - pd_cfg["person2_role"] = st.text_input("Person 2 role") + pd_cfg["person2_role"] = st_tags( + [], participant_roles, "Person 2 roles", key="person2_roles" + ) pd_cfg["conversation_style"] = st_tags( - [], conversation_styles, "Conversation Style" + [], conversation_styles, "Conversation Style", key="conversation_styles" ) st.caption(f"Suggestions:{', '.join(conversation_styles)}") pd_cfg["engagement_technique"] = st_tags( - [], engagement_techniques, "Engagement Techniques" + [], + 
engagement_techniques, + "Engagement Techniques", + key="engagement_techniques", ) st.caption(f"Suggestions:{', '.join(engagement_techniques)}") pd_cfg["dialogue_structure"] = st_tags( - [], dialogue_structures, "Dialogue Structure" + [], dialogue_structures, "Dialogue Structure", key="dialogue_structures" ) st.caption(f"Suggestions:{', '.join(dialogue_structures)}") pd_cfg["wordcount"] = st.slider( @@ -126,6 +133,8 @@ with templates_tab: pd_cfg["voice1"] = st.text_input( "Voice 1", help="You can use Elevenlabs voice ID" ) + st.caption("Voice names are case sensitive. Be sure to add the exact name.") + st.markdown( "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)" ) @@ -142,10 +151,8 @@ with templates_tab: pd = PodcastConfig(**pd_cfg) pd_cfg = {} pd.save() - st.rerun() except Exception as e: st.error(e) - st.exception(e) for pd_config in PodcastConfig.get_all(order_by="created desc"): with st.expander(pd_config.name): @@ -174,17 +181,20 @@ with templates_tab: value=pd_config.output_language, key=f"output_language_{pd_config.id}", ) - pd_config.person1_role = st.text_input( - "Person 1 role", - value=pd_config.person1_role, - key=f"person1_role_{pd_config.id}", + pd_config.person1_role = st_tags( + pd_config.person1_role, + conversation_styles, + "Person 1 Roles", + key=f"person_1_roles_{pd_config.id}", ) st.caption(f"Suggestions:{', '.join(participant_roles)}") - pd_config.person2_role = st.text_input( - "Person 2 role", - value=pd_config.person2_role, - key=f"person2_role_{pd_config.id}", + pd_config.person2_role = st_tags( + pd_config.person2_role, + conversation_styles, + "Person 2 Roles", + key=f"person_2_roles_{pd_config.id}", ) + pd_config.conversation_style = st_tags( pd_config.conversation_style, conversation_styles, @@ -293,6 +303,7 @@ with templates_tab: key=f"voice1_{pd_config.id}", help="You can use Elevenlabs voice ID", ) + st.caption("Voice names are case sensitive. 
Be sure to add the exact name.") st.markdown( "[Open AI voices](https://platform.openai.com/docs/guides/text-to-speech)" ) From 95cc9a5081a61d6bb6ff07336a455885abc40068 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:03:09 -0300 Subject: [PATCH 35/44] add items to context by default --- pages/stream_app/note.py | 2 +- pages/stream_app/source.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pages/stream_app/note.py b/pages/stream_app/note.py index 3dc96d4..4a1c5be 100644 --- a/pages/stream_app/note.py +++ b/pages/stream_app/note.py @@ -61,7 +61,7 @@ def note_card(note, notebook_id): "Context", label_visibility="collapsed", options=context_icons, - index=0, + index=1, key=f"note_{note.id}", ) st.caption(f"Updated: {naturaltime(note.updated)}") diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index d8b04f5..1a2c358 100644 --- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -114,7 +114,7 @@ def source_card(source, notebook_id): "Context", label_visibility="collapsed", options=context_icons, - index=0, + index=1, key=f"source_{source.id}", ) st.caption( From 70358a1be7484848dd556439b3b7532c08ce6b2f Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:03:19 -0300 Subject: [PATCH 36/44] poetry update --- poetry.lock | 562 ++++++++++++++++++++++++------------------------- pyproject.toml | 2 +- 2 files changed, 271 insertions(+), 293 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2d3aa26..7494da1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,102 +13,87 @@ files = [ [[package]] name = "aiohttp" -version = "3.10.10" +version = "3.11.0" description = "Async http client/server framework (asyncio)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, - {file = 
"aiohttp-3.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b06b7843929e41a94ea09eb1ce3927865387e3e23ebe108e0d0d09b08d25be9"}, - {file = "aiohttp-3.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:333cf6cf8e65f6a1e06e9eb3e643a0c515bb850d470902274239fea02033e9a8"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:274cfa632350225ce3fdeb318c23b4a10ec25c0e2c880eff951a3842cf358ac1"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9e5e4a85bdb56d224f412d9c98ae4cbd032cc4f3161818f692cd81766eee65a"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b606353da03edcc71130b52388d25f9a30a126e04caef1fd637e31683033abd"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab5a5a0c7a7991d90446a198689c0535be89bbd6b410a1f9a66688f0880ec026"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578a4b875af3e0daaf1ac6fa983d93e0bbfec3ead753b6d6f33d467100cdc67b"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8105fd8a890df77b76dd3054cddf01a879fc13e8af576805d667e0fa0224c35d"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3bcd391d083f636c06a68715e69467963d1f9600f85ef556ea82e9ef25f043f7"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fbc6264158392bad9df19537e872d476f7c57adf718944cc1e4495cbabf38e2a"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e48d5021a84d341bcaf95c8460b152cfbad770d28e5fe14a768988c461b821bc"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2609e9ab08474702cc67b7702dbb8a80e392c54613ebe80db7e8dbdb79837c68"}, - {file = "aiohttp-3.10.10-cp310-cp310-win32.whl", hash = 
"sha256:84afcdea18eda514c25bc68b9af2a2b1adea7c08899175a51fe7c4fb6d551257"}, - {file = "aiohttp-3.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:9c72109213eb9d3874f7ac8c0c5fa90e072d678e117d9061c06e30c85b4cf0e6"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c30a0eafc89d28e7f959281b58198a9fa5e99405f716c0289b7892ca345fe45f"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:258c5dd01afc10015866114e210fb7365f0d02d9d059c3c3415382ab633fcbcb"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15ecd889a709b0080f02721255b3f80bb261c2293d3c748151274dfea93ac871"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3935f82f6f4a3820270842e90456ebad3af15810cf65932bd24da4463bc0a4c"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:413251f6fcf552a33c981c4709a6bba37b12710982fec8e558ae944bfb2abd38"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1720b4f14c78a3089562b8875b53e36b51c97c51adc53325a69b79b4b48ebcb"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:679abe5d3858b33c2cf74faec299fda60ea9de62916e8b67e625d65bf069a3b7"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79019094f87c9fb44f8d769e41dbb664d6e8fcfd62f665ccce36762deaa0e911"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2fb38c2ed905a2582948e2de560675e9dfbee94c6d5ccdb1301c6d0a5bf092"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a3f00003de6eba42d6e94fabb4125600d6e484846dbf90ea8e48a800430cc142"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbb122c557a16fafc10354b9d99ebf2f2808a660d78202f10ba9d50786384b9"}, - 
{file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30ca7c3b94708a9d7ae76ff281b2f47d8eaf2579cd05971b5dc681db8caac6e1"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:df9270660711670e68803107d55c2b5949c2e0f2e4896da176e1ecfc068b974a"}, - {file = "aiohttp-3.10.10-cp311-cp311-win32.whl", hash = "sha256:aafc8ee9b742ce75044ae9a4d3e60e3d918d15a4c2e08a6c3c3e38fa59b92d94"}, - {file = "aiohttp-3.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:362f641f9071e5f3ee6f8e7d37d5ed0d95aae656adf4ef578313ee585b585959"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9294bbb581f92770e6ed5c19559e1e99255e4ca604a22c5c6397b2f9dd3ee42c"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8fa23fe62c436ccf23ff930149c047f060c7126eae3ccea005f0483f27b2e28"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c6a5b8c7926ba5d8545c7dd22961a107526562da31a7a32fa2456baf040939f"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:007ec22fbc573e5eb2fb7dec4198ef8f6bf2fe4ce20020798b2eb5d0abda6138"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9627cc1a10c8c409b5822a92d57a77f383b554463d1884008e051c32ab1b3742"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50edbcad60d8f0e3eccc68da67f37268b5144ecc34d59f27a02f9611c1d4eec7"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a45d85cf20b5e0d0aa5a8dca27cce8eddef3292bc29d72dcad1641f4ed50aa16"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b00807e2605f16e1e198f33a53ce3c4523114059b0c09c337209ae55e3823a8"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:f2d4324a98062be0525d16f768a03e0bbb3b9fe301ceee99611dc9a7953124e6"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438cd072f75bb6612f2aca29f8bd7cdf6e35e8f160bc312e49fbecab77c99e3a"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:baa42524a82f75303f714108fea528ccacf0386af429b69fff141ffef1c534f9"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a7d8d14fe962153fc681f6366bdec33d4356f98a3e3567782aac1b6e0e40109a"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c1277cd707c465cd09572a774559a3cc7c7a28802eb3a2a9472588f062097205"}, - {file = "aiohttp-3.10.10-cp312-cp312-win32.whl", hash = "sha256:59bb3c54aa420521dc4ce3cc2c3fe2ad82adf7b09403fa1f48ae45c0cbde6628"}, - {file = "aiohttp-3.10.10-cp312-cp312-win_amd64.whl", hash = "sha256:0e1b370d8007c4ae31ee6db7f9a2fe801a42b146cec80a86766e7ad5c4a259cf"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ad7593bb24b2ab09e65e8a1d385606f0f47c65b5a2ae6c551db67d6653e78c28"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1eb89d3d29adaf533588f209768a9c02e44e4baf832b08118749c5fad191781d"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3fe407bf93533a6fa82dece0e74dbcaaf5d684e5a51862887f9eaebe6372cd79"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aed5155f819873d23520919e16703fc8925e509abbb1a1491b0087d1cd969e"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f05e9727ce409358baa615dbeb9b969db94324a79b5a5cea45d39bdb01d82e6"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dffb610a30d643983aeb185ce134f97f290f8935f0abccdd32c77bed9388b42"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:aa6658732517ddabe22c9036479eabce6036655ba87a0224c612e1ae6af2087e"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:741a46d58677d8c733175d7e5aa618d277cd9d880301a380fd296975a9cdd7bc"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e00e3505cd80440f6c98c6d69269dcc2a119f86ad0a9fd70bccc59504bebd68a"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ffe595f10566f8276b76dc3a11ae4bb7eba1aac8ddd75811736a15b0d5311414"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdfcf6443637c148c4e1a20c48c566aa694fa5e288d34b20fcdc58507882fed3"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d183cf9c797a5291e8301790ed6d053480ed94070637bfaad914dd38b0981f67"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:77abf6665ae54000b98b3c742bc6ea1d1fb31c394bcabf8b5d2c1ac3ebfe7f3b"}, - {file = "aiohttp-3.10.10-cp313-cp313-win32.whl", hash = "sha256:4470c73c12cd9109db8277287d11f9dd98f77fc54155fc71a7738a83ffcc8ea8"}, - {file = "aiohttp-3.10.10-cp313-cp313-win_amd64.whl", hash = "sha256:486f7aabfa292719a2753c016cc3a8f8172965cabb3ea2e7f7436c7f5a22a151"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1b66ccafef7336a1e1f0e389901f60c1d920102315a56df85e49552308fc0486"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:acd48d5b80ee80f9432a165c0ac8cbf9253eaddb6113269a5e18699b33958dbb"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3455522392fb15ff549d92fbf4b73b559d5e43dc522588f7eb3e54c3f38beee7"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45c3b868724137f713a38376fef8120c166d1eadd50da1855c112fe97954aed8"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:da1dee8948d2137bb51fbb8a53cce6b1bcc86003c6b42565f008438b806cccd8"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5ce2ce7c997e1971b7184ee37deb6ea9922ef5163c6ee5aa3c274b05f9e12fa"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28529e08fde6f12eba8677f5a8608500ed33c086f974de68cc65ab218713a59d"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f7db54c7914cc99d901d93a34704833568d86c20925b2762f9fa779f9cd2e70f"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03a42ac7895406220124c88911ebee31ba8b2d24c98507f4a8bf826b2937c7f2"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:7e338c0523d024fad378b376a79faff37fafb3c001872a618cde1d322400a572"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:038f514fe39e235e9fef6717fbf944057bfa24f9b3db9ee551a7ecf584b5b480"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:64f6c17757251e2b8d885d728b6433d9d970573586a78b78ba8929b0f41d045a"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:93429602396f3383a797a2a70e5f1de5df8e35535d7806c9f91df06f297e109b"}, - {file = "aiohttp-3.10.10-cp38-cp38-win32.whl", hash = "sha256:c823bc3971c44ab93e611ab1a46b1eafeae474c0c844aff4b7474287b75fe49c"}, - {file = "aiohttp-3.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:54ca74df1be3c7ca1cf7f4c971c79c2daf48d9aa65dea1a662ae18926f5bc8ce"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01948b1d570f83ee7bbf5a60ea2375a89dfb09fd419170e7f5af029510033d24"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9fc1500fd2a952c5c8e3b29aaf7e3cc6e27e9cfc0a8819b3bce48cc1b849e4cc"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:f614ab0c76397661b90b6851a030004dac502e48260ea10f2441abd2207fbcc7"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00819de9e45d42584bed046314c40ea7e9aea95411b38971082cad449392b08c"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05646ebe6b94cc93407b3bf34b9eb26c20722384d068eb7339de802154d61bc5"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998f3bd3cfc95e9424a6acd7840cbdd39e45bc09ef87533c006f94ac47296090"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9010c31cd6fa59438da4e58a7f19e4753f7f264300cd152e7f90d4602449762"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ea7ffc6d6d6f8a11e6f40091a1040995cdff02cfc9ba4c2f30a516cb2633554"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ef9c33cc5cbca35808f6c74be11eb7f5f6b14d2311be84a15b594bd3e58b5527"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ce0cdc074d540265bfeb31336e678b4e37316849d13b308607efa527e981f5c2"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:597a079284b7ee65ee102bc3a6ea226a37d2b96d0418cc9047490f231dc09fe8"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:7789050d9e5d0c309c706953e5e8876e38662d57d45f936902e176d19f1c58ab"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e7f8b04d83483577fd9200461b057c9f14ced334dcb053090cea1da9c8321a91"}, - {file = "aiohttp-3.10.10-cp39-cp39-win32.whl", hash = "sha256:c02a30b904282777d872266b87b20ed8cc0d1501855e27f831320f471d54d983"}, - {file = "aiohttp-3.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:edfe3341033a6b53a5c522c802deb2079eee5cbfbb0af032a55064bd65c73a23"}, - {file = "aiohttp-3.10.10.tar.gz", hash = 
"sha256:0631dd7c9f0822cc61c88586ca76d5b5ada26538097d0f1df510b082bad3411a"}, + {file = "aiohttp-3.11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:024409c1b1d6076d0ed933dcebd7e4fc6f3320a227bfa0c1b6b93a8b5a146f04"}, + {file = "aiohttp-3.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:62502b8ffee8c6a4b5c6bf99d1de277d42bf51b2fb713975d9b63b560150b7ac"}, + {file = "aiohttp-3.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c54c635d1f52490cde7ef3a423645167a8284e452a35405d5c7dc1242a8e75c9"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:104ea21994b1403e4c1b398866f1187c1694fa291314ad7216ec1d8ec6b49f38"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04b24497b3baf15035730de5f207ade88a67d4483a5f16ced7ece348933a5b47"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08474e71772a516ba2e2167b4707af8361d2c452b3d8a5364c984f4867869499"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f40380c96dd407dfa84eb2d264e68aa47717b53bdbe210a59cc3c35a4635f195"}, + {file = "aiohttp-3.11.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1668ef2f3a7ec9881f4b6a917e5f97c87a343fa6b0d5fc826b7b0297ddd0887"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f3bf5c132eb48002bcc3825702d241d35b4e9585009e65e9dcf9c4635d0b7424"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0315978b2a4569e03fb59100f6a7e7d23f718a4521491f5c13d946d37549f3d"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d5cae4cd271e20b7ab757e966cc919186b9f02535418ab36c471a5377ef4deaa"}, + {file = "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:31b91ff3a1fcb206a1fa76e0de1f08c9ffb1dc0deb7296fa2618adfe380fc676"}, + {file 
= "aiohttp-3.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ebf610c37df4f09c71c9bbf8309b4b459107e6fe889ac0d7e16f6e4ebd975f86"}, + {file = "aiohttp-3.11.0-cp310-cp310-win32.whl", hash = "sha256:b40c304ab01e89ad0aeeecf91bbaa6ae3b00e27b796c9e8d50b71a4a7e885cc8"}, + {file = "aiohttp-3.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd0834e4260eab78671b81d34f110fbaac449563e48d419cec0030d9a8e58693"}, + {file = "aiohttp-3.11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:89a96a0696dc67d548f69cb518c581a7a33cc1f26ab42229dea1709217c9d926"}, + {file = "aiohttp-3.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f6b925c7775ab857bdc1e52e1f5abcae7d18751c09b751aeb641a5276d9b990e"}, + {file = "aiohttp-3.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7867d0808614f04e78e0a8d5a2c1f8ac6bc626a0c0e2f62be48be6b749e2f8b2"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:229ae13959a5f499d90ffbb4b9eac2255d8599315027d6f7c22fa9803a94d5b1"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62a2f5268b672087c45b33479ba1bb1d5a48c6d76c133cfce3a4f77410c200d1"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a896059b6937d1a22d8ee8377cdcd097bd26cd8c653b8f972051488b9baadee9"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:104deb7873681273c5daa13c41924693df394043a118dae90387d35bc5531788"}, + {file = "aiohttp-3.11.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae36ae52b0c22fb69fb8b744eff82a20db512a29eafc6e3a4ab43b17215b219d"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7349205bb163318dcc102329d30be59a647a3d24c82c3d91ed35b7e7301ea7e"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = 
"sha256:9095580806d9ed07c0c29b23364a0b1fb78258ef9f4bddf7e55bac0e475d4edf"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4d218d3eca40196384ad3b481309c56fd60e664128885d1734da0a8aa530d433"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:6533dd06df3d17d1756829b68b365b1583929b54082db8f65083a4184bf68322"}, + {file = "aiohttp-3.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72cd984f7f14e8c01b3e38f18f39ea85dba84e52ea05e37116ba5e2a72eef396"}, + {file = "aiohttp-3.11.0-cp311-cp311-win32.whl", hash = "sha256:c1828e10c3a49e2b234b87600ecb68a92b8a8dcf8b99bca9447f16c4baaa1630"}, + {file = "aiohttp-3.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:900ff74d78eb580ae4aa5883242893b123a0c442a46570902500f08d6a7e6696"}, + {file = "aiohttp-3.11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f8f0d79b923070f25674e4ea8f3d61c9d89d24d9598d50ff32c5b9b23c79a25b"}, + {file = "aiohttp-3.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:113bf06b029143e94a47c4f36e11a8b7e396e9d1f1fc8cea58e6b7e370cfed38"}, + {file = "aiohttp-3.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3e1ed8d152cccceffb1ee7a2ac227c16372e453fb11b3aeaa56783049b85d3f6"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2e82e515e268b965424ecabebd91834a41b36260b6ef5db015ee12ddb28ef3"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c1c49bc393d854d4421ebc174a0a41f9261f50d3694d8ca277146cbbcfd24ee7"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57e17c6d71f2dc857a8a1d09be1be7802e35d90fb4ba4b06cf1aab6414a57894"}, + {file = "aiohttp-3.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12071dd2cc95ba81e0f2737bebcb98b2a8656015e87772e84e8fb9e635b5da6e"}, + {file = 
"aiohttp-3.11.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97056d3422594e0787733ac4c45bef58722d452f4dc6615fee42f59fe51707dd"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ec5efbc872b00ddd85e3904059d274f284cff314e13f48776050ca2c58f451d"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:dd505a1121ad5b666191840b7bd1d8cb917df2647deeca6f3474331b72452362"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:600b1d9f86a130131915e2f2127664311b33902c486b21a747d626f5144b4471"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8c47a0ba6c2b3d3e5715f8338d657badd21f778c6be16701922c65521c5ecfc9"}, + {file = "aiohttp-3.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8b323b5d3aef7dd811424c269322eec58a977c0c8152e650159e47210d900504"}, + {file = "aiohttp-3.11.0-cp312-cp312-win32.whl", hash = "sha256:aabc4e92cb153636d6be54e84dad1b252ddb9aebe077942b6dcffe5e468d476a"}, + {file = "aiohttp-3.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:508cfcc99534b1282595357592d8367b44392b21f6eb5d4dc021f8d0d809e94d"}, + {file = "aiohttp-3.11.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c98a596ac20e8980cc6f34c0c92a113e98eb08f3997c150064d26d2aeb043e5a"}, + {file = "aiohttp-3.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ad14cdc0fba4df31c0f6e06c21928c5b924725cbf60d0ccc5f6e7132636250e9"}, + {file = "aiohttp-3.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:170fb2324826bb9f08055a8291f42192ae5ee2f25b2966c8f0f4537c61d73a7b"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdad66685fcf2ad14ce522cf849d4a025f4fd206d6cfc3f403d9873e4c243b03"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8b95a63a8e8b5f0464bd8b1b0d59d2bec98a59b6aacc71e9be23df6989b3dfb"}, + {file = 
"aiohttp-3.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7bcfcede95531589295f56e924702cef7f9685c9e4e5407592e04ded6a65bf3"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ecc2fb1a0a9d48cf773add34196cddf7e488e48e9596e090849751bf43098f4"}, + {file = "aiohttp-3.11.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8fef105113d56e817cb9bcc609667ee461321413a7b972b03f5b4939f40f307c"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d33b4490026968bdc7f0729b9d87a3a6b1e09043557d2fc1c605c6072deb2f11"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6362f50a6f0e5482c4330d2151cb682779230683da0e155c15ec9fc58cb50b6a"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f698aa61879df64425191d41213dfd99efdc1627e6398e6d7aa5c312fac9702"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0e7a0762cc29cd3acd01a4d2b547b3af7956ad230ebb80b529a8e4f3e4740fe8"}, + {file = "aiohttp-3.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b3e4fb7f5354d39490d8209aefdf5830b208d01c7293a2164e404312c3d8bc55"}, + {file = "aiohttp-3.11.0-cp313-cp313-win32.whl", hash = "sha256:6c5a6958f4366496004cf503d847093d464814543f157ef3b738bbf604232415"}, + {file = "aiohttp-3.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:3ed360d6672a9423aad39902a4e9fe305464d20ed7931dbdba30a4625782d875"}, + {file = "aiohttp-3.11.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d1ea006426edf7e1299c52a58b0443158012f7a56fed3515164b60bfcb1503a9"}, + {file = "aiohttp-3.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c5e6a1f8b0268ffa1c84d7c3558724956002ba8361176e76406233e704bbcffb"}, + {file = "aiohttp-3.11.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:40dc9446cff326672fcbf93efdb8ef7e949824de1097624efe4f61ac7f0d2c43"}, + {file = 
"aiohttp-3.11.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b4545e8d96870da9652930c5198366605ff8f982757030e2148cf341e5746b"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:37f8cf3c43f292d9bb3e6760476c2b55b9663a581fad682a586a410c43a7683e"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:329f5059e0bf6983dceebac8e6ed20e75eaff6163b3414f4a4cb59e0d7037672"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ae6f182be72c3531915e90625cc65afce4df8a0fc4988bd52d8a5d5faaeb68"}, + {file = "aiohttp-3.11.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d664e5f937c08adb7908ea9f391fbf2928a9b09cb412ac0aba602bde9e499e4"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:feca9fafa4385aea6759c171cd25ea82f7375312fca04178dae35331be45e538"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c415b9601ff50709d6050c8a9281733a9b042b9e589265ac40305b875cf9c463"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:91d3991fad8b65e5dbc13cd95669ea689fe0a96ff63e4e64ac24ed724e4f8103"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9231d610754724273a6ac05a1f177979490bfa6f84d49646df3928af2e88cfd5"}, + {file = "aiohttp-3.11.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4e4e155968040e32c124a89852a1a5426d0e920a35f4331e1b3949037bfe93a3"}, + {file = "aiohttp-3.11.0-cp39-cp39-win32.whl", hash = "sha256:76d6ee8bb132f8ee0fcb0e205b4708ddb6fba524eb515ee168113063d825131b"}, + {file = "aiohttp-3.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:577c7429f8869fa30186fc2c9eee64d75a30b51b61f26aac9725866ae5985cfd"}, + {file = "aiohttp-3.11.0.tar.gz", hash = "sha256:f57a0de48dda792629e7952d34a0c7b81ea336bb9b721391c7c58145b237fe55"}, ] 
[package.dependencies] @@ -117,7 +102,8 @@ aiosignal = ">=1.1.2" attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.12.0,<2.0" +propcache = ">=0.2.0" +yarl = ">=1.17.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] @@ -1166,8 +1152,8 @@ googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} grpcio-status = {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""} proto-plus = [ - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, + {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -1180,13 +1166,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-api-python-client" -version = "2.151.0" +version = "2.153.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" files = [ - {file = "google_api_python_client-2.151.0-py2.py3-none-any.whl", hash = "sha256:4427b2f47cd88b0355d540c2c52215f68c337f3bc9d6aae1ceeae4525977504c"}, - {file = "google_api_python_client-2.151.0.tar.gz", hash = "sha256:a9d26d630810ed4631aea21d1de3e42072f98240aaf184a8a1a874a371115034"}, + {file = "google_api_python_client-2.153.0-py2.py3-none-any.whl", hash = "sha256:6ff13bbfa92a57972e33ec3808e18309e5981b8ca1300e5da23bf2b4d6947384"}, + {file = "google_api_python_client-2.153.0.tar.gz", hash = "sha256:35cce8647f9c163fc04fb4d811fc91aae51954a2bdd74918decbe0e65d791dd2"}, ] [package.dependencies] @@ -1236,13 +1222,13 @@ httplib2 = ">=0.19.0" [[package]] name = 
"google-cloud-aiplatform" -version = "1.71.1" +version = "1.72.0" description = "Vertex AI API client library" optional = false python-versions = ">=3.8" files = [ - {file = "google-cloud-aiplatform-1.71.1.tar.gz", hash = "sha256:0013527e06853382ff0885898195bb7f3cf4a70eb7e5d53e4b1a28c8bd1775e2"}, - {file = "google_cloud_aiplatform-1.71.1-py2.py3-none-any.whl", hash = "sha256:4cd49bbc7f8ad88b92029a090b834ebacf9efadc844226f1e74d015d68f69ef5"}, + {file = "google_cloud_aiplatform-1.72.0-py2.py3-none-any.whl", hash = "sha256:a75dbeda47eaecb7bb2b1801b9c8dfe72a14f76a649525cdff496646214a7afb"}, + {file = "google_cloud_aiplatform-1.72.0.tar.gz", hash = "sha256:50611d3d51ff92d80f866e5e0f145daac9d943499c6d715250a9947eca4774f2"}, ] [package.dependencies] @@ -1260,13 +1246,13 @@ shapely = "<3.0.0dev" [package.extras] autologging = ["mlflow (>=1.27.0,<=2.16.0)"] -cloud-profiler = ["tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"] +cloud-profiler = ["tensorboard-plugin-profile (>=2.4.0,<2.18.0)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"] datasets = ["pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)"] endpoint = ["requests (>=2.28.1)"] evaluation = ["pandas (>=1.0.0)", "tqdm (>=4.23.0)"] -full = ["docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.114.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-vizier (>=0.1.6)", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.16.0)", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "requests (>=2.28.1)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow 
(>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "tqdm (>=4.23.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)"] -langchain = ["langchain (>=0.1.16,<0.3)", "langchain-core (<0.3)", "langchain-google-vertexai (<2)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "orjson (<=3.10.6)", "tenacity (<=8.3)"] -langchain-testing = ["absl-py", "cloudpickle (>=3.0,<4.0)", "google-cloud-trace (<2)", "langchain (>=0.1.16,<0.3)", "langchain-core (<0.3)", "langchain-google-vertexai (<2)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "orjson (<=3.10.6)", "pydantic (>=2.6.3,<3)", "pytest-xdist", "tenacity (<=8.3)"] +full = ["docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.114.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-vizier (>=0.1.6)", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.16.0)", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "requests (>=2.28.1)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<2.18.0)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "tqdm (>=4.23.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)"] +langchain = ["langchain (>=0.1.16,<0.4)", "langchain-core (<0.4)", "langchain-google-vertexai (<3)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)"] +langchain-testing = ["absl-py", "cloudpickle (>=3.0,<4.0)", "google-cloud-trace (<2)", "langchain (>=0.1.16,<0.4)", "langchain-core (<0.4)", 
"langchain-google-vertexai (<3)", "openinference-instrumentation-langchain (>=0.1.19,<0.2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)", "pytest-xdist"] lit = ["explainable-ai-sdk (>=1.0.0)", "lit-nlp (==0.4.0)", "pandas (>=1.0.0)", "tensorflow (>=2.3.0,<3.0.0dev)"] metadata = ["numpy (>=1.15.0)", "pandas (>=1.0.0)"] pipelines = ["pyyaml (>=5.3.1,<7)"] @@ -1275,8 +1261,8 @@ private-endpoints = ["requests (>=2.28.1)", "urllib3 (>=1.21.1,<1.27)"] ray = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0)", "pyarrow (>=6.0.1)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "setuptools (<70.0.0)"] ray-testing = ["google-cloud-bigquery", "google-cloud-bigquery-storage", "immutabledict", "pandas (>=1.0.0)", "pyarrow (>=6.0.1)", "pytest-xdist", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "ray[train]", "scikit-learn", "setuptools (<70.0.0)", "tensorflow", "torch (>=2.0.0,<2.1.0)", "xgboost", "xgboost-ray"] reasoningengine = ["cloudpickle (>=3.0,<4.0)", "google-cloud-trace (<2)", "opentelemetry-exporter-gcp-trace (<2)", "opentelemetry-sdk (<2)", "pydantic (>=2.6.3,<3)"] -tensorboard = ["tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"] -testing = ["aiohttp", "bigframes", "docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.114.0)", "google-api-core (>=2.11,<3.0.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-vizier (>=0.1.6)", "grpcio-testing", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "ipython", "kfp (>=2.6.0,<3.0.0)", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.16.0)", "nltk", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow 
(>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pytest-asyncio", "pytest-xdist", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "requests (>=2.28.1)", "requests-toolbelt (<1.0.0)", "scikit-learn", "sentencepiece (>=0.2.0)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<3.0.0dev)", "tensorflow (==2.13.0)", "tensorflow (==2.16.1)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "torch (>=2.0.0,<2.1.0)", "torch (>=2.2.0)", "tqdm (>=4.23.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)", "xgboost"] +tensorboard = ["tensorboard-plugin-profile (>=2.4.0,<2.18.0)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "werkzeug (>=2.0.0,<2.1.0dev)"] +testing = ["aiohttp", "bigframes", "docker (>=5.0.3)", "explainable-ai-sdk (>=1.0.0)", "fastapi (>=0.71.0,<=0.114.0)", "google-api-core (>=2.11,<3.0.0)", "google-cloud-bigquery", "google-cloud-bigquery-storage", "google-vizier (>=0.1.6)", "grpcio-testing", "httpx (>=0.23.0,<0.25.0)", "immutabledict", "ipython", "kfp (>=2.6.0,<3.0.0)", "lit-nlp (==0.4.0)", "mlflow (>=1.27.0,<=2.16.0)", "nltk", "numpy (>=1.15.0)", "pandas (>=1.0.0)", "pyarrow (>=10.0.1)", "pyarrow (>=14.0.0)", "pyarrow (>=3.0.0,<8.0dev)", "pyarrow (>=6.0.1)", "pytest-asyncio", "pytest-xdist", "pyyaml (>=5.3.1,<7)", "ray[default] (>=2.4,<2.5.dev0 || >2.9.0,!=2.9.1,!=2.9.2,<2.10.dev0 || >=2.33.dev0,<=2.33.0)", "ray[default] (>=2.5,<=2.33.0)", "requests (>=2.28.1)", "requests-toolbelt (<1.0.0)", "scikit-learn", "sentencepiece (>=0.2.0)", "setuptools (<70.0.0)", "starlette (>=0.17.1)", "tensorboard-plugin-profile (>=2.4.0,<2.18.0)", "tensorflow (==2.13.0)", "tensorflow (==2.16.1)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.3.0,<3.0.0dev)", "tensorflow (>=2.4.0,<3.0.0dev)", "torch (>=2.0.0,<2.1.0)", "torch 
(>=2.2.0)", "tqdm (>=4.23.0)", "urllib3 (>=1.21.1,<1.27)", "uvicorn[standard] (>=0.16.0)", "werkzeug (>=2.0.0,<2.1.0dev)", "xgboost"] tokenization = ["sentencepiece (>=0.2.0)"] vizier = ["google-vizier (>=0.1.6)"] xai = ["tensorflow (>=2.3.0,<3.0.0dev)"] @@ -1346,8 +1332,8 @@ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extr google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, + {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" @@ -1389,8 +1375,8 @@ files = [ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev" proto-plus = [ - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, + {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, ] protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev" @@ -1476,13 +1462,13 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] [[package]] name = "googleapis-common-protos" -version = "1.65.0" +version = "1.66.0" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" files = [ - {file = "googleapis_common_protos-1.65.0-py2.py3-none-any.whl", hash = "sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63"}, - {file = "googleapis_common_protos-1.65.0.tar.gz", hash = "sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0"}, + {file 
= "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"}, + {file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"}, ] [package.dependencies] @@ -2028,84 +2014,84 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jiter" -version = "0.7.0" +version = "0.7.1" description = "Fast iterable JSON parser." optional = false python-versions = ">=3.8" files = [ - {file = "jiter-0.7.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e14027f61101b3f5e173095d9ecf95c1cac03ffe45a849279bde1d97e559e314"}, - {file = "jiter-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:979ec4711c2e37ac949561858bd42028884c9799516a923e1ff0b501ef341a4a"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:662d5d3cca58ad6af7a3c6226b641c8655de5beebcb686bfde0df0f21421aafa"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d89008fb47043a469f97ad90840b97ba54e7c3d62dc7cbb6cbf938bd0caf71d"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8b16c35c846a323ce9067170d5ab8c31ea3dbcab59c4f7608bbbf20c2c3b43f"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9e82daaa1b0a68704f9029b81e664a5a9de3e466c2cbaabcda5875f961702e7"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43a87a9f586636e1f0dd3651a91f79b491ea0d9fd7cbbf4f5c463eebdc48bda7"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ec05b1615f96cc3e4901678bc863958611584072967d9962f9e571d60711d52"}, - {file = "jiter-0.7.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a5cb97e35370bde7aa0d232a7f910f5a0fbbc96bc0a7dbaa044fd5cd6bcd7ec3"}, - {file = 
"jiter-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cb316dacaf48c8c187cea75d0d7f835f299137e6fdd13f691dff8f92914015c7"}, - {file = "jiter-0.7.0-cp310-none-win32.whl", hash = "sha256:243f38eb4072763c54de95b14ad283610e0cd3bf26393870db04e520f60eebb3"}, - {file = "jiter-0.7.0-cp310-none-win_amd64.whl", hash = "sha256:2221d5603c139f6764c54e37e7c6960c469cbcd76928fb10d15023ba5903f94b"}, - {file = "jiter-0.7.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:91cec0ad755bd786c9f769ce8d843af955df6a8e56b17658771b2d5cb34a3ff8"}, - {file = "jiter-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:feba70a28a27d962e353e978dbb6afd798e711c04cb0b4c5e77e9d3779033a1a"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d866ec066c3616cacb8535dbda38bb1d470b17b25f0317c4540182bc886ce2"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8e7a7a00b6f9f18289dd563596f97ecaba6c777501a8ba04bf98e03087bcbc60"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9aaf564094c7db8687f2660605e099f3d3e6ea5e7135498486674fcb78e29165"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4d27e09825c1b3c7a667adb500ce8b840e8fc9f630da8454b44cdd4fb0081bb"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca7c287da9c1d56dda88da1d08855a787dbb09a7e2bd13c66a2e288700bd7c7"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db19a6d160f093cbc8cd5ea2abad420b686f6c0e5fb4f7b41941ebc6a4f83cda"}, - {file = "jiter-0.7.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e46a63c7f877cf7441ffc821c28287cfb9f533ae6ed707bde15e7d4dfafa7ae"}, - {file = "jiter-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ba426fa7ff21cb119fa544b75dd3fbee6a70e55a5829709c0338d07ccd30e6d"}, - {file = 
"jiter-0.7.0-cp311-none-win32.whl", hash = "sha256:c07f55a64912b0c7982377831210836d2ea92b7bd343fca67a32212dd72e38e0"}, - {file = "jiter-0.7.0-cp311-none-win_amd64.whl", hash = "sha256:ed27b2c43e1b5f6c7fedc5c11d4d8bfa627de42d1143d87e39e2e83ddefd861a"}, - {file = "jiter-0.7.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac7930bcaaeb1e229e35c91c04ed2e9f39025b86ee9fc3141706bbf6fff4aeeb"}, - {file = "jiter-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:571feae3e7c901a8eedde9fd2865b0dfc1432fb15cab8c675a8444f7d11b7c5d"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8af4df8a262fa2778b68c2a03b6e9d1cb4d43d02bea6976d46be77a3a331af1"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd028d4165097a611eb0c7494d8c1f2aebd46f73ca3200f02a175a9c9a6f22f5"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6b487247c7836810091e9455efe56a52ec51bfa3a222237e1587d04d3e04527"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6d28a92f28814e1a9f2824dc11f4e17e1df1f44dc4fdeb94c5450d34bcb2602"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90443994bbafe134f0b34201dad3ebe1c769f0599004084e046fb249ad912425"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f9abf464f9faac652542ce8360cea8e68fba2b78350e8a170248f9bcc228702a"}, - {file = "jiter-0.7.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db7a8d99fc5f842f7d2852f06ccaed066532292c41723e5dff670c339b649f88"}, - {file = "jiter-0.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:15cf691ebd8693b70c94627d6b748f01e6d697d9a6e9f2bc310934fcfb7cf25e"}, - {file = "jiter-0.7.0-cp312-none-win32.whl", hash = "sha256:9dcd54fa422fb66ca398bec296fed5f58e756aa0589496011cfea2abb5be38a5"}, - {file = 
"jiter-0.7.0-cp312-none-win_amd64.whl", hash = "sha256:cc989951f73f9375b8eacd571baaa057f3d7d11b7ce6f67b9d54642e7475bfad"}, - {file = "jiter-0.7.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:24cecd18df540963cd27c08ca5ce1d0179f229ff78066d9eecbe5add29361340"}, - {file = "jiter-0.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d41b46236b90b043cca73785674c23d2a67d16f226394079d0953f94e765ed76"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b160db0987171365c153e406a45dcab0ee613ae3508a77bfff42515cb4ce4d6e"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d1c8d91e0f0bd78602eaa081332e8ee4f512c000716f5bc54e9a037306d693a7"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:997706c683195eeff192d2e5285ce64d2a610414f37da3a3f2625dcf8517cf90"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7ea52a8a0ff0229ab2920284079becd2bae0688d432fca94857ece83bb49c541"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d77449d2738cf74752bb35d75ee431af457e741124d1db5e112890023572c7c"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8203519907a1d81d6cb00902c98e27c2d0bf25ce0323c50ca594d30f5f1fbcf"}, - {file = "jiter-0.7.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41d15ccc53931c822dd7f1aebf09faa3cda2d7b48a76ef304c7dbc19d1302e51"}, - {file = "jiter-0.7.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:febf3179b2fabf71fbd2fd52acb8594163bb173348b388649567a548f356dbf6"}, - {file = "jiter-0.7.0-cp313-none-win32.whl", hash = "sha256:4a8e2d866e7eda19f012444e01b55079d8e1c4c30346aaac4b97e80c54e2d6d3"}, - {file = "jiter-0.7.0-cp313-none-win_amd64.whl", hash = "sha256:7417c2b928062c496f381fb0cb50412eee5ad1d8b53dbc0e011ce45bb2de522c"}, - {file = 
"jiter-0.7.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9c62c737b5368e51e74960a08fe1adc807bd270227291daede78db24d5fbf556"}, - {file = "jiter-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e4640722b1bef0f6e342fe4606aafaae0eb4f4be5c84355bb6867f34400f6688"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f367488c3b9453eab285424c61098faa1cab37bb49425e69c8dca34f2dfe7d69"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0cf5d42beb3514236459454e3287db53d9c4d56c4ebaa3e9d0efe81b19495129"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc5190ea1113ee6f7252fa8a5fe5a6515422e378356c950a03bbde5cafbdbaab"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ee47a149d698796a87abe445fc8dee21ed880f09469700c76c8d84e0d11efd"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48592c26ea72d3e71aa4bea0a93454df907d80638c3046bb0705507b6704c0d7"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:79fef541199bd91cfe8a74529ecccb8eaf1aca38ad899ea582ebbd4854af1e51"}, - {file = "jiter-0.7.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d1ef6bb66041f2514739240568136c81b9dcc64fd14a43691c17ea793b6535c0"}, - {file = "jiter-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aca4d950863b1c238e315bf159466e064c98743eef3bd0ff9617e48ff63a4715"}, - {file = "jiter-0.7.0-cp38-none-win32.whl", hash = "sha256:897745f230350dcedb8d1ebe53e33568d48ea122c25e6784402b6e4e88169be7"}, - {file = "jiter-0.7.0-cp38-none-win_amd64.whl", hash = "sha256:b928c76a422ef3d0c85c5e98c498ce3421b313c5246199541e125b52953e1bc0"}, - {file = "jiter-0.7.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c9b669ff6f8ba08270dee9ccf858d3b0203b42314a428a1676762f2d390fbb64"}, - {file = 
"jiter-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b5be919bacd73ca93801c3042bce6e95cb9c555a45ca83617b9b6c89df03b9c2"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a282e1e8a396dabcea82d64f9d05acf7efcf81ecdd925b967020dcb0e671c103"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:17ecb1a578a56e97a043c72b463776b5ea30343125308f667fb8fce4b3796735"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b6045fa0527129218cdcd8a8b839f678219686055f31ebab35f87d354d9c36e"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:189cc4262a92e33c19d4fd24018f5890e4e6da5b2581f0059938877943f8298c"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c138414839effbf30d185e30475c6dc8a16411a1e3681e5fd4605ab1233ac67a"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2791604acef33da6b72d5ecf885a32384bcaf9aa1e4be32737f3b8b9588eef6a"}, - {file = "jiter-0.7.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae60ec89037a78d60bbf3d8b127f1567769c8fa24886e0abed3f622791dea478"}, - {file = "jiter-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:836f03dea312967635233d826f783309b98cfd9ccc76ac776e224cfcef577862"}, - {file = "jiter-0.7.0-cp39-none-win32.whl", hash = "sha256:ebc30ae2ce4bc4986e1764c404b4ea1924f926abf02ce92516485098f8545374"}, - {file = "jiter-0.7.0-cp39-none-win_amd64.whl", hash = "sha256:abf596f951370c648f37aa9899deab296c42a3829736e598b0dd10b08f77a44d"}, - {file = "jiter-0.7.0.tar.gz", hash = "sha256:c061d9738535497b5509f8970584f20de1e900806b239a39a9994fc191dad630"}, + {file = "jiter-0.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:262e96d06696b673fad6f257e6a0abb6e873dc22818ca0e0600f4a1189eb334f"}, + {file = "jiter-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:be6de02939aac5be97eb437f45cfd279b1dc9de358b13ea6e040e63a3221c40d"}, + {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935f10b802bc1ce2b2f61843e498c7720aa7f4e4bb7797aa8121eab017293c3d"}, + {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9cd3cccccabf5064e4bb3099c87bf67db94f805c1e62d1aefd2b7476e90e0ee2"}, + {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4aa919ebfc5f7b027cc368fe3964c0015e1963b92e1db382419dadb098a05192"}, + {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ae2d01e82c94491ce4d6f461a837f63b6c4e6dd5bb082553a70c509034ff3d4"}, + {file = "jiter-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f9568cd66dbbdab67ae1b4c99f3f7da1228c5682d65913e3f5f95586b3cb9a9"}, + {file = "jiter-0.7.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9ecbf4e20ec2c26512736284dc1a3f8ed79b6ca7188e3b99032757ad48db97dc"}, + {file = "jiter-0.7.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b1a0508fddc70ce00b872e463b387d49308ef02b0787992ca471c8d4ba1c0fa1"}, + {file = "jiter-0.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f84c9996664c460f24213ff1e5881530abd8fafd82058d39af3682d5fd2d6316"}, + {file = "jiter-0.7.1-cp310-none-win32.whl", hash = "sha256:c915e1a1960976ba4dfe06551ea87063b2d5b4d30759012210099e712a414d9f"}, + {file = "jiter-0.7.1-cp310-none-win_amd64.whl", hash = "sha256:75bf3b7fdc5c0faa6ffffcf8028a1f974d126bac86d96490d1b51b3210aa0f3f"}, + {file = "jiter-0.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ad04a23a91f3d10d69d6c87a5f4471b61c2c5cd6e112e85136594a02043f462c"}, + {file = "jiter-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e47a554de88dff701226bb5722b7f1b6bccd0b98f1748459b7e56acac2707a5"}, + {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:1e44fff69c814a2e96a20b4ecee3e2365e9b15cf5fe4e00869d18396daa91dab"}, + {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df0a1d05081541b45743c965436f8b5a1048d6fd726e4a030113a2699a6046ea"}, + {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f22cf8f236a645cb6d8ffe2a64edb5d2b66fb148bf7c75eea0cb36d17014a7bc"}, + {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8589f50b728ea4bf22e0632eefa125c8aa9c38ed202a5ee6ca371f05eeb3ff"}, + {file = "jiter-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f20de711224f2ca2dbb166a8d512f6ff48c9c38cc06b51f796520eb4722cc2ce"}, + {file = "jiter-0.7.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8a9803396032117b85ec8cbf008a54590644a062fedd0425cbdb95e4b2b60479"}, + {file = "jiter-0.7.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3d8bae77c82741032e9d89a4026479061aba6e646de3bf5f2fc1ae2bbd9d06e0"}, + {file = "jiter-0.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3dc9939e576bbc68c813fc82f6620353ed68c194c7bcf3d58dc822591ec12490"}, + {file = "jiter-0.7.1-cp311-none-win32.whl", hash = "sha256:f7605d24cd6fab156ec89e7924578e21604feee9c4f1e9da34d8b67f63e54892"}, + {file = "jiter-0.7.1-cp311-none-win_amd64.whl", hash = "sha256:f3ea649e7751a1a29ea5ecc03c4ada0a833846c59c6da75d747899f9b48b7282"}, + {file = "jiter-0.7.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ad36a1155cbd92e7a084a568f7dc6023497df781adf2390c345dd77a120905ca"}, + {file = "jiter-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7ba52e6aaed2dc5c81a3d9b5e4ab95b039c4592c66ac973879ba57c3506492bb"}, + {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b7de0b6f6728b678540c7927587e23f715284596724be203af952418acb8a2d"}, + {file = 
"jiter-0.7.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9463b62bd53c2fb85529c700c6a3beb2ee54fde8bef714b150601616dcb184a6"}, + {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:627164ec01d28af56e1f549da84caf0fe06da3880ebc7b7ee1ca15df106ae172"}, + {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25d0e5bf64e368b0aa9e0a559c3ab2f9b67e35fe7269e8a0d81f48bbd10e8963"}, + {file = "jiter-0.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c244261306f08f8008b3087059601997016549cb8bb23cf4317a4827f07b7d74"}, + {file = "jiter-0.7.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7ded4e4b75b68b843b7cea5cd7c55f738c20e1394c68c2cb10adb655526c5f1b"}, + {file = "jiter-0.7.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:80dae4f1889b9d09e5f4de6b58c490d9c8ce7730e35e0b8643ab62b1538f095c"}, + {file = "jiter-0.7.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5970cf8ec943b51bce7f4b98d2e1ed3ada170c2a789e2db3cb484486591a176a"}, + {file = "jiter-0.7.1-cp312-none-win32.whl", hash = "sha256:701d90220d6ecb3125d46853c8ca8a5bc158de8c49af60fd706475a49fee157e"}, + {file = "jiter-0.7.1-cp312-none-win_amd64.whl", hash = "sha256:7824c3ecf9ecf3321c37f4e4d4411aad49c666ee5bc2a937071bdd80917e4533"}, + {file = "jiter-0.7.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:097676a37778ba3c80cb53f34abd6943ceb0848263c21bf423ae98b090f6c6ba"}, + {file = "jiter-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3298af506d4271257c0a8f48668b0f47048d69351675dd8500f22420d4eec378"}, + {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12fd88cfe6067e2199964839c19bd2b422ca3fd792949b8f44bb8a4e7d21946a"}, + {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dacca921efcd21939123c8ea8883a54b9fa7f6545c8019ffcf4f762985b6d0c8"}, + {file 
= "jiter-0.7.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de3674a5fe1f6713a746d25ad9c32cd32fadc824e64b9d6159b3b34fd9134143"}, + {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65df9dbae6d67e0788a05b4bad5706ad40f6f911e0137eb416b9eead6ba6f044"}, + {file = "jiter-0.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ba9a358d59a0a55cccaa4957e6ae10b1a25ffdabda863c0343c51817610501d"}, + {file = "jiter-0.7.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:576eb0f0c6207e9ede2b11ec01d9c2182973986514f9c60bc3b3b5d5798c8f50"}, + {file = "jiter-0.7.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:e550e29cdf3577d2c970a18f3959e6b8646fd60ef1b0507e5947dc73703b5627"}, + {file = "jiter-0.7.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:81d968dbf3ce0db2e0e4dec6b0a0d5d94f846ee84caf779b07cab49f5325ae43"}, + {file = "jiter-0.7.1-cp313-none-win32.whl", hash = "sha256:f892e547e6e79a1506eb571a676cf2f480a4533675f834e9ae98de84f9b941ac"}, + {file = "jiter-0.7.1-cp313-none-win_amd64.whl", hash = "sha256:0302f0940b1455b2a7fb0409b8d5b31183db70d2b07fd177906d83bf941385d1"}, + {file = "jiter-0.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:c65a3ce72b679958b79d556473f192a4dfc5895e8cc1030c9f4e434690906076"}, + {file = "jiter-0.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e80052d3db39f9bb8eb86d207a1be3d9ecee5e05fdec31380817f9609ad38e60"}, + {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a497859c4f3f7acd71c8bd89a6f9cf753ebacacf5e3e799138b8e1843084e3"}, + {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c1288bc22b9e36854a0536ba83666c3b1fb066b811019d7b682c9cf0269cdf9f"}, + {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b096ca72dd38ef35675e1d3b01785874315182243ef7aea9752cb62266ad516f"}, + {file = 
"jiter-0.7.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbd52c50b605af13dbee1a08373c520e6fcc6b5d32f17738875847fea4e2cd"}, + {file = "jiter-0.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af29c5c6eb2517e71ffa15c7ae9509fa5e833ec2a99319ac88cc271eca865519"}, + {file = "jiter-0.7.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f114a4df1e40c03c0efbf974b376ed57756a1141eb27d04baee0680c5af3d424"}, + {file = "jiter-0.7.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:191fbaee7cf46a9dd9b817547bf556facde50f83199d07fc48ebeff4082f9df4"}, + {file = "jiter-0.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0e2b445e5ee627fb4ee6bbceeb486251e60a0c881a8e12398dfdff47c56f0723"}, + {file = "jiter-0.7.1-cp38-none-win32.whl", hash = "sha256:47ac4c3cf8135c83e64755b7276339b26cd3c7ddadf9e67306ace4832b283edf"}, + {file = "jiter-0.7.1-cp38-none-win_amd64.whl", hash = "sha256:60b49c245cd90cde4794f5c30f123ee06ccf42fb8730a019a2870cd005653ebd"}, + {file = "jiter-0.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8f212eeacc7203256f526f550d105d8efa24605828382cd7d296b703181ff11d"}, + {file = "jiter-0.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d9e247079d88c00e75e297e6cb3a18a039ebcd79fefc43be9ba4eb7fb43eb726"}, + {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0aacaa56360139c53dcf352992b0331f4057a0373bbffd43f64ba0c32d2d155"}, + {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc1b55314ca97dbb6c48d9144323896e9c1a25d41c65bcb9550b3e0c270ca560"}, + {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f281aae41b47e90deb70e7386558e877a8e62e1693e0086f37d015fa1c102289"}, + {file = "jiter-0.7.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93c20d2730a84d43f7c0b6fb2579dc54335db742a59cf9776d0b80e99d587382"}, + {file = 
"jiter-0.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e81ccccd8069110e150613496deafa10da2f6ff322a707cbec2b0d52a87b9671"}, + {file = "jiter-0.7.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a7d5e85766eff4c9be481d77e2226b4c259999cb6862ccac5ef6621d3c8dcce"}, + {file = "jiter-0.7.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f52ce5799df5b6975439ecb16b1e879d7655e1685b6e3758c9b1b97696313bfb"}, + {file = "jiter-0.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e0c91a0304373fdf97d56f88356a010bba442e6d995eb7773cbe32885b71cdd8"}, + {file = "jiter-0.7.1-cp39-none-win32.whl", hash = "sha256:5c08adf93e41ce2755970e8aa95262298afe2bf58897fb9653c47cd93c3c6cdc"}, + {file = "jiter-0.7.1-cp39-none-win_amd64.whl", hash = "sha256:6592f4067c74176e5f369228fb2995ed01400c9e8e1225fb73417183a5e635f0"}, + {file = "jiter-0.7.1.tar.gz", hash = "sha256:448cf4f74f7363c34cdef26214da527e8eeffd88ba06d0b80b485ad0667baf5d"}, ] [[package]] @@ -2249,8 +2235,8 @@ langchain-core = ">=0.3.15,<0.4.0" langchain-text-splitters = ">=0.3.0,<0.4.0" langsmith = ">=0.1.17,<0.2.0" numpy = [ - {version = ">=1,<2", markers = "python_version < \"3.12\""}, {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, + {version = ">=1,<2", markers = "python_version < \"3.12\""}, ] pydantic = ">=2.7.4,<3.0.0" PyYAML = ">=5.3" @@ -2277,41 +2263,41 @@ pydantic = ">=2.7.4,<3.0.0" [[package]] name = "langchain-community" -version = "0.3.4" +version = "0.3.7" description = "Community contributed LangChain integrations." 
optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_community-0.3.4-py3-none-any.whl", hash = "sha256:67a44d3db8ba14a8abae67c8f611e6dc20002446439e761f673c7dffa506fb85"}, - {file = "langchain_community-0.3.4.tar.gz", hash = "sha256:80c7e6491788449b8a6e7a31444ff8ebb5c32242f67a65aa33d56ad35a7b5b5c"}, + {file = "langchain_community-0.3.7-py3-none-any.whl", hash = "sha256:048f89d9a54b0720a0f865d5d469494e088cb9970a2397b19446ce0d84867141"}, + {file = "langchain_community-0.3.7.tar.gz", hash = "sha256:5b7a5cea82bedbf3ea276eac56128e00dbaf86561991cfc80fb21175a343c9a3"}, ] [package.dependencies] aiohttp = ">=3.8.3,<4.0.0" dataclasses-json = ">=0.5.7,<0.7" httpx-sse = ">=0.4.0,<0.5.0" -langchain = ">=0.3.6,<0.4.0" -langchain-core = ">=0.3.14,<0.4.0" +langchain = ">=0.3.7,<0.4.0" +langchain-core = ">=0.3.17,<0.4.0" langsmith = ">=0.1.125,<0.2.0" numpy = [ - {version = ">=1,<2", markers = "python_version < \"3.12\""}, {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""}, + {version = ">=1,<2", markers = "python_version < \"3.12\""}, ] pydantic-settings = ">=2.4.0,<3.0.0" PyYAML = ">=5.3" requests = ">=2,<3" -SQLAlchemy = ">=1.4,<3" +SQLAlchemy = ">=1.4,<2.0.36" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10" [[package]] name = "langchain-core" -version = "0.3.15" +version = "0.3.18" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_core-0.3.15-py3-none-any.whl", hash = "sha256:3d4ca6dbb8ed396a6ee061063832a2451b0ce8c345570f7b086ffa7288e4fa29"}, - {file = "langchain_core-0.3.15.tar.gz", hash = "sha256:b1a29787a4ffb7ec2103b4e97d435287201da7809b369740dd1e32f176325aba"}, + {file = "langchain_core-0.3.18-py3-none-any.whl", hash = "sha256:c38bb198152082e76859402bfff08f785ac66bcfd44c04d132708e16ee5f999c"}, + {file = "langchain_core-0.3.18.tar.gz", hash = "sha256:a14e9b9c0525b6fc9a7e4fe7f54a48b272d91ea855b1b081b364fabb966ae7af"}, ] 
[package.dependencies] @@ -2319,8 +2305,8 @@ jsonpatch = ">=1.33,<2.0" langsmith = ">=0.1.125,<0.2.0" packaging = ">=23.2,<25" pydantic = [ - {version = ">=2.5.2,<3.0.0", markers = "python_full_version < \"3.12.4\""}, {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, + {version = ">=2.5.2,<3.0.0", markers = "python_full_version < \"3.12.4\""}, ] PyYAML = ">=5.3" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<10.0.0" @@ -2400,17 +2386,17 @@ ollama = ">=0.3.0,<1" [[package]] name = "langchain-openai" -version = "0.2.6" +version = "0.2.8" description = "An integration package connecting OpenAI and LangChain" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_openai-0.2.6-py3-none-any.whl", hash = "sha256:d56e4d9183bdd1a5fb5f3ed9d287f15108e01d631ded170dd330a566f2927b95"}, - {file = "langchain_openai-0.2.6.tar.gz", hash = "sha256:7054e5f64498ad8e59d77cdc210103f5ea4f67258997edc48ae237298adeb316"}, + {file = "langchain_openai-0.2.8-py3-none-any.whl", hash = "sha256:0116b104d203377d2f4f61095e1d3ce1ba50e446d1a75397eaf0d1fcdf2c0d7b"}, + {file = "langchain_openai-0.2.8.tar.gz", hash = "sha256:48d22fa05bb8f7b371be47d05c7a3f42a68ff0e704647b86cc1bfc44e140f01b"}, ] [package.dependencies] -langchain-core = ">=0.3.15,<0.4.0" +langchain-core = ">=0.3.17,<0.4.0" openai = ">=1.54.0,<2.0.0" tiktoken = ">=0.7,<1" @@ -2444,13 +2430,13 @@ six = "*" [[package]] name = "langgraph" -version = "0.2.45" +version = "0.2.46" description = "Building stateful, multi-actor applications with LLMs" optional = false python-versions = "<4.0,>=3.9.0" files = [ - {file = "langgraph-0.2.45-py3-none-any.whl", hash = "sha256:adfa9545c6c27180e995b654cb5817212c134a98407c7f34253a5fae58893f28"}, - {file = "langgraph-0.2.45.tar.gz", hash = "sha256:939035e830506c5b662c9e61d95dbd1a5ef9d1fd35310dba68cebb33de2e7cdb"}, + {file = "langgraph-0.2.46-py3-none-any.whl", hash = "sha256:7ca4031bc8f06cb3697ccee21ca80464588656391d3888090b6790dd89bffad1"}, + {file = 
"langgraph-0.2.46.tar.gz", hash = "sha256:f1a39ba0d9b3df3b807b14015e682bc2b445ceb760d113e75ab34209a7eaadef"}, ] [package.dependencies] @@ -2460,13 +2446,13 @@ langgraph-sdk = ">=0.1.32,<0.2.0" [[package]] name = "langgraph-checkpoint" -version = "2.0.2" +version = "2.0.3" description = "Library with base interfaces for LangGraph checkpoint savers." optional = false python-versions = "<4.0.0,>=3.9.0" files = [ - {file = "langgraph_checkpoint-2.0.2-py3-none-any.whl", hash = "sha256:6e5dfd90e1fc71b91ccff75939ada1114e5d7f824df5f24c62d39bed69039ee2"}, - {file = "langgraph_checkpoint-2.0.2.tar.gz", hash = "sha256:c1d033e4e4855f580fa56830327eb86513b64ab5be527245363498e76b19a0b9"}, + {file = "langgraph_checkpoint-2.0.3-py3-none-any.whl", hash = "sha256:70da073793e0750f4b9dfe5fbbe49b267c494987aaa8f2d04be23bfd0d8a6fd7"}, + {file = "langgraph_checkpoint-2.0.3.tar.gz", hash = "sha256:d7aabbc78eeb5466301be2ede5e0b0b8e5dc97647fcbe79801f1326f85fc0344"}, ] [package.dependencies] @@ -2506,21 +2492,21 @@ orjson = ">=3.10.1" [[package]] name = "langsmith" -version = "0.1.142" +version = "0.1.143" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.142-py3-none-any.whl", hash = "sha256:f639ca23c9a0bb77af5fb881679b2f66ff1f21f19d0bebf4e51375e7585a8b38"}, - {file = "langsmith-0.1.142.tar.gz", hash = "sha256:f8a84d100f3052233ff0a1d66ae14c5dfc20b7e41a1601de011384f16ee6cb82"}, + {file = "langsmith-0.1.143-py3-none-any.whl", hash = "sha256:ba0d827269e9b03a90fababe41fa3e4e3f833300b95add10184f7e67167dde6f"}, + {file = "langsmith-0.1.143.tar.gz", hash = "sha256:4c5159e5cd84b3f8499433009e72d2076dd2daf6c044ac8a3611b30d0d0161c5"}, ] [package.dependencies] httpx = ">=0.23.0,<1" orjson = ">=3.9.14,<4.0.0" pydantic = [ - {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, + {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, ] requests = ">=2,<3" requests-toolbelt = ">=1.0.0,<2.0.0" @@ -2627,13 +2613,13 @@ rapidfuzz = ">=3.9.0,<4.0.0" [[package]] name = "litellm" -version = "1.52.4" +version = "1.52.6" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.52.4-py3-none-any.whl", hash = "sha256:bfb208c2fc2c960bea6db34dbb77cd3c8a63e76d13a4d9163815df982d7e2764"}, - {file = "litellm-1.52.4.tar.gz", hash = "sha256:aaf5de4da0fad31f8e3cb90d026660638adfb9d97fe7c2a63ac9e072d1690900"}, + {file = "litellm-1.52.6-py3-none-any.whl", hash = "sha256:9b3e9fb51f7e2a3cc8b50997b346c55aae9435a138d9a656f18e262750a1bfe1"}, + {file = "litellm-1.52.6.tar.gz", hash = "sha256:d67c653f97bd07f503b975c167de1e25632b7bc6bb3c008c46921e4acc81ec60"}, ] [package.dependencies] @@ -3213,13 +3199,13 @@ files = [ [[package]] name = "narwhals" -version = "1.13.3" +version = "1.13.5" description = "Extremely lightweight compatibility layer between dataframe libraries" optional = false python-versions = ">=3.8" files = 
[ - {file = "narwhals-1.13.3-py3-none-any.whl", hash = "sha256:cde49b59b4540885d822777b747ed3fad65632b3d34648040308afcf08e62547"}, - {file = "narwhals-1.13.3.tar.gz", hash = "sha256:db95cb5b5a6b99bad9fe7f2e2dacf937d57dee1c76c4544d4354a324084e36b5"}, + {file = "narwhals-1.13.5-py3-none-any.whl", hash = "sha256:91fe95ffdece9e3837780b6cd32f4309a41f39b285bc9d42d60eaff47d48b39a"}, + {file = "narwhals-1.13.5.tar.gz", hash = "sha256:2e71b70895759af455a83583052bb9dbada9f72efad786d8d1b2f38078054e73"}, ] [package.extras] @@ -3412,13 +3398,13 @@ httpx = ">=0.27.0,<0.28.0" [[package]] name = "openai" -version = "1.54.3" +version = "1.54.4" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.54.3-py3-none-any.whl", hash = "sha256:f18dbaf09c50d70c4185b892a2a553f80681d1d866323a2da7f7be2f688615d5"}, - {file = "openai-1.54.3.tar.gz", hash = "sha256:7511b74eeb894ac0b0253dc71f087a15d2e4d71d22d0088767205143d880cca6"}, + {file = "openai-1.54.4-py3-none-any.whl", hash = "sha256:0d95cef99346bf9b6d7fbf57faf61a673924c3e34fa8af84c9ffe04660673a7e"}, + {file = "openai-1.54.4.tar.gz", hash = "sha256:50f3656e45401c54e973fa05dc29f3f0b0d19348d685b2f7ddb4d92bf7b1b6bf"}, ] [package.dependencies] @@ -3579,8 +3565,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3821,13 +3807,13 @@ files = [ [[package]] name = "podcastfy" -version = "0.3.3" +version = "0.3.5" description = "An Open Source alternative to NotebookLM's podcast feature: Transforming Multimodal Content into Captivating Multilingual Audio Conversations with GenAI" optional = false python-versions = "<4.0,>=3.11" files = [ - {file = "podcastfy-0.3.3-py3-none-any.whl", hash = 
"sha256:857813dea2b96da292a1f22226a2696066526abbba57289bd5cb5533eb12c041"}, - {file = "podcastfy-0.3.3.tar.gz", hash = "sha256:7ce62ba1ddaccc9d46c74cdafc5def90dfe2f9f26aac53c00f7b922476e80545"}, + {file = "podcastfy-0.3.5-py3-none-any.whl", hash = "sha256:c5aef31a1320800e5d22257ee763abb0e748a115112e1914084f5331462d1a32"}, + {file = "podcastfy-0.3.5.tar.gz", hash = "sha256:5b3111a28aaadd31c6e13983719996f05de7b70a7f656dd56ce6ed74e294e1ef"}, ] [package.dependencies] @@ -3841,7 +3827,7 @@ google-cloud-texttospeech = ">=2.21.0,<3.0.0" google-generativeai = ">=0.8.2,<0.9.0" httpx = ">=0.27.2,<0.28.0" langchain = ">=0.3.3,<0.4.0" -langchain-community = ">=0.3.2,<0.4.0" +langchain-community = ">=0.3.5,<0.4.0" langchain-google-genai = ">=2.0.1,<3.0.0" langchain-google-vertexai = ">=2.0.4,<3.0.0" litellm = ">=1.52.0,<2.0.0" @@ -4203,8 +4189,8 @@ files = [ annotated-types = ">=0.6.0" pydantic-core = "2.23.4" typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, + {version = ">=4.6.1", markers = "python_version < \"3.13\""}, ] [package.extras] @@ -5180,13 +5166,13 @@ websockets = "13.1" [[package]] name = "setuptools" -version = "75.4.0" +version = "75.5.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" files = [ - {file = "setuptools-75.4.0-py3-none-any.whl", hash = "sha256:b3c5d862f98500b06ffdf7cc4499b48c46c317d8d56cb30b5c8bce4d88f5c216"}, - {file = "setuptools-75.4.0.tar.gz", hash = "sha256:1dc484f5cf56fd3fe7216d7b8df820802e7246cfb534a1db2aa64f14fcb9cdcb"}, + {file = "setuptools-75.5.0-py3-none-any.whl", hash = "sha256:87cb777c3b96d638ca02031192d40390e0ad97737e27b6b4fa831bea86f2f829"}, + {file = "setuptools-75.5.0.tar.gz", hash = "sha256:5c4ccb41111392671f02bb5f8436dfc5a9a7185e80500531b133f5775c4163ef"}, ] [package.extras] @@ -5377,13 +5363,13 @@ testing = ["covdefaults (>=2.3)", "coverage 
(>=7.6.1)", "defusedxml (>=0.7.1)", [[package]] name = "sphinx-rtd-theme" -version = "3.0.1" +version = "3.0.2" description = "Read the Docs theme for Sphinx" optional = false python-versions = ">=3.8" files = [ - {file = "sphinx_rtd_theme-3.0.1-py2.py3-none-any.whl", hash = "sha256:921c0ece75e90633ee876bd7b148cfaad136b481907ad154ac3669b6fc957916"}, - {file = "sphinx_rtd_theme-3.0.1.tar.gz", hash = "sha256:a4c5745d1b06dfcb80b7704fe532eb765b44065a8fad9851e4258c8804140703"}, + {file = "sphinx_rtd_theme-3.0.2-py2.py3-none-any.whl", hash = "sha256:422ccc750c3a3a311de4ae327e82affdaf59eb695ba4936538552f3b00f4ee13"}, + {file = "sphinx_rtd_theme-3.0.2.tar.gz", hash = "sha256:b7457bc25dda723b20b086a670b9953c859eab60a2a03ee8eb2bb23e176e5f85"}, ] [package.dependencies] @@ -5504,68 +5490,60 @@ test = ["pytest"] [[package]] name = "sqlalchemy" -version = "2.0.36" +version = "2.0.35" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8318f4776c85abc3f40ab185e388bee7a6ea99e7fa3a30686580b209eaa35c08"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c245b1fbade9c35e5bd3b64270ab49ce990369018289ecfde3f9c318411aaa07"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69f93723edbca7342624d09f6704e7126b152eaed3cdbb634cb657a54332a3c5"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f9511d8dd4a6e9271d07d150fb2f81874a3c8c95e11ff9af3a2dfc35fe42ee44"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-win32.whl", hash = 
"sha256:c3f3631693003d8e585d4200730616b78fafd5a01ef8b698f6967da5c605b3fa"}, - {file = "SQLAlchemy-2.0.36-cp310-cp310-win_amd64.whl", hash = "sha256:a86bfab2ef46d63300c0f06936bd6e6c0105faa11d509083ba8f2f9d237fb5b5"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd3a55deef00f689ce931d4d1b23fa9f04c880a48ee97af488fd215cf24e2a6c"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f5e9cd989b45b73bd359f693b935364f7e1f79486e29015813c338450aa5a71"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ddd9db6e59c44875211bc4c7953a9f6638b937b0a88ae6d09eb46cced54eff"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2519f3a5d0517fc159afab1015e54bb81b4406c278749779be57a569d8d1bb0d"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59b1ee96617135f6e1d6f275bbe988f419c5178016f3d41d3c0abb0c819f75bb"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:39769a115f730d683b0eb7b694db9789267bcd027326cccc3125e862eb03bfd8"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-win32.whl", hash = "sha256:66bffbad8d6271bb1cc2f9a4ea4f86f80fe5e2e3e501a5ae2a3dc6a76e604e6f"}, - {file = "SQLAlchemy-2.0.36-cp311-cp311-win_amd64.whl", hash = "sha256:23623166bfefe1487d81b698c423f8678e80df8b54614c2bf4b4cfcd7c711959"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7b64e6ec3f02c35647be6b4851008b26cff592a95ecb13b6788a54ef80bbdd4"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46331b00096a6db1fdc052d55b101dbbfc99155a548e20a0e4a8e5e4d1362855"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdf3386a801ea5aba17c6410dd1dc8d39cf454ca2565541b5ac42a84e1e28f53"}, - {file = 
"SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9dfa18ff2a67b09b372d5db8743c27966abf0e5344c555d86cc7199f7ad83a"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:90812a8933df713fdf748b355527e3af257a11e415b613dd794512461eb8a686"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1bc330d9d29c7f06f003ab10e1eaced295e87940405afe1b110f2eb93a233588"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-win32.whl", hash = "sha256:79d2e78abc26d871875b419e1fd3c0bca31a1cb0043277d0d850014599626c2e"}, - {file = "SQLAlchemy-2.0.36-cp312-cp312-win_amd64.whl", hash = "sha256:b544ad1935a8541d177cb402948b94e871067656b3a0b9e91dbec136b06a2ff5"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-win32.whl", hash = "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436"}, - {file = "SQLAlchemy-2.0.36-cp313-cp313-win_amd64.whl", hash = "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88"}, - {file = 
"SQLAlchemy-2.0.36-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:be9812b766cad94a25bc63bec11f88c4ad3629a0cec1cd5d4ba48dc23860486b"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aae840ebbd6cdd41af1c14590e5741665e5272d2fee999306673a1bb1fdb4d"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4557e1f11c5f653ebfdd924f3f9d5ebfc718283b0b9beebaa5dd6b77ec290971"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07b441f7d03b9a66299ce7ccf3ef2900abc81c0db434f42a5694a37bd73870f2"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:28120ef39c92c2dd60f2721af9328479516844c6b550b077ca450c7d7dc68575"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-win32.whl", hash = "sha256:b81ee3d84803fd42d0b154cb6892ae57ea6b7c55d8359a02379965706c7efe6c"}, - {file = "SQLAlchemy-2.0.36-cp37-cp37m-win_amd64.whl", hash = "sha256:f942a799516184c855e1a32fbc7b29d7e571b52612647866d4ec1c3242578fcb"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3d6718667da04294d7df1670d70eeddd414f313738d20a6f1d1f379e3139a545"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:72c28b84b174ce8af8504ca28ae9347d317f9dba3999e5981a3cd441f3712e24"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b11d0cfdd2b095e7b0686cf5fabeb9c67fae5b06d265d8180715b8cfa86522e3"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e32092c47011d113dc01ab3e1d3ce9f006a47223b18422c5c0d150af13a00687"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6a440293d802d3011028e14e4226da1434b373cbaf4a4bbb63f845761a708346"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c54a1e53a0c308a8e8a7dffb59097bff7facda27c70c286f005327f21b2bd6b1"}, - 
{file = "SQLAlchemy-2.0.36-cp38-cp38-win32.whl", hash = "sha256:1e0d612a17581b6616ff03c8e3d5eff7452f34655c901f75d62bd86449d9750e"}, - {file = "SQLAlchemy-2.0.36-cp38-cp38-win_amd64.whl", hash = "sha256:8958b10490125124463095bbdadda5aa22ec799f91958e410438ad6c97a7b793"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dc022184d3e5cacc9579e41805a681187650e170eb2fd70e28b86192a479dcaa"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b817d41d692bf286abc181f8af476c4fbef3fd05e798777492618378448ee689"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e46a888b54be23d03a89be510f24a7652fe6ff660787b96cd0e57a4ebcb46d"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ae3005ed83f5967f961fd091f2f8c5329161f69ce8480aa8168b2d7fe37f06"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03e08af7a5f9386a43919eda9de33ffda16b44eb11f3b313e6822243770e9763"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3dbb986bad3ed5ceaf090200eba750b5245150bd97d3e67343a3cfed06feecf7"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-win32.whl", hash = "sha256:9fe53b404f24789b5ea9003fc25b9a3988feddebd7e7b369c8fac27ad6f52f28"}, - {file = "SQLAlchemy-2.0.36-cp39-cp39-win_amd64.whl", hash = "sha256:af148a33ff0349f53512a049c6406923e4e02bf2f26c5fb285f143faf4f0e46a"}, - {file = "SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e"}, - {file = "sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:67219632be22f14750f0d1c70e62f204ba69d28f62fd6432ba05ab295853de9b"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:4668bd8faf7e5b71c0319407b608f278f279668f358857dbfd10ef1954ac9f90"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb8bea573863762bbf45d1e13f87c2d2fd32cee2dbd50d050f83f87429c9e1ea"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f552023710d4b93d8fb29a91fadf97de89c5926c6bd758897875435f2a939f33"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:016b2e665f778f13d3c438651dd4de244214b527a275e0acf1d44c05bc6026a9"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7befc148de64b6060937231cbff8d01ccf0bfd75aa26383ffdf8d82b12ec04ff"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-win32.whl", hash = "sha256:22b83aed390e3099584b839b93f80a0f4a95ee7f48270c97c90acd40ee646f0b"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-win_amd64.whl", hash = "sha256:a29762cd3d116585278ffb2e5b8cc311fb095ea278b96feef28d0b423154858e"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e21f66748ab725ade40fa7af8ec8b5019c68ab00b929f6643e1b1af461eddb60"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8a6219108a15fc6d24de499d0d515c7235c617b2540d97116b663dade1a54d62"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:042622a5306c23b972192283f4e22372da3b8ddf5f7aac1cc5d9c9b222ab3ff6"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:627dee0c280eea91aed87b20a1f849e9ae2fe719d52cbf847c0e0ea34464b3f7"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4fdcd72a789c1c31ed242fd8c1bcd9ea186a98ee8e5408a50e610edfef980d71"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:89b64cd8898a3a6f642db4eb7b26d1b28a497d4022eccd7717ca066823e9fb01"}, + {file = 
"SQLAlchemy-2.0.35-cp311-cp311-win32.whl", hash = "sha256:6a93c5a0dfe8d34951e8a6f499a9479ffb9258123551fa007fc708ae2ac2bc5e"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-win_amd64.whl", hash = "sha256:c68fe3fcde03920c46697585620135b4ecfdfc1ed23e75cc2c2ae9f8502c10b8"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:eb60b026d8ad0c97917cb81d3662d0b39b8ff1335e3fabb24984c6acd0c900a2"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6921ee01caf375363be5e9ae70d08ce7ca9d7e0e8983183080211a062d299468"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cdf1a0dbe5ced887a9b127da4ffd7354e9c1a3b9bb330dce84df6b70ccb3a8d"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93a71c8601e823236ac0e5d087e4f397874a421017b3318fd92c0b14acf2b6db"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e04b622bb8a88f10e439084486f2f6349bf4d50605ac3e445869c7ea5cf0fa8c"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1b56961e2d31389aaadf4906d453859f35302b4eb818d34a26fab72596076bb8"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-win32.whl", hash = "sha256:0f9f3f9a3763b9c4deb8c5d09c4cc52ffe49f9876af41cc1b2ad0138878453cf"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-win_amd64.whl", hash = "sha256:25b0f63e7fcc2a6290cb5f7f5b4fc4047843504983a28856ce9b35d8f7de03cc"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f021d334f2ca692523aaf7bbf7592ceff70c8594fad853416a81d66b35e3abf9"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05c3f58cf91683102f2f0265c0db3bd3892e9eedabe059720492dbaa4f922da1"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:032d979ce77a6c2432653322ba4cbeabf5a6837f704d16fa38b5a05d8e21fa00"}, + {file = 
"SQLAlchemy-2.0.35-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:2e795c2f7d7249b75bb5f479b432a51b59041580d20599d4e112b5f2046437a3"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:cc32b2990fc34380ec2f6195f33a76b6cdaa9eecf09f0c9404b74fc120aef36f"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-win32.whl", hash = "sha256:9509c4123491d0e63fb5e16199e09f8e262066e58903e84615c301dde8fa2e87"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-win_amd64.whl", hash = "sha256:3655af10ebcc0f1e4e06c5900bb33e080d6a1fa4228f502121f28a3b1753cde5"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4c31943b61ed8fdd63dfd12ccc919f2bf95eefca133767db6fbbd15da62078ec"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a62dd5d7cc8626a3634208df458c5fe4f21200d96a74d122c83bc2015b333bc1"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0630774b0977804fba4b6bbea6852ab56c14965a2b0c7fc7282c5f7d90a1ae72"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d625eddf7efeba2abfd9c014a22c0f6b3796e0ffb48f5d5ab106568ef01ff5a"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ada603db10bb865bbe591939de854faf2c60f43c9b763e90f653224138f910d9"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c41411e192f8d3ea39ea70e0fae48762cd11a2244e03751a98bd3c0ca9a4e936"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-win32.whl", hash = "sha256:d299797d75cd747e7797b1b41817111406b8b10a4f88b6e8fe5b5e59598b43b0"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-win_amd64.whl", hash = "sha256:0375a141e1c0878103eb3d719eb6d5aa444b490c96f3fedab8471c7f6ffe70ee"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccae5de2a0140d8be6838c331604f91d6fafd0735dbdcee1ac78fc8fbaba76b4"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:2a275a806f73e849e1c309ac11108ea1a14cd7058577aba962cd7190e27c9e3c"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:732e026240cdd1c1b2e3ac515c7a23820430ed94292ce33806a95869c46bd139"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:890da8cd1941fa3dab28c5bac3b9da8502e7e366f895b3b8e500896f12f94d11"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c0d8326269dbf944b9201911b0d9f3dc524d64779a07518199a58384c3d37a44"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b76d63495b0508ab9fc23f8152bac63205d2a704cd009a2b0722f4c8e0cba8e0"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-win32.whl", hash = "sha256:69683e02e8a9de37f17985905a5eca18ad651bf592314b4d3d799029797d0eb3"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-win_amd64.whl", hash = "sha256:aee110e4ef3c528f3abbc3c2018c121e708938adeeff9006428dd7c8555e9b3f"}, + {file = "SQLAlchemy-2.0.35-py3-none-any.whl", hash = "sha256:2ab3f0336c0387662ce6221ad30ab3a5e6499aab01b9790879b6578fd9b8faa1"}, + {file = "sqlalchemy-2.0.35.tar.gz", hash = "sha256:e11d7ea4d24f0a262bccf9a7cd6284c976c5369dac21db237cff59586045ab9f"}, ] [package.dependencies] @@ -5578,7 +5556,7 @@ aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] asyncio = ["greenlet (!=0.4.17)"] asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] -mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] mssql = ["pyodbc"] mssql-pymssql = ["pymssql"] mssql-pyodbc = ["pyodbc"] diff --git a/pyproject.toml b/pyproject.toml index ed13bab..eb5c000 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "open-notebook" -version = "0.0.10" +version = "0.0.11" description = "An open source implementation of a research 
assistant, inspired by Google Notebook LM" authors = ["Luis Novo "] license = "MIT" From d20794e271a4e3ef8f7f250b65d28b678e74adf3 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:17:16 -0300 Subject: [PATCH 37/44] correct context options for sources and notes --- pages/stream_app/consts.py | 8 +++++++- pages/stream_app/note.py | 4 ++-- pages/stream_app/source.py | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pages/stream_app/consts.py b/pages/stream_app/consts.py index 352b338..eb2446a 100644 --- a/pages/stream_app/consts.py +++ b/pages/stream_app/consts.py @@ -1,4 +1,10 @@ -context_icons = [ +source_context_icons = [ + "⛔ not in context", + "🟡 summary", + "🟢 full content", +] + +note_context_icons = [ "⛔ not in context", "🟡 summary", "🟢 full content", diff --git a/pages/stream_app/note.py b/pages/stream_app/note.py index 4a1c5be..8c85523 100644 --- a/pages/stream_app/note.py +++ b/pages/stream_app/note.py @@ -9,7 +9,7 @@ from open_notebook.graphs.multipattern import graph as pattern_graph from open_notebook.utils import surreal_clean from pages.components import note_panel -from .consts import context_icons +from .consts import note_context_icons @st.dialog("Write a Note", width="large") @@ -60,7 +60,7 @@ def note_card(note, notebook_id): context_state = st.selectbox( "Context", label_visibility="collapsed", - options=context_icons, + options=note_context_icons, index=1, key=f"note_{note.id}", ) diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index 1a2c358..13978b4 100644 --- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -13,7 +13,7 @@ from open_notebook.exceptions import UnsupportedTypeException from open_notebook.graphs.source import source_graph from pages.components import source_panel -from .consts import context_icons +from .consts import source_context_icons @st.dialog("Source", width="large") @@ -113,7 +113,7 @@ def source_card(source, notebook_id): context_state = 
st.selectbox( "Context", label_visibility="collapsed", - options=context_icons, + options=source_context_icons, index=1, key=f"source_{source.id}", ) From bba5bf88ed2a38654e5b9b9584873e9af3bee6cd Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:22:01 -0300 Subject: [PATCH 38/44] only recommend models user can use --- pages/7_⚙️_Settings.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pages/7_⚙️_Settings.py b/pages/7_⚙️_Settings.py index 449ed39..af87821 100644 --- a/pages/7_⚙️_Settings.py +++ b/pages/7_⚙️_Settings.py @@ -80,13 +80,14 @@ def generate_new_models(models, suggested_models): # Check if model already exists if model_key not in existing_model_keys: - new_models.append( - { - "name": model_name, - "type": type_, - "provider": provider, - } - ) + if provider_status.get(provider): + new_models.append( + { + "name": model_name, + "type": type_, + "provider": provider, + } + ) return new_models From 1e35f069b0f33621a841464f9f0ed711126d0896 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 17:33:38 -0300 Subject: [PATCH 39/44] add option to save insight as note --- open_notebook/domain/notebook.py | 10 ++++++++++ pages/components/source_panel.py | 11 +++++++++-- pages/stream_app/source.py | 6 +++--- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/open_notebook/domain/notebook.py b/open_notebook/domain/notebook.py index 5ef3f14..0b9db84 100644 --- a/open_notebook/domain/notebook.py +++ b/open_notebook/domain/notebook.py @@ -130,6 +130,16 @@ class SourceInsight(ObjectModel): logger.exception(e) raise DatabaseOperationError(e) + def save_as_note(self, notebook_id: str = None) -> Any: + note = Note( + title=f"{self.insight_type} from source {self.source.title}", + content=self.content, + ) + note.save() + if notebook_id: + note.add_to_notebook(notebook_id) + return note + class Source(ObjectModel): table_name: ClassVar[str] = "source" diff --git a/pages/components/source_panel.py 
b/pages/components/source_panel.py index eac9e5f..57aeb6b 100644 --- a/pages/components/source_panel.py +++ b/pages/components/source_panel.py @@ -8,7 +8,7 @@ from open_notebook.utils import surreal_clean from pages.stream_app.utils import run_patterns -def source_panel(source_id: str, modal=False): +def source_panel(source_id: str, notebook_id=None, modal=False): source: Source = Source.get(source_id) if not source: raise ValueError(f"Source not found: {source_id}") @@ -36,11 +36,18 @@ def source_panel(source_id: str, modal=False): for insight in source.insights: with st.expander(f"**{insight.insight_type}**"): st.markdown(insight.content) - if st.button( + x1, x2 = st.columns(2) + if x1.button( "Delete", type="primary", key=f"delete_insight_{insight.id}" ): insight.delete() st.rerun(scope="fragment" if modal else "app") + if notebook_id: + if x2.button( + "Save as Note", icon="📝", key=f"save_note_{insight.id}" + ): + insight.save_as_note(notebook_id) + st.toast("Saved as Note. Refresh the Notebook to see it.") with c2: transformations = Transformation.get_all() diff --git a/pages/stream_app/source.py b/pages/stream_app/source.py index 13978b4..390ff2e 100644 --- a/pages/stream_app/source.py +++ b/pages/stream_app/source.py @@ -17,8 +17,8 @@ from .consts import source_context_icons @st.dialog("Source", width="large") -def source_panel_dialog(source_id): - source_panel(source_id, modal=True) +def source_panel_dialog(source_id, notebook_id=None): + source_panel(source_id, notebook_id=notebook_id, modal=True) @st.dialog("Add a Source", width="large") @@ -121,7 +121,7 @@ def source_card(source, notebook_id): f"Updated: {naturaltime(source.updated)}, **{len(source.insights)}** insights" ) if st.button("Expand", icon="📝", key=source.id): - source_panel_dialog(source.id) + source_panel_dialog(source.id, notebook_id) st.session_state[notebook_id]["context_config"][source.id] = context_state From d460b0947a8115c45c33db92d5eed05f3318fe58 Mon Sep 17 00:00:00 2001 From: LUIS 
NOVO Date: Wed, 13 Nov 2024 19:03:03 -0300 Subject: [PATCH 40/44] wip - remote content from text search until can do partial search --- migrations/4.surrealql | 4 ++-- pages/3_🔍_Ask_and_Search.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/migrations/4.surrealql b/migrations/4.surrealql index f89531e..7744087 100644 --- a/migrations/4.surrealql +++ b/migrations/4.surrealql @@ -63,13 +63,13 @@ DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: let $note_results = array::union($note_title_search, $note_content_search ); let $final_results = array::union($source_results, $note_results ); - RETURN (select id, parent_id, title, math::max(relevance) as relevance, - array::flatten(content) as matches + RETURN (select id, parent_id, title, math::max(relevance) as relevance from $final_results where id is not None group by id, parent_id, title ORDER BY relevance DESC LIMIT $match_count); }; + REMOVE FUNCTION IF EXISTS fn::vector_search; DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array, $match_count: int, $sources: bool, $show_notes: bool, $min_similarity: float) { diff --git a/pages/3_🔍_Ask_and_Search.py b/pages/3_🔍_Ask_and_Search.py index 7956737..fe54ec8 100644 --- a/pages/3_🔍_Ask_and_Search.py +++ b/pages/3_🔍_Ask_and_Search.py @@ -42,9 +42,10 @@ def results_card(item): st.markdown( f"[{score:.2f}] **[{item['title']}](/?object_id={item['parent_id']})**" ) - with st.expander("Matches"): - for match in item["matches"]: - st.markdown(match) + if "matches" in item: + with st.expander("Matches"): + for match in item["matches"]: + st.markdown(match) with ask_tab: From 4f9aa63b3ebde89b02b89922d6e57da7d9f521e0 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 19:08:03 -0300 Subject: [PATCH 41/44] add longform option to podcast generation --- open_notebook/plugins/podcasts.py | 11 ++--------- pages/5_🎙️_Podcasts.py | 14 -------------- pages/stream_app/chat.py | 6 ++++++ 3 files changed, 8 
insertions(+), 23 deletions(-) diff --git a/open_notebook/plugins/podcasts.py b/open_notebook/plugins/podcasts.py index 39adec9..b35083e 100644 --- a/open_notebook/plugins/podcasts.py +++ b/open_notebook/plugins/podcasts.py @@ -32,7 +32,6 @@ class PodcastConfig(ObjectModel): transcript_model_provider: Optional[str] = None user_instructions: Optional[str] = None ending_message: Optional[str] = None - wordcount: int = Field(ge=400, le=10000) creativity: float = Field(ge=0, le=1) provider: str = Field(default="openai") voice1: str @@ -53,12 +52,12 @@ class PodcastConfig(ObjectModel): raise ValueError("Both voice1 and voice2 must be provided") return self - def generate_episode(self, episode_name, text, instructions=None): + def generate_episode(self, episode_name, text, longform=False, instructions=None): self.user_instructions = ( instructions if instructions else self.user_instructions ) conversation_config = { - "word_count": self.wordcount, + "longform": longform, "conversation_style": self.conversation_style, "roles_person1": self.person1_role, "roles_person2": self.person2_role, @@ -130,12 +129,6 @@ class PodcastConfig(ObjectModel): raise ValueError(f"{field.field_name} cannot be None or empty string") return value.strip() - @field_validator("wordcount") - def validate_wordcount(cls, value): - if not 400 <= value <= 6000: - raise ValueError("Wordcount must be between 400 and 10000") - return value - @field_validator("creativity") def validate_creativity(cls, value): if not 0 <= value <= 1: diff --git a/pages/5_🎙️_Podcasts.py b/pages/5_🎙️_Podcasts.py index 40ede27..841ee8b 100644 --- a/pages/5_🎙️_Podcasts.py +++ b/pages/5_🎙️_Podcasts.py @@ -104,9 +104,6 @@ with templates_tab: [], dialogue_structures, "Dialogue Structure", key="dialogue_structures" ) st.caption(f"Suggestions:{', '.join(dialogue_structures)}") - pd_cfg["wordcount"] = st.slider( - "Word Count", min_value=400, max_value=6000, step=50 - ) pd_cfg["creativity"] = st.slider( "Creativity", min_value=0.0, 
max_value=1.0, step=0.05 ) @@ -216,14 +213,6 @@ with templates_tab: key=f"dialogue_structure_{pd_config.id}", ) st.caption(f"Suggestions:{', '.join(dialogue_structures)}") - pd_config.wordcount = st.slider( - "Word Count", - min_value=400, - max_value=6000, - step=50, - value=pd_config.wordcount, - key=f"wordcount_{pd_config.id}", - ) pd_config.creativity = st.slider( "Creativity", min_value=0.0, @@ -240,9 +229,6 @@ with templates_tab: ) if pd_config.transcript_model_provider not in transcript_provider_models: - st.warning( - f"Transcript Model Provider {pd_config.transcript_model_provider} not setup. Changing to default." - ) index = 0 else: index = list(transcript_provider_models.keys()).index( diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py index 7907eb2..a513c60 100644 --- a/pages/stream_app/chat.py +++ b/pages/stream_app/chat.py @@ -83,6 +83,11 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): instructions = st.text_area( "Instructions", value=selected_template.user_instructions ) + podcast_length = st.radio( + "Podcast Length", + ["Short (5-10 min)", "Long (20-30 min)"], + ) + longform = podcast_length == "Long (20-30 min)" if len(context.get("note", [])) + len(context.get("source", [])) == 0: st.warning( "No notes or sources found in context. You don't want a boring podcast, right? So, add some context first." 
@@ -94,6 +99,7 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): selected_template.generate_episode( episode_name=episode_name, text=context, + longform=longform, instructions=instructions, ) st.success("Episode generated successfully") From dd99531b00d725188c67eb4d613071d552451378 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 21:46:15 -0300 Subject: [PATCH 42/44] final tweaks to podcast --- open_notebook/plugins/podcasts.py | 18 ++++++++++++------ pages/5_🎙️_Podcasts.py | 6 ------ pages/stream_app/chat.py | 2 +- pages/stream_app/consts.py | 1 - poetry.lock | 12 ++++++------ pyproject.toml | 2 +- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/open_notebook/plugins/podcasts.py b/open_notebook/plugins/podcasts.py index b35083e..7f2a23a 100644 --- a/open_notebook/plugins/podcasts.py +++ b/open_notebook/plugins/podcasts.py @@ -52,12 +52,17 @@ class PodcastConfig(ObjectModel): raise ValueError("Both voice1 and voice2 must be provided") return self - def generate_episode(self, episode_name, text, longform=False, instructions=None): + def generate_episode( + self, + episode_name: str, + text: str, + instructions: str = "", + longform: bool = False, + ): self.user_instructions = ( instructions if instructions else self.user_instructions ) conversation_config = { - "longform": longform, "conversation_style": self.conversation_style, "roles_person1": self.person1_role, "roles_person2": self.person2_role, @@ -87,10 +92,6 @@ class PodcastConfig(ObjectModel): }, } - logger.debug( - f"Generating episode {episode_name} with config {conversation_config}" - ) - api_key_label = None llm_model_name = None if self.transcript_model_provider: @@ -104,12 +105,17 @@ class PodcastConfig(ObjectModel): api_key_label = "GEMINI_API_KEY" llm_model_name = self.transcript_model + logger.debug( + f"Generating episode {episode_name} with config {conversation_config} and using model {llm_model_name}" + ) + audio_file = generate_podcast( 
conversation_config=conversation_config, text=text, tts_model=self.provider, llm_model_name=llm_model_name, api_key_label=api_key_label, + longform=longform, ) episode = PodcastEpisode( name=episode_name, diff --git a/pages/5_🎙️_Podcasts.py b/pages/5_🎙️_Podcasts.py index 841ee8b..e9539c9 100644 --- a/pages/5_🎙️_Podcasts.py +++ b/pages/5_🎙️_Podcasts.py @@ -246,9 +246,6 @@ with templates_tab: or pd_config.transcript_model not in transcript_provider_models[pd_config.transcript_model_provider] ): - st.warning( - f"Transcript Model {pd_config.transcript_model} not setup. Changing to default." - ) index = 0 else: index = transcript_provider_models[ @@ -268,9 +265,6 @@ with templates_tab: key=f"provider_{pd_config.id}", ) if pd_config.model not in provider_models[pd_config.provider]: - st.warning( - f"Audio Model {pd_config.model} not setup. Changing to default." - ) index = 0 else: index = provider_models[pd_config.provider].index(pd_config.model) diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py index a513c60..97cdef6 100644 --- a/pages/stream_app/chat.py +++ b/pages/stream_app/chat.py @@ -98,7 +98,7 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): with st.spinner("Go grab a coffee, almost there..."): selected_template.generate_episode( episode_name=episode_name, - text=context, + text=str(context), longform=longform, instructions=instructions, ) diff --git a/pages/stream_app/consts.py b/pages/stream_app/consts.py index eb2446a..60aadb0 100644 --- a/pages/stream_app/consts.py +++ b/pages/stream_app/consts.py @@ -6,6 +6,5 @@ source_context_icons = [ note_context_icons = [ "⛔ not in context", - "🟡 summary", "🟢 full content", ] diff --git a/poetry.lock b/poetry.lock index 7494da1..13ad0ef 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2430,13 +2430,13 @@ six = "*" [[package]] name = "langgraph" -version = "0.2.46" +version = "0.2.47" description = "Building stateful, multi-actor applications with LLMs" optional = false 
python-versions = "<4.0,>=3.9.0" files = [ - {file = "langgraph-0.2.46-py3-none-any.whl", hash = "sha256:7ca4031bc8f06cb3697ccee21ca80464588656391d3888090b6790dd89bffad1"}, - {file = "langgraph-0.2.46.tar.gz", hash = "sha256:f1a39ba0d9b3df3b807b14015e682bc2b445ceb760d113e75ab34209a7eaadef"}, + {file = "langgraph-0.2.47-py3-none-any.whl", hash = "sha256:597bad088c245741b79d46aea351df1b9bc0b2b127122c39ca2a7c0164e40b4f"}, + {file = "langgraph-0.2.47.tar.gz", hash = "sha256:23b6ea1fe5c6d57f510dee9a66fbc2cdb546ab6c13756fc28534343b36b7935f"}, ] [package.dependencies] @@ -3807,13 +3807,13 @@ files = [ [[package]] name = "podcastfy" -version = "0.3.5" +version = "0.3.6" description = "An Open Source alternative to NotebookLM's podcast feature: Transforming Multimodal Content into Captivating Multilingual Audio Conversations with GenAI" optional = false python-versions = "<4.0,>=3.11" files = [ - {file = "podcastfy-0.3.5-py3-none-any.whl", hash = "sha256:c5aef31a1320800e5d22257ee763abb0e748a115112e1914084f5331462d1a32"}, - {file = "podcastfy-0.3.5.tar.gz", hash = "sha256:5b3111a28aaadd31c6e13983719996f05de7b70a7f656dd56ce6ed74e294e1ef"}, + {file = "podcastfy-0.3.6-py3-none-any.whl", hash = "sha256:8584fd044f63e236d30678a75188f5e7c7335170e24f80e647f7a480661e0c13"}, + {file = "podcastfy-0.3.6.tar.gz", hash = "sha256:b4c787b999b4dda70504f99520802b0dfc99e76703089443a9b4a76c85bf5047"}, ] [package.dependencies] diff --git a/pyproject.toml b/pyproject.toml index eb5c000..7cc700f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "open-notebook" -version = "0.0.11" +version = "0.1.0" description = "An open source implementation of a research assistant, inspired by Google Notebook LM" authors = ["Luis Novo "] license = "MIT" From cf4a2198202d3f283b8a72da6bfbb94399dfa894 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 22:04:16 -0300 Subject: [PATCH 43/44] wip - longform podcasts --- pages/stream_app/chat.py | 17 ++++++++++++----- 1 
file changed, 12 insertions(+), 5 deletions(-) diff --git a/pages/stream_app/chat.py b/pages/stream_app/chat.py index 97cdef6..0671c85 100644 --- a/pages/stream_app/chat.py +++ b/pages/stream_app/chat.py @@ -83,11 +83,18 @@ def chat_sidebar(current_notebook: Notebook, current_session: ChatSession): instructions = st.text_area( "Instructions", value=selected_template.user_instructions ) - podcast_length = st.radio( - "Podcast Length", - ["Short (5-10 min)", "Long (20-30 min)"], - ) - longform = podcast_length == "Long (20-30 min)" + # if selected_template.provider == "gemini": + # st.warning( + # "Gemini models are not available for long podcast generation yet. So, this will be a short podcast. Coming soon. Pinky promise. If you want to try long podcasts, please change your text to speech model to Open AI." + # ) + # longform = False + # else: + # podcast_length = st.radio( + # "Podcast Length", + # ["Short (5-10 min)", "Long (20-30 min)"], + # ) + # longform = podcast_length == "Long (20-30 min)" + longform = False if len(context.get("note", [])) + len(context.get("source", [])) == 0: st.warning( "No notes or sources found in context. You don't want a boring podcast, right? So, add some context first." From 51dc60bc82b1db9ba36930f4dc2c61e185882a09 Mon Sep 17 00:00:00 2001 From: LUIS NOVO Date: Wed, 13 Nov 2024 22:08:57 -0300 Subject: [PATCH 44/44] update docs --- README.md | 22 ++++++++++++++++++++++ docs/SETUP.md | 6 +++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4d19875..7277bc3 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,19 @@ +## 📢 Open Notebook is under very active development + +> Open Notebook is under active development! We're moving fast and making improvements every week. Your feedback is incredibly valuable to me during this exciting phase and it gives me motivation to keep improving and building this amazing tool. 
Please feel free to star the project if you find it useful, and don't hesitate to reach out with any questions or suggestions. I'm excited to see how you'll use it and what ideas you'll bring to the project! Let's build something amazing together! 🚀 +> +> ⚠️ **API Changes**: As we optimize and enhance the project, some APIs and interfaces might change. We'll do our best to document these changes and minimize disruption. +> +> 🙏 **We Need Your Feedback**: Please try out Open Notebook and let us know what you think! Submit issues, feature requests, or just share your experience through: +> - GitHub Issues +> - Discussions +> - Pull Requests +> +> Together, we can make it even better! +
@@ -153,6 +166,15 @@ Go to the [Usage](docs/USAGE.md) page to learn how to use all features. ## 🚀 New Features +### v0.1 - Release Candidate + +- Better citations and improved search capabilities +- The "Ask" feature is much smarter now and lets you check its thinking +- Enabled support for X.AI and Groq models +- Select default transformations to apply to all content +- Save insights as custom notes +- Items are added to context by default + ### v0.0.10 - Gemini podcast model - Added the Gemini model for generating much more fluid and engaging podcasts diff --git a/docs/SETUP.md b/docs/SETUP.md index 177129c..ec46df9 100644 --- a/docs/SETUP.md +++ b/docs/SETUP.md @@ -89,10 +89,10 @@ Go to the settings page and create your different models. | Model Type | Supported Providers | |------------|-----------| -| Language | OpenAI, Anthropic, Open Router, LiteLLM, Vertex AI, Vertex AI, Anthropic, Gemini, Ollama | +| Language | OpenAI, Anthropic, Open Router, LiteLLM, Vertex AI, Vertex AI, Anthropic, Gemini, Ollama, xAI, Groq | | Embedding | OpenAI, Gemini, Vertex AI, Ollama | -| Speech to Text | OpenAI | -| Text to Speech | OpenAI, ElevenLabs | +| Speech to Text | OpenAI, Groq | +| Text to Speech | OpenAI, ElevenLabs, Gemini | > 📝 **Notice:** For complete usage of all the features, you need to setup at least 4 models (one of each type).