moved toc to a pattern

This commit is contained in:
LUIS NOVO 2024-11-01 19:08:47 -03:00
parent edf839cd1b
commit a24faaba44
4 changed files with 45 additions and 27 deletions

View file

@ -1,7 +1,6 @@
import os
from typing import Any, ClassVar, Dict, List, Literal, Optional
from langchain_core.runnables.config import RunnableConfig
from loguru import logger
from pydantic import BaseModel, Field, field_validator
@ -15,8 +14,8 @@ from open_notebook.exceptions import (
DatabaseOperationError,
InvalidInputError,
)
from open_notebook.graphs.multipattern import graph as pattern_graph
from open_notebook.graphs.recursive_toc import graph as toc_graph
# from temp.recursive_toc import graph as toc_graph
from open_notebook.utils import split_text, surreal_clean
@ -211,29 +210,6 @@ class Source(ObjectModel):
logger.error(f"Error adding insight to source {self.id}: {str(e)}")
raise DatabaseOperationError(e)
# todo: move this to content processing pipeline as a major graph
def generate_toc_and_title(self) -> "Source":
    """Attach a Table of Contents insight to this source and title it if untitled.

    The full text is sent through ``toc_graph``; the ``"toc"`` entry of the
    result is stored as a "Table of Contents" insight. When the source has
    no title, the ToC is passed to ``pattern_graph`` with a title-writing
    instruction, the cleaned output becomes the title, and the source is saved.

    Returns:
        This source instance.

    Raises:
        DatabaseOperationError: wraps any exception raised along the way.
    """
    DEFAULT_MODELS, EMBEDDING_MODEL, SPEECH_TO_TEXT_MODEL = load_default_models()
    try:
        graph_config = RunnableConfig(configurable={"thread_id": self.id})
        toc_state = toc_graph.invoke({"content": self.full_text}, config=graph_config)
        self.add_insight("Table of Contents", surreal_clean(toc_state["toc"]))

        # Already titled: the insight above is all that was needed.
        if self.title:
            return self

        title_prompts = [
            "Based on the Table of Contents below, please provide a Title for this content, with max 15 words"
        ]
        title_state = pattern_graph.invoke(
            {"content_stack": [toc_state["toc"]], "transformations": title_prompts}
        )
        self.title = surreal_clean(title_state["output"])
        self.save()
        return self
    except Exception as e:
        logger.error(f"Error summarizing source {self.id}: {str(e)}")
        logger.exception(e)
        raise DatabaseOperationError(e)
class Note(ObjectModel):
table_name: ClassVar[str] = "note"

View file

@ -0,0 +1,15 @@
# SYSTEM ROLE
You are a content analysis assistant that reads through documents and provides a Table of Contents (ToC) to help users identify what the document covers more easily.
Your ToC should capture all major topics and transitions in the content and should mention them in the order they appear.
# TASK
Analyze the provided content and create a Table of Contents that:
- Captures the core topics included in the text
- Gives a small description of what is covered
# INPUT
{{content}}
# OUTPUT

View file

@ -24,6 +24,28 @@ def run_patterns(input_text, patterns):
return output["output"]
# moved it here to replace it with the pipeline on 0.1.0
def generate_toc_and_title(source) -> "Source":
    """Generate a Table of Contents insight for ``source`` and a title if it has none.

    The source's full text is run through the ``patterns/default/toc`` pattern
    and the cleaned result is stored as a "Table of Contents" insight. If the
    source is untitled, the ToC text is run through a title-writing
    instruction, the cleaned result becomes the title, and the source is saved.

    Args:
        source: the source object to process; must expose ``full_text``,
            ``title``, ``id``, ``add_insight`` and ``save``.

    Returns:
        The same ``source`` object.

    Raises:
        Exception: any error is logged and re-raised unchanged.
    """
    # Return values are not used below; the call is kept as in the original.
    DEFAULT_MODELS, EMBEDDING_MODEL, SPEECH_TO_TEXT_MODEL = load_default_models()
    try:
        # run_patterns returns the final output text itself (not a state dict),
        # so the result must not be indexed with ["toc"] / ["output"].
        toc = run_patterns(source.full_text, patterns=["patterns/default/toc"])
        source.add_insight("Table of Contents", surreal_clean(toc))
        if not source.title:
            transformations = [
                "Based on the Table of Contents below, please provide a Title for this content, with max 15 words"
            ]
            # run_patterns only accepts (input_text, patterns); the free-text
            # instruction is passed through the same ``patterns`` parameter.
            title = run_patterns(toc, patterns=transformations)
            source.title = surreal_clean(title)
            # Persist only when the title was actually set.
            source.save()
        return source
    except Exception as e:
        logger.error(f"Error summarizing source {source.id}: {str(e)}")
        logger.exception(e)
        raise
@st.dialog("Source", width="large")
def source_panel(source_id):
source: Source = Source.get(source_id)
@ -151,7 +173,7 @@ def add_source(session_id):
source.save()
source.add_to_notebook(st.session_state[session_id]["notebook"].id)
st.write("Summarizing...")
source.generate_toc_and_title()
generate_toc_and_title(source)
except UnsupportedTypeException as e:
st.warning(
"This type of content is not supported yet. If you think it should be, let us know on the project Issues's page"

View file

@ -16,6 +16,11 @@ source_insights:
description: "Create a dense representation of the content"
patterns:
- patterns/default/makeitdense
# Uses the dedicated ToC prompt (patterns/default/toc), not the paper-analysis one.
- name: "Table of Contents"
  insight_type: "Table of Contents"
  description: "Analyzes the content and returns a ToC"
  patterns:
    - patterns/default/toc
- name: "Analyze Paper"
insight_type: "Paper Analysis"
description: "Analyze the paper and provide a quick summary"