Merge pull request #6 from lfnovo/transformations

Transformations
This commit is contained in:
Luis Novo 2024-10-23 16:10:02 -03:00 committed by GitHub
commit d11dbf700a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 990 additions and 321 deletions

View file

@ -38,7 +38,8 @@ OPENROUTER_API_KEY=
# LANGCHAIN_PROJECT="Open Notebook"
# CONNECTION DETAILS FOR YOUR SURREAL DB
SURREAL_ADDRESS="ws://localhost:8000/rpc"
SURREAL_ADDRESS="localhost"
SURREAL_PORT=8000
SURREAL_USER="root"
SURREAL_PASS="root"
SURREAL_NAMESPACE="open_notebook"

View file

@ -28,7 +28,6 @@ docker-build: docker-buildx-prepare
docker buildx build \
--platform $(PLATFORMS) \
-t $(IMAGE_NAME):$(VERSION) \
-t $(IMAGE_NAME):latest \
--push \
.
@ -41,4 +40,14 @@ docker-release: docker-build
# Comando útil para verificar as plataformas suportadas após o build
docker-check-platforms:
docker manifest inspect $(IMAGE_NAME):$(VERSION)
docker manifest inspect $(IMAGE_NAME):$(VERSION)
docker-update-latest: docker-buildx-prepare
docker buildx build \
--platform $(PLATFORMS) \
-t $(IMAGE_NAME):latest \
--push \
.
# Release with latest
docker-release-all: docker-release docker-update-latest

View file

@ -72,7 +72,15 @@ Go to the [Usage](docs/USAGE.md) page to learn how to use all features.
## 🚀 New Features
### v0.0.2 - Several new providers are supported now:
### v0.0.3 - Transformations ✨
We just released a much more powerful way to create more value from your sources.
Transformations enable you to extract an unlimited amount of insights from your content.
It's 100% customizable and you can extend it to your own needs, like Paper Analysis, Article Writing, etc.
Head to the [Transformations](docs/TRANSFORMATIONS.md) page for more info
### v0.0.2 - Several new providers are supported now:
- OpenAI
- Anthropic
@ -106,14 +114,10 @@ Locate anything across your research with ease using full-text and vector-based
Jinja based prompts that are easy to customize to your own preferences.
## 🌟 Coming Soon
- **Podcast Generator**: Automatically convert your notes into a podcast format.
- **Multi-model support**: Anthropic, Gemini, Mistral, Ollama coming soon.
- **Enhanced Citations**: Improved layout and finer control for citations.
- **Insight Generation**: New tools for creating insights, leveraging the Fabric framework.
- **Better Embeddings & Summarization**: Smarter ways to distill information.
- **Multiple Chat Sessions**: Juggle different discussions within the same notebook.
- **Live Front-End Updates**: Real-time UI updates for a smoother experience.
@ -121,6 +125,8 @@ Jinja based prompts that are easy to customize to your own preferences.
- **Improved Error Handling**: Making everything more robust.
- **Cross-Notebook Sources and Notes**: Reuse research notes across projects.
- **Bookmark Integration**: Integrate with your favorite bookmarking app.
- **Multi-model support**: Anthropic, Gemini, Mistral, Ollama coming soon. ✅ 0.0.2
- **Insight Generation**: New tools for creating insights - [transformations](docs/TRANSFORMATIONS.md) ✅ 0.0.3
## 💻 Tech Stack

View file

@ -1,19 +1,27 @@
import streamlit as st
from open_notebook.exceptions import InvalidDatabaseSchema
from open_notebook.exceptions import InvalidDatabaseSchema, NoSchemaFound
from open_notebook.repository import check_version, execute_migration
try:
check_version()
st.switch_page("pages/2_📒_Notebooks.py")
except NoSchemaFound as e:
st.warning(e)
if st.button("Create Schema.."):
try:
execute_migration("db_setup.surrealql")
st.success("Schema created successfully")
st.rerun()
except Exception as e:
st.error(e)
except InvalidDatabaseSchema as e:
st.error(e)
st.warning(e)
if st.button("Execute Migration.."):
try:
execute_migration()
execute_migration("0_0_1_to_0_0_2.surrealql")
st.success("Migration executed successfully")
st.rerun()
except Exception as e:
st.error(e)
st.stop()
st.switch_page("pages/2_📒_Notebooks.py")

View file

@ -0,0 +1,77 @@
-- Migration 0.0.1 -> 0.0.2 (schema part).
-- Full text now lives on the `source` record itself (field `full_text`)
-- instead of in the separate `source_chunk` table.

-- Drop the old index before its table so the index removal never targets
-- a table that has already been removed.
REMOVE INDEX IF EXISTS idx_source_full ON TABLE source_chunk;
REMOVE TABLE IF EXISTS source_chunk;

-- fn::text_search matches on `source.full_text` with the `@1@` operator,
-- so the field and its full-text index must exist after the migration.
-- NOTE(review): existing chunk content is dropped, not copied into
-- `full_text`; confirm that re-ingesting sources is the intended upgrade path.
DEFINE FIELD IF NOT EXISTS full_text ON TABLE source TYPE option<string>;
DEFINE INDEX IF NOT EXISTS idx_source_full_text ON TABLE source COLUMNS full_text SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
-- Drop any previous definition so the body below always takes effect.
REMOVE FUNCTION IF EXISTS fn::text_search;

-- fn::text_search
--   $query_text  : text to match with the full-text `@1@` operator
--   $match_count : maximum number of rows returned
--   $sources     : when true, search source titles, embeddings chunks,
--                  full text and insights
--   $show_notes  : when true, search note titles and content
-- Returns rows of { item_id, relevance }, one per item, keeping the best
-- score found for that item, ordered by relevance descending.
DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count: int, $sources:bool, $show_notes:bool) {

    let $source_title_search =
        IF $sources {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM source
            WHERE title @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    let $source_embedding_search =
        IF $sources {(
            SELECT source as item_id, math::max(search::score(1)) AS relevance
            FROM source_embedding
            WHERE content @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    -- This query runs against the `source` table itself, so the item is the
    -- record's own `id` (as in the title search above), not a `source` field.
    let $source_full_search =
        IF $sources {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM source
            WHERE full_text @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    let $source_insight_search =
        IF $sources {(
            SELECT source as item_id, math::max(search::score(1)) AS relevance
            FROM source_insight
            WHERE content @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    let $note_title_search =
        IF $show_notes {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM note
            WHERE title @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    let $note_content_search =
        IF $show_notes {(
            SELECT id as item_id, math::max(search::score(1)) AS relevance
            FROM note
            WHERE content @1@ $query_text
            GROUP BY item_id)}
        ELSE { [] };

    -- Merge every partial result set into one list before ranking.
    let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
    let $source_asset_results = array::union($source_title_search, $source_insight_search);
    let $source_results = array::union($source_chunk_results, $source_asset_results );
    let $note_results = array::union($note_title_search, $note_content_search );
    let $final_results = array::union($source_results, $note_results );

    -- Collapse duplicates per item, keeping its highest relevance.
    RETURN (SELECT item_id, math::max(relevance) as relevance from $final_results
        group by item_id ORDER BY relevance DESC LIMIT $match_count);
};
-- Record that the database schema is now at version 0.0.2.
UPDATE open_notebook:database_info SET version = "0.0.2";

View file

@ -22,18 +22,12 @@ DEFINE FIELD asset
FLEXIBLE TYPE option<object>;
DEFINE FIELD title ON TABLE source TYPE option<string>;
-- DEFINE FIELD summary ON TABLE source TYPE option<string>;
DEFINE FIELD full_text ON TABLE source TYPE option<string>;
DEFINE FIELD topics ON TABLE source TYPE option<array<string>>;
DEFINE FIELD created ON source DEFAULT time::now() VALUE $before OR time::now();
DEFINE FIELD updated ON source DEFAULT time::now() VALUE time::now();
-- temporary while surreal doesn't fix the sdk
DEFINE TABLE IF NOT EXISTS source_chunk SCHEMAFULL;
DEFINE FIELD source ON TABLE source_chunk TYPE record<source>;
DEFINE FIELD order ON TABLE source_chunk TYPE int;
DEFINE FIELD content ON TABLE source_chunk TYPE string;
DEFINE TABLE IF NOT EXISTS source_embedding SCHEMAFULL;
DEFINE FIELD source ON TABLE source_embedding TYPE record<source>;
DEFINE FIELD order ON TABLE source_embedding TYPE int;
@ -77,7 +71,7 @@ FROM note TO notebook;
DEFINE ANALYZER my_analyzer TOKENIZERS blank,class,camel,punct FILTERS snowball(english), lowercase;
DEFINE INDEX idx_source_title ON TABLE source COLUMNS title SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_full ON TABLE source_chunk COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_full_text ON TABLE source COLUMNS full_text SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_embed_chunk ON TABLE source_embedding COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_source_insight ON TABLE source_insight COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
DEFINE INDEX idx_note ON TABLE note COLUMNS content SEARCH ANALYZER my_analyzer BM25 HIGHLIGHTS;
@ -102,11 +96,11 @@ DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count:
GROUP BY item_id)}
ELSE { [] };
let $source_chunk_search =
let $source_full_search =
IF $sources {(
SELECT source as item_id, math::max(search::score(1)) AS relevance
FROM source_chunk
WHERE content @1@ $query_text
FROM source
WHERE full_text @1@ $query_text
GROUP BY item_id)}
ELSE { [] };
@ -134,7 +128,7 @@ DEFINE FUNCTION IF NOT EXISTS fn::text_search($query_text: string, $match_count:
GROUP BY item_id)}
ELSE { [] };
let $source_chunk_results = array::union($source_embedding_search, $source_chunk_search);
let $source_chunk_results = array::union($source_embedding_search, $source_full_search);
let $source_asset_results = array::union($source_title_search, $source_insight_search);
@ -188,9 +182,9 @@ DEFINE FUNCTION IF NOT EXISTS fn::vector_search($query: array<float>, $match_cou
};
CREATE open_notebook:database_info SET
version= "0.0.1";
version= "0.0.2";
UPDATE open_notebook:database_info SET
version= "0.0.1";
version= "0.0.2";

View file

@ -52,7 +52,8 @@ docker run -d \
-p 8080:8502 \
-e OPENAI_API_KEY=API_KEY \
-e DEFAULT_MODEL="gpt-4o-mini" \
-e SURREAL_ADDRESS="ws://localhost:8000/rpc" \
-e SURREAL_ADDRESS="localhost" \
-e SURREAL_PORT=8000 \
-e SURREAL_USER="root" \
-e SURREAL_PASS="root" \
-e SURREAL_NAMESPACE="open_notebook" \
@ -104,7 +105,8 @@ services:
environment:
- OPENAI_API_KEY=API_KEY
- DEFAULT_MODEL=gpt-4o-mini
- SURREAL_ADDRESS=ws://surrealdb:8000/rpc
- SURREAL_ADDRESS=surrealdb
- SURREAL_PORT=8000
- SURREAL_USER=root
- SURREAL_PASS=root
- SURREAL_NAMESPACE=open_notebook

108
docs/TRANSFORMATIONS.md Normal file
View file

@ -0,0 +1,108 @@
# Transformations Guide
**Unleashing the hidden knowledge of your content**
Transformations are a core concept within Open Notebook, providing a flexible and powerful way to generate new insights by applying a series of processing steps to your content. Based on the Fabric framework, Transformations allow you to customize how information is distilled, summarized, and enriched, opening up new ways to understand and engage with your research.
![New Notebook](assets/transformations.png)
### What is a Transformation?
A **Transformation** is a sequence of operations that modifies text input to produce a different output. Whether you're summarizing an article, generating key insights, or creating reflective questions, Transformations allow you to automate and enrich the processing of your content. Each transformation is composed of one or more blocks called **Patterns**, which can be linked in a chain to achieve complex results.
Below is a diagram that illustrates how a transformation is composed of multiple patterns that are processed sequentially:
```mermaid
graph TD;
A[Input Text] --> B[Pattern 1: Summarize];
B --> C[Pattern 2: Generate Topics];
C --> D[Pattern 3: Reflection Questions];
D --> E[Final Output]
```
For example, you could start by summarizing a text, then use that summary to generate specific topics or reflections. By applying these layered processing steps, Transformations help you generate deeper insights and maximize the value of your content.
### Setting Up Transformations
To set up your own Transformations, you'll define them in the `transformations.yaml` file. Below is an example setup:
```yaml
source_insights:
- name: "Summarize"
insight_type: "Content Summary"
description: "Summarize the content"
transformations:
- patterns/makeitdense
- patterns/summarize
- name: "Key Insights"
insight_type: "Key Insights"
description: "Extracts a list of the Key Insights of the content"
transformations:
- patterns/keyinsights
- name: "Make it Dense"
insight_type: "Dense Representation"
description: "Create a dense representation of the content"
transformations:
- patterns/makeitdense
- name: "Analyze Paper"
insight_type: "Paper Analysis"
description: "Analyze the paper and provide a quick summary"
transformations:
- patterns/analyze_paper
- name: "Reflection"
insight_type: "Reflection Questions"
description: "Generates a list of insightful questions to provoke reflection"
transformations:
- patterns/reflection_questions
```
Once you've defined your transformation, make sure to add the corresponding prompts to the `prompts/patterns` folder. Here's an example of a transformation prompt:
```jinja
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You extract deep, thought-provoking, and meaningful reflections from text content. You are especially focused on themes related to the human experience, such as the purpose of life, personal growth, the intersection of technology and humanity, artificial intelligence's societal impact, human potential, collective evolution, and transformative learning. Your reflections aim to provoke new ways of thinking, challenge assumptions, and provide a thoughtful synthesis of the content.
# STEPS
- Extract 3 to 5 of the most profound, thought-provoking, and/or meaningful ideas from the input in a section called REFLECTIONS.
- Each reflection should aim to explore underlying implications, connections to broader human experiences, or highlight a transformative perspective.
- Take a step back and consider the deeper significance or questions that arise from the content.
# OUTPUT INSTRUCTIONS
- The output section should be labeled as REFLECTIONS.
- Each bullet point should be between 20-25 words.
- Avoid repetition in the phrasing and ensure variety in sentence structure.
- The reflections should encourage deeper inquiry and provide a synthesis that transcends surface-level observations.
- Use bullet points, not numbered lists.
- Every bullet should be formatted as a question that elicits contemplation or a statement that offers a profound insight.
# INPUT
{{input_text}}
# OUTPUT
```
### Important Tips
- Always use `{{ input_text }}` to reference the text coming from the previous transformation.
- You can use `include` statements, like in the example above, to insert common instructions across all your patterns.
- Paths that start with `patterns/` will point to a corresponding `.jinja` file in the `patterns` folder.
- Any item that doesn't follow the `patterns/` format will be interpreted as a command (refer to `command.jinja` for clarity).
### Call for Contributions
Have an idea for an amazing Transformation? We'd love to see your creativity! Please submit a pull request with your favorite transformations to help expand our library. Whether it's summarization, content analysis, or something entirely unique, your contributions will help us all get more out of our research!
### Leveraging Transformations in Open Notebook
Your custom Patterns will automatically appear on the Sources page in Open Notebook. This makes it easy to select and apply them to your content as you research and explore. Note that we'll soon be adding **Note patterns** as well, making it even easier to transform not just sources, but also your own notes and thoughts.
### Sky's the Limit
Transformations empower you to create personalized, powerful workflows that bring out the most meaningful insights from your content. Whether you're working with articles, papers, notes, or other media, you can craft specific and meaningful outcomes tailored to your research goals. Start experimenting today and see just how transformative your content can become!

View file

@ -18,6 +18,13 @@ You'll find your new source in the first column of the Notebook Page.
![New Notebook](assets/asset_list.png)
## Using transformations
Once you have your sources created, you can start gathering insights from them using [transformations](TRANSFORMATIONS.md).
Create your own prompts and generate the wisdom that makes sense to you.
![New Notebook](assets/transformations.png)
## Talk to the Assistant
Once you have enough content in the notebook, you can decide which of them will be visible to the LLM before sending your question.

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

View file

@ -11,7 +11,8 @@ from open_notebook.exceptions import (
InvalidInputError,
NotFoundError,
)
from open_notebook.graphs.summary import graph as summarizer
from open_notebook.graphs.multipattern import graph as pattern_graph
from open_notebook.graphs.recursive_toc import graph as toc_graph
from open_notebook.repository import (
repo_create,
repo_delete,
@ -78,14 +79,14 @@ class ObjectModel(BaseModel):
repo_result = repo_update(self.id, data)
# Update the current instance with the result
for key, value in repo_result.items():
for key, value in repo_result[0].items():
if hasattr(self, key):
setattr(self, key, value)
except Exception as e:
logger.error(f"Error saving {self.__class__.table_name}: {str(e)}")
logger.exception(e)
raise DatabaseOperationError(f"Failed to save {self.__class__.table_name}")
raise DatabaseOperationError(e)
def _prepare_save_data(self) -> Dict[str, Any]:
data = self.model_dump()
@ -109,14 +110,14 @@ class ObjectModel(BaseModel):
)
def relate(self, relationship: str, target_id: str) -> Any:
if not relationship or not target_id:
if not relationship or not target_id or not self.id:
raise InvalidInputError("Relationship and target ID must be provided")
try:
return repo_relate(self.id, relationship, target_id)
except Exception as e:
logger.error(f"Error creating relationship: {str(e)}")
logger.exception(e)
raise DatabaseOperationError("Failed to create relationship")
raise DatabaseOperationError(e)
class Notebook(ObjectModel):
@ -179,22 +180,13 @@ class SourceInsight(ObjectModel):
insight_type: str
content: str
@field_validator("insight_type")
@classmethod
def validate_insight_type(cls, v):
allowed_types = ["summary", "key_points", "analysis"] # Add more as needed
if v not in allowed_types:
raise InvalidInputError(
f"Invalid insight type. Allowed types are: {', '.join(allowed_types)}"
)
return v
class Source(ObjectModel):
table_name: ClassVar[str] = "source"
asset: Optional[Asset] = None
title: Optional[str] = None
topics: Optional[List[str]] = Field(default_factory=list)
full_text: Optional[str] = None
def get_context(
self, context_size: Literal["short", "long"] = "short"
@ -213,10 +205,9 @@ class Source(ObjectModel):
def insights(self) -> List[SourceInsight]:
try:
result = repo_query(
f"""
SELECT * FROM source_insight WHERE source={self.id}
"""
SELECT * FROM source_insight WHERE source=$id
""",
{"id": self.id},
)
return [SourceInsight(**insight) for insight in result]
except Exception as e:
@ -224,34 +215,6 @@ class Source(ObjectModel):
logger.exception(e)
raise DatabaseOperationError("Failed to fetch insights for source")
@property
def full_text(self) -> str:
try:
results = []
chunk_indexes = repo_query(
"""
select order
from source_chunk
where source=$id
order by order
""",
{"id": self.id},
)
for chunk_index in chunk_indexes:
chunk = repo_query(
f"""
select content
from source_chunk
where source={self.id} and order={chunk_index['order']}
"""
)
results.append(chunk[0]["content"])
return "".join(results)
except Exception as e:
logger.error(f"Error fetching full text for source {self.id}: {str(e)}")
logger.exception(e)
raise DatabaseOperationError("Failed to fetch full text for source")
def add_to_notebook(self, notebook_id: str) -> Any:
if not notebook_id:
raise InvalidInputError("Notebook ID must be provided")
@ -265,19 +228,19 @@ class Source(ObjectModel):
logger.debug(f"Split into {len(chunks)} chunks")
for i, chunk in enumerate(chunks):
logger.debug(f"Saving chunk {i}")
data = {"source": self.id, "order": i, "content": surreal_clean(chunk)}
repo_create(
"source_chunk",
{"source": self.id, "order": i, "content": surreal_clean(chunk)},
data,
)
except Exception as e:
logger.error(f"Error saving chunks for source {self.id}: {str(e)}")
logger.exception(e)
raise DatabaseOperationError("Failed to save chunks for source")
logger.error(f"Error saving chunks for source {self.id}: {str(e)}")
raise DatabaseOperationError(e)
def vectorize(self) -> None:
try:
full_text = self.full_text
if not full_text:
if not self.full_text:
return
chunks = split_text(
self.full_text,
@ -288,14 +251,15 @@ class Source(ObjectModel):
# future: we can increase the batch size after surreal launches their new SDK
for i, chunk in enumerate(chunks):
repo_create(
"source_embedding",
{
"source": self.id,
"order": i,
"content": surreal_clean(chunk),
"embedding": get_embedding(chunk),
},
repo_query(
f"""
CREATE source_embedding CONTENT {{
"source": {self.id},
"order": {i},
"content": $content,
"embedding": {get_embedding(chunk)},
}};""",
{"content": surreal_clean(chunk)},
)
except Exception as e:
logger.error(f"Error vectorizing source {self.id}: {str(e)}")
@ -323,33 +287,39 @@ class Source(ObjectModel):
logger.exception(e)
raise DatabaseOperationError("Failed to search sources")
def _add_insight(self, insight_type: str, content: str) -> Any:
def add_insight(self, insight_type: str, content: str) -> Any:
if not insight_type or not content:
raise InvalidInputError("Insight type and content must be provided")
try:
embedding = get_embedding(content)
return repo_create(
"source_insight",
{
"source": self.id,
"insight_type": insight_type,
"content": surreal_clean(content),
"embedding": embedding,
},
return repo_query(
f"""
CREATE source_insight CONTENT {{
"source": {self.id},
"insight_type": '{insight_type}',
"content": $content,
"embedding": {embedding},
}};""",
{"content": surreal_clean(content)},
)
except Exception as e:
logger.error(f"Error adding insight to source {self.id}: {str(e)}")
raise DatabaseOperationError("Failed to add insight to source")
raise DatabaseOperationError(e)
def summarize(self) -> "Source":
def generate_toc_and_title(self) -> "Source":
try:
config = RunnableConfig(configurable=dict(thread_id=self.id))
result = summarizer.invoke({"content": self.full_text}, config=config)[
"output"
result = toc_graph.invoke({"content": self.full_text}, config=config)
logger.warning(result["toc"])
self.add_insight("Table of Contents", surreal_clean(result["toc"]))
transformations = [
"Based on the Table of Contents below, please provide a Title for this content, with max 15 words"
]
self._add_insight("summary", surreal_clean(result.summary))
self.title = surreal_clean(result.title)
self.topics = result.topics
output = pattern_graph.invoke(
dict(content_stack=[result["toc"]], transformations=transformations)
)
logger.warning(output["output"])
self.title = surreal_clean(output["output"])
self.save()
return self
except Exception as e:

View file

@ -10,6 +10,12 @@ class DatabaseOperationError(OpenNotebookError):
pass
class NoSchemaFound(OpenNotebookError):
    """Raised when a database schema is not found."""
class InvalidInputError(OpenNotebookError):
"""Raised when invalid input is provided."""

View file

@ -0,0 +1,66 @@
import operator
import os
from typing import List, Literal, Sequence
from langchain_core.runnables import (
RunnableConfig,
)
from langgraph.graph import END, START, StateGraph
from loguru import logger
from typing_extensions import Annotated, TypedDict
from open_notebook.graphs.utils import run_pattern
class PatternChainState(TypedDict):
    # State carried through the transformation-chain graph.

    # Texts produced so far; the newest entry is last. Annotated with
    # operator.add, so a list returned by a node is appended to this one.
    content_stack: Annotated[Sequence[str], operator.add]
    # Transformations that still have to run, in execution order.
    transformations: List[str]
    # Content produced by the most recently executed transformation.
    output: str
def call_model(state: dict, config: RunnableConfig) -> dict:
    """Run the next pending transformation against the latest content.

    Args:
        state: Graph state with ``content_stack`` (the last entry is used as
            input) and ``transformations`` (the first entry is executed).
        config: Runnable config; ``configurable.model_name`` overrides the
            ``DEFAULT_MODEL`` environment variable.

    Returns:
        A state update with the new content (both as a one-item
        ``content_stack`` and as ``output``) and the transformations that
        are still pending.

    Raises:
        IndexError: If ``transformations`` is empty.
    """
    model_name = config.get("configurable", {}).get(
        "model_name", os.environ.get("DEFAULT_MODEL")
    )

    # Work on a copy: popping from state["transformations"] directly would
    # also empty the list object owned by whoever invoked the graph.
    pending = list(state["transformations"])
    current_transformation = pending.pop(0)

    input_args = {"input_text": state["content_stack"][-1]}
    if not current_transformation.startswith("patterns/"):
        # Anything that is not a pattern path is a free-form command that is
        # handed to the generic "patterns/custom" prompt.
        input_args["command"] = current_transformation
        current_transformation = "patterns/custom"

    logger.warning(f"Processing transformation: {current_transformation}")
    logger.debug(f"Using input: {input_args}")

    transformation_result = run_pattern(
        pattern_name=current_transformation,
        model_name=model_name,
        state=input_args,
    )

    return {
        "content_stack": [transformation_result.content],
        "output": transformation_result.content,
        "transformations": pending,
    }
def transform_condition(state: PatternChainState) -> Literal["agent", END]:  # type: ignore
    """
    Checks whether there are more transformations to process.

    Returns "agent" while the pending list is non-empty, END otherwise.
    """
    has_pending = len(state["transformations"]) > 0
    return "agent" if has_pending else END
# Single-node graph: "agent" runs one transformation per visit.
agent_state = StateGraph(PatternChainState)
agent_state.add_node("agent", call_model)
agent_state.add_edge(START, "agent")
# After each visit, transform_condition chooses the next hop: back to
# "agent" while transformations remain, otherwise END.
agent_state.add_conditional_edges(
"agent",
transform_condition,
)
# Compiled graph exported for callers.
graph = agent_state.compile()

View file

@ -0,0 +1,35 @@
import os
from langchain_core.runnables import (
RunnableConfig,
)
from langgraph.graph import END, START, StateGraph
from typing_extensions import TypedDict
from open_notebook.graphs.utils import run_pattern
class PatternState(TypedDict):
    # State for the single-pattern graph.

    # Text passed to the pattern (the whole state is handed to run_pattern).
    input_text: str
    # Name of the pattern to run; passed as run_pattern's pattern_name.
    pattern: str
    # Result stored by the "agent" node.
    output: str
def call_model(state: dict, config: RunnableConfig) -> dict:
    """Run the pattern named in ``state["pattern"]`` and return it as ``output``.

    The model comes from ``configurable.model_name`` in ``config``, falling
    back to the ``DEFAULT_MODEL`` environment variable.
    """
    configurable = config.get("configurable", {})
    model_name = configurable.get("model_name", os.environ.get("DEFAULT_MODEL"))

    # NOTE(review): the raw run_pattern result is stored here, while the
    # declared type of "output" is str and the sibling graphs store
    # `.content` — confirm which one callers expect.
    pattern_result = run_pattern(
        pattern_name=state["pattern"],
        model_name=model_name,
        state=state,
    )
    return {"output": pattern_result}
# Linear graph: START -> "agent" (runs the pattern once) -> END.
agent_state = StateGraph(PatternState)
agent_state.add_node("agent", call_model)
agent_state.add_edge(START, "agent")
agent_state.add_edge("agent", END)
# Compiled graph exported for callers.
graph = agent_state.compile()

View file

@ -0,0 +1,78 @@
import os
from typing import List, Literal
from langchain_core.runnables import (
RunnableConfig,
)
from langgraph.graph import END, START, StateGraph
from typing_extensions import TypedDict
from open_notebook.graphs.utils import run_pattern
from open_notebook.utils import split_text
class TocState(TypedDict):
    # State for the recursive table-of-contents graph.

    # Chunks of the input text that have not been processed yet.
    chunks: List[str]
    # Initially the full input text; replaced by the chunk currently being
    # processed once setup_next_chunk has run.
    content: str
    # Table of contents returned by the latest model call.
    toc: str
def build_chunks(state: TocState) -> dict:
    """
    Split the input text into chunks.

    Chunk size and overlap are read from the SUMMARY_CHUNK_SIZE and
    SUMMARY_CHUNK_OVERLAP environment variables (defaults 200000 and 1000).
    """
    text = state["content"]
    chunk_size = int(os.environ.get("SUMMARY_CHUNK_SIZE", 200000))
    chunk_overlap = int(os.environ.get("SUMMARY_CHUNK_OVERLAP", 1000))
    pieces = split_text(text, chunk=chunk_size, overlap=chunk_overlap)
    return {"chunks": pieces}
def setup_next_chunk(state: TocState) -> dict:
    """
    Move the next item in the chunk list to the processing area.

    Returns a state update whose ``content`` is the first pending chunk and
    whose ``chunks`` is the rest. The incoming ``state`` is left untouched:
    the update is built from a copy instead of popping from, and assigning
    into, the state object that was passed in.

    Raises:
        IndexError: If there are no chunks left.
    """
    remaining = list(state["chunks"])
    next_chunk = remaining.pop(0)
    return {"chunks": remaining, "content": next_chunk}
def chunk_condition(state: TocState) -> Literal["get_chunk", END]:  # type: ignore
    """
    Checks whether there are more chunks to process.

    Returns "get_chunk" while chunks remain, END otherwise.
    """
    has_chunks = len(state["chunks"]) > 0
    return "get_chunk" if has_chunks else END
def call_model(state: TocState, config: RunnableConfig) -> dict:
    """Run the "recursive_toc" pattern on the current state and return its text as ``toc``.

    The model comes from ``configurable.model_name`` in ``config``, falling
    back to the ``SUMMARIZATION_MODEL`` environment variable.
    """
    configurable = config.get("configurable", {})
    model_name = configurable.get(
        "model_name", os.environ.get("SUMMARIZATION_MODEL")
    )
    response = run_pattern(
        pattern_name="recursive_toc",
        model_name=model_name,
        state=state,
    )
    return {"toc": response.content}
# Graph layout:
#   START -> setup_chunk -> (get_chunk -> agent)* -> END
agent_state = StateGraph(TocState)
# "setup_chunk" splits the input text into chunks.
agent_state.add_node("setup_chunk", build_chunks)
agent_state.add_edge(START, "setup_chunk")
# After splitting, chunk_condition routes to "get_chunk" when there are
# chunks, or straight to END when there are none.
agent_state.add_conditional_edges(
"setup_chunk",
chunk_condition,
)
# "get_chunk" makes the next chunk the current content; "agent" runs the model on it.
agent_state.add_node("get_chunk", setup_next_chunk)
agent_state.add_node("agent", call_model)
agent_state.add_edge("get_chunk", "agent")
# After each model call, loop back for the next chunk or finish.
agent_state.add_conditional_edges(
"agent",
chunk_condition,
)
# Compiled graph exported for callers.
graph = agent_state.compile()

View file

@ -57,7 +57,6 @@ def chunk_condition(state: SummaryState) -> Literal["get_chunk", END]: # type:
return END
# todo: build a helper method for LLM communication on all graphs
def call_model(state: SummaryState, config: RunnableConfig) -> dict:
model_name = config.get("configurable", {}).get(
"model_name", os.environ.get("SUMMARIZATION_MODEL")

View file

@ -9,7 +9,15 @@ from typing import Any, Optional, Union
from jinja2 import Environment, FileSystemLoader, Template
env = Environment(loader=FileSystemLoader(os.environ.get("PROMPT_PATH", "prompts")))
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
env = Environment(
loader=FileSystemLoader(
os.path.join(project_root, os.environ.get("PROMPT_PATH", "prompts"))
)
)
@dataclass

View file

@ -1,51 +1,53 @@
import asyncio
import os
from contextlib import asynccontextmanager
from contextlib import contextmanager
from typing import Any, Dict, Optional
from loguru import logger
from surrealdb import Surreal
from sblpy.connection import SurrealSyncConnection
from open_notebook.exceptions import InvalidDatabaseSchema
from open_notebook.exceptions import InvalidDatabaseSchema, NoSchemaFound
EXPECTED_VERSION = "0.0.1"
EXPECTED_VERSION = "0.0.2"
@asynccontextmanager
async def db_connection():
db = Surreal(os.environ["SURREAL_ADDRESS"])
@contextmanager
def db_connection():
connection = SurrealSyncConnection(
host=os.environ["SURREAL_ADDRESS"],
port=int(os.environ["SURREAL_PORT"]),
user=os.environ["SURREAL_USER"],
password=os.environ["SURREAL_PASS"],
namespace=os.environ["SURREAL_NAMESPACE"],
database=os.environ["SURREAL_DATABASE"],
encrypted=False, # Set to True if using SSL
)
try:
await db.connect()
await db.signin(
{"user": os.environ["SURREAL_USER"], "pass": os.environ["SURREAL_PASS"]}
)
await db.use(os.environ["SURREAL_NAMESPACE"], os.environ["SURREAL_DATABASE"])
yield db
yield connection
finally:
await db.close()
connection.socket.close()
def repo_query(query_str, vars=None):
async def _query():
async with db_connection() as db:
result = await db.query(query_str, vars)
return result
result = asyncio.run(_query())
return result[0]["result"]
def repo_query(query_str: str, vars: Optional[Dict[str, Any]] = None):
    """Run a query on a fresh connection and return what the connection returns.

    Args:
        query_str: Query text passed to ``connection.query``.
        vars: Optional variables passed alongside the query.

    Any exception raised by the query is logged and re-raised unchanged.
    """
    with db_connection() as connection:
        try:
            return connection.query(query_str, vars)
        except Exception as error:
            logger.exception(error)
            raise
def check_version():
async def _check_version():
async with db_connection() as db:
result = await db.query("select * from open_notebook:database_info;")
return result
try:
result = asyncio.run(_check_version())
if len(result) == 0 or len(result[0]["result"]) == 0:
raise InvalidDatabaseSchema("Database schema not found")
version = result[0]["result"][0]["version"]
result = repo_query("SELECT * FROM open_notebook:database_info;")
if not result:
raise NoSchemaFound("Database schema not found")
version = result[0]["version"]
logger.info(f"Connected to SurrealDB, using schema version {version}")
if version != EXPECTED_VERSION:
raise InvalidDatabaseSchema(
f"Version mismatch. Expected {EXPECTED_VERSION}, got {version}"
@ -55,55 +57,41 @@ def check_version():
raise e
def repo_create(table, data):
async def _create():
async with db_connection() as db:
result = await db.create(table, data)
return result
def repo_create(table: str, data: Dict[str, Any]):
    """Create a record in ``table`` with ``data`` as its content.

    NOTE(review): ``data`` is interpolated into the query text using its
    Python ``repr``; values containing quotes, or untrusted values, can break
    or alter the statement. Confirm whether bound variables can be used here.
    """
    statement = f"CREATE {table} CONTENT {data};"
    return repo_query(statement)
result = asyncio.run(_create())
def repo_update(id: str, data: Dict[str, Any]):
    """Replace the content of record ``id`` with ``data``, using bound variables."""
    return repo_query("UPDATE $id CONTENT $data;", {"id": id, "data": data})
def repo_delete(id: str):
    """Delete record ``id``, passing the id as a bound variable."""
    return repo_query("DELETE $id;", {"id": id})
def repo_relate(source: str, relationship: str, target: str):
    """Create a ``source->relationship->target`` relation and return the query result.

    The three parts are interpolated directly into the RELATE statement; the
    statement and its result are logged at debug level.
    """
    statement = f"RELATE {source}->{relationship}->{target}"
    logger.debug(f"Executing RELATE query: {statement}")
    outcome = repo_query(statement)
    logger.debug(f"RELATE query result: {outcome}")
    return outcome
def repo_update(id, data):
async def _update():
async with db_connection() as db:
result = await db.update(id, data)
return result
def execute_migration(script: str):
with open(f"database/{script}", "r") as file:
content = file.read()
result = asyncio.run(_update())
return result
def repo_delete(id):
async def _delete():
async with db_connection() as db:
result = await db.delete(id)
return result
result = asyncio.run(_delete())
return result
def repo_relate(source, relationship, target):
async def _relate():
async with db_connection() as db:
query = f"RELATE {source}->{relationship}->{target};"
result = await db.query(query)
return result
result = asyncio.run(_relate())
return result
def execute_migration():
async def _query():
content = None
with open("db_setup.surrealql", "r") as file:
content = file.read()
async with db_connection() as db:
result = await db.query(content)
return result
result = asyncio.run(_query())
return result[0]["result"]
return repo_query(content)

View file

@ -1,3 +1,6 @@
import re
import unicodedata
from langchain_text_splitters import CharacterTextSplitter
from openai import OpenAI
@ -70,14 +73,38 @@ def get_embedding(text, model="text-embedding-3-small"):
return client.embeddings.create(input=[text], model=model).data[0].embedding
def remove_non_ascii(text):
    """Return `text` with every run of non-ASCII characters removed."""
    non_ascii_run = re.compile(r"[^\x00-\x7F]+")
    return non_ascii_run.sub("", text)
def remove_non_printable(text):
    """Strip control characters and disallowed symbols from `text`.

    Kept: letters (including accented ones), digits, whitespace (spaces,
    newlines, tabs) and the basic punctuation `. , ! ? -`.
    Removed: everything else, including control characters, underscores,
    colons, quotes and backslashes.

    Args:
        text (str): The input text.

    Returns:
        str: The filtered text.
    """
    # Compose decomposed sequences (e.g. "e" + combining acute) so accented
    # letters are seen as single letter code points by the filter below.
    text = unicodedata.normalize("NFC", text)
    # Drop control/format characters (Unicode category "C*"), except line
    # breaks and tabs.
    text = "".join(
        char for char in text if char in "\n\t" or unicodedata.category(char)[0] != "C"
    )
    # `\w` is Unicode-aware for str patterns, so accented letters survive;
    # the previous ASCII-only class `a-zA-Z` silently removed them. The
    # underscore is part of `\w` but was never allowed, so strip it explicitly.
    return re.sub(r"[^\w\s.,!?\-]|_", "", text)
def surreal_clean(text):
    """
    Clean the input text for SurrealDB compatibility.

    The text is first passed through `remove_non_printable`. Afterwards, if a
    colon occurs before the first space (or the text contains no space at
    all), the first colon is escaped with a backslash.

    NOTE(review): `remove_non_printable` does not list ':' among the
    characters it keeps, so the escape branch below may never fire -- confirm
    whether the colon handling is still needed.

    Args:
        text (str): The input text to clean.

    Returns:
        str: The cleaned text with adjusted formatting.
    """
    text = remove_non_printable(text)
    # Escape the first colon when it appears before the first space.
    space_at = text.find(" ")
    colon_at = text.find(":")
    if colon_at != -1 and (space_at == -1 or colon_at < space_at):
        # "\\:" is the same two characters as the former "\:" literal, written
        # without relying on an invalid escape sequence.
        text = text.replace(":", "\\:", 1)
    return text

191
poetry.lock generated
View file

@ -3974,6 +3974,20 @@ files = [
{file = "ruff-0.5.7.tar.gz", hash = "sha256:8dfc0a458797f5d9fb622dd0efc52d796f23f0a1493a9527f4e49a550ae9a7e5"},
]
[[package]]
name = "sdblpy"
version = "0.3.0"
description = "lite surrealDB client that only supports websocket raw queries and async pooled connections"
optional = false
python-versions = "*"
files = [
{file = "sdblpy-0.3.0-py3-none-any.whl", hash = "sha256:a5b963556d7979fab567fe91ac48916affdd9fdf916ca18b90224ebad2a8e01b"},
{file = "sdblpy-0.3.0.tar.gz", hash = "sha256:4d90803cff46c7472497a10c5de1d7fa09f130d012b74c7b2365b809475189d9"},
]
[package.dependencies]
websockets = "13.1"
[[package]]
name = "shapely"
version = "2.0.6"
@ -4256,22 +4270,6 @@ files = [
[package.dependencies]
streamlit = ">=0.63"
[[package]]
name = "surrealdb"
version = "0.3.2"
description = "The official SurrealDB library for Python."
optional = false
python-versions = ">=3.8,<4.0"
files = [
{file = "surrealdb-0.3.2-py3-none-any.whl", hash = "sha256:30424971be9698f0fce61dc8387576d19302e0bb0ae708d311e79a261ead7701"},
{file = "surrealdb-0.3.2.tar.gz", hash = "sha256:8ab570b8d4299f35443f0d4fa636947cf1a18f67fd88acc00e5e5a36356e82cd"},
]
[package.dependencies]
httpx = ">=0.23.0"
pydantic = ">=2.1.0,<3.0.0"
websockets = ">=10.4,<11.0"
[[package]]
name = "tenacity"
version = "9.0.0"
@ -4646,80 +4644,97 @@ files = [
[[package]]
name = "websockets"
version = "10.4"
version = "13.1"
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.8"
files = [
{file = "websockets-10.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d58804e996d7d2307173d56c297cf7bc132c52df27a3efaac5e8d43e36c21c48"},
{file = "websockets-10.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc0b82d728fe21a0d03e65f81980abbbcb13b5387f733a1a870672c5be26edab"},
{file = "websockets-10.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ba089c499e1f4155d2a3c2a05d2878a3428cf321c848f2b5a45ce55f0d7d310c"},
{file = "websockets-10.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33d69ca7612f0ddff3316b0c7b33ca180d464ecac2d115805c044bf0a3b0d032"},
{file = "websockets-10.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62e627f6b6d4aed919a2052efc408da7a545c606268d5ab5bfab4432734b82b4"},
{file = "websockets-10.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ea7b82bfcae927eeffc55d2ffa31665dc7fec7b8dc654506b8e5a518eb4d50"},
{file = "websockets-10.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e0cb5cc6ece6ffa75baccfd5c02cffe776f3f5c8bf486811f9d3ea3453676ce8"},
{file = "websockets-10.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae5e95cfb53ab1da62185e23b3130e11d64431179debac6dc3c6acf08760e9b1"},
{file = "websockets-10.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7c584f366f46ba667cfa66020344886cf47088e79c9b9d39c84ce9ea98aaa331"},
{file = "websockets-10.4-cp310-cp310-win32.whl", hash = "sha256:b029fb2032ae4724d8ae8d4f6b363f2cc39e4c7b12454df8df7f0f563ed3e61a"},
{file = "websockets-10.4-cp310-cp310-win_amd64.whl", hash = "sha256:8dc96f64ae43dde92530775e9cb169979f414dcf5cff670455d81a6823b42089"},
{file = "websockets-10.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:47a2964021f2110116cc1125b3e6d87ab5ad16dea161949e7244ec583b905bb4"},
{file = "websockets-10.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e789376b52c295c4946403bd0efecf27ab98f05319df4583d3c48e43c7342c2f"},
{file = "websockets-10.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d3f0b61c45c3fa9a349cf484962c559a8a1d80dae6977276df8fd1fa5e3cb8c"},
{file = "websockets-10.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55b5905705725af31ccef50e55391621532cd64fbf0bc6f4bac935f0fccec46"},
{file = "websockets-10.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00c870522cdb69cd625b93f002961ffb0c095394f06ba8c48f17eef7c1541f96"},
{file = "websockets-10.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f38706e0b15d3c20ef6259fd4bc1700cd133b06c3c1bb108ffe3f8947be15fa"},
{file = "websockets-10.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f2c38d588887a609191d30e902df2a32711f708abfd85d318ca9b367258cfd0c"},
{file = "websockets-10.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe10ddc59b304cb19a1bdf5bd0a7719cbbc9fbdd57ac80ed436b709fcf889106"},
{file = "websockets-10.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:90fcf8929836d4a0e964d799a58823547df5a5e9afa83081761630553be731f9"},
{file = "websockets-10.4-cp311-cp311-win32.whl", hash = "sha256:b9968694c5f467bf67ef97ae7ad4d56d14be2751000c1207d31bf3bb8860bae8"},
{file = "websockets-10.4-cp311-cp311-win_amd64.whl", hash = "sha256:a7a240d7a74bf8d5cb3bfe6be7f21697a28ec4b1a437607bae08ac7acf5b4882"},
{file = "websockets-10.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:74de2b894b47f1d21cbd0b37a5e2b2392ad95d17ae983e64727e18eb281fe7cb"},
{file = "websockets-10.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3a686ecb4aa0d64ae60c9c9f1a7d5d46cab9bfb5d91a2d303d00e2cd4c4c5cc"},
{file = "websockets-10.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0d15c968ea7a65211e084f523151dbf8ae44634de03c801b8bd070b74e85033"},
{file = "websockets-10.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00213676a2e46b6ebf6045bc11d0f529d9120baa6f58d122b4021ad92adabd41"},
{file = "websockets-10.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e23173580d740bf8822fd0379e4bf30aa1d5a92a4f252d34e893070c081050df"},
{file = "websockets-10.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:dd500e0a5e11969cdd3320935ca2ff1e936f2358f9c2e61f100a1660933320ea"},
{file = "websockets-10.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4239b6027e3d66a89446908ff3027d2737afc1a375f8fd3eea630a4842ec9a0c"},
{file = "websockets-10.4-cp37-cp37m-win32.whl", hash = "sha256:8a5cc00546e0a701da4639aa0bbcb0ae2bb678c87f46da01ac2d789e1f2d2038"},
{file = "websockets-10.4-cp37-cp37m-win_amd64.whl", hash = "sha256:a9f9a735deaf9a0cadc2d8c50d1a5bcdbae8b6e539c6e08237bc4082d7c13f28"},
{file = "websockets-10.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c1289596042fad2cdceb05e1ebf7aadf9995c928e0da2b7a4e99494953b1b94"},
{file = "websockets-10.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0cff816f51fb33c26d6e2b16b5c7d48eaa31dae5488ace6aae468b361f422b63"},
{file = "websockets-10.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dd9becd5fe29773d140d68d607d66a38f60e31b86df75332703757ee645b6faf"},
{file = "websockets-10.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45ec8e75b7dbc9539cbfafa570742fe4f676eb8b0d3694b67dabe2f2ceed8aa6"},
{file = "websockets-10.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f72e5cd0f18f262f5da20efa9e241699e0cf3a766317a17392550c9ad7b37d8"},
{file = "websockets-10.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:185929b4808b36a79c65b7865783b87b6841e852ef5407a2fb0c03381092fa3b"},
{file = "websockets-10.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7d27a7e34c313b3a7f91adcd05134315002aaf8540d7b4f90336beafaea6217c"},
{file = "websockets-10.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:884be66c76a444c59f801ac13f40c76f176f1bfa815ef5b8ed44321e74f1600b"},
{file = "websockets-10.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:931c039af54fc195fe6ad536fde4b0de04da9d5916e78e55405436348cfb0e56"},
{file = "websockets-10.4-cp38-cp38-win32.whl", hash = "sha256:db3c336f9eda2532ec0fd8ea49fef7a8df8f6c804cdf4f39e5c5c0d4a4ad9a7a"},
{file = "websockets-10.4-cp38-cp38-win_amd64.whl", hash = "sha256:48c08473563323f9c9debac781ecf66f94ad5a3680a38fe84dee5388cf5acaf6"},
{file = "websockets-10.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:40e826de3085721dabc7cf9bfd41682dadc02286d8cf149b3ad05bff89311e4f"},
{file = "websockets-10.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:56029457f219ade1f2fc12a6504ea61e14ee227a815531f9738e41203a429112"},
{file = "websockets-10.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f5fc088b7a32f244c519a048c170f14cf2251b849ef0e20cbbb0fdf0fdaf556f"},
{file = "websockets-10.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fc8709c00704194213d45e455adc106ff9e87658297f72d544220e32029cd3d"},
{file = "websockets-10.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0154f7691e4fe6c2b2bc275b5701e8b158dae92a1ab229e2b940efe11905dff4"},
{file = "websockets-10.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c6d2264f485f0b53adf22697ac11e261ce84805c232ed5dbe6b1bcb84b00ff0"},
{file = "websockets-10.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9bc42e8402dc5e9905fb8b9649f57efcb2056693b7e88faa8fb029256ba9c68c"},
{file = "websockets-10.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:edc344de4dac1d89300a053ac973299e82d3db56330f3494905643bb68801269"},
{file = "websockets-10.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:84bc2a7d075f32f6ed98652db3a680a17a4edb21ca7f80fe42e38753a58ee02b"},
{file = "websockets-10.4-cp39-cp39-win32.whl", hash = "sha256:c94ae4faf2d09f7c81847c63843f84fe47bf6253c9d60b20f25edfd30fb12588"},
{file = "websockets-10.4-cp39-cp39-win_amd64.whl", hash = "sha256:bbccd847aa0c3a69b5f691a84d2341a4f8a629c6922558f2a70611305f902d74"},
{file = "websockets-10.4-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:82ff5e1cae4e855147fd57a2863376ed7454134c2bf49ec604dfe71e446e2193"},
{file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d210abe51b5da0ffdbf7b43eed0cfdff8a55a1ab17abbec4301c9ff077dd0342"},
{file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:942de28af58f352a6f588bc72490ae0f4ccd6dfc2bd3de5945b882a078e4e179"},
{file = "websockets-10.4-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9b27d6c1c6cd53dc93614967e9ce00ae7f864a2d9f99fe5ed86706e1ecbf485"},
{file = "websockets-10.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3d3cac3e32b2c8414f4f87c1b2ab686fa6284a980ba283617404377cd448f631"},
{file = "websockets-10.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:da39dd03d130162deb63da51f6e66ed73032ae62e74aaccc4236e30edccddbb0"},
{file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:389f8dbb5c489e305fb113ca1b6bdcdaa130923f77485db5b189de343a179393"},
{file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09a1814bb15eff7069e51fed0826df0bc0702652b5cb8f87697d469d79c23576"},
{file = "websockets-10.4-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff64a1d38d156d429404aaa84b27305e957fd10c30e5880d1765c9480bea490f"},
{file = "websockets-10.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b343f521b047493dc4022dd338fc6db9d9282658862756b4f6fd0e996c1380e1"},
{file = "websockets-10.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:932af322458da7e4e35df32f050389e13d3d96b09d274b22a7aa1808f292fee4"},
{file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a4162139374a49eb18ef5b2f4da1dd95c994588f5033d64e0bbfda4b6b6fcf"},
{file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c57e4c1349fbe0e446c9fa7b19ed2f8a4417233b6984277cce392819123142d3"},
{file = "websockets-10.4-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b627c266f295de9dea86bd1112ed3d5fafb69a348af30a2422e16590a8ecba13"},
{file = "websockets-10.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:05a7233089f8bd355e8cbe127c2e8ca0b4ea55467861906b80d2ebc7db4d6b72"},
{file = "websockets-10.4.tar.gz", hash = "sha256:eef610b23933c54d5d921c92578ae5f89813438fded840c2e9809d378dc765d3"},
{file = "websockets-13.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f48c749857f8fb598fb890a75f540e3221d0976ed0bf879cf3c7eef34151acee"},
{file = "websockets-13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7e72ce6bda6fb9409cc1e8164dd41d7c91466fb599eb047cfda72fe758a34a7"},
{file = "websockets-13.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f779498eeec470295a2b1a5d97aa1bc9814ecd25e1eb637bd9d1c73a327387f6"},
{file = "websockets-13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676df3fe46956fbb0437d8800cd5f2b6d41143b6e7e842e60554398432cf29b"},
{file = "websockets-13.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7affedeb43a70351bb811dadf49493c9cfd1ed94c9c70095fd177e9cc1541fa"},
{file = "websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1971e62d2caa443e57588e1d82d15f663b29ff9dfe7446d9964a4b6f12c1e700"},
{file = "websockets-13.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5f2e75431f8dc4a47f31565a6e1355fb4f2ecaa99d6b89737527ea917066e26c"},
{file = "websockets-13.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:58cf7e75dbf7e566088b07e36ea2e3e2bd5676e22216e4cad108d4df4a7402a0"},
{file = "websockets-13.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c90d6dec6be2c7d03378a574de87af9b1efea77d0c52a8301dd831ece938452f"},
{file = "websockets-13.1-cp310-cp310-win32.whl", hash = "sha256:730f42125ccb14602f455155084f978bd9e8e57e89b569b4d7f0f0c17a448ffe"},
{file = "websockets-13.1-cp310-cp310-win_amd64.whl", hash = "sha256:5993260f483d05a9737073be197371940c01b257cc45ae3f1d5d7adb371b266a"},
{file = "websockets-13.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:61fc0dfcda609cda0fc9fe7977694c0c59cf9d749fbb17f4e9483929e3c48a19"},
{file = "websockets-13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ceec59f59d092c5007e815def4ebb80c2de330e9588e101cf8bd94c143ec78a5"},
{file = "websockets-13.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c1dca61c6db1166c48b95198c0b7d9c990b30c756fc2923cc66f68d17dc558fd"},
{file = "websockets-13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:308e20f22c2c77f3f39caca508e765f8725020b84aa963474e18c59accbf4c02"},
{file = "websockets-13.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62d516c325e6540e8a57b94abefc3459d7dab8ce52ac75c96cad5549e187e3a7"},
{file = "websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c6e35319b46b99e168eb98472d6c7d8634ee37750d7693656dc766395df096"},
{file = "websockets-13.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5f9fee94ebafbc3117c30be1844ed01a3b177bb6e39088bc6b2fa1dc15572084"},
{file = "websockets-13.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7c1e90228c2f5cdde263253fa5db63e6653f1c00e7ec64108065a0b9713fa1b3"},
{file = "websockets-13.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6548f29b0e401eea2b967b2fdc1c7c7b5ebb3eeb470ed23a54cd45ef078a0db9"},
{file = "websockets-13.1-cp311-cp311-win32.whl", hash = "sha256:c11d4d16e133f6df8916cc5b7e3e96ee4c44c936717d684a94f48f82edb7c92f"},
{file = "websockets-13.1-cp311-cp311-win_amd64.whl", hash = "sha256:d04f13a1d75cb2b8382bdc16ae6fa58c97337253826dfe136195b7f89f661557"},
{file = "websockets-13.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9d75baf00138f80b48f1eac72ad1535aac0b6461265a0bcad391fc5aba875cfc"},
{file = "websockets-13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9b6f347deb3dcfbfde1c20baa21c2ac0751afaa73e64e5b693bb2b848efeaa49"},
{file = "websockets-13.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de58647e3f9c42f13f90ac7e5f58900c80a39019848c5547bc691693098ae1bd"},
{file = "websockets-13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1b54689e38d1279a51d11e3467dd2f3a50f5f2e879012ce8f2d6943f00e83f0"},
{file = "websockets-13.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf1781ef73c073e6b0f90af841aaf98501f975d306bbf6221683dd594ccc52b6"},
{file = "websockets-13.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d23b88b9388ed85c6faf0e74d8dec4f4d3baf3ecf20a65a47b836d56260d4b9"},
{file = "websockets-13.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3c78383585f47ccb0fcf186dcb8a43f5438bd7d8f47d69e0b56f71bf431a0a68"},
{file = "websockets-13.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d6d300f8ec35c24025ceb9b9019ae9040c1ab2f01cddc2bcc0b518af31c75c14"},
{file = "websockets-13.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a9dcaf8b0cc72a392760bb8755922c03e17a5a54e08cca58e8b74f6902b433cf"},
{file = "websockets-13.1-cp312-cp312-win32.whl", hash = "sha256:2f85cf4f2a1ba8f602298a853cec8526c2ca42a9a4b947ec236eaedb8f2dc80c"},
{file = "websockets-13.1-cp312-cp312-win_amd64.whl", hash = "sha256:38377f8b0cdeee97c552d20cf1865695fcd56aba155ad1b4ca8779a5b6ef4ac3"},
{file = "websockets-13.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a9ab1e71d3d2e54a0aa646ab6d4eebfaa5f416fe78dfe4da2839525dc5d765c6"},
{file = "websockets-13.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b9d7439d7fab4dce00570bb906875734df13d9faa4b48e261c440a5fec6d9708"},
{file = "websockets-13.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:327b74e915cf13c5931334c61e1a41040e365d380f812513a255aa804b183418"},
{file = "websockets-13.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:325b1ccdbf5e5725fdcb1b0e9ad4d2545056479d0eee392c291c1bf76206435a"},
{file = "websockets-13.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:346bee67a65f189e0e33f520f253d5147ab76ae42493804319b5716e46dddf0f"},
{file = "websockets-13.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91a0fa841646320ec0d3accdff5b757b06e2e5c86ba32af2e0815c96c7a603c5"},
{file = "websockets-13.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:18503d2c5f3943e93819238bf20df71982d193f73dcecd26c94514f417f6b135"},
{file = "websockets-13.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a9cd1af7e18e5221d2878378fbc287a14cd527fdd5939ed56a18df8a31136bb2"},
{file = "websockets-13.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:70c5be9f416aa72aab7a2a76c90ae0a4fe2755c1816c153c1a2bcc3333ce4ce6"},
{file = "websockets-13.1-cp313-cp313-win32.whl", hash = "sha256:624459daabeb310d3815b276c1adef475b3e6804abaf2d9d2c061c319f7f187d"},
{file = "websockets-13.1-cp313-cp313-win_amd64.whl", hash = "sha256:c518e84bb59c2baae725accd355c8dc517b4a3ed8db88b4bc93c78dae2974bf2"},
{file = "websockets-13.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c7934fd0e920e70468e676fe7f1b7261c1efa0d6c037c6722278ca0228ad9d0d"},
{file = "websockets-13.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:149e622dc48c10ccc3d2760e5f36753db9cacf3ad7bc7bbbfd7d9c819e286f23"},
{file = "websockets-13.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a569eb1b05d72f9bce2ebd28a1ce2054311b66677fcd46cf36204ad23acead8c"},
{file = "websockets-13.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95df24ca1e1bd93bbca51d94dd049a984609687cb2fb08a7f2c56ac84e9816ea"},
{file = "websockets-13.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8dbb1bf0c0a4ae8b40bdc9be7f644e2f3fb4e8a9aca7145bfa510d4a374eeb7"},
{file = "websockets-13.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:035233b7531fb92a76beefcbf479504db8c72eb3bff41da55aecce3a0f729e54"},
{file = "websockets-13.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e4450fc83a3df53dec45922b576e91e94f5578d06436871dce3a6be38e40f5db"},
{file = "websockets-13.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:463e1c6ec853202dd3657f156123d6b4dad0c546ea2e2e38be2b3f7c5b8e7295"},
{file = "websockets-13.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6d6855bbe70119872c05107e38fbc7f96b1d8cb047d95c2c50869a46c65a8e96"},
{file = "websockets-13.1-cp38-cp38-win32.whl", hash = "sha256:204e5107f43095012b00f1451374693267adbb832d29966a01ecc4ce1db26faf"},
{file = "websockets-13.1-cp38-cp38-win_amd64.whl", hash = "sha256:485307243237328c022bc908b90e4457d0daa8b5cf4b3723fd3c4a8012fce4c6"},
{file = "websockets-13.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9b37c184f8b976f0c0a231a5f3d6efe10807d41ccbe4488df8c74174805eea7d"},
{file = "websockets-13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:163e7277e1a0bd9fb3c8842a71661ad19c6aa7bb3d6678dc7f89b17fbcc4aeb7"},
{file = "websockets-13.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4b889dbd1342820cc210ba44307cf75ae5f2f96226c0038094455a96e64fb07a"},
{file = "websockets-13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:586a356928692c1fed0eca68b4d1c2cbbd1ca2acf2ac7e7ebd3b9052582deefa"},
{file = "websockets-13.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7bd6abf1e070a6b72bfeb71049d6ad286852e285f146682bf30d0296f5fbadfa"},
{file = "websockets-13.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2aad13a200e5934f5a6767492fb07151e1de1d6079c003ab31e1823733ae79"},
{file = "websockets-13.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:df01aea34b6e9e33572c35cd16bae5a47785e7d5c8cb2b54b2acdb9678315a17"},
{file = "websockets-13.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e54affdeb21026329fb0744ad187cf812f7d3c2aa702a5edb562b325191fcab6"},
{file = "websockets-13.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9ef8aa8bdbac47f4968a5d66462a2a0935d044bf35c0e5a8af152d58516dbeb5"},
{file = "websockets-13.1-cp39-cp39-win32.whl", hash = "sha256:deeb929efe52bed518f6eb2ddc00cc496366a14c726005726ad62c2dd9017a3c"},
{file = "websockets-13.1-cp39-cp39-win_amd64.whl", hash = "sha256:7c65ffa900e7cc958cd088b9a9157a8141c991f8c53d11087e6fb7277a03f81d"},
{file = "websockets-13.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5dd6da9bec02735931fccec99d97c29f47cc61f644264eb995ad6c0c27667238"},
{file = "websockets-13.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:2510c09d8e8df777177ee3d40cd35450dc169a81e747455cc4197e63f7e7bfe5"},
{file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1c3cf67185543730888b20682fb186fc8d0fa6f07ccc3ef4390831ab4b388d9"},
{file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcc03c8b72267e97b49149e4863d57c2d77f13fae12066622dc78fe322490fe6"},
{file = "websockets-13.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004280a140f220c812e65f36944a9ca92d766b6cc4560be652a0a3883a79ed8a"},
{file = "websockets-13.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2620453c075abeb0daa949a292e19f56de518988e079c36478bacf9546ced23"},
{file = "websockets-13.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9156c45750b37337f7b0b00e6248991a047be4aa44554c9886fe6bdd605aab3b"},
{file = "websockets-13.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:80c421e07973a89fbdd93e6f2003c17d20b69010458d3a8e37fb47874bd67d51"},
{file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82d0ba76371769d6a4e56f7e83bb8e81846d17a6190971e38b5de108bde9b0d7"},
{file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9875a0143f07d74dc5e1ded1c4581f0d9f7ab86c78994e2ed9e95050073c94d"},
{file = "websockets-13.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a11e38ad8922c7961447f35c7b17bffa15de4d17c70abd07bfbe12d6faa3e027"},
{file = "websockets-13.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4059f790b6ae8768471cddb65d3c4fe4792b0ab48e154c9f0a04cefaabcd5978"},
{file = "websockets-13.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:25c35bf84bf7c7369d247f0b8cfa157f989862c49104c5cf85cb5436a641d93e"},
{file = "websockets-13.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:83f91d8a9bb404b8c2c41a707ac7f7f75b9442a0a876df295de27251a856ad09"},
{file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a43cfdcddd07f4ca2b1afb459824dd3c6d53a51410636a2c7fc97b9a8cf4842"},
{file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48a2ef1381632a2f0cb4efeff34efa97901c9fbc118e01951ad7cfc10601a9bb"},
{file = "websockets-13.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:459bf774c754c35dbb487360b12c5727adab887f1622b8aed5755880a21c4a20"},
{file = "websockets-13.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:95858ca14a9f6fa8413d29e0a585b31b278388aa775b8a81fa24830123874678"},
{file = "websockets-13.1-py3-none-any.whl", hash = "sha256:a9a396a6ad26130cdae92ae10c36af09d9bfe6cafe69670fd3b6da9b07b4044f"},
{file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"},
]
[[package]]
@ -4868,4 +4883,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "f40348a4e1846cdbdd353c3c9de37e7b652a561c2374dec9899cf77778686ded"
content-hash = "eb5762f3ce3c19b2d51f5aaadcf52a89160f0792534aa83812f50770986f8ba4"

View file

@ -0,0 +1,42 @@
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You are an insightful and analytical reader of academic papers, extracting the key components, significance, and broader implications. Your focus is to uncover the core contributions, practical applications, methodological strengths or weaknesses, and any surprising findings. You are especially attuned to the clarity of arguments, the relevance to existing literature, and potential impacts on both the specific field and broader contexts.
# STEPS
1. **READ AND UNDERSTAND THE PAPER**: Thoroughly read the paper, identifying its main focus, arguments, methods, results, and conclusions.
2. **IDENTIFY CORE ELEMENTS**:
- **Purpose**: What is the main goal or research question?
- **Contribution**: What new knowledge or innovation does this paper bring to the field?
- **Methods**: What methods are used, and are they novel or particularly effective?
- **Key Findings**: What are the most critical results, and why do they matter?
- **Limitations**: Are there any notable limitations or areas for further research?
3. **SYNTHESIZE THE MAIN POINTS**:
- Extract the key elements and organize them into insightful observations.
- Highlight the broader impact and potential applications.
- Note any aspects that challenge established views or introduce new questions.
# OUTPUT INSTRUCTIONS
- Structure the output as follows:
- **PURPOSE**: A concise summary of the main research question or goal (1-2 sentences).
- **CONTRIBUTION**: A bullet list of 2-3 points that describe what the paper adds to the field.
- **KEY FINDINGS**: A bullet list of 2-3 points summarizing the critical outcomes of the study.
- **IMPLICATIONS**: A bullet list of 2-3 points discussing the significance or potential impact of the findings on the field or broader context.
- **LIMITATIONS**: A bullet list of 1-2 points identifying notable limitations or areas for future work.
- **Bullet Points** should be between 15-20 words.
- Avoid starting each bullet point with the same word to maintain variety.
- Use clear and concise language that conveys the key ideas effectively.
- Do not include warnings, disclaimers, or personal opinions.
- Output only the requested sections with their respective labels.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,6 @@
{% include 'patterns/common_text.jinja' %}
Please clean up the following text, fixing the paragraphs, punctuation, etc.
If you find any misspelled words or names, feel free to correct them.
{{input_text}}

View file

@ -0,0 +1,4 @@
# ADDITIONAL INSTRUCTIONS
- You are working on my editorial projects. The text below is my own.
- Please do not reply with any acknowledgements or greetings, just provide the content requested.

View file

@ -0,0 +1,9 @@
{% include 'patterns/common_text.jinja' %}
{{command}}
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,30 @@
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You extract surprising, powerful, and interesting insights from text content. You are interested in insights related to the purpose and meaning of life, human flourishing, the role of technology in the future of humanity, artificial intelligence and its effect on humans, memes, learning, reading, books, continuous improvement, and similar topics.
You create 15 word bullet points that capture the most important insights from the input.
Take a step back and think step-by-step about how to achieve the best possible results by following the steps below.
# STEPS
- Extract 20 to 50 of the most surprising, insightful, and/or interesting ideas from the input in a section called IDEAS, and write them on a virtual whiteboard in your mind using 15-word bullets. If there are fewer than 50, then collect all of them. Make sure you extract at least 20.
- From those IDEAS, extract the most powerful and insightful of them and write them in a section called INSIGHTS. Make sure you extract at least 10 and up to 25.
# OUTPUT INSTRUCTIONS
- INSIGHTS are essentially higher-level IDEAS that are more abstracted and wise.
- Output the INSIGHTS section only.
- Each bullet should be about 15 words in length.
- Do not give warnings or notes; only output the requested sections.
- You use bulleted lists for output, not numbered lists.
- Do not start items with the same opening words.
- Ensure you follow ALL these instructions when creating your output.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,16 @@
{% include 'patterns/common_text.jinja' %}
# MISSION
You are a Sparse Priming Representation (SPR) writer. An SPR is a particular kind of use of language for advanced NLP, NLU, and NLG tasks, particularly useful for the latest generation of Large Language Models (LLMs). You will be given information by the USER which you are to render as an SPR.
# THEORY
LLMs are a kind of deep neural network. They have been demonstrated to embed knowledge, abilities, and concepts, ranging from reasoning to planning, and even to theory of mind. These are called latent abilities and latent content, collectively referred to as latent space. The latent space of an LLM can be activated with the correct series of words as inputs, which will create a useful internal state of the neural network. This is not unlike how the right shorthand cues can prime a human mind to think in a certain way. Like human minds, LLMs are associative, meaning you only need to use the correct associations to "prime" another model to think in the same way.
# METHODOLOGY
Render the input as a distilled list of succinct statements, assertions, associations, concepts, analogies, and metaphors. The idea is to capture as much, conceptually, as possible but with as few words as possible. Write it in a way that makes sense to you, as the future audience will be another language model, not a human. Use complete sentences.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,29 @@
# IDENTITY and PURPOSE
You are an expert at data and concept visualization and in turning complex ideas into a form that can be visualized using Mermaid (markdown) syntax.
You take input of any type and find the best way to simply visualize or demonstrate the core ideas using Mermaid (Markdown).
You always output Markdown Mermaid syntax that can be rendered as a diagram.
# STEPS
- Take the input given and create a visualization that best explains it using elaborate and intricate Mermaid syntax.
- Ensure that the visual would work as a standalone diagram that would fully convey the concept(s).
- Use visual elements such as boxes and arrows and labels (and whatever else) to show the relationships between the data, the concepts, and whatever else, when appropriate.
- Create far more intricate and more elaborate and larger visualizations for concepts that are more complex or have more data.
- Under the Mermaid syntax, output a section called VISUAL EXPLANATION that explains in a set of 10-word bullets how the input was turned into the visualization. Ensure that the explanation and the diagram perfectly match, and if they don't, redo the diagram.
- If the visualization covers too many things, summarize it into its primary takeaway and visualize that instead.
- DO NOT COMPLAIN AND GIVE UP. If it's hard, just try harder or simplify the concept and create the diagram for the upleveled concept.
# OUTPUT INSTRUCTIONS
- DO NOT COMPLAIN. Just output the Mermaid syntax.
- Do not output any code indicators like backticks or code blocks or anything.
- Ensure the visualization can stand alone as a diagram that fully conveys the concept(s), and that it perfectly matches a written explanation of the concepts themselves. Start over if it can't.
- DO NOT output code that is not Mermaid syntax, such as backticks or other code indicators.
- Use high contrast black and white for the diagrams and text in the Mermaid visualizations.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,28 @@
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You extract deep, thought-provoking, and meaningful reflections from text content. You are especially focused on themes related to the human experience, such as the purpose of life, personal growth, the intersection of technology and humanity, artificial intelligence's societal impact, human potential, collective evolution, and transformative learning. Your reflections aim to provoke new ways of thinking, challenge assumptions, and provide a thoughtful synthesis of the content.
# STEPS
- Extract 3 to 5 of the most profound, thought-provoking, and/or meaningful ideas from the input in a section called REFLECTIONS.
- Each reflection should aim to explore underlying implications, connections to broader human experiences, or highlight a transformative perspective.
- Take a step back and consider the deeper significance or questions that arise from the content.
# OUTPUT INSTRUCTIONS
- The output section should be labeled as REFLECTIONS.
- Each bullet point should be between 20-25 words.
- Avoid repetition in the phrasing and ensure variety in sentence structure.
- The reflections should encourage deeper inquiry and provide a synthesis that transcends surface-level observations.
- Use bullet points, not numbered lists.
- Every bullet should be formatted as a question that elicits contemplation or a statement that offers a profound insight.
- Do not give warnings or notes; only output the requested section.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,16 @@
{% include 'patterns/common_text.jinja' %}
# SYSTEM ROLE
You are a content summarization assistant that creates dense, information-rich summaries optimized for machine understanding. Your summaries should capture key concepts with minimal words while maintaining complete, clear sentences.
# TASK
Analyze the provided content and create a summary that:
- Captures the core concepts and key information
- Uses clear, direct language
- Maintains context from any previous summaries
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,6 @@
{% include 'patterns/common_text.jinja' %}
Please translate the following text to Portuguese:
{{input_text}}

View file

@ -0,0 +1,24 @@
# SYSTEM ROLE
You are a content analysis assistant that reads through documents and provides a Table of Contents (ToC) to help users identify what the document covers more easily.
Your ToC should capture all major topics and transitions in the content and should mention them in the order they appear.
# TASK
Analyze the provided content and create a Table of Contents that:
- Captures the core topics included in the text
- Gives a small description of what is covered
# INSTRUCTIONS FOR LARGE DOCUMENTS
If you see a PREVIOUS TOC section below, it means that this request is a continuation of a previous request, most likely to handle context length issues.
Every time, you should replace the previous toc with the new one, and append the new content to the previous content.
{% if toc %}
# PREVIOUS TOC
{{toc}}
{% endif %}
# CONTENT
{{content}}

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "open-notebook"
version = "0.0.2"
version = "0.0.3"
description = "An open source implementation of a research assistant, inspired by Google Notebook LM"
authors = ["Luis Novo <lfnovo@gmail.com>"]
license = "MIT"
@ -30,7 +30,6 @@ pymupdf = "1.24.11"
python-magic = "^0.4.27"
langdetect = "^1.0.9"
youtube-transcript-api = "^0.6.2"
surrealdb = "^0.3.2"
openai = "^1.52.0"
pre-commit = "^4.0.1"
langchain-community = "^0.3.3"
@ -39,6 +38,7 @@ langchain-openai = "^0.2.3"
langchain-anthropic = "^0.2.3"
langchain-ollama = "^0.2.0"
langchain-google-vertexai = "^2.0.5"
sdblpy = "^0.3.0"
[tool.poetry.group.dev.dependencies]
ipykernel = "^6.29.5"

View file

@ -3,7 +3,7 @@ from langchain_core.runnables import RunnableConfig
from open_notebook.domain import Note, Source
from open_notebook.graphs.chat import graph as chat_graph
from open_notebook.utils import token_cost, token_count
from open_notebook.utils import token_count
# todo: build a smarter, more robust context manager function
@ -56,11 +56,11 @@ def execute_chat(txt_input, session_id):
# it would be nice to have a total token count somewhere in the admin
def chat_sidebar(session_id):
context = build_context(session_id=session_id)
tokens = token_count(str(context))
cost = token_cost(tokens)
tokens = token_count(str(context) + str(st.session_state[session_id]["messages"]))
with st.container(border=True):
request = st.chat_input("Enter your question")
st.caption(f"Total tokens: {tokens}, cost: ${cost:.4f}")
# removing for now since it's not multi-model capable right now
st.caption(f"Total tokens: {tokens}")
if request:
response = execute_chat(txt_input=request, session_id=session_id)
st.session_state[session_id]["messages"] = response["messages"]

View file

@ -2,13 +2,14 @@ from pathlib import Path
import streamlit as st
import streamlit_scrollable_textbox as stx # type: ignore
import yaml
from humanize import naturaltime
from loguru import logger
from streamlit_tags import st_tags # type: ignore
from open_notebook.domain import Asset, Source
from open_notebook.graphs.content_process import graph
from open_notebook.utils import token_cost, token_count
from open_notebook.graphs.multipattern import graph as transform_graph
from open_notebook.utils import surreal_clean
from .consts import context_icons
@ -16,50 +17,71 @@ uploads_dir = Path("./.uploads")
uploads_dir.mkdir(parents=True, exist_ok=True)
# Run the given list of transformations over input_text using the multipattern
# graph (imported above as transform_graph) and return the "output" entry of
# the graph's result. The text is handed to the graph as the only element of
# content_stack.
def run_transformations(input_text, transformations):
output = transform_graph.invoke(
dict(content_stack=[input_text], transformations=transformations)
)
return output["output"]
@st.dialog("Source", width="large")
def source_panel(source_id):
source: Source = Source.get(source_id)
if not source:
st.error("Source not found")
return
title = st.empty()
if source.title:
title.subheader(source.title)
st.caption(f"Created {naturaltime(source.created)}")
# st.markdown(f"**URL:** {source.url}, **File:** {source.file_path}")
summary = st.empty()
for insight in source.insights:
summary.write(insight.insight_type)
summary.write(insight.content)
topics = source.topics or []
if len(topics) > 0:
st_tags(
label="",
text="Press enter to add more",
value=source.topics,
suggestions=source.topics,
maxtags=10,
key="1",
)
process_tab, source_tab = st.tabs(["Process", "Source"])
with process_tab:
c1, c2 = st.columns([3, 1])
with c1:
title = st.empty()
if source.title:
title.subheader(source.title)
if source.asset.url:
from_src = f"from URL: {source.asset.url}"
elif source.asset.file_path:
from_src = f"from file: {source.asset.file_path}"
else:
from_src = "from text"
st.caption(f"Created {naturaltime(source.created)}, {from_src}")
for insight in source.insights:
with st.expander(f"**{insight.insight_type}**"):
st.markdown(insight.content)
if st.button("Delete", key=f"delete_insight_{insight.id}"):
insight.delete()
st.rerun(scope="fragment")
if st.button("Delete", icon="🗑️"):
source.delete()
st.rerun()
with c2:
with open("transformations.yaml", "r") as file:
transformations = yaml.safe_load(file)
for transformation in transformations["source_insights"]:
if st.button(
transformation["name"], help=transformation["description"]
):
result = run_transformations(
source.full_text, transformation["transformations"]
)
source.add_insight(
transformation["insight_type"], surreal_clean(result)
)
st.rerun(scope="fragment")
cost = token_cost(token_count(source.full_text)) * 1.2
if st.button(f"Summarize (about ${cost:.4f})", icon="📝"):
source.summarize()
st.rerun(scope="fragment")
if st.button(
"Embed vectors",
icon="🦾",
help="This will generate your embedding vectors on the database for powerful search capabilities",
):
source.vectorize()
st.success("Embedding complete")
cost_embedding = token_cost(token_count(source.full_text), 0.02)
if st.button("Delete", icon="🗑️"):
source.delete()
st.rerun()
if st.button(f"Embed (${cost_embedding:.4f})", icon="📝"):
source.vectorize()
st.success("Embedding complete")
st.subheader("Content")
stx.scrollableTextbox(source.full_text, height=300)
with source_tab:
st.subheader("Content")
stx.scrollableTextbox(source.full_text, height=300)
@st.dialog("Add a Source", width="large")
@ -105,16 +127,14 @@ def add_source(session_id):
st.write("Saving..")
source = Source(
asset=Asset(url=req.get("url"), file_path=req.get("file_path")),
full_text=surreal_clean(result["content"]),
)
source.save()
source.save_chunks(result["content"])
source.add_to_notebook(st.session_state[session_id]["notebook"].id)
st.write("Summarizing...")
source.summarize()
source.generate_toc_and_title()
st.rerun()
# else:
# st.stop()
def source_card(session_id, source):

35
transformations.yaml Normal file
View file

@ -0,0 +1,35 @@
# Transformations offered for a source. Each entry under source_insights is
# rendered as a button in the source panel:
#   name            - button label
#   description     - button help tooltip
#   insight_type    - label the result is stored under via source.add_insight
#   transformations - list of patterns passed to the transformation graph
#                     (presumably applied in the listed order; confirm against
#                     the multipattern graph)
source_insights:
- name: "Summarize"
insight_type: "Content Summary"
description: "Summarize the content"
transformations:
- patterns/makeitdense
- patterns/summarize
- name: "Key Insights"
insight_type: "Key Insights"
description: "Extracts a list of the Key Insights of the content"
transformations:
- patterns/keyinsights
- name: "Make it Dense"
insight_type: "Dense Representation"
description: "Create a dense representation of the content"
transformations:
- patterns/makeitdense
- name: "Analyze Paper"
insight_type: "Paper Analysis"
description: "Analyze the paper and provide a quick summary"
transformations:
- patterns/analyze_paper
- name: "Reflection"
insight_type: "Reflection Questions"
description: "Generates a list of insightful questions to provoke reflection"
transformations:
- patterns/reflection_questions
# Disabled entry: would pair reflection_questions with translate.
# - name: "Reflection [PT]"
# insight_type: "Reflection Questions [PT]"
# description: "Generates a list of insightful questions to provoke reflection"
# transformations:
# - patterns/reflection_questions
# - patterns/translate