v1 of transformations

This commit is contained in:
LUIS NOVO 2024-10-23 10:59:05 -03:00
parent e020511876
commit 02ff05b6fd
16 changed files with 389 additions and 40 deletions

View file

@ -0,0 +1,66 @@
import operator
import os
from typing import List, Literal, Sequence
from langchain_core.runnables import (
RunnableConfig,
)
from langgraph.graph import END, START, StateGraph
from loguru import logger
from typing_extensions import Annotated, TypedDict
from open_notebook.graphs.utils import run_pattern
# State carried through the transformation-chain graph defined in this module.
class PatternChainState(TypedDict):
# Texts produced so far; the node reads the last entry as its input.
# operator.add is attached as annotation metadata so that a list returned by
# a node is concatenated onto the existing value (LangGraph reducer convention).
content_stack: Annotated[Sequence[str], operator.add]
# Transformations still waiting to be applied, consumed from the front.
transformations: List[str]
# Content returned by the most recently executed transformation.
output: str
def call_model(state: dict, config: RunnableConfig) -> dict:
    """
    Apply the next pending transformation to the most recent content.

    The first entry of ``state["transformations"]`` is taken as the step to
    run. Entries starting with ``"patterns/"`` are used as pattern names
    directly; any other entry is treated as a free-form command and is run
    through the ``"patterns/custom"`` pattern with the entry passed as
    ``command``.

    Args:
        state: Graph state holding ``content_stack`` (texts so far) and
            ``transformations`` (steps still to run).
        config: Runnable config; ``config["configurable"]["model_name"]``
            selects the model, falling back to the ``DEFAULT_MODEL``
            environment variable.

    Returns:
        A state update with the new content appended to ``content_stack``,
        the same content as ``output``, and the remaining transformations.
        When no transformation is pending, the latest content is returned
        unchanged as ``output``.
    """
    model_name = config.get("configurable", {}).get(
        "model_name", os.environ.get("DEFAULT_MODEL")
    )

    pending = state["transformations"]
    latest_content = state["content_stack"][-1]

    if not pending:
        # Nothing to run: pass the latest content through instead of failing
        # on an empty queue.
        return {"output": latest_content, "transformations": []}

    # Unpack rather than pop(0) so the list object owned by the caller (and
    # by the incoming state) is never mutated in place.
    current_transformation, *remaining = pending

    input_args = {"input_text": latest_content}
    if not current_transformation.startswith("patterns/"):
        # Free-form instruction: route it through the generic custom pattern.
        input_args["command"] = current_transformation
        current_transformation = "patterns/custom"

    logger.warning(f"Processing transformation: {current_transformation}")
    logger.debug(f"Using input: {input_args}")

    transformation_result = run_pattern(
        pattern_name=current_transformation,
        model_name=model_name,
        state=input_args,
    )

    return {
        "content_stack": [transformation_result.content],
        "output": transformation_result.content,
        "transformations": remaining,
    }
def transform_condition(state: PatternChainState) -> Literal["agent", END]:  # type: ignore
    """
    Decide whether the chain has more transformations to run.

    Returns:
        ``"agent"`` while ``state["transformations"]`` still has entries,
        otherwise ``END`` to finish the graph.
    """
    if state["transformations"]:
        return "agent"
    return END
# Single-node graph: "agent" (call_model) is the entry point and runs one
# transformation per visit.
agent_state = StateGraph(PatternChainState)
agent_state.add_node("agent", call_model)
agent_state.add_edge(START, "agent")
# After each visit, transform_condition routes back to "agent" or to END.
agent_state.add_conditional_edges(
"agent",
transform_condition,
)
# Compiled graph exposed to importers of this module.
graph = agent_state.compile()

View file

@ -0,0 +1,35 @@
import os
from langchain_core.runnables import (
RunnableConfig,
)
from langgraph.graph import END, START, StateGraph
from typing_extensions import TypedDict
from open_notebook.graphs.utils import run_pattern
# State for the single-pattern graph defined in this module.
class PatternState(TypedDict):
# Text to process; passed to run_pattern as part of the whole state
# (presumably rendered into the template's input_text slot; confirm in run_pattern).
input_text: str
# Name of the pattern handed to run_pattern as pattern_name.
pattern: str
# Filled by the node with the value returned from run_pattern.
output: str
def call_model(state: dict, config: RunnableConfig) -> dict:
    """
    Run the pattern named in ``state["pattern"]`` and return it as ``output``.

    The model comes from ``config["configurable"]["model_name"]`` when set,
    otherwise from the ``DEFAULT_MODEL`` environment variable. The entire
    state dict is forwarded to ``run_pattern``.
    """
    configurable = config.get("configurable", {})
    fallback_model = os.environ.get("DEFAULT_MODEL")
    model_name = configurable.get("model_name", fallback_model)

    # NOTE(review): the raw run_pattern result is stored here, while
    # PatternState declares ``output`` as str -- confirm what callers expect.
    pattern_result = run_pattern(
        pattern_name=state["pattern"],
        model_name=model_name,
        state=state,
    )
    return {"output": pattern_result}
# Linear graph: START -> "agent" (call_model) -> END, i.e. one pattern run per invocation.
agent_state = StateGraph(PatternState)
agent_state.add_node("agent", call_model)
agent_state.add_edge(START, "agent")
agent_state.add_edge("agent", END)
# Compiled graph exposed to importers of this module.
graph = agent_state.compile()

View file

@ -57,7 +57,6 @@ def chunk_condition(state: SummaryState) -> Literal["get_chunk", END]: # type:
return END
# todo: build a helper method for LLM communication on all graphs
def call_model(state: SummaryState, config: RunnableConfig) -> dict:
model_name = config.get("configurable", {}).get(
"model_name", os.environ.get("SUMMARIZATION_MODEL")

View file

@ -9,7 +9,15 @@ from typing import Any, Optional, Union
from jinja2 import Environment, FileSystemLoader, Template
env = Environment(loader=FileSystemLoader(os.environ.get("PROMPT_PATH", "prompts")))
# Resolve the prompt directory relative to the parent of this file's directory
# rather than relative to the process working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
# PROMPT_PATH (default "prompts") is joined onto project_root; note that
# os.path.join discards project_root when PROMPT_PATH is an absolute path.
env = Environment(
loader=FileSystemLoader(
os.path.join(project_root, os.environ.get("PROMPT_PATH", "prompts"))
)
)
@dataclass

View file

@ -0,0 +1,42 @@
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You are an insightful and analytical reader of academic papers, extracting the key components, significance, and broader implications. Your focus is to uncover the core contributions, practical applications, methodological strengths or weaknesses, and any surprising findings. You are especially attuned to the clarity of arguments, the relevance to existing literature, and potential impacts on both the specific field and broader contexts.
# STEPS
1. **READ AND UNDERSTAND THE PAPER**: Thoroughly read the paper, identifying its main focus, arguments, methods, results, and conclusions.
2. **IDENTIFY CORE ELEMENTS**:
- **Purpose**: What is the main goal or research question?
- **Contribution**: What new knowledge or innovation does this paper bring to the field?
- **Methods**: What methods are used, and are they novel or particularly effective?
- **Key Findings**: What are the most critical results, and why do they matter?
- **Limitations**: Are there any notable limitations or areas for further research?
3. **SYNTHESIZE THE MAIN POINTS**:
- Extract the key elements and organize them into insightful observations.
- Highlight the broader impact and potential applications.
- Note any aspects that challenge established views or introduce new questions.
# OUTPUT INSTRUCTIONS
- Structure the output as follows:
- **PURPOSE**: A concise summary of the main research question or goal (1-2 sentences).
- **CONTRIBUTION**: A bullet list of 2-3 points that describe what the paper adds to the field.
- **KEY FINDINGS**: A bullet list of 2-3 points summarizing the critical outcomes of the study.
- **IMPLICATIONS**: A bullet list of 2-3 points discussing the significance or potential impact of the findings on the field or broader context.
- **LIMITATIONS**: A bullet list of 1-2 points identifying notable limitations or areas for future work.
- **Bullet Points** should be between 15-20 words.
- Avoid starting each bullet point with the same word to maintain variety.
- Use clear and concise language that conveys the key ideas effectively.
- Do not include warnings, disclaimers, or personal opinions.
- Output only the requested sections with their respective labels.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,6 @@
{% include 'patterns/common_text.jinja' %}
Please clean up the following text, fixing the paragraphs, punctuation, etc.
If you find any word or name misspellings, feel free to correct them.
{{input_text}}

View file

@ -0,0 +1,4 @@
# ADDITIONAL INSTRUCTIONS
- You are working on my editorial projects. The text below is my own.
- Please do not reply with any acknowledgements or greetings, just provide the content requested.

View file

@ -0,0 +1,9 @@
{% include 'patterns/common_text.jinja' %}
{{command}}
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,30 @@
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You extract surprising, powerful, and interesting insights from text content. You are interested in insights related to the purpose and meaning of life, human flourishing, the role of technology in the future of humanity, artificial intelligence and its effect on humans, memes, learning, reading, books, continuous improvement, and similar topics.
You create 15 word bullet points that capture the most important insights from the input.
Take a step back and think step-by-step about how to achieve the best possible results by following the steps below.
# STEPS
- Extract 20 to 50 of the most surprising, insightful, and/or interesting ideas from the input in a section called IDEAS, and write them on a virtual whiteboard in your mind using 15 word bullets. If there are fewer than 50 then collect all of them. Make sure you extract at least 20.
- From those IDEAS, extract the most powerful and insightful of them and write them in a section called INSIGHTS. Make sure you extract at least 10 and up to 25.
# OUTPUT INSTRUCTIONS
- INSIGHTS are essentially higher-level IDEAS that are more abstracted and wise.
- Output the INSIGHTS section only.
- Each bullet should be about 15 words in length.
- Do not give warnings or notes; only output the requested sections.
- You use bulleted lists for output, not numbered lists.
- Do not start items with the same opening words.
- Ensure you follow ALL these instructions when creating your output.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,16 @@
{% include 'patterns/common_text.jinja' %}
# MISSION
You are a Sparse Priming Representation (SPR) writer. An SPR is a particular kind of use of language for advanced NLP, NLU, and NLG tasks, particularly useful for the latest generation of Large Language Models (LLMs). You will be given information by the USER which you are to render as an SPR.
# THEORY
LLMs are a kind of deep neural network. They have been demonstrated to embed knowledge, abilities, and concepts, ranging from reasoning to planning, and even to theory of mind. These are called latent abilities and latent content, collectively referred to as latent space. The latent space of an LLM can be activated with the correct series of words as inputs, which will create a useful internal state of the neural network. This is not unlike how the right shorthand cues can prime a human mind to think in a certain way. Like human minds, LLMs are associative, meaning you only need to use the correct associations to "prime" another model to think in the same way.
# METHODOLOGY
Render the input as a distilled list of succinct statements, assertions, associations, concepts, analogies, and metaphors. The idea is to capture as much, conceptually, as possible but with as few words as possible. Write it in a way that makes sense to you, as the future audience will be another language model, not a human. Use complete sentences.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,29 @@
# IDENTITY and PURPOSE
You are an expert at data and concept visualization and in turning complex ideas into a form that can be visualized using Mermaid (markdown) syntax.
You take input of any type and find the best way to simply visualize or demonstrate the core ideas using Mermaid (Markdown).
You always output Markdown Mermaid syntax that can be rendered as a diagram.
# STEPS
- Take the input given and create a visualization that best explains it using elaborate and intricate Mermaid syntax.
- Ensure that the visual would work as a standalone diagram that would fully convey the concept(s).
- Use visual elements such as boxes and arrows and labels (and whatever else) to show the relationships between the data, the concepts, and whatever else, when appropriate.
- Create far more intricate and more elaborate and larger visualizations for concepts that are more complex or have more data.
- Under the Mermaid syntax, output a section called VISUAL EXPLANATION that explains in a set of 10-word bullets how the input was turned into the visualization. Ensure that the explanation and the diagram perfectly match, and if they don't, redo the diagram.
- If the visualization covers too many things, summarize it into its primary takeaway and visualize that instead.
- DO NOT COMPLAIN AND GIVE UP. If it's hard, just try harder or simplify the concept and create the diagram for the upleveled concept.
# OUTPUT INSTRUCTIONS
- DO NOT COMPLAIN. Just output the Mermaid syntax.
- Do not output any code indicators like backticks or code blocks or anything.
- Ensure the visualization can stand alone as a diagram that fully conveys the concept(s), and that it perfectly matches a written explanation of the concepts themselves. Start over if it can't.
- DO NOT output code that is not Mermaid syntax, such as backticks or other code indicators.
- Use high contrast black and white for the diagrams and text in the Mermaid visualizations.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,28 @@
{% include 'patterns/common_text.jinja' %}
# IDENTITY and PURPOSE
You extract deep, thought-provoking, and meaningful reflections from text content. You are especially focused on themes related to the human experience, such as the purpose of life, personal growth, the intersection of technology and humanity, artificial intelligence's societal impact, human potential, collective evolution, and transformative learning. Your reflections aim to provoke new ways of thinking, challenge assumptions, and provide a thoughtful synthesis of the content.
# STEPS
- Extract 3 to 5 of the most profound, thought-provoking, and/or meaningful ideas from the input in a section called REFLECTIONS.
- Each reflection should aim to explore underlying implications, connections to broader human experiences, or highlight a transformative perspective.
- Take a step back and consider the deeper significance or questions that arise from the content.
# OUTPUT INSTRUCTIONS
- The output section should be labeled as REFLECTIONS.
- Each bullet point should be between 20-25 words.
- Avoid repetition in the phrasing and ensure variety in sentence structure.
- The reflections should encourage deeper inquiry and provide a synthesis that transcends surface-level observations.
- Use bullet points, not numbered lists.
- Every bullet should be formatted as a question that elicits contemplation or a statement that offers a profound insight.
- Do not give warnings or notes; only output the requested section.
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,16 @@
{% include 'patterns/common_text.jinja' %}
# SYSTEM ROLE
You are a content summarization assistant that creates dense, information-rich summaries optimized for machine understanding. Your summaries should capture key concepts with minimal words while maintaining complete, clear sentences.
# TASK
Analyze the provided content and create a summary that:
- Captures the core concepts and key information
- Uses clear, direct language
- Maintains context from any previous summaries
# INPUT
{{input_text}}
# OUTPUT

View file

@ -0,0 +1,6 @@
{% include 'patterns/common_text.jinja' %}
Please translate the following text to Portuguese:
{{input_text}}

View file

@ -2,13 +2,14 @@ from pathlib import Path
import streamlit as st
import streamlit_scrollable_textbox as stx # type: ignore
import yaml
from humanize import naturaltime
from loguru import logger
from streamlit_tags import st_tags # type: ignore
from open_notebook.domain import Asset, Source
from open_notebook.graphs.content_process import graph
from open_notebook.utils import token_cost, token_count
from open_notebook.graphs.multipattern import graph as transform_graph
from open_notebook.utils import surreal_clean
from .consts import context_icons
@ -16,50 +17,71 @@ uploads_dir = Path("./.uploads")
uploads_dir.mkdir(parents=True, exist_ok=True)
def run_transformations(input_text, transformations):
    """
    Run a chain of transformations over ``input_text`` and return the result.

    Args:
        input_text: Text used as the initial entry of the graph's content stack.
        transformations: Ordered transformation names/commands to apply.

    Returns:
        The ``output`` value of the final graph state.
    """
    initial_state = {
        "content_stack": [input_text],
        # Hand the graph its own copy so that any in-place consumption of the
        # queue during execution cannot drain the caller's list.
        "transformations": list(transformations),
    }
    final_state = transform_graph.invoke(initial_state)
    return final_state["output"]
# Dialog for a single source: shows its metadata and insights, offers actions
# (transformations, summarize, embed, delete) and displays the full text.
# NOTE(review): this span comes from a diff view and appears to interleave
# removed and added lines; the comments below describe individual statements only.
@st.dialog("Source", width="large")
def source_panel(source_id):
# Load the source; report an error and stop when it cannot be found.
source: Source = Source.get(source_id)
if not source:
st.error("Source not found")
return
title = st.empty()
if source.title:
title.subheader(source.title)
st.caption(f"Created {naturaltime(source.created)}")
# st.markdown(f"**URL:** {source.url}, **File:** {source.file_path}")
summary = st.empty()
for insight in source.insights:
summary.write(insight.insight_type)
summary.write(insight.content)
topics = source.topics or []
if len(topics) > 0:
st_tags(
label="",
text="Press enter to add more",
value=source.topics,
suggestions=source.topics,
maxtags=10,
key="1",
)
# Two tabs: "Process" holds insights and actions, "Source" holds the raw text.
process_tab, source_tab = st.tabs(["Process", "Source"])
with process_tab:
c1, c2 = st.columns([3, 1])
with c1:
title = st.empty()
if source.title:
title.subheader(source.title)
# Describe where the source came from: URL, file path, or plain text.
if source.asset.url:
from_src = f"from URL: {source.asset.url}"
elif source.asset.file_path:
from_src = f"from file: {source.asset.file_path}"
else:
from_src = "from text"
st.caption(f"Created {naturaltime(source.created)}, {from_src}")
# One expander per insight, each with its own delete button keyed by insight id.
for insight in source.insights:
with st.expander(f"**{insight.insight_type}**"):
st.markdown(insight.content)
if st.button("Delete", key=f"delete_insight_{insight.id}"):
insight.delete()
st.rerun(scope="fragment")
if st.button("Delete", icon="🗑️"):
source.delete()
st.rerun()
with c2:
# Each entry under "source_insights" in transformations.yaml becomes a button.
with open("transformations.yaml", "r") as file:
transformations = yaml.safe_load(file)
for transformation in transformations["source_insights"]:
if st.button(
transformation["name"], help=transformation["description"]
):
# Run the configured chain on the full text and save the result,
# passed through surreal_clean, as a new insight of the configured type.
result = run_transformations(
source.full_text, transformation["transformations"]
)
source.add_insight(
transformation["insight_type"], surreal_clean(result)
)
st.rerun(scope="fragment")
cost = token_cost(token_count(source.full_text)) * 1.2
if st.button(f"Summarize (about ${cost:.4f})", icon="📝"):
source.summarize()
st.rerun(scope="fragment")
if st.button(
"Embed vectors",
icon="🦾",
help="This will generate your embedding vectors on the database for powerful search capabilities",
):
source.vectorize()
st.success("Embedding complete")
cost_embedding = token_cost(token_count(source.full_text), 0.02)
if st.button("Delete", icon="🗑️"):
source.delete()
st.rerun()
if st.button(f"Embed (${cost_embedding:.4f})", icon="📝"):
source.vectorize()
st.success("Embedding complete")
st.subheader("Content")
stx.scrollableTextbox(source.full_text, height=300)
# Raw text of the source in a scrollable box.
with source_tab:
st.subheader("Content")
stx.scrollableTextbox(source.full_text, height=300)
@st.dialog("Add a Source", width="large")
@ -105,16 +127,14 @@ def add_source(session_id):
st.write("Saving..")
source = Source(
asset=Asset(url=req.get("url"), file_path=req.get("file_path")),
full_text=surreal_clean(result["content"]),
)
source.save()
source.save_chunks(result["content"])
source.add_to_notebook(st.session_state[session_id]["notebook"].id)
st.write("Summarizing...")
source.summarize()
st.rerun()
# else:
# st.stop()
def source_card(session_id, source):

35
transformations.yaml Normal file
View file

@ -0,0 +1,35 @@
source_insights:
- name: "Summarize"
insight_type: "Content Summary"
description: "Summarize the content"
transformations:
- patterns/makeitdense
- patterns/summarize
- name: "Key Insights"
insight_type: "Key Insights"
description: "Extracts a list of the Key Insights of the content"
transformations:
- patterns/keyinsights
- name: "Make it Dense"
insight_type: "Dense Representation"
description: "Create a dense representation of the content"
transformations:
- patterns/makeitdense
- name: "Analyze Paper"
insight_type: "Paper Analysis"
description: "Analyze the paper and provide a quick summary"
transformations:
- patterns/analyze_paper
- name: "Reflection"
insight_type: "Reflection Questions"
description: "Generates a list of insightful questions to provoke reflection"
transformations:
- patterns/reflection_questions
- name: "Reflection [PT]"
insight_type: "Reflection Questions [PT]"
description: "Generates a list of insightful questions to provoke reflection, translated to Portuguese"
transformations:
- patterns/reflection_questions
- patterns/translate