open-notebook/pages/stream_app/source.py
Luis Novo b7e656a319
Version 1 (#160)
New front-end
Launch Chat API
Manage Sources
Enable re-embedding of all contents
Sources can be added without a notebook now
Improved settings
Enable model selector on all chats
Background processing for better experience
Dark mode
Improved Notes

Improved Docs: 
- Remove all Streamlit references from documentation
- Update deployment guides with React frontend setup
- Fix Docker environment variables format (SURREAL_URL, SURREAL_PASSWORD)
- Update docker image tag from :latest to :v1-latest
- Change navigation references (Settings → Models to just Models)
- Update development setup to include frontend npm commands
- Add MIGRATION.md guide for users upgrading from Streamlit
- Update quick-start guide with correct environment variables
- Add port 5055 documentation for API access
- Update project structure to reflect frontend/ directory
- Remove outdated source-chat documentation files
2025-10-18 12:46:22 -03:00

193 lines
7.4 KiB
Python

import asyncio
import os
from pathlib import Path
from typing import Any
import streamlit as st
from humanize import naturaltime
from loguru import logger
from api.insights_service import insights_service
from api.models_service import models_service
from api.settings_service import settings_service
from api.sources_service import sources_service
from api.transformations_service import transformations_service
from open_notebook.config import UPLOADS_FOLDER
from open_notebook.exceptions import UnsupportedTypeException
from pages.components import source_panel
from pages.stream_app.consts import source_context_icons
@st.dialog("Source", width="large")
def source_panel_dialog(source_id, notebook_id=None):
source_panel(source_id, notebook_id=notebook_id, modal=True)
@st.dialog("Add a Source", width="large")
def add_source(notebook_id):
default_models = models_service.get_default_models()
if not default_models.default_speech_to_text_model:
st.warning(
"Since there is no speech to text model selected, you can't upload audio/video files."
)
source_link = None
source_file = None
source_text = None
content_settings = settings_service.get_settings()
source_type = st.radio("Type", ["Link", "Upload", "Text"])
req: dict[str, Any] = {}
transformations = transformations_service.get_all_transformations()
if source_type == "Link":
source_link = st.text_input("Link")
req["url"] = source_link
elif source_type == "Upload":
source_file = st.file_uploader("Upload")
req["delete_source"] = content_settings.auto_delete_files == "yes"
else:
source_text = st.text_area("Text")
req["content"] = source_text
default_transformations = [t for t in transformations if t.apply_default]
apply_transformations = st.multiselect(
"Apply transformations",
options=transformations,
format_func=lambda t: t.name,
default=default_transformations,
)
if content_settings.default_embedding_option == "ask":
run_embed = st.checkbox(
"Embed content for vector search",
help="Creates an embedded content for vector search. Costs a little money and takes a little bit more time. You can do this later if you prefer.",
)
if not run_embed:
st.caption("You can always embed later by clicking on the source.")
elif content_settings.default_embedding_option == "always":
st.caption("Embedding content for vector search automatically")
run_embed = True
else:
st.caption(
"Not embedding content for vector search as per settings. You can always embed later by clicking on the source."
)
run_embed = False
if st.button("Process", key="add_source"):
logger.debug("Adding source")
with st.status("Processing...", expanded=True):
st.write("Processing document...")
try:
if source_type == "Upload" and source_file is not None:
st.write("Uploading..")
file_name = source_file.name
file_extension = Path(file_name).suffix
base_name = Path(file_name).stem
# Generate unique filename
new_path = os.path.join(UPLOADS_FOLDER, file_name)
counter = 0
while os.path.exists(new_path):
counter += 1
new_file_name = f"{base_name}_{counter}{file_extension}"
new_path = os.path.join(UPLOADS_FOLDER, new_file_name)
req["file_path"] = str(new_path)
# Save the file
with open(new_path, "wb") as f:
f.write(source_file.getbuffer())
from api.sources_service import sources_service
# Convert transformations to IDs
transformation_ids = (
[t.id for t in apply_transformations if t.id is not None]
if apply_transformations
else []
)
# Determine source type and parameters
if source_type == "Link":
sources_service.create_source(
notebook_id=notebook_id,
source_type="link",
url=source_link,
transformations=transformation_ids,
embed=run_embed,
)
elif source_type == "Upload":
delete_source_val = req.get("delete_source", False)
sources_service.create_source(
notebook_id=notebook_id,
source_type="upload",
file_path=req["file_path"],
transformations=transformation_ids,
embed=run_embed,
delete_source=bool(delete_source_val) if not isinstance(delete_source_val, bool) else delete_source_val,
)
else: # Text
sources_service.create_source(
notebook_id=notebook_id,
source_type="text",
content=source_text,
transformations=transformation_ids,
embed=run_embed,
)
except UnsupportedTypeException as e:
st.warning(
"This type of content is not supported yet. If you think it should be, let us know on the project Issues's page"
)
st.error(e)
st.link_button(
"Go to Github Issues",
url="https://www.github.com/lfnovo/open-notebook/issues",
)
st.stop()
except Exception as e:
st.exception(e)
return
st.rerun()
def source_card(source, notebook_id):
# todo: more descriptive icons
icon = "🔗"
with st.container(border=True):
title = (source.title if source.title else "No Title").strip()
st.markdown((f"{icon}**{title}**"))
context_state = st.selectbox(
"Context",
label_visibility="collapsed",
options=source_context_icons,
index=1,
key=f"source_{source.id}",
)
insights = insights_service.get_source_insights(source.id)
st.caption(
f"Updated: {naturaltime(source.updated)}, **{len(insights)}** insights"
)
if st.button("Expand", icon="📝", key=source.id):
source_panel_dialog(source.id, notebook_id)
st.session_state[notebook_id]["context_config"][source.id] = context_state
def source_list_item(source_id, score=None):
source_with_metadata = sources_service.get_source(source_id)
source = source_with_metadata.source
if not source:
st.error("Source not found")
return
icon = "🔗"
with st.expander(
f"{icon} [{score:.2f}] **{source.title}** {naturaltime(source.updated) if source.updated else 'N/A'}"
):
source_insights = asyncio.run(source.get_insights())
for insight in source_insights:
st.markdown(f"**{insight.insight_type}**")
st.write(insight.content)
if st.button("Edit source", icon="📝", key=f"x_edit_source_{source.id}"):
source_panel_dialog(source_id=source.id)