fix: delete uploaded files when sources are removed

Previously, uploaded files remained on disk after source deletion,
causing disk space accumulation and potential privacy concerns.
The Source.delete() method now removes associated files before
database cleanup, with graceful error handling to prevent
database inconsistency if file deletion fails.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
LUIS NOVO 2026-01-14 08:11:17 -03:00
parent cb66d6e2b3
commit fad4446f36
2 changed files with 103 additions and 1 deletions

View file

@ -1,4 +1,6 @@
import asyncio
import os
from pathlib import Path
from typing import Any, ClassVar, Dict, List, Literal, Optional, Tuple, Union
from loguru import logger
@ -348,6 +350,26 @@ class Source(ObjectModel):
return data
async def delete(self) -> bool:
"""Delete source and clean up associated file if it exists."""
# Clean up uploaded file if it exists
if self.asset and self.asset.file_path:
file_path = Path(self.asset.file_path)
if file_path.exists():
try:
os.unlink(file_path)
logger.info(f"Deleted file for source {self.id}: {file_path}")
except Exception as e:
logger.warning(
f"Failed to delete file {file_path} for source {self.id}: {e}. "
"Continuing with database deletion."
)
else:
logger.debug(f"File {file_path} not found for source {self.id}, skipping cleanup")
# Call parent delete to remove database record
return await super().delete()
class Note(ObjectModel):
table_name: ClassVar[str] = "note"

View file

@ -5,13 +5,17 @@ This test suite focuses on validation logic, business rules, and data structures
that can be tested without database mocking.
"""
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, patch
import pytest
from pydantic import ValidationError
from open_notebook.ai.models import ModelManager
from open_notebook.domain.base import RecordModel
from open_notebook.domain.content_settings import ContentSettings
from open_notebook.domain.notebook import Note, Notebook, Source
from open_notebook.domain.notebook import Asset, Note, Notebook, Source
from open_notebook.domain.transformation import Transformation
from open_notebook.exceptions import InvalidInputError
from open_notebook.podcasts.models import EpisodeProfile, SpeakerProfile
@ -119,6 +123,82 @@ class TestSourceDomain:
save_data = source3._prepare_save_data()
assert "command" in save_data
@pytest.mark.asyncio
async def test_source_delete_cleans_up_file(self):
"""Test that deleting a source removes the associated file."""
# Create a temporary file
with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp_file:
tmp_file.write(b"Test content")
tmp_path = Path(tmp_file.name)
try:
# Create source with file asset
source = Source(
id="source:test_delete",
title="Test Source",
asset=Asset(file_path=str(tmp_path))
)
# Verify file exists
assert tmp_path.exists()
# Mock the parent delete method to avoid database operations
with patch.object(Source.__bases__[0], 'delete', new_callable=AsyncMock) as mock_delete:
mock_delete.return_value = True
# Delete the source
result = await source.delete()
# Verify parent delete was called
mock_delete.assert_called_once()
assert result is True
# Verify file was deleted
assert not tmp_path.exists()
finally:
# Cleanup in case test fails
if tmp_path.exists():
tmp_path.unlink()
@pytest.mark.asyncio
async def test_source_delete_without_file(self):
"""Test that deleting a source without a file doesn't fail."""
# Create source without file asset
source = Source(
id="source:test_no_file",
title="Test Source",
asset=None
)
# Mock the parent delete method
with patch.object(Source.__bases__[0], 'delete', new_callable=AsyncMock) as mock_delete:
mock_delete.return_value = True
# Delete should complete without error
result = await source.delete()
assert result is True
mock_delete.assert_called_once()
@pytest.mark.asyncio
async def test_source_delete_continues_on_file_error(self):
"""Test that source deletion continues even if file deletion fails."""
# Create source with non-existent file
source = Source(
id="source:test_missing_file",
title="Test Source",
asset=Asset(file_path="/nonexistent/path/file.txt")
)
# Mock the parent delete method
with patch.object(Source.__bases__[0], 'delete', new_callable=AsyncMock) as mock_delete:
mock_delete.return_value = True
# Delete should complete even though file doesn't exist
result = await source.delete()
assert result is True
mock_delete.assert_called_once()
# ============================================================================
# TEST SUITE 5: Note Domain