From ec41ef8f2fc210cdf56e1428f87943c66dd56893 Mon Sep 17 00:00:00 2001 From: Luis Novo Date: Sun, 19 Apr 2026 16:22:10 -0300 Subject: [PATCH] feat(api): add configurable CORS origins via CORS_ORIGINS (#767) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace hardcoded `allow_origins=["*"]` with a parsed `CORS_ORIGINS` environment variable (comma-separated). Default remains `*` for backward compatibility — no existing deployment breaks — but the API now logs a startup warning prompting users to set it explicitly for production. Exception handlers now route their CORS headers through a shared `_cors_headers()` helper that mirrors Starlette's CORSMiddleware behavior: reflects the request Origin when allowed (handling the browser-rejected `*` + credentials combination correctly), and omits `Access-Control-Allow-Origin` for disallowed origins so error bodies don't leak cross-origin when `CORS_ORIGINS` is configured. Closes #585, #730. Based on the original work by Greg Grace in #597; rewritten on top of current main to address prior review feedback (load_dotenv kept at top, `import os` grouped with stdlib, `_cors_headers` defined before its exception-handler callers, origins parsed once at module load) and to choose a non-breaking default paired with a startup warning instead of a stricter-by-default origin. Co-authored-by: Greg Grace --- CHANGELOG.md | 1 + api/main.py | 94 ++++++++++++------- docs/5-CONFIGURATION/environment-reference.md | 19 ++++ docs/5-CONFIGURATION/security.md | 25 +++++ 4 files changed, 107 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39d06ac..9ded2a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - `OPEN_NOTEBOOK_EMBEDDING_BATCH_SIZE` environment variable to override the embedding batch size; default remains `50`. Helps with CPU-only local embedding and stricter OpenAI-compatible endpoints (#735) +- `CORS_ORIGINS` environment variable to configure the API's allowed origins (comma-separated). Default remains `*` for backward compatibility; the API now logs a startup warning prompting users to set it for production deployments. Exception responses honor the configured origins when explicitly set (#585, #597, #730) ## [1.8.5] - 2026-04-14 diff --git a/api/main.py b/api/main.py index 661637e..3fbac5f 100644 --- a/api/main.py +++ b/api/main.py @@ -3,6 +3,7 @@ from dotenv import load_dotenv load_dotenv() +import os from contextlib import asynccontextmanager from fastapi import FastAPI, Request @@ -12,16 +13,6 @@ from loguru import logger from starlette.exceptions import HTTPException as StarletteHTTPException from api.auth import PasswordAuthMiddleware -from open_notebook.exceptions import ( - AuthenticationError, - ConfigurationError, - ExternalServiceError, - InvalidInputError, - NetworkError, - NotFoundError, - OpenNotebookError, - RateLimitError, -) from api.routers import ( auth, chat, @@ -46,8 +37,57 @@ from api.routers import ( ) from api.routers import commands as commands_router from open_notebook.database.async_migrate import AsyncMigrationManager +from open_notebook.exceptions import ( + AuthenticationError, + ConfigurationError, + ExternalServiceError, + InvalidInputError, + NetworkError, + NotFoundError, + OpenNotebookError, + RateLimitError, +) from open_notebook.utils.encryption import get_secret_from_env + +def _parse_cors_origins(raw: str) -> list[str]: + """Parse CORS_ORIGINS env value into a list of origins.""" + value = raw.strip() + if value == "*": + return ["*"] + return [origin.strip() for origin in value.split(",") if origin.strip()] + + +# Parsed once at module load; CORS_ORIGINS changes require a restart. +_cors_origins_raw = os.getenv("CORS_ORIGINS") +CORS_ALLOWED_ORIGINS = _parse_cors_origins(_cors_origins_raw or "*") +CORS_IS_DEFAULT_WILDCARD = _cors_origins_raw is None + + +def _cors_headers(request: Request) -> dict[str, str]: + """ + Build CORS headers for error responses. + + Mirrors Starlette CORSMiddleware behavior: reflects the request Origin + when the origin is allowed (or when wildcard is configured, since + browsers reject `Access-Control-Allow-Origin: *` combined with + credentials). Omits `Access-Control-Allow-Origin` for disallowed + origins so the browser blocks the error body from leaking cross-origin. + """ + origin = request.headers.get("origin") + headers: dict[str, str] = { + "Access-Control-Allow-Credentials": "true", + "Access-Control-Allow-Methods": "*", + "Access-Control-Allow-Headers": "*", + } + + if origin and ("*" in CORS_ALLOWED_ORIGINS or origin in CORS_ALLOWED_ORIGINS): + headers["Access-Control-Allow-Origin"] = origin + headers["Vary"] = "Origin" + + return headers + + # Import commands to register them in the API process try: logger.info("Commands imported in API process") @@ -61,8 +101,6 @@ async def lifespan(app: FastAPI): Lifespan event handler for the FastAPI application. Runs database migrations automatically on startup. """ - import os - # Startup: Security checks logger.info("Starting API initialization...") @@ -122,6 +160,16 @@ app = FastAPI( lifespan=lifespan, ) +if CORS_IS_DEFAULT_WILDCARD: + logger.warning( + "CORS_ORIGINS is not set — API accepts cross-origin requests from any " + "origin (default: '*'). For production deployments, set CORS_ORIGINS to " + "your frontend origin(s), e.g. " + "CORS_ORIGINS=https://notebook.example.com" + ) +else: + logger.info(f"CORS allowed origins: {CORS_ALLOWED_ORIGINS}") + # Add password authentication middleware first # Exclude /api/auth/status and /api/config from authentication app.add_middleware( @@ -140,7 +188,7 @@ app.add_middleware( # Add CORS middleware last (so it processes first) app.add_middleware( CORSMiddleware, - allow_origins=["*"], # In production, replace with specific origins + allow_origins=CORS_ALLOWED_ORIGINS, allow_credentials=True, allow_methods=["*"], allow_headers=["*"], @@ -159,31 +207,13 @@ async def custom_http_exception_handler(request: Request, exc: StarletteHTTPExce FastAPI, this handler won't be called. In that case, configure your reverse proxy to add CORS headers to error responses. """ - # Get the origin from the request - origin = request.headers.get("origin", "*") - return JSONResponse( status_code=exc.status_code, content={"detail": exc.detail}, - headers={ - **(exc.headers or {}), "Access-Control-Allow-Origin": origin, - "Access-Control-Allow-Credentials": "true", - "Access-Control-Allow-Methods": "*", - "Access-Control-Allow-Headers": "*", - }, + headers={**(exc.headers or {}), **_cors_headers(request)}, ) -def _cors_headers(request: Request) -> dict[str, str]: - origin = request.headers.get("origin", "*") - return { - "Access-Control-Allow-Origin": origin, - "Access-Control-Allow-Credentials": "true", - "Access-Control-Allow-Methods": "*", - "Access-Control-Allow-Headers": "*", - } - - @app.exception_handler(NotFoundError) async def not_found_error_handler(request: Request, exc: NotFoundError): return JSONResponse( diff --git a/docs/5-CONFIGURATION/environment-reference.md b/docs/5-CONFIGURATION/environment-reference.md index e650710..cd8977e 100644 --- a/docs/5-CONFIGURATION/environment-reference.md +++ b/docs/5-CONFIGURATION/environment-reference.md @@ -69,6 +69,25 @@ Comprehensive list of all environment variables available in Open Notebook. --- +## API / CORS + +| Variable | Required? | Default | Description | +|----------|-----------|---------|-------------| +| `CORS_ORIGINS` | No | `*` | Comma-separated list of origins allowed to call the API (e.g. `https://app.example.com,https://www.example.com`). Default `*` accepts any origin; **for production, set this explicitly to your frontend origin(s)**. Changes require an API restart. The API logs a warning on startup when unset. | + +**When to change this**: +- You access the UI at a custom domain (reverse proxy, HTTPS, public deployment). +- The frontend runs on a different port than `3000`. +- You serve the frontend from a different host than the API (e.g. CDN). + +Example for a production deployment behind a reverse proxy: + +```bash +CORS_ORIGINS=https://notebook.example.com +``` + +--- + ## Text-to-Speech (TTS) | Variable | Required? | Default | Description | diff --git a/docs/5-CONFIGURATION/security.md b/docs/5-CONFIGURATION/security.md index 283a6cb..f317963 100644 --- a/docs/5-CONFIGURATION/security.md +++ b/docs/5-CONFIGURATION/security.md @@ -287,6 +287,31 @@ iptables -A INPUT -p tcp --dport 5055 -j DROP See [Reverse Proxy Configuration](reverse-proxy.md) for complete nginx/Caddy/Traefik setup with HTTPS. +### CORS Origins + +The API accepts cross-origin requests from any origin by default (`*`). This is convenient for development and diverse self-hosted setups, but it's not recommended for internet-facing production deployments because any website the user visits can issue authenticated cross-origin requests to your API. + +When `CORS_ORIGINS` is not set, the API logs a startup warning prompting you to configure it. + +**For production, set `CORS_ORIGINS` to your frontend's actual origin(s):** + +```bash +# Single origin +CORS_ORIGINS=https://notebook.example.com + +# Multiple origins (comma-separated) +CORS_ORIGINS=https://notebook.example.com,https://admin.example.com +``` + +**Guidelines:** + +- Always use HTTPS origins in production. +- List only the exact origins that should be allowed to call the API. +- Include the scheme and port (if non-default): `https://example.com`, `http://192.168.1.10:3000`. +- Changes require an API restart to take effect. + +**Error responses** (401, 404, 500, etc.) also respect the configured origins — they only include `Access-Control-Allow-Origin` for allowed origins, so error bodies are not leaked cross-origin when `CORS_ORIGINS` is configured. + --- ## Security Limitations