Security and performance fixes addressing a comprehensive review: - Server-issued HMAC-signed session cookies; client-supplied session_id ignored. Prevents session hijacking via body substitution. - Sliding-window rate limiter per IP and per session. - SessionStore with LRU eviction, idle TTL, per-session threading locks, and a hard turn cap. Bounds memory and serializes concurrent turns for the same session so FastAPI's threadpool cannot corrupt history. - Tool-use loop capped at settings.max_tool_use_iterations; Anthropic client gets an explicit timeout. No more infinite-loop credit burn. - Every tool argument is regex-validated, length-capped, and control-character-stripped. asserts replaced with ValueError so -O cannot silently disable the checks. - PII-safe warning logs: session IDs and reply bodies are hashed, never logged in clear. - hmac.compare_digest for email comparison (constant-time). - Strict Content-Security-Policy plus X-Content-Type-Options, X-Frame-Options, Referrer-Policy, Permissions-Policy via middleware. - Explicit handlers for anthropic.RateLimitError, APIConnectionError, APIStatusError, ValueError; static dir resolved from __file__. - Prompt cache breakpoints on the last tool schema and the last message so per-turn input cost scales linearly, not quadratically. - TypedDict handler argument shapes; direct block.name/block.id access. - functools.lru_cache on _get_client. - Anchored word-boundary regexes for out-of-scope detection to kill false positives on phrases like "I'd recommend contacting...". Literate program: - Bookly.lit.md is now the single source of truth for the five core Python files. Tangles byte-for-byte; verified via tangle.ts --verify. - Prose walkthrough, three mermaid diagrams, narrative per module. - Woven to static/architecture.html with the app's palette (background #f5f3ee) via scripts/architecture-header.html. - New GET /architecture route serves the HTML with a relaxed CSP that allows pandoc's inline styles. Available at bookly.codyborders.com/architecture. - scripts/rebuild_architecture_html.sh regenerates the HTML after edits. - code_reviews/2026-04-15-1433-code-review.md captures the review that drove these changes. All 37 tests pass.
299 lines
10 KiB
Python
299 lines
10 KiB
Python
"""FastAPI app for Bookly. Hosts /api/chat, /health, and the static chat UI.
|
|
|
|
Security posture notes:
|
|
|
|
- Sessions are server-issued and HMAC-signed. The client never chooses its
|
|
own session ID, so a leaked or guessed body cannot hijack someone else's
|
|
chat history. See `_resolve_session`.
|
|
- Every response carries a strict Content-Security-Policy and related
|
|
headers (see `security_headers`). The chat UI already uses `textContent`
|
|
for model replies, so XSS is structurally impossible; CSP is defense in
|
|
depth for future edits.
|
|
- In-memory sliding-window rate limiting is applied per IP and per session.
|
|
Suitable for a single-process demo deployment; swap to a shared store for
|
|
multi-worker.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import hmac
|
|
import logging
|
|
import secrets
|
|
import threading
|
|
import time
|
|
from collections import defaultdict, deque
|
|
from pathlib import Path
|
|
|
|
import anthropic
|
|
from fastapi import FastAPI, HTTPException, Request, Response
|
|
from fastapi.responses import HTMLResponse, RedirectResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel, Field
|
|
|
|
import agent
|
|
from config import settings
|
|
|
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
|
|
logger = logging.getLogger("bookly.server")
|
|
|
|
app = FastAPI(title="Bookly", docs_url=None, redoc_url=None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Security headers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
_SECURITY_HEADERS: dict[str, str] = {
|
|
# Tight CSP: only same-origin assets, no inline scripts, no embedding.
|
|
# The UI is plain HTML+JS under /static, all same-origin.
|
|
"Content-Security-Policy": (
|
|
"default-src 'self'; "
|
|
"script-src 'self'; "
|
|
"style-src 'self'; "
|
|
"img-src 'self' data:; "
|
|
"connect-src 'self'; "
|
|
"object-src 'none'; "
|
|
"base-uri 'none'; "
|
|
"frame-ancestors 'none'; "
|
|
"form-action 'self'"
|
|
),
|
|
"X-Content-Type-Options": "nosniff",
|
|
"X-Frame-Options": "DENY",
|
|
"Referrer-Policy": "no-referrer",
|
|
"Permissions-Policy": "geolocation=(), microphone=(), camera=()",
|
|
}
|
|
|
|
|
|
@app.middleware("http")
|
|
async def security_headers(request: Request, call_next):
|
|
response = await call_next(request)
|
|
for header_name, header_value in _SECURITY_HEADERS.items():
|
|
response.headers.setdefault(header_name, header_value)
|
|
return response
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sliding-window rate limiter (in-memory)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class SlidingWindowRateLimiter:
|
|
"""Per-key request counter over a fixed trailing window.
|
|
|
|
Not meant to be bulletproof -- this is a small demo deployment, not an
|
|
edge-network WAF. Enforces a ceiling per IP and per session so a single
|
|
caller cannot burn the Anthropic budget or exhaust memory by spamming
|
|
`/api/chat`.
|
|
"""
|
|
|
|
def __init__(self, *, window_seconds: int = 60) -> None:
|
|
if window_seconds <= 0:
|
|
raise ValueError("window_seconds must be positive")
|
|
self._window = window_seconds
|
|
self._hits: defaultdict[str, deque[float]] = defaultdict(deque)
|
|
self._lock = threading.Lock()
|
|
|
|
def check(self, key: str, max_hits: int) -> bool:
|
|
"""Record a hit on `key`. Returns True if under the limit, False otherwise."""
|
|
if max_hits <= 0:
|
|
return False
|
|
now = time.monotonic()
|
|
cutoff = now - self._window
|
|
with self._lock:
|
|
bucket = self._hits[key]
|
|
while bucket and bucket[0] < cutoff:
|
|
bucket.popleft()
|
|
if len(bucket) >= max_hits:
|
|
return False
|
|
bucket.append(now)
|
|
return True
|
|
|
|
|
|
_rate_limiter = SlidingWindowRateLimiter(window_seconds=60)
|
|
|
|
|
|
def _client_ip(request: Request) -> str:
|
|
"""Best-effort client IP for rate limiting.
|
|
|
|
If the app is deployed behind a reverse proxy, set the proxy to add
|
|
`X-Forwarded-For` and trust the first hop. Otherwise fall back to the
|
|
direct client address.
|
|
"""
|
|
forwarded = request.headers.get("x-forwarded-for", "")
|
|
if forwarded:
|
|
first = forwarded.split(",", 1)[0].strip()
|
|
if first:
|
|
return first
|
|
if request.client is not None:
|
|
return request.client.host
|
|
return "unknown"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Session cookies (server-issued, HMAC-signed)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
_SESSION_COOKIE_SEPARATOR = "."
|
|
|
|
|
|
def _sign_session_id(session_id: str) -> str:
|
|
secret = settings.session_secret.get_secret_value().encode("utf-8")
|
|
signature = hmac.new(secret, session_id.encode("utf-8"), hashlib.sha256).hexdigest()
|
|
return f"{session_id}{_SESSION_COOKIE_SEPARATOR}{signature}"
|
|
|
|
|
|
def _verify_signed_session(signed_value: str) -> str | None:
|
|
if not signed_value or _SESSION_COOKIE_SEPARATOR not in signed_value:
|
|
return None
|
|
session_id, _, signature = signed_value.partition(_SESSION_COOKIE_SEPARATOR)
|
|
if not session_id or not signature:
|
|
return None
|
|
expected = _sign_session_id(session_id)
|
|
# Compare the full signed form in constant time to avoid timing leaks on
|
|
# the signature bytes.
|
|
if not hmac.compare_digest(expected, signed_value):
|
|
return None
|
|
return session_id
|
|
|
|
|
|
def _issue_new_session_id() -> str:
|
|
return secrets.token_urlsafe(24)
|
|
|
|
|
|
def _resolve_session(request: Request, response: Response) -> str:
|
|
"""Return the session_id for this request, issuing a fresh cookie if needed.
|
|
|
|
The client never chooses the session_id. Anything in the request body
|
|
that claims to be one is ignored. If the cookie is missing or tampered
|
|
with, we mint a new session_id and set the cookie on the response.
|
|
"""
|
|
signed_cookie = request.cookies.get(settings.session_cookie_name, "")
|
|
session_id = _verify_signed_session(signed_cookie)
|
|
if session_id is not None:
|
|
return session_id
|
|
|
|
session_id = _issue_new_session_id()
|
|
response.set_cookie(
|
|
key=settings.session_cookie_name,
|
|
value=_sign_session_id(session_id),
|
|
max_age=settings.session_cookie_max_age_seconds,
|
|
httponly=True,
|
|
secure=settings.session_cookie_secure,
|
|
samesite="lax",
|
|
path="/",
|
|
)
|
|
return session_id
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Request/response models
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class ChatRequest(BaseModel):
|
|
# `session_id` is intentionally NOT accepted from clients. Sessions are
|
|
# tracked server-side via the signed cookie.
|
|
message: str = Field(..., min_length=1, max_length=4000)
|
|
|
|
|
|
class ChatResponse(BaseModel):
|
|
reply: str
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Routes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@app.get("/health")
|
|
def health() -> dict:
|
|
return {"status": "ok"}
|
|
|
|
|
|
@app.get("/")
|
|
def root() -> RedirectResponse:
|
|
return RedirectResponse(url="/static/index.html")
|
|
|
|
|
|
@app.post("/api/chat", response_model=ChatResponse)
|
|
def chat(body: ChatRequest, http_request: Request, http_response: Response) -> ChatResponse:
|
|
session_id = _resolve_session(http_request, http_response)
|
|
|
|
ip = _client_ip(http_request)
|
|
if not _rate_limiter.check(f"ip:{ip}", settings.rate_limit_per_ip_per_minute):
|
|
logger.info("rate_limited scope=ip")
|
|
raise HTTPException(status_code=429, detail="Too many requests. Please slow down.")
|
|
if not _rate_limiter.check(f"session:{session_id}", settings.rate_limit_per_session_per_minute):
|
|
logger.info("rate_limited scope=session")
|
|
raise HTTPException(status_code=429, detail="Too many requests. Please slow down.")
|
|
|
|
try:
|
|
reply = agent.run_turn(session_id, body.message)
|
|
except anthropic.RateLimitError:
|
|
logger.warning("anthropic_rate_limited")
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="Our AI provider is busy right now. Please try again in a moment.",
|
|
)
|
|
except anthropic.APIConnectionError:
|
|
logger.warning("anthropic_connection_error")
|
|
raise HTTPException(
|
|
status_code=503,
|
|
detail="We couldn't reach our AI provider. Please try again in a moment.",
|
|
)
|
|
except anthropic.APIStatusError as exc:
|
|
logger.error("anthropic_api_error status=%s", exc.status_code)
|
|
raise HTTPException(
|
|
status_code=502,
|
|
detail="Our AI provider returned an error. Please try again.",
|
|
)
|
|
except ValueError:
|
|
# Programmer-visible input errors (e.g., blank message). Surface a
|
|
# 400 rather than a 500 so clients can distinguish.
|
|
raise HTTPException(status_code=400, detail="Invalid request.")
|
|
except Exception:
|
|
logger.exception("chat_failed")
|
|
raise HTTPException(status_code=500, detail="Something went wrong handling that message.")
|
|
|
|
return ChatResponse(reply=reply)
|
|
|
|
|
|
# Absolute path so the mount works regardless of the process working directory.
|
|
_STATIC_DIR = Path(__file__).resolve().parent / "static"
|
|
_ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html"
|
|
|
|
|
|
# Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md
|
|
# and contains inline styles (and inline SVG from mermaid-filter), so the
|
|
# default strict CSP must be relaxed for this one route.
|
|
_ARCHITECTURE_CSP = (
|
|
"default-src 'self'; "
|
|
"style-src 'self' 'unsafe-inline'; "
|
|
"script-src 'none'; "
|
|
"img-src 'self' data:; "
|
|
"object-src 'none'; "
|
|
"base-uri 'none'; "
|
|
"frame-ancestors 'none'"
|
|
)
|
|
|
|
|
|
@app.get("/architecture", response_class=HTMLResponse)
|
|
def architecture() -> HTMLResponse:
|
|
"""Serve the woven literate program for the Bookly codebase."""
|
|
try:
|
|
html = _ARCHITECTURE_HTML_PATH.read_text(encoding="utf-8")
|
|
except FileNotFoundError:
|
|
raise HTTPException(
|
|
status_code=404,
|
|
detail="Architecture document has not been built yet.",
|
|
)
|
|
response = HTMLResponse(content=html)
|
|
response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP
|
|
return response
|
|
|
|
|
|
app.mount("/static", StaticFiles(directory=str(_STATIC_DIR)), name="static")
|