diff --git a/Bookly.lit.md b/Bookly.lit.md index 8870a52..9eda36e 100644 --- a/Bookly.lit.md +++ b/Bookly.lit.md @@ -1568,9 +1568,12 @@ in `static/chat.js` already renders model replies with impossible today. The CSP exists to catch any future regression that accidentally switches to `innerHTML`. -The `/architecture` route overrides the middleware CSP with a -more permissive one because pandoc's standalone HTML has inline -styles. +Datadog RUM adds three narrow allowances to that baseline: the +Browser SDK CDN in `script-src`, the Datadog intake origin in +`connect-src`, and `worker-src blob:` for Session Replay. The +real environment gate lives in `static/rum.js`, which checks for +exactly `bookly.codyborders.com` before loading the SDK, so +localhost and preview hosts still stay dark. ## Sliding-window rate limiter @@ -1610,9 +1613,13 @@ the agent becomes 400, anything else becomes 500. This is where the woven literate program is served. The handler reads `static/architecture.html` (produced by pandoc from this -file) and returns it with a relaxed CSP. If the file does not -exist yet, the route 404s with a clear message rather than -raising a 500. +file) and returns it with a relaxed CSP. The one deliberate CSP +change is `style-src 'unsafe-inline'`, because pandoc's +standalone HTML emits inline styles. The page also gets the same +`/static/rum.js` bootstrap as the chat UI, but that injection +happens at response time so the generated artifact on disk stays +unchanged. If the file does not exist yet, the route 404s with a +clear message rather than raising a 500. ```python {chunk="server-py" file="server.py"} """FastAPI app for Bookly. Hosts /api/chat, /health, and the static chat UI. @@ -1662,20 +1669,66 @@ app = FastAPI(title="Bookly", docs_url=None, redoc_url=None) # --------------------------------------------------------------------------- +_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com" +_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com" +_RUM_BOOTSTRAP_TAG = '' + + +def _build_content_security_policy(*, allow_inline_styles: bool) -> str: + """Return the CSP shared by the chat UI and the architecture page. + + Datadog RUM needs explicit allowances for its CDN loader, its intake + endpoint, and its Session Replay worker. We keep the policy otherwise + strict and let the browser-side bootstrap decide whether the current host + is allowed to initialize RUM at all. + """ + style_source = "style-src 'self'" + if allow_inline_styles: + style_source = "style-src 'self' 'unsafe-inline'" + + directives = ( + "default-src 'self'", + f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}", + style_source, + "img-src 'self' data:", + f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}", + "worker-src blob:", + "object-src 'none'", + "base-uri 'none'", + "frame-ancestors 'none'", + "form-action 'self'", + ) + return "; ".join(directives) + + +def _inject_rum_bootstrap(html: str) -> str: + """Inject the shared RUM bootstrap into a standalone HTML document. + + `/architecture` serves a prebuilt Pandoc artifact from disk. Injecting the + shared bootstrap here keeps the artifact byte-for-byte unchanged while + ensuring the live page gets the same RUM loader as `/static/index.html`. + """ + if not html: + raise ValueError("html must be non-empty") + if _RUM_BOOTSTRAP_TAG in html: + return html + + head_close = "" + if head_close not in html: + raise ValueError("architecture html is missing ") + + updated_html = html.replace(head_close, f" {_RUM_BOOTSTRAP_TAG} +{head_close}", 1) + assert _RUM_BOOTSTRAP_TAG in updated_html + assert updated_html.count(_RUM_BOOTSTRAP_TAG) == 1 + return updated_html + + _SECURITY_HEADERS: dict[str, str] = { - # Tight CSP: only same-origin assets, no inline scripts, no embedding. - # The UI is plain HTML+JS under /static, all same-origin. - "Content-Security-Policy": ( - "default-src 'self'; " - "script-src 'self'; " - "style-src 'self'; " - "img-src 'self' data:; " - "connect-src 'self'; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'; " - "form-action 'self'" - ), + # Tight CSP: same-origin assets plus only the Datadog endpoints needed for + # browser RUM and Session Replay. The exact hostname gate lives in + # `static/rum.js`, so localhost and preview hosts stay dark. + "Content-Security-Policy": _build_content_security_policy(allow_inline_styles=False), "X-Content-Type-Options": "nosniff", "X-Frame-Options": "DENY", "Referrer-Policy": "no-referrer", @@ -1885,16 +1938,9 @@ _ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html" # Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md # and contains inline styles (and inline SVG from mermaid-filter), so the -# default strict CSP must be relaxed for this one route. -_ARCHITECTURE_CSP = ( - "default-src 'self'; " - "style-src 'self' 'unsafe-inline'; " - "script-src 'none'; " - "img-src 'self' data:; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'" -) +# chat-page CSP needs one change here: allow inline styles while keeping the +# same Datadog allowances used by the shared RUM bootstrap. +_ARCHITECTURE_CSP = _build_content_security_policy(allow_inline_styles=True) @app.get("/architecture", response_class=HTMLResponse) @@ -1907,7 +1953,7 @@ def architecture() -> HTMLResponse: status_code=404, detail="Architecture document has not been built yet.", ) - response = HTMLResponse(content=html) + response = HTMLResponse(content=_inject_rum_bootstrap(html)) response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP return response diff --git a/server.py b/server.py index 8f77b7a..e082113 100644 --- a/server.py +++ b/server.py @@ -45,20 +45,65 @@ app = FastAPI(title="Bookly", docs_url=None, redoc_url=None) # --------------------------------------------------------------------------- +_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com" +_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com" +_RUM_BOOTSTRAP_TAG = '' + + +def _build_content_security_policy(*, allow_inline_styles: bool) -> str: + """Return the CSP shared by the chat UI and the architecture page. + + Datadog RUM needs explicit allowances for its CDN loader, its intake + endpoint, and its Session Replay worker. We keep the policy otherwise + strict and let the browser-side bootstrap decide whether the current host + is allowed to initialize RUM at all. + """ + style_source = "style-src 'self'" + if allow_inline_styles: + style_source = "style-src 'self' 'unsafe-inline'" + + directives = ( + "default-src 'self'", + f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}", + style_source, + "img-src 'self' data:", + f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}", + "worker-src blob:", + "object-src 'none'", + "base-uri 'none'", + "frame-ancestors 'none'", + "form-action 'self'", + ) + return "; ".join(directives) + + +def _inject_rum_bootstrap(html: str) -> str: + """Inject the shared RUM bootstrap into a standalone HTML document. + + `/architecture` serves a prebuilt Pandoc artifact from disk. Injecting the + shared bootstrap here keeps the artifact byte-for-byte unchanged while + ensuring the live page gets the same RUM loader as `/static/index.html`. + """ + if not html: + raise ValueError("html must be non-empty") + if _RUM_BOOTSTRAP_TAG in html: + return html + + head_close = "" + if head_close not in html: + raise ValueError("architecture html is missing ") + + updated_html = html.replace(head_close, f" {_RUM_BOOTSTRAP_TAG}\n{head_close}", 1) + assert _RUM_BOOTSTRAP_TAG in updated_html + assert updated_html.count(_RUM_BOOTSTRAP_TAG) == 1 + return updated_html + + _SECURITY_HEADERS: dict[str, str] = { - # Tight CSP: only same-origin assets, no inline scripts, no embedding. - # The UI is plain HTML+JS under /static, all same-origin. - "Content-Security-Policy": ( - "default-src 'self'; " - "script-src 'self'; " - "style-src 'self'; " - "img-src 'self' data:; " - "connect-src 'self'; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'; " - "form-action 'self'" - ), + # Tight CSP: same-origin assets plus only the Datadog endpoints needed for + # browser RUM and Session Replay. The exact hostname gate lives in + # `static/rum.js`, so localhost and preview hosts stay dark. + "Content-Security-Policy": _build_content_security_policy(allow_inline_styles=False), "X-Content-Type-Options": "nosniff", "X-Frame-Options": "DENY", "Referrer-Policy": "no-referrer", @@ -268,16 +313,9 @@ _ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html" # Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md # and contains inline styles (and inline SVG from mermaid-filter), so the -# default strict CSP must be relaxed for this one route. -_ARCHITECTURE_CSP = ( - "default-src 'self'; " - "style-src 'self' 'unsafe-inline'; " - "script-src 'none'; " - "img-src 'self' data:; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'" -) +# chat-page CSP needs one change here: allow inline styles while keeping the +# same Datadog allowances used by the shared RUM bootstrap. +_ARCHITECTURE_CSP = _build_content_security_policy(allow_inline_styles=True) @app.get("/architecture", response_class=HTMLResponse) @@ -290,7 +328,7 @@ def architecture() -> HTMLResponse: status_code=404, detail="Architecture document has not been built yet.", ) - response = HTMLResponse(content=html) + response = HTMLResponse(content=_inject_rum_bootstrap(html)) response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP return response diff --git a/static/index.html b/static/index.html index ea2067a..6d70203 100644 --- a/static/index.html +++ b/static/index.html @@ -5,6 +5,7 @@