From 31b26be23f5fd28fd974ee81fce74099f41b7e23 Mon Sep 17 00:00:00 2001 From: Cody Borders Date: Wed, 15 Apr 2026 17:16:08 -0700 Subject: [PATCH] Add Datadog RUM to chat and architecture pages --- Bookly.lit.md | 106 +++++++++++++++++++++++++++++++------------ server.py | 86 +++++++++++++++++++++++++---------- static/index.html | 1 + static/rum.js | 77 +++++++++++++++++++++++++++++++ tests/test_server.py | 76 +++++++++++++++++++++++++++++++ 5 files changed, 292 insertions(+), 54 deletions(-) create mode 100644 static/rum.js create mode 100644 tests/test_server.py diff --git a/Bookly.lit.md b/Bookly.lit.md index 8870a52..9eda36e 100644 --- a/Bookly.lit.md +++ b/Bookly.lit.md @@ -1568,9 +1568,12 @@ in `static/chat.js` already renders model replies with impossible today. The CSP exists to catch any future regression that accidentally switches to `innerHTML`. -The `/architecture` route overrides the middleware CSP with a -more permissive one because pandoc's standalone HTML has inline -styles. +Datadog RUM adds three narrow allowances to that baseline: the +Browser SDK CDN in `script-src`, the Datadog intake origin in +`connect-src`, and `worker-src blob:` for Session Replay. The +real environment gate lives in `static/rum.js`, which checks for +exactly `bookly.codyborders.com` before loading the SDK, so +localhost and preview hosts still stay dark. ## Sliding-window rate limiter @@ -1610,9 +1613,13 @@ the agent becomes 400, anything else becomes 500. This is where the woven literate program is served. The handler reads `static/architecture.html` (produced by pandoc from this -file) and returns it with a relaxed CSP. If the file does not -exist yet, the route 404s with a clear message rather than -raising a 500. +file) and returns it with a relaxed CSP. The one deliberate CSP +change is `style-src 'unsafe-inline'`, because pandoc's +standalone HTML emits inline styles. The page also gets the same +`/static/rum.js` bootstrap as the chat UI, but that injection +happens at response time so the generated artifact on disk stays +unchanged. If the file does not exist yet, the route 404s with a +clear message rather than raising a 500. ```python {chunk="server-py" file="server.py"} """FastAPI app for Bookly. Hosts /api/chat, /health, and the static chat UI. @@ -1662,20 +1669,66 @@ app = FastAPI(title="Bookly", docs_url=None, redoc_url=None) # --------------------------------------------------------------------------- +_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com" +_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com" +_RUM_BOOTSTRAP_TAG = '' + + +def _build_content_security_policy(*, allow_inline_styles: bool) -> str: + """Return the CSP shared by the chat UI and the architecture page. + + Datadog RUM needs explicit allowances for its CDN loader, its intake + endpoint, and its Session Replay worker. We keep the policy otherwise + strict and let the browser-side bootstrap decide whether the current host + is allowed to initialize RUM at all. + """ + style_source = "style-src 'self'" + if allow_inline_styles: + style_source = "style-src 'self' 'unsafe-inline'" + + directives = ( + "default-src 'self'", + f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}", + style_source, + "img-src 'self' data:", + f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}", + "worker-src blob:", + "object-src 'none'", + "base-uri 'none'", + "frame-ancestors 'none'", + "form-action 'self'", + ) + return "; ".join(directives) + + +def _inject_rum_bootstrap(html: str) -> str: + """Inject the shared RUM bootstrap into a standalone HTML document. + + `/architecture` serves a prebuilt Pandoc artifact from disk. Injecting the + shared bootstrap here keeps the artifact byte-for-byte unchanged while + ensuring the live page gets the same RUM loader as `/static/index.html`. + """ + if not html: + raise ValueError("html must be non-empty") + if _RUM_BOOTSTRAP_TAG in html: + return html + + head_close = "" + if head_close not in html: + raise ValueError("architecture html is missing ") + + updated_html = html.replace(head_close, f" {_RUM_BOOTSTRAP_TAG} +{head_close}", 1) + assert _RUM_BOOTSTRAP_TAG in updated_html + assert updated_html.count(_RUM_BOOTSTRAP_TAG) == 1 + return updated_html + + _SECURITY_HEADERS: dict[str, str] = { - # Tight CSP: only same-origin assets, no inline scripts, no embedding. - # The UI is plain HTML+JS under /static, all same-origin. - "Content-Security-Policy": ( - "default-src 'self'; " - "script-src 'self'; " - "style-src 'self'; " - "img-src 'self' data:; " - "connect-src 'self'; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'; " - "form-action 'self'" - ), + # Tight CSP: same-origin assets plus only the Datadog endpoints needed for + # browser RUM and Session Replay. The exact hostname gate lives in + # `static/rum.js`, so localhost and preview hosts stay dark. + "Content-Security-Policy": _build_content_security_policy(allow_inline_styles=False), "X-Content-Type-Options": "nosniff", "X-Frame-Options": "DENY", "Referrer-Policy": "no-referrer", @@ -1885,16 +1938,9 @@ _ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html" # Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md # and contains inline styles (and inline SVG from mermaid-filter), so the -# default strict CSP must be relaxed for this one route. -_ARCHITECTURE_CSP = ( - "default-src 'self'; " - "style-src 'self' 'unsafe-inline'; " - "script-src 'none'; " - "img-src 'self' data:; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'" -) +# chat-page CSP needs one change here: allow inline styles while keeping the +# same Datadog allowances used by the shared RUM bootstrap. +_ARCHITECTURE_CSP = _build_content_security_policy(allow_inline_styles=True) @app.get("/architecture", response_class=HTMLResponse) @@ -1907,7 +1953,7 @@ def architecture() -> HTMLResponse: status_code=404, detail="Architecture document has not been built yet.", ) - response = HTMLResponse(content=html) + response = HTMLResponse(content=_inject_rum_bootstrap(html)) response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP return response diff --git a/server.py b/server.py index 8f77b7a..e082113 100644 --- a/server.py +++ b/server.py @@ -45,20 +45,65 @@ app = FastAPI(title="Bookly", docs_url=None, redoc_url=None) # --------------------------------------------------------------------------- +_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com" +_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com" +_RUM_BOOTSTRAP_TAG = '' + + +def _build_content_security_policy(*, allow_inline_styles: bool) -> str: + """Return the CSP shared by the chat UI and the architecture page. + + Datadog RUM needs explicit allowances for its CDN loader, its intake + endpoint, and its Session Replay worker. We keep the policy otherwise + strict and let the browser-side bootstrap decide whether the current host + is allowed to initialize RUM at all. + """ + style_source = "style-src 'self'" + if allow_inline_styles: + style_source = "style-src 'self' 'unsafe-inline'" + + directives = ( + "default-src 'self'", + f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}", + style_source, + "img-src 'self' data:", + f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}", + "worker-src blob:", + "object-src 'none'", + "base-uri 'none'", + "frame-ancestors 'none'", + "form-action 'self'", + ) + return "; ".join(directives) + + +def _inject_rum_bootstrap(html: str) -> str: + """Inject the shared RUM bootstrap into a standalone HTML document. + + `/architecture` serves a prebuilt Pandoc artifact from disk. Injecting the + shared bootstrap here keeps the artifact byte-for-byte unchanged while + ensuring the live page gets the same RUM loader as `/static/index.html`. + """ + if not html: + raise ValueError("html must be non-empty") + if _RUM_BOOTSTRAP_TAG in html: + return html + + head_close = "" + if head_close not in html: + raise ValueError("architecture html is missing ") + + updated_html = html.replace(head_close, f" {_RUM_BOOTSTRAP_TAG}\n{head_close}", 1) + assert _RUM_BOOTSTRAP_TAG in updated_html + assert updated_html.count(_RUM_BOOTSTRAP_TAG) == 1 + return updated_html + + _SECURITY_HEADERS: dict[str, str] = { - # Tight CSP: only same-origin assets, no inline scripts, no embedding. - # The UI is plain HTML+JS under /static, all same-origin. - "Content-Security-Policy": ( - "default-src 'self'; " - "script-src 'self'; " - "style-src 'self'; " - "img-src 'self' data:; " - "connect-src 'self'; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'; " - "form-action 'self'" - ), + # Tight CSP: same-origin assets plus only the Datadog endpoints needed for + # browser RUM and Session Replay. The exact hostname gate lives in + # `static/rum.js`, so localhost and preview hosts stay dark. + "Content-Security-Policy": _build_content_security_policy(allow_inline_styles=False), "X-Content-Type-Options": "nosniff", "X-Frame-Options": "DENY", "Referrer-Policy": "no-referrer", @@ -268,16 +313,9 @@ _ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html" # Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md # and contains inline styles (and inline SVG from mermaid-filter), so the -# default strict CSP must be relaxed for this one route. -_ARCHITECTURE_CSP = ( - "default-src 'self'; " - "style-src 'self' 'unsafe-inline'; " - "script-src 'none'; " - "img-src 'self' data:; " - "object-src 'none'; " - "base-uri 'none'; " - "frame-ancestors 'none'" -) +# chat-page CSP needs one change here: allow inline styles while keeping the +# same Datadog allowances used by the shared RUM bootstrap. +_ARCHITECTURE_CSP = _build_content_security_policy(allow_inline_styles=True) @app.get("/architecture", response_class=HTMLResponse) @@ -290,7 +328,7 @@ def architecture() -> HTMLResponse: status_code=404, detail="Architecture document has not been built yet.", ) - response = HTMLResponse(content=html) + response = HTMLResponse(content=_inject_rum_bootstrap(html)) response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP return response diff --git a/static/index.html b/static/index.html index ea2067a..6d70203 100644 --- a/static/index.html +++ b/static/index.html @@ -5,6 +5,7 @@ Bookly Support +
diff --git a/static/rum.js b/static/rum.js new file mode 100644 index 0000000..690a7dd --- /dev/null +++ b/static/rum.js @@ -0,0 +1,77 @@ +(function () { + "use strict"; + + const ALLOWED_HOSTNAME = "bookly.codyborders.com"; + const DATADOG_AGENT_URL = + "https://www.datadoghq-browser-agent.com/us1/v6/datadog-rum.js"; + const RUM_CONFIG = Object.freeze({ + applicationId: "ad60336f-85fe-4631-9469-973180243552", + clientToken: "pube161402da279b685acbb640a4366129b", + site: "datadoghq.com", + service: "csb", + env: "prod", + version: "0.1", + sessionSampleRate: 100, + sessionReplaySampleRate: 100, + trackResources: true, + trackUserInteractions: true, + trackLongTasks: true, + defaultPrivacyLevel: "allow", + }); + + function initializeRum(win) { + if (win.__booklyRumStarted === true) { + return; + } + + if (!win.DD_RUM) { + console.error("Datadog RUM agent was not available after loading."); + return; + } + + win.DD_RUM.init(RUM_CONFIG); + win.__booklyRumStarted = true; + win.__booklyRumLoading = false; + } + + function loadRumAgent(doc, win) { + const headEl = doc.head; + if (!headEl) { + console.error("Datadog RUM was not loaded because document.head is missing."); + return; + } + + const scriptEl = doc.createElement("script"); + scriptEl.src = DATADOG_AGENT_URL; + scriptEl.async = true; + scriptEl.addEventListener("load", function () { + initializeRum(win); + }); + scriptEl.addEventListener("error", function () { + win.__booklyRumLoading = false; + console.error("Datadog RUM agent failed to load."); + }); + + win.__booklyRumLoading = true; + headEl.appendChild(scriptEl); + } + + if (window.location.hostname !== ALLOWED_HOSTNAME) { + return; + } + + if (window.__booklyRumStarted === true) { + return; + } + + if (window.__booklyRumLoading === true) { + return; + } + + if (window.DD_RUM) { + initializeRum(window); + return; + } + + loadRumAgent(document, window); +})(); diff --git a/tests/test_server.py b/tests/test_server.py new file mode 100644 index 0000000..185fc79 --- /dev/null +++ b/tests/test_server.py @@ -0,0 +1,76 @@ +"""Server and static asset tests for Bookly's HTTP surface. + +Goal: verify the shared Datadog RUM bootstrap is exposed on both public HTML +pages, and verify the Content-Security-Policy permits only the Datadog +origins required for RUM and Session Replay. +""" + +from __future__ import annotations + +from pathlib import Path + +from fastapi.testclient import TestClient + +import server + +client = TestClient(server.app) + +_REPO_ROOT = Path(__file__).resolve().parent.parent +_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com" +_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com" + + +def _read_repo_text(relative_path: str) -> str: + path = _REPO_ROOT / relative_path + assert path.is_file(), f"expected file at {path}" + text = path.read_text(encoding="utf-8") + assert text, f"expected non-empty file at {path}" + return text + + +def test_static_index_html_loads_shared_rum_bootstrap(): + response = client.get("/static/index.html") + assert response.status_code == 200 + assert "/static/rum.js" in response.text + + +def test_architecture_page_loads_shared_rum_bootstrap(): + response = client.get("/architecture") + assert response.status_code == 200 + assert "/static/rum.js" in response.text + + +def test_static_page_csp_allows_only_required_datadog_origins(): + response = client.get("/static/index.html") + csp = response.headers["content-security-policy"] + assert response.status_code == 200 + assert f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}" in csp + assert f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}" in csp + assert "worker-src blob:" in csp + assert "style-src 'self'" in csp + + +def test_architecture_page_csp_keeps_inline_styles_and_datadog_allowlist(): + response = client.get("/architecture") + csp = response.headers["content-security-policy"] + assert response.status_code == 200 + assert f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}" in csp + assert f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}" in csp + assert "worker-src blob:" in csp + assert "style-src 'self' 'unsafe-inline'" in csp + + +def test_rum_bootstrap_file_contains_expected_exact_configuration(): + rum_js = _read_repo_text("static/rum.js") + assert "bookly.codyborders.com" in rum_js + assert "ad60336f-85fe-4631-9469-973180243552" in rum_js + assert "pube161402da279b685acbb640a4366129b" in rum_js + assert 'service: "csb"' in rum_js + assert 'env: "prod"' in rum_js + assert 'version: "0.1"' in rum_js + assert "https://www.datadoghq-browser-agent.com/us1/v6/datadog-rum.js" in rum_js + + +def test_runtime_injection_leaves_architecture_artifact_unchanged(): + architecture_html = _read_repo_text("static/architecture.html") + assert "/static/rum.js" not in architecture_html