Add Datadog RUM to chat and architecture pages

This commit is contained in:
Cody Borders 2026-04-15 17:16:08 -07:00
parent 2dc6acc67e
commit 31b26be23f
5 changed files with 292 additions and 54 deletions

View File

@ -1568,9 +1568,12 @@ in `static/chat.js` already renders model replies with
impossible today. The CSP exists to catch any future regression
that accidentally switches to `innerHTML`.
The `/architecture` route overrides the middleware CSP with a
more permissive one because pandoc's standalone HTML has inline
styles.
Datadog RUM adds three narrow allowances to that baseline: the
Browser SDK CDN in `script-src`, the Datadog intake origin in
`connect-src`, and `worker-src blob:` for Session Replay. The
real environment gate lives in `static/rum.js`, which checks for
exactly `bookly.codyborders.com` before loading the SDK, so
localhost and preview hosts never load or initialize RUM.
## Sliding-window rate limiter
@ -1610,9 +1613,13 @@ the agent becomes 400, anything else becomes 500.
This is where the woven literate program is served. The handler
reads `static/architecture.html` (produced by pandoc from this
file) and returns it with a relaxed CSP. If the file does not
exist yet, the route 404s with a clear message rather than
raising a 500.
file) and returns it with a relaxed CSP. The one deliberate CSP
change is adding `'unsafe-inline'` to `style-src`, because pandoc's
standalone HTML emits inline styles. The page also gets the same
`/static/rum.js` bootstrap as the chat UI, but that injection
happens at response time so the generated artifact on disk stays
unchanged. If the file does not exist yet, the route 404s with a
clear message rather than raising a 500.
```python {chunk="server-py" file="server.py"}
"""FastAPI app for Bookly. Hosts /api/chat, /health, and the static chat UI.
@ -1662,20 +1669,66 @@ app = FastAPI(title="Bookly", docs_url=None, redoc_url=None)
# ---------------------------------------------------------------------------
_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com"
_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com"
_RUM_BOOTSTRAP_TAG = '<script src="/static/rum.js"></script>'
def _build_content_security_policy(*, allow_inline_styles: bool) -> str:
    """Assemble the Content-Security-Policy value used by Bookly's HTML pages.

    The policy is same-origin by default and carries the three Datadog RUM
    allowances: the Browser SDK origin in `script-src`, the intake origin in
    `connect-src`, and `worker-src blob:` for the Session Replay worker.
    Whether RUM actually initializes on a given host is decided by the
    browser-side bootstrap, not by this policy.

    Args:
        allow_inline_styles: When true, `style-src` additionally carries
            `'unsafe-inline'`; otherwise only `'self'` is permitted.

    Returns:
        All directives joined with "; " and no trailing separator.
    """
    style_directive = (
        "style-src 'self' 'unsafe-inline'"
        if allow_inline_styles
        else "style-src 'self'"
    )
    return "; ".join(
        [
            "default-src 'self'",
            f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}",
            style_directive,
            "img-src 'self' data:",
            f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}",
            "worker-src blob:",
            "object-src 'none'",
            "base-uri 'none'",
            "frame-ancestors 'none'",
            "form-action 'self'",
        ]
    )
def _inject_rum_bootstrap(html: str) -> str:
    """Return `html` with the shared RUM bootstrap tag placed before `</head>`.

    `/architecture` serves a prebuilt Pandoc artifact from disk. Adding the
    tag to the response body, rather than to the file, leaves the artifact
    untouched while the live page gets the same loader as the chat UI.

    Args:
        html: The full HTML document to serve.

    Returns:
        `html` unchanged when it already contains the bootstrap tag;
        otherwise a copy with the tag inserted immediately before the first
        `</head>`.

    Raises:
        ValueError: If `html` is empty or contains no `</head>`.
    """
    if not html:
        raise ValueError("html must be non-empty")
    if _RUM_BOOTSTRAP_TAG in html:
        # Already instrumented: never emit the loader twice.
        return html

    # partition() splits on the first `</head>` only; an empty separator
    # slot means the closing tag was not found.
    before_head_close, head_close, after_head_close = html.partition("</head>")
    if not head_close:
        raise ValueError("architecture html is missing </head>")

    # The line break must be the `\n` escape: a raw newline inside this
    # single-line f-string is an unterminated string literal.
    updated_html = (
        f"{before_head_close} {_RUM_BOOTSTRAP_TAG}\n{head_close}{after_head_close}"
    )

    # Postconditions: the tag is present exactly once.
    assert _RUM_BOOTSTRAP_TAG in updated_html
    assert updated_html.count(_RUM_BOOTSTRAP_TAG) == 1
    return updated_html
_SECURITY_HEADERS: dict[str, str] = {
# Tight CSP: only same-origin assets, no inline scripts, no embedding.
# The UI is plain HTML+JS under /static, all same-origin.
"Content-Security-Policy": (
"default-src 'self'; "
"script-src 'self'; "
"style-src 'self'; "
"img-src 'self' data:; "
"connect-src 'self'; "
"object-src 'none'; "
"base-uri 'none'; "
"frame-ancestors 'none'; "
"form-action 'self'"
),
# Tight CSP: same-origin assets plus only the Datadog endpoints needed for
# browser RUM and Session Replay. The exact hostname gate lives in
# `static/rum.js`, so localhost and preview hosts stay dark.
"Content-Security-Policy": _build_content_security_policy(allow_inline_styles=False),
"X-Content-Type-Options": "nosniff",
"X-Frame-Options": "DENY",
"Referrer-Policy": "no-referrer",
@ -1885,16 +1938,9 @@ _ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html"
# Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md
# and contains inline styles (and inline SVG from mermaid-filter), so the
# default strict CSP must be relaxed for this one route.
_ARCHITECTURE_CSP = (
"default-src 'self'; "
"style-src 'self' 'unsafe-inline'; "
"script-src 'none'; "
"img-src 'self' data:; "
"object-src 'none'; "
"base-uri 'none'; "
"frame-ancestors 'none'"
)
# chat-page CSP needs one change here: allow inline styles while keeping the
# same Datadog allowances used by the shared RUM bootstrap.
_ARCHITECTURE_CSP = _build_content_security_policy(allow_inline_styles=True)
@app.get("/architecture", response_class=HTMLResponse)
@ -1907,7 +1953,7 @@ def architecture() -> HTMLResponse:
status_code=404,
detail="Architecture document has not been built yet.",
)
response = HTMLResponse(content=html)
response = HTMLResponse(content=_inject_rum_bootstrap(html))
response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP
return response

View File

@ -45,20 +45,65 @@ app = FastAPI(title="Bookly", docs_url=None, redoc_url=None)
# ---------------------------------------------------------------------------
_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com"
_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com"
_RUM_BOOTSTRAP_TAG = '<script src="/static/rum.js"></script>'
def _build_content_security_policy(*, allow_inline_styles: bool) -> str:
    """Assemble the Content-Security-Policy value used by Bookly's HTML pages.

    The policy is same-origin by default and carries the three Datadog RUM
    allowances: the Browser SDK origin in `script-src`, the intake origin in
    `connect-src`, and `worker-src blob:` for the Session Replay worker.
    Whether RUM actually initializes on a given host is decided by the
    browser-side bootstrap, not by this policy.

    Args:
        allow_inline_styles: When true, `style-src` additionally carries
            `'unsafe-inline'`; otherwise only `'self'` is permitted.

    Returns:
        All directives joined with "; " and no trailing separator.
    """
    style_directive = (
        "style-src 'self' 'unsafe-inline'"
        if allow_inline_styles
        else "style-src 'self'"
    )
    return "; ".join(
        [
            "default-src 'self'",
            f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}",
            style_directive,
            "img-src 'self' data:",
            f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}",
            "worker-src blob:",
            "object-src 'none'",
            "base-uri 'none'",
            "frame-ancestors 'none'",
            "form-action 'self'",
        ]
    )
def _inject_rum_bootstrap(html: str) -> str:
    """Return `html` with the shared RUM bootstrap tag placed before `</head>`.

    `/architecture` serves a prebuilt Pandoc artifact from disk. Adding the
    tag to the response body, rather than to the file, leaves the artifact
    untouched while the live page gets the same loader as the chat UI.

    Args:
        html: The full HTML document to serve.

    Returns:
        `html` unchanged when it already contains the bootstrap tag;
        otherwise a copy with the tag inserted immediately before the first
        `</head>`.

    Raises:
        ValueError: If `html` is empty or contains no `</head>`.
    """
    if not html:
        raise ValueError("html must be non-empty")
    if _RUM_BOOTSTRAP_TAG in html:
        # Already instrumented: never emit the loader twice.
        return html

    # partition() splits on the first `</head>` only; an empty separator
    # slot means the closing tag was not found.
    before_head_close, head_close, after_head_close = html.partition("</head>")
    if not head_close:
        raise ValueError("architecture html is missing </head>")

    updated_html = (
        f"{before_head_close} {_RUM_BOOTSTRAP_TAG}\n{head_close}{after_head_close}"
    )

    # Postconditions: the tag is present exactly once.
    assert _RUM_BOOTSTRAP_TAG in updated_html
    assert updated_html.count(_RUM_BOOTSTRAP_TAG) == 1
    return updated_html
_SECURITY_HEADERS: dict[str, str] = {
# Tight CSP: only same-origin assets, no inline scripts, no embedding.
# The UI is plain HTML+JS under /static, all same-origin.
"Content-Security-Policy": (
"default-src 'self'; "
"script-src 'self'; "
"style-src 'self'; "
"img-src 'self' data:; "
"connect-src 'self'; "
"object-src 'none'; "
"base-uri 'none'; "
"frame-ancestors 'none'; "
"form-action 'self'"
),
# Tight CSP: same-origin assets plus only the Datadog endpoints needed for
# browser RUM and Session Replay. The exact hostname gate lives in
# `static/rum.js`, so localhost and preview hosts stay dark.
"Content-Security-Policy": _build_content_security_policy(allow_inline_styles=False),
"X-Content-Type-Options": "nosniff",
"X-Frame-Options": "DENY",
"Referrer-Policy": "no-referrer",
@ -268,16 +313,9 @@ _ARCHITECTURE_HTML_PATH = _STATIC_DIR / "architecture.html"
# Pandoc-generated literate program. The HTML comes from weaving Bookly.lit.md
# and contains inline styles (and inline SVG from mermaid-filter), so the
# default strict CSP must be relaxed for this one route.
_ARCHITECTURE_CSP = (
"default-src 'self'; "
"style-src 'self' 'unsafe-inline'; "
"script-src 'none'; "
"img-src 'self' data:; "
"object-src 'none'; "
"base-uri 'none'; "
"frame-ancestors 'none'"
)
# chat-page CSP needs one change here: allow inline styles while keeping the
# same Datadog allowances used by the shared RUM bootstrap.
_ARCHITECTURE_CSP = _build_content_security_policy(allow_inline_styles=True)
@app.get("/architecture", response_class=HTMLResponse)
@ -290,7 +328,7 @@ def architecture() -> HTMLResponse:
status_code=404,
detail="Architecture document has not been built yet.",
)
response = HTMLResponse(content=html)
response = HTMLResponse(content=_inject_rum_bootstrap(html))
response.headers["Content-Security-Policy"] = _ARCHITECTURE_CSP
return response

View File

@ -5,6 +5,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Bookly Support</title>
<link rel="stylesheet" href="/static/style.css" />
<script src="/static/rum.js"></script>
</head>
<body>
<main class="chat">

77
static/rum.js Normal file
View File

@ -0,0 +1,77 @@
// Shared Datadog RUM bootstrap. Loads the Browser SDK from the Datadog CDN and
// initializes it once, and only when the page is served from ALLOWED_HOSTNAME.
(function () {
  "use strict";

  // Exact hostname gate: any other host (localhost, previews) returns early
  // below without loading the SDK.
  const ALLOWED_HOSTNAME = "bookly.codyborders.com";
  const DATADOG_AGENT_URL =
    "https://www.datadoghq-browser-agent.com/us1/v6/datadog-rum.js";
  const RUM_CONFIG = Object.freeze({
    applicationId: "ad60336f-85fe-4631-9469-973180243552",
    clientToken: "pube161402da279b685acbb640a4366129b",
    site: "datadoghq.com",
    service: "csb",
    env: "prod",
    version: "0.1",
    sessionSampleRate: 100,
    sessionReplaySampleRate: 100,
    trackResources: true,
    trackUserInteractions: true,
    trackLongTasks: true,
    defaultPrivacyLevel: "allow",
  });

  // Calls DD_RUM.init once per window. `__booklyRumStarted` records success;
  // `__booklyRumLoading` is cleared on every path that ends a load attempt.
  function initializeRum(win) {
    if (win.__booklyRumStarted === true) {
      return;
    }

    if (!win.DD_RUM) {
      // The script finished loading but exposed no DD_RUM global. The load
      // attempt is over, so release the loading flag instead of leaving it
      // stuck at true.
      win.__booklyRumLoading = false;
      console.error("Datadog RUM agent was not available after loading.");
      return;
    }

    win.DD_RUM.init(RUM_CONFIG);
    win.__booklyRumStarted = true;
    win.__booklyRumLoading = false;
  }

  // Appends an async <script> for the agent to <head>; initialization happens
  // in the script's load handler.
  function loadRumAgent(doc, win) {
    const headEl = doc.head;
    if (!headEl) {
      console.error("Datadog RUM was not loaded because document.head is missing.");
      return;
    }

    const scriptEl = doc.createElement("script");
    scriptEl.src = DATADOG_AGENT_URL;
    scriptEl.async = true;
    scriptEl.addEventListener("load", function () {
      initializeRum(win);
    });
    scriptEl.addEventListener("error", function () {
      win.__booklyRumLoading = false;
      console.error("Datadog RUM agent failed to load.");
    });

    // Mark the attempt as in flight before the element is attached so a
    // second copy of this bootstrap does not start a parallel load.
    win.__booklyRumLoading = true;
    headEl.appendChild(scriptEl);
  }

  if (window.location.hostname !== ALLOWED_HOSTNAME) {
    return;
  }

  if (window.__booklyRumStarted === true) {
    return;
  }

  if (window.__booklyRumLoading === true) {
    return;
  }

  if (window.DD_RUM) {
    // The agent is already on the page; skip the network load.
    initializeRum(window);
    return;
  }

  loadRumAgent(document, window);
})();

76
tests/test_server.py Normal file
View File

@ -0,0 +1,76 @@
"""Server and static asset tests for Bookly's HTTP surface.
Goal: verify the shared Datadog RUM bootstrap is exposed on both public HTML
pages, and verify the Content-Security-Policy permits only the Datadog
origins required for RUM and Session Replay.
"""
from __future__ import annotations
from pathlib import Path
from fastapi.testclient import TestClient
import server
client = TestClient(server.app)
_REPO_ROOT = Path(__file__).resolve().parent.parent
_DATADOG_SCRIPT_ORIGIN = "https://www.datadoghq-browser-agent.com"
_DATADOG_RUM_INTAKE_ORIGIN = "https://browser-intake-datadoghq.com"
def _read_repo_text(relative_path: str) -> str:
    """Read a repo-relative file as UTF-8, asserting it exists and is non-empty."""
    target = _REPO_ROOT / relative_path
    assert target.is_file(), f"expected file at {target}"

    contents = target.read_text(encoding="utf-8")
    assert contents, f"expected non-empty file at {target}"
    return contents
def test_static_index_html_loads_shared_rum_bootstrap():
    """The chat page served under /static references the shared RUM bootstrap."""
    page = client.get("/static/index.html")

    assert page.status_code == 200
    assert "/static/rum.js" in page.text
def test_architecture_page_loads_shared_rum_bootstrap():
    """The /architecture response body references the shared RUM bootstrap."""
    page = client.get("/architecture")

    assert page.status_code == 200
    assert "/static/rum.js" in page.text
def test_static_page_csp_allows_only_required_datadog_origins():
    """The chat page CSP carries exactly the Datadog allowances and no more.

    Directives are compared whole rather than by substring. A substring check
    such as `"style-src 'self'" in csp` would still pass if `'unsafe-inline'`
    or an extra origin were appended to the directive, which is exactly the
    regression this test exists to catch.
    """
    response = client.get("/static/index.html")
    # Check the status first so a failed request is reported as such rather
    # than as a missing-header KeyError.
    assert response.status_code == 200

    csp = response.headers["content-security-policy"]
    directives = [directive.strip() for directive in csp.split(";")]

    assert f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}" in directives
    assert f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}" in directives
    assert "worker-src blob:" in directives
    assert "style-src 'self'" in directives
    assert "'unsafe-inline'" not in csp
def test_architecture_page_csp_keeps_inline_styles_and_datadog_allowlist():
    """The /architecture CSP allows inline styles plus the Datadog allowances."""
    page = client.get("/architecture")
    policy = page.headers["content-security-policy"]

    assert page.status_code == 200
    expected_fragments = (
        f"script-src 'self' {_DATADOG_SCRIPT_ORIGIN}",
        f"connect-src 'self' {_DATADOG_RUM_INTAKE_ORIGIN}",
        "worker-src blob:",
        "style-src 'self' 'unsafe-inline'",
    )
    for fragment in expected_fragments:
        assert fragment in policy
def test_rum_bootstrap_file_contains_expected_exact_configuration():
    """static/rum.js contains the expected hostname gate, IDs, tags and agent URL."""
    bootstrap_source = _read_repo_text("static/rum.js")

    required_snippets = (
        "bookly.codyborders.com",
        "ad60336f-85fe-4631-9469-973180243552",
        "pube161402da279b685acbb640a4366129b",
        'service: "csb"',
        'env: "prod"',
        'version: "0.1"',
        "https://www.datadoghq-browser-agent.com/us1/v6/datadog-rum.js",
    )
    for snippet in required_snippets:
        assert snippet in bootstrap_source
def test_runtime_injection_leaves_architecture_artifact_unchanged():
    """The on-disk architecture artifact does not reference the RUM bootstrap."""
    artifact_on_disk = _read_repo_text("static/architecture.html")

    assert "/static/rum.js" not in artifact_on_disk