diff --git a/scripts/architecture-header.html b/scripts/architecture-header.html index 0000cc2..7f24491 100644 --- a/scripts/architecture-header.html +++ b/scripts/architecture-header.html @@ -3,50 +3,92 @@ --bg: #f5f3ee; --panel: #ffffff; --ink: #1a1a1a; - --ink-muted: #6b6b6b; + --ink-muted: #5a5a5a; + --ink-faint: #8a8a8a; --accent: #2e5b8a; + --accent-hover: #1f4470; --border: #e2ddd2; + --border-strong: #c9c2b0; --code-bg: #f0ede4; } + * { box-sizing: border-box; } html, body { background: var(--bg); color: var(--ink); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; font-size: 16px; - line-height: 1.6; + line-height: 1.65; margin: 0; } body { max-width: 820px; margin: 0 auto; - padding: 48px 32px 96px; + padding: 56px 36px 120px; } - h1, h2, h3, h4 { - color: var(--ink); - letter-spacing: -0.01em; - line-height: 1.25; - margin-top: 2.2em; - margin-bottom: 0.6em; + + /* Title block */ + header#title-block-header { + margin: 0 0 12px; + padding-bottom: 28px; + border-bottom: 1px solid var(--border-strong); } h1.title { - font-size: 44px; + font-size: 48px; font-weight: 700; - margin-top: 0; - margin-bottom: 0.2em; - letter-spacing: -0.02em; + letter-spacing: -0.025em; + margin: 0 0 4px; + line-height: 1.1; } - h1 { font-size: 28px; font-weight: 700; border-bottom: 1px solid var(--border); padding-bottom: 0.3em; } - h2 { font-size: 22px; font-weight: 600; } - h3 { font-size: 18px; font-weight: 600; color: var(--ink-muted); } + p.author, p.date { color: var(--ink-faint); margin: 0; font-size: 14px; } + + /* Headings -- clear hierarchy with real breathing room between sections */ + h1, h2, h3, h4 { + color: var(--ink); + letter-spacing: -0.012em; + line-height: 1.25; + } + h1 { + font-size: 30px; + font-weight: 700; + margin: 3.2em 0 0.8em; + padding: 1.2em 0 0.5em; + border-top: 2px solid var(--border-strong); + } + h1:first-of-type { border-top: none; padding-top: 0; margin-top: 2em; } + h2 { + font-size: 22px; + font-weight: 600; + margin: 2.4em 0 0.6em; + padding-top: 0.4em; + border-top: 1px solid var(--border); + } + h3 { + font-size: 17px; + font-weight: 600; + color: var(--ink-muted); + margin: 1.8em 0 0.5em; + } + /* Section numbers sit in their own visual slot so the title reads cleanly */ + .header-section-number { + display: inline-block; + color: var(--ink-faint); + font-weight: 500; + margin-right: 0.55em; + font-variant-numeric: tabular-nums; + } + + /* Body text */ p { margin: 0 0 1.1em; } a { color: var(--accent); text-decoration: none; } - a:hover { text-decoration: underline; } + a:hover { color: var(--accent-hover); text-decoration: underline; } + + /* Inline + block code */ code { font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; font-size: 0.88em; background: var(--code-bg); - padding: 0.1em 0.35em; + padding: 0.12em 0.4em; border-radius: 4px; border: 1px solid var(--border); } @@ -54,10 +96,11 @@ background: var(--panel); border: 1px solid var(--border); border-radius: 8px; - padding: 16px 20px; + padding: 18px 22px; overflow-x: auto; - font-size: 13.5px; + font-size: 13px; line-height: 1.55; + margin: 1.4em 0; } pre code { background: transparent; @@ -66,39 +109,82 @@ font-size: inherit; } .sourceCode { background: transparent; } - #TOC { + + /* Table of contents -- real bullets, clear nesting */ + nav#TOC { background: var(--panel); border: 1px solid var(--border); - border-radius: 8px; - padding: 20px 28px; - margin: 28px 0 40px; + border-radius: 10px; + padding: 26px 36px 26px 28px; + margin: 32px 0 48px; } - #TOC ul { list-style: none; padding-left: 1.1em; margin: 0.3em 0; } - #TOC > ul { padding-left: 0; } - #TOC li { margin: 0.25em 0; } - #TOC a { color: var(--ink); } - .mermaid, .diagram, figure { + nav#TOC::before { + content: "Contents"; + display: block; + font-size: 12px; + text-transform: uppercase; + letter-spacing: 0.12em; + font-weight: 700; + color: var(--ink-faint); + margin-bottom: 14px; + } + nav#TOC ul { + list-style: disc; + padding-left: 1.35em; + margin: 0.35em 0; + } + nav#TOC > ul { padding-left: 1.1em; } + nav#TOC ul ul { + list-style: circle; + margin: 0.2em 0 0.35em; + } + nav#TOC li { + margin: 0.35em 0; + padding-left: 0.25em; + color: var(--ink-muted); + } + nav#TOC li::marker { color: var(--border-strong); } + nav#TOC a { color: var(--ink); } + nav#TOC a:hover { color: var(--accent); } + nav#TOC .toc-section-number { + color: var(--ink-faint); + font-variant-numeric: tabular-nums; + font-weight: 500; + margin-right: 0.45em; + } + + /* Mermaid / figures */ + figure, .mermaid, .diagram { background: var(--panel); border: 1px solid var(--border); - border-radius: 8px; - padding: 20px; - margin: 24px 0; + border-radius: 10px; + padding: 22px; + margin: 28px 0; text-align: center; } figure img, figure svg, p img { max-width: 100%; height: auto; } + + /* Callouts */ blockquote { border-left: 3px solid var(--accent); - margin: 1.2em 0; - padding: 0.2em 1em; + margin: 1.3em 0; + padding: 0.25em 1.1em; color: var(--ink-muted); background: var(--panel); border-radius: 0 6px 6px 0; } - header#title-block-header { - margin-bottom: 0.4em; + + /* Body bullet lists (outside TOC) */ + body > ul, body > ol, + h1 ~ ul, h1 ~ ol, h2 ~ ul, h2 ~ ol, h3 ~ ul, h3 ~ ol { + padding-left: 1.4em; } + @media (max-width: 720px) { - body { padding: 28px 18px 72px; } - h1.title { font-size: 34px; } + body { padding: 32px 20px 80px; } + h1.title { font-size: 36px; } + h1 { font-size: 24px; } + h2 { font-size: 19px; } + nav#TOC { padding: 22px 26px; } } diff --git a/scripts/rebuild_architecture_html.sh b/scripts/rebuild_architecture_html.sh index f0e9c30..a35f6bd 100755 --- a/scripts/rebuild_architecture_html.sh +++ b/scripts/rebuild_architecture_html.sh @@ -16,6 +16,7 @@ pandoc Bookly.lit.md \ --filter mermaid-filter \ --toc \ --toc-depth=3 \ + --number-sections \ --highlight-style=tango \ -H scripts/architecture-header.html \ --metadata pagetitle="Bookly" diff --git a/static/architecture.html b/static/architecture.html index 0c3f339..a61105c 100644 --- a/static/architecture.html +++ b/static/architecture.html @@ -236,50 +236,88 @@ code span.wa { color: #8f5902; font-weight: bold; font-style: italic; } --bg: #f5f3ee; --panel: #ffffff; --ink: #1a1a1a; ---ink-muted: #6b6b6b; +--ink-muted: #5a5a5a; +--ink-faint: #8a8a8a; --accent: #2e5b8a; +--accent-hover: #1f4470; --border: #e2ddd2; +--border-strong: #c9c2b0; --code-bg: #f0ede4; } +* { box-sizing: border-box; } html, body { background: var(--bg); color: var(--ink); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; font-size: 16px; -line-height: 1.6; +line-height: 1.65; margin: 0; } body { max-width: 820px; margin: 0 auto; -padding: 48px 32px 96px; +padding: 56px 36px 120px; } -h1, h2, h3, h4 { -color: var(--ink); -letter-spacing: -0.01em; -line-height: 1.25; -margin-top: 2.2em; -margin-bottom: 0.6em; + +header#title-block-header { +margin: 0 0 12px; +padding-bottom: 28px; +border-bottom: 1px solid var(--border-strong); } h1.title { -font-size: 44px; +font-size: 48px; font-weight: 700; -margin-top: 0; -margin-bottom: 0.2em; -letter-spacing: -0.02em; +letter-spacing: -0.025em; +margin: 0 0 4px; +line-height: 1.1; } -h1 { font-size: 28px; font-weight: 700; border-bottom: 1px solid var(--border); padding-bottom: 0.3em; } -h2 { font-size: 22px; font-weight: 600; } -h3 { font-size: 18px; font-weight: 600; color: var(--ink-muted); } +p.author, p.date { color: var(--ink-faint); margin: 0; font-size: 14px; } + +h1, h2, h3, h4 { +color: var(--ink); +letter-spacing: -0.012em; +line-height: 1.25; +} +h1 { +font-size: 30px; +font-weight: 700; +margin: 3.2em 0 0.8em; +padding: 1.2em 0 0.5em; +border-top: 2px solid var(--border-strong); +} +h1:first-of-type { border-top: none; padding-top: 0; margin-top: 2em; } +h2 { +font-size: 22px; +font-weight: 600; +margin: 2.4em 0 0.6em; +padding-top: 0.4em; +border-top: 1px solid var(--border); +} +h3 { +font-size: 17px; +font-weight: 600; +color: var(--ink-muted); +margin: 1.8em 0 0.5em; +} + +.header-section-number { +display: inline-block; +color: var(--ink-faint); +font-weight: 500; +margin-right: 0.55em; +font-variant-numeric: tabular-nums; +} + p { margin: 0 0 1.1em; } a { color: var(--accent); text-decoration: none; } -a:hover { text-decoration: underline; } +a:hover { color: var(--accent-hover); text-decoration: underline; } + code { font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace; font-size: 0.88em; background: var(--code-bg); -padding: 0.1em 0.35em; +padding: 0.12em 0.4em; border-radius: 4px; border: 1px solid var(--border); } @@ -287,10 +325,11 @@ pre { background: var(--panel); border: 1px solid var(--border); border-radius: 8px; -padding: 16px 20px; +padding: 18px 22px; overflow-x: auto; -font-size: 13.5px; +font-size: 13px; line-height: 1.55; +margin: 1.4em 0; } pre code { background: transparent; @@ -299,40 +338,78 @@ padding: 0; font-size: inherit; } .sourceCode { background: transparent; } -#TOC { + +nav#TOC { background: var(--panel); border: 1px solid var(--border); -border-radius: 8px; -padding: 20px 28px; -margin: 28px 0 40px; +border-radius: 10px; +padding: 26px 36px 26px 28px; +margin: 32px 0 48px; } -#TOC ul { list-style: none; padding-left: 1.1em; margin: 0.3em 0; } -#TOC > ul { padding-left: 0; } -#TOC li { margin: 0.25em 0; } -#TOC a { color: var(--ink); } -.mermaid, .diagram, figure { +nav#TOC::before { +content: "Contents"; +display: block; +font-size: 12px; +text-transform: uppercase; +letter-spacing: 0.12em; +font-weight: 700; +color: var(--ink-faint); +margin-bottom: 14px; +} +nav#TOC ul { +list-style: disc; +padding-left: 1.35em; +margin: 0.35em 0; +} +nav#TOC > ul { padding-left: 1.1em; } +nav#TOC ul ul { +list-style: circle; +margin: 0.2em 0 0.35em; +} +nav#TOC li { +margin: 0.35em 0; +padding-left: 0.25em; +color: var(--ink-muted); +} +nav#TOC li::marker { color: var(--border-strong); } +nav#TOC a { color: var(--ink); } +nav#TOC a:hover { color: var(--accent); } +nav#TOC .toc-section-number { +color: var(--ink-faint); +font-variant-numeric: tabular-nums; +font-weight: 500; +margin-right: 0.45em; +} + +figure, .mermaid, .diagram { background: var(--panel); border: 1px solid var(--border); -border-radius: 8px; -padding: 20px; -margin: 24px 0; +border-radius: 10px; +padding: 22px; +margin: 28px 0; text-align: center; } figure img, figure svg, p img { max-width: 100%; height: auto; } + blockquote { border-left: 3px solid var(--accent); -margin: 1.2em 0; -padding: 0.2em 1em; +margin: 1.3em 0; +padding: 0.25em 1.1em; color: var(--ink-muted); background: var(--panel); border-radius: 0 6px 6px 0; } -header#title-block-header { -margin-bottom: 0.4em; + +body > ul, body > ol, +h1 ~ ul, h1 ~ ol, h2 ~ ul, h2 ~ ol, h3 ~ ul, h3 ~ ol { +padding-left: 1.4em; } @media (max-width: 720px) { -body { padding: 28px 18px 72px; } -h1.title { font-size: 34px; } +body { padding: 32px 20px 80px; } +h1.title { font-size: 36px; } +h1 { font-size: 24px; } +h2 { font-size: 19px; } +nav#TOC { padding: 22px 26px; } } @@ -342,43 +419,34 @@ h1.title { font-size: 34px; } -

Introduction

+

1 Introduction

Bookly is a customer-support chatbot for a bookstore. It handles three things: looking up orders, processing returns, and answering a small set of standard policy questions. Everything else it refuses, @@ -393,11 +461,11 @@ previous layers have failed.

code you see below is the code that runs. Tangling this file produces the Python source tree byte-for-byte; weaving it produces the HTML you are reading.

-

The four guardrail layers

+

2 The four guardrail layers

Before anything else, it helps to see the layers laid out in one picture. Each layer is a separate defence, and a malicious or confused input has to defeat all of them to cause harm.

-

+

Layer 1 is the system prompt itself. It tells the model what Bookly is, what it can and cannot help with, what the return policy actually says (quoted verbatim, not paraphrased), and exactly which template to @@ -409,17 +477,17 @@ of what the model decides. Layer 4 lives at the end of the agent loop and does a deterministic regex pass over the final reply looking for things like fabricated order IDs, markdown leakage, and off-topic engagement.

-

Request lifecycle

+

3 Request lifecycle

A single user message travels this path:

-

Module layout

+

4 Module layout

Five Python files form the core. They depend on each other in one direction only – there are no cycles.

The rest of this document visits each module in dependency order: configuration first, then the data fixtures they read, then tools, then the agent loop, then the HTTP layer on top.

-

Configuration

+

5 Configuration

Every setting that might reasonably change between environments lives in one place. The two required values – the Anthropic API key and the session-cookie signing secret – are wrapped in SecretStr so @@ -503,7 +571,7 @@ limiter in server.py. # and `session_secret` from environment / .env at runtime, but mypy sees them as # required constructor arguments and has no way to know about that. settings = Settings() # type: ignore[call-arg] -

Data fixtures

+

6 Data fixtures

Bookly does not talk to a real database. Four fixture orders are enough to cover the interesting scenarios: a delivered order that is still inside the 30-day return window, an in-flight order that has not @@ -634,7 +702,7 @@ successful return.

# Mutated at runtime by `initiate_return`. Keyed by return_id. RETURNS: dict[str, dict] = {} -

Tools: Layer 3 enforcement

+

7 Tools: Layer 3 enforcement

Four tools back the agent: lookup_order, check_return_eligibility, initiate_return, and lookup_policy. Each has an Anthropic-format schema (used in @@ -1105,12 +1173,12 @@ the model is an untrusted caller, so the runtime checks stay.

# `_require_string` already stripped control characters, and the error # messages themselves are constructed from field names, not user data. return {"error": "invalid_arguments", "message": str(exc)} -

Agent loop

+

8 Agent loop

This is the biggest file. It wires everything together: the system prompt, runtime reminders, output validation (Layer 4), the in-memory session store with per-session locking, the cached Anthropic client, and the actual tool-use loop that drives a turn end to end.

-

System prompt

+

8.1 System prompt

The prompt is structured with XML-style tags (<identity>, <critical_rules>, <scope>, <return_policy>, @@ -1125,7 +1193,7 @@ enforcement in tools.py cannot disagree.

demonstrates a case that is easy to get wrong: missing order ID, quoting a policy verbatim, refusing an off-topic request, disambiguating between two orders.

-

Runtime reminders

+

8.2 Runtime reminders

On every turn, build_system_content appends a short CRITICAL_REMINDER block to the system content. Once the turn count crosses LONG_CONVERSATION_TURN_THRESHOLD, a @@ -1134,7 +1202,7 @@ second LONG_CONVERSATION_REMINDER is added. The big cache_control: ephemeral – the reminders vary per turn and we want them at the highest-attention position, not in the cached prefix.

-

Layer 4 output validation

+

8.3 Layer 4 output validation

After the model produces its final reply, validate_reply runs four cheap deterministic checks: every BK-NNNN string in the reply must also appear in a tool result from this turn, every ISO @@ -1146,7 +1214,7 @@ returned as a frozen ValidationResult.

keyword set. That false-positived on plenty of legitimate support replies (“I’d recommend contacting…”). The current patterns use word boundaries so only the intended phrases trip them.

-

Session store

+

8.4 Session store

SessionStore is a bounded in-memory LRU with an idle TTL. It stores Session objects (history, guard state, turn count) keyed by opaque server-issued session IDs. It also owns the @@ -1160,7 +1228,7 @@ two different lock instances.

Under the “single-process demo deployment” constraint this is enough. For multi-worker, the whole class would get swapped for a Redis-backed equivalent.

-

The tool-use loop

+

8.5 The tool-use loop

_run_tool_use_loop drives the model until it stops asking for tools. It is bounded by settings.max_tool_use_iterations so a runaway model cannot @@ -1173,7 +1241,7 @@ with cache_control: ephemeral so prior turns do not need to be re-tokenized on every call. This turns the per-turn input-token cost from O(turns^2) into O(turns) across a session.

-

run_turn

+

8.6 run_turn

run_turn is the top-level entry point the server calls. It validates its inputs, acquires the per-session lock, appends the user message, runs the loop, and then either persists the final reply to @@ -1810,14 +1878,14 @@ response.

) session.turn_count += 1 return reply_text -

HTTP surface

+

9 HTTP surface

The FastAPI app exposes four routes: GET /health, GET / (redirects to /static/index.html), POST /api/chat, and GET /architecture (this very document). Everything else is deliberately missing – the OpenAPI docs pages and the redoc pages are disabled so the public surface is as small as possible.

-

Security headers

+

9.1 Security headers

A middleware injects a strict Content-Security-Policy and friends on every response. CSP is defense in depth: the chat UI in static/chat.js already renders model replies with @@ -1827,7 +1895,8 @@ regression that accidentally switches to innerHTML.

The /architecture route overrides the middleware CSP with a more permissive one because pandoc’s standalone HTML has inline styles.

-

Sliding-window rate limiter

+

9.2 Sliding-window rate +limiter

SlidingWindowRateLimiter keeps a deque of timestamps per key and evicts anything older than the window. The /api/chat handler checks twice per call – once with an @@ -1837,7 +1906,7 @@ cookies, and a legitimate user does not get locked out by a noisy neighbour on the same IP.

Suitable for a single-process demo deployment. A multi-worker deployment would externalize this to Redis.

-

Session cookies

+

9.3 Session cookies

The client never chooses its own session ID. On the first request a new random ID is minted, HMAC-signed with settings.session_secret, and set in an HttpOnly, @@ -1846,7 +1915,7 @@ verifies the signature in constant time (hmac.compare_digest) and trusts nothing else. A leaked or guessed request body cannot hijack another user’s conversation because the session ID is not in the body at all.

-

/api/chat

+

9.4 /api/chat

The handler resolves the session, checks both rate limits, then calls into agent.run_turn. The Anthropic exception hierarchy is caught explicitly so a rate-limit incident and a code bug cannot look @@ -1854,7 +1923,7 @@ identical to operators: anthropic.RateLimitError becomes 503, APIConnectionError becomes 503, APIStatusError becomes 502, ValueError from the agent becomes 400, anything else becomes 500.

-

/architecture

+

9.5 /architecture

This is where the woven literate program is served. The handler reads static/architecture.html (produced by pandoc from this file) and returns it with a relaxed CSP. If the file does not exist yet,