A FastAPI + vanilla JS chat app fronting an Anthropic Claude agent for order status, returns, and policy questions.

Architecture:
- agent.py: system prompt, runtime reminder injection, output validation, and the agentic tool-use loop, with prompt caching on the system prompt block
- tools.py: four tools (lookup_order, check_return_eligibility, initiate_return, lookup_policy) with a per-session SessionGuardState enforcing protocol ordering on the tool side
- mock_data.py: orders, return policy, and FAQ entries used as the single source of truth by both the prompt and the tools
- server.py: FastAPI app exposing /api/chat, /health, and the static UI
- static/: vanilla HTML/CSS/JS chat UI, no build step
- tests/: 30 tests covering tool-side enforcement, the privacy boundary, output validation, and the agent loop with a mocked Anthropic client
- deploy/: systemd unit and nginx site config for production
184 lines
5.5 KiB
Python
184 lines
5.5 KiB
Python
"""Tool-handler tests covering Layer 3 enforcement and the privacy boundary.
|
|
|
|
Goal: verify that the tools, on their own, refuse the unsafe operations even
|
|
if the model ignores every system-prompt rule. The model never appears in
|
|
these tests — only the deterministic handlers and the per-session guard state.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
|
|
import pytest
|
|
|
|
from mock_data import POLICIES, RETURNS
|
|
from tools import SessionGuardState, dispatch_tool
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_returns():
    """Empty the shared ``RETURNS`` store around every test.

    The fixture is autouse, so each test starts with no recorded returns and
    leaves none behind for the next one.
    """
    RETURNS.clear()  # setup: drop anything left over from earlier tests
    yield
    RETURNS.clear()  # teardown: drop whatever this test recorded
|
|
|
|
|
|
@pytest.fixture
def state() -> SessionGuardState:
    """Provide a brand-new ``SessionGuardState`` to the requesting test."""
    fresh_guard = SessionGuardState()
    return fresh_guard
|
|
|
|
|
|
def test_lookup_order_returns_order_for_known_id(state):
    """``lookup_order`` with only the ID ``BK-10042`` returns Sarah Chen's order."""
    arguments = {"order_id": "BK-10042"}
    response = dispatch_tool("lookup_order", arguments, state)

    assert "order" in response
    order = response["order"]
    assert order["customer_name"] == "Sarah Chen"
|
|
|
|
|
|
def test_lookup_order_unknown_id_returns_not_found(state):
    """An unrecognised order ID is reported with the ``order_not_found`` error."""
    arguments = {"order_id": "BK-99999"}
    response = dispatch_tool("lookup_order", arguments, state)

    assert response.get("error") == "order_not_found"
|
|
|
|
|
|
def test_lookup_order_email_mismatch_masquerades_as_not_found(state):
    """Privacy boundary: a wrong email is indistinguishable from a missing order.

    The error must be the same ``order_not_found`` that an unknown ID
    produces, so a caller cannot probe which order IDs exist on the system.
    """
    arguments = {
        "order_id": "BK-10042",
        "customer_email": "wrong@example.com",
    }
    response = dispatch_tool("lookup_order", arguments, state)

    assert response.get("error") == "order_not_found"
|
|
|
|
|
|
def test_lookup_order_email_match_returns_order(state):
    """``lookup_order`` returns the order when the supplied email matches.

    NOTE(review): the email here is capitalised (``Sarah.Chen@...``) while the
    other tests use the lowercase form, so this presumably also exercises
    case-insensitive matching — confirm against tools.py.
    """
    arguments = {
        "order_id": "BK-10042",
        "customer_email": "Sarah.Chen@example.com",
    }
    response = dispatch_tool("lookup_order", arguments, state)

    assert "order" in response
|
|
|
|
|
|
def test_eligibility_check_passes_for_recent_delivered_order(state):
    """A passing eligibility check reports ``eligible`` and is recorded in the guard state."""
    arguments = {
        "order_id": "BK-10042",
        "customer_email": "sarah.chen@example.com",
    }
    outcome = dispatch_tool("check_return_eligibility", arguments, state)

    assert outcome["eligible"] is True
    # The pass must be remembered so a later initiate_return is allowed.
    passed_orders = state.eligibility_checks_passed
    assert "BK-10042" in passed_orders
|
|
|
|
|
|
def test_eligibility_check_rejects_past_window(state):
    """An order outside the return window is ineligible and is not recorded as passed.

    The rejection reason is expected to mention the "30-day" window.
    """
    arguments = {
        "order_id": "BK-9871",
        "customer_email": "maria.gonzalez@example.com",
    }
    outcome = dispatch_tool("check_return_eligibility", arguments, state)

    assert outcome["eligible"] is False
    # A failed check must not unlock initiate_return for this order.
    assert "BK-9871" not in state.eligibility_checks_passed
    explanation = outcome["reason"]
    assert "30-day" in explanation
|
|
|
|
|
|
def test_eligibility_check_rejects_not_yet_delivered(state):
    """An order that has not been delivered is ineligible for return.

    The rejection reason is expected to mention the "shipped" status. As in
    the past-window test, the rejected order must also stay out of
    ``state.eligibility_checks_passed`` so the failed check cannot unlock a
    later ``initiate_return``.
    """
    arguments = {
        "order_id": "BK-10089",
        "customer_email": "james.murphy@example.com",
    }
    result = dispatch_tool("check_return_eligibility", arguments, state)

    assert result["eligible"] is False
    assert "shipped" in result["reason"]
    # Consistency with the past-window test: a rejection leaves no trace in
    # the guard state.
    assert "BK-10089" not in state.eligibility_checks_passed
|
|
|
|
|
|
def test_eligibility_check_email_mismatch_returns_auth_failed(state):
    """A wrong email on an eligibility check yields ``auth_failed``.

    The failed authentication must also leave the guard state untouched:
    if the order were recorded as passed, a follow-up ``initiate_return``
    would be unlocked without a successful check.
    """
    arguments = {
        "order_id": "BK-10042",
        "customer_email": "wrong@example.com",
    }
    result = dispatch_tool("check_return_eligibility", arguments, state)

    assert result.get("error") == "auth_failed"
    # A failed auth must never count as a passed eligibility check.
    assert "BK-10042" not in state.eligibility_checks_passed
|
|
|
|
|
|
def test_initiate_return_refuses_without_prior_eligibility_check(state):
    """Layer 3 protocol guard, the most important guardrail in the system.

    Calling ``initiate_return`` on a fresh session, with no preceding
    eligibility check, must be refused with ``eligibility_not_verified`` and
    must not record anything in ``RETURNS``.
    """
    return_request = {
        "order_id": "BK-10042",
        "customer_email": "sarah.chen@example.com",
        "reason": "Bought by mistake",
    }
    response = dispatch_tool("initiate_return", return_request, state)

    assert response.get("error") == "eligibility_not_verified"
    # The refusal must leave the returns store empty.
    assert not RETURNS
|
|
|
|
|
|
def test_initiate_return_succeeds_after_eligibility_check(state):
    """Happy path: eligibility check first, then ``initiate_return`` succeeds.

    The response carries an ``RMA-`` prefixed ``return_id``, the order is
    marked in ``state.returns_initiated``, and the id is present in ``RETURNS``.
    """
    eligibility_args = {
        "order_id": "BK-10042",
        "customer_email": "sarah.chen@example.com",
    }
    dispatch_tool("check_return_eligibility", eligibility_args, state)

    return_request = {
        "order_id": "BK-10042",
        "customer_email": "sarah.chen@example.com",
        "reason": "Bought by mistake",
    }
    response = dispatch_tool("initiate_return", return_request, state)

    assert "return_id" in response
    assert response["return_id"].startswith("RMA-")
    assert "BK-10042" in state.returns_initiated
    assert response["return_id"] in RETURNS
|
|
|
|
|
|
def test_initiate_return_refuses_duplicate(state):
    """A second ``initiate_return`` for the same order is refused.

    Sequence: pass the eligibility check, initiate a return, then repeat the
    identical initiation. The repeat must fail with ``already_initiated``.

    The first initiation is asserted to succeed so the test cannot pass for
    the wrong reason, and the size of ``RETURNS`` is compared before and
    after the repeat so a refusal that still records a return is caught.
    """
    credentials = {
        "order_id": "BK-10042",
        "customer_email": "sarah.chen@example.com",
    }
    return_request = {**credentials, "reason": "Bought by mistake"}

    dispatch_tool("check_return_eligibility", dict(credentials), state)

    # Each call gets its own copy of the arguments, as the separate dict
    # literals did originally, in case a handler mutates its input.
    first = dispatch_tool("initiate_return", dict(return_request), state)
    assert "return_id" in first
    recorded_after_first = len(RETURNS)

    second = dispatch_tool("initiate_return", dict(return_request), state)
    assert second.get("error") == "already_initiated"
    # The refused duplicate must not have created another return record.
    assert len(RETURNS) == recorded_after_first
|
|
|
|
|
|
def test_lookup_policy_returns_verbatim_text(state):
    """``lookup_policy`` returns the ``password_reset`` text exactly as stored in ``POLICIES``."""
    arguments = {"topic": "password_reset"}
    response = dispatch_tool("lookup_policy", arguments, state)

    expected_text = POLICIES["password_reset"]
    assert response["text"] == expected_text
|
|
|
|
|
|
def test_lookup_policy_unknown_topic_returns_not_supported(state):
    """An unsupported topic yields ``topic_not_supported`` plus the available topics.

    ``shipping`` is expected to appear among the advertised alternatives.
    """
    arguments = {"topic": "loyalty_program"}
    response = dispatch_tool("lookup_policy", arguments, state)

    assert response.get("error") == "topic_not_supported"
    advertised = response["available_topics"]
    assert "shipping" in advertised
|
|
|
|
|
|
def test_lookup_policy_topic_is_case_insensitive(state):
    """An upper-case topic (``SHIPPING``) resolves to the ``shipping`` policy text."""
    arguments = {"topic": "SHIPPING"}
    response = dispatch_tool("lookup_policy", arguments, state)

    expected_text = POLICIES["shipping"]
    assert response["text"] == expected_text
|
|
|
|
|
|
def test_dispatch_unknown_tool_returns_error(state):
    """Dispatching a tool name that is not registered yields ``unknown_tool``."""
    no_arguments = {}
    response = dispatch_tool("delete_account", no_arguments, state)

    assert response.get("error") == "unknown_tool"
|