// claudish/tests/snapshot.test.ts
//
// 460 lines
// 13 KiB
// TypeScript

/**
* Snapshot Integration Tests
*
* Replays captured fixtures through the proxy and validates protocol compliance.
* Ensures 1:1 compatibility with official Claude Code protocol.
*/
import { describe, test, expect, beforeAll, afterAll } from "bun:test";
import { readFileSync, readdirSync } from "fs";
import { join } from "path";
import type { ProxyServer } from "../src/types";
import { createProxyServer } from "../src/proxy-server";
/**
 * One server-sent event: either stored in a fixture (`Fixture.response.events`)
 * or produced by `parseSSEStream` from a live proxy response.
 */
interface FixtureEvent {
// Event name. `parseSSEStream` takes it from the preceding `event:` line and
// falls back to the payload's `type` field, then to "unknown".
event: string;
// JSON-decoded payload of the `data:` line. Left as `any` because the
// validators read nested fields without a schema.
data: any;
}
/**
 * Shape of one fixture JSON file under `fixtures/`.
 *
 * `loadFixtures` casts parsed JSON to this type without validating it, so the
 * fields below describe the expected layout rather than a checked guarantee.
 */
interface Fixture {
// `name` and `category` appear in the per-fixture `describe` title.
name: string;
// Not read by the code visible in this file.
description: string;
category: string;
// Request replayed against the proxy: `headers` are merged into the fetch
// headers and `body` is JSON-stringified as the POST body.
request: {
headers: Record<string, string>;
body: any;
};
// `type` decides whether the live response is parsed as SSE ("streaming") or
// read with `response.json()` ("json").
// NOTE(review): `events` / `data` are presumably the captured original
// response; the code visible here never reads them — confirm against the
// capture script.
response: {
type: "streaming" | "json";
events?: FixtureEvent[];
data?: any;
};
assertions: {
// Passed to `validateEventSequence`, which currently does not inspect it.
eventSequence: string[];
// Compared by `validateContentBlocks`: block count, `type`, and `name`
// (when set). The expected `index` and `hasContent` are not read there.
contentBlocks: Array<{
index: number;
type: string;
name?: string;
hasContent?: boolean;
}>;
// Passed to `validateStopReason`, which currently does not compare against it.
stopReason: string | null;
// When false, `validateUsage` skips all usage checks.
hasUsage: boolean;
// Not read by the code visible in this file.
minInputTokens?: number;
minOutputTokens?: number;
};
}
/**
 * Load every `*.json` fixture from the `fixtures` directory next to this file.
 *
 * - File names are sorted so the generated tests run in the same order on
 *   every platform (directory listing order is not guaranteed).
 * - A missing fixtures directory yields an empty list instead of throwing.
 *   This function runs while the suite is being collected, so throwing here
 *   would abort the whole file before any test could report the problem.
 *
 * @returns The parsed fixtures. Contents are cast to `Fixture`, not validated.
 * @throws Error when a fixture file is not valid JSON (the message names the
 *   file), or any filesystem error other than a missing directory.
 */
function loadFixtures(): Fixture[] {
  const fixturesDir = join(import.meta.dir, "fixtures");

  let entries: string[];
  try {
    entries = readdirSync(fixturesDir);
  } catch (err: unknown) {
    // Only "directory does not exist" is treated as "no fixtures yet".
    const code = (err as { code?: unknown } | null)?.code;
    if (code === "ENOENT") {
      return [];
    }
    throw err;
  }

  return entries
    .filter(entry => entry.endsWith(".json"))
    .sort()
    .map(file => {
      const content = readFileSync(join(fixturesDir, file), "utf-8");
      try {
        return JSON.parse(content) as Fixture;
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to parse fixture ${file}: ${reason}`);
      }
    });
}
/**
 * Parse a server-sent-event response body into a list of events.
 *
 * The body is decoded incrementally and split on "\n". For each line:
 * - a blank line resets the pending event name;
 * - `event:` sets the pending event name;
 * - `data:` is JSON-parsed and recorded under the pending event name, falling
 *   back to the payload's `type` field and then to "unknown";
 * - the `[DONE]` sentinel is skipped;
 * - any other line is ignored.
 *
 * Payloads that cannot be processed are logged with `console.warn` and skipped.
 * Text still buffered when the stream ends (a final line without a trailing
 * newline) is processed too, so the last event is not lost.
 *
 * @param response Response whose body is an SSE stream.
 * @returns Events in the order they were received.
 * @throws Error if the response has no body.
 */
async function parseSSEStream(response: Response): Promise<FixtureEvent[]> {
  if (!response.body) {
    throw new Error("Response has no body");
  }

  const events: FixtureEvent[] = [];
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let currentEvent: string | null = null;

  // Interpret one complete line of the stream.
  const handleLine = (line: string): void => {
    if (!line.trim()) {
      currentEvent = null;
      return;
    }
    if (line.startsWith("event:")) {
      currentEvent = line.substring(6).trim();
      return;
    }
    if (!line.startsWith("data:")) {
      return;
    }

    const dataStr = line.substring(5).trim();
    if (dataStr === "[DONE]") {
      return;
    }

    try {
      const data = JSON.parse(dataStr);
      const eventType = currentEvent || data.type || "unknown";
      events.push({ event: eventType, data });
    } catch {
      console.warn("Failed to parse SSE data:", dataStr);
    }
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // The last piece may be an incomplete line; keep it for the next chunk.
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        handleLine(line);
      }
    }

    // Flush bytes held back by the streaming decoder, then handle whatever is
    // left: a stream that ends without a newline still carries a full line.
    buffer += decoder.decode();
    if (buffer) {
      for (const line of buffer.split("\n")) {
        handleLine(line);
      }
    }
  } finally {
    reader.releaseLock();
  }

  return events;
}
/**
 * Check the structural rules every streamed message must follow.
 *
 * Rules, reported in this order:
 * 1. `message_start`, `message_delta` and `message_stop` all occur.
 * 2. The first `message_start` is at position 0.
 * 3. The first `message_stop` is at the last position.
 * 4. `content_block_start` and `content_block_stop` occur equally often.
 *
 * @param actual Event names in the order they were received.
 * @param expected Event names recorded in the fixture. Accepted for interface
 *   compatibility; not consulted by any rule.
 * @returns `valid` is true when `errors` is empty.
 */
function validateEventSequence(
  actual: string[],
  expected: string[]
): { valid: boolean; errors: string[] } {
  const occurrences = (name: string): number =>
    actual.reduce((total, current) => (current === name ? total + 1 : total), 0);

  // Rule 1: mandatory events.
  const problems: string[] = ["message_start", "message_delta", "message_stop"]
    .filter(name => actual.indexOf(name) === -1)
    .map(name => `Missing required event: ${name}`);

  // Rule 2: the stream opens with message_start.
  const firstStart = actual.indexOf("message_start");
  if (firstStart !== 0) {
    problems.push(`message_start must be first event (found at index ${firstStart})`);
  }

  // Rule 3: the stream closes with message_stop.
  const firstStop = actual.indexOf("message_stop");
  const lastPosition = actual.length - 1;
  if (firstStop !== lastPosition) {
    problems.push(`message_stop must be last event (found at index ${firstStop})`);
  }

  // Rule 4: every opened content block is closed (by count only).
  const opened = occurrences("content_block_start");
  const closed = occurrences("content_block_stop");
  if (opened !== closed) {
    problems.push(`Mismatched content blocks: ${opened} starts, ${closed} stops`);
  }

  return { valid: problems.length === 0, errors: problems };
}
/**
 * Compare the content blocks opened in a stream against the fixture's
 * expectations.
 *
 * Every `content_block_start` event contributes one block. The checks are:
 * - each block's `index` equals its position in the stream (0, 1, 2, ...);
 * - the number of blocks equals `expected.length`;
 * - when the counts match, each block's `type` equals the expected type, and
 *   its `name` equals the expected name whenever the expectation sets one.
 *
 * @param events Parsed stream events.
 * @param expected Expected blocks from the fixture's assertions.
 * @returns `valid` is true when `errors` is empty.
 */
function validateContentBlocks(
  events: FixtureEvent[],
  expected: Fixture["assertions"]["contentBlocks"]
): { valid: boolean; errors: string[] } {
  const opened = events
    .filter(ev => ev.event === "content_block_start")
    .map(ev => ({
      index: ev.data.index,
      type: ev.data.content_block?.type,
      name: ev.data.content_block?.name,
    }));

  const problems: string[] = [];

  // Indices must count up from zero in stream order.
  opened.forEach((block, position) => {
    if (block.index !== position) {
      problems.push(`Block ${position}: expected index ${position}, got ${block.index}`);
    }
  });

  if (opened.length !== expected.length) {
    problems.push(`Expected ${expected.length} blocks, got ${opened.length}`);
    return { valid: false, errors: problems };
  }

  // Counts agree, so blocks can be compared pairwise.
  expected.forEach((wanted, position) => {
    const got = opened[position];
    if (got.type !== wanted.type) {
      problems.push(`Block ${position}: expected type ${wanted.type}, got ${got.type}`);
    }
    // Tool names are only checked when the fixture specifies one.
    if (wanted.name && got.name !== wanted.name) {
      problems.push(
        `Block ${position}: expected tool ${wanted.name}, got ${got.name || "none"}`
      );
    }
  });

  return { valid: problems.length === 0, errors: problems };
}
/**
 * Validate fine-grained tool input streaming.
 *
 * Tracks every `tool_use` content block by its `index` and checks that:
 * - each `input_json_delta` belongs to a tool block that was started;
 * - each `partial_json` fragment is a string;
 * - when a tool block stops, its concatenated fragments (if any) parse as JSON;
 * - every started tool block is eventually stopped. A block that never stops
 *   would otherwise pass without its arguments being checked at all.
 *
 * @param events Parsed stream events.
 * @returns `valid` is true when `errors` is empty.
 */
function validateToolInputStreaming(
  events: FixtureEvent[]
): { valid: boolean; errors: string[] } {
  const errors: string[] = [];
  const toolBlocks = new Map<number, { args: string; complete: boolean }>();

  for (const event of events) {
    switch (event.event) {
      case "content_block_start": {
        if (event.data.content_block?.type === "tool_use") {
          toolBlocks.set(event.data.index, { args: "", complete: false });
        }
        break;
      }

      case "content_block_delta": {
        if (event.data.delta?.type !== "input_json_delta") break;

        const index = event.data.index;
        const toolState = toolBlocks.get(index);
        if (!toolState) {
          errors.push(`Tool delta at index ${index} without corresponding start`);
          break;
        }

        const fragment = event.data.delta.partial_json;
        if (typeof fragment !== "string") {
          // Concatenating a non-string would silently add text such as "undefined".
          errors.push(`Tool delta at index ${index}: partial_json must be a string`);
          break;
        }
        toolState.args += fragment;
        break;
      }

      case "content_block_stop": {
        const index = event.data.index;
        const toolState = toolBlocks.get(index);
        if (!toolState) break; // Stop of a non-tool block.

        toolState.complete = true;
        // A tool block with no fragments is accepted as-is.
        if (toolState.args) {
          try {
            JSON.parse(toolState.args);
          } catch {
            errors.push(
              `Tool at index ${index}: incomplete or malformed JSON: ${toolState.args.substring(0, 100)}...`
            );
          }
        }
        break;
      }
    }
  }

  // Any tool block still open here was never validated.
  for (const [index, toolState] of toolBlocks) {
    if (!toolState.complete) {
      errors.push(`Tool at index ${index}: missing content_block_stop`);
    }
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}
/**
 * Check that token usage is reported where this suite expects it.
 *
 * When `hasUsage` is false nothing is checked. Otherwise:
 * - the first `message_start` must carry `message.usage`;
 * - the first `message_delta` must carry `usage` with a numeric
 *   `output_tokens` and no `input_tokens` field.
 *
 * @param events Parsed stream events.
 * @param hasUsage Whether the fixture expects usage metrics at all.
 * @returns `valid` is true when `errors` is empty.
 */
function validateUsage(
  events: FixtureEvent[],
  hasUsage: boolean
): { valid: boolean; errors: string[] } {
  if (!hasUsage) {
    return { valid: true, errors: [] };
  }

  const firstNamed = (name: string): FixtureEvent | undefined =>
    events.find(candidate => candidate.event === name);

  const problems: string[] = [];

  const startUsage = firstNamed("message_start")?.data?.message?.usage;
  if (!startUsage) {
    problems.push("message_start missing usage field");
  }

  // Per this suite's reading of the protocol, message_delta reports output
  // tokens only; input token counts belong to message_start.
  const deltaUsage = firstNamed("message_delta")?.data?.usage;
  if (deltaUsage) {
    if (typeof deltaUsage.output_tokens !== "number") {
      problems.push("message_delta usage.output_tokens must be a number");
    }
    if (deltaUsage.input_tokens !== undefined) {
      problems.push("message_delta should not contain input_tokens (only output_tokens per protocol)");
    }
  } else {
    problems.push("message_delta missing usage field");
  }

  return { valid: problems.length === 0, errors: problems };
}
/**
 * Check that the stream reports a recognised `stop_reason`.
 *
 * Looks at the first `message_delta` event and requires `delta.stop_reason`
 * to be one of `end_turn`, `max_tokens`, `tool_use` or `stop_sequence`.
 *
 * @param events Parsed stream events.
 * @param expectedStopReason Stop reason recorded in the fixture. Accepted for
 *   interface compatibility; the actual value is not compared against it.
 * @returns `valid` is true when `errors` is empty.
 */
function validateStopReason(
  events: FixtureEvent[],
  expectedStopReason: string | null
): { valid: boolean; errors: string[] } {
  const delta = events.find(candidate => candidate.event === "message_delta");
  if (!delta) {
    return { valid: false, errors: ["message_delta event not found"] };
  }

  const recognised = new Set<unknown>(["end_turn", "max_tokens", "tool_use", "stop_sequence"]);
  const reason = delta.data?.delta?.stop_reason;
  const problems: string[] = [];

  if (!reason) {
    problems.push("message_delta missing stop_reason");
  } else if (!recognised.has(reason)) {
    problems.push(`Invalid stop_reason: ${reason}`);
  }

  return { valid: problems.length === 0, errors: problems };
}
/**
 * Main test suite
 *
 * Starts the proxy once, then for every fixture replays the captured request
 * and runs the protocol validators over the events that came back.
 *
 * Within a fixture, the validation tests read `actualEvents`, which is filled
 * by the "replays request through proxy" test, so they rely on that test
 * having run first. SSE validations are skipped for fixtures whose response
 * type is not "streaming": such fixtures never produce events, so the checks
 * could only fail.
 */
describe("Snapshot Integration Tests", () => {
  let server: ProxyServer | null = null;
  const PORT = 8338;
  const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY || "";
  const TEST_MODEL = "anthropic/claude-sonnet-4.5"; // Use Claude for exact comparison

  /** Log every validator error under `heading`, then assert the result is valid. */
  const expectValid = (
    heading: string,
    result: { valid: boolean; errors: string[] }
  ): void => {
    if (!result.valid) {
      console.error(heading);
      result.errors.forEach(err => console.error(` - ${err}`));
    }
    expect(result.valid).toBe(true);
  };

  beforeAll(async () => {
    if (!OPENROUTER_API_KEY) {
      console.warn("⚠️ OPENROUTER_API_KEY not set, tests will be skipped");
      return;
    }
    // Start proxy server
    server = await createProxyServer(PORT, OPENROUTER_API_KEY, TEST_MODEL, false);
    console.log(`✅ Proxy server started on port ${PORT}`);
  });

  afterAll(async () => {
    if (server) {
      await server.shutdown();
      console.log("✅ Proxy server stopped");
    }
  });

  test("fixtures directory exists", () => {
    const fixturesDir = join(import.meta.dir, "fixtures");
    expect(readdirSync(fixturesDir)).toBeDefined();
  });

  // Load and test each fixture
  const fixtures = loadFixtures();
  if (fixtures.length === 0) {
    test.skip("no fixtures found - run capture-fixture.ts first", () => {});
  }

  for (const fixture of fixtures) {
    describe(`Fixture: ${fixture.name} (${fixture.category})`, () => {
      // Without an API key no proxy is started, so nothing can be replayed.
      const skipReplay = !OPENROUTER_API_KEY;
      // Event validators only make sense for streamed (SSE) responses.
      const skipStreamChecks = skipReplay || fixture.response.type !== "streaming";

      let actualEvents: FixtureEvent[] = [];

      test.skipIf(skipReplay)("replays request through proxy", async () => {
        const response = await fetch(`http://127.0.0.1:${PORT}/v1/messages`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            ...fixture.request.headers,
          },
          body: JSON.stringify(fixture.request.body),
        });
        expect(response.ok).toBe(true);

        if (fixture.response.type === "streaming") {
          actualEvents = await parseSSEStream(response);
          expect(actualEvents.length).toBeGreaterThan(0);
        } else {
          const data = await response.json();
          expect(data).toBeDefined();
        }
      });

      test.skipIf(skipStreamChecks)("validates event sequence", () => {
        const actualSequence = actualEvents.map(e => e.event);
        expectValid(
          "Event sequence errors:",
          validateEventSequence(actualSequence, fixture.assertions.eventSequence)
        );
      });

      test.skipIf(skipStreamChecks)("validates content block indices", () => {
        expectValid(
          "Content block errors:",
          validateContentBlocks(actualEvents, fixture.assertions.contentBlocks)
        );
      });

      test.skipIf(skipStreamChecks)("validates tool input streaming (if tool_use)", () => {
        const hasTools = fixture.assertions.contentBlocks.some(b => b.type === "tool_use");
        if (!hasTools) {
          return; // Skip if no tools
        }
        expectValid("Tool input streaming errors:", validateToolInputStreaming(actualEvents));
      });

      test.skipIf(skipStreamChecks)("validates usage metrics", () => {
        expectValid(
          "Usage validation errors:",
          validateUsage(actualEvents, fixture.assertions.hasUsage)
        );
      });

      test.skipIf(skipStreamChecks)("validates stop_reason", () => {
        expectValid(
          "Stop reason errors:",
          validateStopReason(actualEvents, fixture.assertions.stopReason)
        );
      });
    });
  }
});