// claudish/tests/grok-tool-format.test.ts

import { describe, test, expect, beforeAll, afterAll } from "bun:test";
import { createProxyServer } from "../src/proxy-server";

/**
 * Regression test for Grok xAI XML function call format issue
 *
 * ISSUE: Grok models sometimes output <xai:function_call> XML format as text
 * instead of using OpenAI's tool_calls JSON format. This breaks Claude Code.
 *
 * FIX: We inject a system message for Grok models forcing OpenAI format.
 *
 * This test validates that:
 * 1. The system message is correctly injected for Grok models
 * 2. The message warns against XML format
 * 3. The tools array is properly passed to OpenRouter
 */

// API key handed to the proxy under test. When the environment variable is
// unset or empty, the placeholder "test-key" is used instead.
const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY
  ? process.env.OPENROUTER_API_KEY
  : "test-key";

// Local port the shared Grok-configured proxy listens on.
const TEST_PORT = 8339;

// Handle to the proxy shared by the tests in this file; assigned in beforeAll.
let proxyServer: Awaited<ReturnType<typeof createProxyServer>>;

// Start a single proxy configured for a Grok model before any test runs.
beforeAll(async () => {
  const grokModel = "x-ai/grok-code-fast-1";
  proxyServer = await createProxyServer(TEST_PORT, OPENROUTER_API_KEY, grokModel, false, undefined);
});

// Shut the shared proxy down once all tests are done. The optional call is a
// no-op when the proxy was never assigned (e.g. startup threw in beforeAll).
afterAll(async () => {
  await proxyServer?.shutdown();
});

describe("Grok Tool Format Fix", () => {
  /** Port used by the short-lived non-Grok proxy started in the negative test. */
  const NON_GROK_TEST_PORT = 8340;

  /** Minimal "Read" tool definition shared by the tests that do not inspect tool details. */
  const MINIMAL_READ_TOOL = {
    name: "Read",
    description: "Read a file",
    input_schema: {
      type: "object",
      properties: {
        file_path: { type: "string" }
      },
      required: ["file_path"]
    }
  };

  /**
   * Posts `claudeRequest` to the proxy on `port` and returns the JSON payload
   * that the proxy tried to send to OpenRouter.
   *
   * While the request is in flight `global.fetch` is replaced with a mock:
   * any call whose target contains "openrouter" is recorded and answered with
   * a canned chat completion, so no real API call is made; every other call
   * (including the request to the local proxy itself) is forwarded to the
   * original fetch. The original fetch is always restored before returning.
   *
   * @param port - Local port of the proxy to call.
   * @param claudeRequest - Anthropic-style request body to send to `/v1/messages`.
   * @returns The parsed request body captured from the OpenRouter call, or
   *          `null` when no OpenRouter call was observed.
   */
  async function captureOpenRouterPayload(port: number, claudeRequest: object): Promise<any> {
    let capturedPayload: any = null;
    const originalFetch = global.fetch;

    global.fetch = async (url: any, options: any) => {
      // Stringify the target so URL objects are intercepted as well as plain
      // strings; otherwise such a call would fall through to the real network.
      const target = typeof url === "string" ? url : String(url);
      if (target.includes("openrouter")) {
        capturedPayload = JSON.parse(options.body);
        // Return a mock response to avoid an actual API call
        return new Response(
          JSON.stringify({
            choices: [{
              message: { role: "assistant", content: "Test response" }
            }]
          }),
          {
            status: 200,
            headers: { "Content-Type": "application/json" }
          }
        );
      }
      return originalFetch(url, options);
    };

    try {
      const response = await fetch(`http://localhost:${port}/v1/messages`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "anthropic-version": "2023-06-01"
        },
        body: JSON.stringify(claudeRequest)
      });

      // Wait for the full response body before inspecting what was captured
      await response.text();
    } finally {
      // Restore original fetch even if the request itself failed
      global.fetch = originalFetch;
    }

    return capturedPayload;
  }

  test("should inject system message for Grok models with tools", async () => {
    // Simulate Claude Code request with tools
    const claudeRequest = {
      model: "claude-sonnet-4-20250514",
      max_tokens: 4096,
      messages: [
        {
          role: "user",
          content: "Read the package.json file"
        }
      ],
      tools: [
        {
          name: "Read",
          description: "Read a file from the filesystem",
          input_schema: {
            type: "object",
            properties: {
              file_path: {
                type: "string",
                description: "The absolute path to the file to read"
              }
            },
            required: ["file_path"]
          }
        }
      ]
    };

    const capturedPayload = await captureOpenRouterPayload(TEST_PORT, claudeRequest);

    // Verify the payload sent to OpenRouter. The capture starts out as null,
    // so it must be checked against null: toBeDefined() would pass vacuously.
    expect(capturedPayload).not.toBeNull();
    expect(capturedPayload.model).toBe("x-ai/grok-code-fast-1");
    expect(capturedPayload.messages).toBeDefined();
    expect(capturedPayload.tools).toBeDefined();
    expect(capturedPayload.tools.length).toBeGreaterThan(0);

    // CRITICAL: Verify system message was injected
    const firstMessage = capturedPayload.messages[0];
    expect(firstMessage.role).toBe("system");
    expect(firstMessage.content).toContain("OpenAI tool_calls format");
    expect(firstMessage.content).toContain("NEVER use XML format");
    expect(firstMessage.content).toContain("<xai:function_call>");

    console.log("✅ Grok tool format fix verified:");
    console.log("  - System message injected");
    console.log("  - Warns against XML format");
    console.log("  - Content:", firstMessage.content);
  });

  test("should NOT inject system message for non-Grok models", async () => {
    // Create a non-Grok proxy that lives only for this test
    const nonGrokProxy = await createProxyServer(
      NON_GROK_TEST_PORT,
      OPENROUTER_API_KEY,
      "openai/gpt-4",
      false,
      undefined
    );

    try {
      const claudeRequest = {
        model: "claude-sonnet-4-20250514",
        max_tokens: 4096,
        messages: [
          {
            role: "user",
            content: "Read the package.json file"
          }
        ],
        tools: [MINIMAL_READ_TOOL]
      };

      const capturedPayload = await captureOpenRouterPayload(NON_GROK_TEST_PORT, claudeRequest);

      // Verify NO system message was injected for non-Grok models
      expect(capturedPayload).not.toBeNull();
      const firstMessage = capturedPayload.messages[0];

      // Should be user message, NOT system message
      expect(firstMessage.role).toBe("user");
      expect(firstMessage.content).not.toContain("OpenAI tool_calls format");

      console.log("✅ Non-Grok model verified:");
      console.log("  - No system message injected");
      console.log("  - First message is user message");
    } finally {
      await nonGrokProxy.shutdown();
    }
  });

  test("should append to existing system message if present", async () => {
    const claudeRequest = {
      model: "claude-sonnet-4-20250514",
      max_tokens: 4096,
      system: "You are a helpful coding assistant.",
      messages: [
        {
          role: "user",
          content: "Read the package.json file"
        }
      ],
      tools: [MINIMAL_READ_TOOL]
    };

    const capturedPayload = await captureOpenRouterPayload(TEST_PORT, claudeRequest);

    // Verify system message contains BOTH original and appended content
    expect(capturedPayload).not.toBeNull();
    const firstMessage = capturedPayload.messages[0];
    expect(firstMessage.role).toBe("system");
    expect(firstMessage.content).toContain("helpful coding assistant");
    expect(firstMessage.content).toContain("OpenAI tool_calls format");
    expect(firstMessage.content).toContain("NEVER use XML format");

    console.log("✅ Existing system message verified:");
    console.log("  - Original content preserved");
    console.log("  - Tool format instruction appended");
  });
});

/**
 * Summary record of the Grok tool-format regression that this test file
 * guards against: what went wrong, its impact, the fix, and when it was found.
 * Exported for documentation purposes; the tests above do not read it.
 */
export const GROK_FIX_DOCUMENTATION = {
  issue: "Grok outputs <xai:function_call> XML as text instead of tool_calls JSON",
  impact: "Claude Code UI gets stuck, tools don't execute",
  fix: "Inject system message forcing OpenAI tool_calls format",
  tested: "Regression test validates system message injection",
  discovered: "2025-11-11",
  severity: "CRITICAL"
};
// Initial commit: Claudish - OpenRouter proxy for Claude Code
// A proxy server that enables Claude Code to work with any OpenRouter model
// (Grok, GPT-5, Gemini, DeepSeek, etc.) with automatic message transformation.
// Features:
// - Model-specific adapters for Grok, Gemini, OpenAI, DeepSeek, Qwen, MiniMax
// - Interactive and single-shot CLI modes
// - MCP server support
// - Monitor mode for debugging
// - Comprehensive test suite
// 🤖 Generated with [Claude Code](https://claude.com/claude-code)
// Co-Authored-By: Claude <noreply@anthropic.com>
// 2025-11-28 13:25:08 +03:00
			`import { describe, test, expect, beforeAll, afterAll } from "bun:test";`
			`import { createProxyServer } from "../src/proxy-server";`

			`/**`
			`* Regression test for Grok xAI XML function call format issue`
			`*`
			`* ISSUE: Grok models sometimes output <xai:function_call> XML format as text`
			`* instead of using OpenAI's tool_calls JSON format. This breaks Claude Code.`
			`*`
			`* FIX: We inject a system message for Grok models forcing OpenAI format.`
			`*`
			`* This test validates that:`
			`* 1. The system message is correctly injected for Grok models`
			`* 2. The message warns against XML format`
			`* 3. The tools array is properly passed to OpenRouter`
			`*/`

			`const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY \|\| "test-key";`
			`const TEST_PORT = 8339;`

			`let proxyServer: Awaited<ReturnType<typeof createProxyServer>>;`

			`beforeAll(async () => {`
			`proxyServer = await createProxyServer(`
			`TEST_PORT,`
			`OPENROUTER_API_KEY,`
			`"x-ai/grok-code-fast-1",`
			`false,`
			`undefined`
			`);`
			`});`

			`afterAll(async () => {`
			`if (proxyServer) {`
			`await proxyServer.shutdown();`
			`}`
			`});`

			`describe("Grok Tool Format Fix", () => {`
			`test("should inject system message for Grok models with tools", async () => {`
			`// Simulate Claude Code request with tools`
			`const claudeRequest = {`
			`model: "claude-sonnet-4-20250514",`
			`max_tokens: 4096,`
			`messages: [`
			`{`
			`role: "user",`
			`content: "Read the package.json file"`
			`}`
			`],`
			`tools: [`
			`{`
			`name: "Read",`
			`description: "Read a file from the filesystem",`
			`input_schema: {`
			`type: "object",`
			`properties: {`
			`file_path: {`
			`type: "string",`
			`description: "The absolute path to the file to read"`
			`}`
			`},`
			`required: ["file_path"]`
			`}`
			`}`
			`]`
			`};`

			`// Mock fetch to capture the request sent to OpenRouter`
			`let capturedPayload: any = null;`
			`const originalFetch = global.fetch;`
			`global.fetch = async (url: any, options: any) => {`
			`if (typeof url === "string" && url.includes("openrouter")) {`
			`capturedPayload = JSON.parse(options.body);`
			`// Return a mock response to avoid actual API call`
			`return new Response(`
			`JSON.stringify({`
			`choices: [{`
			`message: { role: "assistant", content: "Test response" }`
			`}]`
			`}),`
			`{`
			`status: 200,`
			`headers: { "Content-Type": "application/json" }`
			`}`
			`);`
			`}`
			`return originalFetch(url, options);`
			`};`

			`try {`
			`// Make request to proxy`
			const response = await fetch(`http://localhost:${TEST_PORT}/v1/messages`, {
			`method: "POST",`
			`headers: {`
			`"Content-Type": "application/json",`
			`"anthropic-version": "2023-06-01"`
			`},`
			`body: JSON.stringify(claudeRequest)`
			`});`

			`// Wait for response`
			`await response.text();`

			`// Verify the payload sent to OpenRouter`
			`expect(capturedPayload).toBeDefined();`
			`expect(capturedPayload.model).toBe("x-ai/grok-code-fast-1");`
			`expect(capturedPayload.messages).toBeDefined();`
			`expect(capturedPayload.tools).toBeDefined();`
			`expect(capturedPayload.tools.length).toBeGreaterThan(0);`

			`// CRITICAL: Verify system message was injected`
			`const firstMessage = capturedPayload.messages[0];`
			`expect(firstMessage.role).toBe("system");`
			`expect(firstMessage.content).toContain("OpenAI tool_calls format");`
			`expect(firstMessage.content).toContain("NEVER use XML format");`
			`expect(firstMessage.content).toContain("<xai:function_call>");`

			`console.log("✅ Grok tool format fix verified:");`
			`console.log(" - System message injected");`
			`console.log(" - Warns against XML format");`
			`console.log(" - Content:", firstMessage.content);`
			`} finally {`
			`// Restore original fetch`
			`global.fetch = originalFetch;`
			`}`
			`});`

			`test("should NOT inject system message for non-Grok models", async () => {`
			`// Create a non-Grok proxy`
			`const nonGrokProxy = await createProxyServer(`
			`8340,`
			`OPENROUTER_API_KEY,`
			`"openai/gpt-4",`
			`false,`
			`undefined`
			`);`

			`try {`
			`const claudeRequest = {`
			`model: "claude-sonnet-4-20250514",`
			`max_tokens: 4096,`
			`messages: [`
			`{`
			`role: "user",`
			`content: "Read the package.json file"`
			`}`
			`],`
			`tools: [`
			`{`
			`name: "Read",`
			`description: "Read a file",`
			`input_schema: {`
			`type: "object",`
			`properties: {`
			`file_path: { type: "string" }`
			`},`
			`required: ["file_path"]`
			`}`
			`}`
			`]`
			`};`

			`let capturedPayload: any = null;`
			`const originalFetch = global.fetch;`
			`global.fetch = async (url: any, options: any) => {`
			`if (typeof url === "string" && url.includes("openrouter")) {`
			`capturedPayload = JSON.parse(options.body);`
			`return new Response(`
			`JSON.stringify({`
			`choices: [{`
			`message: { role: "assistant", content: "Test response" }`
			`}]`
			`}),`
			`{`
			`status: 200,`
			`headers: { "Content-Type": "application/json" }`
			`}`
			`);`
			`}`
			`return originalFetch(url, options);`
			`};`

			`try {`
			const response = await fetch(`http://localhost:8340/v1/messages`, {
			`method: "POST",`
			`headers: {`
			`"Content-Type": "application/json",`
			`"anthropic-version": "2023-06-01"`
			`},`
			`body: JSON.stringify(claudeRequest)`
			`});`

			`await response.text();`

			`// Verify NO system message was injected for non-Grok models`
			`expect(capturedPayload).toBeDefined();`
			`const firstMessage = capturedPayload.messages[0];`

			`// Should be user message, NOT system message`
			`expect(firstMessage.role).toBe("user");`
			`expect(firstMessage.content).not.toContain("OpenAI tool_calls format");`

			`console.log("✅ Non-Grok model verified:");`
			`console.log(" - No system message injected");`
			`console.log(" - First message is user message");`
			`} finally {`
			`global.fetch = originalFetch;`
			`}`
			`} finally {`
			`await nonGrokProxy.shutdown();`
			`}`
			`});`

			`test("should append to existing system message if present", async () => {`
			`const claudeRequest = {`
			`model: "claude-sonnet-4-20250514",`
			`max_tokens: 4096,`
			`system: "You are a helpful coding assistant.",`
			`messages: [`
			`{`
			`role: "user",`
			`content: "Read the package.json file"`
			`}`
			`],`
			`tools: [`
			`{`
			`name: "Read",`
			`description: "Read a file",`
			`input_schema: {`
			`type: "object",`
			`properties: {`
			`file_path: { type: "string" }`
			`},`
			`required: ["file_path"]`
			`}`
			`}`
			`]`
			`};`

			`let capturedPayload: any = null;`
			`const originalFetch = global.fetch;`
			`global.fetch = async (url: any, options: any) => {`
			`if (typeof url === "string" && url.includes("openrouter")) {`
			`capturedPayload = JSON.parse(options.body);`
			`return new Response(`
			`JSON.stringify({`
			`choices: [{`
			`message: { role: "assistant", content: "Test response" }`
			`}]`
			`}),`
			`{`
			`status: 200,`
			`headers: { "Content-Type": "application/json" }`
			`}`
			`);`
			`}`
			`return originalFetch(url, options);`
			`};`

			`try {`
			const response = await fetch(`http://localhost:${TEST_PORT}/v1/messages`, {
			`method: "POST",`
			`headers: {`
			`"Content-Type": "application/json",`
			`"anthropic-version": "2023-06-01"`
			`},`
			`body: JSON.stringify(claudeRequest)`
			`});`

			`await response.text();`

			`// Verify system message contains BOTH original and appended content`
			`expect(capturedPayload).toBeDefined();`
			`const firstMessage = capturedPayload.messages[0];`
			`expect(firstMessage.role).toBe("system");`
			`expect(firstMessage.content).toContain("helpful coding assistant");`
			`expect(firstMessage.content).toContain("OpenAI tool_calls format");`
			`expect(firstMessage.content).toContain("NEVER use XML format");`

			`console.log("✅ Existing system message verified:");`
			`console.log(" - Original content preserved");`
			`console.log(" - Tool format instruction appended");`
			`} finally {`
			`global.fetch = originalFetch;`
			`}`
			`});`
			`});`

			`// Export for documentation`
			`export const GROK_FIX_DOCUMENTATION = {`
			`issue: "Grok outputs <xai:function_call> XML as text instead of tool_calls JSON",`
			`impact: "Claude Code UI gets stuck, tools don't execute",`
			`fix: "Inject system message forcing OpenAI tool_calls format",`
			`tested: "Regression test validates system message injection",`
			`discovered: "2025-11-11",`
			`severity: "CRITICAL"`
			`};`