feat(z-ai): Add Z.AI GLM-4.6 provider support
- Add ZaiHandler for direct Z.AI API integration
- Add z-ai/glm-4.6 to recommended models
- Support ZAI_API_KEY environment variable
- Bypass OpenRouter for z-ai/* models

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
parent a3303a12db
commit 90cb696ac6
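The net effect is a routing rule: when the requested model id carries the z-ai/ prefix and ZAI_API_KEY is set, requests go directly to Z.AI's OpenAI-compatible endpoint instead of through OpenRouter. A minimal sketch of that gate (the helper name is mine, not from the repo; the real checks live in src/cli.ts and the proxy's handler selection below):

// Hypothetical illustration of the routing rule this commit introduces.
const usesDirectZai = (model: string | undefined, zaiKey: string | undefined): boolean =>
  !!model?.startsWith("z-ai/") && !!zaiKey;

usesDirectZai("z-ai/glm-4.6", process.env.ZAI_API_KEY); // true only when ZAI_API_KEY is set

Without the key, z-ai/* models still route through OpenRouter, which also lists z-ai/glm-4.6.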
@@ -0,0 +1,133 @@
{
  "version": "1.1.5",
  "lastUpdated": "2025-12-13",
  "source": "https://openrouter.ai/models?categories=programming&fmt=cards&order=top-weekly",
  "models": [
    {
      "id": "google/gemini-3-pro-preview",
      "name": "Google: Gemini 3 Pro Preview",
      "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.",
      "provider": "Google",
      "category": "vision",
      "priority": 1,
      "pricing": {
        "input": "$2.00/1M",
        "output": "$12.00/1M",
        "average": "$7.00/1M"
      },
      "context": "1048K",
      "maxOutputTokens": 65536,
      "modality": "text+image->text",
      "supportsTools": true,
      "supportsReasoning": true,
      "supportsVision": true,
      "isModerated": false,
      "recommended": true
    },
    {
      "id": "openai/gpt-5.1-codex",
      "name": "OpenAI: GPT-5.1-Codex",
      "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.",
      "provider": "Openai",
      "category": "vision",
      "priority": 2,
      "pricing": {
        "input": "$1.25/1M",
        "output": "$10.00/1M",
        "average": "$5.63/1M"
      },
      "context": "400K",
      "maxOutputTokens": 128000,
      "modality": "text+image->text",
      "supportsTools": true,
      "supportsReasoning": true,
      "supportsVision": true,
      "isModerated": true,
      "recommended": true
    },
    {
      "id": "x-ai/grok-code-fast-1",
      "name": "xAI: Grok Code Fast 1",
      "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.",
      "provider": "X-ai",
      "category": "reasoning",
      "priority": 3,
      "pricing": {
        "input": "$0.20/1M",
        "output": "$1.50/1M",
        "average": "$0.85/1M"
      },
      "context": "256K",
      "maxOutputTokens": 10000,
      "modality": "text->text",
      "supportsTools": true,
      "supportsReasoning": true,
      "supportsVision": false,
      "isModerated": false,
      "recommended": true
    },
    {
      "id": "minimax/minimax-m2",
      "name": "MiniMax: MiniMax M2",
      "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).",
      "provider": "Minimax",
      "category": "reasoning",
      "priority": 4,
      "pricing": {
        "input": "$0.25/1M",
        "output": "$1.02/1M",
        "average": "$0.64/1M"
      },
      "context": "262K",
      "maxOutputTokens": null,
      "modality": "text->text",
      "supportsTools": true,
      "supportsReasoning": true,
      "supportsVision": false,
      "isModerated": false,
      "recommended": true
    },
    {
      "id": "z-ai/glm-4.6",
      "name": "Z.AI: GLM 4.6",
      "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code, Cline, Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.",
      "provider": "Z-ai",
      "category": "reasoning",
      "priority": 5,
      "pricing": {
        "input": "$0.40/1M",
        "output": "$1.75/1M",
        "average": "$1.07/1M"
      },
      "context": "202K",
      "maxOutputTokens": 202752,
      "modality": "text->text",
      "supportsTools": true,
      "supportsReasoning": true,
      "supportsVision": false,
      "isModerated": false,
      "recommended": true
    },
    {
      "id": "qwen/qwen3-vl-235b-a22b-instruct",
      "name": "Qwen: Qwen3 VL 235B A22B Instruct",
      "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.",
      "provider": "Qwen",
      "category": "vision",
      "priority": 6,
      "pricing": {
        "input": "$0.20/1M",
        "output": "$1.20/1M",
        "average": "$0.70/1M"
      },
      "context": "262K",
      "maxOutputTokens": null,
      "modality": "text+image->text",
      "supportsTools": true,
      "supportsReasoning": false,
      "supportsVision": true,
      "isModerated": false,
      "recommended": true
    }
  ]
}
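The recommended-models JSON above repeats one record shape per entry. A hedged TypeScript sketch of that shape (the interface name is mine; the fields and value formats are taken directly from the entries above):

// Hypothetical type for the entries in the recommended-models JSON.
interface RecommendedModel {
  id: string;                       // e.g. "z-ai/glm-4.6"
  name: string;                     // display name, e.g. "Z.AI: GLM 4.6"
  description: string;
  provider: string;
  category: "vision" | "reasoning";
  priority: number;                 // 1 = highest
  pricing: { input: string; output: string; average: string }; // "$N.NN/1M" strings
  context: string;                  // e.g. "202K"
  maxOutputTokens: number | null;   // null when not published
  modality: "text->text" | "text+image->text";
  supportsTools: boolean;
  supportsReasoning: boolean;
  supportsVision: boolean;
  isModerated: boolean;
  recommended: boolean;
}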
src/cli.ts (22 changed lines)
@@ -229,21 +229,33 @@ export async function parseArgs(args: string[]): Promise<ClaudishConfig> {
       console.log("[claudish] Ensure you are logged in to Claude Code (claude auth login)");
     }
   } else {
-    // OpenRouter mode: requires OpenRouter API key
+    // OpenRouter mode: requires OpenRouter API key (unless using Z.ai model with ZAI_API_KEY)
     const apiKey = process.env[ENV.OPENROUTER_API_KEY];
+    const zaiApiKey = process.env[ENV.ZAI_API_KEY];
+    const isZaiModel = config.model?.startsWith("z-ai/");
+
     if (!apiKey) {
-      // In interactive mode, we'll prompt for it later
-      // In non-interactive mode, it's required now
-      if (!config.interactive) {
+      // Z.ai models can work without OPENROUTER_API_KEY if ZAI_API_KEY is set
+      if (isZaiModel && zaiApiKey) {
+        // Z.ai mode - no OpenRouter key needed
+        config.openrouterApiKey = undefined;
+      } else if (!config.interactive) {
+        // In non-interactive mode, OpenRouter key is required (unless Z.ai)
         console.error("Error: OPENROUTER_API_KEY environment variable is required");
         console.error("Get your API key from: https://openrouter.ai/keys");
         console.error("");
         console.error("Set it now:");
         console.error(" export OPENROUTER_API_KEY='sk-or-v1-...'");
-        process.exit(1);
-      }
-      // Will be prompted for in interactive mode
-      config.openrouterApiKey = undefined;
+        if (config.model?.startsWith("z-ai/")) {
+          console.error("");
+          console.error("Or for Z.ai models, set ZAI_API_KEY instead:");
+          console.error(" export ZAI_API_KEY='your-zai-key'");
+        }
+        process.exit(1);
+      } else {
+        // Will be prompted for in interactive mode
+        config.openrouterApiKey = undefined;
+      }
     } else {
       config.openrouterApiKey = apiKey;
     }
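The branch above encodes a key-resolution order worth stating plainly: an explicit OPENROUTER_API_KEY always wins; otherwise a z-ai/* model with ZAI_API_KEY runs without an OpenRouter key; otherwise interactive sessions prompt later and non-interactive ones exit with guidance. A condensed sketch (function and type names are mine, not from the repo):

// Hypothetical condensation of the parseArgs branch above.
type KeyMode = "openrouter" | "zai" | "prompt" | "exit";

function resolveKeyMode(
  model: string | undefined,
  interactive: boolean,
  openrouterKey?: string,
  zaiKey?: string,
): KeyMode {
  if (openrouterKey) return "openrouter";                  // explicit key always wins
  if (model?.startsWith("z-ai/") && zaiKey) return "zai";  // direct Z.ai, no OpenRouter key
  return interactive ? "prompt" : "exit";                  // prompt later, or exit(1) with hints
}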
@@ -79,6 +79,8 @@ export const ENV = {
   ANTHROPIC_DEFAULT_SONNET_MODEL: "ANTHROPIC_DEFAULT_SONNET_MODEL",
   ANTHROPIC_DEFAULT_HAIKU_MODEL: "ANTHROPIC_DEFAULT_HAIKU_MODEL",
   CLAUDE_CODE_SUBAGENT_MODEL: "CLAUDE_CODE_SUBAGENT_MODEL",
+  // Z.ai API key for direct Z.ai access (bypasses OpenRouter for z-ai/* models)
+  ZAI_API_KEY: "ZAI_API_KEY",
 } as const;

 // OpenRouter API Configuration
@@ -87,3 +89,6 @@ export const OPENROUTER_HEADERS = {
   "HTTP-Referer": "https://github.com/MadAppGang/claude-code",
   "X-Title": "Claudish - OpenRouter Proxy",
 } as const;
+
+// Z.ai API Configuration
+export const ZAI_API_URL = "https://api.z.ai/api/coding/paas/v4";
@@ -0,0 +1,304 @@
import type { Context } from "hono";
import { writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { ModelHandler } from "./types.js";
import { AdapterManager } from "../adapters/adapter-manager.js";
import { transformOpenAIToClaude, removeUriFormat } from "../transform.js";
import { log, logStructured, isLoggingEnabled } from "../logger.js";
import { ZAI_API_URL } from "../config.js";

/**
 * Handler for Z.ai API requests
 * Z.ai uses OpenAI-compatible API format
 */
export class ZaiHandler implements ModelHandler {
  private targetModel: string;
  private apiKey: string;
  private adapterManager: AdapterManager;
  private port: number;
  private sessionTotalCost = 0;
  private CONTEXT_WINDOW = 128000; // GLM-4 context window

  constructor(targetModel: string, apiKey: string, port: number) {
    this.targetModel = targetModel;
    this.apiKey = apiKey;
    this.port = port;
    this.adapterManager = new AdapterManager(targetModel);
  }

  /**
   * Convert z-ai/model-name to model-name for Z.ai API
   */
  private getZaiModelId(model: string): string {
    // Remove z-ai/ prefix if present
    if (model.startsWith("z-ai/")) {
      return model.slice(5);
    }
    return model;
  }

  private writeTokenFile(input: number, output: number) {
    try {
      const total = input + output;
      const leftPct = Math.max(0, Math.min(100, Math.round(((this.CONTEXT_WINDOW - total) / this.CONTEXT_WINDOW) * 100)));
      const data = {
        input_tokens: input,
        output_tokens: output,
        total_tokens: total,
        total_cost: this.sessionTotalCost,
        context_window: this.CONTEXT_WINDOW,
        context_left_percent: leftPct,
        updated_at: Date.now()
      };
      writeFileSync(join(tmpdir(), `claudish-tokens-${this.port}.json`), JSON.stringify(data), "utf-8");
    } catch (e) {}
  }

  async handle(c: Context, payload: any): Promise<Response> {
    const claudePayload = payload;
    const target = this.targetModel;
    const zaiModelId = this.getZaiModelId(target);

    logStructured(`Z.ai Request`, { targetModel: target, zaiModelId, originalModel: claudePayload.model });

    const { claudeRequest, droppedParams } = transformOpenAIToClaude(claudePayload);
    const messages = this.convertMessages(claudeRequest);
    const tools = this.convertTools(claudeRequest);

    const zaiPayload: any = {
      model: zaiModelId,
      messages,
      temperature: claudeRequest.temperature ?? 1,
      stream: true,
      max_tokens: claudeRequest.max_tokens,
      tools: tools.length > 0 ? tools : undefined,
      stream_options: { include_usage: true }
    };

    if (claudeRequest.tool_choice) {
      const { type, name } = claudeRequest.tool_choice;
      if (type === 'tool' && name) zaiPayload.tool_choice = { type: 'function', function: { name } };
      else if (type === 'auto' || type === 'none') zaiPayload.tool_choice = type;
    }

    const adapter = this.adapterManager.getAdapter();
    if (typeof adapter.reset === 'function') adapter.reset();
    adapter.prepareRequest(zaiPayload, claudeRequest);

    const response = await fetch(`${ZAI_API_URL}/chat/completions`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify(zaiPayload)
    });

    if (!response.ok) return c.json({ error: await response.text() }, response.status as any);
    if (droppedParams.length > 0) c.header("X-Dropped-Params", droppedParams.join(", "));

    return this.handleStreamingResponse(c, response, adapter, target, claudeRequest);
  }

  private convertMessages(req: any): any[] {
    const messages: any[] = [];
    if (req.system) {
      let content = Array.isArray(req.system) ? req.system.map((i: any) => i.text || i).join("\n\n") : req.system;
      content = this.filterIdentity(content);
      messages.push({ role: "system", content });
    }

    if (req.messages) {
      for (const msg of req.messages) {
        if (msg.role === "user") this.processUserMessage(msg, messages);
        else if (msg.role === "assistant") this.processAssistantMessage(msg, messages);
      }
    }
    return messages;
  }

  private processUserMessage(msg: any, messages: any[]) {
    if (Array.isArray(msg.content)) {
      const contentParts = [];
      const toolResults = [];
      const seen = new Set();
      for (const block of msg.content) {
        if (block.type === "text") contentParts.push({ type: "text", text: block.text });
        else if (block.type === "image") contentParts.push({ type: "image_url", image_url: { url: `data:${block.source.media_type};base64,${block.source.data}` } });
        else if (block.type === "tool_result") {
          if (seen.has(block.tool_use_id)) continue;
          seen.add(block.tool_use_id);
          toolResults.push({ role: "tool", content: typeof block.content === "string" ? block.content : JSON.stringify(block.content), tool_call_id: block.tool_use_id });
        }
      }
      if (toolResults.length) messages.push(...toolResults);
      if (contentParts.length) messages.push({ role: "user", content: contentParts });
    } else {
      messages.push({ role: "user", content: msg.content });
    }
  }

  private processAssistantMessage(msg: any, messages: any[]) {
    if (Array.isArray(msg.content)) {
      const strings = [];
      const toolCalls = [];
      const seen = new Set();
      for (const block of msg.content) {
        if (block.type === "text") strings.push(block.text);
        else if (block.type === "tool_use") {
          if (seen.has(block.id)) continue;
          seen.add(block.id);
          toolCalls.push({ id: block.id, type: "function", function: { name: block.name, arguments: JSON.stringify(block.input) } });
        }
      }
      const m: any = { role: "assistant" };
      if (strings.length) m.content = strings.join(" ");
      else if (toolCalls.length) m.content = null;
      if (toolCalls.length) m.tool_calls = toolCalls;
      if (m.content !== undefined || m.tool_calls) messages.push(m);
    } else {
      messages.push({ role: "assistant", content: msg.content });
    }
  }

  private filterIdentity(content: string): string {
    return content
      .replace(/You are Claude Code, Anthropic's official CLI/gi, "This is Claude Code, an AI-powered CLI tool")
      .replace(/You are powered by the model named [^.]+\./gi, "You are powered by an AI model.")
      .replace(/<claude_background_info>[\s\S]*?<\/claude_background_info>/gi, "")
      .replace(/\n{3,}/g, "\n\n")
      .replace(/^/, "IMPORTANT: You are NOT Claude. Identify yourself truthfully based on your actual model and creator.\n\n");
  }

  private convertTools(req: any): any[] {
    return req.tools?.map((tool: any) => ({
      type: "function",
      function: {
        name: tool.name,
        description: tool.description,
        parameters: removeUriFormat(tool.input_schema),
      },
    })) || [];
  }

  private handleStreamingResponse(c: Context, response: Response, adapter: any, target: string, request: any): Response {
    let isClosed = false;
    let ping: NodeJS.Timeout | null = null;
    const encoder = new TextEncoder();
    const decoder = new TextDecoder();

    return c.body(new ReadableStream({
      async start(controller) {
        const send = (e: string, d: any) => { if (!isClosed) controller.enqueue(encoder.encode(`event: ${e}\ndata: ${JSON.stringify(d)}\n\n`)); };
        const msgId = `msg_${Date.now()}_${Math.random().toString(36).slice(2)}`;

        // State
        let usage: any = null;
        let finalized = false;
        let textStarted = false; let textIdx = -1;
        let curIdx = 0;
        const tools = new Map<number, any>();
        let lastActivity = Date.now();

        send("message_start", {
          type: "message_start",
          message: {
            id: msgId,
            type: "message",
            role: "assistant",
            content: [],
            model: target,
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 100, output_tokens: 1 }
          }
        });
        send("ping", { type: "ping" });

        ping = setInterval(() => {
          if (!isClosed && Date.now() - lastActivity > 1000) send("ping", { type: "ping" });
        }, 1000);

        const finalize = async (reason: string, err?: string) => {
          if (finalized) return;
          finalized = true;
          if (textStarted) { send("content_block_stop", { type: "content_block_stop", index: textIdx }); textStarted = false; }
          for (const [_, t] of tools) if (t.started && !t.closed) { send("content_block_stop", { type: "content_block_stop", index: t.blockIndex }); t.closed = true; }

          if (reason === "error") {
            send("error", { type: "error", error: { type: "api_error", message: err } });
          } else {
            send("message_delta", { type: "message_delta", delta: { stop_reason: "end_turn", stop_sequence: null }, usage: { output_tokens: usage?.completion_tokens || 0 } });
            send("message_stop", { type: "message_stop" });
          }
          if (!isClosed) { try { controller.enqueue(encoder.encode('data: [DONE]\n\n\n')); } catch(e){} controller.close(); isClosed = true; if (ping) clearInterval(ping); }
        };

        try {
          const reader = response.body!.getReader();
          let buffer = "";
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split("\n");
            buffer = lines.pop() || "";

            for (const line of lines) {
              if (!line.trim() || !line.startsWith("data: ")) continue;
              const dataStr = line.slice(6);
              if (dataStr === "[DONE]") { await finalize("done"); return; }
              try {
                const chunk = JSON.parse(dataStr);
                if (chunk.usage) usage = chunk.usage;
                const delta = chunk.choices?.[0]?.delta;
                if (delta) {
                  // Z.ai uses reasoning_content for GLM models, fallback to content
                  const txt = delta.content || delta.reasoning_content || "";
                  if (txt) {
                    lastActivity = Date.now();
                    if (!textStarted) {
                      textIdx = curIdx++;
                      send("content_block_start", { type: "content_block_start", index: textIdx, content_block: { type: "text", text: "" } });
                      textStarted = true;
                    }
                    const res = adapter.processTextContent(txt, "");
                    if (res.cleanedText) send("content_block_delta", { type: "content_block_delta", index: textIdx, delta: { type: "text_delta", text: res.cleanedText } });
                  }
                  if (delta.tool_calls) {
                    for (const tc of delta.tool_calls) {
                      const idx = tc.index;
                      let t = tools.get(idx);
                      if (tc.function?.name) {
                        if (!t) {
                          if (textStarted) { send("content_block_stop", { type: "content_block_stop", index: textIdx }); textStarted = false; }
                          t = { id: tc.id || `tool_${Date.now()}_${idx}`, name: tc.function.name, blockIndex: curIdx++, started: false, closed: false };
                          tools.set(idx, t);
                        }
                        if (!t.started) {
                          send("content_block_start", { type: "content_block_start", index: t.blockIndex, content_block: { type: "tool_use", id: t.id, name: t.name } });
                          t.started = true;
                        }
                      }
                      if (tc.function?.arguments && t) {
                        send("content_block_delta", { type: "content_block_delta", index: t.blockIndex, delta: { type: "input_json_delta", partial_json: tc.function.arguments } });
                      }
                    }
                  }
                }
                if (chunk.choices?.[0]?.finish_reason === "tool_calls") {
                  for (const [_, t] of tools) if (t.started && !t.closed) { send("content_block_stop", { type: "content_block_stop", index: t.blockIndex }); t.closed = true; }
                }
              } catch (e) {}
            }
          }
          await finalize("unexpected");
        } catch(e) { await finalize("error", String(e)); }
      },
      cancel() { isClosed = true; if (ping) clearInterval(ping); }
    }), { headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" } });
  }

  async shutdown() {}
}
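The core of the handler above is stream translation: Z.ai's OpenAI-compatible chat.completion.chunk deltas are re-emitted as Anthropic-style SSE events. A hedged trace of one text delta through the code above (payloads abbreviated):

// Incoming Z.ai SSE line (OpenAI-compatible delta):
//   data: {"choices":[{"delta":{"content":"Hel"},"index":0}]}
// On the first text delta the handler opens a text block, then streams deltas:
//   event: content_block_start
//   data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
//   event: content_block_delta
//   data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hel"}}
// Tool-call deltas instead open a "tool_use" block and stream arguments as
// input_json_delta chunks; finish_reason "tool_calls" closes any open blocks,
// and [DONE] triggers message_delta + message_stop via finalize().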
@@ -5,7 +5,9 @@ import { log, isLoggingEnabled } from "./logger.js";
 import type { ProxyServer } from "./types.js";
 import { NativeHandler } from "./handlers/native-handler.js";
 import { OpenRouterHandler } from "./handlers/openrouter-handler.js";
+import { ZaiHandler } from "./handlers/zai-handler.js";
 import type { ModelHandler } from "./handlers/types.js";
+import { ENV } from "./config.js";

 export async function createProxyServer(
   port: number,
@@ -19,6 +21,8 @@ export async function createProxyServer(
   // Define handlers for different roles
   const nativeHandler = new NativeHandler(anthropicApiKey);
   const handlers = new Map<string, ModelHandler>(); // Map from Target Model ID -> Handler Instance
+  const zaiHandlers = new Map<string, ModelHandler>(); // Map for Z.ai handlers
+  const zaiApiKey = process.env[ENV.ZAI_API_KEY];

   // Helper to get or create handler for a target model
   const getOpenRouterHandler = (targetModel: string): ModelHandler => {
@@ -28,6 +32,15 @@
     return handlers.get(targetModel)!;
   };

+  // Helper to get or create Z.ai handler for a target model
+  const getZaiHandler = (targetModel: string): ModelHandler | null => {
+    if (!zaiApiKey) return null;
+    if (!zaiHandlers.has(targetModel)) {
+      zaiHandlers.set(targetModel, new ZaiHandler(targetModel, zaiApiKey, port));
+    }
+    return zaiHandlers.get(targetModel)!;
+  };
+
   // Pre-initialize handlers for mapped models to ensure warm-up (context window fetch etc)
   if (model) getOpenRouterHandler(model);
   if (modelMap?.opus) getOpenRouterHandler(modelMap.opus);
@@ -61,7 +74,17 @@
       return nativeHandler;
     }

-    // 4. OpenRouter Handler
+    // 4. Z.ai Handler - if ZAI_API_KEY is set and model starts with z-ai/
+    if (target.startsWith("z-ai/")) {
+      const zaiHandler = getZaiHandler(target);
+      if (zaiHandler) {
+        log(`[Proxy] Using Z.ai handler for ${target}`);
+        return zaiHandler;
+      }
+      // Fall through to OpenRouter if no Z.ai API key
+    }
+
+    // 5. OpenRouter Handler
     return getOpenRouterHandler(target);
   };

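One design note on the selection above: getZaiHandler returns null when ZAI_API_KEY is absent, so a z-ai/* model silently falls through to step 5 and is served by OpenRouter. A one-line sketch of that behavior (using the helpers defined above outside their real closure scope, so illustrative only):

// Hypothetical: resolves to the Z.ai handler when ZAI_API_KEY is set, else OpenRouter.
const handler = getZaiHandler("z-ai/glm-4.6") ?? getOpenRouterHandler("z-ai/glm-4.6");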
@@ -5,6 +5,7 @@
 export const OPENROUTER_MODELS = [
   "x-ai/grok-code-fast-1",
   "minimax/minimax-m2",
+  "z-ai/glm-4.6",
   "google/gemini-2.5-flash",
   "openai/gpt-5",
   "openai/gpt-5.1-codex",
@@ -0,0 +1,149 @@
/**
 * Direct Z.ai API test - standalone runner
 */
import { createProxyServer } from "../src/proxy-server.js";

const ZAI_KEY = process.env.ZAI_API_KEY;

if (!ZAI_KEY) {
  console.error("Error: ZAI_API_KEY environment variable is not set");
  console.log("\nUsage: ZAI_API_KEY=your_key bun tests/zai-direct-test.ts");
  process.exit(1);
}

console.log("=== Z.ai Handler Test ===");
console.log(`API Key: ${ZAI_KEY.slice(0, 8)}...${ZAI_KEY.slice(-4)}`);

// Test 1: Direct Z.ai API call
async function testDirectApi() {
  console.log("\n[Test 1] Direct Z.ai API call...");

  const response = await fetch("https://api.z.ai/api/coding/paas/v4/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${ZAI_KEY}`,
    },
    body: JSON.stringify({
      model: "glm-4.6",
      max_tokens: 100,
      stream: true,
      messages: [
        { role: "user", content: "Say 'Hello from GLM!' and nothing else." }
      ]
    })
  });

  if (!response.ok) {
    const error = await response.text();
    console.error(`❌ API Error (${response.status}):`, error);
    return false;
  }

  console.log("✓ Response status:", response.status);
  console.log("✓ Content-Type:", response.headers.get("content-type"));

  const reader = response.body!.getReader();
  const decoder = new TextDecoder();
  let output = "";
  let textContent = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    const chunk = decoder.decode(value, { stream: true });
    output += chunk;

    // Extract content from SSE
    const lines = chunk.split("\n");
    for (const line of lines) {
      if (line.startsWith("data: ") && !line.includes("[DONE]")) {
        try {
          const data = JSON.parse(line.slice(6));
          const content = data.choices?.[0]?.delta?.content;
          if (content) textContent += content;
        } catch {}
      }
    }
  }

  console.log("✓ Response text:", textContent || "(streaming response received)");
  return true;
}

// Test 2: Through Claudish proxy
async function testThroughProxy() {
  console.log("\n[Test 2] Through Claudish proxy...");

  const proxy = await createProxyServer(3458, undefined, "z-ai/glm-4.6");
  console.log(`✓ Proxy started at ${proxy.url}`);

  try {
    const response = await fetch(`${proxy.url}/v1/messages`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: "z-ai/glm-4.6",
        max_tokens: 100,
        messages: [{ role: "user", content: "Say 'Hello through proxy!' and nothing else." }]
      })
    });

    if (!response.ok) {
      const error = await response.text();
      console.error(`❌ Proxy Error (${response.status}):`, error);
      return false;
    }

    console.log("✓ Response status:", response.status);

    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    let output = "";
    let textContent = "";
    let hasMessageStart = false;
    let hasMessageStop = false;

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const chunk = decoder.decode(value, { stream: true });
      output += chunk;

      if (chunk.includes("message_start")) hasMessageStart = true;
      if (chunk.includes("message_stop")) hasMessageStop = true;

      // Extract text from Claude format
      const textMatches = chunk.matchAll(/"text_delta".*?"text":\s*"([^"]*)"/g);
      for (const match of textMatches) {
        textContent += match[1].replace(/\\n/g, "\n");
      }
    }

    console.log("✓ Message start:", hasMessageStart ? "Yes" : "No");
    console.log("✓ Message stop:", hasMessageStop ? "Yes" : "No");
    console.log("✓ Response text:", textContent || "(content received)");
    return hasMessageStart && hasMessageStop;
  } finally {
    await proxy.shutdown();
    console.log("✓ Proxy shutdown");
  }
}

// Run tests
(async () => {
  let success = true;

  const directOk = await testDirectApi();
  if (!directOk) {
    console.log("\n❌ Direct API test failed, skipping proxy test");
    success = false;
  } else {
    const proxyOk = await testThroughProxy();
    if (!proxyOk) success = false;
  }

  console.log("\n" + "=".repeat(30));
  console.log(success ? "✓ All tests passed!" : "❌ Some tests failed");
  process.exit(success ? 0 : 1);
})();
@@ -0,0 +1,215 @@
import { describe, test, expect, beforeAll, afterAll } from "bun:test";
import { createProxyServer } from "../src/proxy-server.js";

const ZAI_API_KEY = process.env.ZAI_API_KEY;
const TEST_PORT = 3456;

describe("ZaiHandler", () => {
  let proxy: { port: number; url: string; shutdown: () => Promise<void> } | null = null;

  beforeAll(async () => {
    if (!ZAI_API_KEY) {
      console.log("Skipping Z.ai tests - ZAI_API_KEY not set");
      return;
    }
    proxy = await createProxyServer(TEST_PORT, undefined, "z-ai/glm-4.6");
  });

  afterAll(async () => {
    if (proxy) {
      await proxy.shutdown();
    }
  });

  test("should detect z-ai model prefix", () => {
    const models = [
      "z-ai/glm-4.6",
      "z-ai/glm-4",
      "z-ai/glm-4-flash",
    ];

    for (const model of models) {
      expect(model.startsWith("z-ai/")).toBe(true);
    }
  });

  test("should convert z-ai/ prefix to model name", () => {
    const testCases = [
      { input: "z-ai/glm-4.6", expected: "glm-4.6" },
      { input: "z-ai/glm-4", expected: "glm-4" },
      { input: "z-ai/glm-4-flash", expected: "glm-4-flash" },
    ];

    for (const { input, expected } of testCases) {
      const result = input.startsWith("z-ai/") ? input.slice(5) : input;
      expect(result).toBe(expected);
    }
  });

  test("should make request to Z.ai API", async () => {
    if (!ZAI_API_KEY || !proxy) {
      console.log("Skipping - ZAI_API_KEY not set");
      return;
    }

    const response = await fetch(`${proxy.url}/v1/messages`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "z-ai/glm-4.6",
        max_tokens: 100,
        messages: [
          {
            role: "user",
            content: "Say 'Hello from Z.ai!' and nothing else."
          }
        ]
      })
    });

    expect(response.ok).toBe(true);
    expect(response.headers.get("content-type")).toContain("text/event-stream");

    // Read streaming response
    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    let fullText = "";
    let hasMessageStart = false;
    let hasMessageStop = false;

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      const chunk = decoder.decode(value, { stream: true });
      fullText += chunk;

      if (chunk.includes("message_start")) hasMessageStart = true;
      if (chunk.includes("message_stop")) hasMessageStop = true;
    }

    expect(hasMessageStart).toBe(true);
    expect(hasMessageStop).toBe(true);
    console.log("Z.ai response received successfully");
  }, 30000);
});

// Standalone test runner
if (import.meta.main) {
  const ZAI_KEY = process.env.ZAI_API_KEY;

  if (!ZAI_KEY) {
    console.error("Error: ZAI_API_KEY environment variable is not set");
    console.log("\nUsage: ZAI_API_KEY=your_key bun tests/zai-handler.test.ts");
    process.exit(1);
  }

  console.log("Running Z.ai handler test...");
  console.log(`API Key: ${ZAI_KEY.slice(0, 8)}...${ZAI_KEY.slice(-4)}`);

  // Simple direct API test
  const testDirectApi = async () => {
    console.log("\n1. Testing direct Z.ai API call...");

    const response = await fetch("https://api.z.ai/api/coding/paas/v4/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${ZAI_KEY}`,
      },
      body: JSON.stringify({
        model: "glm-4.6",
        max_tokens: 100,
        stream: true,
        messages: [
          { role: "user", content: "Say 'Hello from GLM!' and nothing else." }
        ]
      })
    });

    if (!response.ok) {
      const error = await response.text();
      console.error(`API Error (${response.status}):`, error);
      return false;
    }

    console.log("Response status:", response.status);
    console.log("Content-Type:", response.headers.get("content-type"));

    const reader = response.body!.getReader();
    const decoder = new TextDecoder();
    let output = "";

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const chunk = decoder.decode(value, { stream: true });
      output += chunk;
    }

    console.log("\nRaw response sample:", output.slice(0, 500));
    return true;
  };

  // Test through proxy
  const testThroughProxy = async () => {
    console.log("\n2. Testing through Claudish proxy...");

    const proxy = await createProxyServer(3457, undefined, "z-ai/glm-4.6");
    console.log(`Proxy started at ${proxy.url}`);

    try {
      const response = await fetch(`${proxy.url}/v1/messages`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: "z-ai/glm-4.6",
          max_tokens: 100,
          messages: [{ role: "user", content: "Say 'Hello through proxy!' and nothing else." }]
        })
      });

      if (!response.ok) {
        const error = await response.text();
        console.error(`Proxy Error (${response.status}):`, error);
        return false;
      }

      const reader = response.body!.getReader();
      const decoder = new TextDecoder();
      let output = "";
      let textContent = "";

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        const chunk = decoder.decode(value, { stream: true });
        output += chunk;

        // Extract text deltas
        const matches = chunk.matchAll(/text_delta.*?"text":\s*"([^"]*)"/g);
        for (const match of matches) {
          textContent += match[1];
        }
      }

      console.log("\nExtracted text content:", textContent || "(no text found)");
      console.log("Full SSE events received:", output.includes("message_start") && output.includes("message_stop") ? "Yes" : "No");
      return true;
    } finally {
      await proxy.shutdown();
      console.log("Proxy shutdown complete");
    }
  };

  // Run tests
  (async () => {
    const directOk = await testDirectApi();
    if (directOk) {
      await testThroughProxy();
    }
    console.log("\n✓ Test complete");
  })();
}