feat: SSE streaming for chat providers

This commit is contained in:
2026-03-01 10:10:54 +01:00
parent 5938aa9642
commit 78c2cb7bb7
3 changed files with 1417 additions and 104 deletions

View File

@@ -12,6 +12,16 @@ import https from 'https';
import http from 'http';
import { URL } from 'url';
import { BrowserWindow } from 'electron';
import {
parseSSELines,
parseAnthropicStreamEvent,
parseOpenAIStreamEvent,
createAnthropicStreamAccumulator,
createOpenAIStreamAccumulator,
httpRequestStream,
withRetry,
type HttpStreamError,
} from './streaming';
import { ChatEngine } from './ChatEngine';
import { PostEngine, type PostData } from './PostEngine';
import { MediaEngine, type MediaData } from './MediaEngine';
@@ -470,10 +480,20 @@ export class OpenCodeManager {
system: systemPrompt,
messages,
tools,
stream: true,
cache_control: { type: 'ephemeral' },
};
const response = await this.httpRequest(ZEN_ANTHROPIC_URL, {
// Stream the response with retry for transient errors
const streamAccumulator = createAnthropicStreamAccumulator();
let stopReason = '';
let inputTokens = 0;
let outputTokens = 0;
let cacheReadTokens = 0;
let cacheWriteTokens = 0;
let roundText = ''; // Text produced in this round only
const { events } = await withRetry(() => httpRequestStream(ZEN_ANTHROPIC_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -483,29 +503,43 @@ export class OpenCodeManager {
},
body: JSON.stringify(body),
signal,
});
}));
if (response.statusCode >= 400) {
const errorMsg = this.parseErrorResponse(response);
throw new Error(errorMsg);
for await (const event of events) {
const result = parseAnthropicStreamEvent(event, streamAccumulator);
// Emit text deltas immediately for real-time streaming
if (result.textDelta) {
accumulatedText += result.textDelta;
roundText += result.textDelta;
if (callbacks.onDelta) {
callbacks.onDelta(result.textDelta);
}
}
// Collect usage from message_start (input tokens) and message_delta (output tokens)
if (result.usage) {
if (result.usage.inputTokens !== undefined) inputTokens = result.usage.inputTokens;
if (result.usage.cacheReadTokens !== undefined) cacheReadTokens = result.usage.cacheReadTokens;
if (result.usage.cacheWriteTokens !== undefined) cacheWriteTokens = result.usage.cacheWriteTokens;
if (result.usage.outputTokens !== undefined) outputTokens = result.usage.outputTokens;
}
if (result.finishReason) {
stopReason = result.finishReason;
}
}
const data = JSON.parse(response.body);
// Extract and emit token usage
if (data.usage && callbacks.onTokenUsage) {
const usage = data.usage;
const cacheReadTokens = usage.cache_read_input_tokens || 0;
const cacheWriteTokens = usage.cache_creation_input_tokens || 0;
const inputTokens = (usage.input_tokens || 0) - cacheReadTokens - cacheWriteTokens;
const outputTokens = usage.output_tokens || 0;
const totalTokens = (usage.input_tokens || 0) + outputTokens;
// Emit token usage after stream completes
if (callbacks.onTokenUsage) {
const adjustedInputTokens = inputTokens - cacheReadTokens - cacheWriteTokens;
const totalTokens = inputTokens + outputTokens;
const prev = this.conversationUsage.get(conversationId) || {
inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0,
};
const cumulative = {
inputTokens: prev.inputTokens + inputTokens,
inputTokens: prev.inputTokens + adjustedInputTokens,
outputTokens: prev.outputTokens + outputTokens,
cacheReadTokens: prev.cacheReadTokens + cacheReadTokens,
cacheWriteTokens: prev.cacheWriteTokens + cacheWriteTokens,
@@ -513,7 +547,7 @@ export class OpenCodeManager {
this.conversationUsage.set(conversationId, cumulative);
callbacks.onTokenUsage({
inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, totalTokens,
inputTokens: adjustedInputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, totalTokens,
cumulativeInputTokens: cumulative.inputTokens,
cumulativeOutputTokens: cumulative.outputTokens,
cumulativeCacheReadTokens: cumulative.cacheReadTokens,
@@ -522,35 +556,19 @@ export class OpenCodeManager {
});
}
console.log('[OpenCodeManager] Round', round, 'stop_reason:', data.stop_reason, 'content blocks:', JSON.stringify(data.content?.map((b: AnthropicContentBlock) => ({ type: b.type, textLen: b.text?.length, name: b.name }))));
if (!data.content) {
throw new Error('API response missing content field');
}
// Check if there are tool_use blocks
const toolUseBlocks = (data.content as AnthropicContentBlock[]).filter(
(b: AnthropicContentBlock) => b.type === 'tool_use'
);
// Capture text from any block type that has a text field (text, thinking, etc.)
const textBlocks = (data.content as AnthropicContentBlock[]).filter(
(b: AnthropicContentBlock) => b.text
);
// Accumulate and stream text content to frontend
for (const block of textBlocks) {
if (block.text) {
accumulatedText += block.text;
if (callbacks.onDelta) {
callbacks.onDelta(block.text);
}
// Collect tool calls from stream accumulator
const toolUseBlocks: Array<{ id: string; name: string; input: unknown }> = [];
for (const [, tc] of streamAccumulator.toolCalls) {
try {
toolUseBlocks.push({ id: tc.id, name: tc.name, input: JSON.parse(tc.arguments) });
} catch {
toolUseBlocks.push({ id: tc.id, name: tc.name, input: {} });
}
}
console.log('[OpenCodeManager] Round', round, 'accumulatedText length:', accumulatedText.length, 'toolUseBlocks:', toolUseBlocks.length);
console.log('[OpenCodeManager] Round', round, 'stopReason:', stopReason, 'accumulatedText length:', accumulatedText.length, 'toolCalls:', toolUseBlocks.length);
if (toolUseBlocks.length === 0 || data.stop_reason !== 'tool_use') {
if (toolUseBlocks.length === 0 || stopReason !== 'tool_use') {
// No more tool calls - return all accumulated text
console.log('[OpenCodeManager] Returning accumulated text length:', accumulatedText.length);
return { content: accumulatedText, toolCalls: allToolCalls };
@@ -558,11 +576,26 @@ export class OpenCodeManager {
// Execute tool calls
const toolResults: AnthropicContentBlock[] = [];
// Build assistant content blocks for the next message round
const assistantContentBlocks: AnthropicContentBlock[] = [];
// Add text block with text from this round
if (roundText) {
assistantContentBlocks.push({ type: 'text', text: roundText });
}
for (const toolBlock of toolUseBlocks) {
const toolName = toolBlock.name!;
const toolName = toolBlock.name;
const toolArgs = toolBlock.input;
const toolUseId = toolBlock.id!;
const toolUseId = toolBlock.id;
// Add tool_use block to assistant content
assistantContentBlocks.push({
type: 'tool_use',
id: toolUseId,
name: toolName,
input: toolArgs,
});
allToolCalls.push({ name: toolName, args: toolArgs });
@@ -643,7 +676,7 @@ export class OpenCodeManager {
// Add assistant response and tool results to messages for next round
messages = [
...messages,
{ role: 'assistant' as const, content: data.content },
{ role: 'assistant' as const, content: assistantContentBlocks },
{ role: 'user' as const, content: toolResults },
];
}
@@ -718,9 +751,18 @@ export class OpenCodeManager {
max_tokens: 4096,
messages,
tools: openaiTools,
stream: true,
stream_options: { include_usage: true },
};
const response = await this.httpRequest(ZEN_OPENAI_URL, {
// Stream the response with retry for transient errors
const streamAccumulator = createOpenAIStreamAccumulator();
let finishReason = '';
let promptTokens = 0;
let completionTokens = 0;
let totalTokens = 0;
const { events } = await withRetry(() => httpRequestStream(ZEN_OPENAI_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -728,23 +770,38 @@ export class OpenCodeManager {
},
body: JSON.stringify(body),
signal,
});
}));
if (response.statusCode >= 400) {
const errorMsg = this.parseErrorResponse(response);
throw new Error(errorMsg);
for await (const event of events) {
const result = parseOpenAIStreamEvent(event, streamAccumulator);
// Emit text deltas immediately for real-time streaming
if (result.textDelta) {
accumulatedText += result.textDelta;
if (callbacks.onDelta) {
callbacks.onDelta(result.textDelta);
}
}
// Collect usage from final chunk
if (result.usage) {
if (result.usage.promptTokens !== undefined) promptTokens = result.usage.promptTokens;
if (result.usage.completionTokens !== undefined) completionTokens = result.usage.completionTokens;
if (result.usage.totalTokens !== undefined) totalTokens = result.usage.totalTokens;
}
if (result.finishReason) {
finishReason = result.finishReason;
}
if (result.done) break;
}
const data = JSON.parse(response.body);
const choice = data.choices?.[0];
// Extract and emit token usage (OpenAI format)
if (data.usage && callbacks.onTokenUsage) {
const usage = data.usage;
const cacheReadTokens = usage.prompt_tokens_details?.cached_tokens || 0;
const inputTokens = (usage.prompt_tokens || 0) - cacheReadTokens;
const outputTokens = usage.completion_tokens || 0;
const totalTokens = usage.total_tokens || (usage.prompt_tokens || 0) + outputTokens;
// Emit token usage after stream completes
if (callbacks.onTokenUsage) {
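const cacheReadTokens = 0; // Streamed usage as parsed here exposes only prompt/completion/total counts, so cache reads are reported as 0. NOTE(review): confirm whether the stream parser can surface cached prompt tokens.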
const inputTokens = promptTokens;
const outputTokens = completionTokens;
const prev = this.conversationUsage.get(conversationId) || {
inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0,
@@ -758,7 +815,8 @@ export class OpenCodeManager {
this.conversationUsage.set(conversationId, cumulative);
callbacks.onTokenUsage({
inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens: 0, totalTokens,
inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens: 0,
totalTokens: totalTokens || inputTokens + outputTokens,
cumulativeInputTokens: cumulative.inputTokens,
cumulativeOutputTokens: cumulative.outputTokens,
cumulativeCacheReadTokens: cumulative.cacheReadTokens,
@@ -767,57 +825,40 @@ export class OpenCodeManager {
});
}
console.log('[OpenCodeManager:OpenAI] Round', round, 'status:', response.statusCode, 'content type:', typeof choice?.message?.content, 'content length:', choice?.message?.content?.length, 'tool_calls:', choice?.message?.tool_calls?.length);
if (!choice?.message) {
throw new Error('API response missing expected message content');
// Collect tool calls from stream accumulator
const parsedToolCalls: Array<{ id: string; name: string; args: unknown }> = [];
for (const [, tc] of streamAccumulator.toolCalls) {
try {
parsedToolCalls.push({ id: tc.id, name: tc.name, args: JSON.parse(tc.arguments) });
} catch {
parsedToolCalls.push({ id: tc.id, name: tc.name, args: {} });
}
}
// Handle content that might be a string or an array of content parts
let textContent = '';
const content = choice.message.content;
if (typeof content === 'string') {
textContent = content;
} else if (Array.isArray(content)) {
// Handle array of content parts (some models return this format)
// Accept any part that has a text field, regardless of type
textContent = content
.filter((part: { type?: string; text?: string }) => part.text)
.map((part: { text: string }) => part.text)
.join('');
// Log what types we're seeing for debugging
const types = content.map((p: { type?: string }) => p.type).filter(Boolean);
if (types.length > 0) {
console.log('[OpenCodeManager:OpenAI] Content block types:', types);
}
} else if (content && typeof content === 'object') {
// Handle single object with text field
if ('text' in content && typeof content.text === 'string') {
textContent = content.text;
}
}
if (textContent) {
accumulatedText += textContent;
if (callbacks.onDelta) {
callbacks.onDelta(textContent);
}
}
console.log('[OpenCodeManager:OpenAI] Round', round, 'finishReason:', finishReason, 'text length:', accumulatedText.length, 'toolCalls:', parsedToolCalls.length);
// If no tool calls, we're done
if (!choice.message.tool_calls || choice.message.tool_calls.length === 0) {
if (parsedToolCalls.length === 0 || finishReason !== 'tool_calls') {
console.log('[OpenCodeManager:OpenAI] Done. Accumulated text length:', accumulatedText.length);
return { content: accumulatedText, toolCalls: allToolCalls };
}
// Add assistant message (with tool_calls) to conversation
messages.push(choice.message);
// Build the assistant message with tool_calls for conversation history
const assistantMessage: Record<string, unknown> = {
role: 'assistant',
content: accumulatedText || null,
tool_calls: parsedToolCalls.map((tc) => ({
id: tc.id,
type: 'function',
function: { name: tc.name, arguments: JSON.stringify(tc.args) },
})),
};
messages.push(assistantMessage);
// Execute tool calls and add results
for (const toolCall of choice.message.tool_calls) {
const toolName = toolCall.function.name;
const toolArgs = JSON.parse(toolCall.function.arguments || '{}');
for (const toolCall of parsedToolCalls) {
const toolName = toolCall.name;
const toolArgs = toolCall.args;
allToolCalls.push({ name: toolName, args: toolArgs });
if (callbacks.onToolCall) {
@@ -826,7 +867,7 @@ export class OpenCodeManager {
// Check if this is a render tool
if (isRenderTool(toolName)) {
const a2uiMessages = generateFromToolCall(conversationId, toolName, toolArgs);
const a2uiMessages = generateFromToolCall(conversationId, toolName, toolArgs as Record<string, unknown>);
if (a2uiMessages) {
emitA2UIMessages(a2uiMessages);
}
@@ -843,7 +884,7 @@ export class OpenCodeManager {
continue;
}
const result = await this.executeTool(toolName, toolArgs);
const result = await this.executeTool(toolName, toolArgs as Record<string, unknown>);
if (callbacks.onToolResult) {
callbacks.onToolResult({ name: toolName, result });