fix: scope retry to connection only, prevent onDelta double-emission

This commit is contained in:
2026-03-01 11:42:07 +01:00
parent 2dec5592c9
commit bae229a9a2
3 changed files with 204 additions and 79 deletions

View File

@@ -13,14 +13,12 @@ import http from 'http';
import { URL } from 'url';
import { BrowserWindow } from 'electron';
import {
parseSSELines,
parseAnthropicStreamEvent,
parseOpenAIStreamEvent,
createAnthropicStreamAccumulator,
createOpenAIStreamAccumulator,
httpRequestStream,
withRetry,
type HttpStreamError,
} from './streaming';
import { ChatEngine } from './ChatEngine';
import { PostEngine, type PostData } from './PostEngine';
@@ -485,17 +483,10 @@ export class OpenCodeManager {
cache_control: { type: 'ephemeral' },
};
// Stream the response with retry for transient errors (including mid-stream failures)
const streamResult = await withRetry(async () => {
const streamAccumulator = createAnthropicStreamAccumulator();
let stopReason = '';
let inputTokens = 0;
let outputTokens = 0;
let cacheReadTokens = 0;
let cacheWriteTokens = 0;
let roundText = '';
const { events } = await httpRequestStream(ZEN_ANTHROPIC_URL, {
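// Retry only the HTTP connection: retryable statuses (429/502/503) are caught before any events are emitted.
// Event processing stays outside the retry scope so a retry can never re-emit onDelta for text already delivered.
// Consequence: a failure while iterating the stream is NOT retried by withRetry.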
const { events } = await withRetry(async () => {
return httpRequestStream(ZEN_ANTHROPIC_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -506,35 +497,42 @@ export class OpenCodeManager {
body: JSON.stringify(body),
signal,
});
for await (const event of events) {
const result = parseAnthropicStreamEvent(event, streamAccumulator);
if (result.textDelta) {
roundText += result.textDelta;
if (callbacks.onDelta) {
callbacks.onDelta(result.textDelta);
}
}
if (result.usage) {
if (result.usage.inputTokens !== undefined) inputTokens = result.usage.inputTokens;
if (result.usage.cacheReadTokens !== undefined) cacheReadTokens = result.usage.cacheReadTokens;
if (result.usage.cacheWriteTokens !== undefined) cacheWriteTokens = result.usage.cacheWriteTokens;
if (result.usage.outputTokens !== undefined) outputTokens = result.usage.outputTokens;
}
if (result.finishReason) {
stopReason = result.finishReason;
}
if (result.done) break;
}
return { roundText, stopReason, toolCalls: streamAccumulator.toolCalls, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens };
});
const { roundText, stopReason, toolCalls: streamToolCalls, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens } = streamResult;
// Process stream events outside retry scope — onDelta is never called twice for the same text
const streamAccumulator = createAnthropicStreamAccumulator();
let stopReason = '';
let inputTokens = 0;
let outputTokens = 0;
let cacheReadTokens = 0;
let cacheWriteTokens = 0;
let roundText = '';
for await (const event of events) {
const result = parseAnthropicStreamEvent(event, streamAccumulator);
if (result.textDelta) {
roundText += result.textDelta;
if (callbacks.onDelta) {
callbacks.onDelta(result.textDelta);
}
}
if (result.usage) {
if (result.usage.inputTokens !== undefined) inputTokens = result.usage.inputTokens;
if (result.usage.cacheReadTokens !== undefined) cacheReadTokens = result.usage.cacheReadTokens;
if (result.usage.cacheWriteTokens !== undefined) cacheWriteTokens = result.usage.cacheWriteTokens;
if (result.usage.outputTokens !== undefined) outputTokens = result.usage.outputTokens;
}
if (result.finishReason) {
stopReason = result.finishReason;
}
if (result.done) break;
}
const streamToolCalls = streamAccumulator.toolCalls;
accumulatedText += roundText;
// Emit token usage after stream completes
@@ -681,6 +679,8 @@ export class OpenCodeManager {
}
}
if (signal.aborted) break;
// Add assistant response and tool results to messages for next round
messages = [
...messages,
@@ -764,17 +764,10 @@ export class OpenCodeManager {
stream_options: { include_usage: true },
};
// Stream the response with retry for transient errors (including mid-stream failures)
const streamResult = await withRetry(async () => {
const streamAccumulator = createOpenAIStreamAccumulator();
let finishReason = '';
let promptTokens = 0;
let completionTokens = 0;
let totalTokens = 0;
let cacheReadTokens = 0;
let roundText = '';
const { events } = await httpRequestStream(ZEN_OPENAI_URL, {
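// Retry only the HTTP connection: retryable statuses (429/502/503) are caught before any events are emitted.
// Event processing stays outside the retry scope so a retry can never re-emit onDelta for text already delivered.
// Consequence: a failure while iterating the stream is NOT retried by withRetry.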
const { events } = await withRetry(async () => {
return httpRequestStream(ZEN_OPENAI_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -783,35 +776,42 @@ export class OpenCodeManager {
body: JSON.stringify(body),
signal,
});
for await (const event of events) {
const result = parseOpenAIStreamEvent(event, streamAccumulator);
if (result.textDelta) {
roundText += result.textDelta;
if (callbacks.onDelta) {
callbacks.onDelta(result.textDelta);
}
}
if (result.usage) {
if (result.usage.promptTokens !== undefined) promptTokens = result.usage.promptTokens;
if (result.usage.completionTokens !== undefined) completionTokens = result.usage.completionTokens;
if (result.usage.totalTokens !== undefined) totalTokens = result.usage.totalTokens;
if (result.usage.cacheReadTokens !== undefined) cacheReadTokens = result.usage.cacheReadTokens;
}
if (result.finishReason) {
finishReason = result.finishReason;
}
if (result.done) break;
}
return { roundText, finishReason, toolCalls: streamAccumulator.toolCalls, promptTokens, completionTokens, totalTokens, cacheReadTokens };
});
const { roundText, finishReason, toolCalls: streamToolCalls, promptTokens, completionTokens, totalTokens, cacheReadTokens } = streamResult;
// Process stream events outside retry scope — onDelta is never called twice for the same text
const streamAccumulator = createOpenAIStreamAccumulator();
let finishReason = '';
let promptTokens = 0;
let completionTokens = 0;
let totalTokens = 0;
let cacheReadTokens = 0;
let roundText = '';
for await (const event of events) {
const result = parseOpenAIStreamEvent(event, streamAccumulator);
if (result.textDelta) {
roundText += result.textDelta;
if (callbacks.onDelta) {
callbacks.onDelta(result.textDelta);
}
}
if (result.usage) {
if (result.usage.promptTokens !== undefined) promptTokens = result.usage.promptTokens;
if (result.usage.completionTokens !== undefined) completionTokens = result.usage.completionTokens;
if (result.usage.totalTokens !== undefined) totalTokens = result.usage.totalTokens;
if (result.usage.cacheReadTokens !== undefined) cacheReadTokens = result.usage.cacheReadTokens;
}
if (result.finishReason) {
finishReason = result.finishReason;
}
if (result.done) break;
}
const streamToolCalls = streamAccumulator.toolCalls;
accumulatedText += roundText;
// Emit token usage after stream completes
@@ -915,6 +915,8 @@ export class OpenCodeManager {
tool_call_id: toolCall.id,
});
}
if (signal.aborted) break;
}
// Hit max rounds

View File

@@ -323,8 +323,15 @@ const RETRYABLE_STATUS_CODES = new Set([429, 502, 503]);
* Retry a function with exponential backoff for transient HTTP errors.
*
* Retries on 429 (rate limit), 502 (bad gateway), 503 (service unavailable).
* Does NOT retry on other 4xx errors or abort.
* Also retries errors without a statusCode (e.g. ECONNRESET, EPIPE) since
* these indicate transient network failures during connection.
*
* Does NOT retry on other 4xx errors (400, 401, 403 — client errors) or abort.
* Respects Retry-After header for 429 responses.
*
* Best practice: wrap only the HTTP connection (httpRequestStream) in withRetry,
* NOT the event processing loop. This ensures onDelta callbacks are never
* called twice for the same text on retry.
*/
export async function withRetry<T>(
fn: () => Promise<T>,