fix: thinking signature capture, abort-aware retry delay, usage tracking

This commit is contained in:
2026-03-01 12:13:14 +01:00
parent 72410b2973
commit 2ddaad422f
3 changed files with 248 additions and 9 deletions

View File

@@ -138,6 +138,7 @@ interface AnthropicContentBlock {
tool_use_id?: string;
content?: string | AnthropicToolResultContent[];
is_error?: boolean;
signature?: string;
source?: {
type: 'base64';
media_type: string;
@@ -508,6 +509,7 @@ export class OpenCodeManager {
let cacheReadTokens = 0;
let cacheWriteTokens = 0;
let roundText = '';
let receivedUsage = false;
try {
for await (const event of events) {
@@ -521,6 +523,7 @@ export class OpenCodeManager {
}
if (result.usage) {
receivedUsage = true;
if (result.usage.inputTokens !== undefined) inputTokens = result.usage.inputTokens;
if (result.usage.cacheReadTokens !== undefined) cacheReadTokens = result.usage.cacheReadTokens;
if (result.usage.cacheWriteTokens !== undefined) cacheWriteTokens = result.usage.cacheWriteTokens;
@@ -542,8 +545,7 @@ export class OpenCodeManager {
const streamThinkingBlocks = streamAccumulator.thinkingBlocks;
// Emit token usage after stream completes (only when usage data was received)
const hasUsageData = inputTokens > 0 || outputTokens > 0;
if (callbacks.onTokenUsage && hasUsageData) {
if (callbacks.onTokenUsage && receivedUsage) {
const adjustedInputTokens = inputTokens - cacheReadTokens - cacheWriteTokens;
const totalTokens = inputTokens + outputTokens;
@@ -595,7 +597,11 @@ export class OpenCodeManager {
// Add thinking blocks first (Anthropic requires thinking before text when extended thinking is enabled)
for (const [, tb] of streamThinkingBlocks) {
if (tb.text) {
assistantContentBlocks.push({ type: 'thinking', text: tb.text });
const thinkingBlock: AnthropicContentBlock = { type: 'thinking', text: tb.text };
if (tb.signature) {
thinkingBlock.signature = tb.signature;
}
assistantContentBlocks.push(thinkingBlock);
}
}
@@ -816,6 +822,7 @@ export class OpenCodeManager {
let totalTokens = 0;
let cacheReadTokens = 0;
let roundText = '';
let receivedUsage = false;
try {
for await (const event of events) {
@@ -829,6 +836,7 @@ export class OpenCodeManager {
}
if (result.usage) {
receivedUsage = true;
if (result.usage.promptTokens !== undefined) promptTokens = result.usage.promptTokens;
if (result.usage.completionTokens !== undefined) completionTokens = result.usage.completionTokens;
if (result.usage.totalTokens !== undefined) totalTokens = result.usage.totalTokens;
@@ -849,8 +857,7 @@ export class OpenCodeManager {
const streamToolCalls = streamAccumulator.toolCalls;
// Emit token usage after stream completes (only when usage data was received)
const hasUsageData = promptTokens > 0 || completionTokens > 0;
if (callbacks.onTokenUsage && hasUsageData) {
if (callbacks.onTokenUsage && receivedUsage) {
const inputTokens = promptTokens - cacheReadTokens;
const outputTokens = completionTokens;

View File

@@ -50,7 +50,7 @@ export interface OpenAIStreamAccumulator {
export interface AnthropicStreamAccumulator {
toolCalls: Map<number, ToolCallAccumulator>;
thinkingBlocks: Map<number, { text: string }>;
thinkingBlocks: Map<number, { text: string; signature?: string }>;
}
export interface HttpStreamError extends Error {
@@ -286,9 +286,18 @@ export function parseAnthropicStreamEvent(
break;
}
case 'content_block_stop':
case 'content_block_stop': {
// Block is complete. Tool arguments can now be parsed by the caller.
// For thinking blocks, capture the signature (required by Anthropic when replaying thinking blocks).
// NOTE(review): this reads `content_block` off the stop event itself. Anthropic's streaming
// documentation describes `content_block_stop` as carrying only `index`, with the thinking
// signature delivered earlier via a `content_block_delta` whose delta type is `signature_delta`.
// If that holds for the upstream used here, `stopBlock` is always undefined and the signature is
// never stored — TODO confirm against a captured stream and the `content_block_delta` handler.
const stopBlock = (data as any).content_block;
if (stopBlock?.type === 'thinking' && stopBlock.signature) {
// Only annotate a thinking block that was already registered for this index;
// a stop event for an unknown index is ignored.
const tb = accumulator.thinkingBlocks.get(data.index as number);
if (tb) {
tb.signature = stopBlock.signature;
}
}
break;
}
case 'message_delta': {
if ((data as any).usage) {
@@ -343,7 +352,7 @@ const RETRYABLE_STATUS_CODES = new Set([429, 502, 503]);
*/
export async function withRetry<T>(
fn: () => Promise<T>,
options: { maxRetries?: number; onRetry?: (attempt: number, error: Error) => void } = {},
options: { maxRetries?: number; onRetry?: (attempt: number, error: Error) => void; signal?: AbortSignal } = {},
): Promise<T> {
const maxRetries = options.maxRetries ?? 3;
let lastError: Error | undefined;
@@ -360,6 +369,13 @@ export async function withRetry<T>(
throw error;
}
// Check signal before retrying
if (options.signal?.aborted) {
const abortError: HttpStreamError = new Error('Request cancelled') as HttpStreamError;
abortError.isAbort = true;
throw abortError;
}
// Don't retry on non-retryable status codes
if (httpError.statusCode && !RETRYABLE_STATUS_CODES.has(httpError.statusCode)) {
throw error;
@@ -384,7 +400,26 @@ export async function withRetry<T>(
options.onRetry(attempt + 1, lastError);
}
await new Promise(resolve => setTimeout(resolve, delay));
// Abort-aware delay: wait `delay` ms before the next attempt, but reject immediately with an
// `isAbort` error if the caller's signal is already aborted or fires during the wait.
await new Promise<void>((resolve, reject) => {
  const signal = options.signal;

  // Single place that builds the cancellation error so both abort paths stay identical.
  const createAbortError = (): HttpStreamError => {
    const abortError: HttpStreamError = new Error('Request cancelled') as HttpStreamError;
    abortError.isAbort = true;
    return abortError;
  };

  // Already cancelled: fail fast without scheduling a timer at all.
  if (signal?.aborted) {
    reject(createAbortError());
    return;
  }

  const onAbort = (): void => {
    clearTimeout(timer);
    reject(createAbortError());
  };

  const timer = setTimeout(() => {
    // Detach the listener once the wait completes normally; `{ once: true }` only removes it
    // after it fires, so without this every retry would leave a listener (and its closure)
    // attached to a signal that may outlive this call.
    signal?.removeEventListener('abort', onAbort);
    resolve();
  }, delay);

  signal?.addEventListener('abort', onAbort, { once: true });
});
}
}