fix: prompt caching, conversation length management and token usage display

This commit is contained in:
2026-02-26 20:07:06 +01:00
parent daf8addb53
commit 9149c21bdf
20 changed files with 317 additions and 7 deletions

View File

@@ -75,6 +75,18 @@ export interface SendMessageOptions {
onToolCall?: (toolCall: { name: string; args: unknown }) => void;
onToolResult?: (result: { name: string; result: unknown }) => void;
onA2UIMessage?: (message: A2UIServerMessage) => void;
/**
 * Optional callback invoked with token usage figures each time a model
 * response that carries a usage object is processed.
 * The un-prefixed fields describe that single response; the `cumulative*`
 * fields are running totals kept per conversation.
 */
onTokenUsage?: (usage: {
inputTokens: number;
outputTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
totalTokens: number;
cumulativeInputTokens: number;
cumulativeOutputTokens: number;
cumulativeCacheReadTokens: number;
cumulativeCacheWriteTokens: number;
cumulativeTotalTokens: number;
}) => void;
}
export interface SendMessageResult {
@@ -136,6 +148,12 @@ export class OpenCodeManager {
private getMainWindow: () => BrowserWindow | null;
private apiKey: string = '';
private abortControllers: Map<string, AbortController> = new Map();
// Running token totals, keyed by conversation id. Each reported usage object is
// added onto the previous entry for that conversation before being stored back.
private conversationUsage: Map<string, {
inputTokens: number;
outputTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
}> = new Map();
constructor(
chatEngine: ChatEngine,
@@ -243,7 +261,7 @@ export class OpenCodeManager {
userMessage: string,
options: SendMessageOptions = {}
): Promise<SendMessageResult> {
const { metadata, onDelta, onToolCall, onToolResult, onA2UIMessage } = options;
const { metadata, onDelta, onToolCall, onToolResult, onA2UIMessage, onTokenUsage } = options;
try {
const readyCheck = await this.checkReady();
@@ -318,7 +336,7 @@ export class OpenCodeManager {
prompt,
messages,
abortController.signal,
{ onDelta, onToolCall, onToolResult },
{ onDelta, onToolCall, onToolResult, onTokenUsage },
conversationId,
emitA2UIMessages,
);
@@ -329,7 +347,7 @@ export class OpenCodeManager {
prompt,
messages,
abortController.signal,
{ onDelta, onToolCall, onToolResult },
{ onDelta, onToolCall, onToolResult, onTokenUsage },
conversationId,
emitA2UIMessages,
);
@@ -393,6 +411,7 @@ export class OpenCodeManager {
onDelta?: (delta: string) => void;
onToolCall?: (toolCall: { name: string; args: unknown }) => void;
onToolResult?: (result: { name: string; result: unknown }) => void;
onTokenUsage?: SendMessageOptions['onTokenUsage'];
},
conversationId: string,
emitA2UIMessages: (messages: A2UIServerMessage[]) => void,
@@ -404,6 +423,9 @@ export class OpenCodeManager {
// Convert DB messages to Anthropic format
let messages = this.buildAnthropicMessages(dbMessages);
// Truncate to fit within context window
messages = this.truncateToTokenBudget(messages, systemPrompt, tools);
// Tool use loop - keep going until the model stops calling tools
const MAX_TOOL_ROUNDS = 10;
let round = 0;
@@ -417,6 +439,7 @@ export class OpenCodeManager {
system: systemPrompt,
messages,
tools,
cache_control: { type: 'ephemeral' },
};
const response = await this.httpRequest(ZEN_ANTHROPIC_URL, {
@@ -438,6 +461,36 @@ export class OpenCodeManager {
const data = JSON.parse(response.body);
// Extract and emit token usage (Anthropic format).
// Anthropic reports `input_tokens` as the uncached remainder of the prompt only;
// cache reads and cache writes are counted separately in
// `cache_read_input_tokens` / `cache_creation_input_tokens`. The three values are
// therefore disjoint and must be added (never subtracted) to get the prompt size.
// NOTE(review): assumes the gateway behind ZEN_ANTHROPIC_URL passes Anthropic's
// usage object through unchanged — confirm.
if (data.usage && callbacks.onTokenUsage) {
  const usage = data.usage;
  const cacheReadTokens = usage.cache_read_input_tokens || 0;
  const cacheWriteTokens = usage.cache_creation_input_tokens || 0;
  const inputTokens = usage.input_tokens || 0;
  const outputTokens = usage.output_tokens || 0;
  // Same formula as cumulativeTotalTokens below, so per-round and cumulative totals agree.
  const totalTokens = inputTokens + outputTokens + cacheReadTokens + cacheWriteTokens;

  // Fold this round into the running totals for the conversation.
  const prev = this.conversationUsage.get(conversationId) || {
    inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0,
  };
  const cumulative = {
    inputTokens: prev.inputTokens + inputTokens,
    outputTokens: prev.outputTokens + outputTokens,
    cacheReadTokens: prev.cacheReadTokens + cacheReadTokens,
    cacheWriteTokens: prev.cacheWriteTokens + cacheWriteTokens,
  };
  this.conversationUsage.set(conversationId, cumulative);

  callbacks.onTokenUsage({
    inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, totalTokens,
    cumulativeInputTokens: cumulative.inputTokens,
    cumulativeOutputTokens: cumulative.outputTokens,
    cumulativeCacheReadTokens: cumulative.cacheReadTokens,
    cumulativeCacheWriteTokens: cumulative.cacheWriteTokens,
    cumulativeTotalTokens: cumulative.inputTokens + cumulative.outputTokens + cumulative.cacheReadTokens + cumulative.cacheWriteTokens,
  });
}
console.log('[OpenCodeManager] Round', round, 'stop_reason:', data.stop_reason, 'content blocks:', JSON.stringify(data.content?.map((b: AnthropicContentBlock) => ({ type: b.type, textLen: b.text?.length, name: b.name }))));
if (!data.content) {
@@ -581,12 +634,13 @@ export class OpenCodeManager {
onDelta?: (delta: string) => void;
onToolCall?: (toolCall: { name: string; args: unknown }) => void;
onToolResult?: (result: { name: string; result: unknown }) => void;
// Reuse the public option type so this callback shape cannot drift from SendMessageOptions.
onTokenUsage?: SendMessageOptions['onTokenUsage'];
},
conversationId: string,
emitA2UIMessages: (messages: A2UIServerMessage[]) => void,
): Promise<{ content: string; toolCalls: Array<{ name: string; args: unknown }> }> {
// Build OpenAI-format messages
const messages: Array<Record<string, unknown>> = [
const allMessages: Array<Record<string, unknown>> = [
{ role: 'system', content: systemPrompt },
...dbMessages
.filter(m => m.role === 'user' || m.role === 'assistant')
@@ -607,6 +661,19 @@ export class OpenCodeManager {
},
}));
// Fit the history into the context window. The system message (first entry) is
// always kept; the remaining entries go through the shared truncation helper in a
// role/content shape and are then re-attached behind the system message.
const [systemMessage, ...history] = allMessages;
const historyForBudget = history.map(entry => ({
  role: entry.role as 'user' | 'assistant',
  content: (entry.content as string) || '',
}));
const fitted = this.truncateToTokenBudget(historyForBudget, systemPrompt, anthropicTools);
const messages: Array<Record<string, unknown>> = [
  systemMessage,
  ...fitted.map(({ role, content }) => ({ role, content })),
];
let accumulatedText = '';
const allToolCalls: Array<{ name: string; args: unknown }> = [];
const MAX_TOOL_ROUNDS = 10;
@@ -640,6 +707,35 @@ export class OpenCodeManager {
const data = JSON.parse(response.body);
const choice = data.choices?.[0];
// Report token usage for this round (OpenAI-style usage object).
// Cached prompt tokens are split out of the prompt count; this path never
// reports cache writes, so that counter is only carried forward.
if (data.usage && callbacks.onTokenUsage) {
  const { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details } = data.usage;
  const promptTokens = prompt_tokens || 0;
  const cacheReadTokens = prompt_tokens_details?.cached_tokens || 0;
  const inputTokens = promptTokens - cacheReadTokens;
  const outputTokens = completion_tokens || 0;
  const totalTokens = total_tokens || promptTokens + outputTokens;

  // Add this round onto whatever has been recorded for the conversation so far.
  const emptyUsage = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 };
  const previous = this.conversationUsage.get(conversationId) || emptyUsage;
  const running = {
    inputTokens: previous.inputTokens + inputTokens,
    outputTokens: previous.outputTokens + outputTokens,
    cacheReadTokens: previous.cacheReadTokens + cacheReadTokens,
    cacheWriteTokens: previous.cacheWriteTokens,
  };
  this.conversationUsage.set(conversationId, running);

  const cumulativeTotalTokens =
    running.inputTokens + running.outputTokens + running.cacheReadTokens + running.cacheWriteTokens;
  callbacks.onTokenUsage({
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens: 0,
    totalTokens,
    cumulativeInputTokens: running.inputTokens,
    cumulativeOutputTokens: running.outputTokens,
    cumulativeCacheReadTokens: running.cacheReadTokens,
    cumulativeCacheWriteTokens: running.cacheWriteTokens,
    cumulativeTotalTokens,
  });
}
console.log('[OpenCodeManager:OpenAI] Round', round, 'status:', response.statusCode, 'content type:', typeof choice?.message?.content, 'content length:', choice?.message?.content?.length, 'tool_calls:', choice?.message?.tool_calls?.length);
if (!choice?.message) {
@@ -1482,7 +1578,76 @@ export class OpenCodeManager {
}
/**
 * Rough token estimate for a piece of text, derived from its character count.
 * Uses a ratio of 3.5 characters per token (a heuristic aimed at English text)
 * and rounds the result up.
 */
private estimateTokens(text: string): number {
  const charsPerToken = 3.5;
  const estimate = text.length / charsPerToken;
  return Math.ceil(estimate);
}
/**
 * Sum the estimated token counts of every message in the list.
 * String content is estimated directly. For block-array content only each
 * block's `text` and string-valued `content` are counted; any other block
 * fields contribute nothing to the estimate.
 */
private estimateMessageTokens(messages: AnthropicMessage[]): number {
  return messages.reduce((sum, { content }) => {
    if (typeof content === 'string') {
      return sum + this.estimateTokens(content);
    }
    if (!Array.isArray(content)) {
      return sum;
    }
    let blockTokens = 0;
    for (const block of content) {
      if (block.text) {
        blockTokens += this.estimateTokens(block.text);
      }
      if (typeof block.content === 'string') {
        blockTokens += this.estimateTokens(block.content);
      }
    }
    return sum + blockTokens;
  }, 0);
}
/**
 * Trim a conversation so that its estimated size fits the context window.
 *
 * The budget available for messages is `maxContextTokens` minus the estimated
 * size of the system prompt and of the JSON-serialized tool definitions, minus
 * a fixed 4096-token reserve for the response. Oldest messages are dropped
 * first: a leading user message is dropped together with the message that
 * follows it, any other leading message is dropped on its own. If anything was
 * dropped, further leading non-user messages are removed so the result starts
 * with a user message (required by Anthropic) whenever more than one message
 * remains.
 *
 * @param messages Conversation history, oldest first. Never mutated.
 * @param systemPrompt System prompt that will accompany the messages.
 * @param tools Tool definitions that will accompany the messages.
 * @param maxContextTokens Total context size to budget against.
 * @returns `messages` itself when it already fits; otherwise a new array holding
 *          the most recent messages. When the budget is zero or negative, only
 *          the last message is returned.
 */
private truncateToTokenBudget(
  messages: AnthropicMessage[],
  systemPrompt: string,
  tools: ToolDefinition[],
  maxContextTokens: number = 150000,
): AnthropicMessage[] {
  const systemTokens = this.estimateTokens(systemPrompt);
  const toolsTokens = this.estimateTokens(JSON.stringify(tools));
  const responseReserve = 4096;
  const availableBudget = maxContextTokens - systemTokens - toolsTokens - responseReserve;

  if (availableBudget <= 0) {
    return messages.slice(-1);
  }

  // Estimate each message once; the estimate is a plain sum over messages, so a
  // running total can be adjusted as messages are dropped instead of re-scanning
  // the whole list on every iteration.
  const perMessageTokens = messages.map(message => this.estimateMessageTokens([message]));
  let remainingTokens = perMessageTokens.reduce((sum, tokens) => sum + tokens, 0);

  if (remainingTokens <= availableBudget) {
    return messages;
  }

  // Drop oldest pairs until we fit (always keeping at least one message).
  let start = 0;
  while (messages.length - start > 2 && remainingTokens > availableBudget) {
    // A leading user message goes together with the reply after it.
    const dropCount = messages[start].role === 'user' ? 2 : 1;
    for (let offset = 0; offset < dropCount; offset++) {
      remainingTokens -= perMessageTokens[start + offset];
    }
    start += dropCount;
  }

  // History is not guaranteed to alternate strictly (e.g. two user messages in a
  // row), so dropping "pairs" can leave an assistant message at the head. Skip
  // such orphans so the truncated conversation starts with a user message.
  if (start > 0) {
    while (messages.length - start > 1 && messages[start].role !== 'user') {
      start += 1;
    }
  }

  const truncated = messages.slice(start);
  if (truncated.length !== messages.length) {
    console.log(`[OpenCodeManager] Truncated conversation from ${messages.length} to ${truncated.length} messages (budget: ${availableBudget} tokens)`);
  }

  return truncated;
}
/**
* Build Anthropic-format messages from DB message history.
* For assistant messages that had tool calls, appends a summary annotation
* so the model retains context about what tools were used on resume.
*/
private buildAnthropicMessages(
dbMessages: Array<{ role: string; content?: string; toolCalls?: string; toolCallId?: string }>
@@ -1493,9 +1658,25 @@ export class OpenCodeManager {
if (msg.role === 'user') {
messages.push({ role: 'user', content: msg.content || '' });
} else if (msg.role === 'assistant') {
messages.push({ role: 'assistant', content: msg.content || '' });
let content = msg.content || '';
// If this message had tool calls, append a summary for context on resume
if (msg.toolCalls) {
try {
const toolCalls = JSON.parse(msg.toolCalls) as Array<{ name: string; args: unknown }>;
if (toolCalls.length > 0) {
const summary = toolCalls
.map(tc => `- ${tc.name}(${JSON.stringify(tc.args)})`)
.join('\n');
content += `\n\n[Tools used in this turn:\n${summary}\n]`;
}
} catch {
// Ignore malformed toolCalls JSON
}
}
messages.push({ role: 'assistant', content });
}
// Tool messages from history are already incorporated into assistant responses
}
return messages;

View File

@@ -283,6 +283,11 @@ export function registerChatHandlers(): void {
mainWindow.webContents.send('a2ui-message', { conversationId, message });
}
},
onTokenUsage: (usage) => {
  // Relay the usage figures to the renderer, tagged with their conversation.
  // Nothing is sent when there is no main window.
  mainWindow?.webContents.send('chat-token-usage', { conversationId, ...usage });
},
});
return result;

View File

@@ -339,6 +339,11 @@ export const electronAPI: ElectronAPI = {
ipcRenderer.on('chat-title-updated', subscription);
return () => ipcRenderer.removeListener('chat-title-updated', subscription);
},
onTokenUsage: (callback: (data: import('./shared/electronApi').ChatTokenUsage) => void) => {
  // Subscribe to usage updates; only the payload is handed to the callback,
  // the IPC event object is not. The returned function removes the listener.
  const channel = 'chat-token-usage';
  const handleTokenUsage = (
    _event: Electron.IpcRendererEvent,
    payload: import('./shared/electronApi').ChatTokenUsage,
  ) => callback(payload);
  ipcRenderer.on(channel, handleTokenUsage);
  return () => ipcRenderer.removeListener(channel, handleTokenUsage);
},
// A2UI streaming
onA2UIMessage: (callback: (data: { conversationId: string; message: import('./a2ui/types').A2UIServerMessage }) => void) => {

View File

@@ -431,6 +431,20 @@ export interface ChatTitleUpdate {
title: string;
}
/**
 * Payload of the 'chat-token-usage' IPC message.
 * The un-prefixed counters describe a single model response; the `cumulative*`
 * counters are running totals for the conversation.
 */
export interface ChatTokenUsage {
/** Conversation the figures belong to. */
conversationId: string;
/** Input tokens of this response that were not reported as cache reads or writes. */
inputTokens: number;
/** Output tokens of this response. */
outputTokens: number;
/** Input tokens of this response reported as read from the prompt cache. */
cacheReadTokens: number;
/** Input tokens of this response reported as written to the prompt cache. */
cacheWriteTokens: number;
/** Total tokens reported for this response. */
totalTokens: number;
/** Running total of `inputTokens` for the conversation. */
cumulativeInputTokens: number;
/** Running total of `outputTokens` for the conversation. */
cumulativeOutputTokens: number;
/** Running total of `cacheReadTokens` for the conversation. */
cumulativeCacheReadTokens: number;
/** Running total of `cacheWriteTokens` for the conversation. */
cumulativeCacheWriteTokens: number;
/** Sum of the four cumulative counters above. */
cumulativeTotalTokens: number;
}
export interface ChatSendMetadata {
surface?: 'tab' | 'sidebar';
}
@@ -765,6 +779,7 @@ export interface ElectronAPI {
onToolCall: (callback: (data: ChatToolCall) => void) => () => void;
onToolResult: (callback: (data: ChatToolResult) => void) => () => void;
onTitleUpdated: (callback: (data: ChatTitleUpdate) => void) => () => void;
onTokenUsage: (callback: (data: ChatTokenUsage) => void) => () => void;
// A2UI streaming
onA2UIMessage: (callback: (data: { conversationId: string; message: A2UIServerMessage }) => void) => () => void;

View File

@@ -123,11 +123,24 @@ export const AssistantSidebar: React.FC = () => {
}
});
// Mirror the cumulative usage of the current conversation into the app store;
// updates for other conversations are ignored.
const unsubTokenUsage = window.electronAPI?.chat.onTokenUsage((usage) => {
  if (usage.conversationId === conversationId) {
    const {
      cumulativeInputTokens: inputTokens,
      cumulativeOutputTokens: outputTokens,
      cumulativeCacheReadTokens: cacheReadTokens,
      cumulativeCacheWriteTokens: cacheWriteTokens,
      cumulativeTotalTokens: totalTokens,
    } = usage;
    useAppStore.getState().setChatTokenUsage(conversationId, {
      inputTokens,
      outputTokens,
      cacheReadTokens,
      cacheWriteTokens,
      totalTokens,
    });
  }
});
return () => {
unsubDelta?.();
unsubToolCall?.();
unsubToolResult?.();
unsubTitle?.();
unsubTokenUsage?.();
};
}, [conversationId, appendStreamDelta, recordToolCall, recordToolResult]);

View File

@@ -148,11 +148,24 @@ export const ChatPanel: React.FC<ChatPanelProps> = ({ conversationId }) => {
}
});
// Keep the store's token usage for this panel's conversation in sync with the
// cumulative figures pushed from the main process.
const unsubTokenUsage = window.electronAPI?.chat.onTokenUsage((update) => {
  if (update.conversationId === conversationId) {
    const cumulativeUsage = {
      inputTokens: update.cumulativeInputTokens,
      outputTokens: update.cumulativeOutputTokens,
      cacheReadTokens: update.cumulativeCacheReadTokens,
      cacheWriteTokens: update.cumulativeCacheWriteTokens,
      totalTokens: update.cumulativeTotalTokens,
    };
    useAppStore.getState().setChatTokenUsage(conversationId, cumulativeUsage);
  }
});
return () => {
unsubDelta?.();
unsubToolCall?.();
unsubToolResult?.();
unsubTitle?.();
unsubTokenUsage?.();
};
}, [conversationId, loadData, scrollToBottom, checkReady, appendStreamDelta, recordToolCall, recordToolResult]);

View File

@@ -95,6 +95,11 @@
border-radius: 3px;
}
.status-bar-item.token-usage {
font-variant-numeric: tabular-nums;
opacity: 0.85;
}
.status-bar-item.language-badge {
border: 1px solid var(--vscode-statusBar-border, transparent);
border-radius: 3px;

View File

@@ -21,6 +21,9 @@ export const StatusBar: React.FC = () => {
selectedPostId,
totalPosts,
picoTheme,
tabs,
activeTabId,
chatTokenUsage,
} = useAppStore();
const [selectedPostStatus, setSelectedPostStatus] = useState<string | null>(null);
@@ -39,6 +42,10 @@ export const StatusBar: React.FC = () => {
const runningTasks = tasks.filter(t => t.status === 'running');
const activeTheme = getRendererPicoTheme(picoTheme);
// Detect active chat tab and its token usage
// activeChatUsage is null when the active tab is not a chat tab, and undefined
// when no usage has been stored under the tab's id yet; both hide the indicator.
// NOTE(review): chatTokenUsage entries are stored under conversation ids, while
// this lookup uses the tab id — presumably a chat tab's id is its conversation
// id; confirm.
const activeTab = tabs.find(tab => tab.id === activeTabId);
const activeChatUsage = activeTab?.type === 'chat' ? chatTokenUsage[activeTab.id] : null;
return (
<div className="status-bar">
<div className="status-bar-left">
@@ -74,6 +81,17 @@ export const StatusBar: React.FC = () => {
<span>{t('statusBar.media', { count: media.length })}</span>
</div>
{/* Token Usage (visible when chat tab is active) */}
{activeChatUsage && (
<div className="status-bar-item token-usage">
<span>{t('statusBar.tokens', {
input: activeChatUsage.inputTokens.toLocaleString(),
output: activeChatUsage.outputTokens.toLocaleString(),
cached: activeChatUsage.cacheReadTokens.toLocaleString(),
})}</span>
</div>
)}
<div className="status-bar-item theme-badge">
<span>{t('statusBar.theme', { theme: activeTheme })}</span>
</div>

View File

@@ -817,6 +817,7 @@
"statusBar.posts": "{count} Beiträge",
"statusBar.media": "{count} Medien",
"statusBar.more": "+{count} weitere",
"statusBar.tokens": "Token: {input} ein / {output} aus ({cached} zwischengesp.)",
"statusBar.theme": "Theme: {theme}",
"statusBar.ui": "UI",
"statusBar.uiLanguage": "UI-Sprache",

View File

@@ -817,6 +817,7 @@
"statusBar.posts": "{count} posts",
"statusBar.media": "{count} media",
"statusBar.more": "+{count} more",
"statusBar.tokens": "Tokens: {input} in / {output} out ({cached} cached)",
"statusBar.theme": "Theme: {theme}",
"statusBar.ui": "UI",
"statusBar.uiLanguage": "UI language",

View File

@@ -817,6 +817,7 @@
"statusBar.posts": "Publicaciones",
"statusBar.media": "Medios",
"statusBar.more": "+{count} más",
"statusBar.tokens": "Tokens: {input} entr. / {output} sal. ({cached} en caché)",
"statusBar.theme": "Tema: {theme}",
"statusBar.ui": "UI",
"statusBar.uiLanguage": "Idioma de la interfaz",

View File

@@ -815,6 +815,7 @@
"statusBar.posts": "Articles",
"statusBar.media": "Médias",
"statusBar.more": "+{count} en plus",
"statusBar.tokens": "Tokens : {input} entr. / {output} sort. ({cached} en cache)",
"statusBar.theme": "Thème : {theme}",
"statusBar.ui": "UI",
"statusBar.uiLanguage": "Langue de linterface",

View File

@@ -815,6 +815,7 @@
"statusBar.posts": "Articoli",
"statusBar.media": "Media",
"statusBar.more": "+{count} in più",
"statusBar.tokens": "Token: {input} ingr. / {output} usc. ({cached} in cache)",
"statusBar.theme": "Tema: {theme}",
"statusBar.ui": "UI",
"statusBar.uiLanguage": "Lingua interfaccia",

View File

@@ -100,6 +100,15 @@ interface AppState {
isLoading: boolean;
error: string | null;
// Chat token usage (keyed by conversationId, ephemeral - not persisted)
chatTokenUsage: Record<string, {
inputTokens: number;
outputTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
totalTokens: number;
}>;
// Project Actions
setProjects: (projects: ProjectData[]) => void;
setActiveProject: (project: ProjectData | null) => void;
@@ -160,6 +169,16 @@ interface AppState {
// Loading Actions
setLoading: (loading: boolean) => void;
setError: (error: string | null) => void;
// Chat token usage actions
setChatTokenUsage: (conversationId: string, usage: {
inputTokens: number;
outputTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
totalTokens: number;
}) => void;
clearChatTokenUsage: (conversationId: string) => void;
}
export const useAppStore = create<AppState>()(
@@ -212,6 +231,9 @@ export const useAppStore = create<AppState>()(
isLoading: false,
error: null,
// Chat token usage (ephemeral, not persisted)
chatTokenUsage: {},
// Project Actions
setProjects: (projects) => set({ projects }),
setActiveProject: (activeProject) => set({ activeProject }),
@@ -388,6 +410,15 @@ export const useAppStore = create<AppState>()(
// Loading Actions
setLoading: (isLoading) => set({ isLoading }),
setError: (error) => set({ error }),
// Chat token usage actions
// Store (or overwrite) the usage entry for one conversation without touching the others.
setChatTokenUsage: (conversationId, usage) =>
  set(({ chatTokenUsage }) => ({
    chatTokenUsage: { ...chatTokenUsage, [conversationId]: usage },
  })),
// Remove the usage entry of one conversation; a fresh map object is always produced.
clearChatTokenUsage: (conversationId) =>
  set(({ chatTokenUsage }) => {
    const remaining = { ...chatTokenUsage };
    delete remaining[conversationId];
    return { chatTokenUsage: remaining };
  }),
}),
{
name: STORAGE_KEY,

View File

@@ -63,6 +63,13 @@ vi.mock('../../src/main/engine/OpenCodeManager', () => ({
options?.onDelta?.('stream-delta');
options?.onToolCall?.({ name: 'search_posts', args: { query: 'q' } });
options?.onToolResult?.({ name: 'search_posts', result: { ok: true } });
options?.onTokenUsage?.({
inputTokens: 100, outputTokens: 50,
cacheReadTokens: 80, cacheWriteTokens: 20, totalTokens: 250,
cumulativeInputTokens: 100, cumulativeOutputTokens: 50,
cumulativeCacheReadTokens: 80, cumulativeCacheWriteTokens: 20,
cumulativeTotalTokens: 250,
});
return {
success: true,
message: 'assistant reply',
@@ -120,6 +127,7 @@ describe('chatHandlers', () => {
onDelta: expect.any(Function),
onToolCall: expect.any(Function),
onToolResult: expect.any(Function),
onTokenUsage: expect.any(Function),
}),
);
@@ -135,5 +143,11 @@ describe('chatHandlers', () => {
conversationId: 'conversation-1',
result: { name: 'search_posts', result: { ok: true } },
});
expect(webContentsSend).toHaveBeenCalledWith('chat-token-usage', expect.objectContaining({
conversationId: 'conversation-1',
inputTokens: 100,
outputTokens: 50,
cacheReadTokens: 80,
}));
});
});

View File

@@ -81,6 +81,7 @@ function setupChatApi() {
onToolResult: vi.fn(() => vi.fn()),
onTitleUpdated: vi.fn(() => vi.fn()),
onA2UIMessage: vi.fn(() => vi.fn()),
onTokenUsage: vi.fn(() => vi.fn()),
dispatchA2UIAction: vi.fn(),
} as never;
}

View File

@@ -37,6 +37,7 @@ describe('AssistantSidebar wiring', () => {
onToolResult,
onTitleUpdated,
onA2UIMessage: vi.fn(() => vi.fn()),
onTokenUsage: vi.fn(() => vi.fn()),
dispatchA2UIAction: vi.fn(),
} as never;
});

View File

@@ -35,6 +35,7 @@ describe('assistant sidebar guard rails', () => {
onToolResult: vi.fn(() => vi.fn()),
onTitleUpdated: vi.fn(() => vi.fn()),
onA2UIMessage: vi.fn(() => vi.fn()),
onTokenUsage: vi.fn(() => vi.fn()),
dispatchA2UIAction: vi.fn(),
} as never;
});

View File

@@ -46,6 +46,7 @@ describe('chat surface mode usage guards', () => {
onToolResult: vi.fn(() => vi.fn()),
onTitleUpdated: vi.fn(() => vi.fn()),
onA2UIMessage: vi.fn(() => vi.fn()),
onTokenUsage: vi.fn(() => vi.fn()),
dispatchA2UIAction: vi.fn(),
} as never;
});

View File

@@ -49,6 +49,7 @@ describe('chat surface shared usage guards', () => {
onToolResult: vi.fn(() => vi.fn()),
onTitleUpdated: vi.fn(() => vi.fn()),
onA2UIMessage: vi.fn(() => vi.fn()),
onTokenUsage: vi.fn(() => vi.fn()),
dispatchA2UIAction: vi.fn(),
} as never;
});