wip: desperate models fucking around

This commit is contained in:
2026-02-26 00:13:52 +01:00
parent 021cddefa3
commit 2a923c7e48
16 changed files with 684 additions and 113 deletions

View File

@@ -97,6 +97,12 @@ describe('OpenCodeManager protocol integration', () => {
const telemetryAfter = getProtocolTelemetryService().getSnapshot();
expect(telemetryAfter.totalTurns).toBe(telemetryBefore.totalTurns + 1);
expect(telemetryAfter.validEnvelopeTurns).toBe(telemetryBefore.validEnvelopeTurns + 1);
expect(chatEngineMock.addMessage).toHaveBeenCalledWith(expect.objectContaining({
conversationId: 'conversation-1',
role: 'assistant',
content: 'Please provide a date range.',
}));
});
it('blocks unsupported actions and records blocked-action telemetry', async () => {
@@ -149,4 +155,71 @@ describe('OpenCodeManager protocol integration', () => {
const telemetryAfter = getProtocolTelemetryService().getSnapshot();
expect(telemetryAfter.blockedActionCount).toBe(telemetryBefore.blockedActionCount + 1);
});
// Scenario: the first mocked model reply carries no protocol envelope fields, the
// second one does. The test expects exactly two model calls, a repair prompt as the
// last message of the second call, and the second reply's assistant text persisted.
it('retries once with protocol repair prompt when first output is non-canonical', async () => {
  // First mocked reply: only a title and widgets, no protocolVersion/assistantText.
  const legacyReply = {
    content: JSON.stringify({
      title: 'Legacy JSON',
      widgets: [{ type: 'chart', chartType: 'bar' }],
    }),
    toolCalls: [],
  };

  // Second mocked reply: a complete envelope (key order kept so the JSON text is stable).
  const repairedEnvelope = {
    protocolVersion: '2.0',
    assistantText: 'Here is your chart.',
    ui: {
      specVersion: '1',
      elements: [
        {
          type: 'chart',
          chartType: 'bar',
          series: [{ label: '2015', value: 86 }],
        },
      ],
    },
    intent: 'summarize',
    needsInput: { required: false, fields: [] },
    actions: [],
    confidence: 0.8,
    traceId: 'trace-retry-success',
  };
  const repairedReply = {
    content: JSON.stringify(repairedEnvelope),
    toolCalls: [],
  };

  const conversation: MockConversation = {
    id: 'conversation-3',
    model: 'gpt-5',
    messages: [{ role: 'user', content: 'show chart' }],
  };
  const chatEngineMock = createChatEngineMock(conversation);
  const manager = new OpenCodeManager(chatEngineMock as never, {} as never, {} as never, () => null);
  manager.setApiKey('test-api-key');

  // Queue the two replies in order on the spied model call.
  const sendSpy = vi
    .spyOn(manager as never, 'sendOpenAIMessage')
    .mockResolvedValueOnce(legacyReply)
    .mockResolvedValueOnce(repairedReply);

  const result = await manager.sendMessage('conversation-3', 'Build chart', {
    metadata: { surface: 'tab' },
  });

  // The overall call succeeds with the envelope from the second reply.
  expect(result.success).toBe(true);
  expect(result.envelope?.traceId).toBe('trace-retry-success');
  expect(sendSpy).toHaveBeenCalledTimes(2);

  // The third argument of the second call is the message list; its last entry is the repair prompt.
  const retryMessages = sendSpy.mock.calls[1]?.[2] as Array<{ role: string; content?: string }>;
  const lastIndex = retryMessages.length - 1;
  const lastMessage = retryMessages[lastIndex]?.content ?? '';
  expect(lastMessage).toContain('failed protocol validation');
  expect(lastMessage).toContain('Return ONLY one valid protocol envelope JSON object');

  // The stored assistant message carries the text from the second reply.
  const expectedStoredMessage = expect.objectContaining({
    conversationId: 'conversation-3',
    role: 'assistant',
    content: 'Here is your chart.',
  });
  expect(chatEngineMock.addMessage).toHaveBeenCalledWith(expectedStoredMessage);
});
});

View File

@@ -171,4 +171,92 @@ describe('ProtocolResponseBuilder', () => {
requiresConfirmation: false,
}));
});
// Scenario: an envelope whose single chart element has no data fields. The test
// expects the built envelope to have no `ui` and a warning mentioning the invalid payload.
it('drops invalid ui payloads from canonical envelopes before renderer consumption', () => {
  // The chart element below has only `type` and `chartType`.
  const envelopeWithBareChart = {
    protocolVersion: '2.0',
    assistantText: 'Here is the result',
    intent: 'summarize',
    needsInput: { required: false, fields: [] },
    actions: [],
    ui: {
      specVersion: '1',
      elements: [
        {
          type: 'chart',
          chartType: 'bar',
        },
      ],
    },
    confidence: 0.7,
    traceId: 'trace-invalid-ui',
  };
  const capabilities = {
    widgets: ['chart'],
    actions: ['openPost'],
    tools: ['search_posts'],
  };

  const builder = new ProtocolResponseBuilder();
  const raw = JSON.stringify(envelopeWithBareChart);
  const result = builder.build({
    rawAssistantOutput: raw,
    surface: 'tab',
    capabilities,
  });

  expect(result.envelope.ui).toBeUndefined();

  // At least one warning must mention the rejected ui payload.
  const mentionsInvalidUi = (warning: string) => warning.includes('Invalid ui payload');
  const hasInvalidUiWarning = result.warnings.some((warning) => mentionsInvalidUi(warning));
  expect(hasInvalidUiWarning).toBe(true);
});
// Scenario: ui elements arrive with `data.labels`/`data.datasets` (chart) and `content`
// (text). The test expects the built envelope to expose `series` and `text` instead,
// plus a warning mentioning the normalization.
it('normalizes non-canonical ui element fields inside canonical envelopes', () => {
  // Input elements, in the field shape the builder receives them.
  const chartInput = {
    type: 'chart',
    chartType: 'bar',
    data: {
      labels: ['aside', 'article'],
      datasets: [{ data: [181, 53] }],
    },
  };
  const textInput = {
    type: 'text',
    content: 'Category breakdown',
  };

  const raw = JSON.stringify({
    protocolVersion: '2.0',
    assistantText: 'Distribution chart ready.',
    ui: {
      specVersion: '1',
      elements: [chartInput, textInput],
    },
    intent: 'summarize',
    needsInput: { required: false, fields: [] },
    actions: [],
    confidence: 0.95,
    traceId: 'trace-normalize-ui',
  });

  const builder = new ProtocolResponseBuilder();
  const result = builder.build({
    rawAssistantOutput: raw,
    surface: 'tab',
    capabilities: {
      widgets: ['chart', 'text'],
      actions: ['openPost'],
      tools: ['search_posts'],
    },
  });

  const elements = result.envelope.ui?.elements as Array<{
    type: string;
    series?: Array<{ label: string; value: number }>;
    text?: string;
  }>;
  expect(elements).toHaveLength(2);

  // Chart: each label is paired with the dataset value at the same index.
  const expectedSeries = [
    { label: 'aside', value: 181 },
    { label: 'article', value: 53 },
  ];
  expect(elements[0]?.type).toBe('chart');
  expect(elements[0]?.series).toEqual(expectedSeries);

  // Text: `content` is expected under `text`.
  const expectedText = { type: 'text', text: 'Category breakdown' };
  expect(elements[1]).toEqual(expectedText);

  const hasNormalizationWarning = result.warnings.some((warning) =>
    warning.includes('Normalized non-canonical ui payload'),
  );
  expect(hasNormalizationWarning).toBe(true);
});
});