wip: desperate models fucking around

2026-02-26 00:13:52 +01:00
parent 021cddefa3
commit 2a923c7e48
16 changed files with 684 additions and 113 deletions
--- a/tests/engine/OpenCodeManager.protocol.test.ts
+++ b/tests/engine/OpenCodeManager.protocol.test.ts
@@ -97,6 +97,12 @@ describe('OpenCodeManager protocol integration', () => {
    const telemetryAfter = getProtocolTelemetryService().getSnapshot();
    expect(telemetryAfter.totalTurns).toBe(telemetryBefore.totalTurns + 1);
    expect(telemetryAfter.validEnvelopeTurns).toBe(telemetryBefore.validEnvelopeTurns + 1);
+
+    expect(chatEngineMock.addMessage).toHaveBeenCalledWith(expect.objectContaining({
+      conversationId: 'conversation-1',
+      role: 'assistant',
+      content: 'Please provide a date range.',
+    }));
  });

  it('blocks unsupported actions and records blocked-action telemetry', async () => {
@@ -149,4 +155,71 @@ describe('OpenCodeManager protocol integration', () => {
    const telemetryAfter = getProtocolTelemetryService().getSnapshot();
    expect(telemetryAfter.blockedActionCount).toBe(telemetryBefore.blockedActionCount + 1);
  });
+
+  it('retries once with protocol repair prompt when first output is non-canonical', async () => { // two stubbed model replies: the first lacks envelope fields, the second is a full envelope
+    const conversation: MockConversation = {
+      id: 'conversation-3',
+      model: 'gpt-5',
+      messages: [{ role: 'user', content: 'show chart' }],
+    };
+
+    const chatEngineMock = createChatEngineMock(conversation);
+    const manager = new OpenCodeManager(
+      chatEngineMock as never,
+      {} as never, // placeholder dependency; NOTE(review): its role is not visible in this hunk
+      {} as never, // placeholder dependency; NOTE(review): its role is not visible in this hunk
+      () => null,
+    );
+    manager.setApiKey('test-api-key');
+
+    const sendSpy = vi.spyOn(manager as never, 'sendOpenAIMessage') // `as never` bypasses type checking of the spied member name (presumably non-public; confirm)
+      .mockResolvedValueOnce({ // 1st reply: JSON with only title/widgets, no protocolVersion or assistantText
+        content: JSON.stringify({
+          title: 'Legacy JSON',
+          widgets: [{ type: 'chart', chartType: 'bar' }],
+        }),
+        toolCalls: [],
+      })
+      .mockResolvedValueOnce({ // 2nd reply: envelope carrying protocolVersion '2.0' and a chart element with series
+        content: JSON.stringify({
+          protocolVersion: '2.0',
+          assistantText: 'Here is your chart.',
+          ui: {
+            specVersion: '1',
+            elements: [
+              {
+                type: 'chart',
+                chartType: 'bar',
+                series: [{ label: '2015', value: 86 }],
+              },
+            ],
+          },
+          intent: 'summarize',
+          needsInput: { required: false, fields: [] },
+          actions: [],
+          confidence: 0.8,
+          traceId: 'trace-retry-success',
+        }),
+        toolCalls: [],
+      });
+
+    const result = await manager.sendMessage('conversation-3', 'Build chart', {
+      metadata: { surface: 'tab' },
+    });
+
+    expect(result.success).toBe(true);
+    expect(result.envelope?.traceId).toBe('trace-retry-success'); // traceId matches the 2nd stubbed reply, not the 1st
+    expect(sendSpy).toHaveBeenCalledTimes(2); // initial attempt plus exactly one retry
+
+    const retryMessages = sendSpy.mock.calls[1]?.[2] as Array<{ role: string; content?: string }>; // 3rd positional argument of the 2nd call, read as the message list
+    const lastMessage = retryMessages[retryMessages.length - 1]?.content ?? ''; // falls back to '' when the last message has no content
+    expect(lastMessage).toContain('failed protocol validation');
+    expect(lastMessage).toContain('Return ONLY one valid protocol envelope JSON object');
+
+    expect(chatEngineMock.addMessage).toHaveBeenCalledWith(expect.objectContaining({ // content equals the 2nd reply's assistantText
+      conversationId: 'conversation-3',
+      role: 'assistant',
+      content: 'Here is your chart.',
+    }));
+  });
 });
--- a/tests/engine/agentic/protocol/responseBuilder.test.ts
+++ b/tests/engine/agentic/protocol/responseBuilder.test.ts
@@ -171,4 +171,92 @@ describe('ProtocolResponseBuilder', () => {
      requiresConfirmation: false,
    }));
  });
+
+  it('drops invalid ui payloads from canonical envelopes before renderer consumption', () => { // expects build() to return an envelope without ui, plus a warning
+    const builder = new ProtocolResponseBuilder();
+
+    const raw = JSON.stringify({
+      protocolVersion: '2.0',
+      assistantText: 'Here is the result',
+      intent: 'summarize',
+      needsInput: { required: false, fields: [] },
+      actions: [],
+      ui: {
+        specVersion: '1',
+        elements: [
+          { // chart element with only type and chartType; presumably invalid because it has no series (confirm against the ui schema)
+            type: 'chart',
+            chartType: 'bar',
+          },
+        ],
+      },
+      confidence: 0.7,
+      traceId: 'trace-invalid-ui',
+    });
+
+    const result = builder.build({
+      rawAssistantOutput: raw,
+      surface: 'tab',
+      capabilities: {
+        widgets: ['chart'], // 'chart' is listed here, so the element type itself is among the declared widgets
+        actions: ['openPost'],
+        tools: ['search_posts'],
+      },
+    });
+
+    expect(result.envelope.ui).toBeUndefined(); // the whole ui field is expected to be absent, not only the offending element
+    expect(result.warnings.some((warning) => warning.includes('Invalid ui payload'))).toBe(true); // at least one warning must contain this text
+  });
+
+  it('normalizes non-canonical ui element fields inside canonical envelopes', () => { // expects data.labels/datasets to come back as series, and content to come back as text
+    const builder = new ProtocolResponseBuilder();
+
+    const raw = JSON.stringify({
+      protocolVersion: '2.0',
+      assistantText: 'Distribution chart ready.',
+      ui: {
+        specVersion: '1',
+        elements: [
+          {
+            type: 'chart',
+            chartType: 'bar',
+            data: { // labels/datasets shape rather than the series array asserted further down
+              labels: ['aside', 'article'],
+              datasets: [{ data: [181, 53] }],
+            },
+          },
+          {
+            type: 'text',
+            content: 'Category breakdown', // asserted below to come back under the key 'text'
+          },
+        ],
+      },
+      intent: 'summarize',
+      needsInput: { required: false, fields: [] },
+      actions: [],
+      confidence: 0.95,
+      traceId: 'trace-normalize-ui',
+    });
+
+    const result = builder.build({
+      rawAssistantOutput: raw,
+      surface: 'tab',
+      capabilities: {
+        widgets: ['chart', 'text'],
+        actions: ['openPost'],
+        tools: ['search_posts'],
+      },
+    });
+
+    const elements = result.envelope.ui?.elements as Array<{ type: string; series?: Array<{ label: string; value: number }>; text?: string }>; // cast limits the element type to the fields asserted below
+    expect(elements).toHaveLength(2); // both input elements are expected to be kept
+    expect(elements[0]?.type).toBe('chart');
+    expect(elements[0]?.series).toEqual([ // each label paired with the value at the same index of datasets[0].data
+      { label: 'aside', value: 181 },
+      { label: 'article', value: 53 },
+    ]);
+    expect(elements[1]).toEqual({ type: 'text', text: 'Category breakdown' }); // a leftover 'content' value on the element would fail this equality
+    expect(result.warnings.some((warning) => warning.includes('Normalized non-canonical ui payload'))).toBe(true); // at least one warning must contain this text
+  });
+
 });