wip: desperate models fucking around
This commit is contained in:
@@ -97,6 +97,12 @@ describe('OpenCodeManager protocol integration', () => {
|
||||
const telemetryAfter = getProtocolTelemetryService().getSnapshot();
|
||||
expect(telemetryAfter.totalTurns).toBe(telemetryBefore.totalTurns + 1);
|
||||
expect(telemetryAfter.validEnvelopeTurns).toBe(telemetryBefore.validEnvelopeTurns + 1);
|
||||
|
||||
expect(chatEngineMock.addMessage).toHaveBeenCalledWith(expect.objectContaining({
|
||||
conversationId: 'conversation-1',
|
||||
role: 'assistant',
|
||||
content: 'Please provide a date range.',
|
||||
}));
|
||||
});
|
||||
|
||||
it('blocks unsupported actions and records blocked-action telemetry', async () => {
|
||||
@@ -149,4 +155,71 @@ describe('OpenCodeManager protocol integration', () => {
|
||||
const telemetryAfter = getProtocolTelemetryService().getSnapshot();
|
||||
expect(telemetryAfter.blockedActionCount).toBe(telemetryBefore.blockedActionCount + 1);
|
||||
});
|
||||
|
||||
it('retries once with protocol repair prompt when first output is non-canonical', async () => {
|
||||
const conversation: MockConversation = {
|
||||
id: 'conversation-3',
|
||||
model: 'gpt-5',
|
||||
messages: [{ role: 'user', content: 'show chart' }],
|
||||
};
|
||||
|
||||
const chatEngineMock = createChatEngineMock(conversation);
|
||||
const manager = new OpenCodeManager(
|
||||
chatEngineMock as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
() => null,
|
||||
);
|
||||
manager.setApiKey('test-api-key');
|
||||
|
||||
const sendSpy = vi.spyOn(manager as never, 'sendOpenAIMessage')
|
||||
.mockResolvedValueOnce({
|
||||
content: JSON.stringify({
|
||||
title: 'Legacy JSON',
|
||||
widgets: [{ type: 'chart', chartType: 'bar' }],
|
||||
}),
|
||||
toolCalls: [],
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
content: JSON.stringify({
|
||||
protocolVersion: '2.0',
|
||||
assistantText: 'Here is your chart.',
|
||||
ui: {
|
||||
specVersion: '1',
|
||||
elements: [
|
||||
{
|
||||
type: 'chart',
|
||||
chartType: 'bar',
|
||||
series: [{ label: '2015', value: 86 }],
|
||||
},
|
||||
],
|
||||
},
|
||||
intent: 'summarize',
|
||||
needsInput: { required: false, fields: [] },
|
||||
actions: [],
|
||||
confidence: 0.8,
|
||||
traceId: 'trace-retry-success',
|
||||
}),
|
||||
toolCalls: [],
|
||||
});
|
||||
|
||||
const result = await manager.sendMessage('conversation-3', 'Build chart', {
|
||||
metadata: { surface: 'tab' },
|
||||
});
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.envelope?.traceId).toBe('trace-retry-success');
|
||||
expect(sendSpy).toHaveBeenCalledTimes(2);
|
||||
|
||||
const retryMessages = sendSpy.mock.calls[1]?.[2] as Array<{ role: string; content?: string }>;
|
||||
const lastMessage = retryMessages[retryMessages.length - 1]?.content ?? '';
|
||||
expect(lastMessage).toContain('failed protocol validation');
|
||||
expect(lastMessage).toContain('Return ONLY one valid protocol envelope JSON object');
|
||||
|
||||
expect(chatEngineMock.addMessage).toHaveBeenCalledWith(expect.objectContaining({
|
||||
conversationId: 'conversation-3',
|
||||
role: 'assistant',
|
||||
content: 'Here is your chart.',
|
||||
}));
|
||||
});
|
||||
});
|
||||
@@ -171,4 +171,92 @@ describe('ProtocolResponseBuilder', () => {
|
||||
requiresConfirmation: false,
|
||||
}));
|
||||
});
|
||||
|
||||
it('drops invalid ui payloads from canonical envelopes before renderer consumption', () => {
|
||||
const builder = new ProtocolResponseBuilder();
|
||||
|
||||
const raw = JSON.stringify({
|
||||
protocolVersion: '2.0',
|
||||
assistantText: 'Here is the result',
|
||||
intent: 'summarize',
|
||||
needsInput: { required: false, fields: [] },
|
||||
actions: [],
|
||||
ui: {
|
||||
specVersion: '1',
|
||||
elements: [
|
||||
{
|
||||
type: 'chart',
|
||||
chartType: 'bar',
|
||||
},
|
||||
],
|
||||
},
|
||||
confidence: 0.7,
|
||||
traceId: 'trace-invalid-ui',
|
||||
});
|
||||
|
||||
const result = builder.build({
|
||||
rawAssistantOutput: raw,
|
||||
surface: 'tab',
|
||||
capabilities: {
|
||||
widgets: ['chart'],
|
||||
actions: ['openPost'],
|
||||
tools: ['search_posts'],
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.envelope.ui).toBeUndefined();
|
||||
expect(result.warnings.some((warning) => warning.includes('Invalid ui payload'))).toBe(true);
|
||||
});
|
||||
|
||||
it('normalizes non-canonical ui element fields inside canonical envelopes', () => {
|
||||
const builder = new ProtocolResponseBuilder();
|
||||
|
||||
const raw = JSON.stringify({
|
||||
protocolVersion: '2.0',
|
||||
assistantText: 'Distribution chart ready.',
|
||||
ui: {
|
||||
specVersion: '1',
|
||||
elements: [
|
||||
{
|
||||
type: 'chart',
|
||||
chartType: 'bar',
|
||||
data: {
|
||||
labels: ['aside', 'article'],
|
||||
datasets: [{ data: [181, 53] }],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
content: 'Category breakdown',
|
||||
},
|
||||
],
|
||||
},
|
||||
intent: 'summarize',
|
||||
needsInput: { required: false, fields: [] },
|
||||
actions: [],
|
||||
confidence: 0.95,
|
||||
traceId: 'trace-normalize-ui',
|
||||
});
|
||||
|
||||
const result = builder.build({
|
||||
rawAssistantOutput: raw,
|
||||
surface: 'tab',
|
||||
capabilities: {
|
||||
widgets: ['chart', 'text'],
|
||||
actions: ['openPost'],
|
||||
tools: ['search_posts'],
|
||||
},
|
||||
});
|
||||
|
||||
const elements = result.envelope.ui?.elements as Array<{ type: string; series?: Array<{ label: string; value: number }>; text?: string }>;
|
||||
expect(elements).toHaveLength(2);
|
||||
expect(elements[0]?.type).toBe('chart');
|
||||
expect(elements[0]?.series).toEqual([
|
||||
{ label: 'aside', value: 181 },
|
||||
{ label: 'article', value: 53 },
|
||||
]);
|
||||
expect(elements[1]).toEqual({ type: 'text', text: 'Category breakdown' });
|
||||
expect(result.warnings.some((warning) => warning.includes('Normalized non-canonical ui payload'))).toBe(true);
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
@@ -192,4 +192,49 @@ describe('assistantPanelSpec', () => {
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.elements).toHaveLength(7);
|
||||
});
|
||||
|
||||
it('parses canonical protocol envelope JSON and extracts assistant text plus ui spec', () => {
|
||||
const raw = JSON.stringify({
|
||||
protocolVersion: '2.0',
|
||||
assistantText: 'Here is your chart.',
|
||||
ui: {
|
||||
specVersion: '1',
|
||||
elements: [
|
||||
{
|
||||
type: 'chart',
|
||||
chartType: 'bar',
|
||||
data: {
|
||||
labels: ['aside', 'article'],
|
||||
datasets: [{ data: [181, 53] }],
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'text',
|
||||
content: 'Breakdown details',
|
||||
},
|
||||
],
|
||||
},
|
||||
intent: 'summarize',
|
||||
needsInput: { required: false, fields: [] },
|
||||
actions: [],
|
||||
confidence: 0.9,
|
||||
traceId: 'trace-1',
|
||||
});
|
||||
|
||||
const result = extractAssistantResponseContent(raw);
|
||||
|
||||
expect(result.displayText).toBe('Here is your chart.');
|
||||
expect(result.panelSpec).not.toBeNull();
|
||||
expect(result.panelSpec?.elements[0]).toMatchObject({
|
||||
type: 'chart',
|
||||
series: [
|
||||
{ label: 'aside', value: 181 },
|
||||
{ label: 'article', value: 53 },
|
||||
],
|
||||
});
|
||||
expect(result.panelSpec?.elements[1]).toEqual({
|
||||
type: 'text',
|
||||
text: 'Breakdown details',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
56
tests/renderer/navigation/protocolActionPolicies.test.ts
Normal file
56
tests/renderer/navigation/protocolActionPolicies.test.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { buildActionPoliciesFromEnvelope } from '../../../src/renderer/navigation/protocolActionPolicies';
|
||||
|
||||
describe('buildActionPoliciesFromEnvelope', () => {
|
||||
it('preserves server-provided action policies', () => {
|
||||
const result = buildActionPoliciesFromEnvelope({
|
||||
actions: [
|
||||
{
|
||||
id: 'a1',
|
||||
action: 'openSettings',
|
||||
policy: 'confirm',
|
||||
requiresConfirmation: true,
|
||||
},
|
||||
],
|
||||
needsInput: {
|
||||
required: false,
|
||||
fields: [],
|
||||
},
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
openSettings: 'confirm',
|
||||
});
|
||||
});
|
||||
|
||||
it('adds confirm policy for submitNeedsInput when clarification is required', () => {
|
||||
const result = buildActionPoliciesFromEnvelope({
|
||||
actions: [],
|
||||
needsInput: {
|
||||
required: true,
|
||||
fields: [{ key: 'date', label: 'Date', inputType: 'date' }],
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.submitNeedsInput).toBe('confirm');
|
||||
});
|
||||
|
||||
it('does not override explicit server policy for submitNeedsInput', () => {
|
||||
const result = buildActionPoliciesFromEnvelope({
|
||||
actions: [
|
||||
{
|
||||
id: 'a1',
|
||||
action: 'submitNeedsInput',
|
||||
policy: 'danger',
|
||||
requiresConfirmation: true,
|
||||
},
|
||||
],
|
||||
needsInput: {
|
||||
required: true,
|
||||
fields: [{ key: 'title', label: 'Title', inputType: 'text' }],
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.submitNeedsInput).toBe('danger');
|
||||
});
|
||||
});
|
||||
@@ -44,9 +44,34 @@ describe('pythonApiContractV1', () => {
|
||||
});
|
||||
});
|
||||
|
||||
it('documents chat.sendMessage protocol envelope return contract and metadata input', () => {
|
||||
expect(getPythonApiMethodContract('chat.sendMessage')).toEqual({
|
||||
method: 'chat.sendMessage',
|
||||
description: 'Send message to chat conversation.',
|
||||
params: [
|
||||
{
|
||||
name: 'conversationId',
|
||||
type: 'string',
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
name: 'message',
|
||||
type: 'string',
|
||||
required: true,
|
||||
},
|
||||
{
|
||||
name: 'metadata',
|
||||
type: 'object',
|
||||
required: false,
|
||||
},
|
||||
],
|
||||
returns: "{ success: boolean; message?: string; envelope?: ProtocolResponseEnvelope; protocolVersion?: '2.0'; traceId?: string; warnings?: string[]; error?: string }",
|
||||
});
|
||||
});
|
||||
|
||||
it('contains semantic version metadata for compatibility checks', () => {
|
||||
expect(BDS_PYTHON_API_CONTRACT_V1).toMatchObject({
|
||||
version: '1.4.0',
|
||||
version: '1.5.0',
|
||||
generatedAt: expect.any(String),
|
||||
});
|
||||
});
|
||||
@@ -56,6 +81,7 @@ describe('pythonApiContractV1', () => {
|
||||
expect.objectContaining({ name: 'PostData' }),
|
||||
expect.objectContaining({ name: 'MediaData' }),
|
||||
expect.objectContaining({ name: 'ProjectData' }),
|
||||
expect.objectContaining({ name: 'ProtocolResponseEnvelope' }),
|
||||
expect.objectContaining({ name: 'ProtocolTelemetrySnapshot' }),
|
||||
]));
|
||||
});
|
||||
@@ -76,7 +102,7 @@ describe('generatePythonApiModuleV1', () => {
|
||||
expect(moduleCode).toContain('async def search(self, query):');
|
||||
expect(moduleCode).toContain('async def get_project_metadata(self):');
|
||||
expect(moduleCode).toContain('async def get_conversations(self):');
|
||||
expect(moduleCode).toContain('async def send_message(self, conversation_id, message):');
|
||||
expect(moduleCode).toContain('async def send_message(self, conversation_id, message, metadata=None):');
|
||||
expect(moduleCode).toContain('class BdsApi:');
|
||||
expect(moduleCode).toContain('bds = BdsApi(_transport)');
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user