feat: start on AI integration
This commit is contained in:
216
specs/ai.allium
216
specs/ai.allium
@@ -18,6 +18,61 @@ entity AiEndpoint {
|
||||
-- airplane: local model (Ollama, LM Studio, etc.)
|
||||
}
|
||||
|
||||
-- Provider entry in the model catalog. AiModel and AiModelModality refer to
-- it through their `provider` field.
entity AiCatalogProvider {
|
||||
id: String
|
||||
name: String
|
||||
-- NOTE(review): presumably the names of environment variables that may hold
-- this provider's credentials — confirm against the catalog source.
env_keys: Set<String>
|
||||
package_ref: String?
|
||||
api_url: String?
|
||||
doc_url: String?
|
||||
updated_at: Timestamp
|
||||
}
|
||||
|
||||
-- Catalog entry describing one model of a provider: capability flags, dates,
-- pricing, token limits and modalities. The `modalities` relationship below
-- joins on provider plus model_id.
entity AiModel {
|
||||
provider: AiCatalogProvider
|
||||
model_id: String
|
||||
name: String
|
||||
family: String?
|
||||
supports_attachment: Boolean
|
||||
supports_reasoning: Boolean
|
||||
supports_tool_calls: Boolean
|
||||
supports_structured_output: Boolean
|
||||
supports_temperature: Boolean
|
||||
-- NOTE(review): knowledge, release_date and last_updated_date are Strings,
-- not Timestamps; their format is not specified here — confirm.
knowledge: String?
|
||||
release_date: String?
|
||||
last_updated_date: String?
|
||||
open_weights: Boolean
|
||||
-- NOTE(review): the four price fields are Integers with no unit stated in
-- this spec (currency, denomination, per-token basis) — confirm.
input_price: Integer?
|
||||
output_price: Integer?
|
||||
cache_read_price: Integer?
|
||||
cache_write_price: Integer?
|
||||
context_window: Integer
|
||||
max_input_tokens: Integer
|
||||
max_output_tokens: Integer
|
||||
interleaved: String?
|
||||
status: String?
|
||||
updated_at: Timestamp
|
||||
|
||||
-- Relationships
|
||||
-- AiModelModality entries whose provider and model_id match this model.
modalities: AiModelModality with provider = this.provider and model_id = this.model_id
|
||||
|
||||
-- Derived
|
||||
-- Each projects the `modality` of the related entries for one direction.
input_modalities: modalities where direction = input -> modality
|
||||
output_modalities: modalities where direction = output -> modality
|
||||
}
|
||||
|
||||
-- One modality of one model in one direction. AiModel.modalities collects
-- these entries by matching provider and model_id.
entity AiModelModality {
|
||||
provider: AiCatalogProvider
|
||||
model_id: String
|
||||
direction: input | output
|
||||
modality: text | image | audio | file | tool
|
||||
}
|
||||
|
||||
-- Key/value metadata about the model catalog itself. The
-- ConditionalCatalogRefresh invariant refers to the keys "etag" and
-- "last_fetched_at".
entity AiCatalogMeta {
|
||||
key: String
|
||||
value: String
|
||||
}
|
||||
|
||||
surface AiEndpointSurface {
|
||||
context endpoint: AiEndpoint
|
||||
|
||||
@@ -28,6 +83,37 @@ surface AiEndpointSurface {
|
||||
endpoint.model
|
||||
}
|
||||
|
||||
-- Exposes a catalog AiModel. Optional fields are exposed only when they are
-- non-null, and modalities are exposed through the derived input/output
-- lists rather than the raw `modalities` relationship.
surface AiModelSurface {
|
||||
context catalog_model: AiModel
|
||||
|
||||
exposes:
|
||||
catalog_model.provider
|
||||
catalog_model.model_id
|
||||
catalog_model.name
|
||||
catalog_model.family when catalog_model.family != null
|
||||
catalog_model.supports_attachment
|
||||
catalog_model.supports_reasoning
|
||||
catalog_model.supports_tool_calls
|
||||
catalog_model.supports_structured_output
|
||||
catalog_model.supports_temperature
|
||||
catalog_model.knowledge when catalog_model.knowledge != null
|
||||
catalog_model.release_date when catalog_model.release_date != null
|
||||
catalog_model.last_updated_date when catalog_model.last_updated_date != null
|
||||
catalog_model.open_weights
|
||||
catalog_model.input_price when catalog_model.input_price != null
|
||||
catalog_model.output_price when catalog_model.output_price != null
|
||||
catalog_model.cache_read_price when catalog_model.cache_read_price != null
|
||||
catalog_model.cache_write_price when catalog_model.cache_write_price != null
|
||||
catalog_model.context_window
|
||||
catalog_model.max_input_tokens
|
||||
catalog_model.max_output_tokens
|
||||
catalog_model.interleaved when catalog_model.interleaved != null
|
||||
catalog_model.status when catalog_model.status != null
|
||||
catalog_model.input_modalities
|
||||
catalog_model.output_modalities
|
||||
catalog_model.updated_at
|
||||
}
|
||||
|
||||
entity SecureKeyStore {
|
||||
-- Encrypts API keys using the host operating system's secure storage.
|
||||
-- Stored in application settings in encrypted form.
|
||||
@@ -62,8 +148,12 @@ entity ChatMessage {
|
||||
conversation: ChatConversation
|
||||
role: system | user | assistant | tool
|
||||
content: String
|
||||
tool_call_id: String?
|
||||
tool_calls: String?
|
||||
token_usage_input: Integer?
|
||||
token_usage_output: Integer?
|
||||
cache_read_tokens: Integer?
|
||||
cache_write_tokens: Integer?
|
||||
created_at: Timestamp
|
||||
}
|
||||
|
||||
@@ -74,11 +164,24 @@ surface ChatMessageSurface {
|
||||
message.conversation
|
||||
message.role
|
||||
message.content
|
||||
message.tool_call_id when message.tool_call_id != null
|
||||
message.tool_calls when message.tool_calls != null
|
||||
message.token_usage_input when message.token_usage_input != null
|
||||
message.token_usage_output when message.token_usage_output != null
|
||||
message.cache_read_tokens when message.cache_read_tokens != null
|
||||
message.cache_write_tokens when message.cache_write_tokens != null
|
||||
message.created_at
|
||||
}
|
||||
|
||||
-- Configuration surface facing the AiOperator: requests to set or remove an
-- AI endpoint and to refresh the model catalog.
surface AiConfigurationSurface {
|
||||
facing _: AiOperator
|
||||
|
||||
provides:
|
||||
-- Handled by rule SetAiEndpoint.
SetAiEndpointRequested(kind, url, api_key, model)
|
||||
-- Handled by rule RemoveAiEndpoint.
RemoveAiEndpointRequested(kind)
|
||||
-- Handled by rule RefreshModelCatalog.
RefreshModelCatalogRequested(source)
|
||||
}
|
||||
|
||||
surface OneShotAiSurface {
|
||||
facing _: AiOperator
|
||||
|
||||
@@ -97,7 +200,31 @@ surface AiChatSurface {
|
||||
provides:
|
||||
StartChatRequested(model)
|
||||
SendChatMessageRequested(conversation, content)
|
||||
RefreshModelCatalogRequested(endpoint)
|
||||
CancelChatRequested(conversation)
|
||||
}
|
||||
|
||||
-- Tunable defaults for the AI features specified in this file.
config {
|
||||
-- NOTE(review): presumably how long a refreshed model catalog is treated as
-- fresh before another refresh is attempted — confirm.
model_catalog_ttl: Duration = 5.minutes
|
||||
-- Upper bound on chat tool execution (see invariant BoundedToolLoop).
chat_max_tool_rounds: Integer = 10
|
||||
-- NOTE(review): presumably the output-token budget used when no
-- model-specific limit applies — confirm.
default_max_output_tokens: Integer = 16384
|
||||
}
|
||||
|
||||
-- On SetAiEndpointRequested, ensures an AiEndpoint is created from the
-- requested kind, url, api_key and model.
-- NOTE(review): this rule does not say what happens to an existing endpoint
-- of the same kind — confirm whether this is meant to be create-or-replace.
rule SetAiEndpoint {
|
||||
when: SetAiEndpointRequested(kind, url, api_key, model)
|
||||
ensures:
|
||||
let endpoint = AiEndpoint.created(
|
||||
kind: kind,
|
||||
url: url,
|
||||
api_key: api_key,
|
||||
model: model
|
||||
)
|
||||
-- NOTE(review): restates the kind already passed to AiEndpoint.created
-- above — confirm whether this postcondition is needed.
endpoint.kind = kind
|
||||
}
|
||||
|
||||
-- On RemoveAiEndpointRequested, every AiEndpoint whose kind matches the
-- request must no longer exist.
rule RemoveAiEndpoint {
|
||||
when: RemoveAiEndpointRequested(kind)
|
||||
for endpoint in AiEndpoints where endpoint.kind = kind:
|
||||
ensures: not exists endpoint
|
||||
}
|
||||
|
||||
-- One-shot AI tasks (core scope, no streaming)
|
||||
@@ -173,17 +300,24 @@ rule SendChatMessage {
|
||||
ensures: conversation.updated_at = now
|
||||
ensures: AiStreamingResponse(conversation)
|
||||
-- Streaming response with bounded tool-call loop.
|
||||
-- Blog data tools for post/media querying during chat.
|
||||
-- Token usage tracking (input, output, cache read/write).
|
||||
-- Blog data tools for post/media querying and mutation during chat.
|
||||
-- Render tools may emit structured chart/table/form payloads.
|
||||
-- Token usage tracking includes input, output, cache read, cache write.
|
||||
}
|
||||
|
||||
-- On CancelChatRequested, ensures AiStreamingResponseCancelled is emitted for
-- the same conversation (see invariant ChatCancellation).
rule CancelChat {
|
||||
when: CancelChatRequested(conversation)
|
||||
ensures: AiStreamingResponseCancelled(conversation)
|
||||
}
|
||||
|
||||
-- Model catalog
|
||||
|
||||
rule RefreshModelCatalog {
|
||||
when: RefreshModelCatalogRequested(endpoint)
|
||||
-- Queries the endpoint's model list API
|
||||
-- 5-minute cache TTL
|
||||
ensures: ModelCatalogUpdated(endpoint)
|
||||
when: RefreshModelCatalogRequested(source)
|
||||
-- Refreshes advisory provider/model metadata used for capability checks,
|
||||
-- default token budgeting, and model selection UX.
|
||||
-- Uses conditional GET with ETag where supported.
|
||||
ensures: ModelCatalogUpdated()
|
||||
}
|
||||
|
||||
invariant AirplaneModeGating {
|
||||
@@ -196,6 +330,14 @@ invariant AirplaneModeGating {
|
||||
-- show toast "AI unavailable — configure {online|airplane} endpoint in Settings"
|
||||
}
|
||||
|
||||
invariant AirplaneModeModelSwap {
|
||||
-- In airplane mode, cloud models are never contacted.
|
||||
-- Chat uses the configured offline chat model when needed.
|
||||
-- Image analysis uses the configured offline vision-capable model when needed.
|
||||
-- If no suitable offline model is configured, the operation fails with
|
||||
-- actionable guidance instead of silently falling back to the online endpoint.
|
||||
}
|
||||
|
||||
invariant TwoEndpointModel {
|
||||
-- Two configurable OpenAI-compatible endpoints:
|
||||
-- online: for cloud providers (requires API key)
|
||||
@@ -204,6 +346,66 @@ invariant TwoEndpointModel {
|
||||
-- Endpoint selection is configurable rather than tied to hard-coded providers.
|
||||
}
|
||||
|
||||
invariant AdvisoryModelCatalog {
|
||||
-- Model metadata is stored separately from runtime endpoint configuration.
|
||||
-- It supplies capability hints such as context window, tool-call support,
|
||||
-- structured output support, vision/input modalities, and pricing metadata.
|
||||
-- The catalog remains usable offline after the last successful refresh.
|
||||
}
|
||||
|
||||
invariant ConditionalCatalogRefresh {
|
||||
-- Model catalog refresh uses conditional HTTP requests when possible.
|
||||
-- The latest ETag and fetch timestamp are persisted in AiCatalogMeta.
|
||||
-- A 304 response updates freshness metadata without rewriting model rows.
|
||||
-- NOTE(review): the expression below holds as soon as either key exists,
-- while the comment above says both values are persisted — confirm whether
-- each key should be required separately.
exists meta in AiCatalogMeta where meta.key = "etag" or meta.key = "last_fetched_at"
|
||||
}
|
||||
|
||||
invariant ProviderDetection {
|
||||
-- Runtime provider selection may be inferred from model identifiers,
|
||||
-- local-endpoint registration, or explicit endpoint configuration.
|
||||
-- The system does not rely on a single hard-coded provider list for routing.
|
||||
}
|
||||
|
||||
invariant VisionCapabilityGate {
|
||||
-- AnalyzeImage only runs against models that accept image input.
|
||||
-- Local/offline models must advertise or be configured with image capability
|
||||
-- before the runtime sends multimodal requests to them.
|
||||
}
|
||||
|
||||
invariant ChatContextTruncation {
|
||||
-- Chat requests are trimmed to fit within the selected model's context window.
|
||||
-- Oldest user/assistant pairs are dropped first.
|
||||
-- The system prompt, tool schema budget, and output-token reserve are preserved.
|
||||
}
|
||||
|
||||
invariant BoundedToolLoop {
|
||||
-- Chat tool execution is bounded by config.chat_max_tool_rounds.
|
||||
-- Tool-capable models may call blog-domain tools and render tools.
|
||||
-- Non-tool-capable models skip tool exposure entirely.
|
||||
}
|
||||
|
||||
invariant TokenUsageAccounting {
|
||||
-- Chat turn accounting tracks input, output, cache-read, and cache-write tokens.
|
||||
-- Usage is reported per turn and accumulated per conversation.
|
||||
-- Cache token accounting is surfaced when the underlying provider reports it.
|
||||
}
|
||||
|
||||
invariant ChatCancellation {
|
||||
-- Each in-flight chat turn can be aborted independently.
|
||||
-- Cancellation stops streaming and tool execution for that request only.
|
||||
}
|
||||
|
||||
invariant StructuredRenderTools {
|
||||
-- Chat may emit structured render payloads for charts, tables, and forms.
|
||||
-- These payloads are data contracts, not arbitrary HTML strings.
|
||||
}
|
||||
|
||||
invariant BlogStatsPromptAugmentation {
|
||||
-- The base system prompt may be augmented with current blog statistics
|
||||
-- such as post counts, media counts, tag/category totals, and date ranges
|
||||
-- so long as the augmentation reflects current project state.
|
||||
}
|
||||
|
||||
invariant AiSpecPartitioning {
|
||||
-- This file covers two distinct but related AI contracts:
|
||||
-- 1. Core one-shot operations (taxonomy, vision, translation, language detection)
|
||||
|
||||
Reference in New Issue
Block a user