Files
bDS2/specs/media_processing.allium

381 lines
14 KiB
Plaintext

-- allium: 1
-- bDS Media Processing Specification
-- Scope: core (Wave 1 — media import and processing)
-- Distilled from: ../bDS/src/main/engine/MediaEngine.ts,
-- mediaProcessing.ts, thumbnail generation logic
--
-- This document specifies the exact media processing behavior:
-- thumbnail generation, format conversion, EXIF handling, and file organization.
use "./media.allium" as media
use "./search.allium" as search
surface MediaProcessingControlSurface {
    -- Operator-facing entry points. Each trigger listed here is the `when:`
    -- event of one rule in this spec (import, tagging, deletion, validation).
    facing _: MediaProcessingOperator

    provides:
        ImportMediaRequested(source_path, project)
        TagMediaRequested(media, tags)
        DeleteMediaRequested(media)
        ValidateMediaRequested(project)
}
surface MediaProcessingRuntimeSurface {
    -- Runtime-facing surface. MediaImported is the trigger consumed by the
    -- GenerateThumbnails and ProcessImageMetadata rules.
    facing _: MediaProcessingRuntime

    provides:
        MediaImported(media)
}
-- ============================================================================
-- MEDIA FILE ORGANIZATION
-- ============================================================================
value MediaFileLayout {
    -- On-disk locations for one media asset.
    --
    --   binary      media/{YYYY}/{MM}/{uuid}.{ext}
    --   sidecar     {binary_path}.meta
    --   thumbnails  thumbnails/{id[0:2]}/{id}-{size}.webp
    --   ai thumb    thumbnails/{id[0:2]}/{id}-ai.jpg   (JPEG, not WebP)

    -- Original binary and its metadata sidecar
    binary_path: String          -- media/{YYYY}/{MM}/{uuid}.{ext}
    sidecar_path: String         -- {binary_path}.meta

    -- Derived thumbnails; {prefix} is the bucket directory
    thumbnail_small: String      -- thumbnails/{prefix}/{id}-small.webp
    thumbnail_medium: String     -- thumbnails/{prefix}/{id}-medium.webp
    thumbnail_large: String      -- thumbnails/{prefix}/{id}-large.webp
    thumbnail_ai: String         -- thumbnails/{prefix}/{id}-ai.jpg
}
surface MediaFileLayoutSurface {
    -- Makes every path of a MediaFileLayout value visible.
    context layout: MediaFileLayout

    exposes:
        layout.binary_path
        layout.sidecar_path
        layout.thumbnail_small
        layout.thumbnail_medium
        layout.thumbnail_large
        layout.thumbnail_ai
}
invariant MediaFileNaming {
    -- The stored filename is "{uuid}.{ext}" with a UUID v4 stem, e.g.
    --   a1b2c3d4-e5f6-7890-abcd-ef1234567890.jpg
    -- The user-supplied name is preserved in the original_name field; only
    -- its extension is reused for the stored file.
    -- NOTE(review): generate_uuid_v4() presumably yields a new value on each
    -- evaluation, so this reads as "filename has this shape" rather than a
    -- literal equality — confirm the intended meaning.
    for m in Media:
        m.filename = format(
            "{uuid}.{ext}",
            uuid: generate_uuid_v4(),
            ext: file_extension(m.original_name)
        )
}
invariant ThumbnailPathBucketing {
    -- Every thumbnail lives in a directory named after the first two
    -- characters of the media ID, so that no single directory accumulates
    -- enough files to slow the filesystem down.
    for m in Media:
        let prefix = substring(m.id, 0, 2)

        -- WebP sizes
        m.thumbnails.small = format(
            "thumbnails/{prefix}/{id}-small.webp", prefix: prefix, id: m.id)
        m.thumbnails.medium = format(
            "thumbnails/{prefix}/{id}-medium.webp", prefix: prefix, id: m.id)
        m.thumbnails.large = format(
            "thumbnails/{prefix}/{id}-large.webp", prefix: prefix, id: m.id)

        -- AI thumbnail uses the .jpg extension
        m.thumbnails.ai = format(
            "thumbnails/{prefix}/{id}-ai.jpg", prefix: prefix, id: m.id)
}
-- ============================================================================
-- THUMBNAIL GENERATION
-- ============================================================================
config {
    -- Thumbnail settings: four variants are produced for each image.

    -- Width-constrained variants
    thumbnail_small_width: Integer = 150
    thumbnail_medium_width: Integer = 400
    thumbnail_large_width: Integer = 800

    -- AI variant: 448x448 square crop, JPEG
    thumbnail_ai_size: Integer = 448

    -- Encoding for small/medium/large (every size except AI)
    thumbnail_format: String = "webp"
    thumbnail_quality: Integer = 80          -- WebP quality

    -- Encoding for the AI thumbnail only
    thumbnail_ai_format: String = "jpeg"
    thumbnail_ai_quality: Integer = 85       -- JPEG quality
}
rule GenerateThumbnails {
    -- On import of an image, produce all four thumbnail variants from the
    -- stored binary. Non-image media is excluded by the `requires:` guard.
    when: MediaImported(media)
    requires: is_image(media.mime_type)

    -- Small (width-constrained, shared WebP settings)
    ensures: ThumbnailGenerated(
        source: media.file_path,
        destination: media.thumbnails.small,
        width: config.thumbnail_small_width,
        format: config.thumbnail_format,
        quality: config.thumbnail_quality
    )

    -- Medium (width-constrained, shared WebP settings)
    ensures: ThumbnailGenerated(
        source: media.file_path,
        destination: media.thumbnails.medium,
        width: config.thumbnail_medium_width,
        format: config.thumbnail_format,
        quality: config.thumbnail_quality
    )

    -- Large (width-constrained, shared WebP settings)
    ensures: ThumbnailGenerated(
        source: media.file_path,
        destination: media.thumbnails.large,
        width: config.thumbnail_large_width,
        format: config.thumbnail_format,
        quality: config.thumbnail_quality
    )

    -- AI: takes `size` instead of `width` and has its own format/quality
    ensures: ThumbnailGenerated(
        source: media.file_path,
        destination: media.thumbnails.ai,
        size: config.thumbnail_ai_size,
        format: config.thumbnail_ai_format,
        quality: config.thumbnail_ai_quality
    )
}
-- Thumbnail generation algorithm
value ThumbnailGeneration {
    -- Documentation-only value: it declares no fields and records the
    -- processing steps applied to each thumbnail.
    --
    -- 1. Load source image
    -- 2. Apply EXIF orientation correction (rotation, flip) so thumbnails display correctly
    -- 3. Resize: small/medium/large preserve aspect ratio (width-constrained)
    --    AI thumbnail is a 448x448 center crop (letterboxed on black background)
    --    NOTE(review): "center crop" discards pixels while "letterboxed" pads
    --    them; these are different operations — confirm which one applies.
    -- 4. Encode as WebP quality 80 for small/medium/large
    --    Encode as JPEG quality 85 for AI thumbnail
    -- 5. Write to bucketed thumbnail path: thumbnails/{id[0:2]}/{id}-{size}.{ext}
    --
    -- No _source copy is made. Thumbnails are regenerated from the original binary.
}
surface ThumbnailGenerationSurface {
    -- Surface over ThumbnailGeneration; the context is left unnamed and
    -- nothing is exposed or provided.
    context _: ThumbnailGeneration
}
invariant ThumbnailExifHandling {
    -- EXIF orientation IS applied during thumbnail generation, so thumbnails
    -- always appear right-side-up regardless of camera metadata.
    -- The width/height stored in the DB are the raw header values
    -- (pre-rotation), so they can differ from the displayed orientation.
}
-- ============================================================================
-- IMAGE PROCESSING RULES
-- ============================================================================
value ImageProcessing {
    -- MIME types accepted on input
    input_formats: Set<String> = {
        "image/jpeg",
        "image/png",
        "image/gif",
        "image/webp",
        "image/tiff",
        "image/bmp",
        "image/heic",
        "image/heif"
    }

    -- MIME types produced for thumbnails
    output_formats: Set<String> = {
        "image/webp",   -- Primary output for thumbnails
        "image/jpeg"    -- AI thumbnail only
    }

    -- Processing rules:
    --   1. Every thumbnail except AI is encoded as WebP, quality 80.
    --   2. The AI thumbnail is encoded as JPEG, quality 85 (for vision
    --      model compatibility).
    --   3. Full-size assets keep their original format (no conversion).
    --   4. EXIF data is not stripped; thumbnails are re-encoded, so EXIF is
    --      naturally absent from them.
}
surface ImageProcessingSurface {
    -- Makes the accepted input and produced output format sets visible.
    context processing: ImageProcessing

    exposes:
        processing.input_formats
        processing.output_formats
}
rule ProcessImageMetadata {
    -- Fills in dimensions, MIME type and byte size for an imported media
    -- record.
    --
    -- All values are read from media.file_path, the same source that
    -- GenerateThumbnails reads. The trigger binds only `media`, so the stored
    -- binary is the one file this rule can refer to.
    --
    -- Width and height come from the raw file header.
    -- MIME type is derived from the file extension.
    when: MediaImported(media)

    ensures: media.width = extract_width_from_header(media.file_path)
    ensures: media.height = extract_height_from_header(media.file_path)
    ensures: media.mime_type = detect_mime_from_extension(media.file_path)
    ensures: media.size = file_size(media.file_path)
}
invariant MimeDetection {
    -- The MIME type is detected from the file extension, not from the file
    -- content or magic bytes.
    -- Extension mapping: .jpg/.jpeg -> image/jpeg, .png -> image/png, etc.
}
-- ============================================================================
-- MEDIA TRANSLATION FILES
-- ============================================================================
value MediaTranslationFile {
    -- Per-language sidecar for a media asset.
    --   Path:   {binary_path}.{language}.meta
    --   Format: YAML-like key-value sidecar (same as the canonical sidecar)

    -- Identity
    translation_for: String      -- Canonical media ID
    language: String             -- ISO 639-1 code

    -- Translated text; each field is optional
    title: String?
    alt: String?
    caption: String?
}
surface MediaTranslationFileSurface {
    -- Identity fields are always exposed; each optional text field is
    -- exposed only when it is non-null.
    context file: MediaTranslationFile

    exposes:
        file.translation_for
        file.language
        file.title when file.title != null
        file.alt when file.alt != null
        file.caption when file.caption != null
}
invariant MediaTranslationFileLayout {
    -- A translation sidecar sits beside the binary it describes: its path
    -- is the binary path followed by the language code and ".meta".
    for t in MediaTranslations:
        t.file_path = format(
            "{binary_path}.{lang}.meta",
            binary_path: t.media.file_path,
            lang: t.language
        )
}
-- ============================================================================
-- MEDIA IMPORT RULES
-- ============================================================================
rule ImportMedia {
    -- Imports one source file into the media library.
    --
    -- Steps:
    --   1. Validate file type (must be supported image)
    --   2. Generate UUID v4 filename
    --   3. Copy to media/{YYYY}/{MM}/{uuid}.{ext}
    --   4. Write sidecar {binary_path}.meta
    --   5. Generate four thumbnail sizes
    --   6. Index for search (FTS5)
    --
    -- NOTE(review): step 1 has no `requires:` clause in this rule — confirm
    -- where unsupported file types are rejected.
    -- NOTE(review): the {yyyy}, {mm}, {uuid} and {ext} placeholders below
    -- receive no named arguments, unlike the other format() calls in this
    -- spec — confirm how they resolve and that {uuid} matches `filename`.
    when: ImportMediaRequested(source_path, project)

    ensures:
        -- The created record is bound to a name so the follow-up events can
        -- refer to it; the trigger itself binds only source_path and project.
        let imported = media/Media.created(
            filename: generate_uuid_v4_filename(source_path),
            original_name: basename(source_path),
            mime_type: detect_mime_from_extension(source_path),
            size: file_size(source_path),
            width: extract_width_from_header(source_path),
            height: extract_height_from_header(source_path),
            file_path: format("media/{yyyy}/{mm}/{uuid}.{ext}"),
            sidecar_path: format("media/{yyyy}/{mm}/{uuid}.{ext}.meta"),
            checksum: sha256(source_path)
        )
        ThumbnailsGenerated(imported.id)
        -- SearchIndexUpdated carries the Media entity, matching TagMedia and
        -- the trigger declared by IndexMediaForSearch.
        SearchIndexUpdated(imported)
}
-- ============================================================================
-- MEDIA TAGGING
-- ============================================================================
invariant MediaTagsFormat {
    -- Media tags are stored as a JSON array in the sidecar file.
    -- Tags are optional and are only written if present.
    -- The format is the same as for post tags.
}
rule TagMedia {
    -- Sets the tags on a media record to the requested set, then signals
    -- that the sidecar file and the search index must follow.
    when: TagMediaRequested(media, tags)

    ensures: media.tags = tags
    ensures: SidecarFileUpdated(media)
    ensures: SearchIndexUpdated(media)
}
-- ============================================================================
-- MEDIA DELETION
-- ============================================================================
rule DeleteMedia {
    -- Removes a media asset and every file derived from it.
    --
    -- Steps:
    --   1. Remove from database
    --   2. Delete binary file
    --   3. Delete sidecar file ({binary_path}.meta)
    --   4. Delete all four thumbnail files
    --   5. Delete translation sidecars ({binary_path}.{lang}.meta)
    --   6. Remove from search index
    --   7. Remove from all post links
    --
    -- NOTE(review): step 1 (database removal) has no clause in this rule —
    -- confirm whether it is specified elsewhere.
    when: DeleteMediaRequested(media)

    -- Steps 2-3: binary and canonical sidecar
    ensures: FileDeleted(media.file_path)
    ensures: FileDeleted(media.sidecar_path)

    -- Step 4: all four thumbnails
    ensures: FileDeleted(media.thumbnails.small)
    ensures: FileDeleted(media.thumbnails.medium)
    ensures: FileDeleted(media.thumbnails.large)
    ensures: FileDeleted(media.thumbnails.ai)

    -- Step 5: every translation sidecar belonging to this media, so that no
    -- {binary_path}.{lang}.meta file is left behind once the binary is gone
    for t in MediaTranslations:
        if t.media = media:
            ensures: FileDeleted(t.file_path)

    -- Step 6: search index
    ensures: SearchIndexRemoved(media)

    -- Step 7: unlink from every post that references this media
    ensures:
        for p in media.linked_posts:
            p.linked_media = p.linked_media - {media}
}
-- ============================================================================
-- MEDIA VALIDATION
-- ============================================================================
rule ValidateMedia {
    -- Audits every media record in a project and reports one issue per
    -- failed check. The checks are independent, so a single record can
    -- produce several issues.
    --
    -- Checks:
    --   1. Missing binary files
    --   2. Missing sidecar files
    --   3. Missing thumbnails (all 4 sizes)
    --   4. Corrupted image files
    --   5. Orphan media (not linked to any post)
    when: ValidateMediaRequested(project)

    for m in project.media:
        -- 1. Binary
        if not file_exists(m.file_path):
            ensures: ValidationIssueReported(m, "missing_binary")

        -- 2. Sidecar
        if not file_exists(m.sidecar_path):
            ensures: ValidationIssueReported(m, "missing_sidecar")

        -- 3. Thumbnails, one issue code per size
        if not file_exists(m.thumbnails.small):
            ensures: ValidationIssueReported(m, "missing_thumbnail_small")
        if not file_exists(m.thumbnails.medium):
            ensures: ValidationIssueReported(m, "missing_thumbnail_medium")
        if not file_exists(m.thumbnails.large):
            ensures: ValidationIssueReported(m, "missing_thumbnail_large")
        if not file_exists(m.thumbnails.ai):
            ensures: ValidationIssueReported(m, "missing_thumbnail_ai")

        -- 4. Image integrity
        if not is_valid_image(m.file_path):
            ensures: ValidationIssueReported(m, "corrupted")

        -- 5. Orphans: no post links to this media
        if not exists (p in Posts where m in p.linked_media):
            ensures: ValidationIssueReported(m, "orphan")
}
-- ============================================================================
-- MEDIA SIDECAR FORMAT
-- ============================================================================
invariant MediaSidecarFormat {
    -- Sidecar files use a YAML-like key-value format (hand-built, not gray-matter).
    -- Path: {binary_path}.meta
    -- Only truthy fields are written (except required fields).
    -- Fields: title, alt, caption, author, tags, language, linkedPostIds
    -- Note: 'filename' is NOT written to the sidecar (it is the binary filename itself).
}
-- ============================================================================
-- IMAGE OPTIMIZATION
-- ============================================================================
config {
    -- Import policy:
    --   * No file size limit on import.
    --   * Original files are stored as-is (no compression, no resize).
    --   * Only thumbnails are generated from the original.

    -- EXIF is not explicitly stripped; re-encoding naturally omits it.
    strip_exif: Boolean = false
}
-- ============================================================================
-- MEDIA SEARCH INDEXING
-- ============================================================================
rule IndexMediaForSearch {
    -- Builds the search entry for a media record from its text fields.
    --
    -- Index fields: title, alt, caption, original_name, tags
    -- Plus all translation titles, alts, and captions
    -- NOTE(review): the concat below lists only the canonical fields; no
    -- translation text is passed in — confirm where translations are added.
    when: SearchIndexUpdated(media: media/Media)

    -- Tags are joined with single spaces before concatenation
    let indexed_text = concat(
        media.title, media.alt, media.caption,
        media.original_name,
        join(media.tags, " ")
    )

    -- The stemming language is detected from the combined text itself
    let stemmed_text = stem(indexed_text, detect_language(indexed_text))

    ensures: search/MediaSearchIndex.created(
        media: media,
        stemmed_content: stemmed_text
    )
}