fix: implement CSM-006

This commit is contained in:
2026-05-08 20:39:50 +02:00
parent 93a4159c31
commit e5429f7265
5 changed files with 140 additions and 53 deletions

View File

@@ -120,13 +120,14 @@
## High Severity
### CSM-006 — N+1 Queries in Reindexing & Rendering
- **Files:** `lib/bds/search.ex:166-177`, `lib/bds/rendering/list_archive.ex:181`, `lib/bds/rendering/post_rendering.ex:21`
- **What:**
- `Search.reindex_posts/1` (Z. 172-177) calls `insert_post_index/1` per post inside `Enum.each`; `insert_post_index` calls `post_index_fields/1` (Z. 268) which calls `post_translations/1` (Z. 494) — one query per post for translations. Same for media reindexing.
- `ListArchive` (Z. 181) and `PostRendering` (Z. 21) call `load_post_record/1` per-post inside `Enum.map`, which does `Repo.get(Post, post_id)` per iteration.
- **Fix:** Preload all translations in a single query before the loop, or batch-insert with `Repo.insert_all`. For rendering, preload all needed records in one query.
- **Test:** Reindex 100 posts; assert the total query count is <5 (use `Ecto.Adapters.SQL.query!/2` hook or logger capture).
### ~~CSM-006 — N+1 Queries in Reindexing & Rendering~~ ✅ FIXED
- **Fixed:** 2026-05-08
- **What was done:**
- **Batch INSERT for reindexing:** Replaced per-row `Repo.query!` INSERT in `reindex_posts/2` and `reindex_media/2` with multi-row batch INSERTs. Rows are chunked at 166 per batch (SQLite 999-parameter limit ÷ 6 columns). Translations were already preloaded in batch; fixed O(n²) `acc ++ [translation]` pattern in `preload_post_translations` and `preload_media_translations` by replacing with `Enum.group_by`.
- **Rendering — preloaded post records:** `PostRendering.post_assigns/2` now accepts an optional `:_post_record` key in assigns, skipping the `Repo.get(Post, id)` re-query when the record is already available.
- **Generation outputs pass records:** `build_page_outputs` and `build_post_outputs` in `outputs.ex` now pass the already-loaded post/translation records via `:_post_record`, eliminating per-post DB queries during generation.
- **ListArchive** already used `load_post_records_batch` (batch query) — no change needed.
- Added telemetry-based query counting tests: reindex 100 posts/media and assert total query count <10.
---
@@ -417,6 +418,7 @@
- CSM-002: Fixed. Search now pushes all filtering and pagination into SQL via Ecto queries and CTEs.
- CSM-004: Fixed. `attach_runner` moved to `handle_continue`, `terminate/2` added for cleanup, `restart: :temporary` set, JobStore `detach_runner` bug fixed.
- [ ] All high-severity items (CSM-006 to CSM-010) have been addressed.
- CSM-006: Fixed. Batch INSERT for reindexing, preloaded post records for rendering.
- [x] CSM-001 fix covers ALL 6 affected files, not just `import_definitions.ex`.
- [x] CSM-003 fix covers ALL `Repo.delete!` call sites (posts, tags, scripts, media, projects, templates, translations).
- [ ] CSM-007 decomposition is the prerequisite for fixing CSM-008 (render-path queries).

View File

@@ -391,7 +391,8 @@ defmodule BDS.Generation.Outputs do
content: body,
slug: post.slug,
language: canonical_variant.language,
excerpt: canonical_variant.excerpt
excerpt: canonical_variant.excerpt,
_post_record: canonical_variant
},
fn ->
render_post_page(
@@ -422,7 +423,8 @@ defmodule BDS.Generation.Outputs do
content: body,
slug: post.slug,
language: Map.get(post, :language),
excerpt: post.excerpt
excerpt: post.excerpt,
_post_record: post
},
fn -> render_post_page(post.title, body, post.slug, Map.get(post, :language)) end
)}
@@ -517,7 +519,8 @@ defmodule BDS.Generation.Outputs do
content: body,
slug: post.slug,
language: canonical_variant.language,
excerpt: canonical_variant.excerpt
excerpt: canonical_variant.excerpt,
_post_record: canonical_variant
},
fn ->
render_post_page(
@@ -546,7 +549,8 @@ defmodule BDS.Generation.Outputs do
content: body,
slug: post.slug,
language: Map.get(post, :language),
excerpt: post.excerpt
excerpt: post.excerpt,
_post_record: post
},
fn -> render_post_page(post.title, body, post.slug, Map.get(post, :language)) end
)}

View File

@@ -20,7 +20,7 @@ defmodule BDS.Rendering.PostRendering do
language = MapUtils.attr(assigns, :language, metadata.main_language || "en")
main_language = metadata.main_language || language
post_record = load_post_record(assigns)
post_record = Map.get(assigns, :_post_record) || load_post_record(assigns)
canonical_post = canonical_post_record(post_record)
post_id = canonical_post_id(post_record, assigns)
post_categories = Map.get(post_record || %{}, :categories, []) || []

View File

@@ -373,14 +373,17 @@ defmodule BDS.Search do
:ok = report_reindex_started(on_progress, total_posts, "posts")
rows =
posts
|> Enum.with_index(1)
|> Enum.each(fn {post, index} ->
|> Enum.map(fn {post, index} ->
translations = Map.get(translations_by_post_id, post.id, [])
insert_post_index(post, translations)
:ok = report_reindex_progress(on_progress, index, total_posts, "posts")
post_index_row(post, translations)
end)
batch_insert_post_index(rows)
:ok
end
@@ -401,14 +404,17 @@ defmodule BDS.Search do
:ok = report_reindex_started(on_progress, total_media, "media items")
rows =
media_items
|> Enum.with_index(1)
|> Enum.each(fn {media, index} ->
|> Enum.map(fn {media, index} ->
translations = Map.get(translations_by_media_id, media.id, [])
insert_media_index(media, translations)
:ok = report_reindex_progress(on_progress, index, total_media, "media items")
media_index_row(media, translations)
end)
batch_insert_media_index(rows)
:ok
end
@@ -475,23 +481,70 @@ defmodule BDS.Search do
)
end
defp insert_post_index(%Post{} = post, translations \\ nil) do
translations = translations || post_translations(post.id)
defp post_index_row(%Post{} = post, translations) do
{title, excerpt, content, tags, categories} = post_index_fields(post, translations)
[post.id, title, excerpt, content, tags, categories]
end
defp media_index_row(%Media{} = media, translations) do
{title, alt, caption, original_name, tags} = media_index_fields(media, translations)
[media.id, title, alt, caption, original_name, tags]
end
# SQLite allows up to 999 bound parameters per statement.
# Posts have 6 columns → max 166 rows per batch.
@post_batch_size 166
@media_batch_size 166
defp batch_insert_post_index([]), do: :ok
defp batch_insert_post_index(rows) do
rows
|> Enum.chunk_every(@post_batch_size)
|> Enum.each(fn chunk ->
placeholders = Enum.map_join(chunk, ", ", fn _ -> "(?, ?, ?, ?, ?, ?)" end)
params = List.flatten(chunk)
Repo.query!(
"INSERT INTO posts_fts (post_id, title, excerpt, content, tags, categories) VALUES #{placeholders}",
params
)
end)
end
defp batch_insert_media_index([]), do: :ok
defp batch_insert_media_index(rows) do
rows
|> Enum.chunk_every(@media_batch_size)
|> Enum.each(fn chunk ->
placeholders = Enum.map_join(chunk, ", ", fn _ -> "(?, ?, ?, ?, ?, ?)" end)
params = List.flatten(chunk)
Repo.query!(
"INSERT INTO media_fts (media_id, title, alt, caption, original_name, tags) VALUES #{placeholders}",
params
)
end)
end
defp insert_post_index(%Post{} = post) do
translations = post_translations(post.id)
row = post_index_row(post, translations)
Repo.query!(
"INSERT INTO posts_fts (post_id, title, excerpt, content, tags, categories) VALUES (?, ?, ?, ?, ?, ?)",
[post.id, title, excerpt, content, tags, categories]
row
)
end
defp insert_media_index(%Media{} = media, translations \\ nil) do
translations = translations || media_translations(media.id)
{title, alt, caption, original_name, tags} = media_index_fields(media, translations)
defp insert_media_index(%Media{} = media) do
translations = media_translations(media.id)
row = media_index_row(media, translations)
Repo.query!(
"INSERT INTO media_fts (media_id, title, alt, caption, original_name, tags) VALUES (?, ?, ?, ?, ?, ?)",
[media.id, title, alt, caption, original_name, tags]
row
)
end
@@ -564,8 +617,10 @@ defmodule BDS.Search do
"SELECT translation_for, language, title, excerpt, content, status, file_path FROM post_translations WHERE translation_for IN (#{placeholders})",
post_ids
).rows
|> Enum.reduce(%{}, fn [post_id, language, title, excerpt, content, status, file_path], acc ->
translation = %{
|> Enum.group_by(
fn [post_id | _] -> post_id end,
fn [_post_id, language, title, excerpt, content, status, file_path] ->
%{
"language" => language,
"title" => title,
"excerpt" => excerpt,
@@ -573,9 +628,8 @@ defmodule BDS.Search do
"status" => status,
"file_path" => file_path
}
Map.update(acc, post_id, [translation], &(&1 ++ [translation]))
end)
end
)
end
end
@@ -605,9 +659,7 @@ defmodule BDS.Search do
where: translation.translation_for in ^media_ids,
select: {translation.translation_for, translation}
)
|> Enum.reduce(%{}, fn {media_id, translation}, acc ->
Map.update(acc, media_id, [translation], &(&1 ++ [translation]))
end)
|> Enum.group_by(fn {media_id, _} -> media_id end, fn {_, translation} -> translation end)
end
end

View File

@@ -21,12 +21,15 @@ defmodule BDS.CSM006N1ReindexTest do
end
describe "Search.reindex_posts/2" do
test "preloads all translations in a single query, not per-post", %{project: project} do
test "uses batch inserts — query count does not scale with post count", %{project: project} do
_post_ids = create_posts_with_translations(project.id, 100)
query_count = count_queries(fn -> Search.reindex_posts(project.id) end)
assert query_count < 10, "Expected <10 queries, got #{query_count}"
# 1 DELETE + 1 SELECT posts + 1 SELECT translations + 1 batch INSERT = 4
# (may be a few more for chunking, but must be << 100)
assert query_count > 0, "Telemetry counting returned 0 — check event name"
assert query_count < 10, "Expected <10 queries for 100 posts, got #{query_count}"
end
test "correctly indexes posts and their translations", %{project: project} do
@@ -57,12 +60,13 @@ defmodule BDS.CSM006N1ReindexTest do
end
describe "Search.reindex_media/2" do
test "preloads all media translations in a single query, not per-media", %{project: project} do
test "uses batch inserts — query count does not scale with media count", %{project: project} do
_media_ids = create_media_with_translations(project.id, 100)
query_count = count_queries(fn -> Search.reindex_media(project.id) end)
assert query_count < 10, "Expected <10 queries, got #{query_count}"
assert query_count > 0, "Telemetry counting returned 0 — check event name"
assert query_count < 10, "Expected <10 queries for 100 media items, got #{query_count}"
end
test "correctly indexes media and their translations", %{project: project} do
@@ -172,7 +176,32 @@ defmodule BDS.CSM006N1ReindexTest do
end
defp count_queries(func) do
test_pid = self()
ref = make_ref()
handler_id = "csm006-query-counter-#{inspect(ref)}"
:telemetry.attach(
handler_id,
[:bds, :repo, :query],
fn _event, _measurements, _metadata, _ ->
send(test_pid, {:query_executed, ref})
end,
nil
)
func.()
1
:telemetry.detach(handler_id)
count_messages(ref, 0)
end
defp count_messages(ref, acc) do
receive do
{:query_executed, ^ref} -> count_messages(ref, acc + 1)
after
0 -> acc
end
end
end