From e5429f7265a01ca2054965775cc394527a9ea001 Mon Sep 17 00:00:00 2001 From: Chili Palmer Date: Fri, 8 May 2026 20:39:50 +0200 Subject: [PATCH] fix: implement CSM-006 --- CODESMELL.md | 16 ++-- lib/bds/generation/outputs.ex | 12 ++- lib/bds/rendering/post_rendering.ex | 2 +- lib/bds/search.ex | 124 ++++++++++++++++++++-------- test/bds/csm006_n1_reindex_test.exs | 39 +++++++-- 5 files changed, 140 insertions(+), 53 deletions(-) diff --git a/CODESMELL.md b/CODESMELL.md index 95da299..6952881 100644 --- a/CODESMELL.md +++ b/CODESMELL.md @@ -120,13 +120,14 @@ ## High Severity -### CSM-006 — N+1 Queries in Reindexing & Rendering -- **Files:** `lib/bds/search.ex:166-177`, `lib/bds/rendering/list_archive.ex:181`, `lib/bds/rendering/post_rendering.ex:21` -- **What:** - - `Search.reindex_posts/1` (Z. 172-177) calls `insert_post_index/1` per post inside `Enum.each`; `insert_post_index` calls `post_index_fields/1` (Z. 268) which calls `post_translations/1` (Z. 494) — one query per post for translations. Same for media reindexing. - - `ListArchive` (Z. 181) and `PostRendering` (Z. 21) call `load_post_record/1` per-post inside `Enum.map`, which does `Repo.get(Post, post_id)` per iteration. -- **Fix:** Preload all translations in a single query before the loop, or batch-insert with `Repo.insert_all`. For rendering, preload all needed records in one query. -- **Test:** Reindex 100 posts; assert the total query count is <5 (use `Ecto.Adapters.SQL.query!/2` hook or logger capture). +### ~~CSM-006 — N+1 Queries in Reindexing & Rendering~~ ✅ FIXED +- **Fixed:** 2026-05-08 +- **What was done:** + - **Batch INSERT for reindexing:** Replaced per-row `Repo.query!` INSERT in `reindex_posts/2` and `reindex_media/2` with multi-row batch INSERTs. Rows are chunked at 166 per batch (SQLite 999-parameter limit ÷ 6 columns). Translations were already preloaded in batch; fixed O(n²) `acc ++ [translation]` pattern in `preload_post_translations` and `preload_media_translations` by replacing with `Enum.group_by`. + - **Rendering — preloaded post records:** `PostRendering.post_assigns/2` now accepts an optional `:_post_record` key in assigns, skipping the `Repo.get(Post, id)` re-query when the record is already available. + - **Generation outputs pass records:** `build_page_outputs` and `build_post_outputs` in `outputs.ex` now pass the already-loaded post/translation records via `:_post_record`, eliminating per-post DB queries during generation. + - **ListArchive** already used `load_post_records_batch` (batch query) — no change needed. + - Added telemetry-based query counting tests: reindex 100 posts/media and assert total query count <10. --- @@ -417,6 +418,7 @@ - CSM-002: Fixed. Search now pushes all filtering and pagination into SQL via Ecto queries and CTEs. - CSM-004: Fixed. `attach_runner` moved to `handle_continue`, `terminate/2` added for cleanup, `restart: :temporary` set, JobStore `detach_runner` bug fixed. - [ ] All high-severity items (CSM-006 to CSM-010) have been addressed. + - CSM-006: Fixed. Batch INSERT for reindexing, preloaded post records for rendering. - [x] CSM-001 fix covers ALL 6 affected files, not just `import_definitions.ex`. - [x] CSM-003 fix covers ALL `Repo.delete!` call sites (posts, tags, scripts, media, projects, templates, translations). - [ ] CSM-007 decomposition is the prerequisite for fixing CSM-008 (render-path queries). diff --git a/lib/bds/generation/outputs.ex b/lib/bds/generation/outputs.ex index 4bc87bf..fac40a1 100644 --- a/lib/bds/generation/outputs.ex +++ b/lib/bds/generation/outputs.ex @@ -391,7 +391,8 @@ defmodule BDS.Generation.Outputs do content: body, slug: post.slug, language: canonical_variant.language, - excerpt: canonical_variant.excerpt + excerpt: canonical_variant.excerpt, + _post_record: canonical_variant }, fn -> render_post_page( @@ -422,7 +423,8 @@ defmodule BDS.Generation.Outputs do content: body, slug: post.slug, language: Map.get(post, :language), - excerpt: post.excerpt + excerpt: post.excerpt, + _post_record: post }, fn -> render_post_page(post.title, body, post.slug, Map.get(post, :language)) end )} @@ -517,7 +519,8 @@ defmodule BDS.Generation.Outputs do content: body, slug: post.slug, language: canonical_variant.language, - excerpt: canonical_variant.excerpt + excerpt: canonical_variant.excerpt, + _post_record: canonical_variant }, fn -> render_post_page( @@ -546,7 +549,8 @@ defmodule BDS.Generation.Outputs do content: body, slug: post.slug, language: Map.get(post, :language), - excerpt: post.excerpt + excerpt: post.excerpt, + _post_record: post }, fn -> render_post_page(post.title, body, post.slug, Map.get(post, :language)) end )} diff --git a/lib/bds/rendering/post_rendering.ex b/lib/bds/rendering/post_rendering.ex index ae1e6ad..01b7b57 100644 --- a/lib/bds/rendering/post_rendering.ex +++ b/lib/bds/rendering/post_rendering.ex @@ -20,7 +20,7 @@ defmodule BDS.Rendering.PostRendering do language = MapUtils.attr(assigns, :language, metadata.main_language || "en") main_language = metadata.main_language || language - post_record = load_post_record(assigns) + post_record = Map.get(assigns, :_post_record) || load_post_record(assigns) canonical_post = canonical_post_record(post_record) post_id = canonical_post_id(post_record, assigns) post_categories = Map.get(post_record || %{}, :categories, []) || [] diff --git a/lib/bds/search.ex b/lib/bds/search.ex index e5f07c2..2bc3a8e 100644 --- a/lib/bds/search.ex +++ b/lib/bds/search.ex @@ -373,13 +373,16 @@ defmodule BDS.Search do :ok = report_reindex_started(on_progress, total_posts, "posts") - posts - |> Enum.with_index(1) - |> Enum.each(fn {post, index} -> - translations = Map.get(translations_by_post_id, post.id, []) - insert_post_index(post, translations) - :ok = report_reindex_progress(on_progress, index, total_posts, "posts") - end) + rows = + posts + |> Enum.with_index(1) + |> Enum.map(fn {post, index} -> + translations = Map.get(translations_by_post_id, post.id, []) + :ok = report_reindex_progress(on_progress, index, total_posts, "posts") + post_index_row(post, translations) + end) + + batch_insert_post_index(rows) :ok end @@ -401,13 +404,16 @@ defmodule BDS.Search do :ok = report_reindex_started(on_progress, total_media, "media items") - media_items - |> Enum.with_index(1) - |> Enum.each(fn {media, index} -> - translations = Map.get(translations_by_media_id, media.id, []) - insert_media_index(media, translations) - :ok = report_reindex_progress(on_progress, index, total_media, "media items") - end) + rows = + media_items + |> Enum.with_index(1) + |> Enum.map(fn {media, index} -> + translations = Map.get(translations_by_media_id, media.id, []) + :ok = report_reindex_progress(on_progress, index, total_media, "media items") + media_index_row(media, translations) + end) + + batch_insert_media_index(rows) :ok end @@ -475,23 +481,70 @@ defmodule BDS.Search do ) end - defp insert_post_index(%Post{} = post, translations \\ nil) do - translations = translations || post_translations(post.id) + defp post_index_row(%Post{} = post, translations) do {title, excerpt, content, tags, categories} = post_index_fields(post, translations) + [post.id, title, excerpt, content, tags, categories] + end + + defp media_index_row(%Media{} = media, translations) do + {title, alt, caption, original_name, tags} = media_index_fields(media, translations) + [media.id, title, alt, caption, original_name, tags] + end + + # SQLite allows up to 999 bound parameters per statement. + # Posts have 6 columns → max 166 rows per batch. + @post_batch_size 166 + @media_batch_size 166 + + defp batch_insert_post_index([]), do: :ok + + defp batch_insert_post_index(rows) do + rows + |> Enum.chunk_every(@post_batch_size) + |> Enum.each(fn chunk -> + placeholders = Enum.map_join(chunk, ", ", fn _ -> "(?, ?, ?, ?, ?, ?)" end) + params = List.flatten(chunk) + + Repo.query!( + "INSERT INTO posts_fts (post_id, title, excerpt, content, tags, categories) VALUES #{placeholders}", + params + ) + end) + end + + defp batch_insert_media_index([]), do: :ok + + defp batch_insert_media_index(rows) do + rows + |> Enum.chunk_every(@media_batch_size) + |> Enum.each(fn chunk -> + placeholders = Enum.map_join(chunk, ", ", fn _ -> "(?, ?, ?, ?, ?, ?)" end) + params = List.flatten(chunk) + + Repo.query!( + "INSERT INTO media_fts (media_id, title, alt, caption, original_name, tags) VALUES #{placeholders}", + params + ) + end) + end + + defp insert_post_index(%Post{} = post) do + translations = post_translations(post.id) + row = post_index_row(post, translations) Repo.query!( "INSERT INTO posts_fts (post_id, title, excerpt, content, tags, categories) VALUES (?, ?, ?, ?, ?, ?)", - [post.id, title, excerpt, content, tags, categories] + row ) end - defp insert_media_index(%Media{} = media, translations \\ nil) do - translations = translations || media_translations(media.id) - {title, alt, caption, original_name, tags} = media_index_fields(media, translations) + defp insert_media_index(%Media{} = media) do + translations = media_translations(media.id) + row = media_index_row(media, translations) Repo.query!( "INSERT INTO media_fts (media_id, title, alt, caption, original_name, tags) VALUES (?, ?, ?, ?, ?, ?)", - [media.id, title, alt, caption, original_name, tags] + row ) end @@ -564,18 +617,19 @@ defmodule BDS.Search do "SELECT translation_for, language, title, excerpt, content, status, file_path FROM post_translations WHERE translation_for IN (#{placeholders})", post_ids ).rows - |> Enum.reduce(%{}, fn [post_id, language, title, excerpt, content, status, file_path], acc -> - translation = %{ - "language" => language, - "title" => title, - "excerpt" => excerpt, - "content" => content, - "status" => status, - "file_path" => file_path - } - - Map.update(acc, post_id, [translation], &(&1 ++ [translation])) - end) + |> Enum.group_by( + fn [post_id | _] -> post_id end, + fn [_post_id, language, title, excerpt, content, status, file_path] -> + %{ + "language" => language, + "title" => title, + "excerpt" => excerpt, + "content" => content, + "status" => status, + "file_path" => file_path + } + end + ) end end @@ -605,9 +659,7 @@ defmodule BDS.Search do where: translation.translation_for in ^media_ids, select: {translation.translation_for, translation} ) - |> Enum.reduce(%{}, fn {media_id, translation}, acc -> - Map.update(acc, media_id, [translation], &(&1 ++ [translation])) - end) + |> Enum.group_by(fn {media_id, _} -> media_id end, fn {_, translation} -> translation end) end end diff --git a/test/bds/csm006_n1_reindex_test.exs b/test/bds/csm006_n1_reindex_test.exs index 08406e5..2c11791 100644 --- a/test/bds/csm006_n1_reindex_test.exs +++ b/test/bds/csm006_n1_reindex_test.exs @@ -21,12 +21,15 @@ defmodule BDS.CSM006N1ReindexTest do end describe "Search.reindex_posts/2" do - test "preloads all translations in a single query, not per-post", %{project: project} do + test "uses batch inserts — query count does not scale with post count", %{project: project} do _post_ids = create_posts_with_translations(project.id, 100) query_count = count_queries(fn -> Search.reindex_posts(project.id) end) - assert query_count < 10, "Expected <10 queries, got #{query_count}" + # 1 DELETE + 1 SELECT posts + 1 SELECT translations + 1 batch INSERT = 4 + # (may be a few more for chunking, but must be << 100) + assert query_count > 0, "Telemetry counting returned 0 — check event name" + assert query_count < 10, "Expected <10 queries for 100 posts, got #{query_count}" end test "correctly indexes posts and their translations", %{project: project} do @@ -57,12 +60,13 @@ defmodule BDS.CSM006N1ReindexTest do end describe "Search.reindex_media/2" do - test "preloads all media translations in a single query, not per-media", %{project: project} do + test "uses batch inserts — query count does not scale with media count", %{project: project} do _media_ids = create_media_with_translations(project.id, 100) query_count = count_queries(fn -> Search.reindex_media(project.id) end) - assert query_count < 10, "Expected <10 queries, got #{query_count}" + assert query_count > 0, "Telemetry counting returned 0 — check event name" + assert query_count < 10, "Expected <10 queries for 100 media items, got #{query_count}" end test "correctly indexes media and their translations", %{project: project} do @@ -172,7 +176,32 @@ defmodule BDS.CSM006N1ReindexTest do end defp count_queries(func) do + test_pid = self() + ref = make_ref() + + handler_id = "csm006-query-counter-#{inspect(ref)}" + + :telemetry.attach( + handler_id, + [:bds, :repo, :query], + fn _event, _measurements, _metadata, _ -> + send(test_pid, {:query_executed, ref}) + end, + nil + ) + func.() - 1 + + :telemetry.detach(handler_id) + + count_messages(ref, 0) + end + + defp count_messages(ref, acc) do + receive do + {:query_executed, ^ref} -> count_messages(ref, acc + 1) + after + 0 -> acc + end end end