chore: merged different progress reporters

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
2026-05-01 17:12:49 +02:00
parent 79ee67c2e0
commit f6425de51d
8 changed files with 343 additions and 184 deletions

View File

@@ -9,6 +9,7 @@ defmodule BDS.Embeddings do
alias BDS.Embeddings.Key
alias BDS.Metadata
alias BDS.Posts.Post
alias BDS.ProgressReporter
alias BDS.Projects
alias BDS.Repo
@@ -87,7 +88,11 @@ defmodule BDS.Embeddings do
on_progress = progress_callback(opts)
posts =
Repo.all(from post in Post, where: post.project_id == ^project_id, order_by: [asc: post.created_at, asc: post.slug])
Repo.all(
from post in Post,
where: post.project_id == ^project_id,
order_by: [asc: post.created_at, asc: post.slug]
)
post_ids = Enum.map(posts, & &1.id)
total_posts = length(posts)
@@ -162,7 +167,8 @@ defmodule BDS.Embeddings do
case Repo.get_by(Key, post_id: post.id, project_id: post.project_id) do
%Key{content_hash: ^content_hash} ->
if Keyword.get(opts, :refresh_index, true) and snapshot_content_hash(post.project_id, post.id) != content_hash do
if Keyword.get(opts, :refresh_index, true) and
snapshot_content_hash(post.project_id, post.id) != content_hash do
:ok = rebuild_snapshot(post.project_id)
end
@@ -193,11 +199,14 @@ defmodule BDS.Embeddings do
def remove_post(post_id) when is_binary(post_id) do
project_id =
case Repo.get_by(Key, post_id: post_id) do
%Key{project_id: project_id} -> project_id
nil -> case Repo.get(Post, post_id) do
%Post{project_id: project_id} -> project_id
nil -> nil
end
%Key{project_id: project_id} ->
project_id
nil ->
case Repo.get(Post, post_id) do
%Post{project_id: project_id} -> project_id
nil -> nil
end
end
Repo.delete_all(from key in Key, where: key.post_id == ^post_id)
@@ -212,26 +221,33 @@ defmodule BDS.Embeddings do
def index_unindexed(project_id) when is_binary(project_id) do
if enabled_for_project?(project_id) do
posts =
Repo.all(from post in Post, where: post.project_id == ^project_id, order_by: [asc: post.created_at, asc: post.slug])
Repo.all(
from post in Post,
where: post.project_id == ^project_id,
order_by: [asc: post.created_at, asc: post.slug]
)
Enum.each(posts, fn post ->
body = resolve_post_body(post)
content_hash = hash_text(compose_embedding_source(post.title, body))
body = resolve_post_body(post)
content_hash = hash_text(compose_embedding_source(post.title, body))
case Repo.get_by(Key, post_id: post.id, project_id: project_id) do
%Key{content_hash: ^content_hash} -> :ok
_other ->
:ok =
sync_post_if_enabled(
%{post | content: if(post.content in [nil, ""], do: body, else: post.content)},
refresh_index: false
)
end
end)
case Repo.get_by(Key, post_id: post.id, project_id: project_id) do
%Key{content_hash: ^content_hash} ->
:ok
_other ->
:ok =
sync_post_if_enabled(
%{post | content: if(post.content in [nil, ""], do: body, else: post.content)},
refresh_index: false
)
end
end)
:ok = rebuild_snapshot(project_id)
indexed = Repo.all(from key in Key, where: key.project_id == ^project_id, select: key.post_id)
indexed =
Repo.all(from key in Key, where: key.project_id == ^project_id, select: key.post_id)
{:ok, indexed}
else
@@ -241,15 +257,29 @@ defmodule BDS.Embeddings do
def find_similar(post_id, limit \\ 5) when is_binary(post_id) and is_integer(limit) do
case source_post_and_vector(post_id) do
{:disabled, _project_id} -> {:ok, []}
{:error, :not_found} -> {:ok, []}
{:disabled, _project_id} ->
{:ok, []}
{:error, :not_found} ->
{:ok, []}
{:ok, post, source_vector} ->
similar =
case Index.neighbors(post.project_id, post.id, limit) do
{:ok, neighbors} -> neighbors
{:ok, neighbors} ->
neighbors
{:error, :missing} ->
Repo.all(from key in Key, where: key.project_id == ^post.project_id and key.post_id != ^post.id)
|> Enum.map(fn key -> %{post_id: key.post_id, score: cosine_similarity(source_vector, decode_vector(key.vector))} end)
Repo.all(
from key in Key,
where: key.project_id == ^post.project_id and key.post_id != ^post.id
)
|> Enum.map(fn key ->
%{
post_id: key.post_id,
score: cosine_similarity(source_vector, decode_vector(key.vector))
}
end)
|> Enum.sort_by(& &1.score, :desc)
|> Enum.take(max(limit, 0))
end
@@ -261,18 +291,29 @@ defmodule BDS.Embeddings do
def compute_similarities(source_post_id, target_post_ids)
when is_binary(source_post_id) and is_list(target_post_ids) do
case source_post_and_vector(source_post_id) do
{:disabled, _project_id} -> {:ok, %{}}
{:error, :not_found} -> {:ok, %{}}
{:disabled, _project_id} ->
{:ok, %{}}
{:error, :not_found} ->
{:ok, %{}}
{:ok, post, source_vector} ->
target_ids = Enum.uniq(target_post_ids)
scores =
Repo.all(from key in Key, where: key.project_id == ^post.project_id and key.post_id in ^target_ids)
Repo.all(
from key in Key,
where: key.project_id == ^post.project_id and key.post_id in ^target_ids
)
|> Enum.reduce(%{}, fn key, acc ->
if key.post_id == source_post_id do
acc
else
Map.put(acc, key.post_id, cosine_similarity(source_vector, decode_vector(key.vector)))
Map.put(
acc,
key.post_id,
cosine_similarity(source_vector, decode_vector(key.vector))
)
end
end)
@@ -289,7 +330,9 @@ defmodule BDS.Embeddings do
|> then(fn posts_by_id ->
Enum.reduce(similar, %{}, fn %{post_id: similar_post_id, score: score}, acc ->
case Map.get(posts_by_id, similar_post_id) do
nil -> acc
nil ->
acc
similar_post ->
Enum.reduce(similar_post.tags || [], acc, fn tag, tag_acc ->
Map.update(tag_acc, tag, score, &(&1 + score))
@@ -320,7 +363,13 @@ defmodule BDS.Embeddings do
|> enrich_duplicate_pairs(project_id)
{:error, :missing} ->
keys = Repo.all(from key in Key, where: key.project_id == ^project_id, order_by: [asc: key.post_id])
keys =
Repo.all(
from key in Key,
where: key.project_id == ^project_id,
order_by: [asc: key.post_id]
)
total_keys = length(keys)
:ok = report_rebuild_started(on_progress, total_keys, "embedding entries")
@@ -333,7 +382,8 @@ defmodule BDS.Embeddings do
for right <- keys,
left.post_id < right.post_id,
pair_key(left.post_id, right.post_id) not in dismissed,
similarity = cosine_similarity(decode_vector(left.vector), decode_vector(right.vector)),
similarity =
cosine_similarity(decode_vector(left.vector), decode_vector(right.vector)),
similarity >= @duplicate_threshold do
%{
post_id_a: left.post_id,
@@ -438,7 +488,9 @@ defmodule BDS.Embeddings do
|> Enum.flat_map(&[&1.post_id_a, &1.post_id_b])
|> Enum.uniq()
|> then(fn post_ids ->
Repo.all(from post in Post, where: post.project_id == ^project_id and post.id in ^post_ids)
Repo.all(
from post in Post, where: post.project_id == ^project_id and post.id in ^post_ids
)
|> Map.new(&{&1.id, &1})
end)
@@ -454,7 +506,9 @@ defmodule BDS.Embeddings do
|> Map.put(:similarity, pair.score)
|> Map.put(:exact_match, exact_match)
end)
|> Enum.sort_by(fn pair -> {not pair.exact_match, -pair.score, pair.post_id_a, pair.post_id_b} end)
|> Enum.sort_by(fn pair ->
{not pair.exact_match, -pair.score, pair.post_id_a, pair.post_id_b}
end)
end
defp exact_duplicate_match?(score, %Post{} = post_a, %Post{} = post_b) do
@@ -485,7 +539,8 @@ defmodule BDS.Embeddings do
end
end
defp resolve_post_body(%Post{content: content}) when is_binary(content) and content != "", do: content
defp resolve_post_body(%Post{content: content}) when is_binary(content) and content != "",
do: content
defp resolve_post_body(%Post{project_id: project_id, file_path: file_path}) do
if file_path in [nil, ""] do
@@ -507,7 +562,8 @@ defmodule BDS.Embeddings do
end
end
defp compose_embedding_source(title, content), do: string_or_empty(title) <> "\n\n" <> string_or_empty(content)
defp compose_embedding_source(title, content),
do: string_or_empty(title) <> "\n\n" <> string_or_empty(content)
defp string_or_empty(nil), do: ""
defp string_or_empty(value) when is_binary(value), do: value
@@ -525,39 +581,27 @@ defmodule BDS.Embeddings do
Index.rebuild(project_id, model_id: model_id(), dimensions: dimensions())
end
defp progress_callback(opts) do
case Keyword.get(opts, :on_progress) do
callback when is_function(callback, 2) -> callback
_other -> nil
end
end
defp report_rebuild_started(nil, _total, _label), do: :ok
defp report_rebuild_started(callback, 0, label) do
callback.(1.0, "No #{label} to rebuild")
:ok
end
defp progress_callback(opts), do: ProgressReporter.callback(opts)
defp report_rebuild_started(callback, total, label) do
callback.(0.0, "Rebuilding 0/#{total} #{label}")
:ok
ProgressReporter.report_count_started(callback, total, label,
verb: "Rebuilding",
start_progress: 0.0,
empty_suffix: "to rebuild",
message_style: :prefix_count
)
end
defp report_rebuild_progress(nil, _current, _total, _label), do: :ok
defp report_rebuild_progress(_callback, _current, 0, _label), do: :ok
defp report_rebuild_progress(callback, current, total, label) do
callback.(current / total, "Rebuilding #{current}/#{total} #{label}")
:ok
ProgressReporter.report_count_progress(callback, current, total, label,
verb: "Rebuilding",
start_progress: 0.0,
message_style: :prefix_count
)
end
defp report_rebuild_phase(nil, _value, _label), do: :ok
defp report_rebuild_phase(callback, value, label) do
callback.(value, label)
:ok
end
defp report_rebuild_phase(callback, value, label),
do: ProgressReporter.report_phase(callback, value, label)
defp snapshot_content_hash(project_id, post_id) do
case Index.read(project_id) do

View File

@@ -6,6 +6,7 @@ defmodule BDS.Embeddings.Index do
alias BDS.Persistence
alias BDS.Embeddings.Key
alias BDS.Projects
alias BDS.ProgressReporter
alias BDS.Repo
@neighbor_limit 21
@@ -206,30 +207,22 @@ defmodule BDS.Embeddings.Index do
defp sort_pair(post_id_a, post_id_b) when post_id_a <= post_id_b, do: {post_id_a, post_id_b}
defp sort_pair(post_id_a, post_id_b), do: {post_id_b, post_id_a}
defp progress_callback(opts) do
case Keyword.get(opts, :on_progress) do
callback when is_function(callback, 2) -> callback
_other -> nil
end
end
defp report_scan_started(nil, _total, _label), do: :ok
defp report_scan_started(callback, 0, label) do
callback.(1.0, "No #{label} to scan")
:ok
end
defp progress_callback(opts), do: ProgressReporter.callback(opts)
defp report_scan_started(callback, total, label) do
callback.(0.0, "Scanning 0/#{total} #{label}")
:ok
ProgressReporter.report_count_started(callback, total, label,
verb: "Scanning",
start_progress: 0.0,
empty_suffix: "to scan",
message_style: :prefix_count
)
end
defp report_scan_progress(nil, _current, _total, _label), do: :ok
defp report_scan_progress(_callback, _current, 0, _label), do: :ok
defp report_scan_progress(callback, current, total, label) do
callback.(current / total, "Scanning #{current}/#{total} #{label}")
:ok
ProgressReporter.report_count_progress(callback, current, total, label,
verb: "Scanning",
start_progress: 0.0,
message_style: :prefix_count
)
end
end

View File

@@ -12,64 +12,54 @@ defmodule BDS.Generation.Progress do
def callback(opts), do: BDS.ProgressReporter.callback(opts)
@spec report_generation_started(callback(), non_neg_integer(), String.t()) :: :ok
def report_generation_started(nil, _total, _label), do: :ok
def report_generation_started(callback, 0, label) do
callback.(1.0, "No #{label} to process")
:ok
end
def report_generation_started(callback, total, label) do
callback.(0.0, "Processing 0/#{total} #{label}")
:ok
BDS.ProgressReporter.report_count_started(callback, total, label,
verb: "Processing",
start_progress: 0.0,
empty_suffix: "to process",
message_style: :prefix_count
)
end
@spec report_generation_progress(callback(), non_neg_integer(), non_neg_integer(), String.t()) ::
:ok
def report_generation_progress(nil, _current, _total, _label), do: :ok
def report_generation_progress(_callback, _current, 0, _label), do: :ok
def report_generation_progress(callback, current, total, label) do
callback.(current / total, "Processing #{current}/#{total} #{label}")
:ok
BDS.ProgressReporter.report_count_progress(callback, current, total, label,
verb: "Processing",
start_progress: 0.0,
message_style: :prefix_count
)
end
@spec report_validation_progress(callback(), float(), String.t()) :: :ok
def report_validation_progress(nil, _progress, _message), do: :ok
def report_validation_progress(callback, progress, message) do
callback.(progress, message)
:ok
end
def report_validation_progress(callback, progress, message),
do: BDS.ProgressReporter.report_phase(callback, progress, message)
@spec report_validation_snapshot_progress(callback(), atom(), non_neg_integer(), integer()) ::
:ok
def report_validation_snapshot_progress(nil, _stage, _current, _total), do: :ok
def report_validation_snapshot_progress(_callback, _stage, _current, total)
when total <= 0,
do: :ok
def report_validation_snapshot_progress(callback, :posts, current, total) do
progress = min(0.18, current / total * 0.18)
callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
:ok
BDS.ProgressReporter.report_count_progress(callback, current, total, "sitemap URLs...",
verb: "Collecting",
range: {0.0, 0.18},
message_style: :verb_label_count
)
end
def report_validation_snapshot_progress(callback, :translations, current, total) do
progress = 0.18 + min(0.12, current / total * 0.12)
callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
:ok
BDS.ProgressReporter.report_count_progress(callback, current, total, "sitemap URLs...",
verb: "Collecting",
range: {0.18, 0.30},
message_style: :verb_label_count
)
end
@spec report_validation_collection_progress(callback(), non_neg_integer(), integer()) :: :ok
def report_validation_collection_progress(nil, _current, _total), do: :ok
def report_validation_collection_progress(_callback, _current, total) when total <= 0, do: :ok
def report_validation_collection_progress(callback, current, total) do
progress = min(0.49, 0.30 + current / total * 0.19)
callback.(progress, "Collecting sitemap URLs... #{current}/#{total}")
:ok
BDS.ProgressReporter.report_count_progress(callback, current, total, "sitemap URLs...",
verb: "Collecting",
range: {0.30, 0.49},
message_style: :verb_label_count
)
end
@spec report_snapshot_stage_progress(stage_callback(), atom(), non_neg_integer(), integer()) ::
@@ -83,12 +73,15 @@ defmodule BDS.Generation.Progress do
end
@spec report_validation_compare_progress(callback(), non_neg_integer(), integer()) :: :ok
def report_validation_compare_progress(nil, _current, _total), do: :ok
def report_validation_compare_progress(_callback, _current, total) when total <= 0, do: :ok
def report_validation_compare_progress(callback, current, total) do
progress = min(0.99, 0.5 + current / total * 0.49)
callback.(progress, "Comparing sitemap to html pages... #{current}/#{total}")
:ok
BDS.ProgressReporter.report_count_progress(
callback,
current,
total,
"sitemap to html pages...",
verb: "Comparing",
range: {0.5, 0.99},
message_style: :verb_label_count
)
end
end

View File

@@ -1,45 +1,31 @@
defmodule BDS.Maintenance.Progress do
@moduledoc false
def progress_callback(opts) do
case Keyword.get(opts, :on_progress) do
callback when is_function(callback, 2) -> callback
_other -> nil
end
end
def report_metadata_diff_phase(nil, _current, _total, _label), do: :ok
def progress_callback(opts), do: BDS.ProgressReporter.callback(opts)
def report_metadata_diff_phase(callback, current, total, label) do
value = if total <= 1, do: 0.0, else: (current - 1) / total
callback.(value, "#{label} (#{current}/#{total})")
:ok
progress = if total <= 1, do: 0.0, else: (current - 1) / total
BDS.ProgressReporter.report_phase(callback, progress, "#{label} (#{current}/#{total})")
end
def report_metadata_diff_complete(nil), do: :ok
def report_metadata_diff_complete(callback) do
callback.(1.0, "Metadata diff complete")
:ok
end
def report_started(nil, _total, _label), do: :ok
def report_started(callback, 0, label) do
callback.(1.0, label)
:ok
BDS.ProgressReporter.report_phase(callback, 1.0, "Metadata diff complete")
end
def report_started(callback, total, label) do
callback.(0.05, "#{label} (0/#{total})")
:ok
BDS.ProgressReporter.report_count_started(callback, total, label,
verb: nil,
start_progress: 0.05,
empty_message: label,
message_style: :label
)
end
def report_progress(nil, _current, _total, _label), do: :ok
def report_progress(_callback, _current, 0, _label), do: :ok
def report_progress(callback, current, total, label) do
callback.(0.05 + 0.95 * (current / total), "#{label} (#{current}/#{total})")
:ok
BDS.ProgressReporter.report_count_progress(callback, current, total, label,
verb: nil,
start_progress: 0.05,
message_style: :label
)
end
end

View File

@@ -3,6 +3,15 @@ defmodule BDS.ProgressReporter do
@typedoc "A 2-arity progress callback `(progress :: float(), message :: String.t()) -> any()`."
@type callback :: (float(), String.t() -> any()) | nil
@type message_style :: :verb_label_parenthesized | :prefix_count | :verb_label_count | :label
@type count_opts :: [
{:verb, String.t() | nil},
{:start_progress, float()},
{:range, {float(), float()}},
{:empty_message, String.t()},
{:empty_suffix, String.t()},
{:message_style, message_style()}
]
@spec callback(keyword()) :: callback()
def callback(opts) do
@@ -22,27 +31,54 @@ defmodule BDS.ProgressReporter do
end
end
@spec report_rebuild_started(callback(), non_neg_integer(), String.t()) :: :ok
def report_rebuild_started(nil, _total, _label), do: :ok
@spec report_count_started(callback(), non_neg_integer(), String.t(), count_opts()) :: :ok
def report_count_started(callback, total, label, opts \\ [])
def report_count_started(nil, _total, _label, _opts), do: :ok
def report_rebuild_started(callback, 0, label) do
callback.(1.0, "No #{label} found")
def report_count_started(callback, 0, label, opts) do
callback.(1.0, empty_message(label, opts))
:ok
end
def report_rebuild_started(callback, total, label) do
callback.(0.05, "Rebuilding #{label} (0/#{total})")
def report_count_started(callback, total, label, opts) do
callback.(start_progress(opts), count_message(0, total, label, opts))
:ok
end
@doc """
Reports counted progress (`current` out of `total`) through `callback`.

The progress value and the message are derived from `opts` (see `t:count_opts/0`).
Nothing is reported when `callback` is `nil` or when `total` is zero or negative.
Always returns `:ok`.
"""
@spec report_count_progress(
        callback(),
        non_neg_integer(),
        integer(),
        String.t(),
        count_opts()
      ) :: :ok
def report_count_progress(callback, current, total, label, opts \\ [])
def report_count_progress(nil, _current, _total, _label, _opts), do: :ok

# A zero total would divide by zero and a negative total would yield negative
# progress; callers previously guarded `total <= 0` themselves, so keep that contract here.
def report_count_progress(_callback, _current, total, _label, _opts) when total <= 0, do: :ok

def report_count_progress(callback, current, total, label, opts) do
  callback.(count_progress(current, total, opts), count_message(current, total, label, opts))
  :ok
end
# Announces the start of a rebuild over `total` items described by `label`.
# Delegates to report_count_started/4 with the "Rebuilding" verb, a starting
# progress of 0.05, the "found" empty suffix and the parenthesized count style.
@spec report_rebuild_started(callback(), non_neg_integer(), String.t()) :: :ok
def report_rebuild_started(callback, total, label) do
report_count_started(callback, total, label,
verb: "Rebuilding",
start_progress: 0.05,
empty_suffix: "found",
message_style: :verb_label_parenthesized
)
end
@spec report_rebuild_progress(callback(), non_neg_integer(), non_neg_integer(), String.t()) ::
:ok
def report_rebuild_progress(nil, _current, _total, _label), do: :ok
def report_rebuild_progress(_callback, _current, 0, _label), do: :ok
def report_rebuild_progress(callback, current, total, label) do
callback.(0.05 + 0.95 * (current / total), "Rebuilding #{label} (#{current}/#{total})")
:ok
report_count_progress(callback, current, total, label,
verb: "Rebuilding",
start_progress: 0.05,
message_style: :verb_label_parenthesized
)
end
@spec report_phase(callback(), float(), String.t()) :: :ok
@@ -52,4 +88,29 @@ defmodule BDS.ProgressReporter do
callback.(progress, message)
:ok
end
# Maps `current / total` onto a progress interval.
#
# The interval is `opts[:range]` when given, otherwise it runs from
# `opts[:start_progress]` (default 0.0) to 1.0. Expects a non-zero `total`.
defp count_progress(current, total, opts) do
  {start_value, end_value} = Keyword.get(opts, :range, {start_progress(opts), 1.0})

  # Clamp the ratio so an overshooting counter (current > total) can never push
  # the reported value past the end of the interval, nor below its start.
  ratio = min(max(current / total, 0.0), 1.0)

  start_value + (end_value - start_value) * ratio
end

# Progress value reported before any item has been processed.
defp start_progress(opts), do: Keyword.get(opts, :start_progress, 0.0)
# Message used when there is nothing to count: an explicit `:empty_message`
# wins, otherwise "No <label> <suffix>" with `:empty_suffix` defaulting to "found".
defp empty_message(label, opts) do
  if Keyword.has_key?(opts, :empty_message) do
    Keyword.fetch!(opts, :empty_message)
  else
    suffix = Keyword.get(opts, :empty_suffix, "found")
    "No #{label} #{suffix}"
  end
end
# Builds the "<current>/<total>" progress message in the layout selected by
# `opts[:message_style]` (default `:verb_label_parenthesized`).
# An unknown style raises `CaseClauseError`.
defp count_message(current, total, label, opts) do
  counts = "#{current}/#{total}"

  case Keyword.get(opts, :message_style, :verb_label_parenthesized) do
    :prefix_count -> "#{verb_prefix(opts)}#{counts} #{label}"
    :verb_label_count -> "#{verb_prefix(opts)}#{label} #{counts}"
    :label -> "#{label} (#{counts})"
    :verb_label_parenthesized -> "#{verb_prefix(opts)}#{label} (#{counts})"
  end
end

# Verb followed by a separating space, defaulting to "Processing".
# An explicit `verb: nil` (allowed by `count_opts`) omits the verb entirely
# instead of leaving a stray leading space in the message.
defp verb_prefix(opts) do
  case Keyword.get(opts, :verb, "Processing") do
    nil -> ""
    verb -> "#{verb} "
  end
end
end

View File

@@ -248,24 +248,21 @@ defmodule BDS.Search do
defp progress_callback(opts), do: ProgressReporter.callback(opts)
defp report_reindex_started(nil, _total, _label), do: :ok
defp report_reindex_started(callback, 0, label) do
callback.(1.0, "No #{label} to reindex")
:ok
end
defp report_reindex_started(callback, total, label) do
callback.(0.0, "Reindexing 0/#{total} #{label}")
:ok
ProgressReporter.report_count_started(callback, total, label,
verb: "Reindexing",
start_progress: 0.0,
empty_suffix: "to reindex",
message_style: :prefix_count
)
end
defp report_reindex_progress(nil, _current, _total, _label), do: :ok
defp report_reindex_progress(_callback, _current, 0, _label), do: :ok
defp report_reindex_progress(callback, current, total, label) do
callback.(current / total, "Reindexing #{current}/#{total} #{label}")
:ok
ProgressReporter.report_count_progress(callback, current, total, label,
verb: "Reindexing",
start_progress: 0.0,
message_style: :prefix_count
)
end
defp insert_post_index(%Post{} = post) do