fix: more work on metadata diff

This commit is contained in:
2026-04-27 11:40:20 +02:00
parent 53dd9deeab
commit 59833dcabe
7 changed files with 255 additions and 44 deletions

View File

@@ -61,6 +61,25 @@ defmodule BDS.Embeddings do
end
end
# Re-syncs the embeddings of the given posts within one project and then
# rebuilds the project's snapshot a single time.
#
# Duplicate ids are collapsed and only posts that belong to `project_id` are
# loaded. Returns `{:ok, ids}` with the ids of the posts that were loaded, or
# `{:ok, []}` when embeddings are not enabled for the project.
def repair_posts(project_id, post_ids) when is_binary(project_id) and is_list(post_ids) do
  if enabled_for_project?(project_id) do
    wanted_ids = Enum.uniq(post_ids)

    query =
      from post in Post,
        where: post.project_id == ^project_id and post.id in ^wanted_ids,
        order_by: [asc: post.created_at, asc: post.slug]

    matched_posts = Repo.all(query)

    # Each post is synced with `refresh_index: false`; the snapshot is rebuilt
    # once for the whole batch right after.
    Enum.each(matched_posts, fn post ->
      sync_post_if_enabled(post, refresh_index: false)
    end)

    :ok = rebuild_snapshot(project_id)

    {:ok, for(post <- matched_posts, do: post.id)}
  else
    {:ok, []}
  end
end
def rebuild_project(project_id) when is_binary(project_id) do
if enabled_for_project?(project_id) do
posts =
@@ -83,12 +102,6 @@ defmodule BDS.Embeddings do
def diff_reports(project_id) when is_binary(project_id) do
if enabled_for_project?(project_id) do
snapshot_entries =
case Index.read(project_id) do
{:ok, snapshot} -> Map.get(snapshot, "entries", %{})
_other -> %{}
end
keys_by_post =
Repo.all(from key in Key, where: key.project_id == ^project_id)
|> Map.new(&{&1.post_id, &1})
@@ -97,15 +110,14 @@ defmodule BDS.Embeddings do
|> Enum.flat_map(fn post ->
expected_hash = post_content_hash(post)
key = Map.get(keys_by_post, post.id)
snapshot_entry = Map.get(snapshot_entries, post.id)
differences =
[
diff_field("content_hash", key && key.content_hash, expected_hash),
diff_field(
"snapshot_content_hash",
snapshot_entry && snapshot_entry["content_hash"],
key && key.content_hash
"embedding",
current_embedding_status(key, expected_hash),
expected_embedding_status(key, expected_hash)
)
]
|> Enum.reject(&is_nil/1)
@@ -136,6 +148,10 @@ defmodule BDS.Embeddings do
case Repo.get_by(Key, post_id: post.id, project_id: post.project_id) do
%Key{content_hash: ^content_hash} ->
if Keyword.get(opts, :refresh_index, true) and snapshot_content_hash(post.project_id, post.id) != content_hash do
:ok = rebuild_snapshot(post.project_id)
end
:ok
existing_key ->
@@ -485,6 +501,31 @@ defmodule BDS.Embeddings do
Index.rebuild(project_id, model_id: model_id(), dimensions: dimensions())
end
# Looks up the content hash recorded for `post_id` in the project's index
# snapshot. Yields `nil` when the snapshot cannot be read or has no such entry.
defp snapshot_content_hash(project_id, post_id) do
  with {:ok, snapshot} <- Index.read(project_id) do
    get_in(snapshot, ["entries", post_id, "content_hash"])
  else
    _unreadable -> nil
  end
end
# Classifies the stored embedding for a post as "missing", "stale" or "ready".
#
#   * no key row at all, or a key whose vector is nil/empty -> "missing"
#   * a key whose content hash differs from `expected_hash`  -> "stale"
#   * anything else                                           -> "ready"
defp current_embedding_status(nil, _expected_hash), do: "missing"

defp current_embedding_status(%Key{vector: vector, content_hash: stored_hash}, expected_hash) do
  cond do
    # The missing-vector check comes first so it wins over a hash mismatch.
    vector in [nil, ""] -> "missing"
    stored_hash != expected_hash -> "stale"
    true -> "ready"
  end
end
# Target status used on the "expected" side of the embedding diff: "ready"
# when the current status is already "ready", otherwise "re-embed required".
defp expected_embedding_status(key, expected_hash) do
  if current_embedding_status(key, expected_hash) == "ready" do
    "ready"
  else
    "re-embed required"
  end
end
defp diff_field(name, db_value, file_value) do
db_value = if(is_binary(db_value), do: db_value, else: db_value || "")
file_value = if(is_binary(file_value), do: file_value, else: file_value || "")