fix: work on step 12

This commit is contained in:
2026-04-30 16:55:00 +02:00
parent a6033cb86a
commit 8358f9000e
15 changed files with 893 additions and 192 deletions

View File

@@ -243,11 +243,15 @@ defmodule BDS.Desktop.Automation do
{messages, buffer} = split_driver_buffer(state.driver_buffer)
case Enum.reduce_while(messages, {%{state | driver_buffer: buffer}, nil}, fn message, {acc, _} ->
decoded = Jason.decode!(message)
case decode_driver_message(message) do
:skip ->
{:cont, {acc, nil}}
case matcher.(decoded) do
{:ok, reply} -> {:halt, {acc, reply}}
:continue -> {:cont, {acc, nil}}
{:ok, decoded} ->
case matcher.(decoded) do
{:ok, reply} -> {:halt, {acc, reply}}
:continue -> {:cont, {acc, nil}}
end
end
end) do
{state, nil} ->
@@ -282,6 +286,24 @@ defmodule BDS.Desktop.Automation do
end
end
# Attempts to decode one raw driver line as a JSON object.
#
# Returns `{:ok, decoded}` when the trimmed line starts with "{" and parses
# as JSON; returns `:skip` for blank lines, lines that do not start with "{",
# and lines that fail to parse.
defp decode_driver_message(message) do
  case String.trim(message) do
    "{" <> _rest = candidate ->
      case Jason.decode(candidate) do
        {:ok, decoded} -> {:ok, decoded}
        {:error, _reason} -> :skip
      end

    _blank_or_non_object ->
      :skip
  end
end
defp wait_for_server(base_url) do
deadline = System.monotonic_time(:millisecond) + @ready_timeout
do_wait_for_server(base_url, deadline)

View File

@@ -106,7 +106,7 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
Task.Supervisor.async_nolink(BDS.Tasks.TaskSupervisor, fn ->
ImportAnalysis.analyze_wxr(project_id, wxr_file_path, definition.uploads_folder_path,
on_progress: fn step, detail ->
send(live_view_pid, {:import_analysis_progress, definition_id, step, detail})
send(live_view_pid, {:import_analysis_progress, definition_id, translate_phase(step), detail})
end
)
end)
@@ -165,6 +165,8 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
)
end)
progress_phase = translate_execution_phase("posts")
:ok = allow_repo_sandbox(task.pid)
socket
@@ -176,10 +178,11 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
error: nil,
count: counts.total,
result: nil,
phase: translated("importAnalysis.executionStarting"),
phase: progress_phase,
current: 0,
total: counts.total,
detail: nil,
eta: nil,
ref: task.ref
})
)
@@ -344,16 +347,20 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
end
def note_execution_progress(socket, definition_id, phase, current, total, detail, reload) do
{detail_text, eta} = decompose_progress_detail(detail)
translated_phase = translate_execution_phase(phase)
socket
|> assign(
:import_editor_execution_states,
Map.update(socket.assigns.import_editor_execution_states, definition_id, default_execution_state(), fn state ->
state
|> Map.put(:is_executing, true)
|> Map.put(:phase, phase)
|> Map.put(:phase, translated_phase)
|> Map.put(:current, current)
|> Map.put(:total, total)
|> Map.put(:detail, detail)
|> Map.put(:detail, detail_text)
|> Map.put(:eta, eta)
end)
)
|> reload.(socket.assigns.workbench)
@@ -595,6 +602,9 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
<div class="import-stat-cards">
<.stat_card label={translated("importAnalysis.posts")} stats={@report.post_stats} />
<%= if Map.get(@report, :other_stats) && Map.get(@report.other_stats, :total, 0) > 0 do %>
<.other_stat_card label={translated("importAnalysis.other")} stats={@report.other_stats} />
<% end %>
<.stat_card label={translated("importAnalysis.pages")} stats={@report.page_stats} />
<.media_stat_card label={translated("importAnalysis.media")} stats={@report.media_stats} />
<.taxonomy_stat_card label={translated("importAnalysis.categories")} stats={@report.category_stats} />
@@ -610,6 +620,7 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
<span class="distribution-year"><%= row.year %></span>
<div class="distribution-bar-container">
<div class="distribution-bar distribution-bar-posts" style={"width: #{distribution_width(row.post_count, @report.date_distribution, :post_count)}%;"}></div>
<div class="distribution-bar distribution-bar-media" style={"width: #{distribution_width(row.media_count, @report.date_distribution, :media_count)}%;"}></div>
</div>
<span class="distribution-count"><%= row.post_count %> / <%= row.media_count %></span>
</div>
@@ -632,6 +643,9 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
<span class="import-detail"><%= @execution_state.detail %></span>
<% end %>
<span class="import-counter"><%= @execution_state.current || 0 %> / <%= @execution_state.total || @counts.total %></span>
<%= if eta = format_eta(Map.get(@execution_state, :eta)) do %>
<span class="import-eta"><%= eta %></span>
<% end %>
</div>
</div>
<% end %>
@@ -741,22 +755,52 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
</section>
<% end %>
<%= if Enum.any?(Map.get(@report, :macros, [])) do %>
<% macros = Map.get(@report, :macros, %{}) %>
<%= if Enum.any?(Map.get(macros, :discovered, [])) do %>
<section class="import-detail-section">
<button class="import-section-toggle" type="button" phx-click="toggle_import_section" phx-value-section="macros">
<span><%= translated("importAnalysis.macrosWithCount", %{count: length(@report.macros)}) %></span>
<span><%= translated("importAnalysis.macrosWithCount", %{count: macros.total || length(macros.discovered)}) %></span>
<span class="toggle-icon"><%= if @sections.macros, do: "▾", else: "▸" %></span>
</button>
<%= if @sections.macros do %>
<div class="macros-summary">
<span class="macros-mapped"><%= translated("importAnalysis.mappedCount", %{count: macros.mapped_count || 0}) %></span>
<span class="macros-unmapped"><%= translated("importAnalysis.unmappedCount", %{count: macros.unmapped_count || 0}) %></span>
</div>
<div class="macros-list">
<%= for macro <- @report.macros do %>
<div class="macro-item unmapped">
<%= for macro <- macros.discovered do %>
<div class={"macro-item #{if macro.mapped, do: "mapped", else: "unmapped"}"}>
<div class="macro-header">
<span class="macro-name"><%= macro.name %></span>
<span class="macro-status-badge unmapped"><%= translated("importAnalysis.macroStatusUnknown") %></span>
<span class="macro-count"><%= translated("importAnalysis.macroUses", %{count: macro.usage_count}) %></span>
<span class={"macro-status-badge #{if macro.mapped, do: "mapped", else: "unmapped"}"}>
<%= if macro.mapped, do: translated("importAnalysis.macroStatusMapped"), else: translated("importAnalysis.macroStatusUnknown") %>
</span>
<span class="macro-count"><%= translated("importAnalysis.macroUses", %{count: macro.total_count}) %></span>
</div>
<%= if Enum.any?(Map.get(macro, :usages, [])) do %>
<div class="macro-usages">
<%= for usage <- macro.usages do %>
<div class="macro-usage">
<span class="macro-usage-params">
<%= if Enum.any?(Map.get(usage, :params, %{})) do %>
<%= for {k, v} <- usage.params do %>
<span class="macro-usage-param"><%= k %>=<%= v %></span>
<% end %>
<% else %>
<%= translated("importAnalysis.noParameters") %>
<% end %>
</span>
<span class="macro-usage-count"><%= translated("importAnalysis.macroUses", %{count: usage.count}) %></span>
</div>
<% end %>
</div>
<% end %>
<%= if Enum.any?(Map.get(macro, :post_slugs, [])) do %>
<div class="macro-post-slugs">
<%= translated("importAnalysis.usedIn", %{items: Enum.join(Enum.take(macro.post_slugs, 5), ", "), more: if(length(macro.post_slugs) > 5, do: translated("importAnalysis.moreSuffix", %{count: length(macro.post_slugs) - 5}), else: "")}) %>
</div>
<% end %>
</div>
<% end %>
</div>
@@ -939,6 +983,23 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
attr :label, :string, required: true
attr :stats, :map, required: true
# Function component for the "other" stat card.
#
# Renders the `@label` heading, the `:total` value read from `@stats`
# (0 when the key is absent), and one tag per entry in `@stats.types`
# (no tags when the key is absent).
def other_stat_card(assigns) do
~H"""
<div class="import-stat-card import-stat-card-other">
<h3><%= @label %></h3>
<div class="import-stat-number"><%= Map.get(@stats, :total, 0) %></div>
<div class="import-stat-breakdown">
<%= for type <- Map.get(@stats, :types, []) do %>
<span class="import-stat-tag stat-other"><%= type %></span>
<% end %>
</div>
</div>
"""
end
attr :label, :string, required: true
attr :stats, :map, required: true
def media_stat_card(assigns) do
~H"""
<div class="import-stat-card">
@@ -1123,7 +1184,7 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
defp importable_entity_count(items) do
Enum.count(items || [], fn item ->
item.status == "new" or (item.status == "conflict" and Map.get(item, :resolution, "skip") != "skip")
item.status == "new" or (item.status == "conflict" and Map.get(item, :resolution, "ignore") not in ["ignore", "skip"])
end)
end
@@ -1177,6 +1238,58 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
end
defp translated(text, bindings \\ %{}), do: ShellData.translate(text, bindings, Process.get(:bds_ui_locale))
# Maps a known analysis step identifier to its translated label.
# Any other value (unknown string or non-string) is returned unchanged.
defp translate_phase("parsing"), do: translated("importAnalysis.analysisPhase.parsing")
defp translate_phase("scanning"), do: translated("importAnalysis.analysisPhase.scanning")
defp translate_phase("taxonomies"), do: translated("importAnalysis.analysisPhase.taxonomies")
defp translate_phase("posts"), do: translated("importAnalysis.analysisPhase.posts")
defp translate_phase("media"), do: translated("importAnalysis.analysisPhase.media")
defp translate_phase("complete"), do: translated("importAnalysis.analysisPhase.complete")
defp translate_phase(unrecognized), do: unrecognized
# Maps a known execution phase identifier to its translated label.
# Any other value (unknown string or non-string) is returned unchanged.
defp translate_execution_phase("tags"), do: translated("importAnalysis.phase.tags")
defp translate_execution_phase("posts"), do: translated("importAnalysis.phase.posts")
defp translate_execution_phase("media"), do: translated("importAnalysis.phase.media")
defp translate_execution_phase("pages"), do: translated("importAnalysis.phase.pages")
defp translate_execution_phase("complete"), do: translated("importAnalysis.phase.complete")
defp translate_execution_phase(unrecognized), do: unrecognized
# Splits a progress payload into `{detail_text, eta}`.
#
# A map carrying both `:detail` and `:eta` yields its stringified detail and
# the eta as given; any other payload is stringified and paired with a nil eta.
defp decompose_progress_detail(payload) do
  case payload do
    %{detail: text, eta: eta} -> {to_string_or_nil(text), eta}
    plain -> {to_string_or_nil(plain), nil}
  end
end

# Passes nil and binaries through untouched; everything else is rendered
# with `inspect/1`.
defp to_string_or_nil(value) do
  cond do
    is_nil(value) -> nil
    is_binary(value) -> value
    true -> inspect(value)
  end
end
# Formats a remaining-time estimate given in milliseconds as a translated label.
#
# Non-negative integers below one minute use the seconds message; longer
# values use the minutes/seconds message. Anything else (nil, negatives,
# non-integers) yields nil.
def format_eta(ms) when is_integer(ms) and ms >= 0 do
  value =
    case div(ms, 1000) do
      total_seconds when total_seconds < 60 ->
        translated("importAnalysis.etaSeconds", %{count: total_seconds})

      total_seconds ->
        translated("importAnalysis.etaMinutes", %{
          minutes: div(total_seconds, 60),
          seconds: rem(total_seconds, 60)
        })
    end

  translated("importAnalysis.eta", %{value: value})
end

def format_eta(_not_an_eta), do: nil
defp present?(value), do: value not in [nil, ""]
defp blank?(value), do: value in [nil, ""]
defp blank_to_nil(""), do: nil
@@ -1210,6 +1323,7 @@ defmodule BDS.Desktop.ShellLive.ImportEditor do
current: 0,
total: 0,
detail: nil,
eta: nil,
ref: nil
}
end

View File

@@ -68,6 +68,10 @@ defmodule BDS.ImportAnalysis do
tag_items = Enum.map(wxr_data.tags, &analyze_taxonomy_item(&1, existing_tag_set))
notify_progress(on_progress, "Discovering macros...")
macro_summary = analyze_macros(wxr_data.posts ++ wxr_data.pages)
posts_only = Enum.filter(analyzed_posts, &(&1.post_type == "post"))
other_posts = Enum.reject(analyzed_posts, &(&1.post_type == "post"))
%{
source_file: wxr_file_path,
@@ -77,14 +81,15 @@ defmodule BDS.ImportAnalysis do
language: wxr_data.site.language,
source_file: wxr_file_path
},
post_stats: summarize_post_items(analyzed_posts),
post_stats: summarize_post_items(posts_only),
other_stats: summarize_other_items(other_posts),
page_stats: summarize_post_items(analyzed_pages),
media_stats: summarize_media_items(analyzed_media),
category_stats: summarize_taxonomy_items(category_items),
tag_stats: summarize_taxonomy_items(tag_items),
date_distribution: date_distribution(analyzed_posts, analyzed_pages, analyzed_media),
conflicts: conflicts(analyzed_posts, analyzed_pages, analyzed_media),
macros: macros(wxr_data.posts ++ wxr_data.pages),
macros: macro_summary,
items: %{
posts: Enum.map(analyzed_posts, &summary_item/1),
pages: Enum.map(analyzed_pages, &summary_item/1),
@@ -110,17 +115,18 @@ defmodule BDS.ImportAnalysis do
cond do
existing_by_slug && existing_by_slug.checksum == content_checksum && not is_nil(existing_by_slug.checksum) -> {"update", existing_by_slug}
existing_by_slug -> {"conflict", existing_by_slug}
existing_by_checksum -> {"duplicate", existing_by_checksum}
existing_by_checksum -> {"content-duplicate", existing_by_checksum}
true -> {"new", nil}
end
%{
item_type: item_type,
post_type: wxr_post.post_type || item_type,
wp_id: wxr_post.wp_id,
title: wxr_post.title,
slug: wxr_post.slug,
status: status,
resolution: if(status == "conflict", do: "skip", else: nil),
resolution: if(status == "conflict", do: "ignore", else: nil),
existing_id: existing && existing.id,
existing_title: existing && existing.title,
author: blank_to_nil(wxr_post.creator),
@@ -159,7 +165,7 @@ defmodule BDS.ImportAnalysis do
cond do
existing_by_name && existing_by_name.checksum == file_checksum && not is_nil(existing_by_name.checksum) -> {"update", file_checksum, existing_by_name}
existing_by_name -> {"conflict", file_checksum, existing_by_name}
existing_by_checksum -> {"duplicate", file_checksum, existing_by_checksum}
existing_by_checksum -> {"content-duplicate", file_checksum, existing_by_checksum}
true -> {"new", file_checksum, nil}
end
end
@@ -170,8 +176,9 @@ defmodule BDS.ImportAnalysis do
title: wxr_media.title,
filename: wxr_media.filename,
relative_path: wxr_media.relative_path,
url: wxr_media.url,
status: status,
resolution: if(status == "conflict", do: "skip", else: nil),
resolution: if(status == "conflict", do: "ignore", else: nil),
existing_id: existing && existing.id,
existing_title: existing && existing.title,
mime_type: wxr_media.mime_type,
@@ -209,6 +216,7 @@ defmodule BDS.ImportAnalysis do
defp summary_item(item) do
base = %{
item_type: item.item_type,
post_type: Map.get(item, :post_type, item.item_type),
title: item.title,
slug: item.slug,
status: item.status
@@ -222,7 +230,17 @@ defmodule BDS.ImportAnalysis do
new_count: count_status(items, "new"),
update_count: count_status(items, "update"),
conflict_count: count_status(items, "conflict"),
duplicate_count: count_status(items, "duplicate")
duplicate_count: count_status(items, "content-duplicate")
}
end
# Summarizes analyzed items whose post type is neither "post" nor "page".
#
# Returns the overall item count, the per-status counts, and the distinct
# non-nil post types found. `:total` is included because the "other" stat
# card reads `Map.get(stats, :total, 0)` both to decide whether to render
# and for the number it displays; without it the card would never appear.
defp summarize_other_items(items) do
  post_types =
    for item <- items,
        post_type = Map.get(item, :post_type),
        not is_nil(post_type),
        uniq: true,
        do: post_type

  %{
    total: length(items),
    new_count: count_status(items, "new"),
    update_count: count_status(items, "update"),
    conflict_count: count_status(items, "conflict"),
    duplicate_count: count_status(items, "content-duplicate"),
    types: post_types
  }
end
@@ -231,7 +249,7 @@ defmodule BDS.ImportAnalysis do
new_count: count_status(items, "new"),
update_count: count_status(items, "update"),
conflict_count: count_status(items, "conflict"),
duplicate_count: count_status(items, "duplicate"),
duplicate_count: count_status(items, "content-duplicate"),
missing_count: count_status(items, "missing")
}
end
@@ -271,43 +289,97 @@ defmodule BDS.ImportAnalysis do
%{
item_type: item.item_type,
item_name: Map.get(item, :slug) || Map.get(item, :filename),
resolution: item.resolution || "skip",
resolution: item.resolution || "ignore",
source_title: item.title,
existing_title: item.existing_title
}
end)
end
defp macros(items) do
items
|> Enum.flat_map(&discover_item_macros/1)
|> Enum.group_by(& &1.name)
|> Enum.map(fn {name, usages} ->
%{
name: name,
usage_count: length(usages),
parameters: usages |> Enum.flat_map(& &1.parameters) |> Enum.uniq() |> Enum.sort(),
validation_status: "unknown"
}
end)
|> Enum.sort_by(& &1.name)
# Builds the macro (shortcode) summary for the given items.
#
# Every match of @shortcode_regex in an item's content is grouped by its
# lower-cased macro name. Per macro this tracks the total number of matches,
# a count per distinct parameter set, and the slugs of the items it appeared in.
#
# Returns a map with `:total`, `:mapped_count`, `:unmapped_count` and
# `:discovered` (the per-macro entries sorted by name).
defp analyze_macros(items) do
# Pass 1: accumulate raw per-macro data keyed by lower-cased macro name.
macro_map =
Enum.reduce(items, %{}, fn item, acc ->
slug = Map.get(item, :slug)
Regex.scan(@shortcode_regex, item.content || "")
|> Enum.reduce(acc, fn [_match, name, raw_params], inner_acc ->
name = String.downcase(name)
params = parse_macro_params(raw_params)
# Serialized form of the params, used as the key for counting identical usages.
params_key = serialize_params(params)
existing =
Map.get(inner_acc, name, %{
name: name,
total_count: 0,
usages: %{},
post_slugs: MapSet.new()
})
usage =
existing.usages
|> Map.get(params_key, %{params: params, count: 0})
|> Map.update(:count, 1, &(&1 + 1))
updated = %{
existing
| total_count: existing.total_count + 1,
usages: Map.put(existing.usages, params_key, usage),
post_slugs:
# Only binary slugs are recorded; items without one still count toward totals.
if(is_binary(slug), do: MapSet.put(existing.post_slugs, slug), else: existing.post_slugs)
}
Map.put(inner_acc, name, updated)
end)
end)
# Pass 2: convert the accumulator into the report shape.
discovered =
macro_map
|> Map.values()
|> Enum.map(fn macro ->
%{
name: macro.name,
# Every discovered macro is reported as unmapped here.
mapped: false,
total_count: macro.total_count,
usages:
macro.usages
|> Map.values()
|> Enum.map(fn usage ->
%{
params: usage.params,
count: usage.count,
validation_status: "unknown"
}
end),
post_slugs: MapSet.to_list(macro.post_slugs) |> Enum.sort()
}
end)
|> Enum.sort_by(& &1.name)
%{
total: length(discovered),
mapped_count: Enum.count(discovered, & &1.mapped),
unmapped_count: Enum.count(discovered, &(not &1.mapped)),
discovered: discovered
}
end
defp discover_item_macros(item) do
Regex.scan(@shortcode_regex, item.content || "")
|> Enum.map(fn [_match, name, raw_params] ->
%{
name: String.downcase(name),
parameters: macro_parameters(raw_params)
}
end)
end
defp macro_parameters(raw_params) do
defp parse_macro_params(raw_params) do
Regex.scan(@param_regex, raw_params)
|> Enum.map(fn [_, key | _rest] -> key end)
|> Enum.uniq()
|> Enum.sort()
|> Enum.map(fn captures ->
key = Enum.at(captures, 1)
value = Enum.at(captures, 2) || Enum.at(captures, 3) || Enum.at(captures, 4) || ""
{key, value}
end)
|> Map.new()
end
# Produces a string key for a parameter map: "k=v" pairs ordered by key and
# joined with "|". An empty map yields "".
defp serialize_params(params) do
  params
  |> Enum.sort_by(&elem(&1, 0))
  |> Enum.map_join("|", fn {key, value} -> "#{key}=#{value}" end)
end
defp increment_year(nil, acc), do: acc
@@ -319,12 +391,30 @@ defmodule BDS.ImportAnalysis do
end
end
defp year_from(value) when is_integer(value), do: value
# Derives a year from an integer.
#
# Values above 100_000_000_000 are read as Unix milliseconds, values above
# 1_000_000_000 as Unix seconds, and anything smaller is returned as-is.
# Timestamps that cannot be converted yield nil.
defp year_from(value) when is_integer(value) do
  unit =
    cond do
      value > 100_000_000_000 -> :millisecond
      value > 1_000_000_000 -> :second
      true -> nil
    end

  if unit do
    # DateTime.from_unix/2 already returns a UTC datetime.
    case DateTime.from_unix(value, unit) do
      {:ok, datetime} -> datetime.year
      {:error, _reason} -> nil
    end
  else
    value
  end
end
defp year_from(value) when is_binary(value) do
case Regex.run(~r/(\d{4})/, value) do
[_, year] -> String.to_integer(year)
_other -> nil
normalized = String.replace(value, " ", "T")
case NaiveDateTime.from_iso8601(normalized) do
{:ok, naive} -> naive.year
_other ->
case DateTime.from_iso8601(value) do
{:ok, datetime, _offset} -> datetime.year
_ ->
case Regex.run(~r/(\d{4})/, value) do
[_, year] -> String.to_integer(year)
_other -> nil
end
end
end
end

View File

@@ -13,10 +13,21 @@ defmodule BDS.ImportExecution do
default_author = Keyword.get(opts, :default_author) || project_default_author(project_id)
uploads_folder_path = Keyword.get(opts, :uploads_folder_path)
on_progress = Keyword.get(opts, :on_progress, fn _phase, _current, _total, _detail -> :ok end)
taxonomies = taxonomy_items(normalized_report)
post_items = import_items(normalized_report, :posts)
category_items = List.wrap(get_in(normalized_report, [:items, :categories]))
tag_items = List.wrap(get_in(normalized_report, [:items, :tags]))
category_mapping = build_taxonomy_mapping(category_items)
tag_mapping = build_taxonomy_mapping(tag_items)
post_items =
normalized_report
|> import_items(:posts)
|> Enum.filter(&(Map.get(&1, :post_type, "post") == "post"))
page_items = import_items(normalized_report, :pages)
media_items = import_items(normalized_report, :media)
taxonomy_total = length(category_items) + length(tag_items)
result = %{
success: true,
@@ -24,85 +35,87 @@ defmodule BDS.ImportExecution do
posts: %{imported: 0, skipped: 0, errors: 0},
media: %{imported: 0, skipped: 0, errors: 0},
pages: %{imported: 0, skipped: 0, errors: 0},
wp_id_to_post_id: %{},
errors: []
}
notify_progress(on_progress, "tags", 0, length(taxonomies), "Creating tags...")
result = execute_taxonomies(taxonomies, project_id, result, on_progress)
started_at = System.monotonic_time(:millisecond)
notify_progress(on_progress, "posts", 0, length(post_items), "Importing posts...")
result = execute_posts(post_items, project_id, default_author, result, on_progress)
notify_progress(on_progress, "tags", 0, taxonomy_total, "creating_tags", started_at)
result = execute_taxonomies(category_items, tag_items, project_id, result, on_progress, started_at)
notify_progress(on_progress, "pages", 0, length(page_items), "Importing pages...")
result = execute_pages(page_items, project_id, default_author, result, on_progress)
notify_progress(on_progress, "posts", 0, length(post_items), "importing_posts", started_at)
result = execute_posts(post_items, project_id, default_author, tag_mapping, category_mapping, result, on_progress, :posts, started_at)
notify_progress(on_progress, "media", 0, length(media_items), "Importing media...")
result = execute_media(media_items, project_id, default_author, result, on_progress, uploads_folder_path)
notify_progress(on_progress, "media", 0, length(media_items), "importing_media", started_at)
result = execute_media(media_items, project_id, default_author, result, on_progress, uploads_folder_path, started_at)
notify_progress(on_progress, "complete", 1, 1, "Import complete")
notify_progress(on_progress, "pages", 0, length(page_items), "importing_pages", started_at)
result = execute_posts(page_items, project_id, default_author, tag_mapping, category_mapping, result, on_progress, :pages, started_at)
notify_progress(on_progress, "complete", 1, 1, "import_complete", started_at)
{:ok, result}
rescue
error -> {:error, %{message: Exception.message(error)}}
end
defp execute_taxonomies(taxonomies, project_id, result, on_progress) do
Enum.reduce(taxonomies, result, fn item, acc ->
current = acc.tags.created + acc.tags.skipped + 1
if item.exists_in_project || item.mapped_to do
notify_progress(on_progress, "tags", current, length(taxonomies), "Skipping tag: #{item.name}")
put_in(acc, [:tags, :skipped], acc.tags.skipped + 1)
else
case Tags.create_tag(%{project_id: project_id, name: item.name}) do
{:ok, _tag} ->
notify_progress(on_progress, "tags", current, length(taxonomies), "Created tag: #{item.name}")
put_in(acc, [:tags, :created], acc.tags.created + 1)
{:error, _reason} ->
notify_progress(on_progress, "tags", current, length(taxonomies), "Skipping tag: #{item.name}")
put_in(acc, [:tags, :skipped], acc.tags.skipped + 1)
end
end
end)
end
defp execute_posts(items, project_id, default_author, result, on_progress) do
total = length(items)
Enum.with_index(items, 1)
|> Enum.reduce(result, fn {item, index}, acc ->
notify_progress(on_progress, "posts", index, total, "Processing: #{item.title}")
execute_post_item(project_id, item, acc, :posts, default_author)
end)
end
defp execute_pages(items, project_id, default_author, result, on_progress) do
total = length(items)
Enum.with_index(items, 1)
|> Enum.reduce(result, fn {item, index}, acc ->
notify_progress(on_progress, "pages", index, total, "Processing: #{item.title}")
execute_post_item(project_id, ensure_page_category(item), acc, :pages, default_author)
end)
end
defp execute_media(items, project_id, default_author, result, on_progress, uploads_folder_path) do
defp execute_taxonomies(category_items, tag_items, project_id, result, on_progress, started_at) do
items = category_items ++ tag_items
total = length(items)
items
|> Enum.with_index(1)
|> Enum.reduce(result, fn {item, index}, acc ->
notify_progress(on_progress, "media", index, total, "Processing: #{item.filename}")
cond do
Map.get(item, :exists_in_project) || not is_nil(Map.get(item, :mapped_to)) ->
notify_progress(on_progress, "tags", index, total, "skipped_tag:#{item.name}", started_at)
put_in(acc, [:tags, :skipped], acc.tags.skipped + 1)
true ->
case Tags.create_tag(%{project_id: project_id, name: item.name}) do
{:ok, _tag} ->
notify_progress(on_progress, "tags", index, total, "created_tag:#{item.name}", started_at)
put_in(acc, [:tags, :created], acc.tags.created + 1)
{:error, _reason} ->
notify_progress(on_progress, "tags", index, total, "skipped_tag:#{item.name}", started_at)
put_in(acc, [:tags, :skipped], acc.tags.skipped + 1)
end
end
end)
end
defp execute_posts(items, project_id, default_author, tag_mapping, category_mapping, result, on_progress, bucket, started_at) do
total = length(items)
phase = Atom.to_string(bucket)
Enum.with_index(items, 1)
|> Enum.reduce(result, fn {item, index}, acc ->
notify_progress(on_progress, phase, index, total, "processing:#{item.title}", started_at)
execute_post_item(project_id, maybe_apply_page_category(item, bucket), acc, bucket, default_author, tag_mapping, category_mapping)
end)
end
defp execute_media(items, project_id, default_author, result, on_progress, uploads_folder_path, started_at) do
total = length(items)
items
|> Enum.with_index(1)
|> Enum.reduce(result, fn {item, index}, acc ->
notify_progress(on_progress, "media", index, total, "processing:#{item.filename}", started_at)
cond do
item.status in ["update", "duplicate", "missing"] ->
item.status == "missing" ->
put_in(acc, [:media, :skipped], acc.media.skipped + 1)
item.status == "conflict" and item.resolution != "import" and item.resolution != "merge" ->
item.status in ["update", "content-duplicate", "duplicate"] ->
put_in(acc, [:media, :skipped], acc.media.skipped + 1)
item.status == "conflict" and resolve_conflict(item) == "ignore" ->
put_in(acc, [:media, :skipped], acc.media.skipped + 1)
true ->
case import_media_item(project_id, item, default_author, uploads_folder_path) do
case import_media_item(project_id, item, default_author, uploads_folder_path, acc) do
{:ok, _media} -> put_in(acc, [:media, :imported], acc.media.imported + 1)
{:error, reason} ->
acc
@@ -114,17 +127,21 @@ defmodule BDS.ImportExecution do
end)
end
defp execute_post_item(project_id, item, result, bucket, default_author) do
defp execute_post_item(project_id, item, result, bucket, default_author, tag_mapping, category_mapping) do
cond do
item.status in ["update", "duplicate"] ->
item.status in ["update", "content-duplicate", "duplicate"] ->
put_in(result, [bucket, :skipped], get_in(result, [bucket, :skipped]) + 1)
item.status == "conflict" and item.resolution not in ["import", "merge"] ->
item.status == "conflict" and resolve_conflict(item) == "ignore" ->
put_in(result, [bucket, :skipped], get_in(result, [bucket, :skipped]) + 1)
item.status == "conflict" and item.resolution == "merge" ->
case merge_post_item(item, default_author) do
{:ok, _post} -> put_in(result, [bucket, :imported], get_in(result, [bucket, :imported]) + 1)
item.status == "conflict" and resolve_conflict(item) == "overwrite" ->
case overwrite_post_item(item, default_author, tag_mapping, category_mapping) do
{:ok, post} ->
result
|> put_in([bucket, :imported], get_in(result, [bucket, :imported]) + 1)
|> track_wp_id(item, post)
{:error, reason} ->
result
|> put_in([bucket, :errors], get_in(result, [bucket, :errors]) + 1)
@@ -133,8 +150,12 @@ defmodule BDS.ImportExecution do
end
true ->
case create_post_item(project_id, item, default_author) do
{:ok, _post} -> put_in(result, [bucket, :imported], get_in(result, [bucket, :imported]) + 1)
case create_post_item(project_id, item, default_author, tag_mapping, category_mapping) do
{:ok, post} ->
result
|> put_in([bucket, :imported], get_in(result, [bucket, :imported]) + 1)
|> track_wp_id(item, post)
{:error, reason} ->
result
|> put_in([bucket, :errors], get_in(result, [bucket, :errors]) + 1)
@@ -144,17 +165,17 @@ defmodule BDS.ImportExecution do
end
end
defp create_post_item(project_id, item, default_author) do
attrs = post_create_attrs(project_id, item, default_author)
defp create_post_item(project_id, item, default_author, tag_mapping, category_mapping) do
attrs = post_create_attrs(project_id, item, default_author, tag_mapping, category_mapping)
with {:ok, post} <- Posts.create_post(attrs),
:ok <- prepare_created_post(post.id, item),
:ok <- prepare_created_post(post.id, item, tag_mapping, category_mapping),
{:ok, published_post} <- maybe_publish(post.id, item) do
{:ok, published_post}
end
end
defp merge_post_item(item, default_author) do
defp overwrite_post_item(item, default_author, tag_mapping, category_mapping) do
case Repo.get(Post, item.existing_id) do
nil -> {:error, :not_found}
@@ -164,39 +185,92 @@ defmodule BDS.ImportExecution do
excerpt: item.excerpt,
content: item.content_markdown,
author: item.author || default_author,
tags: item.tags,
categories: item.categories,
tags: resolve_taxonomy(item.tags, tag_mapping),
categories: resolve_taxonomy(item.categories, category_mapping),
checksum: item.content_checksum
})
end
end
defp import_media_item(project_id, item, default_author, uploads_folder_path) do
defp import_media_item(project_id, item, default_author, uploads_folder_path, result) do
source_path = item.source_file || uploads_source_path(item.relative_path, uploads_folder_path)
checksum = if(source_path != nil and File.exists?(source_path), do: md5(File.read!(source_path)), else: nil)
linked_post_ids = parent_post_ids(item, result)
if source_path && File.exists?(source_path) do
case item.status do
"conflict" when item.resolution == "merge" and item.existing_id ->
with {:ok, _updated_media} <- Media.update_media(item.existing_id, %{title: item.title, alt: item.description, author: default_author}) do
case {item.status, resolve_conflict(item)} do
{"conflict", "overwrite"} when item.existing_id != nil ->
with {:ok, _replaced} <- Media.replace_media_file(item.existing_id, source_path),
{:ok, _updated_media} <-
Media.update_media(item.existing_id, %{
title: item.title,
alt: item.description,
author: default_author
}) do
link_media(linked_post_ids, item.existing_id)
{:ok, Repo.get!(Media.Media, item.existing_id)}
end
_other ->
Media.import_media(%{
_ ->
attrs = %{
project_id: project_id,
source_path: source_path,
title: item.title,
alt: item.description,
author: default_author,
checksum: checksum
})
}
attrs = if linked_post_ids == [], do: attrs, else: Map.put(attrs, :linked_post_ids, linked_post_ids)
case Media.import_media(attrs) do
{:ok, %{id: media_id} = media} ->
link_media(linked_post_ids, media_id)
{:ok, media}
other ->
other
end
end
else
{:error, :missing_source_file}
end
end
# Best-effort linking of one media record to each of the given posts.
# Any error, exit or throw from a single link attempt is swallowed so the
# remaining posts are still processed. Always returns :ok.
defp link_media([], _media_id), do: :ok

defp link_media(post_ids, media_id) when is_list(post_ids) do
  for post_id <- post_ids do
    try do
      Media.link_media_to_post(media_id, post_id)
    rescue
      _ -> :ok
    catch
      _, _ -> :ok
    end
  end

  :ok
end
# Looks up the imported post id for the item's `:parent_wp_id`.
#
# Returns a one-element list with the post id recorded in
# `result.wp_id_to_post_id`, or [] when the item has no parent (nil or 0)
# or the parent has no recorded post id.
defp parent_post_ids(item, result) do
  with wp_id when wp_id not in [nil, 0] <- Map.get(item, :parent_wp_id),
       post_id when not is_nil(post_id) <- Map.get(result.wp_id_to_post_id, wp_id) do
    [post_id]
  else
    _no_parent -> []
  end
end
# Records `wp_id => post.id` in `result.wp_id_to_post_id` when the item has
# an integer `:wp_id` and the post has a non-nil `:id`; otherwise returns
# the result unchanged.
defp track_wp_id(result, item, post) do
  case {item, post} do
    {%{wp_id: wp_id}, %{id: post_id}} when is_integer(wp_id) and not is_nil(post_id) ->
      update_in(result, [:wp_id_to_post_id], fn known -> Map.put(known, wp_id, post_id) end)

    _untrackable ->
      result
  end
end
defp maybe_publish(post_id, item) do
case item.wp_status do
"publish" -> Posts.publish_post(post_id)
@@ -204,7 +278,7 @@ defmodule BDS.ImportExecution do
end
end
defp prepare_created_post(post_id, item) do
defp prepare_created_post(post_id, item, tag_mapping, category_mapping) do
case Repo.get(Post, post_id) do
nil ->
{:error, :not_found}
@@ -222,8 +296,8 @@ defmodule BDS.ImportExecution do
excerpt: item.excerpt,
content: item.content_markdown,
author: item.author,
tags: item.tags,
categories: item.categories,
tags: resolve_taxonomy(item.tags, tag_mapping),
categories: resolve_taxonomy(item.categories, category_mapping),
checksum: item.content_checksum,
created_at: created_at,
updated_at: updated_at,
@@ -238,31 +312,74 @@ defmodule BDS.ImportExecution do
end
defp desired_slug(post, item) do
if item.status == "conflict" and item.resolution == "import" do
if item.status == "conflict" and resolve_conflict(item) == "import" do
post.slug
else
item.slug || post.slug
end
end
defp post_create_attrs(project_id, item, default_author) do
defp post_create_attrs(project_id, item, default_author, tag_mapping, category_mapping) do
%{
project_id: project_id,
title: item.title,
excerpt: item.excerpt,
content: item.content_markdown,
author: item.author || default_author,
tags: item.tags,
categories: item.categories,
tags: resolve_taxonomy(item.tags, tag_mapping),
categories: resolve_taxonomy(item.categories, category_mapping),
checksum: item.content_checksum
}
end
defp ensure_page_category(item) do
categories = (item.categories || []) |> Enum.uniq() |> Enum.concat(["page"]) |> Enum.uniq()
defp maybe_apply_page_category(item, :pages) do
categories = (Map.get(item, :categories) || []) |> Enum.uniq() |> Enum.concat(["page"]) |> Enum.uniq()
%{item | categories: categories}
end
defp maybe_apply_page_category(item, _bucket), do: item
# Builds a lookup from lower-cased taxonomy name to its resolution.
#
# Each entry holds:
#   * `:resolved` - the lower-cased `:mapped_to` target when one is present,
#     otherwise the lower-cased name itself
#   * `:needs_creation` - true only when the item neither exists in the
#     project nor is mapped to another name
#
# `:exists_in_project` is read with `Map.get/2` and treated by truthiness, so
# an item with a missing or nil flag counts as "not existing" instead of
# raising. When several items share a name, the last one wins.
defp build_taxonomy_mapping(items) do
  Map.new(items, fn item ->
    key = item.name |> to_string() |> String.downcase()
    mapped_to = Map.get(item, :mapped_to)
    mapped? = present_string?(mapped_to)
    exists? = Map.get(item, :exists_in_project) not in [nil, false]

    resolved = if mapped?, do: String.downcase(mapped_to), else: key

    {key, %{resolved: resolved, needs_creation: not exists? and not mapped?}}
  end)
end
# Translates taxonomy names through the mapping.
#
# Each name is stringified and lower-cased; when the mapping has an entry
# with a `:resolved` value that value is used, otherwise the lower-cased
# name is kept. Duplicates are removed, keeping first occurrences. Non-list
# input yields [].
defp resolve_taxonomy(items, mapping) when is_list(items) do
  for item <- items, uniq: true do
    key = String.downcase(to_string(item))

    case Map.get(mapping, key) do
      %{resolved: target} -> target
      _unmapped -> key
    end
  end
end

defp resolve_taxonomy(_items, _mapping), do: []
# Returns the normalized conflict resolution for an item, read from its
# `:resolution` key.
defp resolve_conflict(item), do: item |> Map.get(:resolution) |> normalize_resolution()

# Collapses resolution spellings into "ignore", "overwrite" or "import":
# "merge" is treated as "overwrite"; "skip" and any unrecognized value
# (including nil) are treated as "ignore".
defp normalize_resolution(raw) when raw in ["overwrite", "merge"], do: "overwrite"
defp normalize_resolution("import"), do: "import"
defp normalize_resolution(_ignore_skip_or_unknown), do: "ignore"
defp import_items(report, bucket) do
items = get_in(report, [:items, bucket]) || []
details = get_in(report, [:details, bucket]) || []
@@ -323,10 +440,6 @@ defmodule BDS.ImportExecution do
defp parse_timestamp(_value), do: nil
defp taxonomy_items(report) do
List.wrap(get_in(report, [:items, :categories])) ++ List.wrap(get_in(report, [:items, :tags]))
end
defp uploads_source_path(relative_path, uploads_folder_path)
defp uploads_source_path(relative_path, uploads_folder_path)
@@ -336,22 +449,39 @@ defmodule BDS.ImportExecution do
defp uploads_source_path(_relative_path, _uploads_folder_path), do: nil
defp notify_progress(callback, phase, current, total, detail) when is_function(callback, 4) do
defp notify_progress(callback, phase, current, total, detail, started_at) when is_function(callback, 4) do
eta = compute_eta(current, total, started_at)
try do
callback.(phase, current, total, detail)
callback.(phase, current, total, %{detail: detail, eta: eta})
rescue
_error -> :ok
_error ->
try do
callback.(phase, current, total, detail)
rescue
_error -> :ok
end
end
:ok
end
# Estimates the remaining milliseconds by linear extrapolation from the time
# elapsed since `started_at` (a monotonic millisecond timestamp).
#
# Returns 0 once `current` reaches `total`, and nil when the counters are not
# positive integers with `current <= total`.
defp compute_eta(current, total, started_at)
     when is_integer(current) and is_integer(total) and current > 0 and total > 0 and
            current <= total do
  elapsed_ms = System.monotonic_time(:millisecond) - started_at

  case total - current do
    remaining when remaining <= 0 -> 0
    remaining -> trunc(elapsed_ms / current * remaining)
  end
end

defp compute_eta(_current, _total, _started_at), do: nil
# Returns the MD5 digest of the binary as a lower-case hex string.
defp md5(binary) do
  digest = :crypto.hash(:md5, binary)
  Base.encode16(digest, case: :lower)
end
# True only for non-empty binaries.
defp present_string?(value), do: is_binary(value) and value != ""
defp project_default_author(project_id) do
{:ok, metadata} = Metadata.get_project_metadata(project_id)
Map.get(metadata, :default_author)

View File

@@ -20,7 +20,7 @@ defmodule BDS.WxrParser do
[channel] ->
%{
site: parse_site(channel),
posts: parse_items(channel, "post"),
posts: parse_post_like_items(channel),
pages: parse_items(channel, "page"),
media: parse_media(channel),
categories: parse_categories(channel),
@@ -73,6 +73,16 @@ defmodule BDS.WxrParser do
|> Enum.map(&parse_post_item/1)
end
# Parses every direct "item" child of the channel whose "post_type" text is
# neither empty, "attachment" nor "page".
defp parse_post_like_items(channel) do
  for item <- direct_children_named(channel, "item"),
      child_text(item, "post_type") not in ["", "attachment", "page"] do
    parse_post_item(item)
  end
end
defp parse_media(channel) do
channel
|> direct_children_named("item")