feat: step 12 done

This commit is contained in:
2026-04-29 20:07:01 +02:00
parent 155fda8b81
commit f178b5b207
18 changed files with 3494 additions and 2 deletions

View File

@@ -8,7 +8,7 @@ defmodule BDS.Desktop.ShellLive do
alias BDS.AI
alias BDS.CliSync.Watcher
alias BDS.Desktop.{FilePicker, FolderPicker, Overlay, ShellCommands, ShellData}
alias BDS.Desktop.ShellLive.{ChatEditor, CodeEntityEditor, MediaEditor, MenuEditor, MiscEditor, SettingsEditor, TagsEditor}
alias BDS.Desktop.ShellLive.{ChatEditor, CodeEntityEditor, ImportEditor, MediaEditor, MenuEditor, MiscEditor, SettingsEditor, TagsEditor}
alias BDS.Desktop.ShellLive.OverlayComponents, as: ShellOverlayComponents
alias BDS.Desktop.ShellLive.PostEditor
alias BDS.Desktop.ShellLive.SidebarComponents, as: ShellSidebarComponents
@@ -105,6 +105,10 @@ defmodule BDS.Desktop.ShellLive do
|> assign(:chat_editor_surface_data, %{})
|> assign(:chat_editor_surface_tabs, %{})
|> assign(:chat_editor_action_errors, %{})
|> assign(:import_editor_execution_states, %{})
|> assign(:import_editor_sections, %{})
|> assign(:import_editor_model_selectors_open, %{})
|> assign(:import_editor_selected_models, %{})
|> assign(:misc_editor_selected_pairs, %{})
|> assign(:misc_editor_git_selected_files, %{})
|> assign(:metadata_diff_active_tabs, %{})
@@ -767,6 +771,46 @@ defmodule BDS.Desktop.ShellLive do
{:noreply, handle_chat_surface_action(socket, params)}
end
# Import editor events.
#
# Every clause below delegates to BDS.Desktop.ShellLive.ImportEditor and wraps
# the socket it returns in `{:noreply, socket}`. `&reload_shell/2` is always
# passed along; clauses that also need to write to the output panel
# additionally pass `&append_output_entry/5`.

# Form change for the import definition (params nested under "import_definition").
def handle_event("change_import_editor_definition", %{"import_definition" => params}, socket) do
{:noreply, ImportEditor.change_definition(socket, params, &reload_shell/2)}
end
# Choose the uploads folder for the current import definition.
def handle_event("select_import_uploads_folder", _params, socket) do
{:noreply, ImportEditor.select_uploads_folder(socket, &reload_shell/2, &append_output_entry/5)}
end
# Choose a WXR file and analyze it.
def handle_event("select_import_wxr_file", _params, socket) do
{:noreply, ImportEditor.select_and_analyze(socket, &reload_shell/2, &append_output_entry/5)}
end
# Run the import for the current definition.
def handle_event("execute_import_editor", _params, socket) do
{:noreply, ImportEditor.execute_import(socket, &reload_shell/2, &append_output_entry/5)}
end
# Conflict resolution changed; the raw params are forwarded unchanged.
def handle_event("change_import_conflict_resolution", params, socket) do
{:noreply, ImportEditor.change_conflict_resolution(socket, params, &reload_shell/2)}
end
# Taxonomy mapping changed; the raw params are forwarded unchanged.
def handle_event("change_import_taxonomy_mapping", params, socket) do
{:noreply, ImportEditor.change_taxonomy_mapping(socket, params, &reload_shell/2)}
end
# Toggle the section named by the "section" param.
def handle_event("toggle_import_section", %{"section" => section}, socket) do
{:noreply, ImportEditor.toggle_section(socket, section, &reload_shell/2)}
end
# Toggle the AI model selector.
def handle_event("toggle_import_ai_model_selector", _params, socket) do
{:noreply, ImportEditor.toggle_model_selector(socket, &reload_shell/2)}
end
# Select the AI model identified by the "model" param.
def handle_event("select_import_ai_model", %{"model" => model_id}, socket) do
{:noreply, ImportEditor.select_ai_model(socket, model_id, &reload_shell/2)}
end
# Run the taxonomy analysis for the current definition.
def handle_event("analyze_import_taxonomy_ai", _params, socket) do
{:noreply, ImportEditor.analyze_taxonomy_ai(socket, &reload_shell/2, &append_output_entry/5)}
end
def handle_event("rerun_misc_editor", _params, socket) do
case MiscEditor.rerun(socket) do
{:command, action} -> {:noreply, apply_shell_command(socket, action)}
@@ -1255,6 +1299,7 @@ defmodule BDS.Desktop.ShellLive do
|> assign_tags_editor()
|> assign_code_entity_editor()
|> assign_chat_editor()
|> assign_import_editor()
|> assign_misc_editor()
end
@@ -1618,6 +1663,10 @@ defmodule BDS.Desktop.ShellLive do
ChatEditor.assign_socket(socket)
end
# Delegates to ImportEditor.assign_socket/1 so the import editor assigns are
# refreshed together with the other editors in the assign pipeline.
defp assign_import_editor(socket) do
ImportEditor.assign_socket(socket)
end
defp assign_misc_editor(socket) do
MiscEditor.assign_socket(socket)
end

View File

@@ -0,0 +1,745 @@
defmodule BDS.Desktop.ShellLive.ImportEditor do
@moduledoc false
use Phoenix.Component
alias BDS.Desktop.{FilePicker, FolderPicker, ShellData}
alias BDS.{AI, ImportAnalysis, ImportDefinitions, ImportExecution, Metadata, Tags}
# Rebuilds the `:import_editor` assign for the current tab.
#
# When the current tab is an import tab whose definition can be loaded, the
# socket receives an `:import_editor` map holding everything the
# `import_editor/1` component renders, and the tab's entry in `:tab_meta` is
# refreshed. In every other case `:import_editor` is set to `nil`.
def assign_socket(socket) do
  case socket.assigns[:current_tab] do
    %{type: :import, id: definition_id} ->
      case ImportDefinitions.get_definition(definition_id) do
        nil -> assign(socket, :import_editor, nil)
        definition -> assign_loaded_definition(socket, definition)
      end

    _other ->
      assign(socket, :import_editor, nil)
  end
end

# Builds the editor state for a loaded definition and stores it on the socket
# together with the tab title and subtitle.
defp assign_loaded_definition(socket, definition) do
  assigns = socket.assigns
  report = ImportDefinitions.decode_analysis_result(definition)
  existing_terms = assigns.projects.active_project_id |> Tags.list_tags() |> Enum.map(& &1.name)
  execution_state = Map.get(assigns.import_editor_execution_states, definition.id, default_execution_state())
  sections = Map.get(assigns.import_editor_sections, definition.id, default_sections())
  selected_model = selected_model(assigns, definition.id)
  available_models = AI.available_chat_models(selected_model)

  import_editor = %{
    definition_id: definition.id,
    definition_name: definition.name,
    uploads_folder_path: definition.uploads_folder_path,
    wxr_file_path: definition.wxr_file_path,
    report: report,
    existing_terms: existing_terms,
    execution_state: execution_state,
    importable_counts: importable_counts(report),
    sections: sections,
    selected_model: selected_model,
    selected_model_label: selected_model_label(selected_model, available_models),
    model_selector_open?: Map.get(assigns.import_editor_model_selectors_open, definition.id, false),
    available_models: available_models,
    offline?: Map.get(assigns, :offline_mode, true),
    is_loading: false
  }

  # `change_definition/3` stores "" when the name field is cleared, and "" is
  # truthy in Elixir, so a plain `||` fallback would leave the tab untitled.
  title =
    if blank?(definition.name) do
      translated("importAnalysis.untitledImport")
    else
      definition.name
    end

  tab_meta =
    Map.put(assigns.tab_meta, {:import, definition.id}, %{
      title: title,
      subtitle: translated("importAnalysis.headerDescription")
    })

  socket
  |> assign(:import_editor, import_editor)
  |> assign(:tab_meta, tab_meta)
end
# Persists the edited definition name (the "name" param, "" when absent) for
# the current tab, then reloads the shell. The shell is reloaded whether or
# not the update succeeded or a definition tab was active.
def change_definition(socket, params, reload) do
  case socket.assigns.current_tab do
    %{id: definition_id} ->
      new_name = Map.get(params, "name", "")
      _result = ImportDefinitions.update_definition(definition_id, %{name: new_name})
      :ok

    _other ->
      :ok
  end

  reload.(socket, socket.assigns.workbench)
end
# Opens the folder picker and stores the chosen directory as the uploads
# folder of the current definition. Picker errors are written to the output
# panel; every path ends by reloading the shell.
def select_uploads_folder(socket, reload, append_output) do
  refresh = fn current -> reload.(current, socket.assigns.workbench) end

  case socket.assigns.current_tab do
    %{id: definition_id} ->
      picker_title = translated("importAnalysis.uploadsFolder")

      case FolderPicker.choose_directory(picker_title) do
        {:ok, uploads_folder_path} ->
          changes = %{uploads_folder_path: uploads_folder_path}
          {:ok, _definition} = ImportDefinitions.update_definition(definition_id, changes)
          refresh.(socket)

        :cancel ->
          refresh.(socket)

        {:error, %{message: message}} ->
          socket
          |> append_output.(translated("activity.import"), message, nil, "error")
          |> refresh.()
      end

    _other ->
      refresh.(socket)
  end
end
# Opens the file picker for a WXR file, analyzes the chosen file and stores
# the report on the current definition. Picker and analysis errors go to the
# output panel; every path ends by reloading the shell.
def select_and_analyze(socket, reload, append_output) do
  with %{id: definition_id} <- socket.assigns.current_tab,
       %{} = definition <- ImportDefinitions.get_definition(definition_id) do
    case FilePicker.choose_file(translated("importAnalysis.wxrFile")) do
      {:ok, wxr_file_path} ->
        analyze_selected_wxr(socket, definition_id, definition, wxr_file_path, reload, append_output)

      :cancel ->
        reload.(socket, socket.assigns.workbench)

      {:error, %{message: message}} ->
        socket
        |> append_output.(translated("activity.import"), message, nil, "error")
        |> reload.(socket.assigns.workbench)
    end
  else
    _other -> reload.(socket, socket.assigns.workbench)
  end
end

# Runs the analysis for a picked file. On success the file path and report are
# saved and any stored execution state for the definition is dropped.
defp analyze_selected_wxr(socket, definition_id, definition, wxr_file_path, reload, append_output) do
  project_id = socket.assigns.projects.active_project_id
  workbench = socket.assigns.workbench

  case ImportAnalysis.analyze_wxr(project_id, wxr_file_path, definition.uploads_folder_path) do
    {:ok, report} ->
      changes = %{wxr_file_path: wxr_file_path, last_analysis_result: report}
      {:ok, _definition} = ImportDefinitions.update_definition(definition_id, changes)
      remaining_states = Map.delete(socket.assigns.import_editor_execution_states, definition_id)

      socket
      |> assign(:import_editor_execution_states, remaining_states)
      |> append_output.(
        translated("activity.import"),
        translated("importAnalysis.analyzingWxr"),
        Path.basename(wxr_file_path),
        "info"
      )
      |> reload.(workbench)

    {:error, %{message: message}} ->
      socket
      |> append_output.(translated("activity.import"), message, nil, "error")
      |> reload.(workbench)
  end
end
# Executes the import described by the stored analysis report of the current
# definition. The outcome is recorded per definition in
# `:import_editor_execution_states` and echoed to the output panel; the shell
# is reloaded afterwards.
def execute_import(socket, reload, append_output) do
  with %{id: definition_id} <- socket.assigns.current_tab,
       %{} = definition <- ImportDefinitions.get_definition(definition_id),
       %{} = report <- ImportDefinitions.decode_analysis_result(definition) do
    project_id = socket.assigns.projects.active_project_id

    options = [
      uploads_folder_path: definition.uploads_folder_path,
      default_author: default_author(project_id)
    ]

    {state, message, level} =
      case ImportExecution.execute_import(project_id, report, options) do
        {:ok, result} ->
          total = importable_counts(report).total

          {%{completed: true, error: nil, count: total, result: result},
           translated("importAnalysis.importComplete", %{count: total}), "info"}

        {:error, %{message: reason}} ->
          {%{completed: false, error: reason, count: 0, result: nil}, reason, "error"}
      end

    states = Map.put(socket.assigns.import_editor_execution_states, definition_id, state)

    socket
    |> assign(:import_editor_execution_states, states)
    |> append_output.(translated("activity.import"), message, nil, level)
    |> reload.(socket.assigns.workbench)
  else
    _other -> reload.(socket, socket.assigns.workbench)
  end
end
# Stores a new resolution for one conflict in the saved analysis report of the
# current definition. The shell is reloaded whether or not anything was saved.
def change_conflict_resolution(socket, %{"item_type" => item_type, "item_name" => item_name, "resolution" => resolution}, reload) do
  with %{id: definition_id} <- socket.assigns.current_tab,
       %{} = definition <- ImportDefinitions.get_definition(definition_id),
       %{} = report <- ImportDefinitions.decode_analysis_result(definition) do
    next_report = update_conflict_resolution(report, item_type, item_name, resolution)
    _result = ImportDefinitions.update_definition(definition_id, %{last_analysis_result: next_report})
  end

  reload.(socket, socket.assigns.workbench)
end
# Stores a new "mapped to" term for one category or tag in the saved analysis
# report of the current definition. The shell is reloaded whether or not
# anything was saved.
def change_taxonomy_mapping(socket, %{"type" => type, "name" => name, "mapped_to" => mapped_to}, reload) do
  with %{id: definition_id} <- socket.assigns.current_tab,
       %{} = definition <- ImportDefinitions.get_definition(definition_id),
       %{} = report <- ImportDefinitions.decode_analysis_result(definition) do
    next_report = update_taxonomy_mapping(report, type, name, mapped_to)
    _result = ImportDefinitions.update_definition(definition_id, %{last_analysis_result: next_report})
  end

  reload.(socket, socket.assigns.workbench)
end
# Flips the open/closed flag of one collapsible section ("conflicts",
# "taxonomy" or "macros") for the current definition. Unknown section names
# are ignored. The shell is reloaded in every case.
def toggle_section(socket, section, reload) do
  case {socket.assigns.current_tab, section} do
    {%{id: definition_id}, section_key} when section_key in ["conflicts", "taxonomy", "macros"] ->
      all_sections = socket.assigns.import_editor_sections
      # Safe: section_key was just checked against the fixed list above.
      flag = String.to_existing_atom(section_key)

      toggled =
        all_sections
        |> Map.get(definition_id, default_sections())
        |> Map.update!(flag, fn open? -> !open? end)

      socket
      |> assign(:import_editor_sections, Map.put(all_sections, definition_id, toggled))
      |> reload.(socket.assigns.workbench)

    _other ->
      reload.(socket, socket.assigns.workbench)
  end
end
# Opens or closes the model selector of the current definition (closed when
# nothing is stored yet), then reloads the shell.
def toggle_model_selector(socket, reload) do
  case socket.assigns.current_tab do
    %{id: definition_id} ->
      open_flags = socket.assigns.import_editor_model_selectors_open
      open? = Map.get(open_flags, definition_id, false)

      socket
      |> assign(:import_editor_model_selectors_open, Map.put(open_flags, definition_id, not open?))
      |> reload.(socket.assigns.workbench)

    _other ->
      reload.(socket, socket.assigns.workbench)
  end
end
# Remembers the chosen model for the current definition, closes its model
# selector and reloads the shell.
def select_ai_model(socket, model_id, reload) do
  case socket.assigns.current_tab do
    %{id: definition_id} ->
      chosen_models = Map.put(socket.assigns.import_editor_selected_models, definition_id, model_id)
      open_flags = Map.put(socket.assigns.import_editor_model_selectors_open, definition_id, false)

      socket
      |> assign(:import_editor_selected_models, chosen_models)
      |> assign(:import_editor_model_selectors_open, open_flags)
      |> reload.(socket.assigns.workbench)

    _other ->
      reload.(socket, socket.assigns.workbench)
  end
end
# Suggests mappings from imported categories/tags to the project's existing
# tags and saves the updated report on the current definition.
#
# While offline mode is active nothing is mapped; an informational entry is
# appended to the output panel instead. Otherwise the mapping is computed by
# `auto_map_taxonomies/2`, and the number of newly mapped terms is reported
# together with the model id stored for this definition (if any).
#
# The shell is reloaded in every case.
def analyze_taxonomy_ai(socket, reload, append_output) do
  with %{id: definition_id} <- socket.assigns.current_tab,
       %{} = definition <- ImportDefinitions.get_definition(definition_id),
       %{} = report <- ImportDefinitions.decode_analysis_result(definition) do
    # Read the flag the same way `assign_socket/1` and `preferred_model/1` do:
    # a missing assign counts as offline instead of raising a KeyError.
    if Map.get(socket.assigns, :offline_mode, true) do
      socket
      |> append_output.(translated("activity.import"), ShellData.translate("Automatic AI actions stay gated by airplane mode.", %{}, socket.assigns.page_language), nil, "info")
      |> reload.(socket.assigns.workbench)
    else
      existing_terms =
        socket.assigns.projects.active_project_id
        |> Tags.list_tags()
        |> Enum.map(& &1.name)

      updated_report = auto_map_taxonomies(report, existing_terms)
      {:ok, _definition} = ImportDefinitions.update_definition(definition_id, %{last_analysis_result: updated_report})
      mapped_count = auto_mapped_count(report, updated_report)

      socket
      |> append_output.(translated("activity.import"), translated("importAnalysis.mappedCount", %{count: mapped_count}), Map.get(socket.assigns.import_editor_selected_models, definition_id), "info")
      |> reload.(socket.assigns.workbench)
    end
  else
    _other -> reload.(socket, socket.assigns.workbench)
  end
end
# Renders the import editor: the definition name form, the uploads folder and
# WXR file selectors and, once an analysis report exists, the site info, stat
# cards, date distribution, execute section and the collapsible conflict,
# taxonomy and macro sections. Without a report an empty state is shown.
#
# The path rows fall back to their placeholder text with `blank?/1`, the same
# check that selects the "placeholder" CSS class, so an empty-string path is
# rendered like a missing one.
attr :import_editor, :map, required: true
def import_editor(assigns) do
  assigns =
    assigns
    |> assign(:report, Map.get(assigns.import_editor, :report))
    |> assign(:execution_state, Map.get(assigns.import_editor, :execution_state))
    |> assign(:counts, Map.get(assigns.import_editor, :importable_counts, %{total: 0, tags: 0, posts: 0, media: 0, pages: 0}))
    |> assign(:sections, Map.get(assigns.import_editor, :sections, default_sections()))

  ~H"""
<div class="import-analysis" data-testid="import-editor">
<form class="import-analysis-header" data-testid="import-editor-form" phx-change="change_import_editor_definition">
<input
class="import-definition-name"
type="text"
name="import_definition[name]"
value={@import_editor.definition_name || translated("importAnalysis.untitledImport")}
placeholder={translated("importAnalysis.namePlaceholder")}
/>
<p><%= translated("importAnalysis.headerDescription") %></p>
</form>
<div class="import-file-selectors">
<div class="import-file-row">
<label><%= translated("importAnalysis.uploadsFolder") %></label>
<div class={["import-file-path", if(blank?(@import_editor.uploads_folder_path), do: "placeholder")]}>
<%= if blank?(@import_editor.uploads_folder_path), do: translated("importAnalysis.noFolderSelected"), else: @import_editor.uploads_folder_path %>
</div>
<button type="button" phx-click="select_import_uploads_folder"><%= translated("Open") %></button>
</div>
<div class="import-file-row">
<label><%= translated("importAnalysis.wxrFile") %></label>
<div class={["import-file-path", if(blank?(@import_editor.wxr_file_path), do: "placeholder")]}>
<%= if blank?(@import_editor.wxr_file_path), do: translated("importAnalysis.selectFileToAnalyze"), else: @import_editor.wxr_file_path %>
</div>
<button class="import-analyze-btn" type="button" phx-click="select_import_wxr_file"><%= translated("importAnalysis.selectAndAnalyze") %></button>
</div>
</div>
<%= if @report do %>
<div class="import-site-info">
<div class="import-site-info-item">
<span class="info-label"><%= translated("importAnalysis.site") %></span>
<span class="info-value"><%= get_in(@report, [:site_info, :title]) || translated("importAnalysis.untitled") %></span>
</div>
<div class="import-site-info-item">
<span class="info-label"><%= translated("importAnalysis.url") %></span>
<span class="info-value"><%= get_in(@report, [:site_info, :url]) || translated("importAnalysis.notAvailable") %></span>
</div>
<div class="import-site-info-item">
<span class="info-label"><%= translated("importAnalysis.language") %></span>
<span class="info-value"><%= get_in(@report, [:site_info, :language]) || translated("importAnalysis.notAvailable") %></span>
</div>
<div class="import-site-info-item">
<span class="info-label"><%= translated("importAnalysis.file") %></span>
<span class="info-value"><%= @import_editor.wxr_file_path |> to_string() |> Path.basename() %></span>
</div>
</div>
<div class="import-stat-cards">
<.stat_card label={translated("importAnalysis.posts")} stats={@report.post_stats} />
<.stat_card label={translated("importAnalysis.pages")} stats={@report.page_stats} />
<.media_stat_card label={translated("importAnalysis.media")} stats={@report.media_stats} />
<.taxonomy_stat_card label={translated("importAnalysis.categories")} stats={@report.category_stats} />
<.taxonomy_stat_card label={translated("importAnalysis.tags")} stats={@report.tag_stats} />
</div>
<%= if Enum.any?(Map.get(@report, :date_distribution, [])) do %>
<div class="import-date-distribution">
<h3><%= translated("importAnalysis.dateDistribution") %></h3>
<div class="distribution-bars">
<%= for row <- @report.date_distribution do %>
<div class="distribution-row">
<span class="distribution-year"><%= row.year %></span>
<div class="distribution-bar-container">
<div class="distribution-bar distribution-bar-posts" style={"width: #{distribution_width(row.post_count, @report.date_distribution, :post_count)}%;"}></div>
</div>
<span class="distribution-count"><%= row.post_count %> / <%= row.media_count %></span>
</div>
<% end %>
</div>
</div>
<% end %>
<div class="import-execute-section">
<div class="import-execute-summary">
<%= translated("importAnalysis.readyToImport") %>
<%= if @counts.tags > 0 do %><span class="import-count-tag"><%= @counts.tags %> <%= translated("importAnalysis.tagsCategories") %></span><% end %>
<%= if @counts.posts > 0 do %><span class="import-count-tag"><%= @counts.posts %> <%= translated("importAnalysis.posts") %></span><% end %>
<%= if @counts.media > 0 do %><span class="import-count-tag"><%= @counts.media %> <%= translated("importAnalysis.media") %></span><% end %>
<%= if @counts.pages > 0 do %><span class="import-count-tag"><%= @counts.pages %> <%= translated("importAnalysis.pages") %></span><% end %>
</div>
<button class="import-execute-btn" type="button" phx-click="execute_import_editor" disabled={@counts.total == 0}>
<%= if @counts.total == 0 do %>
<%= translated("importAnalysis.nothingToImport") %>
<% else %>
<%= translated("importAnalysis.importItems", %{count: @counts.total}) %>
<% end %>
</button>
</div>
<%= if @execution_state.completed do %>
<div class="import-execution-complete">
<span><%= translated("importAnalysis.importComplete", %{count: @execution_state.count || @counts.total}) %></span>
</div>
<% end %>
<%= if present?(@execution_state.error) do %>
<div class="import-execution-error">
<span><%= translated("importAnalysis.importFailed", %{error: @execution_state.error}) %></span>
</div>
<% end %>
<%= if Enum.any?(Map.get(@report, :conflicts, [])) do %>
<section class="import-detail-section conflicts-section">
<button class="import-section-toggle" type="button" phx-click="toggle_import_section" phx-value-section="conflicts">
<span><%= translated("importAnalysis.postSlugConflicts") %></span>
<span class="toggle-icon"><%= if @sections.conflicts, do: "▾", else: "▸" %></span>
</button>
<%= if @sections.conflicts do %>
<table class="import-detail-table conflicts-table">
<thead>
<tr>
<th><%= translated("importAnalysis.slug") %></th>
<th><%= translated("importAnalysis.newEntryWxr") %></th>
<th><%= translated("importAnalysis.existingEntry") %></th>
<th><%= translated("importAnalysis.resolution") %></th>
</tr>
</thead>
<tbody>
<%= for conflict <- @report.conflicts do %>
<tr>
<td class="slug-cell"><%= conflict.item_name %></td>
<td><%= conflict.source_title %></td>
<td><%= conflict.existing_title || translated("importAnalysis.none") %></td>
<td>
<form phx-change="change_import_conflict_resolution">
<input type="hidden" name="item_type" value={conflict.item_type} />
<input type="hidden" name="item_name" value={conflict.item_name} />
<select class="resolution-select" name="resolution">
<option value="skip" selected={conflict.resolution == "skip"}><%= translated("importAnalysis.ignore") %></option>
<option value="merge" selected={conflict.resolution == "merge"}><%= translated("importAnalysis.overwrite") %></option>
<option value="import" selected={conflict.resolution == "import"}><%= translated("importAnalysis.importNewSlug") %></option>
</select>
</form>
</td>
</tr>
<% end %>
</tbody>
</table>
<% end %>
</section>
<% end %>
<%= if Enum.any?(Map.get(@report.items, :categories, [])) or Enum.any?(Map.get(@report.items, :tags, [])) do %>
<section class="import-detail-section">
<button class="import-section-toggle" type="button" phx-click="toggle_import_section" phx-value-section="taxonomy">
<span><%= translated("importAnalysis.taxonomyTitle") %></span>
<span class="toggle-icon"><%= if @sections.taxonomy, do: "▾", else: "▸" %></span>
</button>
<%= if @sections.taxonomy do %>
<div class="taxonomy-analyze-row">
<div class="taxonomy-analyze-dropdown">
<button class="taxonomy-analyze-btn" type="button" phx-click="toggle_import_ai_model_selector"><%= translated("importAnalysis.analyzeWith") %></button>
<%= if @import_editor.model_selector_open? do %>
<div class="taxonomy-model-dropdown">
<%= for model <- @import_editor.available_models do %>
<button class="taxonomy-model-option" type="button" phx-click="select_import_ai_model" phx-value-model={model.id}>
<%= model.provider_name || model.provider || translated("importAnalysis.unknown") %>: <%= model.name || model.id %>
</button>
<% end %>
</div>
<% end %>
</div>
<button class="taxonomy-analyze-btn" type="button" phx-click="analyze_import_taxonomy_ai" disabled={Enum.empty?(@import_editor.available_models) and not @import_editor.offline?}>
<%= @import_editor.selected_model_label %>
</button>
<span class="taxonomy-analyze-hint"><%= translated("importAnalysis.aiMappingHint") %></span>
</div>
<div class="import-taxonomy-groups">
<.taxonomy_group title={translated("importAnalysis.categories")} items={Map.get(@report.items, :categories, [])} existing_terms={@import_editor.existing_terms} type="categories" />
<.taxonomy_group title={translated("importAnalysis.tags")} items={Map.get(@report.items, :tags, [])} existing_terms={@import_editor.existing_terms} type="tags" />
</div>
<% end %>
</section>
<% end %>
<%= if Enum.any?(Map.get(@report, :macros, [])) do %>
<section class="import-detail-section">
<button class="import-section-toggle" type="button" phx-click="toggle_import_section" phx-value-section="macros">
<span><%= translated("importAnalysis.macrosWithCount", %{count: length(@report.macros)}) %></span>
<span class="toggle-icon"><%= if @sections.macros, do: "▾", else: "▸" %></span>
</button>
<%= if @sections.macros do %>
<div class="macros-list">
<%= for macro <- @report.macros do %>
<div class="macro-item unmapped">
<div class="macro-header">
<span class="macro-name"><%= macro.name %></span>
<span class="macro-status-badge unmapped"><%= translated("importAnalysis.macroStatusUnknown") %></span>
<span class="macro-count"><%= translated("importAnalysis.macroUses", %{count: macro.usage_count}) %></span>
</div>
</div>
<% end %>
</div>
<% end %>
</section>
<% end %>
<% else %>
<div class="import-empty-state">
<svg width="48" height="48" viewBox="0 0 24 24" fill="currentColor">
<path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"></path>
</svg>
<p><%= translated("importAnalysis.emptyState") %></p>
</div>
<% end %>
</div>
"""
end
# Stat card for posts and pages: shows the label, the sum computed by
# total_stats/1, and one tag per non-zero count (new, update, conflict,
# duplicate).
attr :label, :string, required: true
attr :stats, :map, required: true
def stat_card(assigns) do
~H"""
<div class="import-stat-card">
<h3><%= @label %></h3>
<div class="import-stat-number"><%= total_stats(@stats) %></div>
<div class="import-stat-breakdown">
<%= if @stats.new_count > 0 do %><span class="import-stat-tag stat-new"><%= @stats.new_count %> <%= translated("importAnalysis.new") %></span><% end %>
<%= if @stats.update_count > 0 do %><span class="import-stat-tag stat-update"><%= @stats.update_count %> <%= translated("importAnalysis.update") %></span><% end %>
<%= if @stats.conflict_count > 0 do %><span class="import-stat-tag stat-conflict"><%= @stats.conflict_count %> <%= translated("importAnalysis.conflict") %></span><% end %>
<%= if @stats.duplicate_count > 0 do %><span class="import-stat-tag stat-duplicate"><%= @stats.duplicate_count %> <%= translated("importAnalysis.duplicate") %></span><% end %>
</div>
</div>
"""
end
# Stat card for media: same layout as stat_card/1 plus a "missing" tag; the
# headline number comes from total_media_stats/1.
attr :label, :string, required: true
attr :stats, :map, required: true
def media_stat_card(assigns) do
~H"""
<div class="import-stat-card">
<h3><%= @label %></h3>
<div class="import-stat-number"><%= total_media_stats(@stats) %></div>
<div class="import-stat-breakdown">
<%= if @stats.new_count > 0 do %><span class="import-stat-tag stat-new"><%= @stats.new_count %> <%= translated("importAnalysis.new") %></span><% end %>
<%= if @stats.update_count > 0 do %><span class="import-stat-tag stat-update"><%= @stats.update_count %> <%= translated("importAnalysis.update") %></span><% end %>
<%= if @stats.conflict_count > 0 do %><span class="import-stat-tag stat-conflict"><%= @stats.conflict_count %> <%= translated("importAnalysis.conflict") %></span><% end %>
<%= if @stats.duplicate_count > 0 do %><span class="import-stat-tag stat-duplicate"><%= @stats.duplicate_count %> <%= translated("importAnalysis.duplicate") %></span><% end %>
<%= if @stats.missing_count > 0 do %><span class="import-stat-tag stat-missing"><%= @stats.missing_count %> <%= translated("importAnalysis.missing") %></span><% end %>
</div>
</div>
"""
end
# Stat card for categories and tags: the headline number is
# existing + mapped + new, followed by one tag per non-zero count.
attr :label, :string, required: true
attr :stats, :map, required: true
def taxonomy_stat_card(assigns) do
~H"""
<div class="import-stat-card">
<h3><%= @label %></h3>
<div class="import-stat-number"><%= @stats.existing_count + @stats.mapped_count + @stats.new_count %></div>
<div class="import-stat-breakdown">
<%= if @stats.existing_count > 0 do %><span class="import-stat-tag stat-update"><%= @stats.existing_count %> <%= translated("importAnalysis.existing") %></span><% end %>
<%= if @stats.mapped_count > 0 do %><span class="import-stat-tag stat-mapped"><%= @stats.mapped_count %> <%= translated("importAnalysis.mapped") %></span><% end %>
<%= if @stats.new_count > 0 do %><span class="import-stat-tag stat-new"><%= @stats.new_count %> <%= translated("importAnalysis.new") %></span><% end %>
</div>
</div>
"""
end
# Renders one taxonomy group (categories or tags). Each item gets its own
# form that emits "change_import_taxonomy_mapping" on change, carrying the
# group type and item name as hidden fields plus the selected "mapped_to"
# term. The empty option stands for "no mapping".
attr :title, :string, required: true
attr :items, :list, required: true
attr :existing_terms, :list, required: true
attr :type, :string, required: true
def taxonomy_group(assigns) do
~H"""
<div class="taxonomy-group">
<h4><%= @title %></h4>
<div class="import-taxonomy-list">
<%= for item <- @items do %>
<form class="import-taxonomy-form" phx-change="change_import_taxonomy_mapping">
<input type="hidden" name="type" value={@type} />
<input type="hidden" name="name" value={item.name} />
<span class={taxonomy_pill_class(item)}><%= item.name %></span>
<select name="mapped_to">
<option value=""><%= translated("importAnalysis.mapToPlaceholder") %></option>
<%= for term <- @existing_terms do %>
<option value={term} selected={item.mapped_to == term}><%= term %></option>
<% end %>
</select>
</form>
<% end %>
</div>
</div>
"""
end
# Applies `resolution` to the matching entry of `report.conflicts` and to the
# matching item inside the `:items` and `:details` buckets.
defp update_conflict_resolution(report, item_type, item_name, resolution) do
  resolve_conflicts = fn conflicts ->
    for conflict <- conflicts || [] do
      if conflict.item_type == item_type and conflict.item_name == item_name do
        %{conflict | resolution: resolution}
      else
        conflict
      end
    end
  end

  resolve_bucket = fn buckets ->
    update_conflict_bucket(buckets, item_type, item_name, resolution)
  end

  with_conflicts = update_in(report, [:conflicts], resolve_conflicts)
  with_items = update_in(with_conflicts, [:items], resolve_bucket)
  update_in(with_items, [:details], resolve_bucket)
end
# Sets `:resolution` on the item identified by `item_name` (compared with its
# `:slug`, or `:filename` when there is no slug) inside the bucket selected by
# `item_type`: "page" -> :pages, "media" -> :media, anything else -> :posts.
# A `nil` bucket map is returned unchanged.
defp update_conflict_bucket(nil, _item_type, _item_name, _resolution), do: nil

defp update_conflict_bucket(buckets, item_type, item_name, resolution) do
  bucket_key =
    case item_type do
      "page" -> :pages
      "media" -> :media
      _other -> :posts
    end

  update_in(buckets, [bucket_key], fn entries ->
    for entry <- entries || [] do
      identity = Map.get(entry, :slug) || Map.get(entry, :filename)

      if identity == item_name do
        Map.put(entry, :resolution, resolution)
      else
        entry
      end
    end
  end)
end
# Sets `mapped_to` on the category/tag called `name` and rebuilds the matching
# stats entry. `type` "categories" selects the categories bucket; any other
# value selects tags. A blank `mapped_to` (after trimming) is stored as `nil`.
defp update_taxonomy_mapping(report, type, name, mapped_to) do
  bucket_key =
    case type do
      "categories" -> :categories
      _other -> :tags
    end

  target = mapped_to |> to_string() |> String.trim() |> blank_to_nil()

  remap = fn entries ->
    for entry <- entries || [] do
      if entry.name == name, do: %{entry | mapped_to: target}, else: entry
    end
  end

  remapped_report = update_in(report, [:items, bucket_key], remap)
  bucket_items = get_in(remapped_report, [:items, bucket_key]) || []

  Map.put(remapped_report, stat_key(bucket_key), rebuild_taxonomy_stats(bucket_items))
end
# Counts taxonomy items by state: already in the project, not in the project
# but mapped to an existing term, or not in the project and unmapped (new).
defp rebuild_taxonomy_stats(items) do
  existing = Enum.count(items, fn item -> item.exists_in_project end)

  mapped =
    Enum.count(items, fn item ->
      not item.exists_in_project and present?(item.mapped_to)
    end)

  fresh =
    Enum.count(items, fn item ->
      not item.exists_in_project and not present?(item.mapped_to)
    end)

  %{existing_count: existing, mapped_count: mapped, new_count: fresh}
end
# Maps an items bucket key to the report key that holds its stats.
# Only :categories and :tags are handled; any other key raises FunctionClauseError.
defp stat_key(:categories), do: :category_stats
defp stat_key(:tags), do: :tag_stats
# Number of items an import would create, per kind and in total.
# Categories and tags are counted together and only when they neither exist
# in the project nor are mapped to an existing term. A missing report yields
# all zeros.
defp importable_counts(nil), do: %{total: 0, tags: 0, posts: 0, media: 0, pages: 0}

defp importable_counts(report) do
  items = report.items
  taxonomy_items = Map.get(items, :categories, []) ++ Map.get(items, :tags, [])

  tags =
    Enum.count(taxonomy_items, fn item ->
      not item.exists_in_project and not present?(item.mapped_to)
    end)

  posts = items |> Map.get(:posts, []) |> importable_entity_count()
  pages = items |> Map.get(:pages, []) |> importable_entity_count()
  media = items |> Map.get(:media, []) |> importable_entity_count()

  %{total: tags + posts + pages + media, tags: tags, posts: posts, media: media, pages: pages}
end
# Counts items that will be imported: every "new" item, plus "conflict" items
# whose resolution is anything other than "skip" (a missing resolution counts
# as "skip"). `nil` is treated as an empty list.
defp importable_entity_count(items) do
  Enum.count(items || [], fn item ->
    case item.status do
      "new" -> true
      "conflict" -> Map.get(item, :resolution, "skip") != "skip"
      _other -> false
    end
  end)
end
# Bar width in percent for `value`, relative to the largest `key` value found
# in `rows`. The result is never below 8 so small bars stay visible; the
# divisor is never below 1.
defp distribution_width(value, rows, key) do
  largest = Enum.max(for(row <- rows, do: Map.get(row, key, 0)), fn -> 1 end)
  percent = value / max(largest, 1) * 100

  if percent > 8, do: percent, else: 8
end
# Sum of the new, update, conflict and duplicate counts.
defp total_stats(stats) do
  [:new_count, :update_count, :conflict_count, :duplicate_count]
  |> Enum.map(&Map.fetch!(stats, &1))
  |> Enum.sum()
end

# Same as total_stats/1 plus the missing count (used for media).
defp total_media_stats(stats) do
  total_stats(stats) + Map.fetch!(stats, :missing_count)
end
# CSS classes for a taxonomy pill: "exists" when the term is already in the
# project, "mapped" when it has a mapping target, "new-tax" otherwise.
defp taxonomy_pill_class(item) do
  if item.exists_in_project do
    "import-taxonomy-pill exists"
  else
    if present?(item.mapped_to) do
      "import-taxonomy-pill mapped"
    else
      "import-taxonomy-pill new-tax"
    end
  end
end
# Translates `text` through ShellData.translate/3 using the locale stored in
# the process dictionary under :bds_ui_locale.
defp translated(text, bindings \\ %{}), do: ShellData.translate(text, bindings, Process.get(:bds_ui_locale))
# True unless the value is nil or the empty string.
defp present?(value), do: value not in [nil, ""]
# True when the value is nil or the empty string.
defp blank?(value), do: value in [nil, ""]
# Turns the empty string into nil; every other value passes through unchanged.
defp blank_to_nil(""), do: nil
defp blank_to_nil(value), do: value
# Execution state used for a definition that has not been imported yet.
defp default_execution_state, do: %{completed: false, error: nil, count: 0, result: nil}

# Section visibility used when nothing is stored for a definition: all open.
defp default_sections, do: %{conflicts: true, taxonomy: true, macros: true}
# Model chosen for this definition, falling back to the preferred model when
# nothing (nil or false) is stored.
defp selected_model(assigns, definition_id) do
  case Map.get(assigns.import_editor_selected_models, definition_id) do
    unset when unset in [nil, false] -> preferred_model(assigns)
    chosen -> chosen
  end
end
# Looks up the stored model preference: the :airplane_chat preference while
# offline mode is on (or the assign is missing), the :chat preference
# otherwise. Returns nil unless the lookup yields a non-empty binary.
defp preferred_model(assigns) do
  offline? = Map.get(assigns, :offline_mode, true)
  preference_key = if offline?, do: :airplane_chat, else: :chat

  with {:ok, model} when is_binary(model) and model != "" <- AI.get_model_preference(preference_key) do
    model
  else
    _other -> nil
  end
end
# Label for the analyze button:
#   * no model selected and none available -> the generic "analyze with" text
#   * no model selected -> name (or id) of the first available model
#   * otherwise -> name (or id) of the selected model, or the raw id when the
#     model is not in the available list
defp selected_model_label(model_id, available_models) do
  case {model_id, available_models} do
    {nil, []} ->
      translated("importAnalysis.analyzeWith")

    {nil, [first | _rest]} ->
      first.name || first.id

    {selected_id, models} ->
      case Enum.find(models, fn candidate -> candidate.id == selected_id end) do
        nil -> selected_id
        found -> found.name || found.id
      end
  end
end
# Runs the automatic mapping over the categories and tags of `report`, then
# rebuilds both stats entries from the updated items.
defp auto_map_taxonomies(report, existing_terms) do
  mapped_report =
    Enum.reduce([:categories, :tags], report, fn bucket, acc ->
      update_in(acc, [:items, bucket], &auto_map_taxonomy_items(&1, existing_terms))
    end)

  category_items = get_in(mapped_report, [:items, :categories]) || []
  tag_items = get_in(mapped_report, [:items, :tags]) || []

  mapped_report
  |> Map.put(:category_stats, rebuild_taxonomy_stats(category_items))
  |> Map.put(:tag_stats, rebuild_taxonomy_stats(tag_items))
end
# Fills in `mapped_to` for items that neither exist in the project nor have a
# mapping yet, whenever best_taxonomy_match/2 finds a suggestion. All other
# items are returned unchanged. `nil` is treated as an empty list.
defp auto_map_taxonomy_items(items, existing_terms) do
  for item <- items || [] do
    if item.exists_in_project or present?(item.mapped_to) do
      item
    else
      case best_taxonomy_match(item.name, existing_terms) do
        nil -> item
        suggestion -> %{item | mapped_to: suggestion}
      end
    end
  end
end
# Returns the existing term most similar to `term`, or nil when no candidate
# reaches a Jaro similarity of 0.94 on the normalized spellings.
#
# A term that normalizes to "" never matches: String.jaro_distance/2 returns
# 1.0 for two equal strings, so without this guard it would be "perfectly"
# matched to any existing term that also normalizes to "".
defp best_taxonomy_match(term, existing_terms) do
  case normalize_term(term) do
    "" ->
      nil

    normalized_term ->
      existing_terms
      |> Enum.map(fn candidate ->
        {candidate, String.jaro_distance(normalized_term, normalize_term(candidate))}
      end)
      |> Enum.max_by(fn {_candidate, score} -> score end, fn -> {nil, 0.0} end)
      |> case do
        {candidate, score} when is_binary(candidate) and score >= 0.94 -> candidate
        _other -> nil
      end
  end
end
# How many more categories/tags carry a mapping in `next_report` than in
# `previous_report`; never negative.
defp auto_mapped_count(previous_report, next_report) do
  mapped_total = fn report ->
    categories = Map.get(report.items, :categories, [])
    tags = Map.get(report.items, :tags, [])

    Enum.count(categories ++ tags, fn item -> present?(item.mapped_to) end)
  end

  max(mapped_total.(next_report) - mapped_total.(previous_report), 0)
end
# Canonical spelling used for fuzzy term comparison: the lowercased term with
# everything except letters and digits removed.
#
# The character class uses Unicode properties (\p{L}, \p{N}) rather than
# [a-z0-9], so accented and non-Latin terms keep their letters instead of
# being mangled or collapsing to "".
defp normalize_term(term) do
  term
  |> to_string()
  |> String.downcase()
  |> String.replace(~r/[^\p{L}\p{N}]+/u, "")
end
# Default author stored in the project's metadata (nil when not set).
# Raises MatchError when the metadata lookup does not return {:ok, metadata}.
defp default_author(project_id) do
  {:ok, project_metadata} = Metadata.get_project_metadata(project_id)

  Map.get(project_metadata, :default_author)
end
end

View File

@@ -409,6 +409,9 @@
<% @current_tab.type == :chat and @chat_editor -> %>
<ChatEditor.chat_editor chat_editor={@chat_editor} />
<% @current_tab.type == :import and @import_editor -> %>
<ImportEditor.import_editor import_editor={@import_editor} />
<% @current_tab.type in [:site_validation, :metadata_diff, :translation_validation, :find_duplicates, :git_diff] and @misc_editor -> %>
<MiscEditor.misc_editor misc_editor={@misc_editor} />

359
lib/bds/import_analysis.ex Normal file
View File

@@ -0,0 +1,359 @@
defmodule BDS.ImportAnalysis do
@moduledoc false
import Ecto.Query
alias BDS.Media.Media
alias BDS.Posts.Post
alias BDS.Repo
alias BDS.Tags.Tag
alias BDS.WxrParser
# Matches a bracketed shortcode such as `[gallery ids="1,2"]` or `[name /]`.
# Capture 1 is the shortcode name, capture 2 the raw parameter text. The
# lookbehind/lookahead reject a match whose opening bracket is preceded by `[`
# or whose closing bracket is followed by `]`.
@shortcode_regex ~r/(?<!\[)\[(\w+)([^\]]*?)(?:\s*\/)?\](?!\])/u
# Matches one `key=value` shortcode parameter; the value may be double-quoted,
# single-quoted or bare. Capture 1 is the key.
@param_regex ~r/(\w+)=(?:"([^"]*)"|'([^']*)'|([^\s\]"']+))/u
@doc """
Parses the WXR export at `wxr_file_path` and builds an import analysis report
for the project.

`uploads_folder_path` is optional; without it every media item is reported as
missing.

Returns `{:ok, report}` or `{:error, %{message: String.t()}}`. Both raised
exceptions and process exits are converted to the error tuple: the XML scanner
signals malformed documents with `exit/1`, which `rescue` alone does not catch.
"""
def analyze_wxr(project_id, wxr_file_path, uploads_folder_path \\ nil)
    when is_binary(project_id) and is_binary(wxr_file_path) do
  wxr_data = WxrParser.parse_file(wxr_file_path)
  {:ok, build_report(project_id, wxr_data, wxr_file_path, uploads_folder_path)}
rescue
  error -> {:error, %{message: Exception.message(error)}}
catch
  :exit, reason -> {:error, %{message: "Import analysis aborted: " <> inspect(reason)}}
end
# Loads the project's existing posts, media and tag names, classifies every
# WXR item against them and assembles the full analysis report (statistics,
# conflicts, shortcode usage, summary items and detailed items).
defp build_report(project_id, wxr_data, wxr_file_path, uploads_folder_path) do
  project_posts = Repo.all(from post in Post, where: post.project_id == ^project_id)
  project_media = Repo.all(from media in Media, where: media.project_id == ^project_id)

  # Tag names are compared case-insensitively.
  known_tag_names =
    from(tag in Tag, where: tag.project_id == ^project_id, select: tag.name)
    |> Repo.all()
    |> MapSet.new(&String.downcase/1)

  posts_by_slug = Map.new(project_posts, fn post -> {post.slug, post} end)

  posts_by_checksum =
    for post <- project_posts, not is_nil(post.checksum), into: %{} do
      {post.checksum, post}
    end

  media_by_name =
    Map.new(project_media, fn media -> {String.downcase(media.original_name), media} end)

  media_by_checksum =
    for media <- project_media, not is_nil(media.checksum), into: %{} do
      {media.checksum, media}
    end

  posts =
    for wxr_post <- wxr_data.posts do
      analyze_post_item(wxr_post, posts_by_slug, posts_by_checksum, "post")
    end

  pages =
    for wxr_page <- wxr_data.pages do
      analyze_post_item(wxr_page, posts_by_slug, posts_by_checksum, "page")
    end

  media =
    for wxr_media <- wxr_data.media do
      analyze_media_item(wxr_media, uploads_folder_path, media_by_name, media_by_checksum)
    end

  categories =
    for category <- wxr_data.categories, do: analyze_taxonomy_item(category, known_tag_names)

  tags = for tag <- wxr_data.tags, do: analyze_taxonomy_item(tag, known_tag_names)

  site_info = %{
    title: wxr_data.site.title,
    url: wxr_data.site.link,
    language: wxr_data.site.language,
    source_file: wxr_file_path
  }

  %{
    source_file: wxr_file_path,
    site_info: site_info,
    post_stats: summarize_post_items(posts),
    page_stats: summarize_post_items(pages),
    media_stats: summarize_media_items(media),
    category_stats: summarize_taxonomy_items(categories),
    tag_stats: summarize_taxonomy_items(tags),
    date_distribution: date_distribution(posts, pages, media),
    conflicts: conflicts(posts, pages, media),
    macros: macros(wxr_data.posts ++ wxr_data.pages),
    items: %{
      posts: Enum.map(posts, &summary_item/1),
      pages: Enum.map(pages, &summary_item/1),
      media: Enum.map(media, &summary_item/1),
      categories: categories,
      tags: tags
    },
    details: %{posts: posts, pages: pages, media: media}
  }
end
# Converts a WXR post/page to markdown and classifies it against the project:
#   "update"    - same slug and same content checksum
#   "conflict"  - same slug, different content (default resolution "skip")
#   "duplicate" - different slug, same content checksum
#   "new"       - no match
defp analyze_post_item(wxr_post, posts_by_slug, posts_by_checksum, item_type) do
  markdown = html_to_markdown(wxr_post.content || "")
  checksum = sha256(markdown)

  slug_owner = Map.get(posts_by_slug, wxr_post.slug)
  content_twin = Map.get(posts_by_checksum, checksum)

  {status, existing} =
    case {slug_owner, content_twin} do
      {%{checksum: ^checksum} = same_post, _twin} -> {"update", same_post}
      {nil, nil} -> {"new", nil}
      {nil, twin} -> {"duplicate", twin}
      {owner, _twin} -> {"conflict", owner}
    end

  default_resolution = if status == "conflict", do: "skip", else: nil
  first_date = wxr_post.post_date || wxr_post.pub_date

  %{
    item_type: item_type,
    wp_id: wxr_post.wp_id,
    title: wxr_post.title,
    slug: wxr_post.slug,
    status: status,
    resolution: default_resolution,
    existing_id: existing && existing.id,
    existing_title: existing && existing.title,
    author: blank_to_nil(wxr_post.creator),
    excerpt: blank_to_nil(wxr_post.excerpt),
    categories: wxr_post.categories,
    tags: wxr_post.tags,
    wp_status: blank_to_nil(wxr_post.status),
    content_markdown: markdown,
    content_checksum: checksum,
    content_preview: String.slice(markdown, 0, 200),
    created_at: first_date,
    updated_at: wxr_post.post_modified || first_date,
    published_at: wxr_post.pub_date
  }
end
# Locates a WXR attachment inside the uploads folder and classifies it against
# the project's media:
#   "missing"   - no uploads folder given, or no regular file at the path
#   "update"    - same file name and same checksum
#   "conflict"  - same file name, different checksum (default resolution "skip")
#   "duplicate" - different name, same checksum
#   "new"       - no match
#
# `File.regular?/1` is used instead of `File.exists?/1`: an attachment with an
# empty relative path resolves to the uploads folder itself, which exists but
# cannot be read as a file, and reading it would abort the whole analysis.
defp analyze_media_item(wxr_media, uploads_folder_path, media_by_name, media_by_checksum) do
  source_file =
    case uploads_folder_path do
      nil -> nil
      "" -> nil
      path -> Path.join(path, wxr_media.relative_path)
    end

  {status, checksum, existing} =
    if is_binary(source_file) and File.regular?(source_file) do
      file_checksum = source_file |> File.read!() |> md5()
      existing_by_name = Map.get(media_by_name, String.downcase(wxr_media.filename))
      existing_by_checksum = Map.get(media_by_checksum, file_checksum)

      cond do
        existing_by_name && existing_by_name.checksum == file_checksum ->
          {"update", file_checksum, existing_by_name}

        existing_by_name ->
          {"conflict", file_checksum, existing_by_name}

        existing_by_checksum ->
          {"duplicate", file_checksum, existing_by_checksum}

        true ->
          {"new", file_checksum, nil}
      end
    else
      {"missing", nil, nil}
    end

  %{
    item_type: "media",
    wp_id: wxr_media.wp_id,
    title: wxr_media.title,
    filename: wxr_media.filename,
    relative_path: wxr_media.relative_path,
    status: status,
    resolution: if(status == "conflict", do: "skip", else: nil),
    existing_id: existing && existing.id,
    existing_title: existing && existing.title,
    mime_type: wxr_media.mime_type,
    description: blank_to_nil(wxr_media.description),
    parent_wp_id: wxr_media.parent_id,
    source_file: source_file,
    checksum: checksum,
    created_at: wxr_media.pub_date
  }
end
# Builds the report entry for a category/tag; `exists_in_project` is decided by
# a case-insensitive lookup of the name in the set of existing tag names.
defp analyze_taxonomy_item(item, existing_tag_set) do
  lookup_key = String.downcase(item.name)

  %{
    name: item.name,
    slug: item.slug,
    exists_in_project: MapSet.member?(existing_tag_set, lookup_key),
    mapped_to: nil
  }
end
# Condensed view of an analyzed item for the `items` section of the report.
# The `:resolution` key is only present when a resolution is set.
defp summary_item(%{item_type: "media"} = item) do
  summary = %{
    item_type: item.item_type,
    title: item.title,
    filename: item.filename,
    relative_path: item.relative_path,
    status: item.status
  }

  case item.resolution do
    nil -> summary
    resolution -> Map.put(summary, :resolution, resolution)
  end
end

defp summary_item(item) do
  summary = %{
    item_type: item.item_type,
    title: item.title,
    slug: item.slug,
    status: item.status
  }

  case item.resolution do
    nil -> summary
    resolution -> Map.put(summary, :resolution, resolution)
  end
end
# Counts analyzed posts/pages per status.
defp summarize_post_items(items) do
  by_status = Enum.frequencies_by(items, fn item -> item.status end)

  %{
    new_count: Map.get(by_status, "new", 0),
    update_count: Map.get(by_status, "update", 0),
    conflict_count: Map.get(by_status, "conflict", 0),
    duplicate_count: Map.get(by_status, "duplicate", 0)
  }
end
# Counts analyzed media items per status, including files that were not found.
defp summarize_media_items(items) do
  by_status = Enum.frequencies_by(items, fn item -> item.status end)

  %{
    new_count: Map.get(by_status, "new", 0),
    update_count: Map.get(by_status, "update", 0),
    conflict_count: Map.get(by_status, "conflict", 0),
    duplicate_count: Map.get(by_status, "duplicate", 0),
    missing_count: Map.get(by_status, "missing", 0)
  }
end
# Splits taxonomy items into: already in the project, not in the project but
# mapped to an existing term, and not in the project and unmapped (new).
defp summarize_taxonomy_items(items) do
  existing = Enum.count(items, fn item -> item.exists_in_project end)

  mapped =
    Enum.count(items, fn item ->
      not (item.exists_in_project or is_nil(item.mapped_to))
    end)

  unmapped =
    Enum.count(items, fn item ->
      not item.exists_in_project and is_nil(item.mapped_to)
    end)

  %{existing_count: existing, mapped_count: mapped, new_count: unmapped}
end
# Per-year histogram of posts+pages and media, sorted by year. Items whose
# date yields no year are left out.
defp date_distribution(posts, pages, media) do
  post_counts =
    Enum.reduce(posts ++ pages, %{}, fn item, acc ->
      increment_year(item.created_at || item.published_at, acc)
    end)

  media_counts =
    Enum.reduce(media, %{}, fn item, acc -> increment_year(item.created_at, acc) end)

  years =
    post_counts
    |> Map.merge(media_counts)
    |> Map.keys()
    |> Enum.sort()

  for year <- years do
    %{
      year: year,
      post_count: Map.get(post_counts, year, 0),
      media_count: Map.get(media_counts, year, 0)
    }
  end
end
# Lists every item in "conflict" status with the data needed to present it:
# slug (or file name for media), both titles and the current resolution.
defp conflicts(posts, pages, media) do
  for item <- posts ++ pages ++ media, item.status == "conflict" do
    %{
      item_type: item.item_type,
      item_name: Map.get(item, :slug) || Map.get(item, :filename),
      resolution: item.resolution || "skip",
      source_title: item.title,
      existing_title: item.existing_title
    }
  end
end
# Aggregates shortcode usage across all items: one entry per shortcode name
# with its usage count and the sorted union of parameter names, ordered by name.
defp macros(items) do
  parameters_by_name =
    items
    |> Enum.flat_map(&discover_item_macros/1)
    |> Enum.group_by(fn usage -> usage.name end, fn usage -> usage.parameters end)

  for {name, parameter_lists} <- Enum.sort_by(parameters_by_name, &elem(&1, 0)) do
    %{
      name: name,
      usage_count: length(parameter_lists),
      parameters: parameter_lists |> Enum.concat() |> Enum.uniq() |> Enum.sort(),
      validation_status: "unknown"
    }
  end
end
# Finds every shortcode in the item's raw content and returns its lower-cased
# name together with the parameter names it was used with.
defp discover_item_macros(item) do
  for [_whole, shortcode, raw_params] <- Regex.scan(@shortcode_regex, item.content || "") do
    %{name: String.downcase(shortcode), parameters: macro_parameters(raw_params)}
  end
end
# Extracts the distinct parameter names from a shortcode's raw parameter text,
# sorted alphabetically.
defp macro_parameters(raw_params) do
  keys =
    for [_whole, key | _values] <- Regex.scan(@param_regex, raw_params), uniq: true do
      key
    end

  Enum.sort(keys)
end
# Bumps the counter for the year found in `value`; the accumulator is returned
# untouched when no year can be derived (including a nil value).
defp increment_year(value, acc) do
  if year = year_from(value) do
    Map.update(acc, year, 1, fn count -> count + 1 end)
  else
    acc
  end
end
# Derives a year: integers are returned as-is, strings yield their first run
# of four digits, anything else (or a string without such a run) yields nil.
defp year_from(value) when is_integer(value), do: value

defp year_from(value) when is_binary(value) do
  case Regex.run(~r/\d{4}/, value) do
    [digits] -> String.to_integer(digits)
    _no_match -> nil
  end
end

defp year_from(_value), do: nil
# Number of items whose `status` equals the given one.
defp count_status(items, status) do
  items
  |> Enum.filter(fn item -> item.status == status end)
  |> length()
end
# Lower-case hex SHA-256 digest of `value`.
defp sha256(value) do
  digest = :crypto.hash(:sha256, value)
  Base.encode16(digest, case: :lower)
end
# Lower-case hex MD5 digest of `binary`.
defp md5(binary) do
  digest = :crypto.hash(:md5, binary)
  Base.encode16(digest, case: :lower)
end
# Converts post HTML into a simplified markdown string.
# The steps are order-dependent: known inline tags are rewritten first, every
# remaining tag is then dropped, entities are decoded, shortcodes are wrapped
# in double brackets and finally whitespace is tidied.
defp html_to_markdown(content) do
content
|> to_string()
# Line breaks and paragraph ends become newlines; opening <p> tags vanish.
|> String.replace(~r/<br\s*\/?>/i, "\n")
|> String.replace(~r|</p>|i, "\n\n")
|> String.replace(~r|<p[^>]*>|i, "")
# Attribute-less bold/italic/code tags become their markdown equivalents.
|> String.replace(~r|<strong>(.*?)</strong>|is, "**\\1**")
|> String.replace(~r|<b>(.*?)</b>|is, "**\\1**")
|> String.replace(~r|<em>(.*?)</em>|is, "*\\1*")
|> String.replace(~r|<i>(.*?)</i>|is, "*\\1*")
|> String.replace(~r|<code>(.*?)</code>|is, "`\\1`")
# Any tag still present is removed; only its inner text survives.
|> String.replace(~r|<[^>]+>|u, "")
|> HtmlEntities.decode()
|> transform_shortcodes()
# Strip trailing blanks before newlines and collapse runs of 3+ newlines.
|> String.replace(~r/[ \t]+\n/u, "\n")
|> String.replace(~r/\n{3,}/u, "\n\n")
|> String.trim()
end
# Rewrites every `[name params]` shortcode as `[[name params]]`, trimming
# surrounding whitespace inside the brackets.
defp transform_shortcodes(content) do
  Regex.replace(@shortcode_regex, content, fn _whole, shortcode, params ->
    "[[" <> String.trim(shortcode <> params) <> "]]"
  end)
end
# Puts `key => value` into the map unless the value is nil.
defp maybe_put(map, key, value) do
  if is_nil(value) do
    map
  else
    Map.put(map, key, value)
  end
end
# Maps nil and the empty string to nil; every other value passes through.
defp blank_to_nil(value) when value in [nil, ""], do: nil
defp blank_to_nil(value), do: value
end

View File

@@ -17,13 +17,60 @@ defmodule BDS.ImportDefinitions do
name: attr(attrs, :name) || "",
wxr_file_path: attr(attrs, :wxr_file_path),
uploads_folder_path: attr(attrs, :uploads_folder_path),
last_analysis_result: attr(attrs, :last_analysis_result),
last_analysis_result: normalize_analysis_result(attr(attrs, :last_analysis_result)),
created_at: now,
updated_at: now
})
|> Repo.insert()
end
# Fetches an import definition by id; returns nil when there is none.
def get_definition(definition_id) when is_binary(definition_id),
  do: Repo.get(ImportDefinition, definition_id)
# Updates the given fields of an import definition. Attributes may use atom or
# string keys; attributes that are absent or nil are left unchanged, and
# `updated_at` is always refreshed. Returns `{:error, :not_found}` when the
# definition does not exist, otherwise the result of `Repo.update/1`.
def update_definition(definition_id, attrs) when is_binary(definition_id) and is_map(attrs) do
  with %ImportDefinition{} = definition <- Repo.get(ImportDefinition, definition_id) do
    changes =
      [:name, :wxr_file_path, :uploads_folder_path]
      |> Enum.reduce(%{}, fn key, acc -> maybe_put(acc, key, attr(attrs, key)) end)
      |> maybe_put(
        :last_analysis_result,
        normalize_analysis_result(attr(attrs, :last_analysis_result))
      )
      |> Map.put(:updated_at, Persistence.now_ms())

    definition
    |> ImportDefinition.changeset(changes)
    |> Repo.update()
  else
    nil -> {:error, :not_found}
  end
end
# Deletes an import definition. Returns `{:ok, :deleted}`, `{:error, :not_found}`
# when it does not exist, or the error returned by `Repo.delete/1`.
def delete_definition(definition_id) when is_binary(definition_id) do
  case Repo.get(ImportDefinition, definition_id) do
    nil ->
      {:error, :not_found}

    %ImportDefinition{} = definition ->
      with {:ok, _deleted} <- Repo.delete(definition) do
        {:ok, :deleted}
      end
  end
end
# Decodes a stored analysis result (JSON string, or a definition carrying one)
# into a map with atom keys. Returns nil for invalid JSON or any other input.
def decode_analysis_result(%ImportDefinition{last_analysis_result: stored}),
  do: decode_analysis_result(stored)

def decode_analysis_result(stored) when is_binary(stored) do
  with {:ok, decoded} <- Jason.decode(stored) do
    atomize_keys(decoded)
  else
    {:error, _reason} -> nil
  end
end

def decode_analysis_result(_stored), do: nil
def list_definitions(project_id) do
Repo.all(
from definition in ImportDefinition,
@@ -34,4 +81,23 @@ defmodule BDS.ImportDefinitions do
end
# Reads an attribute by atom key, falling back to the string form of the key
# when the atom lookup yields nil or false.
defp attr(attrs, key) do
  case Map.get(attrs, key) do
    missing when missing in [nil, false] -> Map.get(attrs, Atom.to_string(key))
    value -> value
  end
end
# Puts `key => value` into the map unless the value is nil.
defp maybe_put(map, key, value) do
  if is_nil(value) do
    map
  else
    Map.put(map, key, value)
  end
end
# Storage form of an analysis result: nil and strings are kept as they are,
# any other term is JSON-encoded.
defp normalize_analysis_result(value) do
  cond do
    is_nil(value) -> nil
    is_binary(value) -> value
    true -> Jason.encode!(value)
  end
end
# Recursively converts string map keys to atoms in nested maps and lists;
# other values are returned unchanged.
# NOTE(review): `String.to_atom/1` creates atoms for arbitrary keys; this looks
# safe only as long as the decoded JSON was produced by this application -
# confirm before feeding it externally supplied data.
defp atomize_keys(value) when is_map(value) do
  Map.new(value, fn {key, nested_value} ->
    atom_key = if is_binary(key), do: String.to_atom(key), else: key
    {atom_key, atomize_keys(nested_value)}
  end)
end

defp atomize_keys(value) when is_list(value) do
  for element <- value, do: atomize_keys(element)
end

defp atomize_keys(value), do: value
end

306
lib/bds/import_execution.ex Normal file
View File

@@ -0,0 +1,306 @@
defmodule BDS.ImportExecution do
@moduledoc false
alias BDS.Media
alias BDS.Metadata
alias BDS.Posts
alias BDS.Posts.Post
alias BDS.Repo
alias BDS.Tags
# Executes an import from an analysis report: taxonomies first, then posts,
# pages and media. `opts[:default_author]` overrides the project's default
# author. Returns `{:ok, result}` with per-bucket counters and collected error
# messages, or `{:error, %{message: ...}}` when an exception is raised.
def execute_import(project_id, report, opts \\ []) when is_binary(project_id) and is_map(report) do
  normalized = normalize_report(report)
  author = Keyword.get(opts, :default_author) || project_default_author(project_id)

  counters = %{imported: 0, skipped: 0, errors: 0}

  initial = %{
    success: true,
    tags: %{created: 0, skipped: 0},
    posts: counters,
    media: counters,
    pages: counters,
    errors: []
  }

  outcome =
    initial
    |> then(&execute_taxonomies(normalized, project_id, &1))
    |> then(&execute_posts(normalized, project_id, author, &1))
    |> then(&execute_pages(normalized, project_id, author, &1))
    |> then(&execute_media(normalized, project_id, author, &1))

  {:ok, outcome}
rescue
  error -> {:error, %{message: Exception.message(error)}}
end
# Creates a tag for every category/tag that neither exists in the project nor
# is mapped to an existing term. Everything else - including tags whose
# creation fails - is counted as skipped.
defp execute_taxonomies(report, project_id, result) do
  categories = List.wrap(get_in(report, [:items, :categories]))
  tags = List.wrap(get_in(report, [:items, :tags]))

  Enum.reduce(categories ++ tags, result, fn taxonomy, acc ->
    counter =
      cond do
        taxonomy.exists_in_project || taxonomy.mapped_to ->
          :skipped

        true ->
          case Tags.create_tag(%{project_id: project_id, name: taxonomy.name}) do
            {:ok, _tag} -> :created
            {:error, _reason} -> :skipped
          end
      end

    update_in(acc, [:tags, counter], &(&1 + 1))
  end)
end
# Imports every post of the report, accumulating counters under `:posts`.
defp execute_posts(report, project_id, default_author, result) do
  report
  |> import_items(:posts)
  |> Enum.reduce(result, &execute_post_item(project_id, &1, &2, :posts, default_author))
end
# Imports every page of the report as a post carrying the "page" category,
# accumulating counters under `:pages`.
defp execute_pages(report, project_id, default_author, result) do
  for page <- import_items(report, :pages), reduce: result do
    acc -> execute_post_item(project_id, ensure_page_category(page), acc, :pages, default_author)
  end
end
# Imports the media items of the report. Items in "update", "duplicate" or
# "missing" status, and conflicts not resolved as "import" or "merge", are
# skipped. Failures are counted, recorded in `:errors` and clear `:success`.
defp execute_media(report, project_id, default_author, result) do
  for item <- import_items(report, :media), reduce: result do
    acc ->
      skip? =
        item.status in ["update", "duplicate", "missing"] or
          (item.status == "conflict" and item.resolution not in ["import", "merge"])

      if skip? do
        update_in(acc, [:media, :skipped], &(&1 + 1))
      else
        case import_media_item(project_id, item, default_author) do
          {:ok, _media} ->
            update_in(acc, [:media, :imported], &(&1 + 1))

          {:error, reason} ->
            acc
            |> update_in([:media, :errors], &(&1 + 1))
            |> Map.update!(:errors, &(&1 ++ [inspect(reason)]))
            |> Map.put(:success, false)
        end
      end
  end
end
# Handles one post/page and updates the counters of `bucket`:
#   - "update"/"duplicate" items are skipped
#   - conflicts are merged ("merge"), created anew ("import") or skipped
#   - everything else is created
# Failures are counted, recorded in `:errors` and clear `:success`.
defp execute_post_item(project_id, item, result, bucket, default_author) do
  bump = fn counter -> update_in(result, [bucket, counter], &(&1 + 1)) end

  settle = fn
    {:ok, _post} ->
      bump.(:imported)

    {:error, reason} ->
      bump.(:errors)
      |> Map.update!(:errors, &(&1 ++ [inspect(reason)]))
      |> Map.put(:success, false)
  end

  cond do
    item.status in ["update", "duplicate"] ->
      bump.(:skipped)

    item.status != "conflict" ->
      settle.(create_post_item(project_id, item, default_author))

    item.resolution == "merge" ->
      settle.(merge_post_item(item, default_author))

    item.resolution == "import" ->
      settle.(create_post_item(project_id, item, default_author))

    true ->
      bump.(:skipped)
  end
end
# Creates the post, applies the imported slug/dates/content to it and publishes
# it when the source status says so. The first failing step's value is returned.
defp create_post_item(project_id, item, default_author) do
  new_post_attrs = post_create_attrs(project_id, item, default_author)

  with {:ok, created} <- Posts.create_post(new_post_attrs),
       :ok <- prepare_created_post(created.id, item),
       {:ok, _final_post} = published <- maybe_publish(created.id, item) do
    published
  end
end
# Overwrites the existing post referenced by the item with the imported
# content. Returns `{:error, :not_found}` when that post no longer exists.
defp merge_post_item(item, default_author) do
  with %Post{} = existing <- Repo.get(Post, item.existing_id) do
    merged_attrs = %{
      title: item.title,
      excerpt: item.excerpt,
      content: item.content_markdown,
      author: item.author || default_author,
      tags: item.tags,
      categories: item.categories,
      checksum: item.content_checksum
    }

    Posts.update_post(existing.id, merged_attrs)
  else
    nil -> {:error, :not_found}
  end
end
# Imports one media item.
#
#   - Returns `{:error, :missing_source_file}` unless the source path is a
#     regular file (`File.regular?/1` also rejects directories, which
#     `File.exists?/1` would accept and `File.read!/1` would then crash on).
#   - A conflict resolved as "merge" with a known existing id only updates the
#     metadata of the existing media. The previous `case` guard
#     `... and item.existing_id` could never succeed because a guard has to
#     evaluate to exactly `true`, so merges were silently imported as new media.
#   - Everything else is imported as new media together with its MD5 checksum,
#     which is only computed on this path.
#
# NOTE(review): without `source_file` the relative path is used as-is, i.e.
# relative to the current working directory - confirm this fallback is wanted.
defp import_media_item(project_id, item, default_author) do
  source_path = item.source_file || Path.join("", item.relative_path)
  existing_id = Map.get(item, :existing_id)

  cond do
    not File.regular?(source_path) ->
      {:error, :missing_source_file}

    item.status == "conflict" and item.resolution == "merge" and not is_nil(existing_id) ->
      merge_attrs = %{title: item.title, alt: item.description, author: default_author}

      with {:ok, _updated_media} <- Media.update_media(existing_id, merge_attrs) do
        {:ok, Repo.get!(Media.Media, existing_id)}
      end

    true ->
      Media.import_media(%{
        project_id: project_id,
        source_path: source_path,
        title: item.title,
        alt: item.description,
        author: default_author,
        checksum: md5(File.read!(source_path))
      })
  end
end
# Publishes the post when the source status is "publish"; otherwise just
# reloads it. Either way the result is an `{:ok, post}`-style tuple.
defp maybe_publish(post_id, item) do
  if item.wp_status == "publish" do
    Posts.publish_post(post_id)
  else
    {:ok, Repo.get!(Post, post_id)}
  end
end
# Applies the imported slug, content and timestamps to a freshly created post.
#
# Returns `:ok`, `{:error, :not_found}` when the post cannot be loaded, or the
# error returned by `Repo.update/1`.
#
# The author falls back to the one already stored on the post: the post was
# created with `item.author || default_author`, and writing a nil
# `item.author` here would wipe that default again.
defp prepare_created_post(post_id, item) do
  case Repo.get(Post, post_id) do
    nil ->
      {:error, :not_found}

    %Post{} = post ->
      # Timestamps that cannot be parsed fall back to the next best value.
      created_at = parse_timestamp(item.created_at) || post.created_at
      updated_at = parse_timestamp(item.updated_at) || created_at
      published_at = parse_timestamp(item.published_at) || created_at

      changes = %{
        slug: desired_slug(post, item),
        title: item.title,
        excerpt: item.excerpt,
        content: item.content_markdown,
        author: item.author || post.author,
        tags: item.tags,
        categories: item.categories,
        checksum: item.content_checksum,
        created_at: created_at,
        updated_at: updated_at,
        published_at: if(item.wp_status == "publish", do: published_at, else: nil)
      }

      post
      |> Post.changeset(changes)
      |> Repo.update()
      |> case do
        {:ok, _updated} -> :ok
        error -> error
      end
  end
end
# A conflict imported as a separate post keeps the slug generated at creation;
# otherwise the imported slug is preferred when there is one.
defp desired_slug(post, item) do
  case item.status == "conflict" and item.resolution == "import" do
    true -> post.slug
    false -> item.slug || post.slug
  end
end
# Attributes for creating a post from an analyzed item; the default author is
# used when the item has none.
defp post_create_attrs(project_id, item, default_author) do
  author = item.author || default_author

  %{
    project_id: project_id,
    author: author,
    title: item.title,
    excerpt: item.excerpt,
    content: item.content_markdown,
    checksum: item.content_checksum,
    tags: item.tags,
    categories: item.categories
  }
end
# Makes sure the item's categories contain "page" exactly once, keeping the
# order of the other (de-duplicated) categories.
defp ensure_page_category(item) do
  current = item.categories || []
  %{item | categories: Enum.uniq(current ++ ["page"])}
end
# Items to import for a bucket. When the report carries detailed items, each
# summary item is replaced by its detailed counterpart (matched by identity),
# keeping the resolution recorded on the summary item. Without details the
# normalized summary items are used directly.
defp import_items(report, bucket) do
  summaries = Enum.map(get_in(report, [:items, bucket]) || [], &normalize_item/1)

  case get_in(report, [:details, bucket]) || [] do
    [] ->
      summaries

    details ->
      detail_index =
        Map.new(details, fn detail ->
          normalized_detail = normalize_item(detail)
          {item_identity(normalized_detail), normalized_detail}
        end)

      Enum.map(summaries, fn summary ->
        detail = Map.get(detail_index, item_identity(summary), summary)

        case summary do
          %{resolution: resolution} -> %{detail | resolution: resolution}
          _without_resolution -> detail
        end
      end)
  end
end
# Key used to match a summary item with its detailed counterpart.
#
# Media is identified by its path relative to the uploads folder: the bare
# file name is not unique, because files with the same name can live in
# different upload folders and would then overwrite each other in the index.
# Items without a relative path fall back to the file name. Posts and pages
# are identified by type and slug.
defp item_identity(%{item_type: "media", relative_path: relative_path})
     when is_binary(relative_path),
     do: {:media, relative_path}

defp item_identity(%{item_type: "media", filename: filename}), do: {:media, filename}
defp item_identity(%{item_type: item_type, slug: slug}), do: {item_type, slug}
# Recursively converts string map keys to atoms in nested maps and lists so
# that reports decoded from JSON can be accessed with atom keys.
# NOTE(review): `String.to_atom/1` creates atoms for arbitrary keys; this looks
# safe only while reports originate from this application - confirm.
defp normalize_report(report) when is_map(report) do
  Map.new(report, fn {key, value} ->
    atom_key = if is_binary(key), do: String.to_atom(key), else: key
    {atom_key, normalize_report(value)}
  end)
end

defp normalize_report(report) when is_list(report) do
  for element <- report, do: normalize_report(element)
end

defp normalize_report(report), do: report
# Items are normalized exactly like whole reports (atom keys throughout).
defp normalize_item(item), do: normalize_report(item)
# Converts a timestamp to Unix milliseconds. Integers pass through; strings
# are read as ISO 8601 naive datetimes (spaces are first replaced by "T") and
# interpreted as UTC. Any other value or format yields nil.
defp parse_timestamp(value) when is_integer(value), do: value

defp parse_timestamp(value) when is_binary(value) do
  iso_candidate = String.replace(value, " ", "T")

  with {:ok, naive} <- NaiveDateTime.from_iso8601(iso_candidate) do
    naive
    |> DateTime.from_naive!("Etc/UTC")
    |> DateTime.to_unix(:millisecond)
  else
    _unparsable -> nil
  end
end

defp parse_timestamp(_value), do: nil
# Lower-case hex MD5 digest of `binary`.
defp md5(binary) do
  digest = :crypto.hash(:md5, binary)
  Base.encode16(digest, case: :lower)
end
# Reads the project's metadata and returns its `:default_author` entry
# (nil when the key is absent). Crashes if the metadata cannot be loaded.
defp project_default_author(project_id) do
  {:ok, project_metadata} = Metadata.get_project_metadata(project_id)

  Map.get(project_metadata, :default_author, nil)
end
end

206
lib/bds/wxr_parser.ex Normal file
View File

@@ -0,0 +1,206 @@
defmodule BDS.WxrParser do
@moduledoc false
require Record
# Record definitions extracted from xmerl's header file; they provide the
# `xmlElement/2`, `xmlAttribute/2` and `xmlText/2` accessor macros used below.
Record.defrecord(:xmlElement, Record.extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl"))
Record.defrecord(:xmlAttribute, Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl"))
Record.defrecord(:xmlText, Record.extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl"))
# Reads the file and parses its content as WXR. Raises when the file cannot
# be read.
def parse_file(file_path) when is_binary(file_path) do
  xml_content = File.read!(file_path)
  parse_xml(xml_content)
end
# Parses a WXR (WordPress export) document into a map with the keys `:site`,
# `:posts`, `:pages`, `:media`, `:categories` and `:tags`.
#
# Raises `RuntimeError` when the document is not well-formed XML or has no
# `/rss/channel` element.
def parse_xml(xml_content) when is_binary(xml_content) do
  document = scan_document(xml_content)

  case :xmerl_xpath.string(~c"/rss/channel", document) do
    [channel] ->
      %{
        site: parse_site(channel),
        posts: parse_items(channel, "post"),
        pages: parse_items(channel, "page"),
        media: parse_media(channel),
        categories: parse_categories(channel),
        tags: parse_tags(channel)
      }

    _other ->
      raise RuntimeError, "Invalid WXR file: no <channel> element found"
  end
end

# Scans the XML text into an xmerl document.
#
# The scanner is given the raw bytes of the binary (not Unicode code points):
# xmerl decodes its input according to the document's encoding, so handing it
# already-decoded code points breaks on non-ASCII content. xmerl reports
# malformed XML with `exit/1`; that is converted into a `RuntimeError` so
# callers relying on `rescue` see a regular exception.
defp scan_document(xml_content) do
  {document, _rest} =
    xml_content
    |> :erlang.binary_to_list()
    |> :xmerl_scan.string()

  document
catch
  :exit, reason ->
    raise RuntimeError, "Invalid WXR file: " <> inspect(reason)
end
# Site-level information read from the channel's direct children.
defp parse_site(channel) do
  text_of = fn name -> child_text(channel, name) end

  %{
    title: text_of.("title"),
    link: text_of.("link"),
    description: text_of.("description"),
    language: text_of.("language")
  }
end
# Category definitions: every direct `wp:category` child of the channel.
defp parse_categories(channel) do
  for element <- direct_children(channel), full_name(element) == "wp:category" do
    %{
      name: child_text(element, "cat_name"),
      slug: child_text(element, "category_nicename"),
      parent: child_text(element, "category_parent")
    }
  end
end
# Tag definitions: every direct `wp:tag` child of the channel.
defp parse_tags(channel) do
  for element <- direct_children(channel), full_name(element) == "wp:tag" do
    %{
      name: child_text(element, "tag_name"),
      slug: child_text(element, "tag_slug")
    }
  end
end
# Parses the channel items whose `post_type` equals `expected_type`.
defp parse_items(channel, expected_type) do
  for item <- direct_children_named(channel, "item"),
      child_text(item, "post_type") == expected_type do
    parse_post_item(item)
  end
end
# Parses the channel items whose `post_type` is "attachment".
defp parse_media(channel) do
  for item <- direct_children_named(channel, "item"),
      child_text(item, "post_type") == "attachment" do
    parse_media_item(item)
  end
end
# Extracts the fields of a post/page item. Date fields are nil when empty;
# categories and tags are the texts of the item's `category` children with the
# matching `domain` attribute.
defp parse_post_item(item) do
  by_local_name = fn name -> child_text(item, name) end
  by_full_name = fn name -> child_text_by_full_name(item, name) end

  %{
    wp_id: parse_integer(by_local_name.("post_id")),
    title: by_local_name.("title"),
    slug: by_local_name.("post_name"),
    content: by_full_name.("content:encoded"),
    excerpt: by_full_name.("excerpt:encoded"),
    pub_date: blank_to_nil(by_local_name.("pubDate")),
    post_date: blank_to_nil(by_local_name.("post_date")),
    post_modified: blank_to_nil(by_local_name.("post_modified")),
    creator: by_full_name.("dc:creator"),
    status: by_local_name.("status"),
    post_type: by_local_name.("post_type"),
    categories: item_taxonomy(item, "category"),
    tags: item_taxonomy(item, "post_tag")
  }
end
# Extracts the fields of an attachment item. The file name is the last segment
# of the attachment URL (empty when the URL is empty) and the MIME type is
# derived from that name.
defp parse_media_item(item) do
  attachment_url = child_text(item, "attachment_url")
  filename = Path.basename(attachment_url)

  %{
    wp_id: parse_integer(child_text(item, "post_id")),
    title: child_text(item, "title"),
    url: attachment_url,
    filename: filename,
    relative_path: relative_upload_path(attachment_url),
    pub_date: blank_to_nil(child_text(item, "pubDate")),
    parent_id: parse_integer(child_text(item, "post_parent")),
    mime_type: MIME.from_path(filename),
    description: child_text_by_full_name(item, "content:encoded")
  }
end
# Non-empty texts of the item's `category` children whose `domain` attribute
# equals `domain`.
defp item_taxonomy(item, domain) do
  for category <- direct_children_named(item, "category"),
      xml_attr(category, :domain) == domain,
      label = text_content(category),
      label != "" do
    label
  end
end
# Path of the attachment below "/wp-content/uploads/" (first occurrence), or
# just the URL's last segment when the marker is absent.
defp relative_upload_path(url) when is_binary(url) do
  case :binary.split(url, "/wp-content/uploads/") do
    [_site_prefix, upload_path] -> upload_path
    _no_marker -> Path.basename(url)
  end
end
# Child nodes of the element that are themselves elements (text and other
# node kinds are dropped).
defp direct_children(element) do
  for child <- xmlElement(element, :content), Record.is_record(child, :xmlElement) do
    child
  end
end
# Child elements whose local name (namespace prefix ignored) equals `name`.
defp direct_children_named(element, name) do
  for child <- direct_children(element), local_name(child) == name, do: child
end
# Text of the first child element with the given local name.
defp child_text(element, name) do
  case direct_children_named(element, name) do
    [first_match | _others] -> text_content(first_match)
    [] -> text_content(nil)
  end
end
# Text of the first child element whose full (prefixed) name equals `name`.
defp child_text_by_full_name(element, name) do
  matching_child =
    Enum.find(direct_children(element), fn child -> full_name(child) == name end)

  text_content(matching_child)
end
# Concatenated text of an element: text nodes contribute their value, nested
# elements their own (trimmed) text, other nodes nothing. The result is
# trimmed; a nil element yields "".
defp text_content(nil), do: ""

defp text_content(element) do
  element
  |> xmlElement(:content)
  |> Enum.map_join(fn node ->
    cond do
      Record.is_record(node, :xmlText) -> node |> xmlText(:value) |> to_string()
      Record.is_record(node, :xmlElement) -> text_content(node)
      true -> ""
    end
  end)
  |> String.trim()
end
# Value (as a string) of the element's first attribute named `name`, or nil
# when there is no such attribute.
defp xml_attr(element, name) do
  attributes = xmlElement(element, :attributes)

  case Enum.find(attributes, fn attribute -> xmlAttribute(attribute, :name) == name end) do
    nil -> nil
    attribute -> attribute |> xmlAttribute(:value) |> to_string()
  end
end
# Element name as a string, including any namespace prefix.
defp full_name(element) do
  to_string(xmlElement(element, :name))
end
# Element name without its namespace prefix (the part after the last ":").
defp local_name(element) do
  name_parts = String.split(full_name(element), ":")
  List.last(name_parts)
end
# Leading integer of the value's string form; 0 when there is none.
defp parse_integer(value) do
  with {parsed, _rest} <- value |> to_string() |> Integer.parse() do
    parsed
  else
    :error -> 0
  end
end
# Maps the empty string to nil; every other value passes through.
defp blank_to_nil(value) do
  if value == "", do: nil, else: value
end
end