Use <tunit> instead of <span> for DeepL translation

To avoid getting results like "... bla.&quot;</span>" ->
"... bla</span>.&quot;" from DeepL, we need to set the splitting_tags
option to an element name (one that's not as common as span).
This commit is contained in:
Thelonius Kort
2023-01-14 21:59:57 +01:00
parent 3b1c1d04a1
commit d7325d64c5
3 changed files with 14 additions and 7 deletions

View File

@ -73,7 +73,7 @@ defmodule Outlook.Translators do
defp prepare_article(tree) do defp prepare_article(tree) do
# Logger.info "so far." # Logger.info "so far."
HtmlDocComponent.render_doc(%{tree: tree}) HtmlDocComponent.render_doc(%{tree: tree, tunit_tag: "tunit"})
|> Phoenix.HTML.Safe.to_iodata() |> Phoenix.HTML.Safe.to_iodata()
|> IO.iodata_to_binary() |> IO.iodata_to_binary()
end end
@ -86,7 +86,7 @@ defmodule Outlook.Translators do
def process_translation(translation, tunit_ids) do def process_translation(translation, tunit_ids) do
tunit_map = translation tunit_map = translation
|> Floki.parse_fragment! |> Floki.parse_fragment!
|> Floki.find("span.tunit") |> Floki.find("tunit")
|> Enum.map(fn {_,atts,cont} -> |> Enum.map(fn {_,atts,cont} ->
%TranslationUnit{ %TranslationUnit{
nid: Enum.find(atts, fn {k,_} -> k == "nid" end) |> Tuple.to_list |> Enum.at(1), nid: Enum.find(atts, fn {k,_} -> k == "nid" end) |> Tuple.to_list |> Enum.at(1),

View File

@ -26,6 +26,8 @@ defmodule Outlook.Translators.Deepl do
[ [
{"source_lang", options.source_lang}, {"source_lang", options.source_lang},
{"target_lang", options.target_lang}, {"target_lang", options.target_lang},
{"tag_handling", "xml"},
{"splitting_tags", "tunit"},
{"file", content, {"form-data", [{:name, "file"}, {:filename, "datei.html"}]}, []} {"file", content, {"form-data", [{:name, "file"}, {:filename, "datei.html"}]}, []}
] ]
) )

View File

@ -9,17 +9,22 @@ defmodule OutlookWeb.HtmlDocComponent do
attr :tree, :list, required: true attr :tree, :list, required: true
def render_doc(assigns) do def render_doc(%{tunit_tag: _} = assigns) do
~H""" ~H"""
<%= for node <- @tree do %> <%= for node <- @tree do %>
<.dnode node={node} /> <.dnode node={node} tunit_tag={@tunit_tag} />
<% end %> <% end %>
""" """
end end
def render_doc(assigns) do
assigns
|> assign(:tunit_tag, "span")
|> render_doc()
end
def dnode(%{node: %{status: status}} = assigns) do def dnode(%{node: %{status: status}} = assigns) do
~H""" ~H"""
<.dynamic_tag name="span" class="tunit" nid={@node.nid} {Map.get(@node.eph, :attributes, %{})}> <.dynamic_tag name={@tunit_tag} class="tunit" nid={@node.nid} {Map.get(@node.eph, :attributes, %{})}>
<%= @node.content |> raw %> <%= @node.content |> raw %>
</.dynamic_tag> </.dynamic_tag>
""" """
@ -27,9 +32,9 @@ defmodule OutlookWeb.HtmlDocComponent do
def dnode(assigns) when assigns.node.type == :element do def dnode(assigns) when assigns.node.type == :element do
~H""" ~H"""
<.dynamic_tag name={@node.name} nid={@node.nid} {Map.get(@node.eph, :attributes, %{})}> <.dynamic_tag name={@node.name} nid={@node.nid} {@node.attributes |> Map.merge(Map.get(@node.eph, :attributes, %{}))}>
<%= for child_node <- @node.content do %> <%= for child_node <- @node.content do %>
<.dnode node={child_node} /> <.dnode node={child_node} tunit_tag={@tunit_tag} />
<% end %> <% end %>
</.dynamic_tag> </.dynamic_tag>
""" """