Use <tunit> instead of <span> for DeepL translation

To avoid getting results like "... bla.&quot;</span>" ->
"... bla</span>.&quot;" from DeepL, we need to set the splitting_tags
option to an element name (one that's not as common as span).
This commit is contained in:
Thelonius Kort
2023-01-14 21:59:57 +01:00
parent 3b1c1d04a1
commit d7325d64c5
3 changed files with 14 additions and 7 deletions

View File

@ -73,7 +73,7 @@ defmodule Outlook.Translators do
defp prepare_article(tree) do
# Logger.info "so far."
HtmlDocComponent.render_doc(%{tree: tree})
HtmlDocComponent.render_doc(%{tree: tree, tunit_tag: "tunit"})
|> Phoenix.HTML.Safe.to_iodata()
|> IO.iodata_to_binary()
end
@ -86,7 +86,7 @@ defmodule Outlook.Translators do
def process_translation(translation, tunit_ids) do
tunit_map = translation
|> Floki.parse_fragment!
|> Floki.find("span.tunit")
|> Floki.find("tunit")
|> Enum.map(fn {_,atts,cont} ->
%TranslationUnit{
nid: Enum.find(atts, fn {k,_} -> k == "nid" end) |> Tuple.to_list |> Enum.at(1),

View File

@ -26,6 +26,8 @@ defmodule Outlook.Translators.Deepl do
[
{"source_lang", options.source_lang},
{"target_lang", options.target_lang},
{"tag_handling", "xml"},
{"splitting_tags", "tunit"},
{"file", content, {"form-data", [{:name, "file"}, {:filename, "datei.html"}]}, []}
]
)