Update partitioning to strip empty elements

This commit is contained in:
Thelonius Kort
2023-01-12 22:48:33 +01:00
parent e45e3597c9
commit b7db9cdd8e
2 changed files with 23 additions and 2 deletions

View File

@ -53,6 +53,7 @@ defmodule Outlook.InternalTree.RawInternalBasic do
defp inline_to_translation_units(inline_tree) do defp inline_to_translation_units(inline_tree) do
partition_inlinelevel(inline_tree) partition_inlinelevel(inline_tree)
|> chunk_with_list() |> chunk_with_list()
|> Enum.map(fn sentence -> strip_empty_nodes(sentence) end)
|> Enum.map(fn sentence -> Html.strip_attributes(sentence) end) |> Enum.map(fn sentence -> Html.strip_attributes(sentence) end)
|> Enum.map(fn sentence -> |> Enum.map(fn sentence ->
%TranslationUnit{ %TranslationUnit{
@ -90,6 +91,25 @@ defmodule Outlook.InternalTree.RawInternalBasic do
def partition_inlinelevel([]), do: [] def partition_inlinelevel([]), do: []
# Removes empty nodes from a list of tree nodes, recursing into elements.
#
# * A node with `type: :element` has its `content` list stripped first; when
#   nothing remains the element itself is dropped, otherwise it is rebuilt as
#   an `InternalNode` carrying the stripped content.
# * A node with `type: :text` and `content: ""` is dropped.
# * Every other node is kept unchanged.
#
# The relative order of the surviving nodes is preserved.
def strip_empty_nodes(nodes) when is_list(nodes) do
  Enum.flat_map(nodes, fn
    %{type: :element} = element ->
      # Strip the children first: an element whose children all vanish
      # counts as empty and is removed as well.
      case strip_empty_nodes(element.content) do
        [] -> []
        kept -> [%InternalNode{element | content: kept}]
      end

    %{type: :text, content: ""} ->
      []

    other ->
      [other]
  end)
end
def strip_empty_tunits([ %TranslationUnit{content: ""} | rest]) do def strip_empty_tunits([ %TranslationUnit{content: ""} | rest]) do
strip_empty_tunits(rest) strip_empty_tunits(rest)
end end

View File

@ -153,7 +153,8 @@ defmodule Outlook.InternalTreeTest do
%Outlook.InternalTree.InternalNode{ %Outlook.InternalTree.InternalNode{
name: "a", name: "a",
attributes: %{ attributes: %{
href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/" href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/",
bullshit: "bollocks"
}, },
type: :element, type: :element,
nid: "qxCrs0csHDLI", nid: "qxCrs0csHDLI",
@ -197,7 +198,7 @@ defmodule Outlook.InternalTreeTest do
%Outlook.InternalTree.TranslationUnit{ %Outlook.InternalTree.TranslationUnit{
status: :untranslated, status: :untranslated,
nid: "xxxxxx", nid: "xxxxxx",
content: "<a href=\"https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/\"></a> In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ", content: " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
eph: %{} eph: %{}
} }
], ],