Update partitioning to strip empty elements
This commit is contained in:
@ -53,6 +53,7 @@ defmodule Outlook.InternalTree.RawInternalBasic do
|
||||
defp inline_to_translation_units(inline_tree) do
|
||||
partition_inlinelevel(inline_tree)
|
||||
|> chunk_with_list()
|
||||
|> Enum.map(fn sentence -> strip_empty_nodes(sentence) end)
|
||||
|> Enum.map(fn sentence -> Html.strip_attributes(sentence) end)
|
||||
|> Enum.map(fn sentence ->
|
||||
%TranslationUnit{
|
||||
@ -90,6 +91,25 @@ defmodule Outlook.InternalTree.RawInternalBasic do
|
||||
def partition_inlinelevel([]), do: []
|
||||
|
||||
|
||||
@doc """
Recursively removes empty nodes from a list of tree nodes.

A text node is dropped when its `content` is exactly the empty string
(whitespace-only text is kept). An element node is dropped when its
`content` list is empty once it has itself been stripped; otherwise the
element is kept with the stripped content. Every other node is passed
through unchanged, and the relative order of surviving nodes is preserved.
"""
def strip_empty_nodes([]), do: []

def strip_empty_nodes([%{type: :text, content: ""} | tail]), do: strip_empty_nodes(tail)

def strip_empty_nodes([%{type: :element} = element | tail]) do
  # NOTE(review): an element that never had children (content: []) is also
  # dropped here -- confirm this is intended for void elements, if the tree
  # can contain any.
  case strip_empty_nodes(element.content) do
    [] -> strip_empty_nodes(tail)
    children -> [%InternalNode{element | content: children} | strip_empty_nodes(tail)]
  end
end

def strip_empty_nodes([other | tail]), do: [other | strip_empty_nodes(tail)]
|
||||
|
||||
|
||||
def strip_empty_tunits([ %TranslationUnit{content: ""} | rest]) do
|
||||
strip_empty_tunits(rest)
|
||||
end
|
||||
|
||||
@ -153,7 +153,8 @@ defmodule Outlook.InternalTreeTest do
|
||||
%Outlook.InternalTree.InternalNode{
|
||||
name: "a",
|
||||
attributes: %{
|
||||
href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/"
|
||||
href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/",
|
||||
bullshit: "bollocks"
|
||||
},
|
||||
type: :element,
|
||||
nid: "qxCrs0csHDLI",
|
||||
@ -197,7 +198,7 @@ defmodule Outlook.InternalTreeTest do
|
||||
%Outlook.InternalTree.TranslationUnit{
|
||||
status: :untranslated,
|
||||
nid: "xxxxxx",
|
||||
content: "<a href=\"https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/\"></a> In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
|
||||
content: " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
|
||||
eph: %{}
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user