Update partitioning of the raw tree (i.e. converting it into a "non-raw" tree)
Finally passes the first InternalTree test.
This commit is contained in:
@ -24,6 +24,6 @@ defmodule Outlook.InternalTree do
|
|||||||
# validate_sibling_collocation(tree)
|
# validate_sibling_collocation(tree)
|
||||||
tree
|
tree
|
||||||
|> RawInternalBasic.set_split_markers()
|
|> RawInternalBasic.set_split_markers()
|
||||||
|> RawInternalBasic.partition_textnodes()
|
|> RawInternalBasic.partition_to_tunits()
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@ -28,42 +28,103 @@ defmodule Outlook.InternalTree.RawInternalBasic do
|
|||||||
|
|
||||||
def set_split_markers([]), do: []
|
def set_split_markers([]), do: []
|
||||||
|
|
||||||
def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do
|
def partition_to_tunits(raw_tree) do
|
||||||
|
partition_blocklevel(raw_tree)
|
||||||
|
end
|
||||||
|
|
||||||
|
def partition_blocklevel([ %InternalNode{type: :element} = node | rest ]) do
|
||||||
[ %InternalNode{node | content: case get_sibling_collocation(node.content) do
|
[ %InternalNode{node | content: case get_sibling_collocation(node.content) do
|
||||||
:block -> partition_textnodes(node.content)
|
:block -> partition_blocklevel(node.content)
|
||||||
:inline -> inline_to_translation_units(node.content)
|
:inline -> inline_to_translation_units(node.content)
|
||||||
_ -> [ node ]
|
_ -> [ node ]
|
||||||
end
|
end
|
||||||
} | partition_textnodes(rest) ]
|
} | partition_blocklevel(rest) ]
|
||||||
end
|
end
|
||||||
|
|
||||||
def partition_textnodes([ node | rest ]) do
|
def partition_blocklevel([ node | rest ]) do
|
||||||
[ node | partition_textnodes(rest) ]
|
[ node | partition_blocklevel(rest) ]
|
||||||
end
|
end
|
||||||
|
|
||||||
def partition_textnodes([]), do: []
|
def partition_blocklevel([]), do: []
|
||||||
|
|
||||||
|
require Logger
|
||||||
defp inline_to_translation_units(contents) do
|
defp inline_to_translation_units(inline_tree) do
|
||||||
contents
|
Logger.info "inline_tree #{inline_tree |> inspect}"
|
||||||
# |> Html.strip_attributes # to be implemented
|
partition_inlinelevel(inline_tree)
|
||||||
|> Html.to_html()
|
|> chunk_with_list()
|
||||||
|> String.split(@splitmarker, trim: true)
|
|> Html.strip_attributes # to be implemented
|
||||||
|> Enum.map(fn sentence ->
|
|> Enum.map(fn sentence ->
|
||||||
%TranslationUnit{
|
%TranslationUnit{
|
||||||
content: sentence,
|
content: Html.to_html(sentence),
|
||||||
status: :untranslated,
|
status: :untranslated,
|
||||||
uuid: UUID.generate()
|
uuid: UUID.generate()
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp contains_elements?(content) do
|
def partition_inlinelevel([ %InternalNode{type: :element} = node | rest ]) do
|
||||||
|
[ partition_inlinelevel(node.content)
|
||||||
|
|> chunk_with_list()
|
||||||
|
|> Enum.map(fn nodelist -> %InternalNode{node | content: nodelist} end)
|
||||||
|
| partition_inlinelevel(rest) ]
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc "Returns just either :block or :inline. Assumes that it doesn't contain both."
|
def partition_inlinelevel([ %InternalNode{type: :text} = textnode | rest ]) do
|
||||||
|
content = if String.contains?(textnode.content, @splitmarker) do
|
||||||
|
String.split(textnode.content, @splitmarker, trim: true)
|
||||||
|
|> Enum.map(fn cont -> %InternalNode{textnode | content: cont} end)
|
||||||
|
else
|
||||||
|
textnode
|
||||||
|
end
|
||||||
|
[ content
|
||||||
|
| partition_inlinelevel(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def partition_inlinelevel([ node | rest ]) do
|
||||||
|
[ node | partition_inlinelevel(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def partition_inlinelevel([]), do: []
|
||||||
|
|
||||||
|
|
||||||
|
@doc """
Walks a list of nodes and flattens nested `content` lists.

For every node whose `content` is a list, that list is flattened with
`List.flatten/1`, processed recursively in the same way, and stored back on
the node. Nodes whose `content` is not a list are returned unchanged.
"""
def flatten_element_contents([]), do: []

def flatten_element_contents([head | tail]) do
  flattened_head =
    case head do
      %{content: children} when is_list(children) ->
        %InternalNode{
          head
          | content: children |> List.flatten() |> flatten_element_contents()
        }

      _ ->
        head
    end

  [flattened_head | flatten_element_contents(tail)]
end
|
||||||
|
|
||||||
|
|
||||||
|
@doc """
Regroups a list into chunks, using the nested lists it contains as seams.

Each nested list marks chunk boundaries between its own elements: its first
element closes the chunk that is currently open, its last element opens the
next chunk, and any elements in between become chunks of their own. Elements
that are not lists are simply appended to the chunk that is currently open.

Returns a list of chunks (lists). An empty input yields `[]`.

## Examples

    iex> chunk_with_list([1, 1, [2, 2], 3, 3, [4, 4, 4], 5, 5])
    [[1, 1, 2], [2, 3, 3, 4], [4], [4, 5, 5]]
    iex> chunk_with_list([1, 1, [1, 2], 2, 2, [2, 3, 4], 4, 4])
    [[1, 1, 1], [2, 2, 2, 2], [3], [4, 4, 4]]

"""
def chunk_with_list(list) do
  # A fresh reference cannot collide with any element of the input, so
  # legitimate `nil` / `false` elements are never mistaken for a boundary.
  boundary = make_ref()

  chunk_fun = fn
    ^boundary, acc -> {:cont, Enum.reverse(acc), []}
    el, acc -> {:cont, [el | acc]}
  end

  # Emit the trailing chunk, unless nothing was collected after the last boundary.
  after_fun = fn
    [] -> {:cont, []}
    acc -> {:cont, Enum.reverse(acc), []}
  end

  list
  |> Enum.map(fn
    el when is_list(el) -> Enum.intersperse(el, boundary)
    el -> el
  end)
  # Deep flatten: lists nested further down are merged into the stream as well.
  |> List.flatten()
  |> Enum.chunk_while([], chunk_fun, after_fun)
end
|
||||||
|
|
||||||
|
|
||||||
|
@doc "Returns just either :block, :inline or nil. Assumes that it doesn't contain both."
|
||||||
def get_sibling_collocation(content) do
|
def get_sibling_collocation(content) do
|
||||||
content
|
content
|
||||||
|> Enum.map(fn node -> node.eph.sibling_with end)
|
|> Enum.map(fn node -> node.eph.sibling_with end)
|
||||||
|
|||||||
Reference in New Issue
Block a user