diff --git a/lib/outlook/internal_tree.ex b/lib/outlook/internal_tree.ex index 6071d92..b70485f 100644 --- a/lib/outlook/internal_tree.ex +++ b/lib/outlook/internal_tree.ex @@ -24,6 +24,6 @@ defmodule Outlook.InternalTree do # validate_sibling_collocation(tree) tree |> RawInternalBasic.set_split_markers() - |> RawInternalBasic.partition_textnodes() + |> RawInternalBasic.partition_to_tunits() end end diff --git a/lib/outlook/internal_tree/raw_internal_basic.ex b/lib/outlook/internal_tree/raw_internal_basic.ex index 8d9fa52..eda2060 100644 --- a/lib/outlook/internal_tree/raw_internal_basic.ex +++ b/lib/outlook/internal_tree/raw_internal_basic.ex @@ -28,42 +28,103 @@ defmodule Outlook.InternalTree.RawInternalBasic do def set_split_markers([]), do: [] - def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do + def partition_to_tunits(raw_tree) do + partition_blocklevel(raw_tree) + end + + def partition_blocklevel([ %InternalNode{type: :element} = node | rest ]) do [ %InternalNode{node | content: case get_sibling_collocation(node.content) do - :block -> partition_textnodes(node.content) + :block -> partition_blocklevel(node.content) :inline -> inline_to_translation_units(node.content) _ -> [ node ] end - } | partition_textnodes(rest) ] + } | partition_blocklevel(rest) ] end - def partition_textnodes([ node | rest ]) do - [ node | partition_textnodes(rest) ] + def partition_blocklevel([ node | rest ]) do + [ node | partition_blocklevel(rest) ] end - def partition_textnodes([]), do: [] + def partition_blocklevel([]), do: [] - - defp inline_to_translation_units(contents) do - contents - # |> Html.strip_attributes # to be implemented - |> Html.to_html() - |> String.split(@splitmarker, trim: true) - |> Enum.map(fn sentence -> - %TranslationUnit{ - content: sentence, - status: :untranslated, - uuid: UUID.generate() - } - end - ) + require Logger + defp inline_to_translation_units(inline_tree) do + Logger.info "inline_tree #{inline_tree |> inspect}" + partition_inlinelevel(inline_tree) + |> chunk_with_list() + |> Html.strip_attributes # to be implemented + |> Enum.map(fn sentence -> + %TranslationUnit{ + content: Html.to_html(sentence), + status: :untranslated, + uuid: UUID.generate() + } + end + ) end - defp contains_elements?(content) do - + def partition_inlinelevel([ %InternalNode{type: :element} = node | rest ]) do + [ partition_inlinelevel(node.content) + |> chunk_with_list() + |> Enum.map(fn nodelist -> %InternalNode{node | content: nodelist} end) + | partition_inlinelevel(rest) ] end - @doc "Returns just either :block or :inline. Assumes that it doesn't contain both." + def partition_inlinelevel([ %InternalNode{type: :text} = textnode | rest ]) do + content = if String.contains?(textnode.content, @splitmarker) do + String.split(textnode.content, @splitmarker, trim: true) + |> Enum.map(fn cont -> %InternalNode{textnode | content: cont} end) + else + textnode + end + [ content + | partition_inlinelevel(rest) ] + end + + def partition_inlinelevel([ node | rest ]) do + [ node | partition_inlinelevel(rest) ] + end + + def partition_inlinelevel([]), do: [] + + + def flatten_element_contents([ node | rest ]) when is_list(node.content) do + [ %InternalNode{node | content: flatten_element_contents(List.flatten(node.content))} + | flatten_element_contents(rest) ] + end + + def flatten_element_contents([ node | rest ]) do + [ node | flatten_element_contents(rest) ] + end + + def flatten_element_contents([]), do: [] + + + @doc """ + iex> chunk_with_list([1, 1, [2, 2], 3, 3, [4, 4, 4], 5, 5]) + [[1, 1, 2], [2, 3, 3, 4], [4], [4, 5, 5]] + iex> chunk_with_list([1, 1, [1, 2], 2, 2, [2, 3, 4], 4, 4]) + [[1, 1, 1], [2, 2, 2, 2], [3], [4, 4, 4]] + """ + def chunk_with_list(list) do + chunk_fun = fn el, acc -> + if el do + {:cont, [el | acc]} + else + {:cont, Enum.reverse(acc), []} + end + end + after_fun = fn + [] -> {:cont, []} + acc -> {:cont, Enum.reverse(acc), []} + end + Enum.map(list, fn el -> is_list(el) && Enum.intersperse(el, nil) || el end) + |> List.flatten() + |> Enum.chunk_while([], chunk_fun, after_fun) + end + + + @doc "Returns just either :block, :inline or nil. Assumes that it doesn't contain both." def get_sibling_collocation(content) do content |> Enum.map(fn node -> node.eph.sibling_with end)