71 lines
1.9 KiB
Elixir
71 lines
1.9 KiB
Elixir
defmodule Outlook.InternalTree.Basic do
  @moduledoc """
  Basic transformations on a tree of `Outlook.InternalTree.InternalNode` structs.

  The module works in two passes:

    1. `set_split_markers/1` appends a split marker after every sentence end
       found in the text nodes of the tree.
    2. `partition_textnodes/1` walks the tree and, for elements whose children
       are inline content, renders those children to HTML and cuts the result
       at the split markers into `Outlook.InternalTree.TranslationUnit` structs.
  """

  alias Ecto.UUID
  alias Outlook.InternalTree.InternalNode
  alias Outlook.InternalTree.TranslationUnit
  alias Outlook.InternalTree.Html

  # Marker inserted after each sentence end; the rendered HTML is later split on it.
  @splitmarker "@@translationunit@@"

  @doc """
  Appends the split marker after every sentence end in the text nodes of `nodes`.

  Element nodes are processed recursively through their `content`; any other
  node is returned unchanged. A sentence end is a run of `.`, `?` or `!`,
  optionally followed by a closing quote character and any trailing whitespace.
  The marker is placed after that whitespace.

  Returns a list with the same length and order as `nodes`.
  """
  def set_split_markers(nodes) when is_list(nodes) do
    Enum.map(nodes, &mark_sentence_ends/1)
  end

  # Text node: insert the marker after each sentence end.
  defp mark_sentence_ends(%InternalNode{type: :text} = textnode) do
    # `[.?!]+` treats runs like "..." or "?!" as ONE sentence end; matching a
    # single character would emit a marker after each of them and yield
    # one-character fragments once the text is split.
    marked =
      String.replace(
        textnode.content,
        ~r/([.?!]+["'”]?\s*)/u,
        "\\1#{@splitmarker}"
      )

    %InternalNode{textnode | content: marked}
  end

  # Element node: descend into its children.
  defp mark_sentence_ends(%InternalNode{type: :element} = node) do
    %InternalNode{node | content: set_split_markers(node.content)}
  end

  # Anything else is passed through untouched.
  defp mark_sentence_ends(node), do: node

  @doc """
  Replaces inline content of element nodes with translation units.

  For every element node in `nodes` the collocation of its children is
  determined with `get_sibling_collocation/1`:

    * `:block`  - the children are partitioned recursively,
    * `:inline` - the children are rendered to HTML and split at the split
      markers into `TranslationUnit` structs,
    * anything else (e.g. `nil` for an empty element) - the children are left
      as they are.

  Nodes that are not elements are returned unchanged.
  """
  def partition_textnodes(nodes) when is_list(nodes) do
    Enum.map(nodes, &partition_node/1)
  end

  defp partition_node(%InternalNode{type: :element, content: content} = node) do
    partitioned =
      case get_sibling_collocation(content) do
        :block -> partition_textnodes(content)
        :inline -> inline_to_translation_units(content)
        # Keep the children as they are. Putting `[node]` here would nest the
        # element inside itself.
        _ -> content
      end

    %InternalNode{node | content: partitioned}
  end

  defp partition_node(node), do: node

  # Renders `contents` to HTML and turns every chunk between split markers
  # into an untranslated TranslationUnit with a freshly generated UUID.
  # Empty chunks are dropped (`trim: true`).
  defp inline_to_translation_units(contents) do
    contents
    # |> Html.strip_attributes # to be implemented
    |> Html.to_html()
    |> String.split(@splitmarker, trim: true)
    |> Enum.map(fn sentence ->
      %TranslationUnit{
        content: sentence,
        status: :untranslated,
        uuid: UUID.generate()
      }
    end)
  end

  # Returns true when the list `content` holds at least one element node,
  # false otherwise (including when `content` is not a list, e.g. the string
  # content of a text node). Not called anywhere in this module yet.
  defp contains_elements?(content) when is_list(content) do
    Enum.any?(content, &match?(%InternalNode{type: :element}, &1))
  end

  defp contains_elements?(_content), do: false

  @doc """
  Returns the collocation of the nodes in `content`: `:block` or `:inline`.

  Collects the `sibling_with` value of every node, ignores `:both` and returns
  the first remaining value. The content is assumed not to mix `:block` and
  `:inline` nodes; if it does, the value that occurs first wins.

  Returns `nil` when `content` is empty or every node is `:both`.
  """
  def get_sibling_collocation(content) do
    content
    |> Enum.map(fn node -> node.sibling_with end)
    |> Enum.uniq()
    |> List.delete(:both)
    |> List.first()
  end
end
|