Add :eph field to %TranslationUnit{} and remove :sibling_with
From now on, :eph is used to store ephemeral data such as :sibling_with. Additionally, :eph is now cleaned up before saving to the db, and InternalTree.Basic has been renamed to InternalTree.RawInternalBasic to make clear that it contains functions for an intermediary tree structure.
This commit is contained in:
@ -3,6 +3,7 @@ defmodule Outlook.Articles.InternalTree do
|
|||||||
|
|
||||||
alias Outlook.InternalTree.InternalNode
|
alias Outlook.InternalTree.InternalNode
|
||||||
alias Outlook.InternalTree.TranslationUnit
|
alias Outlook.InternalTree.TranslationUnit
|
||||||
|
alias Outlook.InternalTree.Basic
|
||||||
|
|
||||||
def type, do: :string
|
def type, do: :string
|
||||||
|
|
||||||
@ -16,7 +17,10 @@ defmodule Outlook.Articles.InternalTree do
|
|||||||
{:ok, Jason.decode!(tree, keys: :atoms!) |> from_json}
|
{:ok, Jason.decode!(tree, keys: :atoms!) |> from_json}
|
||||||
end
|
end
|
||||||
|
|
||||||
def dump(tree) when is_list(tree), do: {:ok, Jason.encode!(tree)}
|
def dump(tree) when is_list(tree) do
|
||||||
|
{:ok, Basic.clean_eph(tree) |> Jason.encode!()}
|
||||||
|
end
|
||||||
|
|
||||||
def dump(_), do: :error
|
def dump(_), do: :error
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -48,7 +48,7 @@ defmodule Outlook.HtmlPreparations.HtmlPreparation do
|
|||||||
|
|
||||||
def set_sibling_with([ %{type: :element} = node | rest ]) do
|
def set_sibling_with([ %{type: :element} = node | rest ]) do
|
||||||
[ %InternalNode{ node |
|
[ %InternalNode{ node |
|
||||||
sibling_with: node.name in @block_elements && :block || :inline,
|
eph: %{sibling_with: node.name in @block_elements && :block || :inline},
|
||||||
content: set_sibling_with(node.content)
|
content: set_sibling_with(node.content)
|
||||||
} | set_sibling_with(rest) ]
|
} | set_sibling_with(rest) ]
|
||||||
end
|
end
|
||||||
@ -58,11 +58,12 @@ defmodule Outlook.HtmlPreparations.HtmlPreparation do
|
|||||||
:text -> Regex.match?(~r/^\s*$/, node.content) && :both || :inline
|
:text -> Regex.match?(~r/^\s*$/, node.content) && :both || :inline
|
||||||
:comment -> :both
|
:comment -> :both
|
||||||
end
|
end
|
||||||
[ %InternalNode{ node | sibling_with: sib_with } | set_sibling_with(rest) ]
|
[ %InternalNode{ node | eph: %{sibling_with: sib_with} } | set_sibling_with(rest) ]
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_sibling_with([ ]), do: ( [ ] )
|
def set_sibling_with([ ]), do: ( [ ] )
|
||||||
|
|
||||||
|
|
||||||
def strip_whitespace_textnodes [ %{type: :text} = node | rest] do
|
def strip_whitespace_textnodes [ %{type: :text} = node | rest] do
|
||||||
if Regex.match?(~r/^\s*$/, node.content) do
|
if Regex.match?(~r/^\s*$/, node.content) do
|
||||||
strip_whitespace_textnodes(rest)
|
strip_whitespace_textnodes(rest)
|
||||||
@ -71,7 +72,6 @@ defmodule Outlook.HtmlPreparations.HtmlPreparation do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def strip_whitespace_textnodes [ %{type: :element} = node | rest] do
|
def strip_whitespace_textnodes [ %{type: :element} = node | rest] do
|
||||||
[ %InternalNode{ node | content: strip_whitespace_textnodes(node.content) }
|
[ %InternalNode{ node | content: strip_whitespace_textnodes(node.content) }
|
||||||
| strip_whitespace_textnodes(rest) ]
|
| strip_whitespace_textnodes(rest) ]
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
defmodule Outlook.InternalTree do
|
defmodule Outlook.InternalTree do
|
||||||
|
|
||||||
alias Outlook.InternalTree.{Html,Modifiers,Basic}
|
alias Outlook.InternalTree.{Html,Modifiers,RawInternalBasic}
|
||||||
alias Outlook.HtmlPreparations.HtmlPreparation
|
alias Outlook.HtmlPreparations.HtmlPreparation
|
||||||
|
|
||||||
def render_html(tree) do
|
def render_html(tree) do
|
||||||
@ -11,7 +11,6 @@ defmodule Outlook.InternalTree do
|
|||||||
|
|
||||||
def render_html_preview(tree) do
|
def render_html_preview(tree) do
|
||||||
tree
|
tree
|
||||||
|> partition_text
|
|
||||||
|> Html.to_html_preview("1")
|
|> Html.to_html_preview("1")
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -24,7 +23,7 @@ defmodule Outlook.InternalTree do
|
|||||||
def partition_text(tree) do
|
def partition_text(tree) do
|
||||||
# validate_sibling_collocation(tree)
|
# validate_sibling_collocation(tree)
|
||||||
tree
|
tree
|
||||||
|> Basic.set_split_markers()
|
|> RawInternalBasic.set_split_markers()
|
||||||
|> Basic.partition_textnodes()
|
|> RawInternalBasic.partition_textnodes()
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@ -1,70 +1,22 @@
|
|||||||
defmodule Outlook.InternalTree.Basic do
|
defmodule Outlook.InternalTree.Basic do
|
||||||
|
|
||||||
alias Ecto.UUID
|
|
||||||
alias Outlook.InternalTree.InternalNode
|
alias Outlook.InternalTree.InternalNode
|
||||||
alias Outlook.InternalTree.TranslationUnit
|
alias Outlook.InternalTree.TranslationUnit
|
||||||
alias Outlook.InternalTree.Html
|
|
||||||
|
|
||||||
@splitmarker "@@translationunit@@"
|
def clean_eph([%TranslationUnit{} = node | rest]) do
|
||||||
|
[ node | rest ]
|
||||||
def set_split_markers([ %InternalNode{type: :text} = textnode | rest ]) do
|
|
||||||
[ %InternalNode{textnode |
|
|
||||||
content: String.replace(textnode.content, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
|
|
||||||
} | set_split_markers(rest) ]
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_split_markers([ %InternalNode{type: :element} = node | rest ]) do
|
def clean_eph([%InternalNode{type: :element} = node | rest]) do
|
||||||
[ %InternalNode{node | content: set_split_markers(node.content)}
|
[ %InternalNode{node |
|
||||||
| set_split_markers(rest) ]
|
eph: %{},
|
||||||
|
content: clean_eph(node.content)}
|
||||||
|
| clean_eph(rest) ]
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_split_markers([ node | rest ]) do
|
def clean_eph([%{type: _} = node | rest]) do
|
||||||
[ node | set_split_markers(rest) ]
|
[ %InternalNode{node | eph: %{}} | clean_eph(rest) ]
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_split_markers([]), do: []
|
def clean_eph([]), do: []
|
||||||
|
|
||||||
def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do
|
|
||||||
[ %InternalNode{node | content: case get_sibling_collocation(node.content) do
|
|
||||||
:block -> partition_textnodes(node.content)
|
|
||||||
:inline -> inline_to_translation_units(node.content)
|
|
||||||
_ -> [ node ]
|
|
||||||
end
|
|
||||||
} | partition_textnodes(rest) ]
|
|
||||||
end
|
|
||||||
|
|
||||||
def partition_textnodes([ node | rest ]) do
|
|
||||||
[ node | partition_textnodes(rest) ]
|
|
||||||
end
|
|
||||||
|
|
||||||
def partition_textnodes([]), do: []
|
|
||||||
|
|
||||||
|
|
||||||
defp inline_to_translation_units(contents) do
|
|
||||||
contents
|
|
||||||
# |> Html.strip_attributes # to be implemented
|
|
||||||
|> Html.to_html()
|
|
||||||
|> String.split(@splitmarker, trim: true)
|
|
||||||
|> Enum.map(fn sentence ->
|
|
||||||
%TranslationUnit{
|
|
||||||
content: sentence,
|
|
||||||
status: :untranslated,
|
|
||||||
uuid: UUID.generate()
|
|
||||||
}
|
|
||||||
end
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp contains_elements?(content) do
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
@doc "Returns just either :block or :inline. Assumes that it doesn't contain both."
|
|
||||||
def get_sibling_collocation(content) do
|
|
||||||
content
|
|
||||||
|> Enum.map(fn node -> node.sibling_with end)
|
|
||||||
|> Enum.uniq()
|
|
||||||
|> List.delete(:both)
|
|
||||||
|> List.first
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
defmodule Outlook.InternalTree.InternalNode do
|
defmodule Outlook.InternalTree.InternalNode do
|
||||||
@derive Jason.Encoder
|
@derive Jason.Encoder
|
||||||
defstruct name: "", attributes: %{}, type: :atom, uuid: "", content: [], sibling_with: nil
|
defstruct name: "", attributes: %{}, type: :atom, uuid: "", content: [], eph: %{}
|
||||||
end
|
end
|
||||||
|
|||||||
74
lib/outlook/internal_tree/raw_internal_basic.ex
Normal file
74
lib/outlook/internal_tree/raw_internal_basic.ex
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
defmodule Outlook.InternalTree.RawInternalBasic do
|
||||||
|
@moduledoc """
|
||||||
|
Function used for the raw_internal_tree which is a transitory state after importing
|
||||||
|
Html and before splitting textnodes into %TranslationUnit{}s.
|
||||||
|
"""
|
||||||
|
|
||||||
|
alias Ecto.UUID
|
||||||
|
alias Outlook.InternalTree.InternalNode
|
||||||
|
alias Outlook.InternalTree.TranslationUnit
|
||||||
|
alias Outlook.InternalTree.Html
|
||||||
|
|
||||||
|
@splitmarker "@@translationunit@@"
|
||||||
|
|
||||||
|
def set_split_markers([ %InternalNode{type: :text} = textnode | rest ]) do
|
||||||
|
[ %InternalNode{textnode |
|
||||||
|
content: String.replace(textnode.content, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
|
||||||
|
} | set_split_markers(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def set_split_markers([ %InternalNode{type: :element} = node | rest ]) do
|
||||||
|
[ %InternalNode{node | content: set_split_markers(node.content)}
|
||||||
|
| set_split_markers(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def set_split_markers([ node | rest ]) do
|
||||||
|
[ node | set_split_markers(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def set_split_markers([]), do: []
|
||||||
|
|
||||||
|
def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do
|
||||||
|
[ %InternalNode{node | content: case get_sibling_collocation(node.content) do
|
||||||
|
:block -> partition_textnodes(node.content)
|
||||||
|
:inline -> inline_to_translation_units(node.content)
|
||||||
|
_ -> [ node ]
|
||||||
|
end
|
||||||
|
} | partition_textnodes(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def partition_textnodes([ node | rest ]) do
|
||||||
|
[ node | partition_textnodes(rest) ]
|
||||||
|
end
|
||||||
|
|
||||||
|
def partition_textnodes([]), do: []
|
||||||
|
|
||||||
|
|
||||||
|
defp inline_to_translation_units(contents) do
|
||||||
|
contents
|
||||||
|
# |> Html.strip_attributes # to be implemented
|
||||||
|
|> Html.to_html()
|
||||||
|
|> String.split(@splitmarker, trim: true)
|
||||||
|
|> Enum.map(fn sentence ->
|
||||||
|
%TranslationUnit{
|
||||||
|
content: sentence,
|
||||||
|
status: :untranslated,
|
||||||
|
uuid: UUID.generate()
|
||||||
|
}
|
||||||
|
end
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
defp contains_elements?(content) do
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc "Returns just either :block or :inline. Assumes that it doesn't contain both."
|
||||||
|
def get_sibling_collocation(content) do
|
||||||
|
content
|
||||||
|
|> Enum.map(fn node -> node.eph.sibling_with end)
|
||||||
|
|> Enum.uniq()
|
||||||
|
|> List.delete(:both)
|
||||||
|
|> List.first
|
||||||
|
end
|
||||||
|
end
|
||||||
@ -4,7 +4,7 @@ defmodule OutlookWeb.ArticleLive.New do
|
|||||||
import OutlookWeb.ArticleLive.NewComponents
|
import OutlookWeb.ArticleLive.NewComponents
|
||||||
|
|
||||||
alias OutlookWeb.ArticleLive.FormComponent
|
alias OutlookWeb.ArticleLive.FormComponent
|
||||||
alias Outlook.{Articles,Authors,HtmlPreparations}
|
alias Outlook.{Articles,Authors,HtmlPreparations,InternalTree}
|
||||||
alias Articles.{Article,RawHtmlInput}
|
alias Articles.{Article,RawHtmlInput}
|
||||||
|
|
||||||
require Logger
|
require Logger
|
||||||
@ -52,6 +52,9 @@ defmodule OutlookWeb.ArticleLive.New do
|
|||||||
|
|
||||||
@impl true
|
@impl true
|
||||||
def handle_event("approve_raw_internaltree", _, socket) do
|
def handle_event("approve_raw_internaltree", _, socket) do
|
||||||
|
socket = socket
|
||||||
|
|> assign(:raw_internal_tree,
|
||||||
|
InternalTree.partition_text(socket.assigns.raw_internal_tree))
|
||||||
{:noreply, socket |> assign(:step, :review_translation_units)}
|
{:noreply, socket |> assign(:step, :review_translation_units)}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user