Ignore periods (dots) that do not end a sentence

This commit is contained in:
Thelonius Kort
2023-01-04 15:26:05 +01:00
parent b0f8778c07
commit 9a7dc7cf80
2 changed files with 47 additions and 1 deletions

View File

@ -10,10 +10,14 @@ defmodule Outlook.InternalTree.RawInternalBasic do
alias Outlook.InternalTree.Html
# Marker that set_split_markers/1 inserts after sentence-ending punctuation
# (presumably the later split point for translation units - confirm with the consumer).
@splitmarker "@@translationunit@@"
# Temporary stand-in for periods that must not trigger a split; it is swapped
# back to "." once the split markers have been placed.
@nonperiodmarker "@@nonperiod@@"
# Handles a leading text node: rewrites its content so that @splitmarker
# follows every sentence-ending punctuation mark, then recurses on `rest`.
#
# Periods that directly follow an uppercase letter or a digit (e.g. initials,
# decimal or ordinal numbers) are not treated as sentence ends.
# NOTE(review): as a consequence, a sentence whose last character before the
# period is a digit or an uppercase letter gets no split marker either.
def set_split_markers([%InternalNode{type: :text} = textnode | rest]) do
  marked_content =
    textnode.content
    # 1. Shield non-sentence periods by swapping them for a placeholder.
    |> String.replace(~r/([[:upper:]\d])\.(\d)?/u, "\\1#{@nonperiodmarker}\\2")
    # 2. Append the split marker after `.`, `?` or `!`, an optional closing
    #    quote and any trailing whitespace.
    |> String.replace(~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
    # 3. Restore the shielded periods.
    |> String.replace(@nonperiodmarker, ".")

  [%InternalNode{textnode | content: marked_content} | set_split_markers(rest)]
end