diff --git a/lib/outlook/internal_tree/raw_internal_basic.ex b/lib/outlook/internal_tree/raw_internal_basic.ex
index 5b98ad4..909f35e 100644
--- a/lib/outlook/internal_tree/raw_internal_basic.ex
+++ b/lib/outlook/internal_tree/raw_internal_basic.ex
@@ -14,7 +14,9 @@ defmodule Outlook.InternalTree.RawInternalBasic do
def set_split_markers([ %InternalNode{type: :text} = textnode | rest ]) do
[ %InternalNode{textnode |
content: textnode.content
- |> String.replace(~r/([[:upper:]\d])\.(\d)?/u, "\\1#{@nonperiodmarker}\\2")
+ |> String.replace(~r/\.\.\.+/u, "…")
+ |> String.replace(~r/([[:upper:]])\./u, "\\1#{@nonperiodmarker}")
+ |> String.replace(~r/(\d)\.(\d)/u, "\\1#{@nonperiodmarker}\\2")
|> String.replace(~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
|> String.replace(@nonperiodmarker, ".")
} | set_split_markers(rest) ]
diff --git a/test/outlook/internaltree_test.exs b/test/outlook/internaltree_test.exs
index 67bf1d7..fa45228 100644
--- a/test/outlook/internaltree_test.exs
+++ b/test/outlook/internaltree_test.exs
@@ -91,20 +91,20 @@ defmodule Outlook.InternalTreeTest do
]
end
- test "partition_text/1 doesn't split numbers and abbreviated names" do
+ test "don't partition at numbers, repeated dots and abbreviated names" do
tree = [
- %InternalNode{
+ %Outlook.InternalTree.InternalNode{
name: "p",
attributes: %{},
type: :element,
- nid: "0248aec7-c525-483d-a472-40a34488478d",
+ nid: "oaRwUH3A2wMF",
content: [
- %InternalNode{
+ %Outlook.InternalTree.InternalNode{
name: "",
attributes: %{},
type: :text,
- nid: "d35ac56f-bf10-47b1-af19-152e6225bb32",
- content: "F. William Engdahl is 3.7 times more likely to write a good article than Mike Adams. But this doesn't mean anything bad about Mike.",
+ nid: "xep6gWMVWF1D",
+ content: "This was written by F. William Endahl in 2021. 99.9% of it is not that bad... But!",
eph: %{sibling_with: :inline}
}
],
@@ -112,21 +112,23 @@ defmodule Outlook.InternalTreeTest do
}
]
assert InternalTree.partition_text(tree) |> unify_nids_in_tunits() == [
- %InternalNode{
+ %Outlook.InternalTree.InternalNode{
name: "p",
attributes: %{},
type: :element,
- nid: "0248aec7-c525-483d-a472-40a34488478d",
+ nid: "oaRwUH3A2wMF",
content: [
- %TranslationUnit{
+ %Outlook.InternalTree.TranslationUnit{
status: :untranslated,
- nid: @default_nid,
- content: "F. William Engdahl is 3.7 times more likely to write a good article than Mike Adams. "
+ nid: "xxxxxx",
+ content: "This was written by F. William Endahl in 2021. ",
+ eph: %{}
},
- %TranslationUnit{
+ %Outlook.InternalTree.TranslationUnit{
status: :untranslated,
- nid: @default_nid,
- content: "But this doesn't mean anything bad about Mike."
+ nid: "xxxxxx",
+ content: "99.9% of it is not that bad… But!",
+ eph: %{}
}
],
eph: %{sibling_with: :block}
@@ -147,7 +149,7 @@ defmodule Outlook.InternalTreeTest do
attributes: %{},
type: :text,
nid: "xep6gWMVWF1D",
- content: "This Fit for 55 is the first time in the world that a group of countries, the EU, officially imposes an agenda to force an absurd “Zero” CO2 by 2050 and 55% less CO2 by 2030. EU Green Deal czar, Commissioner Frans Timmermans said in May, “We will strengthen the EU Emissions Trading System, update the Energy Taxation Directive, and propose new CO2 standards for cars, new energy efficiency standards for buildings, new targets for renewables, and new ways of supporting clean fuels and infrastructure for ",
+ content: "EU Green Deal czar, Commissioner Frans Timmermans said in May, “We will strengthen the EU Emissions Trading System, update the Energy Taxation Directive, and propose new CO2 standards for cars, new energy efficiency standards for buildings, new targets for renewables, and new ways of supporting clean fuels and infrastructure for ",
eph: %{sibling_with: :inline}
},
%Outlook.InternalTree.InternalNode{
@@ -192,7 +194,7 @@ defmodule Outlook.InternalTreeTest do
%Outlook.InternalTree.TranslationUnit{
status: :untranslated,
nid: "xxxxxx",
- content: "This Fit for 55 is the first time in the world that a group of countries, the EU, officially imposes an agenda to force an absurd “Zero” CO2 by 2050 and 55% less CO2 by 2030. EU Green Deal czar, Commissioner Frans Timmermans said in May, “We will strengthen the EU Emissions Trading System, update the Energy Taxation Directive, and propose new CO2 standards for cars, new energy efficiency standards for buildings, new targets for renewables, and new ways of supporting clean fuels and infrastructure for clean transport.”",
+ content: "EU Green Deal czar, Commissioner Frans Timmermans said in May, “We will strengthen the EU Emissions Trading System, update the Energy Taxation Directive, and propose new CO2 standards for cars, new energy efficiency standards for buildings, new targets for renewables, and new ways of supporting clean fuels and infrastructure for clean transport.”",
eph: %{}
},
%Outlook.InternalTree.TranslationUnit{