defmodule Outlook.InternalTreeTest do
  use Outlook.DataCase

  import Outlook.InternalTreeTestHelpers

  # Exercises `Outlook.InternalTree.partition_text/1`: each test feeds it a list
  # of `InternalNode` structs and compares the result with the expected tree, in
  # which the inline children of the block element appear as `TranslationUnit`s.
  describe "internal_tree" do
    alias Outlook.InternalTree
    alias Outlook.InternalTree.{InternalNode, TranslationUnit}

    # Nid expected on every translation unit once the result has been piped
    # through `unify_nids_in_tunits/1` (imported from the test helpers).
    # NOTE(review): presumably the helper overwrites generated nids with this
    # placeholder so the comparison is deterministic - confirm in the helper.
    @default_nid "xxxxxx"

    test "partition_text/1 returns correctly partitioned text without overlapping markup" do
      # The sentence boundary ("words. A") sits inside the nested <a>/<b> elements.
      tree = [
        %InternalNode{
          name: "p",
          attributes: %{},
          type: :element,
          nid: "8293da39-18e3-4695-8ec5-a3a4a06f006c",
          content: [
            %InternalNode{
              name: "",
              attributes: %{},
              type: :text,
              nid: "1b62b02f-0be1-4ba1-88a1-0f08f5a5254d",
              content: "A sentence with many letters ",
              eph: %{sibling_with: :inline}
            },
            %InternalNode{
              name: "a",
              attributes: %{href: "dingsda.com"},
              type: :element,
              content: [
                %InternalNode{
                  name: "",
                  attributes: %{},
                  type: :text,
                  nid: "c6816bfe-a660-436b-84ab-64d92417e321",
                  content: "and many, many ",
                  eph: %{sibling_with: :inline}
                },
                %InternalNode{
                  name: "b",
                  attributes: %{},
                  type: :element,
                  content: [
                    %InternalNode{
                      name: "",
                      attributes: %{},
                      type: :text,
                      nid: "abcd5893-d062-4716-9979-4bf1f65d5e17",
                      content: "words. A",
                      eph: %{sibling_with: :inline}
                    }
                  ],
                  eph: %{sibling_with: :inline}
                },
                %InternalNode{
                  name: "",
                  attributes: %{},
                  type: :text,
                  nid: "d67f41fe-678a-4e27-99da-00d38decde75",
                  content: " sentence",
                  eph: %{sibling_with: :inline}
                }
              ],
              eph: %{sibling_with: :inline}
            },
            %InternalNode{
              name: "",
              attributes: %{},
              type: :text,
              nid: "6fc0bf77-4dc6-4828-866e-2933d393f4b9",
              content: " with many letters and many, many words. ",
              eph: %{sibling_with: :inline}
            }
          ],
          eph: %{sibling_with: :block}
        }
      ]

      # Both units carry exactly the concatenated input text, split after "words. ".
      expected = [
        %InternalNode{
          name: "p",
          attributes: %{},
          type: :element,
          nid: "8293da39-18e3-4695-8ec5-a3a4a06f006c",
          content: [
            %TranslationUnit{
              status: :untranslated,
              nid: @default_nid,
              content: "A sentence with many letters and many, many words. "
            },
            %TranslationUnit{
              status: :untranslated,
              nid: @default_nid,
              content: "A sentence with many letters and many, many words. "
            }
          ],
          eph: %{sibling_with: :block}
        }
      ]

      assert tree |> InternalTree.partition_text() |> unify_nids_in_tunits() == expected
    end

    test "don't partition at numbers, repeated dots and abbreviated names" do
      # Dots after an initial ("F."), inside a number ("99.9%") and in a run
      # ("...") must not end a unit; only the dot after "2021" does.
      tree = [
        %InternalNode{
          name: "p",
          attributes: %{},
          type: :element,
          nid: "oaRwUH3A2wMF",
          content: [
            %InternalNode{
              name: "",
              attributes: %{},
              type: :text,
              nid: "xep6gWMVWF1D",
              content:
                "This was written by F. William Endahl in 2021. 99.9% of it is not that bad... But!",
              eph: %{sibling_with: :inline}
            }
          ],
          eph: %{sibling_with: :block}
        }
      ]

      # Note that the expected text has the three dots as a single "…" character.
      expected = [
        %InternalNode{
          name: "p",
          attributes: %{},
          type: :element,
          nid: "oaRwUH3A2wMF",
          content: [
            %TranslationUnit{
              status: :untranslated,
              nid: @default_nid,
              content: "This was written by F. William Endahl in 2021. ",
              eph: %{}
            },
            %TranslationUnit{
              status: :untranslated,
              nid: @default_nid,
              content: "99.9% of it is not that bad… But!",
              eph: %{}
            }
          ],
          eph: %{sibling_with: :block}
        }
      ]

      assert tree |> InternalTree.partition_text() |> unify_nids_in_tunits() == expected
    end

    test "partition when end of sentence is 'disguised' by some markup" do
      # The first sentence ends with the closing quote inside the <a> element;
      # the next sentence starts in the following sibling text node.
      tree = [
        %InternalNode{
          name: "p",
          attributes: %{},
          type: :element,
          nid: "oaRwUH3A2wMF",
          content: [
            %InternalNode{
              name: "",
              attributes: %{},
              type: :text,
              nid: "xep6gWMVWF1D",
              content:
                "EU Green Deal czar, Commissioner Frans Timmermans said in May, “We will strengthen the EU Emissions Trading System, update the Energy Taxation Directive, and propose new CO2 standards for cars, new energy efficiency standards for buildings, new targets for renewables, and new ways of supporting clean fuels and infrastructure for ",
              eph: %{sibling_with: :inline}
            },
            %InternalNode{
              name: "a",
              attributes: %{
                href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/",
                bullshit: "bollocks"
              },
              type: :element,
              nid: "qxCrs0csHDLI",
              content: [
                %InternalNode{
                  name: "",
                  attributes: %{},
                  type: :text,
                  nid: "2WwtRNKMc8Sp",
                  content: "clean transport.”",
                  eph: %{sibling_with: :inline}
                }
              ],
              eph: %{sibling_with: :inline}
            },
            %InternalNode{
              name: "",
              attributes: %{},
              type: :text,
              nid: "3CKpLvIywr8G",
              content:
                " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
              eph: %{sibling_with: :inline}
            }
          ],
          eph: %{sibling_with: :block}
        }
      ]

      expected = [
        %InternalNode{
          name: "p",
          attributes: %{},
          type: :element,
          nid: "oaRwUH3A2wMF",
          content: [
            %TranslationUnit{
              status: :untranslated,
              nid: @default_nid,
              content:
                "EU Green Deal czar, Commissioner Frans Timmermans said in May, “We will strengthen the EU Emissions Trading System, update the Energy Taxation Directive, and propose new CO2 standards for cars, new energy efficiency standards for buildings, new targets for renewables, and new ways of supporting clean fuels and infrastructure for clean transport.”",
              eph: %{}
            },
            %TranslationUnit{
              status: :untranslated,
              nid: @default_nid,
              content:
                " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
              eph: %{}
            }
          ],
          eph: %{sibling_with: :block}
        }
      ]

      assert tree |> InternalTree.partition_text() |> unify_nids_in_tunits() == expected
    end
  end
end