@@ -573,6 +573,64 @@ public function testHyphenateHtml()
573573 ."\n" , $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated</b> metatext ' ));
574574 }
575575
/**
 * Supplies input HTML and the expected hyphenated HTML for
 * testHyphenateHtmlText().
 *
 * The same sentence is provided three ways: as a bare fragment, wrapped in
 * explicit html/body tags, and as a document with a doctype and a body
 * attribute.
 *
 * The provider is static and its data sets are named, so that it keeps
 * working with PHPUnit versions that require static providers and so that a
 * failing data set can be identified by name.
 *
 * NOTE(review): the fixture text says "dash entity" but contains a literal
 * em dash - confirm whether an entity such as &mdash; was intended.
 *
 * @return array[] Data sets of the form [input HTML, expected HTML], keyed by a descriptive name.
 */
public static function dataHyphenateHtmlText()
{
    $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> ';
    $source = 'Ridiculously <b class="unsplittable">complicated</b> metatext — with dash entity. ';
    $hyphenated = 'Ridicu-lous-ly <b class="unsplittable">com-pli-cat-ed</b> meta-text — with dash en-ti-ty. ';

    return [
        'bare fragment' => [
            $source,
            $hyphenated,
        ],
        'explicit html and body tags' => [
            '<html> ' . '<body> ' . $source . '</body> ' . '</html> ',
            '<html> ' . '<body> ' . $hyphenated . '</body> ' . '</html> ',
        ],
        'doctype and body attribute' => [
            $doctype . '<html> ' . '<body class="body-class"> ' . $source . '</body> ' . '</html> ',
            $doctype . '<html> ' . '<body class="body-class"> ' . $hyphenated . '</body> ' . '</html> ',
        ],
    ];
}
614+
/**
 * Asserts that hyphenateHtmlText() yields the expected markup for each data
 * set, both with no libxml options and with the no-implied/no-default-DTD
 * options set explicitly.
 *
 * @dataProvider dataHyphenateHtmlText
 *
 * @param string $html     HTML handed to hyphenateHtmlText().
 * @param string $expected Hyphenated HTML the call is expected to return.
 *
 * @return void
 */
public function testHyphenateHtmlText($html, $expected)
{
    // The expected fixtures are hyphenated with a bare '-', so the hyphen
    // literal must not carry any padding.
    $this->object->setHyphen('-');

    // Incoming content must never be wrapped with the implicit doctype or the
    // html and body tags of DOMDocument: the output has to look as if
    // LIBXML_HTML_NOIMPLIED and LIBXML_HTML_NODEFDTD were set, even when no
    // options are configured at all.
    $optionSets = [
        'no libxml options' => 0,
        'LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD' => LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD,
    ];

    foreach ($optionSets as $label => $libxmlOptions) {
        $this->object->setLibxmlOptions($libxmlOptions);
        $this->assertEquals(
            $expected,
            $this->object->hyphenateHtmlText($html),
            'Unexpected hyphenateHtmlText() output with ' . $label . '.'
        );
    }
}
633+
576634 /**
577635 * @return void
578636 */
@@ -647,6 +705,13 @@ public function testExcludeElement()
647705 ."\n" ,
648706 $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' )
649707 );
708+ $ this ->assertEquals (
709+ 'Ridicu-lous-ly <b class="unsplittable">complicated</b> meta-text <i>ex-trav-a-gan-za</i> ' ,
710+ // Old libxml versions of PHP < 7.4 occasionally added a line break in the output of
711+ // \DOMDocument::saveHTML(). These line breaks are not yet handled in the Syllable
712+ // implementation, but only quick and dirty in these tests by removing the trailing line breaks.
713+ rtrim ($ this ->object ->hyphenateHtmlText ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' ))
714+ );
650715 }
651716
652717 /**
@@ -664,6 +729,13 @@ public function testExcludeElements()
664729 ."\n" ,
665730 $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' )
666731 );
732+ $ this ->assertEquals (
733+ 'Ridicu-lous-ly <b class="unsplittable">complicated</b> meta-text <i>extravaganza</i> ' ,
734+ // Old libxml versions of PHP < 7.4 occasionally added a line break in the output of
735+ // \DOMDocument::saveHTML(). These line breaks are not yet handled in the Syllable
736+ // implementation, but only quick and dirty in these tests by removing the trailing line breaks.
737+ rtrim ($ this ->object ->hyphenateHtmlText ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' ))
738+ );
667739 }
668740
669741 /**
@@ -682,6 +754,13 @@ public function testExcludeAllAndInclude()
682754 ."\n" ,
683755 $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' )
684756 );
757+ $ this ->assertEquals (
758+ 'Ridiculously <b class="unsplittable">com-pli-cat-ed</b> metatext <i>extravaganza</i> ' ,
759+ // Old libxml versions of PHP < 7.4 occasionally added a line break in the output of
760+ // \DOMDocument::saveHTML(). These line breaks are not yet handled in the Syllable
761+ // implementation, but only quick and dirty in these tests by removing the trailing line breaks.
762+ rtrim ($ this ->object ->hyphenateHtmlText ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' ))
763+ );
685764 }
686765
687766 /**
@@ -696,9 +775,16 @@ public function testExcludeAndInclude()
696775 // Do not hyphenate content within <b>
697776 $ this ->assertEquals (
698777 '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> '
699- ."\n" .'<html><body><p>Ridicu-lous-ly <b class="unsplittable">complicated <i>ex-trav-a-gan-za</i></b> meta-text </p></body></html> '
778+ ."\n" .'<html><body><p>Ridicu-lous-ly <b class="unsplittable">complicated </b> meta-text < i>ex-trav-a-gan-za</i></p></body></html> '
700779 ."\n" ,
701- $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated <i>extravaganza</i></b> metatext ' )
780+ $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated </b> metatext <i>extravaganza</i> ' )
781+ );
782+ $ this ->assertEquals (
783+ 'Ridicu-lous-ly <b class="unsplittable">complicated </b> meta-text <i>ex-trav-a-gan-za</i> ' ,
784+ // Old libxml versions of PHP < 7.4 occasionally added a line break in the output of
785+ // \DOMDocument::saveHTML(). These line breaks are not yet handled in the Syllable
786+ // implementation, but only quick and dirty in these tests by removing the trailing line breaks.
787+ rtrim ($ this ->object ->hyphenateHtmlText ('Ridiculously <b class="unsplittable">complicated </b> metatext <i>extravaganza</i> ' ))
702788 );
703789 }
704790
@@ -717,6 +803,13 @@ public function testExcludeAttribute()
717803 ."\n" ,
718804 $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' )
719805 );
806+ $ this ->assertEquals (
807+ 'Ridicu-lous-ly <b class="unsplittable">complicated</b> meta-text <i>ex-trav-a-gan-za</i> ' ,
808+ // Old libxml versions of PHP < 7.4 occasionally added a line break in the output of
809+ // \DOMDocument::saveHTML(). These line breaks are not yet handled in the Syllable
810+ // implementation, but only quick and dirty in these tests by removing the trailing line breaks.
811+ rtrim ($ this ->object ->hyphenateHtmlText ('Ridiculously <b class="unsplittable">complicated</b> metatext <i>extravaganza</i> ' ))
812+ );
720813 }
721814
722815 /**
@@ -734,5 +827,40 @@ public function testExcludeAttributeValue()
734827 ."\n" ,
735828 $ this ->object ->hyphenateHtml ('Ridiculously <b class="unsplittable">complicated</b> metatext <i class="go right ahead">extravaganza</i> ' )
736829 );
830+ $ this ->assertEquals (
831+ 'Ridicu-lous-ly <b class="unsplittable">complicated</b> meta-text <i class="go right ahead">ex-trav-a-gan-za</i> ' ,
832+ // Old libxml versions of PHP < 7.4 occasionally added a line break in the output of
833+ // \DOMDocument::saveHTML(). These line breaks are not yet handled in the Syllable
834+ // implementation, but only quick and dirty in these tests by removing the trailing line breaks.
835+ rtrim ($ this ->object ->hyphenateHtmlText ('Ridiculously <b class="unsplittable">complicated</b> metatext <i class="go right ahead">extravaganza</i> ' ))
836+ );
837+ }
838+
/**
 * Asserts the expected hyphenation of German and Ukrainian sentences that
 * contain non-ASCII characters, once through hyphenateText() and once
 * through hyphenateHtmlText().
 *
 * @return void
 */
public function testUtf8Characters()
{
    // The expected strings below use a bare '-' as hyphen, and language codes
    // are passed without surrounding whitespace.
    $this->object->setHyphen('-');

    $cases = [
        'de' => [
            'plainInput' => 'Äußerst komplizierter Metatext. ',
            'plainExpected' => 'Äu-ßerst kom-pli-zier-ter Me-ta-text. ',
            'htmlInput' => 'Äußerst <b class="unsplittable">komplizierter</b> Metatext. ',
            'htmlExpected' => 'Äu-ßerst <b class="unsplittable">kom-pli-zier-ter</b> Me-ta-text. ',
        ],
        'uk' => [
            'plainInput' => 'Надзвичайно складний метатекст. ',
            'plainExpected' => 'Над-зви-чайно скла-дний ме-та-те-кст. ',
            'htmlInput' => 'Надзвичайно <b class="unsplittable">складний</b> метатекст. ',
            'htmlExpected' => 'Над-зви-чайно <b class="unsplittable">скла-дний</b> ме-та-те-кст. ',
        ],
    ];

    foreach ($cases as $language => $case) {
        $this->object->setLanguage($language);

        $this->assertEquals(
            $case['plainExpected'],
            $this->object->hyphenateText($case['plainInput']),
            'Unexpected hyphenateText() output for language "' . $language . '".'
        );
        $this->assertEquals(
            $case['htmlExpected'],
            $this->object->hyphenateHtmlText($case['htmlInput']),
            'Unexpected hyphenateHtmlText() output for language "' . $language . '".'
        );
    }
}
738866}
0 commit comments