From 1e2dadc9d8405242373e9780958ea44c0fcf53fb Mon Sep 17 00:00:00 2001 From: Leopoldo Pla Date: Wed, 6 Nov 2019 10:42:48 +0100 Subject: [PATCH] Match regular expression with code comment This is supposed to expect a '<P>' to separate documents, but the regex looks for any tag, creating issues in non-escaped texts. --- moses/ems/support/split-sentences.perl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/ems/support/split-sentences.perl b/moses/ems/support/split-sentences.perl index 29b6169..586ef8e 100755 --- a/moses/ems/support/split-sentences.perl +++ b/moses/ems/support/split-sentences.perl @@ -75,7 +75,7 @@ my $text = ""; while (<STDIN>) { chomp; - if (/^<.+>$/ || /^\s*$/) { + if (/^<P>$/ || /^\s*$/) { # Time to process this block; we've hit a blank or <p> &do_it_for($text, $_); print "<P>\n" if (/^\s*$/ && $text); ## If we have text followed by <P> @@ -93,7 +93,7 @@ sub do_it_for { my($text,$markup) = @_; print &preprocess($text) if $text; - print "$markup\n" if ($markup =~ /^<.+>$/); + print "$markup\n" if ($markup =~ /^<P>$/); #chop($text); }