Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions tn/english/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def build_tagger_and_verbalizer(self):
).optimize() + (add_weight(punctuation.tagger, 2.00).plus | self.DELETE_SPACE)
self.tagger = (delete(" ").star + tagger.star) @ self.build_rule(delete(" "), r="[EOS]")

verbalizer = (
classify = (
cardinal.verbalizer
| ordinal.verbalizer
| word.verbalizer
Expand All @@ -92,7 +92,15 @@ def build_tagger_and_verbalizer(self):
| electronic.verbalizer
| serial.verbalizer
| whitelist.verbalizer
| punctuation.verbalizer
| rang.verbalizer
).optimize() + (punctuation.verbalizer.plus | self.INSERT_SPACE)
self.verbalizer = verbalizer.star @ self.build_rule(delete(" "), r="[EOS]")
).optimize()
punct = punctuation.verbalizer.optimize()
# Punct tokens carry their surrounding spacing in their values (the tagger's
# add_weight(accep(" "), -1.0).star absorbs the spaces around punctuation),
# so they handle their own spacing and do not need INSERT_SPACE: a punct
# token is followed by further punct tokens or by DELETE_SPACE instead.
# Only classify tokens need INSERT_SPACE for inter-word spacing.
verbalizer = (
classify + (punct.plus | self.INSERT_SPACE)
| punct + (punct.plus | self.DELETE_SPACE)
).star
self.verbalizer = verbalizer @ self.build_rule(delete(" "), r="[EOS]")
3 changes: 3 additions & 0 deletions tn/english/test/data/normalizer.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ Try searching for 'Toyota' or 'Investment' => Try searching for 'Toyota' or 'Inv
"" => ""
The HTML tag <p> defines a paragraph. => The HTML tag <p> defines a paragraph.
hello world => hello world
"So, one hundred thousand merit shouldn't be a problem." => "So, one hundred thousand merit shouldn't be a problem."
"hello" => "hello"
(hello) => (hello)
Loading