11#include < cstdio>
22#include < cwctype>
33#include < algorithm>
4+ #include < stdexcept>
45#include " ../vendor/hunspell/src/hunspell/hunspell.hxx"
56#include " spellchecker_hunspell.h"
67
78namespace spellchecker {
89
9- HunspellSpellchecker::HunspellSpellchecker () : hunspell( NULL ), transcoder(NewTranscoder()) { }
10+ HunspellSpellchecker::HunspellSpellchecker () : transcoder(NewTranscoder()) { }
1011
1112HunspellSpellchecker::~HunspellSpellchecker () {
12- if (hunspell ) {
13- delete hunspell ;
13+ for ( size_t i = 0 ; i < hunspells. size (); ++i ) {
14+ delete hunspells[i]. second ;
1415 }
1516
1617 if (transcoder) {
1718 FreeTranscoder (transcoder);
1819 }
1920}
2021
21- bool HunspellSpellchecker::SetDictionary (const std::string& language, const std::string& dirname) {
22- if (hunspell) {
23- delete hunspell;
24- hunspell = NULL ;
25- }
26-
22+ bool HunspellSpellchecker::AddDictionary (const std::string& language, const std::string& dirname) {
2723 // NB: Hunspell uses underscore to separate language and locale, and Win8 uses
2824 // dash - if they use the wrong one, just silently replace it for them
2925 std::string lang = language;
@@ -39,25 +35,51 @@ bool HunspellSpellchecker::SetDictionary(const std::string& language, const std:
3935 }
4036 fclose (handle);
4137
42- hunspell = new Hunspell (affixpath.c_str (), dpath.c_str ());
38+ std::locale loc;
39+ try {
40+ // On Linux locale requires "UTF-8" suffix; e.g., "en_US.UTF-8"
41+ loc = std::locale ((lang + " .UTF-8" ).c_str ());
42+ } catch (std::runtime_error & e) {
43+ // On Windows locale names are different; e.g. en-US
44+ std::string langDashed = language;
45+ std::replace (langDashed.begin (), langDashed.end (), ' _' , ' -' );
46+ std::locale loc (langDashed.c_str ());
47+ }
48+
49+ Hunspell* hunspell = new Hunspell (affixpath.c_str (), dpath.c_str ());
50+ hunspells.push_back (std::make_pair (loc, hunspell));
4351 return true ;
4452}
4553
54+ bool HunspellSpellchecker::SetDictionary (const std::string& language, const std::string& dirname) {
55+ for (size_t i = 0 ; i < hunspells.size (); ++i) {
56+ delete hunspells[i].second ;
57+ }
58+ hunspells.clear ();
59+
60+ return AddDictionary (language, dirname);
61+ }
62+
4663std::vector<std::string> HunspellSpellchecker::GetAvailableDictionaries (const std::string& path) {
4764 return std::vector<std::string>();
4865}
4966
5067bool HunspellSpellchecker::IsMisspelled (const std::string& word) {
51- if (!hunspell) {
52- return false ;
68+ for (size_t i = 0 ; i < hunspells.size (); ++i) {
69+ Hunspell* hunspell = hunspells[i].second ;
70+ bool misspelled = hunspell->spell (word.c_str ()) == 0 ;
71+ if (!misspelled) {
72+ return false ;
73+ }
5374 }
54- return hunspell->spell (word.c_str ()) == 0 ;
75+
76+ return true ;
5577}
5678
5779std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling (const uint16_t *utf16_text, size_t utf16_length) {
5880 std::vector<MisspelledRange> result;
5981
60- if (!hunspell || !transcoder) {
82+ if (hunspells. empty () || !transcoder) {
6183 return result;
6284 }
6385
@@ -80,7 +102,7 @@ std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t
80102 break ;
81103
82104 case in_separator:
83- if (iswalpha (c)) {
105+ if (isAlpha (c)) {
84106 word_start = i;
85107 state = in_word;
86108 } else if (!iswpunct (c) && !iswspace (c)) {
@@ -89,20 +111,29 @@ std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t
89111 break ;
90112
91113 case in_word:
92- if (c == ' \' ' && iswalpha (utf16_text[i + 1 ])) {
114+ if (c == ' \' ' && isAlpha (utf16_text[i + 1 ])) {
93115 i++;
94116 } else if (c == 0 || iswpunct (c) || iswspace (c)) {
95117 state = in_separator;
96118 bool converted = TranscodeUTF16ToUTF8 (transcoder, (char *)utf8_buffer.data (), utf8_buffer.size (), utf16_text + word_start, i - word_start);
97119 if (converted) {
98- if (hunspell->spell (utf8_buffer.data ()) == 0 ) {
120+ bool all_misspelled = true ;
121+ for (size_t i = 0 ; i < hunspells.size (); ++i) {
122+ Hunspell* hunspell = hunspells[i].second ;
123+ bool misspelled = hunspell->spell (utf8_buffer.data ()) == 0 ;
124+ if (!misspelled) {
125+ all_misspelled = false ;
126+ break ;
127+ }
128+ }
129+ if (all_misspelled) {
99130 MisspelledRange range;
100131 range.start = word_start;
101132 range.end = i;
102133 result.push_back (range);
103134 }
104135 }
105- } else if (!iswalpha (c)) {
136+ } else if (!isAlpha (c)) {
106137 state = unknown;
107138 }
108139 break ;
@@ -113,21 +144,25 @@ std::vector<MisspelledRange> HunspellSpellchecker::CheckSpelling(const uint16_t
113144}
114145
115146void HunspellSpellchecker::Add (const std::string& word) {
116- if (hunspell) {
147+ if (!hunspells.empty ()) {
148+ Hunspell* hunspell = hunspells[0 ].second ;
117149 hunspell->add (word.c_str ());
118150 }
119151}
120152
121153void HunspellSpellchecker::Remove (const std::string& word) {
122- if (hunspell) {
154+ if (!hunspells.empty ()) {
155+ Hunspell* hunspell = hunspells[0 ].second ;
123156 hunspell->remove (word.c_str ());
124157 }
125158}
126159
127160std::vector<std::string> HunspellSpellchecker::GetCorrectionsForMisspelling (const std::string& word) {
128161 std::vector<std::string> corrections;
129162
130- if (hunspell) {
163+ for (size_t i = 0 ; i < hunspells.size (); ++i) {
164+ Hunspell* hunspell = hunspells[i].second ;
165+
131166 char ** slist;
132167 int size = hunspell->suggest (&slist, word.c_str ());
133168
@@ -141,4 +176,17 @@ std::vector<std::string> HunspellSpellchecker::GetCorrectionsForMisspelling(cons
141176 return corrections;
142177}
143178
179+ bool HunspellSpellchecker::isAlpha (std::wint_t c) const {
180+ if (iswalpha (c)) {
181+ return true ;
182+ }
183+ for (size_t i = 0 ; i < hunspells.size (); ++i) {
184+ std::locale loc = hunspells[i].first ;
185+ if (std::isalpha ((wchar_t )c, loc)) {
186+ return true ;
187+ }
188+ }
189+ return false ;
190+ }
191+
144192} // namespace spellchecker
0 commit comments