|
1 | 1 | #include <odr/internal/oldms/word/io.hpp> |
2 | 2 |
|
| 3 | +#include "odr/internal/util/string_util.hpp" |
| 4 | + |
3 | 5 | #include <odr/internal/util/byte_stream_util.hpp> |
4 | | -#include <odr/internal/util/stream_util.hpp> |
| 6 | +#include <odr/internal/util/string_util.hpp> |
5 | 7 |
|
6 | 8 | namespace odr::internal::oldms { |
7 | 9 |
|
@@ -190,4 +192,97 @@ void oldms::skip_Prc(std::istream &in) { |
190 | 192 | in.ignore(cbGrpprl); |
191 | 193 | } |
192 | 194 |
|
| 195 | +std::string oldms::read_string_compressed(std::istream &in, |
| 196 | + const std::size_t size) { |
| 197 | + static constexpr auto eof = std::istream::traits_type::eof(); |
| 198 | + |
| 199 | + std::string result; |
| 200 | + result.reserve(size); |
| 201 | + |
| 202 | + for (std::size_t i = 0; i < size; ++i) { |
| 203 | + const auto ci = in.get(); |
| 204 | + if (ci == eof) { |
| 205 | + throw std::runtime_error("Unexpected end of input"); |
| 206 | + } |
| 207 | + if (ci < 0 || ci > 0xFF) { |
| 208 | + throw std::runtime_error("Unexpected input: " + std::to_string(ci)); |
| 209 | + } |
| 210 | + const char c = static_cast<char>(ci); |
| 211 | + if (const std::optional<char16_t> uncompressed = uncompress_char(c); |
| 212 | + uncompressed.has_value()) { |
| 213 | + util::string::append_c32(*uncompressed, result); |
| 214 | + } else { |
| 215 | + result.push_back(c); |
| 216 | + } |
| 217 | + } |
| 218 | + |
| 219 | + return result; |
| 220 | +} |
| 221 | + |
| 222 | +std::u16string oldms::read_string_uncompressed(std::istream &in, |
| 223 | + const std::size_t size) { |
| 224 | + std::u16string result; |
| 225 | + result.resize(size); |
| 226 | + |
| 227 | + in.read(reinterpret_cast<char *>(result.data()), |
| 228 | + static_cast<std::streamsize>(size * sizeof(char16_t))); |
| 229 | + |
| 230 | + return result; |
| 231 | +} |
| 232 | + |
| 233 | +std::optional<char16_t> oldms::uncompress_char(const char c) { |
| 234 | + switch (c) { |
| 235 | + case '\x82': |
| 236 | + return 0x201A; |
| 237 | + case '\x83': |
| 238 | + return 0x0192; |
| 239 | + case '\x84': |
| 240 | + return 0x201E; |
| 241 | + case '\x85': |
| 242 | + return 0x2026; |
| 243 | + case '\x86': |
| 244 | + return 0x2020; |
| 245 | + case '\x87': |
| 246 | + return 0x2021; |
| 247 | + case '\x88': |
| 248 | + return 0x02C6; |
| 249 | + case '\x89': |
| 250 | + return 0x2030; |
| 251 | + case '\x8A': |
| 252 | + return 0x0160; |
| 253 | + case '\x8B': |
| 254 | + return 0x2039; |
| 255 | + case '\x8C': |
| 256 | + return 0x0152; |
| 257 | + case '\x91': |
| 258 | + return 0x2018; |
| 259 | + case '\x92': |
| 260 | + return 0x2019; |
| 261 | + case '\x93': |
| 262 | + return 0x201C; |
| 263 | + case '\x94': |
| 264 | + return 0x201D; |
| 265 | + case '\x95': |
| 266 | + return 0x2022; |
| 267 | + case '\x96': |
| 268 | + return 0x2013; |
| 269 | + case '\x97': |
| 270 | + return 0x2014; |
| 271 | + case '\x98': |
| 272 | + return 0x02DC; |
| 273 | + case '\x99': |
| 274 | + return 0x2122; |
| 275 | + case '\x9A': |
| 276 | + return 0x0161; |
| 277 | + case '\x9B': |
| 278 | + return 0x203A; |
| 279 | + case '\x9C': |
| 280 | + return 0x0153; |
| 281 | + case '\x9F': |
| 282 | + return 0x0178; |
| 283 | + default: |
| 284 | + return std::nullopt; |
| 285 | + } |
| 286 | +} |
| 287 | + |
193 | 288 | } // namespace odr::internal |
0 commit comments