| File: | _build/../src/decoder-cat.cc |
| Warning: | line 529, column 39 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | |||
| 2 | * Copyright © 2017, 2018, 2019 Christian Persch | |||
| 3 | * | |||
| 4 | * This programme is free software; you can redistribute it and/or | |||
| 5 | * modify it under the terms of the GNU General Public | |||
| 6 | * License as published by the Free Software Foundation; either | |||
| 7 | * version 3 of the License, or (at your option) any later version. | |||
| 8 | * | |||
| 9 | * This programme is distributed in the hope that it will be useful, | |||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| 12 | * General Public License for more details. | |||
| 13 | * | |||
| 14 | * You should have received a copy of the GNU General Public License | |||
| 15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. | |||
| 16 | */ | |||
| 17 | ||||
| 18 | #include "config.h" | |||
| 19 | ||||
| 20 | #include <glib.h> | |||
| 21 | ||||
| 22 | #include <fcntl.h> | |||
| 23 | #include <locale.h> | |||
| 24 | #include <unistd.h> | |||
| 25 | ||||
| 26 | #include <cassert> | |||
| 27 | #include <cerrno> | |||
| 28 | #include <cstdio> | |||
| 29 | #include <cstdlib> | |||
| 30 | #include <cstring> | |||
| 31 | ||||
| 32 | #include <string> | |||
| 33 | ||||
| 34 | #include "debug.h" | |||
| 35 | #include "glib-glue.hh" | |||
| 36 | #include "libc-glue.hh" | |||
| 37 | #include "utf8.hh" | |||
| 38 | ||||
| 39 | #ifdef WITH_ICU | |||
| 40 | #include "icu-decoder.hh" | |||
| 41 | #include "icu-glue.hh" | |||
| 42 | #endif | |||
| 43 | ||||
| 44 | using namespace std::literals; | |||
| 45 | ||||
| 46 | class Options { | |||
| 47 | private: | |||
| 48 | bool m_benchmark{false}; | |||
| 49 | bool m_codepoints{false}; | |||
| 50 | bool m_list{false}; | |||
| 51 | bool m_quiet{false}; | |||
| 52 | bool m_statistics{false}; | |||
| 53 | bool m_utf8{false}; | |||
| 54 | int m_repeat{1}; | |||
| 55 | char* m_charset{nullptr}; | |||
| 56 | char** m_filenames{nullptr}; | |||
| 57 | ||||
| 58 | template<typename T1, typename T2 = T1> | |||
| 59 | class OptionArg { | |||
| 60 | private: | |||
| 61 | T1* m_return_ptr; | |||
| 62 | T2 m_value; | |||
| 63 | public: | |||
| 64 | OptionArg(T1* ptr, T2 v) : m_return_ptr{ptr}, m_value{v} { } | |||
| 65 | ~OptionArg() { *m_return_ptr = m_value; } | |||
| 66 | ||||
| 67 | inline constexpr T2* ptr() noexcept { return &m_value; } | |||
| 68 | }; | |||
| 69 | ||||
| 70 | using BoolArg = OptionArg<bool, gboolean>; | |||
| 71 | using IntArg = OptionArg<int>; | |||
| 72 | using StrArg = OptionArg<char*>; | |||
| 73 | using StrvArg = OptionArg<char**>; | |||
| 74 | ||||
| 75 | public: | |||
| 76 | ||||
| 77 | Options() noexcept = default; | |||
| 78 | Options(Options const&) = delete; | |||
| 79 | Options(Options&&) = delete; | |||
| 80 | ||||
| 81 | ~Options() { | |||
| 82 | if (m_filenames != nullptr) | |||
| 83 | g_strfreev(m_filenames); | |||
| 84 | } | |||
| 85 | ||||
| 86 | Options& operator=(Options const&) = delete; | |||
| 87 | Options& operator=(Options&&) = delete; | |||
| 88 | ||||
| 89 | inline constexpr bool benchmark() const noexcept { return m_benchmark; } | |||
| 90 | inline constexpr bool codepoints() const noexcept { return m_codepoints; } | |||
| 91 | inline constexpr bool list() const noexcept { return m_list; } | |||
| 92 | inline constexpr bool statistics() const noexcept { return m_statistics; } | |||
| 93 | inline constexpr int quiet() const noexcept { return m_quiet; } | |||
| 94 | inline constexpr bool utf8() const noexcept { return m_utf8; } | |||
| 95 | inline constexpr int repeat() const noexcept { return m_repeat; } | |||
| 96 | inline constexpr char const* charset() const noexcept { return m_charset; } | |||
| 97 | inline constexpr char const* const* filenames() const noexcept { return m_filenames; } | |||
| 98 | ||||
| 99 | bool parse(int argc, | |||
| 100 | char* argv[], | |||
| 101 | GError** error) noexcept | |||
| 102 | { | |||
| 103 | { | |||
| 104 | auto benchmark = BoolArg{&m_benchmark, false}; | |||
| 105 | auto codepoints = BoolArg{&m_codepoints, false}; | |||
| 106 | auto list = BoolArg{&m_list, false}; | |||
| 107 | auto quiet = BoolArg{&m_quiet, false}; | |||
| 108 | auto statistics = BoolArg{&m_statistics, false}; | |||
| 109 | auto utf8 = BoolArg{&m_utf8, false}; | |||
| 110 | auto repeat = IntArg{&m_repeat, 1}; | |||
| 111 | auto charset = StrArg{&m_charset, nullptr}; | |||
| 112 | auto filenames = StrvArg{&m_filenames, nullptr}; | |||
| 113 | GOptionEntry const entries[] = { | |||
| 114 | { .long_name = "benchmark", .short_name = 'b', .flags = 0, .arg = G_OPTION_ARG_NONE, .arg_data = benchmark.ptr(), | |||
| 115 | .description = "Measure time spent parsing each file", .arg_description = nullptr }, | |||
| 116 | ||||
| 117 | { .long_name = "codepoints", .short_name = 'u', .flags = 0, .arg = G_OPTION_ARG_NONE, .arg_data = codepoints.ptr(), | |||
| 118 | .description = "Output unicode code points by number", .arg_description = nullptr }, | |||
| 119 | ||||
| 120 | { .long_name = "charset", .short_name = 'f', .flags = 0, .arg = G_OPTION_ARG_STRING, .arg_data = charset.ptr(), | |||
| 121 | .description = "Input charset", .arg_description = "CHARSET" }, | |||
| 122 | ||||
| 123 | { .long_name = "list-charsets", .short_name = 'l', .flags = 0, .arg = G_OPTION_ARG_NONE, .arg_data = list.ptr(), | |||
| 124 | .description = "List available charsets", .arg_description = nullptr }, | |||
| 125 | ||||
| 126 | { .long_name = "quiet", .short_name = 'q', .flags = 0, .arg = G_OPTION_ARG_NONE, .arg_data = quiet.ptr(), | |||
| 127 | .description = "Suppress output except for statistics and benchmark", .arg_description = nullptr }, | |||
| 128 | ||||
| 129 | { .long_name = "repeat", .short_name = 'r', .flags = 0, .arg = G_OPTION_ARG_INT, .arg_data = repeat.ptr(), | |||
| 130 | .description = "Repeat each file COUNT times", .arg_description = "COUNT" }, | |||
| 131 | ||||
| 132 | { .long_name = "statistics", .short_name = 's', .flags = 0, .arg = G_OPTION_ARG_NONE, .arg_data = statistics.ptr(), | |||
| 133 | .description = "Output statistics", .arg_description = nullptr }, | |||
| 134 | ||||
| 135 | { .long_name = "utf-8", .short_name = '8', .flags = 0, .arg = G_OPTION_ARG_NONE, .arg_data = utf8.ptr(), | |||
| 136 | .description = "UTF-8 input (default)", .arg_description = nullptr }, | |||
| 137 | ||||
| 138 | { .long_name = G_OPTION_REMAINING"", .short_name = 0, .flags = 0, .arg = G_OPTION_ARG_FILENAME_ARRAY, .arg_data = filenames.ptr(), | |||
| 139 | .description = nullptr, .arg_description = nullptr }, | |||
| 140 | }; | |||
| 141 | ||||
| 142 | auto context = g_option_context_new("[FILE…] — decoder cat"); | |||
| 143 | g_option_context_set_help_enabled(context, true); | |||
| 144 | g_option_context_add_main_entries(context, entries, nullptr); | |||
| 145 | ||||
| 146 | auto rv = bool{g_option_context_parse(context, &argc, &argv, error) != false}; | |||
| 147 | g_option_context_free(context); | |||
| 148 | if (!rv) | |||
| 149 | return rv; | |||
| 150 | } | |||
| 151 | ||||
| 152 | return true; | |||
| 153 | } | |||
| 154 | }; // class Options | |||
| 155 | ||||
| 156 | class Printer { | |||
| 157 | private: | |||
| 158 | std::string m_str{}; | |||
| 159 | bool m_codepoints{false}; | |||
| 160 | ||||
| 161 | void | |||
| 162 | print(char const* buf, | |||
| 163 | size_t len) noexcept | |||
| 164 | { | |||
| 165 | m_str.append(buf, len); | |||
| 166 | } | |||
| 167 | ||||
| 168 | G_GNUC_PRINTF(2, 3)__attribute__((__format__ (__printf__, 2, 3))) | |||
| 169 | void | |||
| 170 | print_format(char const* format, | |||
| 171 | ...) | |||
| 172 | { | |||
| 173 | char buf[256]; | |||
| 174 | va_list args; | |||
| 175 | va_start(args, format)__builtin_va_start(args, format); | |||
| 176 | auto const len = g_vsnprintf(buf, sizeof(buf), format, args); | |||
| 177 | va_end(args)__builtin_va_end(args); | |||
| 178 | ||||
| 179 | m_str.append(buf, len); | |||
| 180 | } | |||
| 181 | ||||
| 182 | void | |||
| 183 | print_u32(uint32_t const c) noexcept | |||
| 184 | { | |||
| 185 | char ubuf[7]; | |||
| 186 | auto const len = g_unichar_to_utf8(c, ubuf); | |||
| 187 | ||||
| 188 | if (m_codepoints) { | |||
| 189 | ubuf[len] = 0; | |||
| 190 | if (g_unichar_isprint(c)) { | |||
| 191 | print_format("[%04X %s]", c, ubuf); | |||
| 192 | } else { | |||
| 193 | print_format("[%04X]", c); | |||
| 194 | } | |||
| 195 | } else { | |||
| 196 | print(ubuf, len); | |||
| 197 | } | |||
| 198 | } | |||
| 199 | ||||
| 200 | void | |||
| 201 | printout(bool force_lf = false) noexcept | |||
| 202 | { | |||
| 203 | if (m_codepoints || force_lf) | |||
| 204 | m_str.push_back('\n'); | |||
| 205 | ||||
| 206 | #pragma GCC diagnostic push | |||
| 207 | #pragma GCC diagnostic ignored "-Wunused-result" | |||
| 208 | write(STDOUT_FILENO1, m_str.data(), m_str.size()); | |||
| 209 | #pragma GCC diagnostic pop | |||
| 210 | m_str.clear(); | |||
| 211 | } | |||
| 212 | ||||
| 213 | static inline auto const k_LF = uint32_t{0xau}; | |||
| 214 | ||||
| 215 | public: | |||
| 216 | ||||
| 217 | Printer(bool codepoints = false) noexcept | |||
| 218 | : m_codepoints{codepoints} | |||
| 219 | { | |||
| 220 | } | |||
| 221 | ||||
| 222 | ~Printer() noexcept | |||
| 223 | { | |||
| 224 | printout(true); | |||
| 225 | } | |||
| 226 | ||||
| 227 | void operator()(uint32_t const c) noexcept | |||
| 228 | { | |||
| 229 | print_u32(c); | |||
| 230 | if (c == k_LF) | |||
| 231 | printout(); | |||
| 232 | } | |||
| 233 | ||||
| 234 | }; // class Printer | |||
| 235 | ||||
/* Output functor that accepts a code point and does nothing with it. */
class Sink {
public:
        void operator()([[maybe_unused]] uint32_t c) noexcept
        {
                /* intentionally empty */
        }

}; // class Sink
| 241 | ||||
| 242 | #ifdef WITH_ICU | |||
| 243 | ||||
| 244 | static std::unique_ptr<bte::base::ICUDecoder> | |||
| 245 | make_decoder(Options const& options) | |||
| 246 | { | |||
| 247 | auto err = icu::ErrorCode{}; | |||
| 248 | ||||
| 249 | auto converter = std::shared_ptr<UConverter>{ucnv_openucnv_open_76(options.charset(), err), &ucnv_closeucnv_close_76}; | |||
| 250 | if (err.isFailure()) { | |||
| 251 | if (!options.quiet()) | |||
| 252 | g_printerr("Failure to open converter for \"%s\": %s\n", | |||
| 253 | options.charset(), err.errorName()); | |||
| 254 | return {}; | |||
| 255 | } | |||
| 256 | ||||
| 257 | if (err.get() == U_AMBIGUOUS_ALIAS_WARNING) { | |||
| 258 | err.reset(); | |||
| 259 | auto canonical = ucnv_getNameucnv_getName_76(converter.get(), err); | |||
| 260 | if (err.isSuccess() && !options.quiet()) | |||
| 261 | g_printerr("Warning: charset \"%s\" is ambigous alias for \"%s\"\n", | |||
| 262 | options.charset(), canonical); | |||
| 263 | } | |||
| 264 | ||||
| 265 | err.reset(); | |||
| 266 | auto u32_converter = std::shared_ptr<UConverter>{ucnv_openucnv_open_76("utf32platformendian", err), &ucnv_closeucnv_close_76}; | |||
| 267 | if (err.isFailure()) { | |||
| 268 | if (!options.quiet()) | |||
| 269 | g_printerr("Failure to open converter for \"%s\": %s\n", | |||
| 270 | "UTF-32", err.errorName()); | |||
| 271 | return {}; | |||
| 272 | } | |||
| 273 | ||||
| 274 | return std::make_unique<bte::base::ICUDecoder>(converter, u32_converter); | |||
| 275 | } | |||
| 276 | ||||
| 277 | #endif /* WITH_ICU */ | |||
| 278 | ||||
| 279 | class Processor { | |||
| 280 | private: | |||
| 281 | gsize m_input_bytes{0}; | |||
| 282 | gsize m_output_chars{0}; | |||
| 283 | gsize m_errors{0}; | |||
| 284 | GArray* m_bench_times{nullptr}; | |||
| 285 | ||||
| 286 | template<class Functor> | |||
| 287 | void | |||
| 288 | process_file_utf8(int fd, | |||
| 289 | Functor& func) | |||
| 290 | { | |||
| 291 | auto decoder = bte::base::UTF8Decoder{}; | |||
| 292 | ||||
| 293 | auto const buf_size = size_t{16384}; | |||
| 294 | auto buf = g_new0(uint8_t, buf_size)(uint8_t *) (__extension__ ({ gsize __n = (gsize) (buf_size); gsize __s = sizeof (uint8_t); gpointer __p; if (__s == 1) __p = g_malloc0 (__n); else if (__builtin_constant_p (__n) && (__s == 0 || __n <= (9223372036854775807L *2UL+1UL) / __s )) __p = g_malloc0 (__n * __s); else __p = g_malloc0_n (__n, __s ); __p; })); | |||
| 295 | ||||
| 296 | auto start_time = g_get_monotonic_time(); | |||
| 297 | ||||
| 298 | auto buf_start = size_t{0}; | |||
| 299 | for (;;) { | |||
| 300 | auto len = read(fd, buf + buf_start, buf_size - buf_start); | |||
| 301 | if (!len) | |||
| 302 | break; | |||
| 303 | if (len == -1) { | |||
| 304 | if (errno(*__errno_location ()) == EAGAIN11) | |||
| 305 | continue; | |||
| 306 | break; | |||
| 307 | } | |||
| 308 | ||||
| 309 | m_input_bytes += len; | |||
| 310 | ||||
| 311 | auto const bufend = buf + len; | |||
| 312 | for (auto sptr = buf; sptr < bufend; ++sptr) { | |||
| 313 | switch (decoder.decode(*sptr)) { | |||
| 314 | case bte::base::UTF8Decoder::REJECT_REWIND: | |||
| 315 | /* Rewind the stream. | |||
| 316 | * Note that this will never lead to a loop, since in the | |||
| 317 | * next round this byte *will* be consumed. | |||
| 318 | */ | |||
| 319 | --sptr; | |||
| 320 | [[fallthrough]]; | |||
| 321 | case bte::base::UTF8Decoder::REJECT: | |||
| 322 | decoder.reset(); | |||
| 323 | /* Fall through to insert the U+FFFD replacement character. */ | |||
| 324 | [[fallthrough]]; | |||
| 325 | case bte::base::UTF8Decoder::ACCEPT: | |||
| 326 | func(decoder.codepoint()); | |||
| 327 | m_output_chars++; | |||
| 328 | ||||
| 329 | default: | |||
| 330 | break; | |||
| 331 | } | |||
| 332 | } | |||
| 333 | } | |||
| 334 | ||||
| 335 | /* Flush remaining output; at most one character */ | |||
| 336 | if (decoder.flush()) { | |||
| 337 | func(decoder.codepoint()); | |||
| 338 | m_output_chars++; | |||
| 339 | } | |||
| 340 | ||||
| 341 | auto const time_spent = int64_t{g_get_monotonic_time() - start_time}; | |||
| 342 | g_array_append_val(m_bench_times, time_spent)g_array_append_vals (m_bench_times, &(time_spent), 1); | |||
| 343 | ||||
| 344 | g_free(buf); | |||
| 345 | } | |||
| 346 | ||||
| 347 | #ifdef WITH_ICU | |||
| 348 | template<class Functor> | |||
| 349 | void | |||
| 350 | process_file_icu(int fd, | |||
| 351 | bte::base::ICUDecoder* decoder, | |||
| 352 | Functor& func) | |||
| 353 | { | |||
| 354 | decoder->reset(); | |||
| 355 | ||||
| 356 | auto const buf_size = size_t{16384}; | |||
| 357 | auto buf = g_new0(uint8_t, buf_size)(uint8_t *) (__extension__ ({ gsize __n = (gsize) (buf_size); gsize __s = sizeof (uint8_t); gpointer __p; if (__s == 1) __p = g_malloc0 (__n); else if (__builtin_constant_p (__n) && (__s == 0 || __n <= (9223372036854775807L *2UL+1UL) / __s )) __p = g_malloc0 (__n * __s); else __p = g_malloc0_n (__n, __s ); __p; })); | |||
| 358 | ||||
| 359 | auto start_time = g_get_monotonic_time(); | |||
| 360 | ||||
| 361 | auto buf_start = size_t{0}; | |||
| 362 | while (true) { | |||
| 363 | auto len = read(fd, buf + buf_start, buf_size - buf_start); | |||
| 364 | if (!len) /* EOF */ | |||
| 365 | break; | |||
| 366 | if (len == -1) { | |||
| 367 | if (errno(*__errno_location ()) == EAGAIN11) | |||
| 368 | continue; | |||
| 369 | break; | |||
| 370 | } | |||
| 371 | ||||
| 372 | m_input_bytes += len; | |||
| 373 | ||||
| 374 | auto sptr = reinterpret_cast<uint8_t const*>(buf); | |||
| 375 | auto const sptrend = buf + len; | |||
| 376 | while (sptr < sptrend) { | |||
| 377 | /* Note that rewinding will never lead to an infinite loop, | |||
| 378 | * since when the decoder runs out of output, this input byte | |||
| 379 | * *will* be consumed. | |||
| 380 | */ | |||
| 381 | switch (decoder->decode(&sptr)) { | |||
| 382 | case bte::base::ICUDecoder::Result::eSomething: | |||
| 383 | func(decoder->codepoint()); | |||
| 384 | m_output_chars++; | |||
| 385 | break; | |||
| 386 | ||||
| 387 | case bte::base::ICUDecoder::Result::eNothing: | |||
| 388 | break; | |||
| 389 | ||||
| 390 | case bte::base::ICUDecoder::Result::eError: | |||
| 391 | // FIXMEchpe need do ++sptr here? | |||
| 392 | m_errors++; | |||
| 393 | decoder->reset(); | |||
| 394 | break; | |||
| 395 | } | |||
| 396 | } | |||
| 397 | } | |||
| 398 | ||||
| 399 | /* Flush remaining output */ | |||
| 400 | auto sptr = reinterpret_cast<uint8_t const*>(buf + buf_size); | |||
| 401 | auto result = bte::base::ICUDecoder::Result{}; | |||
| 402 | while ((result = decoder->decode(&sptr, true)) == bte::base::ICUDecoder::Result::eSomething) { | |||
| 403 | func(decoder->codepoint()); | |||
| 404 | m_output_chars++; | |||
| 405 | } | |||
| 406 | ||||
| 407 | auto const time_spent = int64_t{g_get_monotonic_time() - start_time}; | |||
| 408 | g_array_append_val(m_bench_times, time_spent)g_array_append_vals (m_bench_times, &(time_spent), 1); | |||
| 409 | ||||
| 410 | g_free(buf); | |||
| 411 | } | |||
| 412 | #endif /* WITH_ICU */ | |||
| 413 | ||||
| 414 | template<class Functor> | |||
| 415 | bool | |||
| 416 | process_file(int fd, | |||
| 417 | Options const& options, | |||
| 418 | Functor& func) | |||
| 419 | { | |||
| 420 | #ifdef WITH_ICU | |||
| 421 | auto decoder = std::unique_ptr<bte::base::ICUDecoder>{}; | |||
| 422 | if (options.charset()) { | |||
| 423 | decoder = make_decoder(options); | |||
| 424 | if (!decoder) | |||
| 425 | return false; | |||
| 426 | } | |||
| 427 | ||||
| 428 | assert(decoder != nullptr || options.charset() == nullptr)(static_cast <bool> (decoder != nullptr || options.charset () == nullptr) ? void (0) : __assert_fail ("decoder != nullptr || options.charset() == nullptr" , __builtin_FILE (), __builtin_LINE (), __extension__ __PRETTY_FUNCTION__ )); | |||
| 429 | #endif | |||
| 430 | ||||
| 431 | for (auto i = 0; i < options.repeat(); ++i) { | |||
| 432 | if (i > 0 && lseek(fd, 0, SEEK_SET0) != 0) { | |||
| 433 | auto errsv = bte::libc::ErrnoSaver{}; | |||
| 434 | g_printerr("Failed to seek: %s\n", g_strerror(errsv)); | |||
| 435 | return false; | |||
| 436 | } | |||
| 437 | ||||
| 438 | #ifdef WITH_ICU | |||
| 439 | if (decoder) { | |||
| 440 | process_file_icu(fd, decoder.get(), func); | |||
| 441 | } else | |||
| 442 | #endif | |||
| 443 | { | |||
| 444 | process_file_utf8(fd, func); | |||
| 445 | } | |||
| 446 | } | |||
| 447 | ||||
| 448 | return true; | |||
| 449 | } | |||
| 450 | ||||
| 451 | public: | |||
| 452 | ||||
| 453 | Processor() noexcept | |||
| 454 | { | |||
| 455 | m_bench_times = g_array_new(false, true, sizeof(int64_t)); | |||
| 456 | } | |||
| 457 | ||||
| 458 | ~Processor() noexcept | |||
| 459 | { | |||
| 460 | g_array_free(m_bench_times, true); | |||
| 461 | } | |||
| 462 | ||||
| 463 | template<class Functor> | |||
| 464 | bool | |||
| 465 | process_files(Options const& options, | |||
| 466 | Functor& func) | |||
| 467 | { | |||
| 468 | auto r = bool{true}; | |||
| 469 | if (auto filenames = options.filenames(); filenames != nullptr) { | |||
| 470 | for (auto i = 0; filenames[i] != nullptr; i++) { | |||
| 471 | auto filename = filenames[i]; | |||
| 472 | ||||
| 473 | auto fd = int{-1}; | |||
| 474 | if (g_str_equal(filename, "-")(strcmp ((const char *) (filename), (const char *) ("-")) == 0 )) { | |||
| 475 | fd = STDIN_FILENO0; | |||
| 476 | ||||
| 477 | if (options.repeat() != 1) { | |||
| 478 | g_printerr("Cannot consume STDIN more than once\n"); | |||
| 479 | return false; | |||
| 480 | } | |||
| 481 | } else { | |||
| 482 | fd = ::open(filename, O_RDONLY00); | |||
| 483 | if (fd == -1) { | |||
| 484 | auto errsv = bte::libc::ErrnoSaver{}; | |||
| 485 | g_printerr("Error opening file %s: %s\n", | |||
| 486 | filename, g_strerror(errsv)); | |||
| 487 | } | |||
| 488 | } | |||
| 489 | if (fd != -1) { | |||
| 490 | r = process_file(fd, options, func); | |||
| 491 | if (fd != STDIN_FILENO0) | |||
| 492 | close(fd); | |||
| 493 | if (!r) | |||
| 494 | break; | |||
| 495 | } | |||
| 496 | } | |||
| 497 | } else { | |||
| 498 | r = process_file(STDIN_FILENO0, options, func); | |||
| 499 | } | |||
| 500 | ||||
| 501 | return r; | |||
| 502 | } | |||
| 503 | ||||
| 504 | void print_statistics() const noexcept | |||
| 505 | { | |||
| 506 | g_printerr("%\'16" G_GSIZE_FORMAT"lu" " input bytes produced %\'16" G_GSIZE_FORMAT"lu" | |||
| 507 | " unichars and %" G_GSIZE_FORMAT"lu" " errors\n", | |||
| 508 | m_input_bytes, m_output_chars, m_errors); | |||
| 509 | } | |||
| 510 | ||||
| 511 | void print_benchmark() const noexcept | |||
| 512 | { | |||
| 513 | g_array_sort(m_bench_times, | |||
| 514 | [](void const* p1, void const* p2) -> int { | |||
| 515 | int64_t const t1 = *(int64_t const*)p1; | |||
| 516 | int64_t const t2 = *(int64_t const*)p2; | |||
| 517 | return t1 == t2 ? 0 : (t1 < t2 ? -1 : 1); | |||
| 518 | }); | |||
| 519 | ||||
| 520 | auto total_time = int64_t{0}; | |||
| 521 | for (unsigned int i = 0; i < m_bench_times->len; ++i) | |||
| 522 | total_time += g_array_index(m_bench_times, int64_t, i)(((int64_t*) (void *) (m_bench_times)->data) [(i)]); | |||
| 523 | ||||
| 524 | g_printerr("\nTimes: best %\'" G_GINT64_FORMAT"li" "µs " | |||
| 525 | "worst %\'" G_GINT64_FORMAT"li" "µs " | |||
| 526 | "average %\'" G_GINT64_FORMAT"li" "µs\n", | |||
| 527 | g_array_index(m_bench_times, int64_t, 0)(((int64_t*) (void *) (m_bench_times)->data) [(0)]), | |||
| 528 | g_array_index(m_bench_times, int64_t, m_bench_times->len - 1)(((int64_t*) (void *) (m_bench_times)->data) [(m_bench_times ->len - 1)]), | |||
| 529 | total_time / (int64_t)m_bench_times->len); | |||
| ||||
| 530 | for (unsigned int i = 0; i < m_bench_times->len; ++i) | |||
| 531 | g_printerr(" %\'" G_GINT64_FORMAT"li" "µs\n", | |||
| 532 | g_array_index(m_bench_times, int64_t, i)(((int64_t*) (void *) (m_bench_times)->data) [(i)])); | |||
| 533 | } | |||
| 534 | ||||
| 535 | }; // class Processor | |||
| 536 | ||||
| 537 | // main | |||
| 538 | ||||
| 539 | int | |||
| 540 | main(int argc, | |||
| 541 | char *argv[]) | |||
| 542 | { | |||
| 543 | setlocale(LC_ALL6, ""); | |||
| 544 | _bte_debug_init_bte_external_debug_init(); | |||
| 545 | ||||
| 546 | auto options = Options{}; | |||
| 547 | auto error = bte::glib::Error{}; | |||
| 548 | if (!options.parse(argc, argv, error)) { | |||
| ||||
| 549 | g_printerr("Failed to parse arguments: %s\n", error.message()); | |||
| 550 | return EXIT_FAILURE1; | |||
| 551 | } | |||
| 552 | ||||
| 553 | if (options.list()) { | |||
| 554 | #ifdef WITH_ICU | |||
| 555 | auto charsets = bte::base::get_icu_charsets(true); | |||
| 556 | for (auto i = 0; charsets[i]; ++i) | |||
| 557 | g_print("%s\n", charsets[i]); | |||
| 558 | g_strfreev(charsets); | |||
| 559 | ||||
| 560 | return EXIT_SUCCESS0; | |||
| 561 | #else | |||
| 562 | g_printerr("ICU support not available.\n"); | |||
| 563 | return EXIT_FAILURE1; | |||
| 564 | #endif | |||
| 565 | } | |||
| 566 | ||||
| 567 | auto rv = bool{}; | |||
| 568 | auto proc = Processor{}; | |||
| 569 | if (options.quiet()) { | |||
| 570 | auto sink = Sink{}; | |||
| 571 | rv = proc.process_files(options, sink); | |||
| 572 | } else { | |||
| 573 | auto printer = Printer{options.codepoints()}; | |||
| 574 | rv = proc.process_files(options, printer); | |||
| 575 | } | |||
| 576 | ||||
| 577 | if (options.statistics()) | |||
| 578 | proc.print_statistics(); | |||
| 579 | if (options.benchmark()) | |||
| 580 | proc.print_benchmark(); | |||
| 581 | ||||
| 582 | return rv ? EXIT_SUCCESS0 : EXIT_FAILURE1; | |||
| 583 | } |