From 6165a2ece3a0bce92b28ffcf5c4d407f199e4d3f Mon Sep 17 00:00:00 2001
From: Tomasz Sowa
Date: Wed, 19 Jun 2024 22:54:16 +0200
Subject: [PATCH] add a new utf8_to_output_function() method

add such a method:
template<typename OutputFunction>
bool utf8_to_output_function(const char * utf8, OutputFunction output_function, int mode = 1);
---
 src/utf8/utf8.h | 65 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/src/utf8/utf8.h b/src/utf8/utf8.h
index c2d3f90..edf812b 100644
--- a/src/utf8/utf8.h
+++ b/src/utf8/utf8.h
@@ -169,16 +169,6 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len);
 bool int_to_wide(int c, std::wstring & res);
 
 
-/*
- * call a convert_function for each character from an utf8 string
- *
- * this function takes one int parameter:
- * output_function(int c)
- */
-template<typename OutputFunction>
-bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);
-
-
 /*
  * convert an UTF-8 string to a wide string
 
@@ -209,6 +199,18 @@ bool utf8_to_output_function_by_index(const Stream & stream, OutputFunction outp
 template<typename StreamIteratorType, typename StreamOrStringType>
 bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
 
+/*
+ * call a convert_function for each character from an utf8 string
+ *
+ * this function takes one int parameter:
+ * output_function(int c)
+ */
+template<typename OutputFunction>
+bool utf8_to_output_function(const char * utf8, size_t utf8_len, OutputFunction output_function, int mode = 1);
+
+template<typename OutputFunction>
+bool utf8_to_output_function(const char * utf8, OutputFunction output_function, int mode = 1);
+
 template<typename StreamIteratorType, typename OutputFunction>
 bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode = 1);
 
@@ -571,6 +573,46 @@ return !was_error;
 
 
 
+template<typename OutputFunction>
+bool utf8_to_output_function(const char * utf8, OutputFunction output_function, int mode)
+{
+int z;
+size_t len;
+bool correct, was_error = false;
+
+	while( *utf8 )
+	{
+		if( (unsigned char)*utf8 <= 0x7f )
+		{
+			// small optimization
+			len = 1;
+			correct = true;
+			z = static_cast<int>(*utf8);
+		}
+		else
+		{
+			len = utf8_to_int(utf8, z, correct); // the len will be different from zero
+		}
+
+		if( !correct )
+		{
+			if( mode == 1 )
+				output_function(0xFFFD); // U+FFFD "replacement character"
+
+			was_error = true;
+		}
+		else
+		{
+			output_function(z);
+		}
+
+		utf8 += len;
+	}
+
+	return !was_error;
+}
+
+
 template<typename OutputFunction>
 bool wide_to_output_function(const wchar_t * str, size_t len, OutputFunction output_function, int mode)
 {
@@ -976,6 +1018,9 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i
 
 
 
+
+
+
 template<typename StreamIteratorType, typename OutputFunction>
 bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode)
 {