add some UTF-8 conversion methods

add new methods:
- bool int_to_stream(int c, pt::Stream & stream);
- template<typename OutputFunction>
  bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode = 1);
- template<typename StreamIteratorType, typename OutputFunction>
  bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode = 1);
- template<typename StreamType, typename OutputFunction>
  bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);

make some methods public:
- size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
- size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)

rename and make some methods public:
- template<typename OutputFunction>
  utf8_to_wide_generic(const char * utf8, size_t utf8_len, OutputFunction convert_function, int mode) -> utf8_to_output_function(...)

while here:
- fix: correctly convert characters in Log::put_multiline_generic()
This commit is contained in:
2024-05-30 20:19:04 +02:00
parent 5fd17175c1
commit aacb1f43ae
11 changed files with 428 additions and 338 deletions

View File

@@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2010-2023, Tomasz Sowa
* Copyright (c) 2010-2024, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,6 @@ namespace pt
{
/*!
returns true if 'c' is a correct unicode character
*/
@@ -128,6 +125,83 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
/*
an auxiliary function for converting from wide characters to UTF-8
converting a wide character into one int
returns how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too
*/
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
{
if( string_len == 0 )
{
z = 0;
correct = false;
return 0;
}
z = static_cast<int>(*wide_string);
correct = true;
if( sizeof(wchar_t) == 2 && is_first_surrogate_char(z) )
{
if( string_len > 1 )
{
int z2 = *(wide_string+1);
if( is_second_surrogate_char(z2) )
{
z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
return 2;
}
else
{
correct = false;
return 1;
}
}
else
{
correct = false;
return 1;
}
}
else
{
correct = utf8_check_range(z);
return 1;
}
}
/*
 * An auxiliary function for converting from wide characters to UTF-8.
 *
 * A version of wide_to_int() for a null terminated wide_string: one character
 * is read from the beginning of the string and stored in z as an int.
 *
 * If wide_string is empty then z is set to zero, 'correct' is set to false
 * and zero is returned. Otherwise the work is done by the wide_to_int() version
 * taking a string length, which is called with a length of two when a second
 * character is available or with a length of one when it is not.
 *
 * Returns how many wide characters were used; if wide_string has at least
 * one character then the return value is always greater than zero.
 */
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
{
	if( wide_string[0] == 0 )
	{
		z = 0;
		correct = false;
		return 0;
	}

	// the first character is not the terminator so reading wide_string[1] is allowed
	const size_t available_len = (wide_string[1] != 0) ? 2 : 1;

	return wide_to_int(wide_string, available_len, z, correct);
}
/*
* converts an int to a wide string
*
@@ -185,6 +259,26 @@ bool int_to_wide(int c, std::wstring & res)
}
/*
 * puts the character c to the stream
 *
 * return true if c was a correct unicode character
 * and has been put to the stream:
 * - for a char stream true is returned when int_to_utf8() returns a value greater than zero
 * - for a wchar stream the result of int_to_wide() is returned
 * - for any other stream false is returned
 */
bool int_to_stream(int c, pt::Stream & stream)
{
	if( stream.is_char_stream() )
		return int_to_utf8(c, stream) > 0;

	if( stream.is_wchar_stream() )
		return int_to_wide(c, stream);

	return false;
}
/*!
@@ -410,9 +504,9 @@ bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool c
if( clear )
res.clear();
bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
bool status = utf8_to_output_function(utf8, utf8_len, [&res](int c) {
int_to_wide(c, res);
});
}, mode);
return status;
}