fix: put 'char' type directly to the output stream

Previously a char was converted to wchar_t and then serialized as a UTF-8 stream.
Now a char is always written as a single char; of course the chars need to form a valid UTF-8 sequence.

FT::dont_use_utf8 now applies only to wchar_t and std::wstring,
and it is ignored if FT::hexadecimal or FT::binary is set.

There is now a bool BaseExpression::esc_char(wchar_t val, pt::TextStream & stream) method
which (in most cases) will be used in derived classes.

wchar_t (and std::wstring) values are now stored as 8 hex digits when using FT::hexadecimal
or FT::binary (FT::dont_use_utf8 is ignored in such a case).
This commit is contained in:
2022-02-08 12:47:34 +01:00
parent 48d515ea64
commit 0bdabfc7b4
11 changed files with 252 additions and 274 deletions

View File

@@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2018-2021, Tomasz Sowa
* Copyright (c) 2018-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -381,6 +381,38 @@ void BaseExpression::char_to_hex(char c, pt::TextStream & stream)
}
/*
 * put a wide character to the stream byte by byte
 *
 * the character is converted to unsigned int and bits 31..0 are split into
 * four bytes; each byte, starting from the most significant one, is forwarded
 * to char_to_hex(char, pt::TextStream &)
 */
void BaseExpression::char_to_hex(wchar_t c, pt::TextStream & stream)
{
	const unsigned int code = static_cast<unsigned int>(c);

	for(int shift = 24 ; shift >= 0 ; shift -= 8)
	{
		const unsigned char byte = static_cast<unsigned char>(code >> shift);
		char_to_hex(static_cast<char>(byte), stream);
	}
}
/*
 * try to escape a single char
 *
 * the char is first converted to unsigned char and only then to wchar_t,
 * after that the call is passed on to esc_char(wchar_t, pt::TextStream &)
 *
 * return true if the val character was escaped and put (or ignored) to the stream
 */
bool BaseExpression::esc_char(char val, pt::TextStream & stream)
{
	const unsigned char raw = static_cast<unsigned char>(val);
	const wchar_t wide = static_cast<wchar_t>(raw);

	return esc_char(wide, stream);
}
/*
 * base implementation of escaping a wide character
 *
 * return true if the val character was escaped and put (or ignored) to the stream;
 * this implementation never escapes anything: it does not touch the stream
 * and always returns false
 *
 * in most cases you have to provide your own esc_char(wchar_t val, pt::TextStream & stream) method
 */
bool BaseExpression::esc_char(wchar_t val, pt::TextStream & stream)
{
	const bool was_escaped = false;
	return was_escaped;
}
void BaseExpression::esc(char val, pt::TextStream & stream, const FT & field_type)
@@ -391,7 +423,10 @@ void BaseExpression::esc(char val, pt::TextStream & stream, const FT & field_typ
}
else
{
stream << val;
if( !esc_char(val, stream) )
{
stream << val;
}
}
}
@@ -404,48 +439,46 @@ void BaseExpression::esc(unsigned char val, pt::TextStream & stream, const FT &
void BaseExpression::esc(wchar_t val, pt::TextStream & stream, const FT & field_type)
{
if( field_type.use_utf8() )
if( field_type.is_binary() || field_type.is_hexadecimal() )
{
char utf8_buf[10];
// FIXME surrogate pairs are not used
size_t utf8_len = pt::int_to_utf8((int)val, utf8_buf, sizeof(utf8_buf));
for(size_t a = 0 ; a < utf8_len ; ++a)
{
esc(utf8_buf[a], stream, field_type);
}
char_to_hex(val, stream);
}
else
{
esc(static_cast<char>(val), stream, field_type);
if( field_type.use_utf8() )
{
if( !esc_char(val, stream) )
{
stream << val;
}
}
else
{
char val_char = (char)(unsigned char)val;
if( !esc_char(val_char, stream) )
{
stream << val_char;
}
}
}
}
void BaseExpression::esc(const wchar_t * val, bool has_known_length, size_t len, pt::TextStream & stream, const FT & field_type)
{
if( field_type.use_utf8() )
for(size_t i = 0 ; has_known_length ? (i < len) : val[i] != 0 ; ++i)
{
char utf8_buf[10];
for(size_t i = 0 ; has_known_length ? (i < len) : val[i] != 0 ; ++i)
{
// FIXME surrogate pairs are not used
size_t utf8_len = pt::int_to_utf8((int)val[i], utf8_buf, sizeof(utf8_buf));
for(size_t a = 0 ; a < utf8_len ; ++a)
{
esc(utf8_buf[a], stream, field_type);
}
}
esc(val[i], stream, field_type);
}
else
}
void BaseExpression::esc(const char * val, bool has_known_length, size_t len, pt::TextStream & stream, const FT & field_type)
{
for(size_t i = 0 ; has_known_length ? (i < len) : val[i] != 0 ; ++i)
{
for(size_t i = 0 ; has_known_length ? (i < len) : val[i] != 0 ; ++i)
{
esc(static_cast<char>(val[i]), stream, field_type);
}
esc(val[i], stream, field_type);
}
}
@@ -464,19 +497,13 @@ void BaseExpression::esc(const wchar_t * val, pt::TextStream & stream, const FT
void BaseExpression::esc(const std::string & val, pt::TextStream & stream, const FT & field_type)
{
for(size_t i = 0 ; i < val.size() ; ++i)
{
esc(val[i], stream, field_type);
}
esc(val.c_str(), true, val.size(), stream, field_type);
}
void BaseExpression::esc(const char * val, pt::TextStream & stream, const FT & field_type)
{
for(size_t i = 0 ; val[i] != 0 ; ++i)
{
esc(val[i], stream, field_type);
}
esc(val, false, 0, stream, field_type);
}