fix: write 'char' values directly to the output stream
Previously a char value was converted to wchar_t and then serialized as a UTF-8 stream. From now on a char is always written as exactly one char; of course the resulting output needs to be a valid UTF-8 sequence. FT::dont_use_utf8 now applies only to wchar_t and std::wstring, and it is ignored if FT::hexadecimal or FT::binary is set. There is a new method, bool BaseExpression::esc_char(wchar_t val, pt::TextStream & stream), which (in most cases) will be used in derived classes. When FT::hexadecimal or FT::binary is set, wchar_t (and std::wstring) values are stored as 8 hex digits (FT::dont_use_utf8 is ignored in such a case).
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2018-2021, Tomasz Sowa
|
||||
* Copyright (c) 2018-2022, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -313,7 +313,7 @@ void DbConnector::allocate_default_expression_if_needed()
|
||||
}
|
||||
|
||||
|
||||
char DbConnector::unescape_hex_char_part(char hex)
|
||||
unsigned int DbConnector::unescape_hex_char_part(char hex)
|
||||
{
|
||||
if( hex>='0' && hex<='9' )
|
||||
{
|
||||
@@ -348,43 +348,79 @@ char DbConnector::unescape_hex_char_part(char hex)
|
||||
}
|
||||
|
||||
|
||||
char DbConnector::unescape_hex_char(char char1, char char2)
|
||||
{
|
||||
int c1 = unescape_hex_char_part(char1);
|
||||
int c2 = unescape_hex_char_part(char2);
|
||||
|
||||
return static_cast<char>(((c1 << 4) | c2));
|
||||
const char * DbConnector::unescape_hex_char(const char * str, size_t len, unsigned int & res)
|
||||
{
|
||||
unsigned int c;
|
||||
res = 0;
|
||||
|
||||
for(size_t i = 0 ; i < len ; ++i)
|
||||
{
|
||||
if( *str != 0 )
|
||||
{
|
||||
c = unescape_hex_char_part(*str);
|
||||
str += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
c = 0;
|
||||
}
|
||||
|
||||
res = (res << 4) | c;
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_hex_char(const char * str, char & c)
|
||||
{
|
||||
unsigned int res = 0;
|
||||
unescape_hex_char(str, sizeof(char) * 2, res);
|
||||
c = (char)res;
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_hex_char(const char * str, wchar_t & c)
|
||||
{
|
||||
unsigned int res = 0;
|
||||
unescape_hex_char(str, sizeof(wchar_t) * 2, res);
|
||||
c = (wchar_t)res;
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_bin_char(const char * str, char & c)
|
||||
{
|
||||
unescape_hex_char(str, c);
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_bin_char(const char * str, wchar_t & c)
|
||||
{
|
||||
unescape_hex_char(str, c);
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_hex_string(const char * str, std::string & out)
|
||||
{
|
||||
for(size_t i=0 ; str[i] != 0 ; i+=2 )
|
||||
unsigned int c = 0;
|
||||
|
||||
while( *str != 0 )
|
||||
{
|
||||
out += unescape_hex_char(str[i], str[i+1]);
|
||||
str = unescape_hex_char(str, sizeof(char) * 2, c);
|
||||
out += (char)c;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_hex_string(const char * str, std::wstring & out, const FT & field_type)
|
||||
void DbConnector::unescape_hex_string(const char * str, std::wstring & out)
|
||||
{
|
||||
if( field_type.use_utf8() )
|
||||
{
|
||||
size_t len;
|
||||
wchar_t c;
|
||||
unsigned int c = 0;
|
||||
|
||||
while( *str != 0 && (len = unescape_hex_char(str, c, field_type)) > 0 )
|
||||
{
|
||||
out += c;
|
||||
str += len;
|
||||
}
|
||||
}
|
||||
else
|
||||
while( *str != 0 )
|
||||
{
|
||||
for(size_t i=0 ; str[i] != 0 ; i+=2 )
|
||||
{
|
||||
out += static_cast<wchar_t>(static_cast<unsigned char>(unescape_hex_char(str[i], str[i+1])));
|
||||
}
|
||||
str = unescape_hex_char(str, sizeof(wchar_t) * 2, c);
|
||||
out += (wchar_t)c;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -395,151 +431,53 @@ void DbConnector::unescape_bin_string(const char * str, std::string & out)
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::unescape_bin_string(const char * str, std::wstring & out, const FT & field_type)
|
||||
void DbConnector::unescape_bin_string(const char * str, std::wstring & out)
|
||||
{
|
||||
unescape_hex_string(str, out, field_type);
|
||||
}
|
||||
|
||||
|
||||
// returns how many characters have been provided to utf8_str buffer
|
||||
// min size of utf8_str should be 5 bytes (max 4 bytes for utf8 sequence + terminating null)
|
||||
size_t DbConnector::unescape_hex_char(const char * value_str, char * utf8_str, size_t utf8_str_max_len)
|
||||
{
|
||||
size_t value_str_index = 0;
|
||||
size_t utf8_str_index = 0;
|
||||
|
||||
utf8_str[0] = 0;
|
||||
|
||||
while( utf8_str_index + 1 < utf8_str_max_len )
|
||||
{
|
||||
if( value_str[value_str_index] != 0 && value_str[value_str_index+1] != 0 )
|
||||
{
|
||||
utf8_str[utf8_str_index] = unescape_hex_char(value_str[value_str_index], value_str[value_str_index+1]);
|
||||
utf8_str[utf8_str_index+1] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
value_str_index += 2;
|
||||
utf8_str_index += 1;
|
||||
}
|
||||
|
||||
return utf8_str_index;
|
||||
}
|
||||
|
||||
|
||||
// CHECKME need to be tested
|
||||
// returns how many characters were used from value_str
|
||||
size_t DbConnector::unescape_hex_char(const char * value_str, wchar_t & field_value, const FT & field_type)
|
||||
{
|
||||
size_t len = 0;
|
||||
|
||||
if( field_type.use_utf8() )
|
||||
{
|
||||
char utf8_str[4 + 1]; // max utf8 sequence length + terminating zero
|
||||
size_t utf8_str_len = unescape_hex_char(value_str, utf8_str, sizeof(utf8_str) / sizeof(char));
|
||||
|
||||
int value_int;
|
||||
bool is_correct;
|
||||
len = pt::utf8_to_int(utf8_str, utf8_str_len, value_int, is_correct);
|
||||
len = len * 2;
|
||||
|
||||
if( is_correct )
|
||||
{
|
||||
field_value = static_cast<wchar_t>(value_int);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( log )
|
||||
{
|
||||
(*log) << pt::Log::log2 << "Morm: incorrect utf-8 sequence (ignoring)" << pt::Log::logend;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( value_str[0] != 0 && value_str[1] != 0 )
|
||||
{
|
||||
field_value = static_cast<wchar_t>(static_cast<unsigned char>(unescape_hex_char(value_str[0], value_str[1])));
|
||||
len = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( log )
|
||||
{
|
||||
(*log) << pt::Log::log2 << "Morm: unexpected end of string (ignoring)" << pt::Log::logend;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
size_t DbConnector::unescape_bin_char(const char * value_str, wchar_t & field_value, const FT & field_type)
|
||||
{
|
||||
return unescape_hex_char(value_str, field_value, field_type);
|
||||
unescape_hex_string(str, out);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CHECKME need to be tested
|
||||
void DbConnector::get_value(const char * value_str, char & field_value, const FT & field_type)
|
||||
{
|
||||
wchar_t c;
|
||||
|
||||
field_value = 0;
|
||||
get_value(value_str, c, field_type);
|
||||
|
||||
if( field_type.use_utf8() )
|
||||
if( field_type.is_hexadecimal() )
|
||||
{
|
||||
if( c <= 127 )
|
||||
{
|
||||
field_value = static_cast<char>(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( log )
|
||||
{
|
||||
(*log) << pt::Log::log2 << "Morm: a character greater than 127 cannot be stored in char type, code point: "
|
||||
<< (int)c << " '" << c << "'" << pt::Log::logend;
|
||||
}
|
||||
}
|
||||
unescape_hex_char(value_str, field_value);
|
||||
}
|
||||
else
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_char(value_str, field_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
field_value = static_cast<char>(c);
|
||||
field_value = *value_str;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CHECKME need to be tested
|
||||
void DbConnector::get_value(const char * value_str, unsigned char & field_value, const FT & field_type)
|
||||
{
|
||||
char tmp_char;
|
||||
get_value(value_str, tmp_char, field_type);
|
||||
|
||||
field_value = static_cast<unsigned char>(tmp_char);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// CHECKME need to be tested
|
||||
void DbConnector::get_value(const char * value_str, wchar_t & field_value, const FT & field_type)
|
||||
{
|
||||
field_value = 0;
|
||||
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_char(value_str, field_value, field_type);
|
||||
}
|
||||
else
|
||||
if( field_type.is_hexadecimal() )
|
||||
{
|
||||
unescape_hex_char(value_str, field_value, field_type);
|
||||
unescape_hex_char(value_str, field_value);
|
||||
}
|
||||
else
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_char(value_str, field_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -556,7 +494,7 @@ void DbConnector::get_value(const char * value_str, wchar_t & field_value, const
|
||||
}
|
||||
else
|
||||
{
|
||||
// report an error?
|
||||
field_value = 0xFFFD; // U+FFFD "replacement character";
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -568,48 +506,18 @@ void DbConnector::get_value(const char * value_str, wchar_t & field_value, const
|
||||
|
||||
|
||||
|
||||
// CHECKME need to be tested
|
||||
void DbConnector::get_value(const char * value_str, std::wstring & field_value, const FT & field_type)
|
||||
{
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_string(value_str, field_value, field_type);
|
||||
}
|
||||
else
|
||||
if( field_type.is_hexadecimal() )
|
||||
{
|
||||
unescape_hex_string(value_str, field_value, field_type);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( field_type.use_utf8() )
|
||||
{
|
||||
pt::utf8_to_wide(value_str, field_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(size_t i=0 ; value_str[i] != 0 ; ++i)
|
||||
{
|
||||
field_value += static_cast<wchar_t>(value_str[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// CHECKME need to be tested
|
||||
void DbConnector::get_value(const char * value_str, std::string & field_value, const FT & field_type)
|
||||
{
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_string(value_str, field_value);
|
||||
}
|
||||
else
|
||||
if( field_type.is_hexadecimal() )
|
||||
{
|
||||
unescape_hex_string(value_str, field_value);
|
||||
}
|
||||
else
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_string(value_str, field_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
field_value = value_str;
|
||||
}
|
||||
@@ -622,6 +530,34 @@ void DbConnector::get_value(const char * value_str, std::string_view & field_val
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::get_value(const char * value_str, std::wstring & field_value, const FT & field_type)
|
||||
{
|
||||
if( field_type.is_hexadecimal() )
|
||||
{
|
||||
unescape_hex_string(value_str, field_value);
|
||||
}
|
||||
else
|
||||
if( field_type.is_binary() )
|
||||
{
|
||||
unescape_bin_string(value_str, field_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( field_type.use_utf8() )
|
||||
{
|
||||
pt::utf8_to_wide(value_str, field_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(size_t i=0 ; value_str[i] != 0 ; ++i)
|
||||
{
|
||||
field_value += static_cast<wchar_t>((unsigned char)value_str[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DbConnector::get_value(const char * value_str, bool & field_value, const FT & field_type)
|
||||
{
|
||||
// IMPROVE ME
|
||||
|
Reference in New Issue
Block a user