Added two functions to utf8:
template<typename StreamType> bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear = true, int mode = 1); template<typename StreamType> bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear = true, int mode = 1); These functions were moved from TextStreamBase.
This commit is contained in:
parent
792057a869
commit
8ec9350d52
|
@ -157,8 +157,6 @@ public:
|
|||
protected:
|
||||
|
||||
void put_stream(const Stream & stream);
|
||||
void put_utf8_to_wide(const Stream & stream);
|
||||
void put_wide_to_utf8(const Stream & stream);
|
||||
|
||||
|
||||
};
|
||||
|
@ -435,6 +433,9 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
|||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
|
||||
{
|
||||
// IMPROVEME
|
||||
// if char_type == 1 then if v <= 127 then put that char but if (unsigned)v > 127 put replacement character
|
||||
// if char_type > 1 then simply put that character
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
|
||||
return *this;
|
||||
|
@ -445,6 +446,9 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
|||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
|
||||
{
|
||||
// IMPROVEME
|
||||
// if char_type == 1 then if v <= 127 then put that char but if v > 127 put replacement character
|
||||
// if char_type > 1 then simply put that character
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
|
||||
return *this;
|
||||
|
@ -455,6 +459,7 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
|||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
|
||||
{
|
||||
// IMPROVEME add utf8/wide conversion, if v is from surrogate pair we can skip it
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
|
||||
return *this;
|
||||
|
@ -652,6 +657,7 @@ TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Stream
|
|||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(const Stream & stream)
|
||||
{
|
||||
|
@ -669,13 +675,13 @@ void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(const St
|
|||
if( sizeof(char_type) == sizeof(wchar_t) && stream.is_char_stream() )
|
||||
{
|
||||
// from utf8 to wide
|
||||
put_utf8_to_wide(stream);
|
||||
utf8_to_wide(stream, *this, false);
|
||||
}
|
||||
else
|
||||
if( sizeof(char_type) == sizeof(char) && stream.is_wchar_stream() )
|
||||
{
|
||||
// from wide to utf8
|
||||
put_wide_to_utf8(stream);
|
||||
wide_stream_to_utf8(stream, *this, false);
|
||||
}
|
||||
else
|
||||
if( sizeof(char_type) == sizeof(wchar_t) && stream.is_wchar_stream() )
|
||||
|
@ -695,68 +701,6 @@ void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(const St
|
|||
}
|
||||
|
||||
|
||||
// test me when sizeof(wchar_t) is 2
|
||||
// or may move me to utf8 functions?
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
void TextStreamBase<char_type, stack_size, heap_block_size>::put_utf8_to_wide(const Stream & stream)
|
||||
{
|
||||
size_t len;
|
||||
bool correct;
|
||||
size_t index = 0;
|
||||
int z;
|
||||
|
||||
do
|
||||
{
|
||||
len = utf8_to_int(stream, index, z, correct);
|
||||
|
||||
if( len > 0 )
|
||||
{
|
||||
if( !correct )
|
||||
{
|
||||
z = 0xFFFD; // U+FFFD "replacement character"
|
||||
}
|
||||
|
||||
int_to_wide(z, *this);
|
||||
index += len;
|
||||
}
|
||||
}
|
||||
while( len > 0 );
|
||||
}
|
||||
|
||||
|
||||
// test me when sizeof(wchar_t) is 2
|
||||
// or may move me to utf8 functions?
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
void TextStreamBase<char_type, stack_size, heap_block_size>::put_wide_to_utf8(const Stream & stream)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
|
||||
for(size_t i=0 ; i < stream.size() ; ++i)
|
||||
{
|
||||
int c = static_cast<int>(stream.get_wchar(i));
|
||||
|
||||
if( is_first_surrogate_char(c) && i + 1 < stream.size() )
|
||||
{
|
||||
wchar_t c1 = static_cast<wchar_t>(c);
|
||||
wchar_t c2 = stream.get_wchar(++i);
|
||||
surrogate_pair_to_int(c1, c2, c);
|
||||
}
|
||||
|
||||
size_t len = int_to_utf8(c, utf8_buf, utf8_buf_len);
|
||||
|
||||
for(size_t u=0 ; u < len ; ++u)
|
||||
{
|
||||
operator<<(utf8_buf[u]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
|
|
|
@ -60,6 +60,8 @@ namespace pt
|
|||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
|
||||
RENAMEME to is_correct_unicode_char
|
||||
*/
|
||||
bool utf8_check_range(int c);
|
||||
|
||||
|
@ -152,6 +154,8 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true,
|
|||
template<typename StreamType>
|
||||
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
||||
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear = true, int mode = 1);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -205,8 +209,11 @@ bool wide_to_utf8(const std::wstring & wide_string, char * utf8, s
|
|||
template<typename StreamType>
|
||||
void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear = true, int mode = 1); // not tested
|
||||
|
||||
template<typename StreamType>
|
||||
bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear = true, int mode = 1);
|
||||
|
||||
template<typename StreamTypeIn, typename StreamTypeOut>
|
||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested
|
||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested, IMPROVE ME add clear parameter, mode parameter is not used
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -136,6 +136,59 @@ return !was_error;
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
this function converts a UTF-8 stream into wide stream
|
||||
|
||||
input:
|
||||
stream - a UTF-8 stream for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
res - a wide stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear, int mode)
|
||||
{
|
||||
size_t len;
|
||||
bool correct;
|
||||
int z;
|
||||
size_t index = 0;
|
||||
bool was_error = false;
|
||||
|
||||
if( clear )
|
||||
res.clear();
|
||||
|
||||
// CHECKME test me when sizeof(wchar_t) is 2
|
||||
|
||||
do
|
||||
{
|
||||
len = utf8_to_int(stream, index, z, correct);
|
||||
|
||||
if( len > 0 )
|
||||
{
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
res << 0xFFFD; // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
int_to_wide(z, res);
|
||||
}
|
||||
|
||||
index += len;
|
||||
}
|
||||
}
|
||||
while( len > 0 );
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -268,6 +321,59 @@ void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, in
|
|||
}
|
||||
|
||||
|
||||
|
||||
template<typename StreamType>
|
||||
bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
|
||||
if( clear )
|
||||
utf8.clear();
|
||||
|
||||
for(size_t i=0 ; i < stream.size() ; ++i)
|
||||
{
|
||||
int c = static_cast<int>(stream.get_wchar(i));
|
||||
bool is_correct = false;
|
||||
|
||||
if( utf8_check_range(c) )
|
||||
{
|
||||
// CHECKME test me when sizeof(wchar_t) == 2
|
||||
if( is_first_surrogate_char(c) )
|
||||
{
|
||||
if( i + 1 < stream.size() )
|
||||
{
|
||||
wchar_t c1 = static_cast<wchar_t>(c);
|
||||
wchar_t c2 = stream.get_wchar(++i);
|
||||
|
||||
if( surrogate_pair_to_int(c1, c2, c) )
|
||||
{
|
||||
is_correct = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
is_correct = true;
|
||||
}
|
||||
}
|
||||
|
||||
if( is_correct )
|
||||
{
|
||||
int_to_utf8(c, utf8);
|
||||
}
|
||||
else
|
||||
{
|
||||
was_error = true;
|
||||
|
||||
if( mode == 1 )
|
||||
int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"
|
||||
}
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
// not tested
|
||||
template<typename StreamTypeIn, typename StreamTypeOut>
|
||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
|
||||
|
|
Loading…
Reference in New Issue