leavy only utf8.h and utf8.cpp
Remove utf8_private.h, utf8_private.cpp and utf8_templates.h and move their methods to utf8.h/utf8.cpp.
This commit is contained in:
@@ -34,29 +34,27 @@
|
||||
|
||||
#include <fstream>
|
||||
#include "utf8.h"
|
||||
#include "utf8_private.h"
|
||||
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
*/
|
||||
/*
|
||||
* returns true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool utf8_check_range(int c)
|
||||
{
|
||||
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
|
||||
this method is used when reading from an utf8 string
|
||||
how_many_bytes - means how many bytes from the utf8 string were read
|
||||
*/
|
||||
/*
|
||||
* returns true if 'c' is a correct unicode character
|
||||
*
|
||||
* this method is used when reading from an utf8 string
|
||||
* how_many_bytes - means how many bytes from the utf8 string were read
|
||||
*/
|
||||
bool utf8_check_range(int c, int how_many_bytes)
|
||||
{
|
||||
if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 )
|
||||
@@ -126,12 +124,12 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
|
||||
|
||||
|
||||
/*
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
converting a wide character into one int
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
* converting a wide character into one int
|
||||
*
|
||||
* returns how many wide characters were used
|
||||
* if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
||||
{
|
||||
if( string_len == 0 )
|
||||
@@ -177,12 +175,12 @@ size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool
|
||||
|
||||
|
||||
/*
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
converting a wide character into one int
|
||||
* an auxiliary function for converting from wide characters to UTF-8
|
||||
* converting a wide character into one int
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
* returns how many wide characters were used
|
||||
* if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
|
||||
{
|
||||
size_t min_str_len = 1;
|
||||
@@ -235,10 +233,10 @@ size_t int_to_wide(int c, wchar_t * res, size_t max_buf_len)
|
||||
|
||||
|
||||
/*
|
||||
converts an int to a wide string
|
||||
|
||||
returns true if a character was inserted to the string
|
||||
*/
|
||||
* converts an int to a wide string
|
||||
*
|
||||
* returns true if a character was inserted to the string
|
||||
*/
|
||||
bool int_to_wide(int c, std::wstring & res)
|
||||
{
|
||||
wchar_t buf[2];
|
||||
@@ -281,23 +279,23 @@ bool int_to_stream(int c, pt::Stream & stream)
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one UTF-8 character into one wide-character
|
||||
|
||||
input:
|
||||
utf8 - an input UTF-8 string
|
||||
utf8_len - size of the input string,
|
||||
the string should be at least 4 bytes length for correctly
|
||||
recognized the utf-8 sequence
|
||||
|
||||
output:
|
||||
res - an output character
|
||||
correct - true if it is a correct character
|
||||
|
||||
the function returns how many characters have been used from the input string
|
||||
(returns zero only if utf8_len is zero)
|
||||
even if there are errors the functions returns a different from zero value
|
||||
*/
|
||||
/*
|
||||
* this function converts one UTF-8 character into one wide-character
|
||||
*
|
||||
* input:
|
||||
* utf8 - an input UTF-8 string
|
||||
* utf8_len - size of the input string,
|
||||
* the string should be at least 4 bytes length for correctly
|
||||
* recognized the utf-8 sequence
|
||||
*
|
||||
* output:
|
||||
* res - an output character
|
||||
* correct - true if it is a correct character
|
||||
*
|
||||
* the function returns how many characters have been used from the input string
|
||||
* (returns zero only if utf8_len is zero)
|
||||
* even if there are errors the functions returns a different from zero value
|
||||
*/
|
||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct)
|
||||
{
|
||||
size_t i, len;
|
||||
@@ -1016,6 +1014,160 @@ return res;
|
||||
|
||||
|
||||
|
||||
namespace private_namespace
|
||||
{
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
|
||||
{
|
||||
for(len=0 ; (uz & 0x80) != 0 ; ++len)
|
||||
uz <<= 1;
|
||||
|
||||
if( len == 1 || len > 4 )
|
||||
return false;
|
||||
|
||||
res = uz;
|
||||
|
||||
if( len > 0 )
|
||||
res >>= len;
|
||||
|
||||
if( len == 0 )
|
||||
len = 1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from UTF-8 string
|
||||
*/
|
||||
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
|
||||
{
|
||||
if( (uz & 0xc0) != 0x80 )
|
||||
return false;
|
||||
|
||||
res <<= 6;
|
||||
res |= (uz & 0x3F);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
|
||||
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
a null terminating character)
|
||||
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
if this flag is true then utf8_written is equal to zero
|
||||
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
(was_error will not be true if the utf8 buffer is too small)
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
utf8_written = 0;
|
||||
was_utf8_buf_too_small = false;
|
||||
chars = wide_to_int(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
{
|
||||
utf8_written = int_to_utf8(z, utf8, utf8_len);
|
||||
|
||||
if( utf8_written == 0 )
|
||||
was_utf8_buf_too_small = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( mode == 1 )
|
||||
{
|
||||
utf8_written = int_to_utf8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
|
||||
|
||||
if( utf8_written == 0 )
|
||||
was_utf8_buf_too_small = true;
|
||||
}
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = wide_to_int(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = int_to_utf8(z, utf8, false) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = wide_to_int(wide_string, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = int_to_utf8(z, utf8, false) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
} // namespace private_namespace
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
Reference in New Issue
Block a user