api2021 part I #4

Merged
tomasz.sowa merged 67 commits from api2021 into master 2021-05-27 10:37:36 +02:00
3 changed files with 48 additions and 7 deletions
Showing only changes of commit 463cec3283 - Show all commits

View File

@ -56,6 +56,42 @@ bool UTF8_CheckRange(int c)
}
/*!
	validates a code point decoded from an utf8 string

	c              - the decoded value to verify
	how_many_bytes - the number of bytes of the utf8 sequence 'c' was read from

	returns true only when 'c' lies in the range which belongs to a sequence
	of exactly 'how_many_bytes' bytes:
	  1 byte:  0x0000   - 0x007f
	  2 bytes: 0x0080   - 0x07ff
	  3 bytes: 0x0800   - 0xffff (without 0xD800 - 0xDFFF)
	  4 bytes: 0x10000  - 0x10FFFF
	any other combination (including a byte count outside of 1-4) gives false
*/
bool UTF8_CheckRange(int c, int how_many_bytes)
{
	switch( how_many_bytes )
	{
	case 1:
		return c >= 0x0000 && c <= 0x007f;

	case 2:
		return c >= 0x0080 && c <= 0x07ff;

	case 3:
		// the 0xD800 - 0xDFFF block is left out of the three byte range
		return (c >= 0x0800 && c < 0xD800) || (c > 0xDFFF && c <= 0xffff);

	case 4:
		return c >= 0x10000 && c <= 0x10FFFF;

	default:
		return false;
	}
}
/*!
@ -97,7 +133,7 @@ size_t i, len;
return i;
}
if( UTF8_CheckRange(res) )
if( UTF8_CheckRange(res, len) )
correct = true;
return len;
@ -141,7 +177,7 @@ size_t i, len;
return i;
}
if( UTF8_CheckRange(res) )
if( UTF8_CheckRange(res, len) )
correct = true;
return len;
@ -210,7 +246,7 @@ unsigned char uz;
return i;
}
if( UTF8_CheckRange(res) )
if( UTF8_CheckRange(res, len) )
correct = true;
return len;

View File

@ -64,6 +64,14 @@ namespace PT
bool UTF8_CheckRange(int c);
/*!
returns true if 'c' is a correct unicode character
this method is used when reading from an utf8 string
how_many_bytes - means how many bytes from the utf8 string were read
*/
bool UTF8_CheckRange(int c, int how_many_bytes);
/*
*

View File

@ -52,7 +52,7 @@ bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
for(len=0 ; (uz & 0x80) != 0 ; ++len)
uz <<= 1;
if( len == 1 )
if( len == 1 || len > 4 )
return false;
res = uz;
@ -60,9 +60,6 @@ bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
if( len > 0 )
res >>= len;
if( res == 0 )
return false;
if( len == 0 )
len = 1;