added: to locale: comparing lexicographically two characters/strings

int Compare(wchar_t c1, wchar_t c2);
       int Compare(const std::wstring & str1, const std::wstring & str2);
       
	   


git-svn-id: svn://ttmath.org/publicrep/winix/trunk@800 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2012-01-21 20:51:13 +00:00
parent 7407d24586
commit 5aaab89cd8
3 changed files with 171 additions and 11 deletions

View File

@ -14,3 +14,8 @@ smallleters = "ąćęłńóśźż"
capitalics = "ĄĆĘŁŃÓŚŹŻ"
# ASCII characters (a-z, A-Z) are tested beforehand
# so this table is not used for such characters
sort = ( "ąa1", "ćc1", "ęe1", "łl1", "ńn1", "óo1", "śs1", "źz1", "żz2",
"ĄA1", "ĆC1", "ĘE1", "ŁL1", "ŃN1", "ÓO1", "ŚS1", "ŹZ1", "ŻZ2" )

View File

@ -107,6 +107,7 @@ bool read = false;
subst_url.clear();
subst_smalllet.clear();
subst_capitallet.clear();
subst_sort.clear();
if( dir_def && ReadSubstTable(dir_def) )
read = true;
@ -135,7 +136,8 @@ bool read = false;
read = true;
CreateSubstVector(subst_url, space.table_single[L"url_original"], space.table_single[L"url_changeto"]);
CreateSubstVector(subst_smalllet, space.table_single[L"smallleters"], space.table_single[L"capitalics"]);
CreateSubstVector(subst_capitallet, space.table_single[L"capitalics"], space.table_single[L"smallleters"]);
CreateSubstVector(subst_capitallet, space.table_single[L"capitalics"], space.table_single[L"smallleters"]);
CreateSubstSortVector(subst_sort, space.table[L"sort"]);
log << log3 << "Locale: read characters substitution tables from: " << file_name << logend;
}
@ -167,6 +169,34 @@ void Locale::CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring
}
void Locale::CreateSubstSortVector(std::vector<SubstItem> & vect, std::vector<std::wstring> & tab)
{
SubstItem s;
vect.clear();
if( tab.empty() )
return;
vect.reserve(tab.size());
for(size_t i=0 ; i<tab.size() ; ++i)
{
if( tab[i].size() >= 3 )
{
s.from = tab[i][0];
s.to = tab[i][1];
s.index = Toi(&tab[i][2]);
vect.push_back(s);
}
}
std::sort(vect.begin(), vect.end());
}
void Locale::Read(const char * dir, const char * dir_def)
{
for(size_t i=0 ; i<locale_files.size() ; ++i)
@ -459,35 +489,35 @@ void Locale::UTF8(bool utf)
binary search in vect
vect should be sorted by 'from'
if the 'val' is found in vect[].from then vect[].to is returned
else 'val' is returned
if the 'val' is found in vect[].from then its index is returned (index to vect table)
else vect.size() is returned
*/
wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
size_t Locale::SubstFindIndex(const std::vector<SubstItem> & vect, wchar_t val)
{
if( vect.empty() )
return val;
return vect.size();
size_t o1 = 0;
size_t o2 = vect.size() - 1;
if( val < vect[o1].from )
return val;
return vect.size();
if( val == vect[o1].from )
return vect[o1].to;
return o1;
if( val > vect[o2].from )
return val;
return vect.size();
if( val == vect[o2].from )
return vect[o2].to;
return o2;
while( o1 + 1 < o2 )
{
size_t o = (o1 + o2) / 2;
if( val == vect[o].from )
return vect[o].to;
return o;
if( val < vect[o].from )
o2 = o;
@ -495,10 +525,31 @@ wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
o1 = o;
}
return val;
return vect.size();
}
/*
binary search in vect
vect should be sorted by 'from'
if the 'val' is found in vect[].from then vect[].to is returned
else 'val' is returned
*/
wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
{
size_t i = SubstFindIndex(vect, val);
if( i == vect.size() )
{
return val;
}
else
{
return vect[i].to;
}
}
wchar_t Locale::UrlSubst(wchar_t c)
@ -546,3 +597,96 @@ void Locale::ToCapital(std::wstring & str)
str[i] = ToCapital(str[i]);
}
/*
	comparing lexicographically two characters

	return value:
	  less than 0    if c1 is 'less' than c2
	  zero           if they are equal
	  greater than 0 if c1 is 'greater' than c2

	characters other than a-z and A-Z are looked up in the subst_sort table,
	if found they are compared as the table's 'to' character
	and the table's 'index' is used to order characters sharing the same 'to'

	capital letters are treated equally to small ones
	but they will appear first (before the small ones)
*/
int Locale::Compare(wchar_t c1, wchar_t c2)
{
	// by default a character is compared as itself, with index 0
	wchar_t base1 = c1;
	wchar_t base2 = c2;
	int order1 = 0;
	int order2 = 0;

	const bool ascii1 = (c1>='a' && c1<='z') || (c1>='A' && c1<='Z');
	const bool ascii2 = (c2>='a' && c2<='z') || (c2>='A' && c2<='Z');

	if( !ascii1 )
	{
		const size_t pos = SubstFindIndex(subst_sort, c1);

		if( pos < subst_sort.size() )
		{
			base1  = subst_sort[pos].to;
			order1 = subst_sort[pos].index;
		}
	}

	if( !ascii2 )
	{
		const size_t pos = SubstFindIndex(subst_sort, c2);

		if( pos < subst_sort.size() )
		{
			base2  = subst_sort[pos].to;
			order2 = subst_sort[pos].index;
		}
	}

	const wchar_t lower1 = ToSmall(base1);
	const wchar_t lower2 = ToSmall(base2);

	// different letters (the letter case is ignored here)
	if( lower1 != lower2 )
		return lower1 - lower2;

	// the same letter: the index from the sort table decides
	if( order1 != order2 )
		return order1 - order2;

	// the same letter and index: comparing the raw values,
	// for a-z/A-Z this puts a capital letter before its small one
	return base1 - base2;
}
/*
	comparing lexicographically two strings

	the strings are compared character by character with Compare(wchar_t, wchar_t),
	the first pair of characters which differs gives the result,
	if one string is a prefix of the other then the shorter one is 'less'

	return value:
	  less than 0    if str1 is 'less' than str2
	  zero           if they are equal
	  greater than 0 if str1 is 'greater' than str2
*/
int Locale::Compare(const std::wstring & str1, const std::wstring & str2)
{
	const size_t len1   = str1.size();
	const size_t len2   = str2.size();
	const size_t common = (len1 < len2) ? len1 : len2;

	for(size_t pos = 0 ; pos < common ; ++pos)
	{
		const int diff = Compare(str1[pos], str2[pos]);

		if( diff != 0 )
			return diff;
	}

	// the common part is the same, the length decides
	if( len1 == len2 )
		return 0;

	return (len1 < len2) ? -1 : 1;
}

View File

@ -99,12 +99,20 @@ public:
wchar_t ToCapital(wchar_t c);
void ToCapital(std::wstring & str);
// comparing lexicographically two characters/strings
// return a value less than zero if c1<c2, zero if c1==c2, value greater than 0 if c1>c2
// (the same for str1 and str2)
// characters other than a-z, A-Z are looked up in the 'sort' substitution table
// a capital letter is placed before its small counterpart
// if one string is a prefix of the other then the shorter one is the 'less' one
int Compare(wchar_t c1, wchar_t c2);
int Compare(const std::wstring & str1, const std::wstring & str2);
private:
// struct used for substitution
//   from  - the character which is looked for
//   to    - the character 'from' is changed to
//   index - an additional number stored with the item
// items are ordered only by 'from'
struct SubstItem
{
	wchar_t from, to;
	int index;

	SubstItem() : from(0), to(0), index(0)
	{
	}

	bool operator<(const SubstItem & arg) const
	{
		return from < arg.from;
	}
};
@ -114,6 +122,8 @@ private:
void ReadSubstTable(const char * dir, const char * dir_def);
bool ReadSubstTable(const char * dir);
void CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring & tab1, const std::wstring & tab2);
// building 'vect' from the entries of 'tab': entry[0] is 'from', entry[1] is 'to',
// the rest of the entry is a number stored as 'index'; entries shorter than three characters are skipped
// the resulting vector is sorted by 'from'
void CreateSubstSortVector(std::vector<SubstItem> & vect, std::vector<std::wstring> & tab);

// binary search for 'val' among vect[].from (vect should be sorted by 'from')
// returns the index of the found item or vect.size() if there is no such item
size_t SubstFindIndex(const std::vector<SubstItem> & vect, wchar_t val);

// the same search as SubstFindIndex but returns vect[].to of the found item
// or 'val' itself if there is no such item
wchar_t SubstFind(const std::vector<SubstItem> & vect, wchar_t val);
// locale files
@ -132,6 +142,7 @@ private:
// substitution tables, created when the substitution file is read

// built from the 'url_original' and 'url_changeto' entries
std::vector<SubstItem> subst_url;
std::vector<SubstItem> subst_smalllet; // changing from small to capital
std::vector<SubstItem> subst_capitallet; // changing from capital to small
// built from the 'sort' table by CreateSubstSortVector, used by Compare()
std::vector<SubstItem> subst_sort; // local characters for comparison
PT::Space space;