added: to locale: comparing lexicographically two characters/strings
int Compare(wchar_t c1, wchar_t c2); int Compare(const std::wstring & str1, const std::wstring & str2); git-svn-id: svn://ttmath.org/publicrep/winix/trunk@800 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
7407d24586
commit
5aaab89cd8
|
@ -14,3 +14,8 @@ smallleters = "ąćęłńóśźż"
|
||||||
capitalics = "ĄĆĘŁŃÓŚŹŻ"
|
capitalics = "ĄĆĘŁŃÓŚŹŻ"
|
||||||
|
|
||||||
|
|
||||||
|
# ASCII characters (a-z, A-Z) are tested beforehand
|
||||||
|
# so this table is not used for such characters
|
||||||
|
sort = ( "ąa1", "ćc1", "ęe1", "łl1", "ńn1", "óo1", "śs1", "źz1", "żz2",
|
||||||
|
"ĄA1", "ĆC1", "ĘE1", "ŁL1", "ŃN1", "ÓO1", "ŚS1", "ŹZ1", "ŻZ2" )
|
||||||
|
|
||||||
|
|
|
@ -107,6 +107,7 @@ bool read = false;
|
||||||
subst_url.clear();
|
subst_url.clear();
|
||||||
subst_smalllet.clear();
|
subst_smalllet.clear();
|
||||||
subst_capitallet.clear();
|
subst_capitallet.clear();
|
||||||
|
subst_sort.clear();
|
||||||
|
|
||||||
if( dir_def && ReadSubstTable(dir_def) )
|
if( dir_def && ReadSubstTable(dir_def) )
|
||||||
read = true;
|
read = true;
|
||||||
|
@ -136,6 +137,7 @@ bool read = false;
|
||||||
CreateSubstVector(subst_url, space.table_single[L"url_original"], space.table_single[L"url_changeto"]);
|
CreateSubstVector(subst_url, space.table_single[L"url_original"], space.table_single[L"url_changeto"]);
|
||||||
CreateSubstVector(subst_smalllet, space.table_single[L"smallleters"], space.table_single[L"capitalics"]);
|
CreateSubstVector(subst_smalllet, space.table_single[L"smallleters"], space.table_single[L"capitalics"]);
|
||||||
CreateSubstVector(subst_capitallet, space.table_single[L"capitalics"], space.table_single[L"smallleters"]);
|
CreateSubstVector(subst_capitallet, space.table_single[L"capitalics"], space.table_single[L"smallleters"]);
|
||||||
|
CreateSubstSortVector(subst_sort, space.table[L"sort"]);
|
||||||
|
|
||||||
log << log3 << "Locale: read characters substitution tables from: " << file_name << logend;
|
log << log3 << "Locale: read characters substitution tables from: " << file_name << logend;
|
||||||
}
|
}
|
||||||
|
@ -167,6 +169,34 @@ void Locale::CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void Locale::CreateSubstSortVector(std::vector<SubstItem> & vect, std::vector<std::wstring> & tab)
|
||||||
|
{
|
||||||
|
SubstItem s;
|
||||||
|
|
||||||
|
vect.clear();
|
||||||
|
|
||||||
|
if( tab.empty() )
|
||||||
|
return;
|
||||||
|
|
||||||
|
vect.reserve(tab.size());
|
||||||
|
|
||||||
|
for(size_t i=0 ; i<tab.size() ; ++i)
|
||||||
|
{
|
||||||
|
if( tab[i].size() >= 3 )
|
||||||
|
{
|
||||||
|
s.from = tab[i][0];
|
||||||
|
s.to = tab[i][1];
|
||||||
|
s.index = Toi(&tab[i][2]);
|
||||||
|
|
||||||
|
vect.push_back(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(vect.begin(), vect.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Locale::Read(const char * dir, const char * dir_def)
|
void Locale::Read(const char * dir, const char * dir_def)
|
||||||
{
|
{
|
||||||
for(size_t i=0 ; i<locale_files.size() ; ++i)
|
for(size_t i=0 ; i<locale_files.size() ; ++i)
|
||||||
|
@ -459,35 +489,35 @@ void Locale::UTF8(bool utf)
|
||||||
binary search in vect
|
binary search in vect
|
||||||
vect should be sorted by 'from'
|
vect should be sorted by 'from'
|
||||||
|
|
||||||
if the 'val' is found in vect[].from then vect[].to is reterned
|
if the 'val' is found in vect[].from then its index is returned (index to vect table)
|
||||||
else 'val' is returned
|
else vect.size() is returned
|
||||||
*/
|
*/
|
||||||
wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
|
size_t Locale::SubstFindIndex(const std::vector<SubstItem> & vect, wchar_t val)
|
||||||
{
|
{
|
||||||
if( vect.empty() )
|
if( vect.empty() )
|
||||||
return val;
|
return vect.size();
|
||||||
|
|
||||||
size_t o1 = 0;
|
size_t o1 = 0;
|
||||||
size_t o2 = vect.size() - 1;
|
size_t o2 = vect.size() - 1;
|
||||||
|
|
||||||
if( val < vect[o1].from )
|
if( val < vect[o1].from )
|
||||||
return val;
|
return vect.size();
|
||||||
|
|
||||||
if( val == vect[o1].from )
|
if( val == vect[o1].from )
|
||||||
return vect[o1].to;
|
return o1;
|
||||||
|
|
||||||
if( val > vect[o2].from )
|
if( val > vect[o2].from )
|
||||||
return val;
|
return vect.size();
|
||||||
|
|
||||||
if( val == vect[o2].from )
|
if( val == vect[o2].from )
|
||||||
return vect[o2].to;
|
return o2;
|
||||||
|
|
||||||
while( o1 + 1 < o2 )
|
while( o1 + 1 < o2 )
|
||||||
{
|
{
|
||||||
size_t o = (o1 + o2) / 2;
|
size_t o = (o1 + o2) / 2;
|
||||||
|
|
||||||
if( val == vect[o].from )
|
if( val == vect[o].from )
|
||||||
return vect[o].to;
|
return o;
|
||||||
|
|
||||||
if( val < vect[o].from )
|
if( val < vect[o].from )
|
||||||
o2 = o;
|
o2 = o;
|
||||||
|
@ -495,10 +525,31 @@ wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
|
||||||
o1 = o;
|
o1 = o;
|
||||||
}
|
}
|
||||||
|
|
||||||
return val;
|
return vect.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
binary search in vect
|
||||||
|
vect should be sorted by 'from'
|
||||||
|
|
||||||
|
if the 'val' is found in vect[].from then vect[].to is returned
|
||||||
|
else 'val' is returned
|
||||||
|
*/
|
||||||
|
wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
|
||||||
|
{
|
||||||
|
size_t i = SubstFindIndex(vect, val);
|
||||||
|
|
||||||
|
if( i == vect.size() )
|
||||||
|
{
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return vect[i].to;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
wchar_t Locale::UrlSubst(wchar_t c)
|
wchar_t Locale::UrlSubst(wchar_t c)
|
||||||
|
@ -546,3 +597,96 @@ void Locale::ToCapital(std::wstring & str)
|
||||||
str[i] = ToCapital(str[i]);
|
str[i] = ToCapital(str[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
comparing lexicographically two characters
|
||||||
|
|
||||||
|
return value:
|
||||||
|
less than 0 if c1 is 'less' than c2
|
||||||
|
zero if they are equal
|
||||||
|
greater than 0 if c1 is 'greater' than c2
|
||||||
|
|
||||||
|
capital letters are treated equaly as small ones
|
||||||
|
but they will appear first (before the small ones)
|
||||||
|
*/
|
||||||
|
int Locale::Compare(wchar_t c1, wchar_t c2)
|
||||||
|
{
|
||||||
|
SubstItem s1, s2;
|
||||||
|
|
||||||
|
s1.from = c1;
|
||||||
|
s1.to = c1;
|
||||||
|
s1.index = 0;
|
||||||
|
|
||||||
|
s2.from = c2;
|
||||||
|
s2.to = c2;
|
||||||
|
s2.index = 0;
|
||||||
|
|
||||||
|
if( !((c1>='a' && c1<='z') || (c1>='A' && c1<='Z')) )
|
||||||
|
{
|
||||||
|
size_t i1 = SubstFindIndex(subst_sort, c1);
|
||||||
|
|
||||||
|
if( i1 < subst_sort.size() )
|
||||||
|
{
|
||||||
|
s1.to = subst_sort[i1].to;
|
||||||
|
s1.index = subst_sort[i1].index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !((c2>='a' && c2<='z') || (c2>='A' && c2<='Z')) )
|
||||||
|
{
|
||||||
|
size_t i2 = SubstFindIndex(subst_sort, c2);
|
||||||
|
|
||||||
|
if( i2 < subst_sort.size() )
|
||||||
|
{
|
||||||
|
s2.to = subst_sort[i2].to;
|
||||||
|
s2.index = subst_sort[i2].index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
wchar_t small1 = ToSmall(s1.to);
|
||||||
|
wchar_t small2 = ToSmall(s2.to);
|
||||||
|
|
||||||
|
if( small1 == small2 )
|
||||||
|
{
|
||||||
|
if( s1.index != s2.index )
|
||||||
|
return s1.index - s2.index;
|
||||||
|
|
||||||
|
// this will sort capital letters at the end (after small ones)
|
||||||
|
return s1.to - s2.to;
|
||||||
|
}
|
||||||
|
|
||||||
|
return small1 - small2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
comparing lexicographically two strings
|
||||||
|
|
||||||
|
return value:
|
||||||
|
less than 0 if str1 is 'less' than str2
|
||||||
|
zero if they are equal
|
||||||
|
greater than 0 if str1 is 'greater' than str2
|
||||||
|
*/
|
||||||
|
int Locale::Compare(const std::wstring & str1, const std::wstring & str2)
|
||||||
|
{
|
||||||
|
size_t i1 = 0;
|
||||||
|
size_t i2 = 0;
|
||||||
|
|
||||||
|
for( ; i1 < str1.size() && i2 < str2.size() ; ++i1, ++i2)
|
||||||
|
{
|
||||||
|
int res = Compare(str1[i1], str2[i2]);
|
||||||
|
|
||||||
|
if( res != 0 )
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( str1.size() < str2.size() )
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if( str1.size() > str2.size() )
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -99,12 +99,20 @@ public:
|
||||||
wchar_t ToCapital(wchar_t c);
|
wchar_t ToCapital(wchar_t c);
|
||||||
void ToCapital(std::wstring & str);
|
void ToCapital(std::wstring & str);
|
||||||
|
|
||||||
|
// comparing two characters/strings
|
||||||
|
// return a value less than zero if c1<c2, zero if c1==c2, value greater than 0 if c1>c2
|
||||||
|
int Compare(wchar_t c1, wchar_t c2);
|
||||||
|
int Compare(const std::wstring & str1, const std::wstring & str2);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
// struct used for substitution
|
// struct used for substitution
|
||||||
struct SubstItem
|
struct SubstItem
|
||||||
{
|
{
|
||||||
wchar_t from, to;
|
wchar_t from, to;
|
||||||
|
int index;
|
||||||
|
|
||||||
|
SubstItem() { from = to = 0; index = 0; }
|
||||||
bool operator<(const SubstItem & arg) const { return from < arg.from; }
|
bool operator<(const SubstItem & arg) const { return from < arg.from; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -114,6 +122,8 @@ private:
|
||||||
void ReadSubstTable(const char * dir, const char * dir_def);
|
void ReadSubstTable(const char * dir, const char * dir_def);
|
||||||
bool ReadSubstTable(const char * dir);
|
bool ReadSubstTable(const char * dir);
|
||||||
void CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring & tab1, const std::wstring & tab2);
|
void CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring & tab1, const std::wstring & tab2);
|
||||||
|
void CreateSubstSortVector(std::vector<SubstItem> & vect, std::vector<std::wstring> & tab);
|
||||||
|
size_t SubstFindIndex(const std::vector<SubstItem> & vect, wchar_t val);
|
||||||
wchar_t SubstFind(const std::vector<SubstItem> & vect, wchar_t val);
|
wchar_t SubstFind(const std::vector<SubstItem> & vect, wchar_t val);
|
||||||
|
|
||||||
// locale files
|
// locale files
|
||||||
|
@ -132,6 +142,7 @@ private:
|
||||||
std::vector<SubstItem> subst_url;
|
std::vector<SubstItem> subst_url;
|
||||||
std::vector<SubstItem> subst_smalllet; // changing from small to capital
|
std::vector<SubstItem> subst_smalllet; // changing from small to capital
|
||||||
std::vector<SubstItem> subst_capitallet; // changing from capital to small
|
std::vector<SubstItem> subst_capitallet; // changing from capital to small
|
||||||
|
std::vector<SubstItem> subst_sort; // local characters for comparison
|
||||||
|
|
||||||
|
|
||||||
PT::Space space;
|
PT::Space space;
|
||||||
|
|
Loading…
Reference in New Issue