added to Locale: lexicographic comparison of two characters/strings
int Compare(wchar_t c1, wchar_t c2); int Compare(const std::wstring & str1, const std::wstring & str2); git-svn-id: svn://ttmath.org/publicrep/winix/trunk@800 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
parent
7407d24586
commit
5aaab89cd8
|
@ -14,3 +14,8 @@ smallleters = "ąćęłńóśźż"
|
|||
capitalics = "ĄĆĘŁŃÓŚŹŻ"
|
||||
|
||||
|
||||
# ASCII characters (a-z, A-Z) are tested beforehand
|
||||
# so this table is not used for such characters
|
||||
sort = ( "ąa1", "ćc1", "ęe1", "łl1", "ńn1", "óo1", "śs1", "źz1", "żz2",
|
||||
"ĄA1", "ĆC1", "ĘE1", "ŁL1", "ŃN1", "ÓO1", "ŚS1", "ŹZ1", "ŻZ2" )
|
||||
|
||||
|
|
|
@ -107,6 +107,7 @@ bool read = false;
|
|||
subst_url.clear();
|
||||
subst_smalllet.clear();
|
||||
subst_capitallet.clear();
|
||||
subst_sort.clear();
|
||||
|
||||
if( dir_def && ReadSubstTable(dir_def) )
|
||||
read = true;
|
||||
|
@ -136,6 +137,7 @@ bool read = false;
|
|||
CreateSubstVector(subst_url, space.table_single[L"url_original"], space.table_single[L"url_changeto"]);
|
||||
CreateSubstVector(subst_smalllet, space.table_single[L"smallleters"], space.table_single[L"capitalics"]);
|
||||
CreateSubstVector(subst_capitallet, space.table_single[L"capitalics"], space.table_single[L"smallleters"]);
|
||||
CreateSubstSortVector(subst_sort, space.table[L"sort"]);
|
||||
|
||||
log << log3 << "Locale: read characters substitution tables from: " << file_name << logend;
|
||||
}
|
||||
|
@ -167,6 +169,34 @@ void Locale::CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring
|
|||
}
|
||||
|
||||
|
||||
|
||||
void Locale::CreateSubstSortVector(std::vector<SubstItem> & vect, std::vector<std::wstring> & tab)
|
||||
{
|
||||
SubstItem s;
|
||||
|
||||
vect.clear();
|
||||
|
||||
if( tab.empty() )
|
||||
return;
|
||||
|
||||
vect.reserve(tab.size());
|
||||
|
||||
for(size_t i=0 ; i<tab.size() ; ++i)
|
||||
{
|
||||
if( tab[i].size() >= 3 )
|
||||
{
|
||||
s.from = tab[i][0];
|
||||
s.to = tab[i][1];
|
||||
s.index = Toi(&tab[i][2]);
|
||||
|
||||
vect.push_back(s);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(vect.begin(), vect.end());
|
||||
}
|
||||
|
||||
|
||||
void Locale::Read(const char * dir, const char * dir_def)
|
||||
{
|
||||
for(size_t i=0 ; i<locale_files.size() ; ++i)
|
||||
|
@ -459,35 +489,35 @@ void Locale::UTF8(bool utf)
|
|||
binary search in vect
|
||||
vect should be sorted by 'from'
|
||||
|
||||
if the 'val' is found in vect[].from then vect[].to is returned
|
||||
else 'val' is returned
|
||||
if the 'val' is found in vect[].from then its index is returned (index to vect table)
|
||||
else vect.size() is returned
|
||||
*/
|
||||
wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
|
||||
size_t Locale::SubstFindIndex(const std::vector<SubstItem> & vect, wchar_t val)
|
||||
{
|
||||
if( vect.empty() )
|
||||
return val;
|
||||
return vect.size();
|
||||
|
||||
size_t o1 = 0;
|
||||
size_t o2 = vect.size() - 1;
|
||||
|
||||
if( val < vect[o1].from )
|
||||
return val;
|
||||
return vect.size();
|
||||
|
||||
if( val == vect[o1].from )
|
||||
return vect[o1].to;
|
||||
return o1;
|
||||
|
||||
if( val > vect[o2].from )
|
||||
return val;
|
||||
return vect.size();
|
||||
|
||||
if( val == vect[o2].from )
|
||||
return vect[o2].to;
|
||||
return o2;
|
||||
|
||||
while( o1 + 1 < o2 )
|
||||
{
|
||||
size_t o = (o1 + o2) / 2;
|
||||
|
||||
if( val == vect[o].from )
|
||||
return vect[o].to;
|
||||
return o;
|
||||
|
||||
if( val < vect[o].from )
|
||||
o2 = o;
|
||||
|
@ -495,10 +525,31 @@ wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
|
|||
o1 = o;
|
||||
}
|
||||
|
||||
return val;
|
||||
return vect.size();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
binary search in vect
|
||||
vect should be sorted by 'from'
|
||||
|
||||
if the 'val' is found in vect[].from then vect[].to is returned
|
||||
else 'val' is returned
|
||||
*/
|
||||
wchar_t Locale::SubstFind(const std::vector<SubstItem> & vect, wchar_t val)
|
||||
{
|
||||
size_t i = SubstFindIndex(vect, val);
|
||||
|
||||
if( i == vect.size() )
|
||||
{
|
||||
return val;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vect[i].to;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
wchar_t Locale::UrlSubst(wchar_t c)
|
||||
|
@ -546,3 +597,96 @@ void Locale::ToCapital(std::wstring & str)
|
|||
str[i] = ToCapital(str[i]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
comparing lexicographically two characters
|
||||
|
||||
return value:
|
||||
less than 0 if c1 is 'less' than c2
|
||||
zero if they are equal
|
||||
greater than 0 if c1 is 'greater' than c2
|
||||
|
||||
capital letters are treated equaly as small ones
|
||||
but they will appear first (before the small ones)
|
||||
*/
|
||||
int Locale::Compare(wchar_t c1, wchar_t c2)
|
||||
{
|
||||
SubstItem s1, s2;
|
||||
|
||||
s1.from = c1;
|
||||
s1.to = c1;
|
||||
s1.index = 0;
|
||||
|
||||
s2.from = c2;
|
||||
s2.to = c2;
|
||||
s2.index = 0;
|
||||
|
||||
if( !((c1>='a' && c1<='z') || (c1>='A' && c1<='Z')) )
|
||||
{
|
||||
size_t i1 = SubstFindIndex(subst_sort, c1);
|
||||
|
||||
if( i1 < subst_sort.size() )
|
||||
{
|
||||
s1.to = subst_sort[i1].to;
|
||||
s1.index = subst_sort[i1].index;
|
||||
}
|
||||
}
|
||||
|
||||
if( !((c2>='a' && c2<='z') || (c2>='A' && c2<='Z')) )
|
||||
{
|
||||
size_t i2 = SubstFindIndex(subst_sort, c2);
|
||||
|
||||
if( i2 < subst_sort.size() )
|
||||
{
|
||||
s2.to = subst_sort[i2].to;
|
||||
s2.index = subst_sort[i2].index;
|
||||
}
|
||||
}
|
||||
|
||||
wchar_t small1 = ToSmall(s1.to);
|
||||
wchar_t small2 = ToSmall(s2.to);
|
||||
|
||||
if( small1 == small2 )
|
||||
{
|
||||
if( s1.index != s2.index )
|
||||
return s1.index - s2.index;
|
||||
|
||||
// this will sort capital letters at the end (after small ones)
|
||||
return s1.to - s2.to;
|
||||
}
|
||||
|
||||
return small1 - small2;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
comparing lexicographically two strings
|
||||
|
||||
return value:
|
||||
less than 0 if str1 is 'less' than str2
|
||||
zero if they are equal
|
||||
greater than 0 if str1 is 'greater' than str2
|
||||
*/
|
||||
int Locale::Compare(const std::wstring & str1, const std::wstring & str2)
|
||||
{
|
||||
size_t i1 = 0;
|
||||
size_t i2 = 0;
|
||||
|
||||
for( ; i1 < str1.size() && i2 < str2.size() ; ++i1, ++i2)
|
||||
{
|
||||
int res = Compare(str1[i1], str2[i2]);
|
||||
|
||||
if( res != 0 )
|
||||
return res;
|
||||
}
|
||||
|
||||
if( str1.size() < str2.size() )
|
||||
return -1;
|
||||
|
||||
if( str1.size() > str2.size() )
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -99,12 +99,20 @@ public:
|
|||
wchar_t ToCapital(wchar_t c);
|
||||
void ToCapital(std::wstring & str);
|
||||
|
||||
// comparing two characters/strings
|
||||
// return a value less than zero if c1<c2, zero if c1==c2, value greater than 0 if c1>c2
|
||||
int Compare(wchar_t c1, wchar_t c2);
|
||||
int Compare(const std::wstring & str1, const std::wstring & str2);
|
||||
|
||||
private:
|
||||
|
||||
// a struct used for substitution: the character 'from' is mapped to the character 'to'
// 'index' is an additional number, it is filled in by CreateSubstSortVector
// and read by Compare() when two characters have the same 'to'
struct SubstItem
{
	wchar_t from, to;
	int index;

	// all members start as zero
	SubstItem() : from(0), to(0), index(0) {}

	// items are ordered only by 'from'
	bool operator<(const SubstItem & arg) const { return from < arg.from; }
};
|
||||
|
||||
|
@ -114,6 +122,8 @@ private:
|
|||
void ReadSubstTable(const char * dir, const char * dir_def);
|
||||
bool ReadSubstTable(const char * dir);
|
||||
void CreateSubstVector(std::vector<SubstItem> & vect, const std::wstring & tab1, const std::wstring & tab2);
|
||||
void CreateSubstSortVector(std::vector<SubstItem> & vect, std::vector<std::wstring> & tab);
|
||||
size_t SubstFindIndex(const std::vector<SubstItem> & vect, wchar_t val);
|
||||
wchar_t SubstFind(const std::vector<SubstItem> & vect, wchar_t val);
|
||||
|
||||
// locale files
|
||||
|
@ -132,6 +142,7 @@ private:
|
|||
std::vector<SubstItem> subst_url;
|
||||
std::vector<SubstItem> subst_smalllet; // changing from small to capital
|
||||
std::vector<SubstItem> subst_capitallet; // changing from capital to small
|
||||
std::vector<SubstItem> subst_sort; // local characters for comparison
|
||||
|
||||
|
||||
PT::Space space;
|
||||
|
|
Loading…
Reference in New Issue