winix/core/misc.h

1002 lines
21 KiB
C++
Executable File

/*
* This file is a part of Winix
* and is not publicly distributed
*
* Copyright (c) 2008-2013, Tomasz Sowa
* All rights reserved.
*
*/
#ifndef headerfile_winix_core_misc
#define headerfile_winix_core_misc
#include <string>
#include <sstream>
#include <ctime>
#include <cstdio>
#include "item.h"
#include "requesttypes.h"
#include "date/date.h"
#include "textstream/textstream.h"
#include "utf8/utf8.h"
/*
conversions between text and numbers
*/
int Toi(const std::string & str, int base = 10);
int Toi(const std::wstring & str, int base = 10);
int Toi(const char * str, int base = 10);
int Toi(const wchar_t * str, int base = 10);
long Tol(const std::string & str, int base = 10);
long Tol(const std::wstring & str, int base = 10);
long Tol(const char * str, int base = 10);
long Tol(const wchar_t * str, int base = 10);
// if the buffer is too small it will be terminated at the beginning (empty string)
// and the function returns false
template<class CharType>
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10)
{
size_t i1, i2;
long rest;
if( buf_len == 0 )
return false;
i1 = i2 = 0;
if( base < 2 ) base = 2;
if( base > 16 ) base = 16;
do
{
rest = value % base;
value = value / base;
buffer[i2++] = (rest < 10) ? char(rest) + '0' : char(rest) - 10 + 'A';
}
while(value != 0 && i2 < buf_len);
if( i2 >= buf_len )
{
buffer[0] = 0; // ops, the buffer was too small
return false;
}
buffer[i2--] = 0;
for( ; i1 < i2 ; ++i1, --i2)
{
CharType temp = buffer[i1];
buffer[i1] = buffer[i2];
buffer[i2] = temp;
}
return true;
}
// if the buffer is too small it will be terminated at the beginning (empty string)
// and the function returns false
template<class CharType>
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10)
{
if( buf_len == 0 )
return false;
CharType * buf = buffer;
if( value < 0 )
{
buffer[0] = '-';
buf += 1;
buf_len -= 1;
value = -value;
}
bool res = Toa(static_cast<unsigned long>(value), buf, buf_len, base);
if( !res )
buffer[0] = 0;
return res;
}
template<class CharType>
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10)
{
return Toa(static_cast<unsigned long>(value), buffer, buf_len, base);
}
template<class CharType>
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10)
{
return Toa(static_cast<long>(value), buffer, buf_len, base);
}
/*
these methos don't take the buffer size
make sure the buffer size is sufficient big
2^64 - 1 = 18446744073709551615 = 20 characters (plus minus sign and plus terminating zero)
so the buffer should have at least 22 characters
!! CHECK ME check the size whether is correct
*/
template<class CharType>
bool Toa(unsigned long value, CharType * buffer)
{
size_t sufficient_space = 25;
return Toa(value, buffer, sufficient_space);
}
template<class CharType>
bool Toa(long value, CharType * buffer)
{
size_t sufficient_space = 25;
return Toa(value, buffer, sufficient_space);
}
template<class CharType>
bool Toa(unsigned int value, CharType * buffer)
{
size_t sufficient_space = 25;
return Toa(value, buffer, sufficient_space);
}
template<class CharType>
bool Toa(int value, CharType * buffer)
{
size_t sufficient_space = 25;
return Toa(value, buffer, sufficient_space);
}
// warning: it uses its own static buffer
// one buffer for both these functions
// !! REMOVE ME they are deprecated (don't use it)
const wchar_t * Toa(unsigned int value, int base = 10);
const wchar_t * Toa(unsigned long value, int base = 10);
const wchar_t * Toa(int value, int base = 10);
const wchar_t * Toa(long value, int base = 10);
void Toa(int value, std::string & res, int base = 10, bool clear = true);
void Toa(long value, std::string & res, int base = 10, bool clear = true);
void Toa(int value, std::wstring & res, int base = 10, bool clear = true);
void Toa(long value, std::wstring & res, int base = 10, bool clear = true);
/*
conversions between ascii text and wide characters
(destination is always std::string or std::wstring)
characters are copied as they are without any locales checking
*/
void AssignString(const char * src, size_t len, std::wstring & dst, bool clear = true);
void AssignString(const char * src, std::wstring & dst, bool clear = true);
void AssignString(const std::string & src, std::wstring & dst, bool clear = true);
void AssignString(const wchar_t * src, size_t len, std::string & dst, bool clear = true);
void AssignString(const wchar_t * src, std::string & dst, bool clear = true);
void AssignString(const std::wstring & src, std::string & dst, bool clear = true);
void AssignString(const char * src, size_t len, std::string & dst, bool clear = true);
void AssignString(const char * src, std::string & dst, bool clear = true);
void AssignString(const std::string & src, std::string & dst, bool clear = true);
void AssignString(const wchar_t * src, size_t len, std::wstring & dst, bool clear = true);
void AssignString(const wchar_t * src, std::wstring & dst, bool clear = true);
void AssignString(const std::wstring & src, std::wstring & dst, bool clear = true);
bool CorrectUrlChar(wchar_t c);
void CorrectUrlDots(std::wstring & url);
void CorrectUrlChars(std::wstring & url);
void CorrectUrlOnlyAllowedChar(std::wstring & url);
const wchar_t * DateToStr(int year, int month, int day);
const wchar_t * DateToStr(int year, int month, int day, int hour, int min, int sec);
const wchar_t * DateToStr(const PT::Date & d);
const wchar_t * DateToStr(time_t t);
const wchar_t * DateToStrWithoutHours(const PT::Date & d);
const wchar_t * DateToStrWithoutHours(time_t t);
const char * DateToStrCookie(int year, int month, int day, int hour, int min, int sec);
const char * DateToStrCookie(const PT::Date & d);
const char * DateToStrCookie(time_t t);
// depracated
// not thread safe
const wchar_t * IpToStr(unsigned int ip_);
// in a new code we can use WTextStream in such a way
// DateToStr() etc can use it too
PT::WTextStream IPToStr(unsigned int ip);
PT::WTextStream IPToStr(int ip);
bool IsWhite(wchar_t s);
bool IsWhite(const wchar_t * str, bool treat_new_line_as_white = false);
bool IsWhite(const std::wstring & str, bool treat_new_line_as_white = false);
bool IsLastSlash(const std::wstring & path);
template<class StringType>
void TrimWhite(StringType & s)
{
typename StringType::size_type i;
if( s.empty() )
return;
// looking for white characters at the end
for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i);
if( i==0 && IsWhite(s[i]) )
{
// the whole string has white characters
s.clear();
return;
}
// deleting white characters at the end
if( i != s.size() - 1 )
s.erase(i+1, StringType::npos);
// looking for white characters at the beginning
for(i=0 ; i<s.size() && IsWhite(s[i]) ; ++i);
// deleting white characters at the beginning
if( i != 0 )
s.erase(0, i);
}
template<class StringType>
void TrimFirst(StringType & s, wchar_t c)
{
typename StringType::size_type i;
if( s.empty() )
return;
// looking for the 'c' characters at the beginning
for(i=0 ; i<s.size() && s[i]==c ; ++i);
// deleting the 'c' characters at the beginning
if( i != 0 )
s.erase(0, i);
}
template<class StringType>
void TrimLast(StringType & s, wchar_t c)
{
typename StringType::size_type i;
if( s.empty() )
return;
// looking for the 'c' characters at the end
for(i=s.size()-1 ; i>0 && s[i]==c ; --i);
if( i==0 && s[i]==c )
{
// the whole string has the 'c' characters
s.clear();
return;
}
// deleting 'c' characters at the end
if( i != s.size() - 1 )
s.erase(i+1, StringType::npos);
}
template<class StringType>
void Trim(StringType & s, wchar_t c)
{
if( s.empty() )
return;
TrimLast(s, c);
TrimFirst(s, c);
}
template<class StringType>
void MaxSize(StringType & str, size_t max_size)
{
if( str.size() > max_size )
str.erase(max_size);
}
/*
this method removing all characters from given string
only digits are allowed and if allow_comma then one comma (or dot)
character is allowed
if change_to_dot is true then if a comma exists then it is changed to a dot
*/
template<class StringType>
void OnlyDigit(StringType & s, bool allow_comma = true, bool change_to_dot = true)
{
typename StringType::size_type i;
bool was_comma = false;
if( s.empty() )
{
s = '0';
return;
}
for(i=0 ; i<s.size() ; )
{
if( (s[i]>='0' && s[i]<='9') ||
(allow_comma && !was_comma && (s[i]=='.' || s[i]==',')) )
{
if( change_to_dot && s[i] == ',' )
s[i] = '.';
if( s[i]=='.' || s[i]==',' )
was_comma = true;
i += 1;
}
else
{
s.erase(i, 1);
}
}
}
wchar_t ToSmall(wchar_t c);
void ToSmall(std::wstring & s);
const char * SkipWhite(const char * s);
const wchar_t * SkipWhite(const wchar_t * s);
template<class StringType1, class StringType2>
bool IsSubStringp(const StringType1 * short_str, const StringType2 * long_str)
{
while( *short_str && *long_str && wchar_t(*short_str) == wchar_t(*long_str) )
{
++short_str;
++long_str;
}
if( *short_str == 0 )
return true;
return false;
}
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 * short_str, const StringType2 * long_str)
{
return IsSubStringp(short_str, long_str);
}
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 & short_str, const StringType2 & long_str)
{
return IsSubStringp(short_str.c_str(), long_str.c_str());
}
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 * short_str, const StringType2 * long_str)
{
while( *short_str && *long_str && ToSmall(*short_str) == ToSmall(*long_str) )
{
++short_str;
++long_str;
}
if( *short_str == 0 )
return true;
return false;
}
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 & short_str, const StringType2 & long_str)
{
return IsSubStringNoCase(short_str.c_str(), long_str.c_str());
}
template<class StringType1, class StringType2>
bool Equal(const StringType1 * str1, const StringType2 * str2)
{
while( *str1 && *str2 && wchar_t(*str1) == wchar_t(*str2) )
{
++str1;
++str2;
}
if( *str1 == 0 && *str2 == 0 )
return true;
return false;
}
template<class StringType1, class StringType2>
bool Equal(const StringType1 & str1, const StringType2 & str2)
{
return Equal(str1.c_str(), str2.c_str());
}
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1, const StringType2 * str2)
{
while( *str1 && *str2 && ToSmall(*str1) == ToSmall(*str2) )
{
++str1;
++str2;
}
if( *str1 == 0 && *str2 == 0 )
return true;
return false;
}
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 & str1, const StringType2 & str2)
{
return EqualNoCase(str1.c_str(), str2.c_str());
}
/*
looking for 'look_for' string in 'buf' and replacing it with 'replace'
'replace' can be empty (so only 'look_for' will be deleted)
*/
template<class StringType>
void ReplaceString(StringType & buf, const StringType & look_for, const StringType & replace)
{
size_t i = 0;
if( look_for.empty() )
return;
while( i < buf.size() )
{
if( IsSubString(look_for.c_str(), buf.c_str() + i) )
{
buf.erase(i, look_for.size());
buf.insert(i, replace);
i += replace.size();
}
else
{
i += 1;
}
}
}
template<class StringType>
void NoLastSlash(StringType & s)
{
if( s.empty() )
return;
size_t i = s.size();
for( ; i>0 && s[i-1]=='/' ; --i);
if( i < s.size() )
s.erase(i);
}
template<class StringType>
void NoFirstHttp(StringType & s)
{
if( s.empty() )
return;
const char http[] = "http://";
const char https[] = "https://";
if( IsSubStringNoCase(http, s.c_str()) )
{
s.erase(0, sizeof(http)/sizeof(char));
}
else
if( IsSubStringNoCase(https, s.c_str()) )
{
s.erase(0, sizeof(https)/sizeof(char));
}
}
/*
this method returns true if there are two dots meaning 'go up' somewhere in the path
for example such paths return true:
".."
"test/../path"
"test/where/../"
"test/where/.."
"../abc"
*/
template<class StringType>
bool PathHasUpDir(const StringType * str)
{
size_t i = 0;
while( str[i] )
{
if( str[i]=='.' && str[i+1]=='.' )
{
i += 2;
if( str[i]=='/' || str[i]==0 )
return true;
}
// skipping until to next slash
while( str[i] && str[i]!='/' )
i += 1;
// skipping the slash (or slashes)
while( str[i]=='/' )
i += 1;
}
return false;
}
template<class StringType>
bool PathHasUpDir(const StringType & str)
{
return PathHasUpDir(str.c_str());
}
/*
this method calculates how many directories there are in the given path
input:
str - path
last_is_dir - true if the last part of the path should be treated as a directory too
samples:
HowManyDirs("", false) -> 0
HowManyDirs("", true) -> 0
HowManyDirs("abc", false) -> 0
HowManyDirs("abc", true) -> 1
HowManyDirs("/abc/", true) -> 1
HowManyDirs("////", false) -> 0
HowManyDirs("////", true) -> 0
HowManyDirs("////abc", false) -> 0
HowManyDirs("////abc", true) -> 1
HowManyDirs("/var/static", false) -> 1
HowManyDirs("/var/static", true) -> 2
HowManyDirs("/var/static/", false) -> 2
HowManyDirs("/var/static/", true) -> 2
*/
template<class StringType>
size_t HowManyDirs(const StringType * str, bool is_last_dir = false)
{
size_t res = 0;
size_t i = 0;
// first slash (root dir) is not calculated
while( str[i]=='/' )
i += 1;
while( str[i] )
{
if( str[i]=='/' )
{
res += 1;
while( str[i]=='/' )
i += 1;
}
else
{
i += 1;
}
}
if( is_last_dir && i>0 && str[i-1]!='/' )
res += 1;
return res;
}
template<class StringType>
size_t HowManyDirs(const StringType & str, bool is_last_dir = false)
{
return HowManyDirs(str.c_str(), is_last_dir);
}
/*
this method skips some first directories from given path
samples:
SkipDirs("/var/test", 1) -> "test"
SkipDirs("/var/test/somewhere", 1) -> "test/somewhere"
SkipDirs("/var/test/somewhere", 2) -> "somewhere"
SkipDirs("/var/test/somewhere", 10) -> ""
*/
template<class StringType>
const StringType * SkipDirs(const StringType * str, size_t how_many_skip)
{
size_t i = 0;
size_t skipped = 0;
if( how_many_skip == 0 )
return str;
// first slash (root dir) is not calculated
while( str[i]=='/' )
i += 1;
while( str[i] )
{
if( str[i]=='/' )
{
skipped += 1;
while( str[i]=='/' )
i += 1;
if( skipped == how_many_skip )
return str+i;
}
else
{
i += 1;
}
}
return str + i;
}
bool IsEmailCorrectChar(wchar_t c);
bool ValidateEmail(const wchar_t * email);
bool ValidateEmail(const std::wstring & email);
bool IsFile(const wchar_t * file);
bool IsFile(const std::wstring & file);
bool CreateDir(const wchar_t * dir, int priv);
bool CreateDir(const std::wstring & dir, int priv);
// creating directories (dirs) under base_dir (base_dir must exist)
// if skip_last == true then last part from dir is treated as a file (the last directory is not created)
bool CreateDirs(const wchar_t * base_dir, const wchar_t * dirs, int priv = 0755, bool skip_last = false);
bool CreateDirs(const std::wstring & base_dir, const std::wstring & dirs, int priv = 0755, bool skip_last = false);
bool CopyFile(FILE * in, FILE * out);
bool CopyFile(const wchar_t * src, const wchar_t * dst);
bool CopyFile(const std::wstring & src, const std::wstring & dst);
bool RemoveFile(const wchar_t * file);
bool RemoveFile(const std::wstring & file);
bool RenameFile(const wchar_t * from, const wchar_t * to);
bool RenameFile(const std::wstring & from, const std::wstring & to);
bool GetUTF8File(const char * file_path, std::wstring & content, bool clear_content = true);
bool GetUTF8File(const wchar_t * file_path, std::wstring & content, bool clear_content = true);
bool GetUTF8File(const std::string & file_path, std::wstring & content, bool clear_content = true);
bool GetUTF8File(const std::wstring & file_path, std::wstring & content, bool clear_content = true);
const wchar_t * GetFileExt(const wchar_t * name);
int SelectFileType(const wchar_t * file_name);
int SelectFileType(const std::wstring & file_name);
// thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(char c,
PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear_out = true)
{
char buffer[10];
size_t buflen = sizeof(buffer)/sizeof(char);
if( clear_out )
out.clear();
if( (c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') ||
c == '.' || c == ',' || c == '-' || c == '_' || c == '(' || c == ')' )
{
out << c;
}
else
{
Toa(static_cast<unsigned char>(c), buffer, buflen, 16);
out << '%';
if( buffer[1] == 0 )
out << '0'; // there is only one character in the buffer
out << buffer;
}
}
// thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const char * in,
PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear_out = true)
{
if( clear_out )
out.clear();
for(size_t i=0 ; in[i] != 0 ; ++i)
UrlEncode(in[i], out, false);
}
// thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const std::string & in,
PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear_out = true)
{
UrlEncode(in.c_str(), out, clear_out);
}
// not thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const wchar_t * in,
PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear_out = true)
{
static std::string ain;
PT::WideToUTF8(in, ain);
if( clear_out )
out.clear();
for(size_t i=0 ; i < ain.size() ; ++i)
UrlEncode(ain[i], out, false);
}
// not thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const std::wstring & in,
PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear_out = true)
{
UrlEncode(in.c_str(), out, clear_out);
}
// no thread safe
template<class StringType>
void UrlEncode(char c, StringType & out, bool clear_out = true)
{
static PT::TextStream tmp;
UrlEncode(c, tmp);
tmp.to_string(out, clear_out);
}
// !! IMROVE ME we need some UrlEncode methods with PT::TextBuffer instead of std::string
void UrlEncode(const char * in, std::string & out, bool clear_out = true);
void UrlEncode(const std::string & in, std::string & out, bool clear_out = true);
void UrlEncode(const wchar_t * in, std::string & out, bool clear_out = true);
void UrlEncode(const std::wstring & in, std::string & out, bool clear_out = true);
void UrlEncode(const wchar_t * in, std::wstring & out, bool clear_out = true);
void UrlEncode(const std::wstring & in, std::wstring & out, bool clear_out = true);
template<typename char_type, size_t stack_size, size_t heap_block_size>
void QEncodeAddChar(char_type c, PT::TextStreamBase<char_type, stack_size, heap_block_size> & out)
{
if( (c>='A' && c<='Z') ||
(c>='a' && c<='z') ||
(c>='0' && c<='9') )
{
out << c;
}
else
{
char buf1[10];
char buf2[10];
size_t len1 = sizeof(buf1) / sizeof(char);
size_t len2 = sizeof(buf2) / sizeof(char);
size_t len = PT::IntToUTF8(int(c), buf1, len1);
for(size_t i=0 ; i<len ; ++i)
{
// make sure that it produces *capital* letters (ABC...)
Toa((unsigned long)(unsigned char)buf1[i], buf2, len2, 16);
out << '=';
if( buf2[1] == 0 )
out << '0';
out << buf2;
}
}
}
/*
this encoding is used in mails headers
encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
http://www.faqs.org/rfcs/rfc1522.html
we have:
charset = UTF-8
encoding = Q
current limitation:
we do not support checking the maximum length:
"An encoded-word may not be more than 75 characters long, including
charset, encoding, encoded-text, and delimiters."
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void QEncode(const wchar_t * in, PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear = true)
{
if( clear )
out.clear();
out << "=?UTF-8?Q?";
for( ; *in ; ++in)
QEncodeAddChar(*in, out);
out << "?=";
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
void QEncode(const std::wstring & in, PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
bool clear = true)
{
if( clear )
out.clear();
out << "=?UTF-8?Q?";
// do not use QEncode(in.c_str()) as 'in' can have a zero byte
for(size_t i=0 ; i<in.size() ; ++i)
QEncodeAddChar(in[i], out);
out << "?=";
}
void QEncode(const std::wstring & in, std::string & out, bool clear = true);
void RemovePostFileTmp(PostFileTab & post_file_tab);
/*
short_str is removed from long_str (and a last dots are removed too)
and the result is stored in out
sample:
short_str: "mydomain.tld"
long_str: "www.subdomain.mydomain.tld"
out: "www.subdomain"
short_str: "mydomain.tld"
long_str: "otherdifferentstring"
out: ""
*/
template<class StringType1, class StringType2, class StringType3>
void CreateSubdomain(const StringType1 * short_str, const StringType2 * long_str, StringType3 & out)
{
size_t i1, i2;
out.clear();
for(i1=0 ; short_str[i1] != 0 ; ++i1);
for(i2=0 ; long_str[i2] != 0 ; ++i2);
if( i1 >= i2 )
return;
// i1 is < i2
while( i1-- > 0 )
{
i2 -= 1;
if( short_str[i1] != long_str[i2] )
return; // short_str is not a last substring of long_str
}
while( i2>0 && long_str[i2-1] == '.' )
i2 -= 1;
for(i1=0 ; i1 < i2 ; ++i1)
out += long_str[i1];
}
#endif