winix/core/misc.h

/*
 * This file is a part of Winix
 * and is not publicly distributed
 *
 * Copyright (c) 2008-2013, Tomasz Sowa
 * All rights reserved.
 *
 */

#ifndef headerfile_winix_core_misc
#define headerfile_winix_core_misc


#include <string>
#include <sstream>
#include <ctime>
#include <cstdio>
#include "item.h"
#include "requesttypes.h"
#include "date/date.h"
#include "textstream/textstream.h"
#include "utf8/utf8.h"


/*
	conversions between text and numbers
*/

// Toi: text -> int conversion for std::string, std::wstring and
// zero-terminated narrow/wide C strings; 'base' defaults to 10.
// (only declared here; the definitions live outside this header)
int Toi(const std::string & str,  int base = 10);
int Toi(const std::wstring & str, int base = 10);
int Toi(const char * str,         int base = 10);
int Toi(const wchar_t * str,      int base = 10);

// Tol: the same set of overloads returning long
long Tol(const std::string & str,  int base = 10);
long Tol(const std::wstring & str, int base = 10);
long Tol(const char * str,         int base = 10);
long Tol(const wchar_t * str,      int base = 10);


/*
	converting an unsigned value to a zero-terminated text

	value   - the number to convert
	buffer  - destination, must have room for buf_len characters
	buf_len - size of the buffer (counted in characters, the terminating zero included)
	base    - numeric base, values outside <2,16> are clamped to that range
	          (digits above 9 are written as capital letters A-F)

	returns true on success
	if the buffer is too small it is terminated at the beginning (empty string)
	and the function returns false (with buf_len equal to zero nothing is written at all)
*/
template<class CharType>
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10)
{
	if( buf_len == 0 )
		return false;

	if( base < 2 )
		base = 2;
	else
	if( base > 16 )
		base = 16;

	const unsigned long radix = static_cast<unsigned long>(base);
	size_t len = 0;

	// the digits are produced starting from the least significant one
	for(;;)
	{
		const long digit = static_cast<long>(value % radix);
		value /= radix;

		if( digit < 10 )
			buffer[len] = char(digit) + '0';
		else
			buffer[len] = char(digit) - 10 + 'A';

		len += 1;

		if( value == 0 || len >= buf_len )
			break;
	}

	if( len >= buf_len )
	{
		// there is no room left for the terminating zero (or for the rest of the digits)
		buffer[0] = 0;
		return false;
	}

	buffer[len] = 0;

	// the digits are in the reverse order, swapping them in place
	for(size_t left = 0, right = len - 1 ; left < right ; ++left, --right)
	{
		const CharType saved = buffer[left];
		buffer[left]  = buffer[right];
		buffer[right] = saved;
	}

	return true;
}


/*
	converting a signed value to a zero-terminated text
	(a minus sign is put at the beginning for negative values)

	value   - the number to convert
	buffer  - destination, must have room for buf_len characters
	buf_len - size of the buffer (counted in characters, the sign and the terminating zero included)
	base    - numeric base passed on to the unsigned version

	returns true on success
	if the buffer is too small it is terminated at the beginning (empty string)
	and the function returns false (with buf_len equal to zero nothing is written at all)
*/
template<class CharType>
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10)
{
	if( buf_len == 0 )
		return false;

	CharType * digits = buffer;
	unsigned long magnitude = static_cast<unsigned long>(value);

	if( value < 0 )
	{
		buffer[0]  = '-';
		digits    += 1;
		buf_len   -= 1;

		// the negation is made on the unsigned value:
		// '-value' overflows (undefined behaviour) for the smallest long
		magnitude  = 0UL - magnitude;
	}

	bool res = Toa(magnitude, digits, buf_len, base);

	if( !res )
		buffer[0] = 0; // the minus sign must not be left in the buffer

	return res;
}


// unsigned int version: the value is widened to unsigned long
// and converted by the unsigned long version (the same result and the same buffer rules)
template<class CharType>
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10)
{
	const unsigned long widened = value;

	return Toa(widened, buffer, buf_len, base);
}


// int version: the value is widened to long
// and converted by the long version (the same result and the same buffer rules)
template<class CharType>
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10)
{
	const long widened = value;

	return Toa(widened, buffer, buf_len, base);
}


/*
	these methods don't take the buffer size,
	make sure the buffer is sufficiently big

	2^64 - 1 = 18446744073709551615 = 20 characters (plus a minus sign and a terminating zero)
	so the buffer should have at least 22 characters
	(the versions with a size are called here with an assumed size of 25 and the default base)

	!! CHECK ME check whether the size is correct
*/
template<class CharType>
bool Toa(unsigned long value, CharType * buffer)
{
	const size_t assumed_capacity = 25;

	return Toa(value, buffer, assumed_capacity);
}

template<class CharType>
bool Toa(long value, CharType * buffer)
{
	const size_t assumed_capacity = 25;

	return Toa(value, buffer, assumed_capacity);
}

template<class CharType>
bool Toa(unsigned int value, CharType * buffer)
{
	const size_t assumed_capacity = 25;

	return Toa(value, buffer, assumed_capacity);
}

template<class CharType>
bool Toa(int value, CharType * buffer)
{
	const size_t assumed_capacity = 25;

	return Toa(value, buffer, assumed_capacity);
}


// warning: these functions use their own static buffer
// (one buffer shared between them), so the returned pointer
// is presumably valid only until the next call -- confirm in the implementation
// !! REMOVE ME they are deprecated (don't use them)
const wchar_t * Toa(unsigned int value,  int base = 10);
const wchar_t * Toa(unsigned long value, int base = 10);
const wchar_t * Toa(int value,  int base = 10);
const wchar_t * Toa(long value, int base = 10);

// versions storing the text in a std::string / std::wstring
// NOTE(review): 'clear' presumably tells whether 'res' is emptied first
// (otherwise the text would be appended) -- confirm in the implementation
void Toa(int  value, std::string & res,  int base = 10, bool clear = true);
void Toa(long value, std::string & res,  int base = 10, bool clear = true);
void Toa(int  value, std::wstring & res, int base = 10, bool clear = true);
void Toa(long value, std::wstring & res, int base = 10, bool clear = true);


/*
	conversions between ascii text and wide characters
	(destination is always std::string or std::wstring)

	characters are copied as they are, without any locale checking

	each source kind comes in three flavours: a pointer with an explicit length,
	a zero-terminated pointer and a string object
	NOTE(review): 'clear' presumably tells whether dst is emptied before copying -- confirm in the implementation
*/

// narrow -> wide
void AssignString(const char * src, size_t len, std::wstring & dst, bool clear = true);
void AssignString(const char * src, std::wstring & dst, bool clear = true);
void AssignString(const std::string & src, std::wstring & dst, bool clear = true);

// wide -> narrow
void AssignString(const wchar_t * src, size_t len, std::string & dst, bool clear = true);
void AssignString(const wchar_t * src, std::string & dst, bool clear = true);
void AssignString(const std::wstring & src, std::string & dst, bool clear = true);

// narrow -> narrow
void AssignString(const char * src, size_t len, std::string & dst, bool clear = true);
void AssignString(const char * src, std::string & dst, bool clear = true);
void AssignString(const std::string & src, std::string & dst, bool clear = true);

// wide -> wide
void AssignString(const wchar_t * src, size_t len, std::wstring & dst, bool clear = true);
void AssignString(const wchar_t * src, std::wstring & dst, bool clear = true);
void AssignString(const std::wstring & src, std::wstring & dst, bool clear = true);


// helpers for correcting an url (only declared here)
// NOTE(review): judging by the names they check a single character, fix dots,
// fix characters and leave only allowed characters -- confirm in the implementation
bool CorrectUrlChar(wchar_t c);
void CorrectUrlDots(std::wstring & url);
void CorrectUrlChars(std::wstring & url);
void CorrectUrlOnlyAllowedChar(std::wstring & url);


// date -> text, the date can be given as separate fields, a PT::Date or a time_t
// NOTE(review): raw pointers are returned so these presumably use an internal
// static buffer (not thread safe, valid until the next call) -- confirm in the implementation
const wchar_t * DateToStr(int year, int month, int day);
const wchar_t * DateToStr(int year, int month, int day, int hour, int min, int sec);
const wchar_t * DateToStr(const PT::Date & d);
const wchar_t * DateToStr(time_t t);
const wchar_t * DateToStrWithoutHours(const PT::Date & d);
const wchar_t * DateToStrWithoutHours(time_t t);

// narrow-character versions, by name intended for http cookies
const char * DateToStrCookie(int year, int month, int day, int hour, int min, int sec);
const char * DateToStrCookie(const PT::Date & d);
const char * DateToStrCookie(time_t t);

// deprecated
// not thread safe
const wchar_t * IpToStr(unsigned int ip_);

// in new code we can use WTextStream in such a way
// (DateToStr() etc. could use it too)
PT::WTextStream IPToStr(unsigned int ip);
PT::WTextStream IPToStr(int ip);


// white character tests: for a single character and for whole strings
// NOTE(review): the string versions presumably return true when every character is white,
// with treat_new_line_as_white deciding about new line characters -- confirm in the implementation
bool IsWhite(wchar_t s);
bool IsWhite(const wchar_t * str, 		bool treat_new_line_as_white = false);
bool IsWhite(const std::wstring & str, 	bool treat_new_line_as_white = false);
// by name: tells whether the path ends with a slash (only declared here)
bool IsLastSlash(const std::wstring & path);

// removing white characters (those for which IsWhite() returns true)
// from the end and from the beginning of the string
// a string consisting only of white characters becomes empty
template<class StringType>
void TrimWhite(StringType & s)
{
	typedef typename StringType::size_type size_type;

	// 'last' is the length of the string without the white characters at the end
	size_type last = s.size();

	while( last > 0 && IsWhite(s[last-1]) )
		last -= 1;

	if( last == 0 )
	{
		// the string is empty or has only white characters
		s.clear();
		return;
	}

	if( last != s.size() )
		s.erase(last, StringType::npos);

	// 'first' is the number of white characters at the beginning
	size_type first = 0;

	while( first < s.size() && IsWhite(s[first]) )
		first += 1;

	if( first != 0 )
		s.erase(0, first);
}


// removing all 'c' characters from the beginning of the string
template<class StringType>
void TrimFirst(StringType & s, wchar_t c)
{
	typedef typename StringType::size_type size_type;

	const size_type len = s.size();
	size_type count = 0;

	// counting how many 'c' characters there are at the beginning
	while( count < len && s[count] == c )
		count += 1;

	if( count > 0 )
		s.erase(0, count);
}


// removing all 'c' characters from the end of the string
// a string consisting only of 'c' characters becomes empty
template<class StringType>
void TrimLast(StringType & s, wchar_t c)
{
	typedef typename StringType::size_type size_type;

	// 'keep' is the length of the string without the 'c' characters at the end
	size_type keep = s.size();

	while( keep > 0 && s[keep-1] == c )
		keep -= 1;

	if( keep == 0 )
	{
		// the string is empty or has only the 'c' characters
		s.clear();
		return;
	}

	if( keep != s.size() )
		s.erase(keep, StringType::npos);
}


// removing all 'c' characters from both ends of the string
// (first from the end by TrimLast() and then from the beginning by TrimFirst())
template<class StringType>
void Trim(StringType & s, wchar_t c)
{
	if( !s.empty() )
	{
		TrimLast(s, c);
		TrimFirst(s, c);
	}
}


// truncating the string to at most max_size characters
// (a shorter string is left untouched)
template<class StringType>
void MaxSize(StringType & str, size_t max_size)
{
	if( max_size < str.size() )
		str.erase(max_size, StringType::npos);
}


/*
	this method removes all characters from the given string except digits
	and, if allow_comma is true, the first comma (or dot) character

	if change_to_dot is true then the allowed comma is changed to a dot

	an empty input string is changed to "0"
	(a non-empty string without any allowed character becomes empty)

	the string is filtered in place in one pass: the allowed characters
	are moved to the front and the rest is cut off with a single erase
*/
template<class StringType>
void OnlyDigit(StringType & s, bool allow_comma = true, bool change_to_dot = true)
{
	typedef typename StringType::size_type size_type;

	if( s.empty() )
	{
		s = '0';
		return;
	}

	bool was_comma = false;
	size_type write_pos = 0;

	for(size_type read_pos = 0 ; read_pos < s.size() ; ++read_pos)
	{
		const bool is_digit     = (s[read_pos] >= '0' && s[read_pos] <= '9');
		const bool is_separator = (s[read_pos] == '.' || s[read_pos] == ',');

		if( is_digit || (allow_comma && !was_comma && is_separator) )
		{
			// only one separator is allowed in the whole string
			if( is_separator )
				was_comma = true;

			if( change_to_dot && s[read_pos] == ',' )
				s[write_pos] = '.';
			else
				s[write_pos] = s[read_pos];

			write_pos += 1;
		}
	}

	// cutting off the leftover after the last allowed character
	if( write_pos != s.size() )
		s.erase(write_pos, StringType::npos);
}


// ToSmall: by name a conversion to small (lower case) letters,
// for a single character (the result is returned) and for a whole string (changed in place)
// NOTE(review): which alphabets are supported is not visible here -- confirm in the implementation
wchar_t ToSmall(wchar_t c);
void ToSmall(std::wstring & s);

// SkipWhite: presumably returns a pointer to the first non-white character of 's' -- confirm in the implementation
const char * SkipWhite(const char * s);
const wchar_t * SkipWhite(const wchar_t * s);


// returns true if short_str is the beginning (a prefix) of long_str
// both strings are zero-terminated and can have different character types
// (the characters are compared after converting them to wchar_t)
// an empty short_str is the beginning of any string
template<class StringType1, class StringType2>
bool IsSubStringp(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str != 0 ; ++short_str, ++long_str)
	{
		// long_str has ended too early or the characters are different
		if( *long_str == 0 || wchar_t(*short_str) != wchar_t(*long_str) )
			return false;
	}

	return true;
}


// returns true if short_str is the beginning (a prefix) of long_str
// version for zero-terminated strings, the work is done by IsSubStringp()
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 * short_str, const StringType2 * long_str)
{
	const bool is_prefix = IsSubStringp(short_str, long_str);

	return is_prefix;
}


// returns true if short_str is the beginning (a prefix) of long_str
// version for string objects: their c_str() buffers are given to IsSubStringp()
// so only the part up to the first zero character is compared
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 & short_str, const StringType2 & long_str)
{
	const bool is_prefix = IsSubStringp(short_str.c_str(), long_str.c_str());

	return is_prefix;
}


// returns true if short_str is the beginning (a prefix) of long_str
// the characters are compared after passing them through ToSmall()
// both strings are zero-terminated, an empty short_str is the beginning of any string
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str != 0 ; ++short_str, ++long_str)
	{
		// long_str has ended too early or the characters are different
		if( *long_str == 0 || ToSmall(*short_str) != ToSmall(*long_str) )
			return false;
	}

	return true;
}


// returns true if short_str is the beginning (a prefix) of long_str (compared through ToSmall())
// version for string objects: their c_str() buffers are given to the pointer version
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 & short_str, const StringType2 & long_str)
{
	const bool is_prefix = IsSubStringNoCase(short_str.c_str(), long_str.c_str());

	return is_prefix;
}


// returns true if both zero-terminated strings have the same characters
// the strings can have different character types
// (the characters are compared after converting them to wchar_t)
template<class StringType1, class StringType2>
bool Equal(const StringType1 * str1, const StringType2 * str2)
{
	for( ; *str1 != 0 && *str2 != 0 ; ++str1, ++str2)
	{
		if( wchar_t(*str1) != wchar_t(*str2) )
			return false;
	}

	// the strings are equal only if both of them have ended in the same place
	return *str1 == 0 && *str2 == 0;
}


// returns true if both strings have the same characters
// version for string objects: their c_str() buffers are given to the pointer version
// so only the parts up to the first zero character are compared
template<class StringType1, class StringType2>
bool Equal(const StringType1 & str1, const StringType2 & str2)
{
	const bool the_same = Equal(str1.c_str(), str2.c_str());

	return the_same;
}


// returns true if both zero-terminated strings have the same characters
// the characters are compared after passing them through ToSmall()
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1, const StringType2 * str2)
{
	for( ; *str1 != 0 && *str2 != 0 ; ++str1, ++str2)
	{
		if( ToSmall(*str1) != ToSmall(*str2) )
			return false;
	}

	// the strings are equal only if both of them have ended in the same place
	return *str1 == 0 && *str2 == 0;
}


// returns true if both strings have the same characters (compared through ToSmall())
// version for string objects: their c_str() buffers are given to the pointer version
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 & str1, const StringType2 & str2)
{
	const bool the_same = EqualNoCase(str1.c_str(), str2.c_str());

	return the_same;
}


/*
	looking for the 'look_for' string in 'buf' and replacing each occurrence with 'replace'
	'replace' can be empty (so 'look_for' will only be deleted)

	the occurrences are looked for from the left and they don't overlap,
	the text just inserted is not scanned again
	(so replacing "a" with "aa" doesn't loop forever)

	if 'look_for' is empty then nothing is done

	the whole 'look_for' string is compared (with zero characters inside too),
	'look_for' and 'replace' should not be the same object as 'buf'
*/
template<class StringType>
void ReplaceString(StringType & buf, const StringType & look_for, const StringType & replace)
{
	if( look_for.empty() )
		return;

	typename StringType::size_type pos = buf.find(look_for, 0);

	while( pos != StringType::npos )
	{
		buf.replace(pos, look_for.size(), replace);

		// continuing after the inserted text
		pos = buf.find(look_for, pos + replace.size());
	}
}


// removing all slashes from the end of the string
// (a string consisting only of slashes becomes empty)
template<class StringType>
void NoLastSlash(StringType & s)
{
	// 'keep' is the length of the string without the slashes at the end
	size_t keep = s.size();

	while( keep > 0 && s[keep-1] == '/' )
		keep -= 1;

	if( keep != s.size() )
		s.erase(keep, StringType::npos);
}


// removing the "http://" or "https://" prefix from the beginning of the string
// the prefix is recognized by IsSubStringNoCase() (letters size doesn't matter)
// a string without such a prefix is left untouched
template<class StringType>
void NoFirstHttp(StringType & s)
{
	if( s.empty() )
		return;

	const char http[]  = "http://";
	const char https[] = "https://";

	// sizeof() of a character table counts the terminating zero too
	// so one is subtracted to get the number of characters in the prefix
	// (without it the first character after the prefix would be removed as well)
	const size_t http_len  = sizeof(http)  / sizeof(char) - 1;
	const size_t https_len = sizeof(https) / sizeof(char) - 1;

	if( IsSubStringNoCase(http, s.c_str()) )
	{
		s.erase(0, http_len);
	}
	else
	if( IsSubStringNoCase(https, s.c_str()) )
	{
		s.erase(0, https_len);
	}
}


/*
	this method returns true if there are two dots meaning 'go up' somewhere in the path
	(two dots being a whole part of the path: at the beginning of a part
	and followed by a slash or by the end of the string)

	for example such paths return true:
	".."
	"test/../path"
	"test/where/../"
	"test/where/.."
	"../abc"

	and such paths return false:
	"test/..name/path"
	"test../path"
	"..."
*/
template<class StringType>
bool PathHasUpDir(const StringType * str)
{
	const StringType * p = str;

	// at the beginning of each iteration 'p' points at the beginning of a path part
	while( *p )
	{
		if( p[0]=='.' && p[1]=='.' )
		{
			p += 2;

			if( *p=='/' || *p==0 )
				return true;
		}

		// going to the next slash
		while( *p && *p!='/' )
			++p;

		// going through the slash (or slashes)
		while( *p=='/' )
			++p;
	}

	return false;
}


// returns true if there are two dots meaning 'go up' somewhere in the path
// version for string objects: the c_str() buffer is given to the pointer version
template<class StringType>
bool PathHasUpDir(const StringType & str)
{
	const bool has_up_dir = PathHasUpDir(str.c_str());

	return has_up_dir;
}


/*
	this method calculates how many directories there are in the given path
	(a directory is counted for each slash, or group of slashes, which comes
	after a name; slashes at the beginning of the path are not counted)

	input:
		str         - path (zero-terminated)
		is_last_dir - true if the last part of the path should be treated as a directory too
		              (it matters only if the path doesn't end with a slash)

	samples:
	HowManyDirs("", false)            -> 0
	HowManyDirs("", true)             -> 0
	HowManyDirs("abc", false)         -> 0
	HowManyDirs("abc", true)          -> 1
	HowManyDirs("/abc/", true)        -> 1
	HowManyDirs("////", false)        -> 0
	HowManyDirs("////", true)         -> 0
	HowManyDirs("////abc", false)     -> 0
	HowManyDirs("////abc", true)      -> 1
	HowManyDirs("/var/static", false) -> 1
	HowManyDirs("/var/static", true)  -> 2
	HowManyDirs("/var/static/", false) -> 2
	HowManyDirs("/var/static/", true)  -> 2
*/
template<class StringType>
size_t HowManyDirs(const StringType * str, bool is_last_dir = false)
{
	const StringType * p = str;
	size_t dirs = 0;

	// the first slash (root dir) is not calculated
	while( *p=='/' )
		++p;

	while( *p )
	{
		if( *p!='/' )
		{
			++p;
			continue;
		}

		// a slash after a name: one more directory
		dirs += 1;

		while( *p=='/' )
			++p;
	}

	// 'p' points at the terminating zero now
	if( is_last_dir && p!=str && p[-1]!='/' )
		dirs += 1;

	return dirs;
}


// calculates how many directories there are in the given path
// version for string objects: the c_str() buffer is given to the pointer version
template<class StringType>
size_t HowManyDirs(const StringType & str, bool is_last_dir = false)
{
	const size_t dirs = HowManyDirs(str.c_str(), is_last_dir);

	return dirs;
}


/*
	this method skips some first directories from the given path
	and returns a pointer (into the same buffer) to the rest of the path

	slashes at the beginning of the path are not treated as a directory,
	if there are fewer directories than how_many_skip then a pointer
	to the end of the string is returned
	if how_many_skip is zero then str is returned unchanged

	samples:
	SkipDirs("/var/test", 1)            -> "test"
	SkipDirs("/var/test/somewhere", 1)  -> "test/somewhere"
	SkipDirs("/var/test/somewhere", 2)  -> "somewhere"
	SkipDirs("/var/test/somewhere", 10) -> ""
*/
template<class StringType>
const StringType * SkipDirs(const StringType * str, size_t how_many_skip)
{
	if( how_many_skip == 0 )
		return str;

	const StringType * p = str;

	// the first slash (root dir) is not calculated
	while( *p=='/' )
		++p;

	size_t skipped = 0;

	while( *p )
	{
		if( *p!='/' )
		{
			++p;
			continue;
		}

		// a directory has just ended, going through its slash (or slashes)
		while( *p=='/' )
			++p;

		skipped += 1;

		if( skipped == how_many_skip )
			break;
	}

	return p;
}


// e-mail checking (only declared here, by name: a test for one character
// and a validation of a whole address)
bool IsEmailCorrectChar(wchar_t c);
bool ValidateEmail(const wchar_t * email);
bool ValidateEmail(const std::wstring & email);

// file system helpers, each one with a version for a zero-terminated string
// and for a std::wstring
// NOTE(review): 'priv' looks like unix permission bits (see the 0755 default below) -- confirm
bool IsFile(const wchar_t * file);
bool IsFile(const std::wstring & file);
bool CreateDir(const wchar_t * dir, int priv);
bool CreateDir(const std::wstring & dir, int priv);

// creating directories (dirs) under base_dir (base_dir must exist)
// if skip_last == true then the last part of dirs is treated as a file (the last directory is not created)
bool CreateDirs(const wchar_t * base_dir, const wchar_t * dirs, int priv = 0755, bool skip_last = false);
bool CreateDirs(const std::wstring & base_dir, const std::wstring & dirs, int priv = 0755, bool skip_last = false);

// copying: between two already opened streams or between two paths
bool CopyFile(FILE * in, FILE * out);
bool CopyFile(const wchar_t * src, const wchar_t * dst);
bool CopyFile(const std::wstring & src, const std::wstring & dst);

bool RemoveFile(const wchar_t * file);
bool RemoveFile(const std::wstring & file);

bool RenameFile(const wchar_t * from, const wchar_t * to);
bool RenameFile(const std::wstring & from, const std::wstring & to);

// reading a file into 'content'
// NOTE(review): by name the file is expected to be UTF-8 encoded and clear_content
// presumably tells whether 'content' is emptied first -- confirm in the implementation
bool GetUTF8File(const char * file_path,         std::wstring & content, bool clear_content = true);
bool GetUTF8File(const wchar_t * file_path,      std::wstring & content, bool clear_content = true);
bool GetUTF8File(const std::string & file_path,  std::wstring & content, bool clear_content = true);
bool GetUTF8File(const std::wstring & file_path, std::wstring & content, bool clear_content = true);


// file extension / file type helpers (only declared here)
// NOTE(review): the meaning of the int returned by SelectFileType() is not visible in this header
const wchar_t * GetFileExt(const wchar_t * name);
int SelectFileType(const wchar_t * file_name);
int SelectFileType(const std::wstring & file_name);


// url-encoding of one character
// letters (a-z, A-Z), digits and the characters . , - _ ( ) are put as they are,
// any other character is put as '%' and two hexadecimal digits (capital letters)
// if clear_out is true the output stream is cleared first
// thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(char c,
			   PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			   bool clear_out = true)
{
	if( clear_out )
		out.clear();

	const bool is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	const bool is_digit  = (c >= '0' && c <= '9');
	const bool is_other  = (c == '.' || c == ',' || c == '-' || c == '_' || c == '(' || c == ')');

	if( is_letter || is_digit || is_other )
	{
		out << c;
		return;
	}

	char hex[10];
	size_t hex_size = sizeof(hex)/sizeof(char);

	Toa(static_cast<unsigned char>(c), hex, hex_size, 16);
	out << '%';

	// always two digits: a leading zero is added when there is only one character in the buffer
	if( hex[1] == 0 )
		out << '0';

	out << hex;
}


// url-encoding of a zero-terminated string, character by character
// if clear_out is true the output stream is cleared first
// thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const char * in,
			   PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			   bool clear_out = true)
{
	if( clear_out )
		out.clear();

	// the stream was cleared above (if needed) so each character is only appended
	for(const char * p = in ; *p != 0 ; ++p)
		UrlEncode(*p, out, false);
}


// url-encoding of a std::string
// the c_str() buffer is given to the version for zero-terminated strings
// (so the encoding stops at the first zero character)
// thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const std::string & in,
			   PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			   bool clear_out = true)
{
	const char * raw = in.c_str();

	UrlEncode(raw, out, clear_out);
}


// url-encoding of a zero-terminated wide string
// the string is first converted by PT::WideToUTF8() and then each byte is encoded
// if clear_out is true the output stream is cleared first
// not thread safe (a static buffer is used for the converted text)
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const wchar_t * in,
			   PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			   bool clear_out = true)
{
	static std::string ain;

	PT::WideToUTF8(in, ain);

	if( clear_out )
		out.clear();

	for(std::string::const_iterator it = ain.begin() ; it != ain.end() ; ++it)
		UrlEncode(*it, out, false);
}


// url-encoding of a std::wstring
// the c_str() buffer is given to the version for zero-terminated wide strings
// not thread safe
template<typename char_type, size_t stack_size, size_t heap_block_size>
void UrlEncode(const std::wstring & in,
			   PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			   bool clear_out = true)
{
	const wchar_t * raw = in.c_str();

	UrlEncode(raw, out, clear_out);
}


// url-encoding of one character with the result stored in a string
// the character is encoded to a temporary stream which is then
// copied to 'out' by to_string() (clear_out is passed to it)
// not thread safe (the temporary stream is static)
template<class StringType>
void UrlEncode(char c, StringType & out, bool clear_out = true)
{
	static PT::TextStream tmp;

	// the temporary stream is always cleared before encoding
	UrlEncode(c, tmp, true);
	tmp.to_string(out, clear_out);
}


// !! IMPROVE ME we need some UrlEncode methods with PT::TextBuffer instead of std::string


// url-encoding with the result stored in a std::string or std::wstring
// (only declared here, the definitions are outside this header)
// NOTE(review): clear_out presumably tells whether 'out' is emptied first -- confirm in the implementation
void UrlEncode(const char * in,			std::string & out, bool clear_out = true);
void UrlEncode(const std::string & in,	std::string & out, bool clear_out = true);
void UrlEncode(const wchar_t * in,		std::string & out, bool clear_out = true);
void UrlEncode(const std::wstring & in, std::string & out, bool clear_out = true);
void UrlEncode(const wchar_t * in,		std::wstring & out, bool clear_out = true);
void UrlEncode(const std::wstring & in, std::wstring & out, bool clear_out = true);


// adding one character to a Q-encoded text
// letters (A-Z, a-z) and digits are put as they are,
// any other character is converted by PT::IntToUTF8() and each of the resulting
// bytes is put as '=' and two hexadecimal digits
// NOTE(review): the character type of 'c' must be the same as the character type
// of the stream (both are deduced as char_type) -- confirm against the callers
template<typename char_type, size_t stack_size, size_t heap_block_size>
void QEncodeAddChar(char_type c, PT::TextStreamBase<char_type, stack_size, heap_block_size> & out)
{
	const bool is_plain = (c>='A' && c<='Z') ||
						  (c>='a' && c<='z') ||
						  (c>='0' && c<='9');

	if( is_plain )
	{
		out << c;
		return;
	}

	char utf8[10];
	char hex[10];

	size_t utf8_size = sizeof(utf8) / sizeof(char);
	size_t hex_size  = sizeof(hex)  / sizeof(char);
	size_t utf8_len  = PT::IntToUTF8(int(c), utf8, utf8_size);

	for(size_t k=0 ; k<utf8_len ; ++k)
	{
		// make sure that it produces *capital* letters (ABC...)
		Toa(static_cast<unsigned long>(static_cast<unsigned char>(utf8[k])), hex, hex_size, 16);
		out << '=';

		// always two digits: a leading zero is added when there is only one
		if( hex[1] == 0 )
			out << '0';

		out << hex;
	}
}


/*
	this encoding is used in mail headers
	encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
	http://www.faqs.org/rfcs/rfc1522.html

	we have:
	charset  = UTF-8
	encoding = Q

	the output is: "=?UTF-8?Q?" + each character of 'in' added by QEncodeAddChar() + "?="
	'in' is a zero-terminated string, if 'clear' is true the output stream is cleared first

	current limitation:
	we do not support checking the maximum length:
	"An encoded-word may not be more than 75 characters long, including
     charset, encoding, encoded-text, and delimiters."
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void QEncode(const wchar_t * in, PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			 bool clear = true)
{
	if( clear )
		out.clear();

	out << "=?UTF-8?Q?";

	const wchar_t * p = in;

	while( *p != 0 )
	{
		QEncodeAddChar(*p, out);
		++p;
	}

	out << "?=";
}


// Q-encoding (used in mail headers) of a std::wstring
// the output is: "=?UTF-8?Q?" + each character of 'in' added by QEncodeAddChar() + "?="
// if 'clear' is true the output stream is cleared first
template<typename char_type, size_t stack_size, size_t heap_block_size>
void QEncode(const std::wstring & in, PT::TextStreamBase<char_type, stack_size, heap_block_size> & out,
			 bool clear = true)
{
	if( clear )
		out.clear();

	out << "=?UTF-8?Q?";

	// all characters of the string are encoded (not only those up to the first
	// zero character) so the version for zero-terminated strings is not used here
	for(std::wstring::const_iterator it = in.begin() ; it != in.end() ; ++it)
		QEncodeAddChar(*it, out);

	out << "?=";
}


// Q-encoding with the result stored in a std::string (only declared here)
// NOTE(review): 'clear' presumably tells whether 'out' is emptied first -- confirm in the implementation
void QEncode(const std::wstring & in, std::string & out, bool clear = true);


// NOTE(review): by name this removes the temporary files of the posted files
// from post_file_tab -- only declared here, confirm in the implementation
void RemovePostFileTmp(PostFileTab & post_file_tab);


/*
	short_str is removed from the end of long_str (and the dots which are
	then at the end are removed too) and the result is stored in out

	out is always cleared first and it stays empty if short_str
	is not shorter than long_str or if long_str doesn't end with short_str
	(both strings are zero-terminated)

	sample:
	short_str: "mydomain.tld"
	long_str:  "www.subdomain.mydomain.tld"
	out:       "www.subdomain"

	short_str: "mydomain.tld"
	long_str:  "otherdifferentstring"
	out:       ""
*/
template<class StringType1, class StringType2, class StringType3>
void CreateSubdomain(const StringType1 * short_str, const StringType2 * long_str, StringType3 & out)
{
	out.clear();

	size_t short_len = 0;
	size_t long_len  = 0;

	while( short_str[short_len] != 0 )
		short_len += 1;

	while( long_str[long_len] != 0 )
		long_len += 1;

	if( short_len >= long_len )
		return;

	// the place in long_str where short_str has to begin
	size_t prefix_len = long_len - short_len;

	for(size_t k = 0 ; k < short_len ; ++k)
	{
		if( short_str[k] != long_str[prefix_len + k] )
			return; // short_str is not the last part of long_str
	}

	// the dots between the subdomain and short_str are not copied
	while( prefix_len > 0 && long_str[prefix_len-1] == '.' )
		prefix_len -= 1;

	for(size_t k = 0 ; k < prefix_len ; ++k)
		out += long_str[k];
}


#endif