winix/core/confparser.cpp

/*
 * This file is a part of Winix
 * and is not publicly distributed
 *
 * Copyright (c) 2008-2011, Tomasz Sowa
 * All rights reserved.
 *
 */

#include <cstdlib>
#include <wchar.h>
#include "confparser.h"
#include "utf8.h"


// Creates a parser and loads the built-in option defaults through SetDefault().
ConfParser::ConfParser()
{
	SetDefault();
}


// Restores every option of the parser to its built-in default.
void ConfParser::SetDefault()
{
	// Special characters of the configuration syntax. They can be changed
	// to whatever is needed, but none of them should be a white character
	// (as recognised by IsWhite()) nor the new line character ('\n').
	commentary      = '#';
	separator       = '=';
	list_delimiter  = ',';
	list_start      = '(';
	list_end        = ')';

	// parsing switches
	input_as_utf8   = false;
	use_escape_char = true;
	skip_empty      = false;
	split_single    = false;

	// fallbacks used by Text()/Int()/Size()/Bool() when no explicit
	// default is given and the option cannot be found
	default_bool    = false;
	default_size    = 0;
	default_int     = 0;
	default_str     = L"";
}


// Sets the split_single option.
// When it is enabled AddOption() stores a value consisting of exactly one
// item in table_single (as a single string) instead of in table.
void ConfParser::SplitSingle(bool split)
{
	split_single = split;
}


// Sets the skip_empty option.
// When it is enabled AddOption() does not store an option whose value is
// empty; an already stored option with the same name is removed instead.
void ConfParser::SkipEmpty(bool skip)
{
	skip_empty = skip;
}


// Sets the use_escape_char option.
// When it is enabled ReadValueQuoted() treats a backslash inside a quoted
// value as an escape: the backslash is dropped and the next character is
// taken literally.
void ConfParser::UseEscapeChar(bool escape)
{
	use_escape_char = escape;
}


// Sets the input_as_utf8 option.
// ReadChar() consults it for files and for char strings: when enabled the
// UTF-8 reading routines are used, otherwise the input is read byte by byte.
// Input given as a wide string is not affected.
void ConfParser::UTF8(bool utf)
{
	input_as_utf8 = utf;
}


// Drops everything gathered by a previous parse and resets the parsing state.
// The options set by SetDefault() and the setters are left untouched.
void ConfParser::Clear()
{
	// parsing state
	status             = ok;
	line               = 1;
	using_global_space = true;

	// no input string is attached
	pchar_unicode      = 0;
	pchar_ascii        = 0;

	// parsed content: the additional spaces and both tables of the global space
	spaces.clear();
	space.table_single.clear();
	space.table.clear();
}


// Parses the configuration file called file_name.
// Previous results are discarded first (Clear()).
// Returns the final status: cant_open_file when the file cannot be opened,
// otherwise whatever the parsing loop left in status.
ConfParser::Status ConfParser::Parse(const char * file_name)
{
	Clear();
	reading_from_file = true;

	// clear() resets the error flags left on the stream by a former use
	file.clear();
	file.open( file_name );

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	Parse();
	file.close();

	return status;
}


// Parses the configuration file called file_name
// (forwards to the const char * overload).
ConfParser::Status ConfParser::Parse(const std::string & file_name)
{
	return Parse(file_name.c_str());
}


// Parses the configuration file whose name is given as a wide string.
// The name is converted with Ezc::WideToUTF8 into the afile_name member
// and then passed to the const char * overload.
ConfParser::Status ConfParser::Parse(const wchar_t * file_name)
{
	Ezc::WideToUTF8(file_name, afile_name);
	return Parse(afile_name.c_str());
}


// Parses the configuration file whose name is given as a std::wstring
// (forwards to the const wchar_t * overload).
ConfParser::Status ConfParser::Parse(const std::wstring & file_name)
{
	return Parse(file_name.c_str());
}


// Parses configuration text held in a zero-terminated char string.
// Previous results are discarded first (Clear()).
// The string is read through pchar_ascii; whether it is read as UTF-8 or
// byte by byte depends on input_as_utf8 (see ReadChar()).
// Returns the final parsing status.
ConfParser::Status ConfParser::ParseString(const char * str)
{
	Clear();
	// select the "char string" source for ReadChar()
	reading_from_file         = false;
	reading_from_wchar_string = false;
	pchar_ascii               = str;

	Parse();

return status;
}


// Parses configuration text held in a std::string
// (forwards to the const char * overload).
ConfParser::Status ConfParser::ParseString(const std::string & str)
{
	return ParseString(str.c_str());
}


// Parses configuration text held in a zero-terminated wide string.
// Previous results are discarded first (Clear()).
// The string is read through pchar_unicode, one wchar_t at a time
// (see ReadCharFromWcharString()).
// Returns the final parsing status.
ConfParser::Status ConfParser::ParseString(const wchar_t * str)
{
	Clear();
	// select the "wide string" source for ReadChar()
	reading_from_file         = false;
	reading_from_wchar_string = true;
	pchar_unicode             = str;

	Parse();

return status;
}


// Parses configuration text held in a std::wstring
// (forwards to the const wchar_t * overload).
ConfParser::Status ConfParser::ParseString(const std::wstring & str)
{
	return ParseString(str.c_str());
}


// The main parsing loop, shared by all input sources.
// It consumes the input until the end (lastc == -1) or until the first
// error. Each pass handles one of:
//   - a list end character closing the current named space,
//   - "name (" opening a named space,
//   - "name = value" adding an option.
// Anything else is a syntax error.
// NOTE(review): reaching the end of input while a named space is still open
// is not reported as an error here.
void ConfParser::Parse()
{
	status = ok;
	ReadChar();
	SkipWhiteLines();

	while( status == ok && lastc != -1 )
	{
		if( lastc != list_end )
		{
			ReadVariable();

			if( lastc == list_start )
				TestListStart();
			else if( lastc == separator && !variable.empty() )
				ReadAddValue();
			else
				status = syntax_error;
		}
		else
		{
			TestListEnd();
		}

		// after an error the input is left where the problem was found
		if( status != ok )
			break;

		SkipWhiteLines();
	}
}


// Handles a list end character met at the beginning of a statement:
// it closes the current named space and returns to the global one.
void ConfParser::TestListEnd()
{
	if( using_global_space )
	{
		// a loose list end character is not allowed in the global space
		status = syntax_error;
		return;
	}

	using_global_space = true;
	ReadChar();
}


// Handles a list start character met right after a name: it opens a new
// named space (called like the name just read) at the end of spaces.
void ConfParser::TestListStart()
{
	if( !using_global_space )
	{
		// spaces cannot be nested: only one additional level is allowed
		status = syntax_error;
		return;
	}

	spaces.insert(spaces.end(), Space());
	spaces.back().name = variable;
	using_global_space = false;
	ReadChar();
}


// Reads the value standing after the separator and stores it under the
// name held in variable. A malformed value sets status to syntax_error.
void ConfParser::ReadAddValue()
{
	// step over the separator ('=' by default)
	ReadChar();

	if( !ReadValue() )
	{
		status = syntax_error;
		return;
	}

	AddOption();
}


// Returns true when c may be a part of an option name:
// an ASCII letter, an ASCII digit or one of '.', ',', '_'.
bool ConfParser::IsVariableChar(int c)
{
	const bool is_letter = (c>='a' && c<='z') || (c>='A' && c<='Z');
	const bool is_digit  = (c>='0' && c<='9');
	const bool is_extra  = (c=='.' || c==',' || c=='_');

	return is_letter || is_digit || is_extra;
}


// Returns true when c is a white character inside a line.
// The new line character ('\n') is deliberately not treated as white here.
bool ConfParser::IsWhite(int c)
{
	switch( c )
	{
	case ' ':
	case '\t':
	case 13:	// '\r' - ends a line (before '\n') in a dos file
	case 160:	// an unbreakable space
		return true;

	default:
		return false;
	}
}


// Skips white characters and commentaries without leaving the current
// line: a commentary is skipped up to (not including) its new line character.
void ConfParser::SkipWhite()
{
	for(;;)
	{
		if( lastc == commentary )
			SkipLine();
		else if( IsWhite(lastc) )
			ReadChar();
		else
			break;
	}
}


// Skips white characters, commentaries and new line characters,
// so it can move over any number of empty or commented lines.
void ConfParser::SkipWhiteLines()
{
	for(;;)
	{
		if( lastc == commentary )
			SkipLine();
		else if( IsWhite(lastc) || lastc=='\n' )
			ReadChar();
		else
			break;
	}
}


// Reads characters until lastc is the new line character or the end of
// the input (-1); the new line character itself is not consumed.
void ConfParser::SkipLine()
{
	while( !(lastc == -1 || lastc == '\n') )
	{
		ReadChar();
	}
}


// Removes white characters (as recognised by IsWhite()) from both ends of s.
// A string made only of white characters becomes empty.
void ConfParser::Trim(std::wstring & s)
{
	// 'last' is the length of s without the trailing white characters
	std::wstring::size_type last = s.size();

	while( last > 0 && IsWhite(s[last-1]) )
		--last;

	if( last == 0 )
	{
		// nothing but white characters (or nothing at all)
		s.clear();
		return;
	}

	if( last != s.size() )
		s.erase(last, std::wstring::npos);

	// 'first' is the number of the leading white characters
	std::wstring::size_type first = 0;

	while( first < s.size() && IsWhite(s[first]) )
		++first;

	if( first != 0 )
		s.erase(0, first);
}


// Converts an ASCII capital letter to its small counterpart;
// any other character is returned unchanged.
wchar_t ConfParser::ToSmall(wchar_t c)
{
	const bool is_capital = (c>='A' && c<='Z');

	return is_capital ? static_cast<wchar_t>(c - 'A' + 'a') : c;
}


// Compares two zero-terminated wide strings ignoring the case of ASCII
// letters (see ToSmall()). Returns true when they are equal.
bool ConfParser::EqualNoCase(const wchar_t * str1, const wchar_t * str2)
{
	for( ; *str1 && *str2 ; ++str1, ++str2 )
	{
		if( ToSmall(*str1) != ToSmall(*str2) )
			return false;
	}

	// equal only if both strings ended at the same position
	return *str1 == 0 && *str2 == 0;
}


void ConfParser::AddOption()
{
	if( value.empty() && skip_empty )
	{
		DeleteFromTable(variable);
		DeleteFromTableSingle(variable);
		return;
	}

	Space * ps = &space;

	if( !using_global_space && !spaces.empty() )
		ps = &spaces.back();

	if( split_single && value.size() == 1 )
	{
		ps->table_single[variable] = value[0];
		DeleteFromTable(variable);
	}
	else
	{
		ps->table[variable] = value;
		DeleteFromTableSingle(variable);
	}
}


void ConfParser::DeleteFromTable(const std::wstring & var)
{
	Space * ps = &space;

	if( !using_global_space && !spaces.empty() )
		ps = &spaces.back();

	Table::iterator i = ps->table.find(var);

	if( i != ps->table.end() )
		ps->table.erase(i);
}


void ConfParser::DeleteFromTableSingle(const std::wstring & var)
{
	Space * ps = &space;

	if( !using_global_space && !spaces.empty() )
		ps = &spaces.back();

	TableSingle::iterator i = ps->table_single.find(var);

	if( i != ps->table_single.end() )
		ps->table_single.erase(i);
}


// Reads an option name into variable.
// White characters and commentaries around the name are skipped; the name
// itself is the longest run of characters accepted by IsVariableChar()
// (it can be empty).
void ConfParser::ReadVariable()
{
	variable.clear();
	SkipWhite();

	for( ; IsVariableChar(lastc) ; ReadChar() )
		variable += lastc;

	SkipWhite();
}


// Reads the value of an option into value (a list of items).
// A value beginning with the list start character is read as a list,
// everything else as a single item. Returns false on a malformed value.
bool ConfParser::ReadValue()
{
	value.clear();
	SkipWhite();

	return (lastc == list_start) ? ReadValueList() : ReadValueNoList();
}


// Reads a list: items placed between the list start and the list end
// characters, separated with the list delimiter. A list can span several
// lines. Returns false when an item is malformed or when the input ends
// before the list end character.
bool ConfParser::ReadValueList()
{
	ReadChar();			// step over the list start character '('
	SkipWhiteLines();	// the first item can be in one of the next lines

	for(;;)
	{
		if( lastc == list_end )
			break;

		if( lastc == -1 )
			return false;	// the end of the input inside a list

		if( !ReadValueNoList(true) )
			return false;

		if( lastc == list_delimiter )
			ReadChar();

		SkipWhiteLines();
	}

	ReadChar();			// step over the list end character ')'
	SkipWhite();

	return true;
}


// Reads one item (quoted or simple) and appends it to value.
// A quoted item is appended even when it is empty, a simple one only when
// something is left after trimming.
// use_list_delimiter tells the simple reader that the item is a part of a list.
// Returns false when the item is malformed.
bool ConfParser::ReadValueNoList(bool use_list_delimiter)
{
	value_item.clear();

	// the kind of the item has to be settled before reading moves lastc
	const bool quoted = (lastc == '"');
	const bool res    = quoted ? ReadValueQuoted() : ReadValueSimple(use_list_delimiter);

	if( res && (quoted || !value_item.empty()) )
		value.push_back(value_item);

	return res;
}


// Reads an item placed in quotes into value_item (the quotes are dropped).
// With use_escape_char set a backslash is dropped and the character after
// it is taken literally, so a quote can be put in as \" .
// Returns false when the input ends before the closing quote.
bool ConfParser::ReadValueQuoted()
{
	ReadChar();		// step over the opening quote

	for( ; lastc != -1 && lastc != '"' ; ReadChar() )
	{
		if( use_escape_char && lastc == '\\' )
			ReadChar();

		value_item += lastc;
	}

	if( lastc == '"' )
	{
		ReadChar();		// step over the closing quote
		SkipWhite();

		return true;
	}

	return false;
}


bool ConfParser::ReadValueSimple(bool use_list_delimiter)
{
	int list_delimiter1 = -1;
	int list_delimiter2 = -1;

	if( use_list_delimiter )
		list_delimiter1 = list_delimiter;

	if( use_list_delimiter || !using_global_space )
		list_delimiter2 = list_end;

	while(	lastc!=-1 && lastc!='\n' && lastc!=commentary &&
			lastc!=list_delimiter1 && lastc!=list_delimiter2 )
	{
		value_item += lastc;
		ReadChar();
	}

	Trim(value_item);
	SkipWhite();

return true;
}


// Reads the next character from the file using Ezc::UTF8ToInt.
// Sequences reported as incorrect are skipped. The character is stored in
// lastc and returned; -1 is stored and returned when the stream fails
// (e.g. at the end of the file). New line characters increment line.
int ConfParser::ReadUTF8Char()
{
	int decoded;
	bool correct;

	lastc = -1;

	for(;;)
	{
		Ezc::UTF8ToInt(file, decoded, correct);

		if( !file )
			return lastc;

		if( correct )
			break;
	}

	lastc = decoded;

	if( lastc == '\n' )
		++line;

	return lastc;
}


// Reads the next byte from the file with get() and stores it in lastc.
// The rest of the parser tests lastc against -1 to detect the end of input.
// New line characters increment line. Returns lastc.
int ConfParser::ReadASCIIChar()
{
	lastc = file.get();

	if( lastc == '\n' )
		++line;

return lastc;
}


// Reads the next character from the wide input string (pchar_unicode).
// The character is stored in lastc and returned; at the terminating zero
// -1 is stored and returned and the pointer is not moved.
// New line characters increment line.
int ConfParser::ReadCharFromWcharString()
{
	if( *pchar_unicode == 0 )
	{
		lastc = -1;
		return lastc;
	}

	lastc = *pchar_unicode;
	++pchar_unicode;

	if( lastc == '\n' )
		++line;

	return lastc;
}


// Reads the next character from the char input string (pchar_ascii)
// decoding it as UTF-8 with Ezc::UTF8ToInt.
// Sequences reported as incorrect are skipped. The character is stored in
// lastc and returned; -1 is stored and returned at the end of the string.
// New line characters increment line.
int ConfParser::ReadCharFromUTF8String()
{
	int c = 0;
	bool correct = false;

	lastc = -1;

	do
	{
		// The terminator has to be tested BEFORE decoding. Testing it after
		// the pointer has been advanced would throw away the character just
		// decoded whenever it is the last one in the string.
		if( *pchar_ascii == 0 )
			return lastc;

		// NOTE(review): assumes UTF8ToInt returns the number of bytes it
		// consumed and that this is at least 1 for a non-empty string - confirm
		size_t len = Ezc::UTF8ToInt(pchar_ascii, c, correct);
		pchar_ascii += len;
	}
	while( !correct );

	lastc = c;

	if( lastc == '\n' )
		++line;

	return lastc;
}


// Reads the next byte from the char input string (pchar_ascii) without
// any decoding. The byte is stored in lastc and returned; at the
// terminating zero -1 is stored and returned and the pointer is not moved.
// New line characters increment line.
int ConfParser::ReadCharFromAsciiString()
{
	if( *pchar_ascii == 0 )
	{
		lastc = -1;
	}
	else
	{
		// Go through unsigned char so that bytes above 127 give 128..255,
		// the same values ReadASCIIChar() gets from file.get(). Where plain
		// char is signed, a direct conversion would turn byte 0xFF into -1
		// (taken for the end of the input) and byte 160 into a negative
		// value never recognised by IsWhite().
		lastc = static_cast<unsigned char>(*pchar_ascii);
		++pchar_ascii;
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}


// Reads the next character from the current input source into lastc and
// returns it (-1 at the end of the input).
// The source is chosen with reading_from_file / reading_from_wchar_string,
// the decoding of files and char strings with input_as_utf8.
int ConfParser::ReadChar()
{
	if( reading_from_file )
		return input_as_utf8 ? ReadUTF8Char() : ReadASCIIChar();

	if( reading_from_wchar_string )
		return ReadCharFromWcharString();

	return input_as_utf8 ? ReadCharFromUTF8String() : ReadCharFromAsciiString();
}


// Looks up the option called name in this space; an empty string is used
// as the default. The name is copied into the tmp_name member and the
// lookup is done by the (std::wstring, out, def) overload.
// Returns true when the option was found.
bool ConfParser::Space::GetValue(const wchar_t * name, std::wstring & out)
{
	tmp_name = name;
	return GetValue(tmp_name, out, L"");
}


// Looks up the option called name in this space; def is put in out when
// the option is missing. The name is copied into the tmp_name member and
// the lookup is done by the (std::wstring, out, def) overload.
// Returns true when the option was found.
bool ConfParser::Space::GetValue(const wchar_t * name, std::wstring & out, const wchar_t * def)
{
	tmp_name = name;
	return GetValue(tmp_name, out, def);
}


// Looks up the option called name in this space; an empty string is used
// as the default. Returns true when the option was found.
bool ConfParser::Space::GetValue(const std::wstring & name, std::wstring & out)
{
	return GetValue(name, out, L"");
}


bool ConfParser::Space::GetValue(const std::wstring & name, std::wstring & out, const wchar_t * def)
{
	TableSingle::iterator i = table_single.find(name);

	if( i != table_single.end() )
	{
		out = i->second;
		return true;
	}
	else
	{
		Table::iterator t = table.find(name);

		if( t == table.end() || t->second.empty() )
		{
			out = def;
			return false;
		}
		else
		{
			out = t->second[0];
			return true;
		}
	}
}


// Puts the text of the option called name (from the global space) in out;
// default_str is used when the option is missing.
// The name is copied into the tmp_name member first.
void ConfParser::ToText(const wchar_t * name, std::wstring & out)
{
	tmp_name = name;
	return ToText(tmp_name, out, default_str.c_str());
}


// Puts the text of the option called name (from the global space) in out;
// def is used when the option is missing.
// The name is copied into the tmp_name member first.
void ConfParser::ToText(const wchar_t * name, std::wstring & out, const wchar_t * def)
{
	tmp_name = name;
	return ToText(tmp_name, out, def);
}


// Puts the text of the option called name in out; def is used when the
// option is missing. Only the global space (the space member) is searched.
void ConfParser::ToText(const std::wstring & name, std::wstring & out, const wchar_t * def)
{
	space.GetValue(name, out, def);
}


// Returns the text of the option called name (default_str when missing).
// The result is a reference to the tmp_value_text member, which is
// overwritten by the other Text()/AText()/Int()/Size()/Bool() calls.
std::wstring & ConfParser::Text(const wchar_t * name)
{
	ToText(name, tmp_value_text);
	return tmp_value_text;
}


// Returns the text of the option called name (def when missing).
// The result is a reference to the tmp_value_text member, which is
// overwritten by the other Text()/AText()/Int()/Size()/Bool() calls.
std::wstring & ConfParser::Text(const wchar_t * name, const wchar_t * def)
{
	ToText(name, tmp_value_text, def);
	return tmp_value_text;
}


// Returns the text of the option called name (def when missing).
// The result is a reference to the tmp_value_text member, which is
// overwritten by the other Text()/AText()/Int()/Size()/Bool() calls.
std::wstring & ConfParser::Text(const std::wstring & name, const wchar_t * def)
{
	ToText(name, tmp_value_text, def);
	return tmp_value_text;
}


// Returns the text of the option called name (default_str when missing)
// as a std::string produced by Ezc::WideToUTF8.
// The result is a reference to the tmp_value_text_ascii member, which is
// overwritten by the next AText() call.
std::string & ConfParser::AText(const wchar_t * name)
{
	ToText(name, tmp_value_text);
	Ezc::WideToUTF8(tmp_value_text, tmp_value_text_ascii);

return tmp_value_text_ascii;
}


// Returns the text of the option called name (def when missing)
// as a std::string produced by Ezc::WideToUTF8.
// The result is a reference to the tmp_value_text_ascii member, which is
// overwritten by the next AText() call.
std::string & ConfParser::AText(const wchar_t * name, const wchar_t * def)
{
	ToText(name, tmp_value_text, def);
	Ezc::WideToUTF8(tmp_value_text, tmp_value_text_ascii);

return tmp_value_text_ascii;
}


// Returns the text of the option called name (def when missing)
// as a std::string produced by Ezc::WideToUTF8.
// The result is a reference to the tmp_value_text_ascii member, which is
// overwritten by the next AText() call.
std::string & ConfParser::AText(const std::wstring & name, const wchar_t * def)
{
	ToText(name, tmp_value_text, def);
	Ezc::WideToUTF8(tmp_value_text, tmp_value_text_ascii);

return tmp_value_text_ascii;
}


// Returns the option called name converted to int
// (default_int when the option is missing).
// The name is copied into the tmp_name member first.
int ConfParser::Int(const wchar_t * name)
{
	tmp_name = name;
	return Int(tmp_name, default_int);
}


// Returns the option called name converted to int
// (def when the option is missing).
// The name is copied into the tmp_name member first.
int ConfParser::Int(const wchar_t * name, int def)
{
	tmp_name = name;
	return Int(tmp_name, def);
}


// Converts a text to int with wcstol.
// A text beginning with '0' is read as an octal number (the leading zero
// is stepped over), any other text as a decimal one.
// The long result is narrowed to int.
int ConfParser::ToInt(const std::wstring & value)
{
	const wchar_t * digits = value.c_str();
	int base = 10;

	if( *digits == '0' )
	{
		++digits;
		base = 8;
	}

	return static_cast<int>(wcstol(digits, 0, base));
}


// Returns the option called name (from the global space) converted to int
// with ToInt(), or def when the option is missing.
// The text of the option is left in the tmp_value_text member.
int ConfParser::Int(const std::wstring & name, int def)
{
	if( !space.GetValue(name, tmp_value_text) )
		return def;

	return ToInt(tmp_value_text);
}


// Returns the option called name converted to size_t
// (default_size when the option is missing).
// The name is copied into the tmp_name member first.
size_t ConfParser::Size(const wchar_t * name)
{
	tmp_name = name;
	return Size(tmp_name, default_size);
}


// Returns the option called name converted to size_t
// (def when the option is missing).
// The name is copied into the tmp_name member first.
size_t ConfParser::Size(const wchar_t * name, size_t def)
{
	tmp_name = name;
	return Size(tmp_name, def);
}


// Converts a text to size_t with wcstoul.
// A text beginning with '0' is read as an octal number (the leading zero
// is stepped over), any other text as a decimal one.
size_t ConfParser::ToSize(const std::wstring & value)
{
	const wchar_t * digits = value.c_str();
	int base = 10;

	if( *digits == '0' )
	{
		++digits;
		base = 8;
	}

	return static_cast<size_t>(wcstoul(digits, 0, base));
}


// Returns the option called name (from the global space) converted to
// size_t with ToSize(), or def when the option is missing.
// The text of the option is left in the tmp_value_text member.
size_t ConfParser::Size(const std::wstring & name, size_t def)
{
	if( !space.GetValue(name, tmp_value_text) )
		return def;

	return ToSize(tmp_value_text);
}


// Returns the option called name converted to bool
// (default_bool when the option is missing).
// The name is copied into the tmp_name member first.
bool ConfParser::Bool(const wchar_t * name)
{
	tmp_name = name;
	return Bool(tmp_name, default_bool);
}


// Returns the option called name converted to bool
// (def when the option is missing).
// The name is copied into the tmp_name member first.
bool ConfParser::Bool(const wchar_t * name, bool def)
{
	tmp_name = name;
	return Bool(tmp_name, def);
}


// Converts a text to bool: "true", "yes" (the case of letters is ignored)
// and "1" give true, everything else gives false.
bool ConfParser::ToBool(const std::wstring & value)
{
	const wchar_t * text = value.c_str();

	if( EqualNoCase(text, L"true") )
		return true;

	if( EqualNoCase(text, L"yes") )
		return true;

	return EqualNoCase(text, L"1");
}


// Returns the option called name (from the global space) converted to
// bool with ToBool(), or def when the option is missing.
// The text of the option is left in the tmp_value_text member.
bool ConfParser::Bool(const std::wstring & name, bool def)
{
	if( !space.GetValue(name, tmp_value_text) )
		return def;

	return ToBool(tmp_value_text);
}


// Sets default_str: the text used by ToText()/Text()/AText() called
// without an explicit default when the option is missing.
void ConfParser::SetDefaultText(const wchar_t * def)
{
	default_str = def;
}

// Sets default_str: the text used by ToText()/Text()/AText() called
// without an explicit default when the option is missing.
void ConfParser::SetDefaultText(const std::wstring & def)
{
	default_str = def;
}

// Sets default_int: the value returned by Int(name) when the option is missing.
void ConfParser::SetDefaultInt(int def)
{
	default_int = def;
}

// Sets default_size: the value returned by Size(name) when the option is missing.
void ConfParser::SetDefaultSize(size_t def)
{
	default_size = def;
}

// Sets default_bool: the value returned by Bool(name) when the option is missing.
void ConfParser::SetDefaultBool(bool def)
{
	default_bool = def;
}


// Puts all items of the option called name in list.
// In lists we don't use default values: the list is left empty when the
// option is missing.
// The name is copied into the tmp_name member and the work is done by the
// std::wstring overload.
void ConfParser::ListText(const wchar_t * name, std::vector<std::wstring> & list)
{
	tmp_name = name;
	ListText(tmp_name, list);
}


void ConfParser::ListText(const std::wstring & name, std::vector<std::wstring> & list)
{
	list.clear();
	ConfParser::TableSingle::iterator i = space.table_single.find(name);

	if( i != space.table_single.end() )
	{
		list.push_back(i->second);
	}
	else
	{
		ConfParser::Table::iterator z = space.table.find(name);

		if( z != space.table.end() )
			list = z->second;
	}
}


void ConfParser::Print(std::ostream & out)
{
	TableSingle::iterator i1;

	for(i1 = space.table_single.begin() ; i1 != space.table_single.end() ; ++i1)
	{
		Ezc::WideToUTF8(i1->first, out);
		out << '=';
		Ezc::WideToUTF8(i1->second, out);
		out << std::endl;
	}

	Table::iterator i2;
	Value::iterator i3;

	for(i2 = space.table.begin() ; i2 != space.table.end() ; ++i2)
	{
		Ezc::WideToUTF8(i2->first, out);
		out << '=';

		for(i3 = i2->second.begin() ; i3 != i2->second.end() ; ++i3)
		{
			Ezc::WideToUTF8(*i3, out);
			out << ',';
		}

		out << std::endl;
	}
}