changed: SpaceParser -- parser's engine has been rewritten

now we can map all strings to all strings
         documentation in space.h still needs to be updated


git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@407 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2012-05-10 21:16:19 +00:00
parent c8cf401316
commit e620c8f95d
4 changed files with 315 additions and 217 deletions

View File

@ -638,6 +638,31 @@ return false;
}
/*
    the set of white characters recognized here should be kept
    the same as the one in spaceparser.cpp
*/
bool Space::IsWhite(int c)
{
    // '\n' is deliberately not treated as a white character here
    switch( c )
    {
    case ' ':
    case '\t':
    case 13:    // '\r' -- it is at the end of a line in a dos file (\r\n)
    case 160:   // an unbreakable space
        return true;
    }

    return false;
}
/*
    returns true if at least one character of str
    is a white character in the sense of IsWhite()
*/
bool Space::HasWhite(const std::wstring & str)
{
    std::wstring::const_iterator it = str.begin();

    while( it != str.end() )
    {
        if( IsWhite(*it) )
            return true;

        ++it;
    }

    return false;
}
} // namespace

View File

@ -307,7 +307,29 @@ public:
template<class Stream>
void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const;
template<class Stream>
void SerializeTableSingle(Stream & out, bool use_indents, int level) const;
template<class Stream>
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
template<class Stream>
static void PrintValue(Stream & out, const std::wstring & str, bool use_quote = true);
// for other uses
template<class Stream>
static void PrintValue(Stream & out, const std::string & str, bool use_quote = true);
template<class Stream>
static void PrintKey(Stream & out, const std::wstring & str);
template<class Stream>
static void PrintLevel(Stream & out, bool use_indents, int level);
private:
std::wstring tmp_name;
std::wstring tmp_value_text;
std::string tmp_value_text_ascii;
@ -318,19 +340,8 @@ private:
bool ToBool(const std::wstring & value);
wchar_t ToSmall(wchar_t c);
bool EqualNoCase(const wchar_t * str1, const wchar_t * str2);
template<class Stream>
void PrintLevel(Stream & out, bool use_indents, int level) const;
template<class Stream>
void SerializeTableSingle(Stream & out, bool use_indents, int level) const;
template<class Stream>
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
template<class Stream>
void PrintValue(Stream & out, const std::wstring & str) const;
static bool IsWhite(int c);
static bool HasWhite(const std::wstring & str);
};
@ -339,7 +350,7 @@ private:
template<class Stream>
void Space::PrintLevel(Stream & out, bool use_indents, int level) const
void Space::PrintLevel(Stream & out, bool use_indents, int level)
{
if( use_indents )
{
@ -349,9 +360,11 @@ void Space::PrintLevel(Stream & out, bool use_indents, int level) const
}
template<class Stream>
void Space::PrintValue(Stream & out, const std::wstring & str) const
void Space::PrintValue(Stream & out, const std::wstring & str, bool use_quote)
{
if( use_quote )
out << '\"';
for(size_t i=0 ; i<str.size() ; ++i)
@ -362,32 +375,80 @@ void Space::PrintValue(Stream & out, const std::wstring & str) const
if( str[i] == '"' )
out << L"\\\"";
else
if( str[i] == '\r' )
out << L"\\r";
else
if( str[i] == '\n' )
out << L"\\n";
else
if( str[i] == 0 )
out << L"\\0";
else
out << str[i];
}
if( use_quote )
out << '\"';
}
/*
    prints a narrow string to the 'out' stream

    a backslash, a double quote, \r, \n and the zero character are printed
    as two-character escape sequences, every other character is printed unchanged;
    if use_quote is true the whole text is surrounded with double quotes
*/
template<class Stream>
void Space::PrintValue(Stream & out, const std::string & str, bool use_quote)
{
    if( use_quote )
        out << '\"';

    for(std::string::const_iterator c = str.begin() ; c != str.end() ; ++c)
    {
        switch( *c )
        {
        case '\\': out << "\\\\"; break;
        case '"':  out << "\\\""; break;
        case '\r': out << "\\r";  break;
        case '\n': out << "\\n";  break;
        case 0:    out << "\\0";  break;
        default:   out << *c;     break;
        }
    }

    if( use_quote )
        out << '\"';
}
/*
    prints a key (or a space name) to the 'out' stream

    the key is printed in quotes only when it is empty or when it contains
    a white character, the characters themselves are escaped by PrintValue()
*/
template<class Stream>
void Space::PrintKey(Stream & out, const std::wstring & str)
{
    const bool need_quote = str.empty() || HasWhite(str);

    PrintValue(out, str, need_quote);
}
/*
    prints every key/value pair from table_single to the 'out' stream,
    one pair per line in the form:
       key = "value"

    use_indents and level are passed on to PrintLevel()
    which is called at the beginning of each line
*/
template<class Stream>
void Space::SerializeTableSingle(Stream & out, bool use_indents, int level) const
{
    if( !table_single.empty() )
    {
        TableSingle::const_iterator i;

        for(i = table_single.begin() ; i != table_single.end() ; ++i)
        {
            PrintLevel(out, use_indents, level);

            // the key is printed exactly once and only through PrintKey()
            // (so it is quoted when needed); printing i->first directly as well
            // would produce "key = key = value" lines
            PrintKey(out, i->first);
            out << L" = ";
            PrintValue(out, i->second);
            out << '\n';
        }
    }
}
@ -397,13 +458,11 @@ void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const
Table::const_iterator i2;
size_t v;
if( !table.empty() )
{
for(i2 = table.begin() ; i2 != table.end() ; ++i2)
{
PrintLevel(out, use_indents, level);
out << i2->first << L" = ";
PrintKey(out, i2->first);
out << L" = ";
if( i2->second.size() != 1 )
out << '(';
@ -425,7 +484,6 @@ size_t v;
out << '\n';
}
}
}
template<class Stream>
@ -437,7 +495,10 @@ void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int lev
PrintLevel(out, use_indents, level);
if( !name.empty() )
out << name << ' ';
{
PrintKey(out, name);
out << ' ';
}
out << L"(\n";

View File

@ -113,7 +113,7 @@ SpaceParser::Status SpaceParser::Parse(const char * file_name)
reading_from_file = true;
file.clear();
file.open( file_name );
file.open(file_name, std::ios_base::binary | std::ios_base::in);
if( file )
{
@ -209,7 +209,7 @@ void SpaceParser::Parse()
if( status == ok && space != root_space )
{
// last closing a space characters ')' are missing
// last closing ')' characters are missing (closing a space)
status = syntax_error;
}
}
@ -225,16 +225,18 @@ void SpaceParser::ParseLoop()
}
else
{
ReadVariable();
ReadKey();
SkipWhite();
if( lastc == list_start )
{
SpaceStarts();
}
else
if( lastc == separator && !variable.empty() )
if( lastc == separator )
{
ReadAddValue();
ReadValue();
AddKeyValuePair();
}
else
{
@ -258,7 +260,11 @@ void SpaceParser::SpaceEnds()
else
{
space = space->parent;
ReadChar();
ReadChar(); // skipping closing space character ')'
SkipWhite();
if( lastc != -1 && lastc != '\n' )
status = syntax_error;
}
}
@ -268,40 +274,19 @@ void SpaceParser::SpaceStarts()
Space * new_space = new Space();
space->spaces.push_back(new_space);
new_space->parent = space;
new_space->name = variable;
new_space->name = key;
space = new_space;
ReadChar();
ReadChar(); // skipping space starts character '('
}
/*
    reads a value standing after the separator and stores it with AddOption(),
    if the value cannot be read the status is set to syntax_error
*/
void SpaceParser::ReadAddValue()
{
    ReadChar(); // the separator '=' is skipped here

    if( !ReadValue() )
    {
        status = syntax_error;
        return;
    }

    AddOption();
}
/*
    returns true if c can be a part of a variable name:
    an ascii letter, a digit, a dot, a comma or an underscore
*/
bool SpaceParser::IsVariableChar(int c)
{
    const bool small_letter = (c >= 'a' && c <= 'z');
    const bool big_letter   = (c >= 'A' && c <= 'Z');
    const bool digit        = (c >= '0' && c <= '9');
    const bool other        = (c == '.' || c == ',' || c == '_');

    return small_letter || big_letter || digit || other;
}
/*
those white characters here should be the same as in space.h
*/
bool SpaceParser::IsWhite(int c)
{
// dont use '\n' here
@ -380,27 +365,6 @@ std::wstring::size_type i;
/*
    stores the last read variable with its value in the current space

    - an empty value with skip_empty set: the variable is removed from both tables
    - exactly one item with split_single set: the item goes to table_single
    - otherwise: the whole list goes to table
    the variable never stays in both tables at the same time
*/
void SpaceParser::AddOption()
{
    if( skip_empty && value.empty() )
    {
        DeleteFromTable(variable);
        DeleteFromTableSingle(variable);
        return;
    }

    const bool store_as_single = split_single && value.size() == 1;

    if( store_as_single )
    {
        space->table_single[variable] = value[0];
        DeleteFromTable(variable);
        return;
    }

    space->table[variable] = value;
    DeleteFromTableSingle(variable);
}
void SpaceParser::DeleteFromTable(const std::wstring & var)
@ -423,138 +387,155 @@ void SpaceParser::DeleteFromTableSingle(const std::wstring & var)
void SpaceParser::ReadVariable()
{
variable.clear();
SkipWhite();
while( IsVariableChar(lastc) )
void SpaceParser::ReadTokenQuoted()
{
variable += lastc;
ReadChar(); // skipping the first quotation mark
while( lastc != -1 && (char_was_escaped || lastc != '"') )
{
token += static_cast<wchar_t>(lastc);
ReadChar();
}
SkipWhite();
}
bool SpaceParser::ReadValue()
{
value.clear();
SkipWhite();
if( lastc == list_start )
return ReadValueList();
if( !char_was_escaped && lastc == '"' )
ReadChar(); // skipping the last quotation mark
else
return ReadValueNoList();
status = syntax_error;
}
/*
    reads a not quoted token, the characters are appended to 'token'

    reading stops (the stopping character is left in lastc) when:
    - lastc is -1
    - lastc is equal to delimit1 or delimit2 (a delimiter set to -1 is not used)
    - white_delimit is true and lastc is a white character
    - new_line_delimit is true and lastc is '\n'

    when the commentary character is met SkipLine() is called first,
    at the end the token is trimmed
*/
void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
    for(;;)
    {
        if( lastc == commentary )
            SkipLine();

        if( lastc == -1 )
            break;

        if( delimit1 != -1 && lastc == delimit1 )
            break;

        if( delimit2 != -1 && lastc == delimit2 )
            break;

        if( white_delimit && IsWhite(lastc) )
            break;

        if( new_line_delimit && lastc == '\n' )
            break;

        token += static_cast<wchar_t>(lastc);
        ReadChar();
    }

    Trim(token);
}
/*
    reads the next token to 'token' (the old content is cleared)

    leading white characters are skipped, a token beginning with a not escaped
    quotation mark is read by ReadTokenQuoted(), any other token is read
    by ReadTokenSingle() with the given delimiters
*/
void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
    token.clear();
    SkipWhite();

    const bool starts_with_quote = (!char_was_escaped && lastc == '"');

    if( starts_with_quote )
    {
        ReadTokenQuoted();
    }
    else
    {
        ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
    }
}
/*
    reads a key to 'key'

    a not quoted key ends at a new line, at the separator
    or at the list_start character, white characters alone do not end it
*/
void SpaceParser::ReadKey()
{
    const bool white_delimit    = false;
    const bool new_line_delimit = true;

    ReadToken(white_delimit, new_line_delimit, separator, list_start);
    key = token;

    SkipWhite();
}
bool SpaceParser::ReadValueList()
void SpaceParser::ReadValueList()
{
ReadChar(); // skipping first list character '('
SkipWhiteLines(); // lists can be split into several lines
ReadChar(); // skipping the first list character '('
SkipWhiteLines();
while( lastc != -1 && lastc != list_end )
{
if( !ReadValueNoList(true) )
return false;
ReadToken(true, true, list_delimiter, list_end);
value.push_back(token);
SkipWhiteLines();
if( lastc == list_delimiter )
{
ReadChar();
SkipWhiteLines();
}
}
if( lastc == list_end )
{
ReadChar(); // skipping the last list character ')'
SkipWhite();
// there cannot be anything after ')' character
if( lastc != -1 && lastc != '\n' )
status = syntax_error;
}
else
{
status = syntax_error; // missing one ')'
}
}
/*
    reads a value consisting of one item and appends it to 'value'

    no additional delimiters are used (a not quoted item ends at a new line),
    after the item only white characters are allowed up to the end of the line
    or the end of the input, anything else sets the status to syntax_error
*/
void SpaceParser::ReadValueSingle()
{
    const bool white_delimit    = false;
    const bool new_line_delimit = true;

    SkipWhite();
    ReadToken(white_delimit, new_line_delimit, -1, -1);
    value.push_back(token);
    SkipWhite();

    const bool line_finished = (lastc == -1 || lastc == '\n');

    if( !line_finished )
        status = syntax_error;
}
void SpaceParser::ReadValue()
{
ReadChar(); // skipping separator '='
value.clear();
SkipWhite();
if( lastc == '(' )
ReadValueList();
else
ReadValueSingle();
SkipWhiteLines();
}
if( lastc != list_end )
return false;
ReadChar(); // skipping last list character ')'
SkipWhite();
return true;
void SpaceParser::AddKeyValuePair()
{
if( value.empty() && skip_empty )
{
DeleteFromTable(key);
DeleteFromTableSingle(key);
return;
}
bool SpaceParser::ReadValueNoList(bool use_list_delimiter)
if( split_single && value.size() == 1 )
{
bool res;
value_item.clear();
if( lastc == '"' )
{
res = ReadValueQuoted(); // quoted value
if( res )
value.push_back(value_item);
space->table_single[key] = value[0];
DeleteFromTable(key);
}
else
{
res = ReadValueSimple(use_list_delimiter);
if( res && !value_item.empty() )
value.push_back(value_item);
space->table[key] = value;
DeleteFromTableSingle(key);
}
return res;
}
/*
    reads a quoted value, the characters are appended to value_item

    when use_escape_char is set a backslash is dropped
    and the character standing after it is taken literally

    returns false if the closing quote is missing
*/
bool SpaceParser::ReadValueQuoted()
{
    ReadChar(); // skipping the first quote

    // !! IMPROVE ME
    // add some other escaped characters especialy \0 (the serializator is working that way now)
    for( ; lastc != '"' && lastc != -1 ; ReadChar() )
    {
        if( use_escape_char && lastc == '\\' )
            ReadChar();

        value_item += lastc;
    }

    if( lastc == '"' )
    {
        ReadChar(); // skipping the last quote
        SkipWhite();
        return true;
    }

    return false;
}
bool SpaceParser::ReadValueSimple(bool use_list_delimiter)
{
int list_delimiter1 = -1;
int list_delimiter2 = -1;
if( use_list_delimiter )
list_delimiter1 = list_delimiter;
if( use_list_delimiter || space != root_space )
list_delimiter2 = list_end;
while( lastc!=-1 && lastc!='\n' && lastc!=commentary &&
lastc!=list_delimiter1 && lastc!=list_delimiter2 )
{
value_item += lastc;
ReadChar();
}
Trim(value_item);
SkipWhite();
return true;
}
int SpaceParser::ReadUTF8Char()
@ -651,7 +632,7 @@ return lastc;
}
int SpaceParser::ReadChar()
int SpaceParser::ReadCharNoEscape()
{
if( reading_from_file )
{
@ -677,6 +658,31 @@ int SpaceParser::ReadChar()
}
/*
    reads the next character to lastc and returns it

    if use_escape_char is set and the character is a backslash then one more
    character is read and char_was_escaped is set to true:
    \0 \n \r \t are changed to the zero, new line, carriage return and tab characters,
    any other character (a backslash and a quotation mark too) is left as it is

    in all other cases char_was_escaped is false
*/
int SpaceParser::ReadChar()
{
    char_was_escaped = false;
    ReadCharNoEscape();

    if( !use_escape_char || lastc != '\\' )
        return lastc;

    char_was_escaped = true;
    ReadCharNoEscape();

    if( lastc == '0' )
        lastc = 0;
    else
    if( lastc == 'n' )
        lastc = '\n';
    else
    if( lastc == 'r' )
        lastc = '\r';
    else
    if( lastc == 't' )
        lastc = '\t';

    return lastc;
}
} // namespace

View File

@ -142,7 +142,6 @@ public:
/*
'\' character is used to escape other characters in a quoted string
so "some \t t\"ext" will produce "some t t"ext"
(this is only use in quoted string)
default: true
*/
void UseEscapeChar(bool escape);
@ -191,15 +190,15 @@ private:
/*
last read variable (option)
last read token
*/
std::wstring variable;
std::wstring token;
/*
last read list item
last read key
*/
std::wstring value_item;
std::wstring key;
/*
@ -245,6 +244,13 @@ private:
int lastc;
/*
true if the lastc was escaped (with a backslash)
we have to know if the last sequence was \" or just "
*/
bool char_was_escaped;
/*
current file
*/
@ -287,27 +293,27 @@ private:
void ParseLoop();
void SpaceEnds();
void SpaceStarts();
void ReadAddValue();
void AddOption();
void DeleteFromTable(const std::wstring & var);
void DeleteFromTableSingle(const std::wstring & var);
void ReadVariable();
bool ReadValue();
bool ReadValueList();
bool ReadValueNoList(bool use_list_delimiter = false);
bool ReadValueQuoted();
bool ReadValueSimple(bool use_list_delimiter = false);
void ReadTokenQuoted();
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadKey();
void ReadValueList();
void ReadValueSingle();
void ReadValue();
void AddKeyValuePair();
int ReadUTF8Char();
int ReadASCIIChar();
int ReadCharFromWcharString();
int ReadCharFromUTF8String();
int ReadCharFromAsciiString();
int ReadCharNoEscape();
int ReadChar();
bool IsWhite(int c);
bool IsVariableChar(int c);
void SkipWhite();
void SkipWhiteLines();
void SkipLine();