changed: SpaceParser -- parser's engine has been rewritten

now any string can be used as a key and mapped to any string value
         the documentation in space.h still needs to be updated


git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@407 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
Tomasz Sowa 2012-05-10 21:16:19 +00:00
parent c8cf401316
commit e620c8f95d
4 changed files with 315 additions and 217 deletions

View File

@ -638,6 +638,31 @@ return false;
} }
/*
those white characters here should be the same as in spaceparser.cpp
*/
bool Space::IsWhite(int c)
{
// dont use '\n' here
// 13 (\r) is at the end of a line in a dos file \r\n
// 160 is an unbreakable space
if( c==' ' || c=='\t' || c==13 || c==160 )
return true;
return false;
}
bool Space::HasWhite(const std::wstring & str)
{
for(size_t i=0 ; i<str.size() ; ++i)
if( IsWhite(str[i]) )
return true;
return false;
}
} // namespace } // namespace

View File

@ -307,7 +307,29 @@ public:
template<class Stream> template<class Stream>
void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const; void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const;
template<class Stream>
void SerializeTableSingle(Stream & out, bool use_indents, int level) const;
template<class Stream>
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
template<class Stream>
static void PrintValue(Stream & out, const std::wstring & str, bool use_quote = true);
// for other uses
template<class Stream>
static void PrintValue(Stream & out, const std::string & str, bool use_quote = true);
template<class Stream>
static void PrintKey(Stream & out, const std::wstring & str);
template<class Stream>
static void PrintLevel(Stream & out, bool use_indents, int level);
private: private:
std::wstring tmp_name; std::wstring tmp_name;
std::wstring tmp_value_text; std::wstring tmp_value_text;
std::string tmp_value_text_ascii; std::string tmp_value_text_ascii;
@ -318,19 +340,8 @@ private:
bool ToBool(const std::wstring & value); bool ToBool(const std::wstring & value);
wchar_t ToSmall(wchar_t c); wchar_t ToSmall(wchar_t c);
bool EqualNoCase(const wchar_t * str1, const wchar_t * str2); bool EqualNoCase(const wchar_t * str1, const wchar_t * str2);
static bool IsWhite(int c);
static bool HasWhite(const std::wstring & str);
template<class Stream>
void PrintLevel(Stream & out, bool use_indents, int level) const;
template<class Stream>
void SerializeTableSingle(Stream & out, bool use_indents, int level) const;
template<class Stream>
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
template<class Stream>
void PrintValue(Stream & out, const std::wstring & str) const;
}; };
@ -339,7 +350,7 @@ private:
template<class Stream> template<class Stream>
void Space::PrintLevel(Stream & out, bool use_indents, int level) const void Space::PrintLevel(Stream & out, bool use_indents, int level)
{ {
if( use_indents ) if( use_indents )
{ {
@ -349,10 +360,12 @@ void Space::PrintLevel(Stream & out, bool use_indents, int level) const
} }
template<class Stream> template<class Stream>
void Space::PrintValue(Stream & out, const std::wstring & str) const void Space::PrintValue(Stream & out, const std::wstring & str, bool use_quote)
{ {
out << '\"'; if( use_quote )
out << '\"';
for(size_t i=0 ; i<str.size() ; ++i) for(size_t i=0 ; i<str.size() ; ++i)
{ {
@ -362,30 +375,78 @@ void Space::PrintValue(Stream & out, const std::wstring & str) const
if( str[i] == '"' ) if( str[i] == '"' )
out << L"\\\""; out << L"\\\"";
else else
if( str[i] == '\r' )
out << L"\\r";
else
if( str[i] == '\n' )
out << L"\\n";
else
if( str[i] == 0 ) if( str[i] == 0 )
out << L"\\0"; out << L"\\0";
else else
out << str[i]; out << str[i];
} }
out << '\"'; if( use_quote )
out << '\"';
}
template<class Stream>
void Space::PrintValue(Stream & out, const std::string & str, bool use_quote)
{
if( use_quote )
out << '\"';
for(size_t i=0 ; i<str.size() ; ++i)
{
if( str[i] == '\\' )
out << "\\\\";
else
if( str[i] == '"' )
out << "\\\"";
else
if( str[i] == '\r' )
out << "\\r";
else
if( str[i] == '\n' )
out << "\\n";
else
if( str[i] == 0 )
out << "\\0";
else
out << str[i];
}
if( use_quote )
out << '\"';
}
template<class Stream>
void Space::PrintKey(Stream & out, const std::wstring & str)
{
bool use_quote = false;
if( str.empty() || HasWhite(str) )
use_quote = true;
PrintValue(out, str, use_quote);
} }
template<class Stream> template<class Stream>
void Space::SerializeTableSingle(Stream & out, bool use_indents, int level) const void Space::SerializeTableSingle(Stream & out, bool use_indents, int level) const
{ {
if( !table_single.empty() ) TableSingle::const_iterator i;
{
TableSingle::const_iterator i;
for(i = table_single.begin() ; i != table_single.end() ; ++i) for(i=table_single.begin() ; i != table_single.end() ; ++i)
{ {
PrintLevel(out, use_indents, level); PrintLevel(out, use_indents, level);
out << i->first << L" = "; PrintKey(out, i->first);
PrintValue(out, i->second); out << L" = ";
out << '\n'; PrintValue(out, i->second);
} out << '\n';
} }
} }
@ -397,33 +458,30 @@ void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const
Table::const_iterator i2; Table::const_iterator i2;
size_t v; size_t v;
for(i2 = table.begin() ; i2 != table.end() ; ++i2)
if( !table.empty() )
{ {
for(i2 = table.begin() ; i2 != table.end() ; ++i2) PrintLevel(out, use_indents, level);
PrintKey(out, i2->first);
out << L" = ";
if( i2->second.size() != 1 )
out << '(';
for(v = 0 ; v < i2->second.size() ; ++v)
{ {
PrintLevel(out, use_indents, level); if( v > 0 )
out << i2->first << L" = "; PrintLevel(out, use_indents, level + i2->first.size() + 3);
if( i2->second.size() != 1 ) PrintValue(out, i2->second[v]);
out << '(';
for(v = 0 ; v < i2->second.size() ; ++v) if( v + 1 < i2->second.size() )
{ out << '\n';
if( v > 0 )
PrintLevel(out, use_indents, level + i2->first.size() + 3);
PrintValue(out, i2->second[v]);
if( v + 1 < i2->second.size() )
out << '\n';
}
if( i2->second.size() != 1 )
out << ')';
out << '\n';
} }
if( i2->second.size() != 1 )
out << ')';
out << '\n';
} }
} }
@ -437,7 +495,10 @@ void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int lev
PrintLevel(out, use_indents, level); PrintLevel(out, use_indents, level);
if( !name.empty() ) if( !name.empty() )
out << name << ' '; {
PrintKey(out, name);
out << ' ';
}
out << L"(\n"; out << L"(\n";

View File

@ -113,7 +113,7 @@ SpaceParser::Status SpaceParser::Parse(const char * file_name)
reading_from_file = true; reading_from_file = true;
file.clear(); file.clear();
file.open( file_name ); file.open(file_name, std::ios_base::binary | std::ios_base::in);
if( file ) if( file )
{ {
@ -209,7 +209,7 @@ void SpaceParser::Parse()
if( status == ok && space != root_space ) if( status == ok && space != root_space )
{ {
// last closing a space characters ')' are missing // last closing ')' characters are missing (closing a space)
status = syntax_error; status = syntax_error;
} }
} }
@ -225,16 +225,18 @@ void SpaceParser::ParseLoop()
} }
else else
{ {
ReadVariable(); ReadKey();
SkipWhite();
if( lastc == list_start ) if( lastc == list_start )
{ {
SpaceStarts(); SpaceStarts();
} }
else else
if( lastc == separator && !variable.empty() ) if( lastc == separator )
{ {
ReadAddValue(); ReadValue();
AddKeyValuePair();
} }
else else
{ {
@ -258,7 +260,11 @@ void SpaceParser::SpaceEnds()
else else
{ {
space = space->parent; space = space->parent;
ReadChar(); ReadChar(); // skipping closing space character ')'
SkipWhite();
if( lastc != -1 && lastc != '\n' )
status = syntax_error;
} }
} }
@ -268,40 +274,19 @@ void SpaceParser::SpaceStarts()
Space * new_space = new Space(); Space * new_space = new Space();
space->spaces.push_back(new_space); space->spaces.push_back(new_space);
new_space->parent = space; new_space->parent = space;
new_space->name = variable; new_space->name = key;
space = new_space; space = new_space;
ReadChar(); ReadChar(); // skipping space starts character ')'
} }
void SpaceParser::ReadAddValue()
{
ReadChar(); // skipping separator '='
if( ReadValue() )
{
AddOption();
}
else
{
status = syntax_error;
}
}
bool SpaceParser::IsVariableChar(int c)
{
if( (c>='a' && c<='z') ||
(c>='A' && c<='Z') ||
(c>='0' && c<='9') ||
c=='.' || c==',' || c=='_' )
return true;
return false;
}
/*
those white characters here should be the same as in space.h
*/
bool SpaceParser::IsWhite(int c) bool SpaceParser::IsWhite(int c)
{ {
// dont use '\n' here // dont use '\n' here
@ -380,27 +365,6 @@ std::wstring::size_type i;
void SpaceParser::AddOption()
{
if( value.empty() && skip_empty )
{
DeleteFromTable(variable);
DeleteFromTableSingle(variable);
return;
}
if( split_single && value.size() == 1 )
{
space->table_single[variable] = value[0];
DeleteFromTable(variable);
}
else
{
space->table[variable] = value;
DeleteFromTableSingle(variable);
}
}
void SpaceParser::DeleteFromTable(const std::wstring & var) void SpaceParser::DeleteFromTable(const std::wstring & var)
@ -423,140 +387,157 @@ void SpaceParser::DeleteFromTableSingle(const std::wstring & var)
void SpaceParser::ReadVariable()
void SpaceParser::ReadTokenQuoted()
{ {
variable.clear(); ReadChar(); // skipping the first quotation mark
SkipWhite();
while( lastc != -1 && (char_was_escaped || lastc != '"') )
while( IsVariableChar(lastc) )
{ {
variable += lastc; token += static_cast<wchar_t>(lastc);
ReadChar(); ReadChar();
} }
SkipWhite(); if( !char_was_escaped && lastc == '"' )
} ReadChar(); // skipping the last quotation mark
bool SpaceParser::ReadValue()
{
value.clear();
SkipWhite();
if( lastc == list_start )
return ReadValueList();
else else
return ReadValueNoList(); status = syntax_error;
}
void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
while( true )
{
if( lastc == commentary )
SkipLine();
if( lastc == -1 ||
(delimit1 != -1 && lastc == delimit1) ||
(delimit2 != -1 && lastc == delimit2) ||
(white_delimit && IsWhite(lastc)) ||
(new_line_delimit && lastc == '\n') )
{
break;
}
token += static_cast<wchar_t>(lastc);
ReadChar();
}
Trim(token);
}
void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
token.clear();
SkipWhite();
if( !char_was_escaped && lastc == '"' )
ReadTokenQuoted();
else
ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
}
void SpaceParser::ReadKey()
{
ReadToken(false, true, separator, list_start);
key = token;
SkipWhite();
} }
bool SpaceParser::ReadValueList() void SpaceParser::ReadValueList()
{ {
ReadChar(); // skipping first list character '(' ReadChar(); // skipping the first list character ')'
SkipWhiteLines(); // lists can be split into several lines SkipWhiteLines();
while( lastc != -1 && lastc != list_end ) while( lastc != -1 && lastc != list_end )
{ {
if( !ReadValueNoList(true) ) ReadToken(true, true, list_delimiter, list_end);
return false; value.push_back(token);
if( lastc == list_delimiter )
ReadChar();
SkipWhiteLines(); SkipWhiteLines();
if( lastc == list_delimiter )
{
ReadChar();
SkipWhiteLines();
}
} }
if( lastc != list_end ) if( lastc == list_end )
return false;
ReadChar(); // skipping last list character ')'
SkipWhite();
return true;
}
bool SpaceParser::ReadValueNoList(bool use_list_delimiter)
{
bool res;
value_item.clear();
if( lastc == '"' )
{ {
res = ReadValueQuoted(); // quoted value ReadChar(); // skipping the last list character ')'
SkipWhite();
if( res ) // there cannot be anything after ')' character
value.push_back(value_item); if( lastc != -1 && lastc != '\n' )
status = syntax_error;
} }
else else
{ {
res = ReadValueSimple(use_list_delimiter); status = syntax_error; // missing one ')'
if( res && !value_item.empty() )
value.push_back(value_item);
} }
return res;
} }
void SpaceParser::ReadValueSingle()
bool SpaceParser::ReadValueQuoted()
{ {
ReadChar(); // skipping the first quote
// !! IMPROVE ME
// add some other escaped characters especialy \0 (the serializator is working that way now)
while( lastc != '"' && lastc != -1 )
{
if( use_escape_char && lastc == '\\' )
ReadChar();
value_item += lastc;
ReadChar();
}
if( lastc != '"' )
return false;
ReadChar(); // skipping the last quote
SkipWhite(); SkipWhite();
ReadToken(false, true, -1, -1);
return true; value.push_back(token);
SkipWhite();
if( lastc != -1 && lastc != '\n' )
status = syntax_error;
} }
void SpaceParser::ReadValue()
bool SpaceParser::ReadValueSimple(bool use_list_delimiter)
{ {
int list_delimiter1 = -1; ReadChar(); // skipping separator '='
int list_delimiter2 = -1; value.clear();
if( use_list_delimiter )
list_delimiter1 = list_delimiter;
if( use_list_delimiter || space != root_space )
list_delimiter2 = list_end;
while( lastc!=-1 && lastc!='\n' && lastc!=commentary &&
lastc!=list_delimiter1 && lastc!=list_delimiter2 )
{
value_item += lastc;
ReadChar();
}
Trim(value_item);
SkipWhite(); SkipWhite();
return true; if( lastc == '(' )
ReadValueList();
else
ReadValueSingle();
SkipWhiteLines();
} }
void SpaceParser::AddKeyValuePair()
{
if( value.empty() && skip_empty )
{
DeleteFromTable(key);
DeleteFromTableSingle(key);
return;
}
if( split_single && value.size() == 1 )
{
space->table_single[key] = value[0];
DeleteFromTable(key);
}
else
{
space->table[key] = value;
DeleteFromTableSingle(key);
}
}
int SpaceParser::ReadUTF8Char() int SpaceParser::ReadUTF8Char()
{ {
int c; int c;
@ -651,7 +632,7 @@ return lastc;
} }
int SpaceParser::ReadChar() int SpaceParser::ReadCharNoEscape()
{ {
if( reading_from_file ) if( reading_from_file )
{ {
@ -677,6 +658,31 @@ int SpaceParser::ReadChar()
} }
int SpaceParser::ReadChar()
{
char_was_escaped = false;
ReadCharNoEscape();
if( use_escape_char && lastc == '\\' )
{
char_was_escaped = true;
ReadCharNoEscape();
switch(lastc)
{
case '0': lastc = 0; break;
case 'n': lastc = '\n'; break;
case '\\': lastc = '\\'; break;
case 'r': lastc = '\r'; break;
case 't': lastc = '\t'; break;
case '"': lastc = '"'; break;
}
}
return lastc;
}
} // namespace } // namespace

View File

@ -142,7 +142,6 @@ public:
/* /*
'\' character is used to escape other characters in a quoted string '\' character is used to escape other characters in a quoted string
so "some \t t\"ext" will produce "some t t"ext" so "some \t t\"ext" will produce "some t t"ext"
(this is only use in quoted string)
default: true default: true
*/ */
void UseEscapeChar(bool escape); void UseEscapeChar(bool escape);
@ -191,15 +190,15 @@ private:
/* /*
last read variable (option) last read token
*/ */
std::wstring variable; std::wstring token;
/* /*
last read list item last read key
*/ */
std::wstring value_item; std::wstring key;
/* /*
@ -245,6 +244,13 @@ private:
int lastc; int lastc;
/*
true if the lastc was escaped (with a backslash)
we have to know if the last sequence was \" or just "
*/
bool char_was_escaped;
/* /*
current file current file
*/ */
@ -287,27 +293,27 @@ private:
void ParseLoop(); void ParseLoop();
void SpaceEnds(); void SpaceEnds();
void SpaceStarts(); void SpaceStarts();
void ReadAddValue();
void AddOption();
void DeleteFromTable(const std::wstring & var); void DeleteFromTable(const std::wstring & var);
void DeleteFromTableSingle(const std::wstring & var); void DeleteFromTableSingle(const std::wstring & var);
void ReadVariable(); void ReadTokenQuoted();
bool ReadValue(); void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
bool ReadValueList(); void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
bool ReadValueNoList(bool use_list_delimiter = false); void ReadKey();
bool ReadValueQuoted(); void ReadValueList();
bool ReadValueSimple(bool use_list_delimiter = false); void ReadValueSingle();
void ReadValue();
void AddKeyValuePair();
int ReadUTF8Char(); int ReadUTF8Char();
int ReadASCIIChar(); int ReadASCIIChar();
int ReadCharFromWcharString(); int ReadCharFromWcharString();
int ReadCharFromUTF8String(); int ReadCharFromUTF8String();
int ReadCharFromAsciiString(); int ReadCharFromAsciiString();
int ReadCharNoEscape();
int ReadChar(); int ReadChar();
bool IsWhite(int c); bool IsWhite(int c);
bool IsVariableChar(int c);
void SkipWhite(); void SkipWhite();
void SkipWhiteLines(); void SkipWhiteLines();
void SkipLine(); void SkipLine();