From e620c8f95d178f59e9fb8c4442e6418b69e4d6bc Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Thu, 10 May 2012 21:16:19 +0000 Subject: [PATCH] changed: SpaceParser -- parser's engine has been rewritten now we can map all strings to all strings documentation in space.h need to be updated yet git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@407 e52654a7-88a9-db11-a3e9-0013d4bc506e --- space/space.cpp | 25 ++++ space/space.h | 161 +++++++++++++++------- space/spaceparser.cpp | 308 +++++++++++++++++++++--------------------- space/spaceparser.h | 38 +++--- 4 files changed, 315 insertions(+), 217 deletions(-) diff --git a/space/space.cpp b/space/space.cpp index a335f24..65d188b 100755 --- a/space/space.cpp +++ b/space/space.cpp @@ -638,6 +638,31 @@ return false; } +/* + those white characters here should be the same as in spaceparser.cpp +*/ +bool Space::IsWhite(int c) +{ + // dont use '\n' here + // 13 (\r) is at the end of a line in a dos file \r\n + // 160 is an unbreakable space + if( c==' ' || c=='\t' || c==13 || c==160 ) + return true; + +return false; +} + + +bool Space::HasWhite(const std::wstring & str) +{ + for(size_t i=0 ; i void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const; + template + void SerializeTableSingle(Stream & out, bool use_indents, int level) const; + + template + void SerializeTableMulti(Stream & out, bool use_indents, int level) const; + + template + static void PrintValue(Stream & out, const std::wstring & str, bool use_quote = true); + + // for other uses + template + static void PrintValue(Stream & out, const std::string & str, bool use_quote = true); + + template + static void PrintKey(Stream & out, const std::wstring & str); + + template + static void PrintLevel(Stream & out, bool use_indents, int level); + + + private: + std::wstring tmp_name; std::wstring tmp_value_text; std::string tmp_value_text_ascii; @@ -318,19 +340,8 @@ private: bool ToBool(const std::wstring & value); 
wchar_t ToSmall(wchar_t c); bool EqualNoCase(const wchar_t * str1, const wchar_t * str2); - - - template - void PrintLevel(Stream & out, bool use_indents, int level) const; - - template - void SerializeTableSingle(Stream & out, bool use_indents, int level) const; - - template - void SerializeTableMulti(Stream & out, bool use_indents, int level) const; - - template - void PrintValue(Stream & out, const std::wstring & str) const; + static bool IsWhite(int c); + static bool HasWhite(const std::wstring & str); }; @@ -339,7 +350,7 @@ private: template -void Space::PrintLevel(Stream & out, bool use_indents, int level) const +void Space::PrintLevel(Stream & out, bool use_indents, int level) { if( use_indents ) { @@ -349,10 +360,12 @@ void Space::PrintLevel(Stream & out, bool use_indents, int level) const } + template -void Space::PrintValue(Stream & out, const std::wstring & str) const +void Space::PrintValue(Stream & out, const std::wstring & str, bool use_quote) { - out << '\"'; + if( use_quote ) + out << '\"'; for(size_t i=0 ; i +void Space::PrintValue(Stream & out, const std::string & str, bool use_quote) +{ + if( use_quote ) + out << '\"'; + + for(size_t i=0 ; i +void Space::PrintKey(Stream & out, const std::wstring & str) +{ +bool use_quote = false; + + if( str.empty() || HasWhite(str) ) + use_quote = true; + + PrintValue(out, str, use_quote); } template void Space::SerializeTableSingle(Stream & out, bool use_indents, int level) const { - if( !table_single.empty() ) - { - TableSingle::const_iterator i; +TableSingle::const_iterator i; - for(i = table_single.begin() ; i != table_single.end() ; ++i) - { - PrintLevel(out, use_indents, level); - out << i->first << L" = "; - PrintValue(out, i->second); - out << '\n'; - } + for(i=table_single.begin() ; i != table_single.end() ; ++i) + { + PrintLevel(out, use_indents, level); + PrintKey(out, i->first); + out << L" = "; + PrintValue(out, i->second); + out << '\n'; } } @@ -397,33 +458,30 @@ void 
Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const Table::const_iterator i2; size_t v; - - if( !table.empty() ) + for(i2 = table.begin() ; i2 != table.end() ; ++i2) { - for(i2 = table.begin() ; i2 != table.end() ; ++i2) + PrintLevel(out, use_indents, level); + PrintKey(out, i2->first); + out << L" = "; + + if( i2->second.size() != 1 ) + out << '('; + + for(v = 0 ; v < i2->second.size() ; ++v) { - PrintLevel(out, use_indents, level); - out << i2->first << L" = "; + if( v > 0 ) + PrintLevel(out, use_indents, level + i2->first.size() + 3); - if( i2->second.size() != 1 ) - out << '('; + PrintValue(out, i2->second[v]); - for(v = 0 ; v < i2->second.size() ; ++v) - { - if( v > 0 ) - PrintLevel(out, use_indents, level + i2->first.size() + 3); - - PrintValue(out, i2->second[v]); - - if( v + 1 < i2->second.size() ) - out << '\n'; - } - - if( i2->second.size() != 1 ) - out << ')'; - - out << '\n'; + if( v + 1 < i2->second.size() ) + out << '\n'; } + + if( i2->second.size() != 1 ) + out << ')'; + + out << '\n'; } } @@ -437,7 +495,10 @@ void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int lev PrintLevel(out, use_indents, level); if( !name.empty() ) - out << name << ' '; + { + PrintKey(out, name); + out << ' '; + } out << L"(\n"; diff --git a/space/spaceparser.cpp b/space/spaceparser.cpp index 1f41304..37d098e 100755 --- a/space/spaceparser.cpp +++ b/space/spaceparser.cpp @@ -113,7 +113,7 @@ SpaceParser::Status SpaceParser::Parse(const char * file_name) reading_from_file = true; file.clear(); - file.open( file_name ); + file.open(file_name, std::ios_base::binary | std::ios_base::in); if( file ) { @@ -209,7 +209,7 @@ void SpaceParser::Parse() if( status == ok && space != root_space ) { - // last closing a space characters ')' are missing + // last closing ')' characters are missing (closing a space) status = syntax_error; } } @@ -225,16 +225,18 @@ void SpaceParser::ParseLoop() } else { - ReadVariable(); + ReadKey(); + SkipWhite(); 
if( lastc == list_start ) { SpaceStarts(); } else - if( lastc == separator && !variable.empty() ) + if( lastc == separator ) { - ReadAddValue(); + ReadValue(); + AddKeyValuePair(); } else { @@ -258,7 +260,11 @@ void SpaceParser::SpaceEnds() else { space = space->parent; - ReadChar(); + ReadChar(); // skipping closing space character ')' + SkipWhite(); + + if( lastc != -1 && lastc != '\n' ) + status = syntax_error; } } @@ -268,40 +274,19 @@ void SpaceParser::SpaceStarts() Space * new_space = new Space(); space->spaces.push_back(new_space); new_space->parent = space; - new_space->name = variable; + new_space->name = key; space = new_space; - ReadChar(); + ReadChar(); // skipping the space start character '(' } -void SpaceParser::ReadAddValue() -{ - ReadChar(); // skipping separator '=' - - if( ReadValue() ) - { - AddOption(); - } - else - { - status = syntax_error; - } -} -bool SpaceParser::IsVariableChar(int c) -{ - if( (c>='a' && c<='z') || - (c>='A' && c<='Z') || - (c>='0' && c<='9') || - c=='.' 
|| c==',' || c=='_' ) - return true; - -return false; -} - +/* + those white characters here should be the same as in space.h +*/ bool SpaceParser::IsWhite(int c) { // dont use '\n' here @@ -380,27 +365,6 @@ std::wstring::size_type i; -void SpaceParser::AddOption() -{ - if( value.empty() && skip_empty ) - { - DeleteFromTable(variable); - DeleteFromTableSingle(variable); - return; - } - - if( split_single && value.size() == 1 ) - { - space->table_single[variable] = value[0]; - DeleteFromTable(variable); - } - else - { - space->table[variable] = value; - DeleteFromTableSingle(variable); - } -} - void SpaceParser::DeleteFromTable(const std::wstring & var) @@ -423,140 +387,157 @@ void SpaceParser::DeleteFromTableSingle(const std::wstring & var) -void SpaceParser::ReadVariable() + +void SpaceParser::ReadTokenQuoted() { - variable.clear(); - SkipWhite(); - - while( IsVariableChar(lastc) ) + ReadChar(); // skipping the first quotation mark + + while( lastc != -1 && (char_was_escaped || lastc != '"') ) { - variable += lastc; + token += static_cast(lastc); ReadChar(); } - SkipWhite(); -} - - - -bool SpaceParser::ReadValue() -{ - value.clear(); - SkipWhite(); - - if( lastc == list_start ) - return ReadValueList(); + if( !char_was_escaped && lastc == '"' ) + ReadChar(); // skipping the last quotation mark else - return ReadValueNoList(); + status = syntax_error; +} + + +void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) +{ + while( true ) + { + if( lastc == commentary ) + SkipLine(); + + if( lastc == -1 || + (delimit1 != -1 && lastc == delimit1) || + (delimit2 != -1 && lastc == delimit2) || + (white_delimit && IsWhite(lastc)) || + (new_line_delimit && lastc == '\n') ) + { + break; + } + + token += static_cast(lastc); + ReadChar(); + } + + Trim(token); +} + + +void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) +{ + token.clear(); + SkipWhite(); + + if( !char_was_escaped && 
lastc == '"' ) + ReadTokenQuoted(); + else + ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2); +} + + +void SpaceParser::ReadKey() +{ + ReadToken(false, true, separator, list_start); + key = token; + SkipWhite(); } -bool SpaceParser::ReadValueList() +void SpaceParser::ReadValueList() { - ReadChar(); // skipping first list character '(' - SkipWhiteLines(); // lists can be split into several lines + ReadChar(); // skipping the first list character '(' + SkipWhiteLines(); while( lastc != -1 && lastc != list_end ) { - if( !ReadValueNoList(true) ) - return false; - - if( lastc == list_delimiter ) - ReadChar(); + ReadToken(true, true, list_delimiter, list_end); + value.push_back(token); SkipWhiteLines(); + + if( lastc == list_delimiter ) + { + ReadChar(); + SkipWhiteLines(); + } } - if( lastc != list_end ) - return false; - - ReadChar(); // skipping last list character ')' - SkipWhite(); - -return true; -} - - - -bool SpaceParser::ReadValueNoList(bool use_list_delimiter) -{ -bool res; - - value_item.clear(); - - if( lastc == '"' ) + if( lastc == list_end ) { - res = ReadValueQuoted(); // quoted value + ReadChar(); // skipping the last list character ')' + SkipWhite(); - if( res ) - value.push_back(value_item); + // there cannot be anything after ')' character + if( lastc != -1 && lastc != '\n' ) + status = syntax_error; } else { - res = ReadValueSimple(use_list_delimiter); - - if( res && !value_item.empty() ) - value.push_back(value_item); + status = syntax_error; // missing one ')' } - -return res; } - -bool SpaceParser::ReadValueQuoted() +void SpaceParser::ReadValueSingle() { - ReadChar(); // skipping the first quote - - // !! 
IMPROVE ME - // add some other escaped characters especialy \0 (the serializator is working that way now) - - while( lastc != '"' && lastc != -1 ) - { - if( use_escape_char && lastc == '\\' ) - ReadChar(); - - value_item += lastc; - ReadChar(); - } - - if( lastc != '"' ) - return false; - - ReadChar(); // skipping the last quote SkipWhite(); - -return true; + ReadToken(false, true, -1, -1); + value.push_back(token); + SkipWhite(); + + if( lastc != -1 && lastc != '\n' ) + status = syntax_error; } - -bool SpaceParser::ReadValueSimple(bool use_list_delimiter) +void SpaceParser::ReadValue() { - int list_delimiter1 = -1; - int list_delimiter2 = -1; - - if( use_list_delimiter ) - list_delimiter1 = list_delimiter; - - if( use_list_delimiter || space != root_space ) - list_delimiter2 = list_end; - - while( lastc!=-1 && lastc!='\n' && lastc!=commentary && - lastc!=list_delimiter1 && lastc!=list_delimiter2 ) - { - value_item += lastc; - ReadChar(); - } - - Trim(value_item); + ReadChar(); // skipping separator '=' + value.clear(); SkipWhite(); - -return true; + + if( lastc == '(' ) + ReadValueList(); + else + ReadValueSingle(); + + SkipWhiteLines(); } +void SpaceParser::AddKeyValuePair() +{ + if( value.empty() && skip_empty ) + { + DeleteFromTable(key); + DeleteFromTableSingle(key); + return; + } + + if( split_single && value.size() == 1 ) + { + space->table_single[key] = value[0]; + DeleteFromTable(key); + } + else + { + space->table[key] = value; + DeleteFromTableSingle(key); + } +} + + + + + + int SpaceParser::ReadUTF8Char() { int c; @@ -651,7 +632,7 @@ return lastc; } -int SpaceParser::ReadChar() +int SpaceParser::ReadCharNoEscape() { if( reading_from_file ) { @@ -677,6 +658,31 @@ int SpaceParser::ReadChar() } +int SpaceParser::ReadChar() +{ + char_was_escaped = false; + ReadCharNoEscape(); + + if( use_escape_char && lastc == '\\' ) + { + char_was_escaped = true; + ReadCharNoEscape(); + + switch(lastc) + { + case '0': lastc = 0; break; + case 'n': lastc = '\n'; break; + 
case '\\': lastc = '\\'; break; + case 'r': lastc = '\r'; break; + case 't': lastc = '\t'; break; + case '"': lastc = '"'; break; + } + } + +return lastc; +} + + } // namespace diff --git a/space/spaceparser.h b/space/spaceparser.h index 6d4d71f..7fab9fb 100755 --- a/space/spaceparser.h +++ b/space/spaceparser.h @@ -142,7 +142,6 @@ public: /* '\' character is used to escape other characters in a quoted string so "some \t t\"ext" will produce "some t t"ext" - (this is only use in quoted string) default: true */ void UseEscapeChar(bool escape); @@ -191,15 +190,15 @@ private: /* - last read variable (option) + last read token */ - std::wstring variable; - + std::wstring token; + /* - last read list item + last read key */ - std::wstring value_item; + std::wstring key; /* @@ -245,6 +244,13 @@ private: int lastc; + /* + true if the lastc was escaped (with a backslash) + we have to know if the last sequence was \" or just " + */ + bool char_was_escaped; + + /* current file */ @@ -287,27 +293,27 @@ private: void ParseLoop(); void SpaceEnds(); void SpaceStarts(); - void ReadAddValue(); - void AddOption(); void DeleteFromTable(const std::wstring & var); void DeleteFromTableSingle(const std::wstring & var); - void ReadVariable(); - bool ReadValue(); - bool ReadValueList(); - bool ReadValueNoList(bool use_list_delimiter = false); - bool ReadValueQuoted(); - bool ReadValueSimple(bool use_list_delimiter = false); - + void ReadTokenQuoted(); + void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); + void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); + void ReadKey(); + void ReadValueList(); + void ReadValueSingle(); + void ReadValue(); + void AddKeyValuePair(); + int ReadUTF8Char(); int ReadASCIIChar(); int ReadCharFromWcharString(); int ReadCharFromUTF8String(); int ReadCharFromAsciiString(); + int ReadCharNoEscape(); int ReadChar(); bool IsWhite(int c); - bool IsVariableChar(int c); void SkipWhite(); 
void SkipWhiteLines(); void SkipLine();