/* * This file is a part of PikoTools * and is distributed under the (new) BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2008-2012, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "spaceparser.h" #include "utf8/utf8.h" namespace PT { SpaceParser::SpaceParser() { root_space = 0; SetDefault(); } void SpaceParser::SetSpace(Space * pspace) { root_space = pspace; } void SpaceParser::SetSpace(Space & pspace) { root_space = &pspace; } void SpaceParser::SetDefault() { // you can change this separators to what you want // you shoud not use only white characters here (as expected by IsWhite() method) // and new line characters ('\n') separator = '='; commentary = '#'; list_start = '('; list_end = ')'; list_delimiter = ','; split_single = true; skip_empty = false; use_escape_char = true; input_as_utf8 = true; } void SpaceParser::SplitSingle(bool split) { split_single = split; } void SpaceParser::SkipEmpty(bool skip) { skip_empty = skip; } void SpaceParser::UseEscapeChar(bool escape) { use_escape_char = escape; } void SpaceParser::UTF8(bool utf) { input_as_utf8 = utf; } SpaceParser::Status SpaceParser::Parse(const char * file_name) { reading_from_file = true; file.clear(); file.open(file_name, std::ios_base::binary | std::ios_base::in); if( file ) { Parse(); file.close(); } else { status = cant_open_file; } return status; } SpaceParser::Status SpaceParser::Parse(const std::string & file_name) { return Parse(file_name.c_str()); } SpaceParser::Status SpaceParser::Parse(const wchar_t * file_name) { PT::WideToUTF8(file_name, afile_name); return Parse(afile_name.c_str()); } SpaceParser::Status SpaceParser::Parse(const std::wstring & file_name) { return Parse(file_name.c_str()); } SpaceParser::Status SpaceParser::ParseString(const char * str) { reading_from_file = false; reading_from_wchar_string = false; pchar_ascii = str; pchar_unicode = 0; Parse(); return status; } SpaceParser::Status SpaceParser::ParseString(const std::string & str) { return ParseString(str.c_str()); } SpaceParser::Status SpaceParser::ParseString(const wchar_t * str) { reading_from_file = false; reading_from_wchar_string = true; pchar_unicode = str; pchar_ascii = 0; Parse(); return status; } SpaceParser::Status SpaceParser::ParseString(const std::wstring & str) { return ParseString(str.c_str()); } void SpaceParser::Parse() { if( !root_space ) { status = no_space; return; } line = 1; status = ok; space = root_space; ReadChar(); SkipWhiteLines(); ParseLoop(); if( status == ok && space != root_space ) { // last closing ')' characters are missing (closing a space) status = syntax_error; } token.clear(); key.clear(); value.clear(); } void SpaceParser::ParseLoop() { while( status == ok && lastc != -1 ) { if( lastc == list_end ) { SpaceEnds(); } else { ReadKey(); SkipWhite(); if( lastc == list_start ) { SpaceStarts(); } else if( lastc == separator ) { ReadValue(); AddKeyValuePair(); } else { status = syntax_error; } } if( status == ok ) SkipWhiteLines(); } } void SpaceParser::SpaceEnds() { if( space == root_space ) { // there cannot be a loose list end character in the global space status = syntax_error; } else { space = space->parent; ReadChar(); // skipping closing space character ')' SkipWhite(); } } void SpaceParser::SpaceStarts() { Space * new_space = new Space(); space->spaces.push_back(new_space); new_space->parent = space; new_space->name = key; space = new_space; ReadChar(); // skipping space starts character ')' } /* those white characters here should be the same as in space.h */ bool SpaceParser::IsWhite(int c) { // dont use '\n' here // 13 (\r) is at the end of a line in a dos file \r\n // 160 is an unbreakable space if( c==' ' || c=='\t' || c==13 || c==160 ) return true; return false; } void SpaceParser::SkipWhite() { while( IsWhite(lastc) || lastc == commentary ) { if( lastc == commentary ) SkipLine(); else ReadChar(); } } void SpaceParser::SkipWhiteLines() { while( IsWhite(lastc) || lastc == commentary || lastc=='\n' ) { if( lastc == commentary ) SkipLine(); else ReadChar(); } } void SpaceParser::SkipLine() { while( lastc != -1 && lastc != '\n' ) ReadChar(); } void SpaceParser::Trim(std::wstring & s) { std::wstring::size_type i; if( s.empty() ) return; // looking for white characters at the end for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i); if( i==0 && IsWhite(s[i]) ) { // the whole string has white characters s.clear(); return; } // deleting white characters at the end if( i != s.size() - 1 ) s.erase(i+1, std::wstring::npos); // looking for white characters at the beginning for(i=0 ; itable.find(var); if( i != space->table.end() ) space->table.erase(i); } void SpaceParser::DeleteFromTableSingle(const std::wstring & var) { Space::TableSingle::iterator i = space->table_single.find(var); if( i != space->table_single.end() ) space->table_single.erase(i); } void SpaceParser::ReadTokenQuoted() { ReadChar(); // skipping the first quotation mark while( lastc != -1 && (char_was_escaped || lastc != '"') ) { token += static_cast(lastc); ReadChar(); } if( !char_was_escaped && lastc == '"' ) ReadChar(); // skipping the last quotation mark else status = syntax_error; } void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) { while( true ) { if( lastc == commentary ) SkipLine(); if( lastc == -1 || (!char_was_escaped && ( lastc == list_end || (white_delimit && IsWhite(lastc)) || (new_line_delimit && lastc == '\n') || (delimit1 != -1 && lastc == delimit1) || (delimit2 != -1 && lastc == delimit2) ) ) ) { break; } token += static_cast(lastc); ReadChar(); } Trim(token); } void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) { token.clear(); SkipWhite(); if( !char_was_escaped && lastc == '"' ) ReadTokenQuoted(); else ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2); } void SpaceParser::ReadKey() { ReadToken(false, true, separator, list_start); key = token; SkipWhite(); } void SpaceParser::ReadValueList() { ReadChar(); // skipping the first list character ')' SkipWhiteLines(); while( lastc != -1 && lastc != list_end ) { ReadToken(true, true, list_delimiter, list_end); value.push_back(token); SkipWhiteLines(); if( lastc == list_delimiter ) { ReadChar(); SkipWhiteLines(); } } if( lastc == list_end ) { ReadChar(); // skipping the last list character ')' SkipWhite(); } else { status = syntax_error; // missing one ')' } } void SpaceParser::ReadValueSingle() { SkipWhite(); ReadToken(false, true, -1, -1); value.push_back(token); SkipWhite(); } void SpaceParser::ReadValue() { ReadChar(); // skipping separator '=' value.clear(); SkipWhite(); if( lastc == list_start ) ReadValueList(); else ReadValueSingle(); SkipWhiteLines(); } void SpaceParser::AddKeyValuePair() { if( value.empty() && skip_empty ) { DeleteFromTable(key); DeleteFromTableSingle(key); return; } if( split_single && value.size() == 1 ) { space->table_single[key] = value[0]; DeleteFromTable(key); } else { space->table[key] = value; DeleteFromTableSingle(key); } } int SpaceParser::ReadUTF8Char() { int c; bool correct; lastc = -1; do { PT::UTF8ToInt(file, c, correct); if( !file ) return lastc; } while( !correct ); lastc = c; if( lastc == '\n' ) ++line; return lastc; } int SpaceParser::ReadASCIIChar() { lastc = file.get(); if( lastc == '\n' ) ++line; return lastc; } int SpaceParser::ReadCharFromWcharString() { if( *pchar_unicode == 0 ) lastc = -1; else lastc = *(pchar_unicode++); if( lastc == '\n' ) ++line; return lastc; } int SpaceParser::ReadCharFromUTF8String() { int c; bool correct; lastc = -1; do { size_t len = PT::UTF8ToInt(pchar_ascii, c, correct); pchar_ascii += len; if( *pchar_ascii == 0 ) return lastc; } while( !correct ); lastc = c; if( lastc == '\n' ) ++line; return lastc; } int SpaceParser::ReadCharFromAsciiString() { if( *pchar_ascii == 0 ) lastc = -1; else lastc = *(pchar_ascii++); if( lastc == '\n' ) ++line; return lastc; } int SpaceParser::ReadCharNoEscape() { if( reading_from_file ) { if( input_as_utf8 ) return ReadUTF8Char(); else return ReadASCIIChar(); } else { if( reading_from_wchar_string ) { return ReadCharFromWcharString(); } else { if( input_as_utf8 ) return ReadCharFromUTF8String(); else return ReadCharFromAsciiString(); } } } int SpaceParser::ReadChar() { char_was_escaped = false; ReadCharNoEscape(); if( use_escape_char && lastc == '\\' ) { char_was_escaped = true; ReadCharNoEscape(); switch(lastc) { case '0': lastc = 0; break; case 't': lastc = '\t'; break; case 'r': lastc = '\r'; break; case 'n': lastc = '\n'; break; // in other cases we return the last character } } return lastc; } } // namespace