You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1160 lines
19 KiB
1160 lines
19 KiB
/* |
|
* This file is a part of PikoTools |
|
* and is distributed under the (new) BSD licence. |
|
* Author: Tomasz Sowa <t.sowa@ttmath.org> |
|
*/ |
|
|
|
/* |
|
* Copyright (c) 2012-2021, Tomasz Sowa |
|
* All rights reserved. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions are met: |
|
* |
|
* * Redistributions of source code must retain the above copyright notice, |
|
* this list of conditions and the following disclaimer. |
|
* |
|
* * Redistributions in binary form must reproduce the above copyright |
|
* notice, this list of conditions and the following disclaimer in the |
|
* documentation and/or other materials provided with the distribution. |
|
* |
|
* * Neither the name Tomasz Sowa nor the names of contributors to this |
|
* project may be used to endorse or promote products derived |
|
* from this software without specific prior written permission. |
|
* |
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
|
* THE POSSIBILITY OF SUCH DAMAGE. |
|
*/ |
|
|
|
#include <cstdlib> |
|
#include <wchar.h> |
|
#include "spaceparser.h" |
|
#include "utf8/utf8.h" |
|
#include "convert/strtoint.h" |
|
|
|
|
|
namespace pt |
|
{ |
|
|
|
|
|
|
|
|
|
/*
 * creates a parser which has no destination space attached yet
 * and has all options set by SetDefault()
 */
SpaceParser::SpaceParser()
{
	root_space = nullptr;
	SetDefault();
}
|
|
|
|
|
/*
 * sets the space object which will be filled by the Parse...() methods
 * (the pointer is only remembered here, it can be null -- in such a case
 * parsing finishes with the no_space status)
 */
void SpaceParser::SetSpace(Space * pspace)
{
	this->root_space = pspace;
}
|
|
|
|
|
/*
 * sets the space object which will be filled by the Parse...() methods
 * (only the address of the object is remembered)
 */
void SpaceParser::SetSpace(Space & pspace)
{
	this->root_space = &pspace;
}
|
|
|
|
|
void SpaceParser::SetDefault() |
|
{ |
|
// you can change this separators to what you want |
|
// you shoud not use only white characters here (as expected by IsWhite() method) |
|
// and new line characters ('\n') |
|
separator = ':'; |
|
space_start = '{'; |
|
space_end = '}'; |
|
table_start = '['; |
|
table_end = ']'; |
|
option_delimiter = ','; |
|
skip_empty = false; |
|
use_escape_char = true; |
|
input_as_utf8 = true; |
|
} |
|
|
|
|
|
|
|
void SpaceParser::SkipEmpty(bool skip) |
|
{ |
|
skip_empty = skip; |
|
} |
|
|
|
|
|
void SpaceParser::UseEscapeChar(bool escape) |
|
{ |
|
use_escape_char = escape; |
|
} |
|
|
|
|
|
void SpaceParser::UTF8(bool utf) |
|
{ |
|
input_as_utf8 = utf; |
|
} |
|
|
|
|
|
int SpaceParser::get_last_parsed_line() |
|
{ |
|
return line; |
|
} |
|
|
|
|
|
|
|
/*
 * parses a file in JSON format
 *
 * returns cant_open_file if the file cannot be opened,
 * otherwise the status set by ParseRootSpace()
 */
SpaceParser::Status SpaceParser::ParseJSONFile(const char * file_name)
{
	parsing_space     = false;
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	ParseRootSpace();
	file.close();

	return status;
}
|
|
|
|
|
|
|
/*
 * parses a file in JSON format
 * (forwards to the const char * version)
 */
SpaceParser::Status SpaceParser::ParseJSONFile(const std::string & file_name)
{
	const char * name = file_name.c_str();

	return ParseJSONFile(name);
}
|
|
|
|
|
|
|
|
|
/*
 * parses a file in JSON format
 * the wide file name is first converted by WideToUTF8()
 * and then passed to the const char * version
 */
SpaceParser::Status SpaceParser::ParseJSONFile(const wchar_t * file_name)
{
	std::string name_utf8;
	WideToUTF8(file_name, name_utf8);

	const char * name = name_utf8.c_str();

	return ParseJSONFile(name);
}
|
|
|
|
|
|
|
/*
 * parses a file in JSON format
 * (forwards to the const wchar_t * version)
 */
SpaceParser::Status SpaceParser::ParseJSONFile(const std::wstring & file_name)
{
	const wchar_t * name = file_name.c_str();

	return ParseJSONFile(name);
}
|
|
|
|
|
|
|
|
|
|
|
/*
 * parses a file in Space format
 *
 * returns cant_open_file if the file cannot be opened,
 * otherwise the status set by ParseRootSpace()
 */
SpaceParser::Status SpaceParser::ParseSpaceFile(const char * file_name)
{
	parsing_space     = true;
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	ParseRootSpace();
	file.close();

	return status;
}
|
|
|
|
|
|
|
/*
 * parses a file in Space format
 * (forwards to the const char * version)
 */
SpaceParser::Status SpaceParser::ParseSpaceFile(const std::string & file_name)
{
	const char * name = file_name.c_str();

	return ParseSpaceFile(name);
}
|
|
|
|
|
|
|
|
|
/*
 * parses a file in Space format
 * the wide file name is first converted by WideToUTF8()
 * and then passed to the const char * version
 */
SpaceParser::Status SpaceParser::ParseSpaceFile(const wchar_t * file_name)
{
	std::string name_utf8;
	WideToUTF8(file_name, name_utf8);

	const char * name = name_utf8.c_str();

	return ParseSpaceFile(name);
}
|
|
|
|
|
|
|
/*
 * parses a file in Space format
 * (forwards to the const wchar_t * version)
 */
SpaceParser::Status SpaceParser::ParseSpaceFile(const std::wstring & file_name)
{
	const wchar_t * name = file_name.c_str();

	return ParseSpaceFile(name);
}
|
|
|
|
|
|
|
/*
 * parses a JSON document from a null terminated char string
 * (the string is decoded as UTF-8 or read char by char, depending on the UTF8() flag)
 *
 * returns the status set by ParseRootSpace()
 */
SpaceParser::Status SpaceParser::ParseJSON(const char * str)
{
	// input source: a char string
	pchar_ascii               = str;
	pchar_unicode             = 0;
	reading_from_wchar_string = false;
	reading_from_file         = false;

	// input format: JSON
	parsing_space = false;

	ParseRootSpace();

	return status;
}
|
|
|
|
|
/*
 * parses a JSON document from a std::string
 * (forwards to the const char * version)
 */
SpaceParser::Status SpaceParser::ParseJSON(const std::string & str)
{
	const char * text = str.c_str();

	return ParseJSON(text);
}
|
|
|
|
|
/*
 * parses a JSON document from a null terminated wide characters string
 *
 * returns the status set by ParseRootSpace()
 */
SpaceParser::Status SpaceParser::ParseJSON(const wchar_t * str)
{
	// input source: a wide characters string
	pchar_unicode             = str;
	pchar_ascii               = 0;
	reading_from_wchar_string = true;
	reading_from_file         = false;

	// input format: JSON
	parsing_space = false;

	ParseRootSpace();

	return status;
}
|
|
|
|
|
/*
 * parses a JSON document from a std::wstring
 * (forwards to the const wchar_t * version)
 */
SpaceParser::Status SpaceParser::ParseJSON(const std::wstring & str)
{
	const wchar_t * text = str.c_str();

	return ParseJSON(text);
}
|
|
|
|
|
|
|
|
|
|
|
/*
 * parses a document in Space format from a null terminated char string
 * (the string is decoded as UTF-8 or read char by char, depending on the UTF8() flag)
 *
 * returns the status set by ParseRootSpace()
 */
SpaceParser::Status SpaceParser::ParseSpace(const char * str)
{
	// input source: a char string
	pchar_ascii               = str;
	pchar_unicode             = 0;
	reading_from_wchar_string = false;
	reading_from_file         = false;

	// input format: Space
	parsing_space = true;

	ParseRootSpace();

	return status;
}
|
|
|
|
|
/*
 * parses a document in Space format from a std::string
 * (forwards to the const char * version)
 */
SpaceParser::Status SpaceParser::ParseSpace(const std::string & str)
{
	const char * text = str.c_str();

	return ParseSpace(text);
}
|
|
|
|
|
/*
 * parses a document in Space format from a null terminated wide characters string
 *
 * returns the status set by ParseRootSpace()
 */
SpaceParser::Status SpaceParser::ParseSpace(const wchar_t * str)
{
	// input source: a wide characters string
	pchar_unicode             = str;
	pchar_ascii               = 0;
	reading_from_wchar_string = true;
	reading_from_file         = false;

	// input format: Space
	parsing_space = true;

	ParseRootSpace();

	return status;
}
|
|
|
|
|
/*
 * parses a document in Space format from a std::wstring
 * (forwards to the const wchar_t * version)
 */
SpaceParser::Status SpaceParser::ParseSpace(const std::wstring & str)
{
	const wchar_t * text = str.c_str();

	return ParseSpace(text);
}
|
|
|
|
|
|
|
|
|
|
|
void SpaceParser::ParseRootSpace() |
|
{ |
|
line = 1; |
|
status = ok; |
|
|
|
if( !root_space ) |
|
{ |
|
status = no_space; |
|
return; |
|
} |
|
|
|
ReadChar(); // put first character to lastc |
|
|
|
if( parsing_space ) |
|
{ |
|
separator = '='; |
|
table_start = '('; |
|
table_end = ')'; |
|
ParseSpace(root_space); |
|
} |
|
else |
|
{ |
|
separator = ':'; |
|
table_start = '['; |
|
table_end = ']'; |
|
Parse(root_space, false, false); |
|
} |
|
|
|
SkipWhite(); |
|
|
|
if( lastc != -1 ) |
|
status = syntax_error; |
|
|
|
token.clear(); |
|
} |
|
|
|
|
|
/*
 * parses one value and stores it in the given space
 *
 * the kind of the value is recognized by the first non-white character:
 * an object (space_start), a table (table_start), a quoted text ('"')
 * or a bare token: null, true, false, an integer or a floating point number;
 * any other bare token is stored as a string in Space format
 * and gives syntax_error in JSON format
 *
 * is_object_value and is_table_value tell where the value is placed
 * (they are passed to ReadStringValue() to select the token delimiters)
 */
void SpaceParser::Parse(Space * space, bool is_object_value, bool is_table_value)
{
	SkipWhite();

	if( lastc == space_start )
	{
		ParseSpace(space);
		return;
	}

	if( lastc == table_start )
	{
		ParseTable(space);
		return;
	}

	if( lastc == '"' ) // IMPROVEME define a variable
	{
		ParseTextValue(space);
		return;
	}

	// a bare (not quoted) token
	ReadStringValue(token, is_object_value, is_table_value);

	if( token == L"null" )
		space->set_null();
	else
	if( token == L"true" )
		space->set(true);
	else
	if( token == L"false" )
		space->set(false);
	else
	if( is_integer_token() )
		ParseIntegerValue(space);
	else
	if( is_floating_point_token() )
		ParseFloatingPointValue(space);
	else
	if( parsing_space )
		space->set(token);
	else
		status = syntax_error;
}
|
|
|
|
|
|
|
|
|
|
|
/*
 * parses an object: a list of key/value pairs between space_start and space_end
 *
 * in Space format the root space (global namespace) is not surrounded
 * by the space_start/space_end characters; in all other cases lastc should be
 * the space_start character on entry and a missing space_end character
 * gives syntax_error
 */
void SpaceParser::ParseSpace(Space * space)
{
	const bool is_global_space = parsing_space && space == root_space;

	if( !is_global_space )
	{
		// put the character following the space_start char to lastc
		ReadChar();
	}

	space->set_empty_object();
	ParseKeyValuePairs(space);

	if( is_global_space )
		return;

	if( lastc != space_end )
	{
		status = syntax_error;
		return;
	}

	// skip the space_end character
	ReadChar();
}
|
|
|
|
|
|
|
|
|
|
|
/*
 * parses a quoted text value directly into the wide string held by the space
 * (in Space format the text can span multiple lines, in JSON it cannot)
 */
void SpaceParser::ParseTextValue(Space * space)
{
	space->set_empty_wstring();
	std::wstring & text = *space->get_wstr();

	if( !parsing_space )
	{
		ReadTokenQuoted(text);
		return;
	}

	ReadMultilineTokenQuoted(text);
}
|
|
|
|
|
/*
 * converts the current token to a long long value and stores it in the space
 *
 * in Space format a token beginning with a zero is treated as an octal number,
 * in all other cases base 10 is used
 *
 * status is set to syntax_error on overflow or if the conversion
 * did not consume the whole token
 */
void SpaceParser::ParseIntegerValue(Space * space)
{
	const bool is_octal = parsing_space && !token.empty() && token[0] == '0';
	const int base = is_octal ? 8 : 10;

	const wchar_t * after_str = nullptr;
	bool was_overflow = false;

	long long val = Toll(token.c_str(), base, &after_str, &was_overflow, false);

	if( was_overflow || static_cast<size_t>(after_str - token.c_str()) != token.size() )
	{
		status = syntax_error;
		return;
	}

	space->set(val);
}
|
|
|
|
|
/*
 * converts the current token to a double value (with wcstod) and stores it in the space
 *
 * status is set to syntax_error if the value is out of range (ERANGE)
 * or if the conversion did not consume the whole token
 */
void SpaceParser::ParseFloatingPointValue(Space * space)
{
	wchar_t * after_str = nullptr;

	// wcstod() sets errno only on a range error and never clears it,
	// so without this reset a stale ERANGE left by some earlier call
	// would reject a perfectly valid number
	errno = 0;
	double val = wcstod(token.c_str(), &after_str);

	if( errno == ERANGE )
	{
		status = syntax_error;
	}
	else
	if( size_t(after_str - token.c_str()) != token.size() )
	{
		status = syntax_error;
	}
	else
	{
		space->set(val);
	}
}
|
|
|
|
|
|
|
|
|
/*
 * parses a table: a list of values between table_start and table_end
 *
 * on entry lastc should be the table_start character,
 * a missing table_end character gives syntax_error
 */
void SpaceParser::ParseTable(Space * space)
{
	// the space is turned into an empty table only once
	// (a second, redundant call made after reading the next character was removed)
	space->set_empty_table();

	ReadChar(); // inserting a next character after the table_start char to lastc
	ParseValuesList(space);

	if( lastc == table_end )
	{
		ReadChar(); // skipping the table_end character
	}
	else
	{
		status = syntax_error;
	}
}
|
|
|
|
|
|
|
|
|
/*
 * parses the key/value pairs of an object until the space_end character,
 * the end of the input or an error
 *
 * JSON:  pairs have the form "key" : value and have to be separated by the option_delimiter
 * Space: pairs have the form key = value or key { ...child space... },
 *        the option_delimiter between pairs is optional and is also allowed
 *        just before the space_end character
 *
 * white characters following each pair are skipped (also when the pair was incorrect)
 */
void SpaceParser::ParseKeyValuePairs(Space * space)
{
	SkipWhite();

	// 'continue' goes through the loop expression, so the white characters
	// are skipped after each iteration; only 'break' omits it
	for(bool first_pair = true ; status == ok && lastc != space_end && lastc != -1 ; first_pair = false, SkipWhite())
	{
		if( !first_pair )
		{
			SkipWhite();

			if( lastc != option_delimiter )
			{
				if( !parsing_space )
				{
					// in json format the option_delimiter is required
					status = syntax_error;
					continue;
				}
			}
			else
			{
				ReadChar(); // inserting a next character after the option_delimiter to lastc

				if( parsing_space )
				{
					// in space format a space_end character is allowed to be after the last item
					SkipWhite();

					if( lastc == space_end )
						break;
				}
			}
		}

		// reading a character could have reported an error (an incorrect escape sequence)
		if( status != ok )
			continue;

		ReadKey();

		if( status != ok )
			continue;

		SkipWhite();

		if( lastc == separator )
		{
			ReadChar(); // inserting a next character after the separator to lastc

			Space & value_space = space->add(token.c_str(), new Space());
			Parse(&value_space, true, false);
		}
		else
		if( parsing_space && lastc == space_start )
		{
			Space & child_space = space->add_child_space(token.c_str());
			ParseSpace(&child_space);
		}
		else
		{
			status = syntax_error;
		}
	}
}
|
|
|
|
|
|
|
|
|
/*
 * parses the items of a table until the table_end character,
 * the end of the input or an error
 *
 * JSON:  items have to be separated by the option_delimiter
 * Space: the option_delimiter between items is optional and is also allowed
 *        just before the table_end character
 *
 * white characters following each item are skipped (also when an error was reported)
 */
void SpaceParser::ParseValuesList(Space * space)
{
	SkipWhite();

	// 'continue' goes through the loop expression, so the white characters
	// are skipped after each iteration; only 'break' omits it
	for(bool first_item = true ; status == ok && lastc != table_end && lastc != -1 ; first_item = false, SkipWhite())
	{
		if( !first_item )
		{
			SkipWhite();

			// may add a new delimiter for tables? default the same as for objects...
			if( lastc != option_delimiter )
			{
				if( !parsing_space )
				{
					// in json format the option_delimiter is required
					status = syntax_error;
					continue;
				}
			}
			else
			{
				ReadChar(); // inserting a next character after the delimiter

				if( parsing_space )
				{
					// in space format a table_end character is allowed to be after the last table item
					SkipWhite();

					if( lastc == table_end )
						break;
				}
			}
		}

		// reading a character could have reported an error (an incorrect escape sequence)
		if( status != ok )
			continue;

		Space * item_space = &space->add(new Space());
		Parse(item_space, false, true);
	}
}
|
|
|
|
|
|
|
bool SpaceParser::is_integer_token() |
|
{ |
|
if( token.empty() ) |
|
return false; |
|
|
|
size_t i = 0; |
|
|
|
if( token[i] == '-' ) |
|
{ |
|
i += 1; |
|
|
|
if( token.size() == 1 ) |
|
return false; |
|
} |
|
|
|
for( ; i < token.size() ; ++i) |
|
{ |
|
if( token[i] < '0' || token[i] > '9' ) |
|
{ |
|
return false; |
|
} |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
bool SpaceParser::is_floating_point_token() |
|
{ |
|
bool was_dot = false; |
|
bool was_exponential = false; |
|
bool was_plus_minus_sign = false; |
|
|
|
if( token.empty() ) |
|
return false; |
|
|
|
size_t i = 0; |
|
|
|
if( token[i] == '-' ) |
|
{ |
|
i += 1; |
|
|
|
if( token.size() == 1 ) |
|
return false; |
|
} |
|
|
|
for( ; i < token.size() ; ++i) |
|
{ |
|
if( token[i] == '.' ) |
|
{ |
|
if( was_dot || was_exponential ) |
|
return false; |
|
|
|
was_dot = true; |
|
} |
|
else |
|
if( token[i] == 'e' || token[i]=='E' ) |
|
{ |
|
if( was_exponential ) |
|
return false; |
|
|
|
was_exponential = true; |
|
|
|
// the exponential character cannot be the last character |
|
if( i + 1 == token.size() ) |
|
return false; |
|
} |
|
else |
|
if( token[i] == '+' || token[i] == '-' ) |
|
{ |
|
if( was_plus_minus_sign || !was_exponential ) |
|
return false; |
|
|
|
// the plus or minus should be after the exponential character |
|
if( i > 0 && (token[i-1] != 'e' && token[i-1] != 'E') ) |
|
return false; |
|
|
|
was_plus_minus_sign = true; |
|
} |
|
else |
|
if( token[i] < '0' || token[i] > '9' ) |
|
{ |
|
return false; |
|
} |
|
} |
|
|
|
return true; |
|
} |
|
|
|
|
|
|
|
/*
 * returns true if c is treated by the parser as a white character
 */
bool SpaceParser::IsWhite(int c)
{
	switch( c )
	{
	case ' ':
	case '\t':
	case 13:	// \r - at the end of a line in a dos file (\r\n)
	case 10:	// \n
	case 160:	// an unbreakable space
		return true;

	default:
		return false;
	}
}
|
|
|
|
|
/*
 * returns true for characters which can be a part of a bare JSON token:
 * ASCII letters, decimal digits, a dot, a minus sign and a plus sign
 */
bool SpaceParser::is_alfa_numeric_char(int c)
{
	const bool is_small_letter = (c >= 'a' && c <= 'z');
	const bool is_big_letter   = (c >= 'A' && c <= 'Z');
	const bool is_digit        = (c >= '0' && c <= '9');
	const bool is_number_sign  = (c == '.' || c == '-' || c == '+');

	return is_small_letter || is_big_letter || is_digit || is_number_sign;
}
|
|
|
|
|
void SpaceParser::SkipLine() |
|
{ |
|
while( lastc != -1 && (char_was_escaped || lastc != '\n') ) |
|
ReadChar(); |
|
} |
|
|
|
|
|
|
|
void SpaceParser::SkipWhite() |
|
{ |
|
if( parsing_space ) |
|
{ |
|
while( IsWhite(lastc) || (!char_was_escaped && lastc == '#') ) |
|
{ |
|
if( lastc == '#' ) |
|
SkipLine(); |
|
else |
|
ReadChar(); |
|
} |
|
} |
|
else |
|
{ |
|
while( IsWhite(lastc) ) |
|
{ |
|
ReadChar(); |
|
} |
|
} |
|
} |
|
|
|
|
|
void SpaceParser::TrimLastWhite(std::wstring & s) |
|
{ |
|
std::wstring::size_type i; |
|
|
|
for(i=s.size() ; i>0 && IsWhite(s[i-1]) ; --i) |
|
{ |
|
} |
|
|
|
if( i < s.size() ) |
|
{ |
|
s.erase(i, std::wstring::npos); |
|
} |
|
} |
|
|
|
|
|
/* |
|
void SpaceParser::Trim(std::wstring & s) |
|
{ |
|
std::wstring::size_type i; |
|
|
|
if( s.empty() ) |
|
return; |
|
|
|
// looking for white characters at the end |
|
for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i); |
|
|
|
if( i==0 && IsWhite(s[i]) ) |
|
{ |
|
// the whole string consists of white characters |
|
s.clear(); |
|
return; |
|
} |
|
|
|
// deleting white characters at the end |
|
if( i != s.size() - 1 ) |
|
s.erase(i+1, std::wstring::npos); |
|
|
|
// looking for white characters at the beginning |
|
for(i=0 ; i<s.size() && IsWhite(s[i]) ; ++i); |
|
|
|
// deleting white characters at the beginning |
|
if( i != 0 ) |
|
s.erase(0, i); |
|
} |
|
*/ |
|
|
|
|
|
void SpaceParser::ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2) |
|
{ |
|
token.clear(); |
|
|
|
while( lastc != -1 && (char_was_escaped || (lastc != '\n' && lastc != '#' && lastc != delimiter1 && lastc != delimiter2)) ) |
|
{ |
|
token += static_cast<wchar_t>(lastc); |
|
ReadChar(); |
|
} |
|
|
|
TrimLastWhite(token); |
|
} |
|
|
|
|
|
void SpaceParser::ReadAlfaNumericToken(std::wstring & token) |
|
{ |
|
token.clear(); |
|
|
|
while( is_alfa_numeric_char(lastc) ) |
|
{ |
|
token += static_cast<wchar_t>(lastc); |
|
ReadChar(); |
|
} |
|
} |
|
|
|
|
|
void SpaceParser::ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value) |
|
{ |
|
if( parsing_space ) |
|
{ |
|
if( is_object_value ) |
|
{ |
|
ReadTokenUntilDelimiter(token, space_end, -1); |
|
} |
|
else |
|
if( is_table_value ) |
|
{ |
|
ReadTokenUntilDelimiter(token, table_end, option_delimiter); |
|
} |
|
else |
|
{ |
|
ReadTokenUntilDelimiter(token, -1, -1); |
|
} |
|
} |
|
else |
|
{ |
|
ReadAlfaNumericToken(token); |
|
} |
|
} |
|
|
|
|
|
// rename to something like ReadSpaceFieldToken??? |
|
void SpaceParser::ReadSpaceFieldToken(std::wstring & token) |
|
{ |
|
token.clear(); |
|
|
|
while( lastc != -1 && (char_was_escaped || (lastc != separator && lastc != 10 && lastc != space_start && lastc != '#' )) ) |
|
{ |
|
token += static_cast<wchar_t>(lastc); |
|
ReadChar(); |
|
} |
|
|
|
TrimLastWhite(token); |
|
} |
|
|
|
|
|
// IMPROVEME in JSON we should not allow non-escaped a new line character |
|
void SpaceParser::ReadTokenQuoted(std::wstring & token) |
|
{ |
|
token.clear(); |
|
ReadChar(); // skipping the first quotation mark |
|
|
|
while( lastc != -1 && (char_was_escaped || (lastc != '"' && lastc != 10)) ) |
|
{ |
|
token += static_cast<wchar_t>(lastc); |
|
ReadChar(); |
|
} |
|
|
|
if( !char_was_escaped && lastc == '"' ) |
|
{ |
|
ReadChar(); // skipping the last quotation mark |
|
} |
|
else |
|
{ |
|
status = syntax_error; |
|
} |
|
} |
|
|
|
|
|
void SpaceParser::ReadMultilineTokenQuoted(std::wstring & token) |
|
{ |
|
token.clear(); |
|
ReadChar(); // skipping the first quotation mark |
|
|
|
while( lastc != -1 && (char_was_escaped || lastc != '"') ) |
|
{ |
|
token += static_cast<wchar_t>(lastc); |
|
ReadChar(); |
|
} |
|
|
|
if( !char_was_escaped && lastc == '"' ) |
|
{ |
|
ReadChar(); // skipping the last quotation mark |
|
} |
|
else |
|
{ |
|
status = syntax_error; |
|
} |
|
} |
|
|
|
|
|
/* |
|
* this method is used to read the field name (key) in an object |
|
* or to read the space child name (used in Space format) |
|
*/ |
|
void SpaceParser::ReadKey() |
|
{ |
|
SkipWhite(); |
|
|
|
if( parsing_space ) |
|
{ |
|
if( lastc == '"' ) |
|
{ |
|
ReadMultilineTokenQuoted(token); |
|
} |
|
else |
|
{ |
|
ReadSpaceFieldToken(token); |
|
} |
|
} |
|
else |
|
{ |
|
if( lastc == '"' ) |
|
{ |
|
ReadTokenQuoted(token); |
|
} |
|
else |
|
{ |
|
status = syntax_error; |
|
} |
|
} |
|
} |
|
|
|
|
|
|
|
|
|
int SpaceParser::ReadUTF8Char() |
|
{ |
|
int c; |
|
bool correct; |
|
|
|
lastc = -1; |
|
|
|
do |
|
{ |
|
UTF8ToInt(file, c, correct); |
|
|
|
if( !file ) |
|
return lastc; |
|
} |
|
while( !correct ); |
|
|
|
lastc = c; |
|
|
|
if( lastc == '\n' ) |
|
++line; |
|
|
|
return lastc; |
|
} |
|
|
|
|
|
|
|
int SpaceParser::ReadASCIIChar() |
|
{ |
|
lastc = file.get(); |
|
|
|
if( lastc == '\n' ) |
|
++line; |
|
|
|
return lastc; |
|
} |
|
|
|
|
|
|
|
|
|
int SpaceParser::ReadCharFromWcharString() |
|
{ |
|
if( *pchar_unicode == 0 ) |
|
lastc = -1; |
|
else |
|
lastc = *(pchar_unicode++); |
|
|
|
if( lastc == '\n' ) |
|
++line; |
|
|
|
return lastc; |
|
} |
|
|
|
|
|
int SpaceParser::ReadCharFromUTF8String() |
|
{ |
|
int c; |
|
bool correct; |
|
|
|
lastc = -1; |
|
|
|
do |
|
{ |
|
size_t len = UTF8ToInt(pchar_ascii, c, correct); |
|
pchar_ascii += len; |
|
} |
|
while( *pchar_ascii && !correct ); |
|
|
|
if( correct ) |
|
lastc = c; |
|
|
|
if( lastc == '\n' ) |
|
++line; |
|
|
|
return lastc; |
|
|
|
} |
|
|
|
|
|
int SpaceParser::ReadCharFromAsciiString() |
|
{ |
|
if( *pchar_ascii == 0 ) |
|
lastc = -1; |
|
else |
|
lastc = *(pchar_ascii++); |
|
|
|
if( lastc == '\n' ) |
|
++line; |
|
|
|
return lastc; |
|
} |
|
|
|
|
|
int SpaceParser::ReadCharNoEscape() |
|
{ |
|
if( reading_from_file ) |
|
{ |
|
if( input_as_utf8 ) |
|
return ReadUTF8Char(); |
|
else |
|
return ReadASCIIChar(); |
|
} |
|
else |
|
{ |
|
if( reading_from_wchar_string ) |
|
{ |
|
return ReadCharFromWcharString(); |
|
} |
|
else |
|
{ |
|
if( input_as_utf8 ) |
|
return ReadCharFromUTF8String(); |
|
else |
|
return ReadCharFromAsciiString(); |
|
} |
|
} |
|
} |
|
|
|
/*
 * returns true if c is a hexadecimal digit: 0-9, a-f or A-F
 */
bool SpaceParser::IsHexDigit(wchar_t c)
{
	const bool is_decimal_digit = (c >= '0' && c <= '9');
	const bool is_small_letter  = (c >= 'a' && c <= 'f');
	const bool is_big_letter    = (c >= 'A' && c <= 'F');

	return is_decimal_digit || is_small_letter || is_big_letter;
}
|
|
|
|
|
/*
 * returns the value (0-15) of a hexadecimal digit
 * zero is returned if c is not a hexadecimal digit
 */
int SpaceParser::HexToInt(wchar_t c)
{
	int value = 0;

	if( c >= 'A' && c <= 'F' )
	{
		value = c - 'A' + 10;
	}
	else
	if( c >= 'a' && c <= 'f' )
	{
		value = c - 'a' + 10;
	}
	else
	if( c >= '0' && c <= '9' )
	{
		value = c - '0';
	}

	return value;
}
|
|
|
|
|
void SpaceParser::ReadUnicodeCodePoint() |
|
{ |
|
wchar_t c; |
|
int value = 0; |
|
|
|
for(int i=0 ; i<4 ; ++i) |
|
{ |
|
c = ReadCharNoEscape(); |
|
|
|
if( !IsHexDigit(c) ) |
|
{ |
|
status = syntax_error; |
|
return; |
|
} |
|
|
|
value = (value << 4) | HexToInt(c); |
|
} |
|
|
|
lastc = (wchar_t)value; |
|
} |
|
|
|
|
|
int SpaceParser::ReadChar() |
|
{ |
|
char_was_escaped = false; |
|
ReadCharNoEscape(); |
|
|
|
if( use_escape_char && lastc == '\\' ) |
|
{ |
|
char_was_escaped = true; |
|
ReadCharNoEscape(); |
|
|
|
switch(lastc) |
|
{ |
|
case '0': lastc = 0; break; |
|
case 't': lastc = '\t'; break; |
|
case 'r': lastc = '\r'; break; |
|
case 'n': lastc = '\n'; break; |
|
case 'b': lastc = 0x08; break; |
|
case 'f': lastc = 0x0c; break; |
|
case 'u': ReadUnicodeCodePoint(); break; |
|
// "in other cases we return the last character, so two \\ returns one \ " |
|
} |
|
} |
|
|
|
return lastc; |
|
} |
|
|
|
|
|
|
|
} // namespace |
|
|
|
|
|
|
|
|
|
|