diff --git a/src/Makefile.dep b/src/Makefile.dep index 16e85d6..84ddfa4 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -8,6 +8,9 @@ ./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h ./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h ./convert/double.o: membuffer/membuffer.h textstream/types.h +./convert/baseparser.o: ./convert/baseparser.h utf8/utf8.h +./convert/baseparser.o: textstream/stream.h utf8/utf8_templates.h +./convert/baseparser.o: utf8/utf8_private.h ./date/date.o: ./date/date.h convert/inttostr.h ./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h ./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h @@ -29,14 +32,15 @@ ./space/spaceparser.o: ./space/spaceparser.h ./space/space.h ./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h ./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h -./space/spaceparser.o: utf8/utf8_private.h convert/strtoint.h -./space/spaceparser.o: ./convert/text.h ./convert/misc.h +./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h +./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h ./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h ./utf8/utf8.o: utf8/utf8_private.h ./utf8/utf8_private.o: utf8/utf8_private.h ./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h ./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h ./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h +./csv/csvparser.o: convert/baseparser.h ./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h ./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h ./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h diff --git a/src/convert/baseparser.cpp b/src/convert/baseparser.cpp new file mode 100644 index 0000000..b95933d --- /dev/null +++ b/src/convert/baseparser.cpp @@ -0,0 +1,188 @@ +/* + * This file is a part of 
PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "baseparser.h"
+#include "utf8/utf8.h"
+
+
+
+namespace pt
+{
+
+/*
+ * Starts with every reader field in its default state (see clear()).
+ */
+BaseParser::BaseParser()
+{
+    clear();
+}
+
+
+/*
+ * Resets the reading state: line counter to 0, no string pointers,
+ * narrow-string (not file, not wchar_t) mode, lastc = -1 and UTF-8 input
+ * enabled. The `file` stream itself is not touched here.
+ */
+void BaseParser::clear()
+{
+    line = 0;
+    reading_from_file = false;
+    pchar_ascii = nullptr;
+    pchar_unicode = nullptr;
+    reading_from_wchar_string = false;
+    lastc = -1;
+    input_as_utf8 = true;
+}
+
+
+/*
+ * Reads one character from `file` through utf8_to_int().
+ *
+ * Input which utf8_to_int() reports as incorrect is skipped and the read
+ * is repeated. Returns the character (also stored in lastc), or -1 when
+ * the stream is no longer in a good state after a read.
+ * Reading '\n' increments `line`.
+ */
+int BaseParser::read_utf8_char()
+{
+    int c;
+    bool correct;
+
+    lastc = -1;
+
+    do
+    {
+        utf8_to_int(file, c, correct);
+
+        // lastc is still -1 here, so a failed stream is reported as the end
+        if( !file )
+            return lastc;
+    }
+    while( !correct );
+
+    lastc = c;
+
+    if( lastc == '\n' )
+        ++line;
+
+    return lastc;
+}
+
+
+/*
+ * Reads one byte from `file` without any decoding.
+ *
+ * Returns whatever file.get() returns (also stored in lastc).
+ * Reading '\n' increments `line`.
+ */
+int BaseParser::read_ascii_char()
+{
+    lastc = file.get();
+
+    if( lastc == '\n' )
+        ++line;
+
+    return lastc;
+}
+
+
+/*
+ * Reads one character from the zero-terminated wide string pointed to by
+ * pchar_unicode and advances the pointer.
+ *
+ * At the terminating zero the pointer is left in place and -1 is returned.
+ * pchar_unicode must point to a valid string (it is nullptr after clear()).
+ * Reading '\n' increments `line`.
+ */
+int BaseParser::read_char_from_wchar_string()
+{
+    if( *pchar_unicode == 0 )
+        lastc = -1;
+    else
+        lastc = *(pchar_unicode++);
+
+    if( lastc == '\n' )
+        ++line;
+
+    return lastc;
+}
+
+
+/*
+ * Reads one character from the zero-terminated UTF-8 string pointed to by
+ * pchar_ascii, advancing the pointer by the length utf8_to_int() returns.
+ *
+ * Input reported as incorrect is skipped until a correct character is
+ * decoded or the terminating zero is reached. Returns the character (also
+ * stored in lastc), or -1 if the last decoding attempt was not correct.
+ * Reading '\n' increments `line`.
+ *
+ * NOTE(review): the loop only makes progress if utf8_to_int() returns
+ * a non-zero length for an incorrect sequence - confirm in utf8/utf8.h.
+ */
+int BaseParser::read_char_from_utf8_string()
+{
+    int c;
+    bool correct;
+
+    lastc = -1;
+
+    do
+    {
+        size_t len = utf8_to_int(pchar_ascii, c, correct);
+        pchar_ascii += len;
+    }
+    while( *pchar_ascii && !correct );
+
+    if( correct )
+        lastc = c;
+
+    if( lastc == '\n' )
+        ++line;
+
+    return lastc;
+}
+
+
+/*
+ * Reads one byte from the zero-terminated string pointed to by pchar_ascii
+ * (no decoding) and advances the pointer.
+ *
+ * At the terminating zero the pointer is left in place and -1 is returned.
+ * pchar_ascii must point to a valid string (it is nullptr after clear()).
+ * Reading '\n' increments `line`.
+ */
+int BaseParser::read_char_from_ascii_string()
+{
+    if( *pchar_ascii == 0 )
+    {
+        lastc = -1;
+    }
+    else
+    {
+        // plain char may be signed: without the cast a byte >= 0x80 would
+        // become a negative value and 0xFF would be taken for the -1 end
+        // marker; read_ascii_char() (file.get()) yields 0..255 as well
+        lastc = static_cast<unsigned char>(*(pchar_ascii++));
+    }
+
+    if( lastc == '\n' )
+        ++line;
+
+    return lastc;
+}
+
+
+/*
+ * Reads the next character from the currently selected source:
+ *
+ *   reading_from_file          -> file, as UTF-8 or as raw bytes
+ *   reading_from_wchar_string  -> pchar_unicode
+ *   otherwise                  -> pchar_ascii, as UTF-8 or as raw bytes
+ *
+ * input_as_utf8 selects between UTF-8 decoding and raw bytes.
+ * Returns the value of the selected reader (the character or -1 at the end).
+ */
+int BaseParser::read_char_no_escape()
+{
+    if( reading_from_file )
+        return input_as_utf8 ? read_utf8_char() : read_ascii_char();
+
+    if( reading_from_wchar_string )
+        return read_char_from_wchar_string();
+
+    return input_as_utf8 ? read_char_from_utf8_string() : read_char_from_ascii_string();
+}
+
+
+/*
+ * Default reader: forwards to read_char_no_escape().
+ */
+int BaseParser::read_char()
+{
+    return read_char_no_escape();
+}
+
+
+
+
+}
+
diff --git a/src/convert/baseparser.h b/src/convert/baseparser.h new file mode 100644 index 0000000..381568f --- /dev/null +++ b/src/convert/baseparser.h @@
-0,0 +1,120 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef headerfile_picotools_convert_baseparser
+#define headerfile_picotools_convert_baseparser
+
+#include <string>
+#include <fstream>
+
+
+namespace pt
+{
+
+/*
+ * Character-reading layer meant to be inherited by parsers
+ * (all members are protected, including the constructor).
+ *
+ * The input is either the `file` stream or a zero-terminated string
+ * (char or wchar_t); the derived class selects the source by setting
+ * the flags and pointers below and then calls read_char().
+ * Every read_* method returns the character read, or -1 at the end
+ * of the input, stores the same value in lastc and increments `line`
+ * when the character is '\n'.
+ */
+class BaseParser
+{
+protected:
+
+    /*
+        calls clear()
+    */
+    BaseParser();
+
+    /*
+        sets the reading state to its defaults
+        (the file stream itself is not modified)
+    */
+    void clear();
+
+    /*
+        readers for a particular source:
+        file as UTF-8, file as raw bytes, wchar_t string,
+        char string as UTF-8, char string as raw bytes
+    */
+    int read_utf8_char();
+    int read_ascii_char();
+    int read_char_from_wchar_string();
+    int read_char_from_utf8_string();
+    int read_char_from_ascii_string();
+
+    /*
+        selects one of the readers above
+        depending on reading_from_file, reading_from_wchar_string and input_as_utf8
+    */
+    int read_char_no_escape();
+
+    /*
+        calls read_char_no_escape()
+        this method is not virtual: a derived parser which declares
+        its own read_char() hides this one
+    */
+    int read_char();
+
+
+
+    /*
+        a number of '\n' characters read so far (zero after clear())
+        parsers use it as the number of the line in which there is a syntax error
+    */
+    int line;
+
+
+    /*
+        true if characters are read from the file stream
+        false if they are read from a string (pchar_ascii or pchar_unicode)
+    */
+    bool reading_from_file;
+
+    /*
+        pointers to the current character
+        used when a string is being parsed (reading_from_file is false),
+        both are nullptr after clear()
+    */
+    const char * pchar_ascii;
+    const wchar_t * pchar_unicode;
+
+
+    /*
+        true if the string being parsed is a wide string (pchar_unicode is used)
+        false if it is a char string (pchar_ascii is used)
+        meaningful only if reading_from_file is false
+    */
+    bool reading_from_wchar_string;
+
+    /*
+        last read char
+        or -1 if the end
+    */
+    int lastc;
+
+
+    /*
+        current file
+
+        maybe it would be better to make it a pointer?
+        if we parse only a string then there is no sense in having such an object
+    */
+    std::ifstream file;
+
+
+    /*
+        true if the input (a file or a char string) should be decoded as UTF-8
+        false if each byte is taken as one character
+        default: true
+    */
+    bool input_as_utf8;
+
+
+
+
+};
+
+}
+
+#endif
diff --git a/src/csv/csvparser.cpp b/src/csv/csvparser.cpp index 4ab1480..583eee3 100644 --- a/src/csv/csvparser.cpp +++ b/src/csv/csvparser.cpp @@ -44,6 +44,11 @@ namespace pt { +CSVParser::CSVParser() +{ + input_as_utf8 = true; +} + CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space) @@ -285,132 +290,6 @@ bool CSVParser::read_non_quoted_value_to(std::wstring & value) - -int CSVParser::read_utf8_char() -{ -int c; -bool correct; - - lastc = -1; - - do - { - utf8_to_int(file, c, correct); - - if( !file ) - return lastc; - } - while( !correct ); - - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - -int CSVParser::read_ascii_char() -{ - lastc = file.get(); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - - -int CSVParser::read_char_from_wchar_string() -{ - if( *pchar_unicode == 0 ) - lastc = -1; - else - lastc = *(pchar_unicode++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int CSVParser::read_char_from_utf8_string() -{ -int c; -bool correct; - - lastc = -1; - - do - { - size_t len = utf8_to_int(pchar_ascii, c, correct); - pchar_ascii += len; - } - while( *pchar_ascii && !correct ); - - if( correct ) - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int CSVParser::read_char_from_ascii_string() -{ - if( *pchar_ascii == 0 ) - lastc = -1; - else - lastc = *(pchar_ascii++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int CSVParser::read_char_no_escape() -{ - if( reading_from_file ) - { - if( input_as_utf8 ) - return read_utf8_char(); - else - return read_ascii_char(); - } - else - { - if( reading_from_wchar_string ) - { - return read_char_from_wchar_string(); - } - else - { - if( input_as_utf8 ) - return read_char_from_utf8_string(); - else - return 
read_char_from_ascii_string(); - } - } -} - - - - -int CSVParser::read_char() -{ - return read_char_no_escape(); -} - - } diff --git a/src/csv/csvparser.h b/src/csv/csvparser.h index 8370867..c549fa5 100644 --- a/src/csv/csvparser.h +++ b/src/csv/csvparser.h @@ -38,9 +38,11 @@ #ifndef headerfile_picotools_csv_csvparser #define headerfile_picotools_csv_csvparser -#include "space/space.h" #include #include +#include "space/space.h" +#include "convert/baseparser.h" + namespace pt @@ -51,10 +53,12 @@ namespace pt * https://datatracker.ietf.org/doc/html/rfc4180 * */ -class CSVParser +class CSVParser : public BaseParser { public: + CSVParser(); + enum Status { ok, @@ -85,53 +89,6 @@ protected: Space * space; - /* - true if parse_file() method was called - false if parse() was called - */ - bool reading_from_file; - - /* - true if parse(wchar_t *) or parse(std::wstring&) was called - */ - bool reading_from_wchar_string; - - /* - pointers to the current character - if parse() is being used - */ - const char * pchar_ascii; - const wchar_t * pchar_unicode; - - - /* - last read char - or -1 if the end - */ - int lastc; - - - - /* - a number of a line in which there is a syntax_error - */ - int line; - - /* - current file - - may it would be better to make a pointer? - if we parse only a string then there is no sense to have such an object - */ - std::ifstream file; - - /* - input file is in UTF-8 - default: true - */ - bool input_as_utf8; - - void parse(); @@ -142,19 +99,6 @@ protected: bool read_non_quoted_value_to(std::wstring & value); - - /* - * copied from SpaceParser - * may it would be better to have a class with those methods and inherit from it? 
- */ - int read_utf8_char(); - int read_ascii_char(); - int read_char_from_wchar_string(); - int read_char_from_utf8_string(); - int read_char_from_ascii_string(); - int read_char_no_escape(); - - int read_char(); }; } diff --git a/src/space/spaceparser.cpp b/src/space/spaceparser.cpp index ccd905e..9c334a7 100644 --- a/src/space/spaceparser.cpp +++ b/src/space/spaceparser.cpp @@ -891,122 +891,6 @@ void SpaceParser::read_key() -int SpaceParser::read_utf8_char() -{ -int c; -bool correct; - - lastc = -1; - - do - { - utf8_to_int(file, c, correct); - - if( !file ) - return lastc; - } - while( !correct ); - - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - -int SpaceParser::read_ascii_char() -{ - lastc = file.get(); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - - -int SpaceParser::read_char_from_wchar_string() -{ - if( *pchar_unicode == 0 ) - lastc = -1; - else - lastc = *(pchar_unicode++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int SpaceParser::read_char_from_utf8_string() -{ -int c; -bool correct; - - lastc = -1; - - do - { - size_t len = utf8_to_int(pchar_ascii, c, correct); - pchar_ascii += len; - } - while( *pchar_ascii && !correct ); - - if( correct ) - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; - -} - - -int SpaceParser::read_char_from_ascii_string() -{ - if( *pchar_ascii == 0 ) - lastc = -1; - else - lastc = *(pchar_ascii++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int SpaceParser::read_char_no_escape() -{ - if( reading_from_file ) - { - if( input_as_utf8 ) - return read_utf8_char(); - else - return read_ascii_char(); - } - else - { - if( reading_from_wchar_string ) - { - return read_char_from_wchar_string(); - } - else - { - if( input_as_utf8 ) - return read_char_from_utf8_string(); - else - return read_char_from_ascii_string(); - } - } -} bool SpaceParser::is_hex_digit(wchar_t c) { diff --git a/src/space/spaceparser.h b/src/space/spaceparser.h index 818b260..6805b88 
100644 --- a/src/space/spaceparser.h +++ b/src/space/spaceparser.h @@ -40,6 +40,7 @@ #include #include "space.h" +#include "convert/baseparser.h" @@ -49,7 +50,7 @@ namespace pt -class SpaceParser +class SpaceParser : public BaseParser { public: @@ -154,32 +155,6 @@ private: Space * root_space; - /* - a number of a line in which there is a syntax_error - */ - int line; - - /* - true if parse() method was called - false if ParseString() was called - */ - bool reading_from_file; - - - /* - pointers to the current character - if ParseString() is in used - */ - const char * pchar_ascii; - const wchar_t * pchar_unicode; - - - /* - true if ParseString(wchar_t *) or ParseString(std::wstring&) was called - */ - bool reading_from_wchar_string; - - /* last read token */ @@ -222,13 +197,6 @@ private: int option_delimiter; - /* - last read char - or -1 if the end - */ - int lastc; - - /* true if the lastc was escaped (with a backslash) we have to know if the last sequence was \" or just " @@ -236,22 +204,6 @@ private: bool char_was_escaped; - /* - current file - - may it would be better to make a pointer? 
- if we parse only a string then there is no sense to have such an object - */ - std::ifstream file; - - - /* - input file is in UTF-8 - default: true - */ - bool input_as_utf8; - - /* * if parsing_space is false then it means we are parsing JSON format * @@ -287,12 +239,6 @@ private: void read_token_quoted(std::wstring & token); void read_multiline_token_quoted(std::wstring & token); - int read_utf8_char(); - int read_ascii_char(); - int read_char_from_wchar_string(); - int read_char_from_utf8_string(); - int read_char_from_ascii_string(); - int read_char_no_escape(); int read_char(); bool is_white(int c); void skip_line(); diff --git a/tests/Makefile.dep b/tests/Makefile.dep index 60ed660..e83e777 100644 --- a/tests/Makefile.dep +++ b/tests/Makefile.dep @@ -13,7 +13,8 @@ ./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h ./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h ./csvparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h -./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h test.h +./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h +./csvparser.o: ../src/convert/baseparser.h test.h ./main.o: convert.h mainoptionsparser.h csvparser.h ./test.o: test.h ./mainoptionsparser.o: mainoptionsparser.h test.h