diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..e34073d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +# EditorConfig +# https://editorconfig.org/ +# https://editorconfig-specification.readthedocs.io/ + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true + +# Tab indentation, 4 columns per indentation level +[*.{h,cpp,html,css,js,conf,txt}] +indent_style = tab +indent_size = 4 + +# Tab indentation for Makefiles, 4 columns per indentation level +[Makefile] +indent_style = tab +indent_size = 4 diff --git a/.gitignore b/.gitignore index 2da86a3..960f3fc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ .project .settings/ *.o -*.a +src/pikotools.a +tests/tests +m diff --git a/Makefile b/Makefile index 5ea0e93..3439f7c 100644 --- a/Makefile +++ b/Makefile @@ -1,76 +1,58 @@ -# Makefile for GNU make -ifndef CXX -CXX = clang++ -endif - -ifndef CXXFLAGS -CXXFLAGS = -Wall -O2 -I/usr/local/include -endif - -ifndef LDFLAGS -LDFLAGS = -L/usr/local/lib -endif - -ifndef AR -AR = ar -endif - -#CXX=g++5 -#CXXFLAGS=-Wall -O0 -g3 -gdwarf-2 -std=c++14 export CXX export CXXFLAGS -export LDFLAGS export AR -all: space mainparser mainspaceparser utf8 date convert log +all: src +src: FORCE + $(MAKE) -C src -space: FORCE - @cd space ; $(MAKE) -e +tests: FORCE + $(MAKE) -C src + $(MAKE) -C tests -mainparser: FORCE - @cd mainparser ; $(MAKE) -e -mainspaceparser: FORCE - @cd mainspaceparser ; $(MAKE) -e +tests-gcc10: FORCE + env CXX=g++10 CXXFLAGS="-Wl,-rpath=/usr/local/lib/gcc10/ -Wall -pedantic -O0 -g3 -std=c++20 -fmax-errors=1 -I../src -I/usr/local/include" $(MAKE) -C src + env CXX=g++10 CXXFLAGS="-Wl,-rpath=/usr/local/lib/gcc10/ -Wall -pedantic -O0 -g3 -std=c++20 -fmax-errors=1 -I../src -I/usr/local/include" $(MAKE) -C tests -utf8: FORCE - @cd utf8 ; $(MAKE) -e -date: FORCE - @cd date ; $(MAKE) -e +tests-clang: FORCE + env CXX=clang++ CXXFLAGS="-Wall -pedantic -O0 -g3 -std=c++20 -I../src 
-I/usr/local/include" $(MAKE) -C src + env CXX=clang++ CXXFLAGS="-Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests -convert: FORCE - @cd convert ; $(MAKE) -e -log: FORCE - @cd log ; $(MAKE) -e +tests-clang-sa: FORCE + env CXX=clang++ CXXFLAGS="-fsanitize=address -Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src + env CXX=clang++ CXXFLAGS="-fsanitize=address -Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests + + +tests-clang-sm: FORCE + env CXX=clang++ CXXFLAGS="-fsanitize=memory -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src + env CXX=clang++ CXXFLAGS="-fsanitize=memory -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests + + +tests-clang-su: FORCE + env CXX=clang++ CXXFLAGS="-fsanitize=undefined -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src + env CXX=clang++ CXXFLAGS="-fsanitize=undefined -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests + + +clean: FORCE + $(MAKE) -C src clean + $(MAKE) -C tests clean + + +depend: FORCE + $(MAKE) -C src depend + $(MAKE) -C tests depend FORCE: - -clean: - @cd space ; $(MAKE) -e clean - @cd mainparser ; $(MAKE) -e clean - @cd mainspaceparser ; $(MAKE) -e clean - @cd utf8 ; $(MAKE) -e clean - @cd date ; $(MAKE) -e clean - @cd convert ; $(MAKE) -e clean - @cd log ; $(MAKE) -e clean - -depend: - @cd space ; $(MAKE) -e depend - @cd mainparser ; $(MAKE) -e depend - @cd mainspaceparser ; $(MAKE) -e depend - @cd utf8 ; $(MAKE) -e depend - @cd date ; $(MAKE) -e depend - @cd convert ; $(MAKE) -e depend - @cd log ; $(MAKE) -e depend diff --git a/convert/Makefile b/convert/Makefile deleted file mode 100644 index 3b74c9f..0000000 --- a/convert/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=convert.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c 
$(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. -f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/convert/Makefile.dep b/convert/Makefile.dep deleted file mode 100644 index 35b219c..0000000 --- a/convert/Makefile.dep +++ /dev/null @@ -1,5 +0,0 @@ -# DO NOT DELETE - -inttostr.o: inttostr.h -misc.o: misc.h text.h -text.o: text.h diff --git a/convert/Makefile.o.dep b/convert/Makefile.o.dep deleted file mode 100644 index 70b7e42..0000000 --- a/convert/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = inttostr.o misc.o text.o \ No newline at end of file diff --git a/convert/text.cpp b/convert/text.cpp deleted file mode 100644 index 8fc4370..0000000 --- a/convert/text.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2017-2018, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include "text.h" - - -namespace PT -{ - -// white_chars table should be sorted (a binary search algorithm is used to find a character) -// we do not treat a new line character (10) as a white character here -// also space (32) and tab (9) are not inserted here -static const wchar_t white_chars_table[] = { - 0x000B, // LINE TABULATION (vertical tabulation) - 0x000C, // FORM FEED (FF) - 0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file - 0x0085, // NEXT LINE (NEL) - 0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE) - 0x1680, // OGHAM SPACE MARK - 0x180E, // MONGOLIAN VOWEL SEPARATOR - 0x2000, // EN QUAD - 0x2001, // EM QUAD - 0x2002, // EN SPACE - 0x2003, // EM SPACE - 0x2004, // THREE-PER-EM SPACE - 0x2005, // FOUR-PER-EM SPACE - 0x2006, // SIX-PER-EM SPACE - 0x2007, // FIGURE SPACE - 0x2008, // PUNCTUATION SPACE - 0x2009, // THIN SPACE - 0x200A, // HAIR SPACE - 0x2028, // LINE SEPARATOR - 0x2029, // PARAGRAPH SEPARATOR - 0x202F, // NARROW NO-BREAK SPACE - 0x205F, // MEDIUM MATHEMATICAL SPACE - 0x3000, // IDEOGRAPHIC SPACE - 0xFEFF, // ZERO WIDTH NO-BREAK SPACE -}; - - - - -/* - if check_additional_chars is false then we are 
testing only a space (32), tab (9) and a new line (10) (if treat_new_line_as_white is true) -*/ -bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) -{ - // space (32) and tab (9) are the most common white chars - // so we check them at the beginning (optimisation) - if( c == 32 || c == 9 ) - return true; - - std::size_t len = sizeof(white_chars_table) / sizeof(wchar_t); - std::size_t o1 = 0; - std::size_t o2 = len - 1; - - if( c == 10 ) - return treat_new_line_as_white ? true : false; - - if( !check_additional_chars ) - return false; - - if( c < white_chars_table[o1] || c > white_chars_table[o2] ) - return false; - - if( c == white_chars_table[o1] || c == white_chars_table[o2] ) - return true; - - while( o1 + 1 < o2 ) - { - std::size_t o = (o2 - o1)/2 + o1; - - if( c == white_chars_table[o] ) - return true; - - if( c > white_chars_table[o] ) - o1 = o; - else - o2 = o; - } - -return false; -} - - - -bool IsDigit(wchar_t c, int base, int * digit) -{ - int d = 0; - - if( c >= '0' && c <= '9' ) - { - d = c - '0'; - } - else - if( c >= 'a' && c <= 'f' ) - { - d = c - 'a' + 10; - } - else - if( c >= 'A' && c <= 'F' ) - { - d = c - 'A' + 10; - } - else - { - if( digit ) - *digit = d; - - return false; - } - - if( digit ) - *digit = d; - - return d < base; -} - - - -wchar_t ToLower(wchar_t c) -{ - if( c >= 'A' && c <= 'Z' ) - return c - 'A' + 'a'; - - return c; -} - - -wchar_t ToUpper(wchar_t c) -{ - if( c >= 'a' && c <= 'z' ) - return c - 'a' + 'A'; - - return c; -} - - -void ToLower(std::wstring & s) -{ - std::wstring::size_type i; - - for(i=0 ; i - */ - -/* - * Copyright (c) 2017-2018, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef headerfile_picotools_convert_text -#define headerfile_picotools_convert_text - -#include - - - -namespace PT -{ - -bool IsWhite(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true); - - -bool IsDigit(wchar_t c, int base = 10, int * digit = 0); - - -template -CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) -{ - while( IsWhite(static_cast(*str), check_additional_chars, treat_new_line_as_white) ) - { - str += 1; - } - - return str; -} - - -/* - * - * str_end is pointing at the end of the string (the last item + one) - * - * return value is a pointer to the first white character after a non-white character at the end - * or to the last+one if there is no any white characters - * - */ -template -CharType * SkipWhiteFromBack(CharType * str_begin, CharType * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true) -{ - while( str_end > str_begin && IsWhite(static_cast(*(str_end-1)), check_additional_chars, treat_new_line_as_white) ) - { - str_end -= 1; - } - - return str_end; -} - - -template -CharType * SkipWhiteFromBack(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) -{ - CharType * str_begin = str; - - while( *str != 0 ) - { - str += 1; - } - - return SkipWhiteFromBack(str_begin, str, check_additional_chars, treat_new_line_as_white); -} - - -wchar_t ToLower(wchar_t c); -wchar_t ToUpper(wchar_t c); - - -// change to a template -void ToLower(std::wstring & s); -void ToUpper(std::wstring & s); - - - - - - -template -int CompareNoCase(const StringType1 * str1, const StringType2 * str2) -{ - while( *str1 && *str2 && ToLower(*str1) == ToLower(*str2) ) - { - ++str1; - ++str2; - } - - if( *str1 == 0 && *str2 == 0 ) - return 0; - - return (int)ToLower(*str1) - (int)ToLower(*str2); -} - - -template -int CompareNoCase(const StringType1 & str1, const StringType2 & str2) -{ - return CompareNoCase(str1.c_str(), 
str2.c_str()); -} - - -template -int CompareNoCasep(const StringType1 * str1, const StringType2 * str2) -{ - return CompareNoCase(str1, str2); -} - - - - -template -int CompareNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2) -{ - while( str1_begin < str1_end && *str2 && ToLower(*str1_begin) == ToLower(*str2) ) - { - ++str1_begin; - ++str2; - } - - if( str1_begin == str1_end && *str2 == 0 ) - return 0; - - wchar_t str1_char = 0; - - if( str1_begin < str1_end ) - str1_char = *str1_begin; - - return (int)ToLower(str1_char) - (int)ToLower(*str2); -} - - - - - -template -bool EqualNoCase(const StringType1 * str1, const StringType2 * str2) -{ - return CompareNoCase(str1, str2) == 0; -} - - -template -bool EqualNoCase(const StringType1 & str1, const StringType2 & str2) -{ - return EqualNoCase(str1.c_str(), str2.c_str()); -} - - -template -bool EqualNoCasep(const StringType1 * str1, const StringType2 * str2) -{ - return EqualNoCase(str1, str2); -} - - - -template -bool EqualNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2) -{ - return CompareNoCase(str1_begin, str1_end, str2) == 0; -} - - - - - - - -template -bool IsSubStringp(const StringType1 * short_str, const StringType2 * long_str) -{ - while( *short_str && *long_str && wchar_t(*short_str) == wchar_t(*long_str) ) - { - ++short_str; - ++long_str; - } - - if( *short_str == 0 ) - return true; - -return false; -} - - -template -bool IsSubString(const StringType1 * short_str, const StringType2 * long_str) -{ - return IsSubStringp(short_str, long_str); -} - - -template -bool IsSubString(const StringType1 & short_str, const StringType2 & long_str) -{ - return IsSubStringp(short_str.c_str(), long_str.c_str()); -} - - -template -bool IsSubStringNoCasep(const StringType1 * short_str, const StringType2 * long_str) -{ - while( *short_str && *long_str && ToLower(*short_str) == ToLower(*long_str) ) - { - ++short_str; - ++long_str; - } - - if( 
*short_str == 0 ) - return true; - -return false; -} - - -template -bool IsSubStringNoCase(const StringType1 * short_str, const StringType2 * long_str) -{ - return IsSubStringNoCasep(short_str, long_str); -} - - -template -bool IsSubStringNoCase(const StringType1 & short_str, const StringType2 & long_str) -{ - return IsSubStringNoCasep(short_str.c_str(), long_str.c_str()); -} - - - - - -} - - -#endif diff --git a/date/Makefile b/date/Makefile deleted file mode 100644 index a17267b..0000000 --- a/date/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=date.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. -f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/date/Makefile.dep b/date/Makefile.dep deleted file mode 100644 index 4b933e3..0000000 --- a/date/Makefile.dep +++ /dev/null @@ -1,3 +0,0 @@ -# DO NOT DELETE - -date.o: date.h ../convert/inttostr.h diff --git a/date/Makefile.o.dep b/date/Makefile.o.dep deleted file mode 100644 index 1e1c891..0000000 --- a/date/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = date.o \ No newline at end of file diff --git a/log/Makefile b/log/Makefile deleted file mode 100644 index bed6eba..0000000 --- a/log/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=log.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. 
-f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/log/Makefile.dep b/log/Makefile.dep deleted file mode 100644 index 1a14a56..0000000 --- a/log/Makefile.dep +++ /dev/null @@ -1,9 +0,0 @@ -# DO NOT DELETE - -filelog.o: filelog.h ../textstream/textstream.h ../space/space.h -filelog.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h -filelog.o: ../membuffer/membuffer.h ../textstream/types.h ../utf8/utf8.h -log.o: log.h ../textstream/textstream.h ../space/space.h -log.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h -log.o: ../membuffer/membuffer.h ../textstream/types.h filelog.h -log.o: ../utf8/utf8.h diff --git a/log/Makefile.o.dep b/log/Makefile.o.dep deleted file mode 100644 index 3eff2ad..0000000 --- a/log/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = filelog.o log.o \ No newline at end of file diff --git a/mainparser/Makefile b/mainparser/Makefile deleted file mode 100644 index a451264..0000000 --- a/mainparser/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=mainparser.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. 
-f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/mainparser/Makefile.dep b/mainparser/Makefile.dep deleted file mode 100644 index ad09e57..0000000 --- a/mainparser/Makefile.dep +++ /dev/null @@ -1,3 +0,0 @@ -# DO NOT DELETE - -mainparser.o: mainparser.h diff --git a/mainparser/Makefile.o.dep b/mainparser/Makefile.o.dep deleted file mode 100644 index 0a6c3a7..0000000 --- a/mainparser/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = mainparser.o \ No newline at end of file diff --git a/mainparser/mainparser.cpp b/mainparser/mainparser.cpp deleted file mode 100644 index 6a67449..0000000 --- a/mainparser/mainparser.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2011-2012, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "mainparser.h" -#include - - -namespace PT -{ - - -MainParser::MainParser() -{ - argsize = 0; - arg = 0; - Reset(); -} - - - -MainParser::MainParser(int argc, const char ** argv) -{ - Set(argc, argv); - Reset(); -} - - - -void MainParser::Set(int argc, const char ** argv) -{ - argsize = argc; - arg = argv; - Reset(); -} - - - -void MainParser::Reset() -{ - argindex = 1; - offset = 0; - has_single_param = false; - has_double_param = false; -} - - - -char MainParser::GetSingleParam() -{ - if( !has_single_param ) - return 0; - - if( last_single_param != 0 ) - return last_single_param; - - Advance(); - - if( argindex >= argsize ) - return 0; - - last_single_param = arg[argindex][offset]; - offset += 1; - -return last_single_param; -} - - - -bool MainParser::IsSingleParam(char c) -{ - return GetSingleParam() == c; -} - - -const char * MainParser::GetDoubleParam() -{ - empty = 0; - - if( !has_double_param ) - return ∅ - - if( last_double_param != &empty ) - return last_double_param; - - Advance(); - - if( argindex >= argsize ) - return ∅ - - last_double_param = &arg[argindex][offset]; - offset = 0; - argindex += 1; - -return last_double_param; -} - - -bool 
MainParser::IsDoubleParam(const char * param) -{ - return strcmp(GetDoubleParam(), param) == 0; -} - - -const char * MainParser::GetValue() -{ - empty = 0; - Advance(); - - if( argindex >= argsize ) - return ∅ - - const char * value = &arg[argindex][offset]; - offset = 0; - argindex += 1; - has_single_param = false; - has_double_param = false; - -return value; -} - - - -bool MainParser::NextParam() -{ - bool was_single_param = has_single_param; - has_single_param = false; - has_double_param = false; - last_single_param = 0; - last_double_param = ∅ - empty = 0; - - if( Advance() ) - was_single_param = false; - - if( argindex >= argsize ) - return false; - - if( arg[argindex][offset]=='-' ) - { - if( arg[argindex][offset+1]=='-' ) - { - has_double_param = true; - offset += 2; - } - else - { - has_single_param = true; - offset += 1; - } - } - else - { - if( was_single_param ) - has_single_param = true; - } - -return has_single_param || has_double_param; -} - - - -bool MainParser::IsEnd() -{ - Advance(); - -return argindex >= argsize; -} - - - -bool MainParser::HasSingleParam() -{ - return has_single_param; -} - - - -bool MainParser::HasDoubleParam() -{ - return has_double_param; -} - - - -bool MainParser::Advance() -{ -bool was_incremented = false; - - while( argindex < argsize && arg[argindex][offset] == 0 ) - { - offset = 0; - argindex += 1; - was_incremented = true; - } - -return was_incremented; -} - - -} // namespace - - diff --git a/mainparser/mainparser.h b/mainparser/mainparser.h deleted file mode 100644 index 248f22a..0000000 --- a/mainparser/mainparser.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2011-2012, Tomasz Sowa - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef headerfile_picotools_mainparser_mainparser -#define headerfile_picotools_mainparser_mainparser - - -namespace PT -{ - - -/* - a very little parser for main(int argc, char ** argv) parameters - look in sample/sample.cpp how to use the parser -*/ -class MainParser -{ -public: - - MainParser(); - MainParser(int argc, const char ** argv); - - // setting arguments passed to main(int argc, char ** argv) function - void Set(int argc, const char ** argv); - - // reseting the current state of parsing - // now you can start parsing from the beginning - // you don't have to call it for the first time - // (is automatically called by the Set method) - void Reset(); - - // checking if there is a next single or double parameter - // this method represents the main loop of checking parameters - bool NextParam(); - - // returning a single parameter (if exists) or '\0' otherwise - // single parameter means a parameter with '-' at the beginning e.g. "-a" - // next call to this method (without calling NextParam) returns the same value - // this method should be called after NextParam() - char GetSingleParam(); - - // calling GetSingleParam() and comparign with 'c' - bool IsSingleParam(char c); - - // returning a string for a double parameter or an empty string if there is no such a parameter - // double parameter means a parameter with '--' at the beginning e.g. 
"--output" - // next call to this method (without calling NextParam) returns the same value - // GetDoubleParam() should be called after NextParam() - // this method never returns a null pointer -- if there is no a param name (end of the string) - // a pointer to en empty string will be returned - const char * GetDoubleParam(); - - // calling GetDoubleParam() and comparing with 'param' - // so you don't have to call strcmp directly - bool IsDoubleParam(const char * param); - - // returning a string representing a value - // you have to know which parameter requires a value - // and if such a parameter is found then use this method to obtain the value - // the method advances the current pointer so next call to this method return a next value - // you can call GetValue() even when NextParam() has returned false - // in such a case this gets you the last values (those at the end of the parameter list) - // this method never returns a null pointer -- if there is no a value (end of the string) - // a pointer to en empty string will be returned - const char * GetValue(); - - // returning true if the input string is finished - // there are no more parameters or values - bool IsEnd(); - - // returning true if there is a single parameter - // should be called after NextParam() - bool HasSingleParam(); - - // returning true if there is a double parameter - // should be called after NextParam() - bool HasDoubleParam(); - -private: - - bool Advance(); - - int argindex; - int offset; - int argsize; - const char ** arg; - char empty; - bool has_single_param; - bool has_double_param; - char last_single_param; - const char * last_double_param; -}; - - -} // namespace - - -#endif diff --git a/mainparser/sample/Makefile b/mainparser/sample/Makefile deleted file mode 100644 index 81942f4..0000000 --- a/mainparser/sample/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -output=sample - -all: $(output) - -$(output): sample.cpp ../mainparser.h ../mainparser.cpp - g++ -o $(output) sample.cpp 
../mainparser.cpp - -clean: - rm -f $(output) - rm -f $(output).exe diff --git a/mainspaceparser/Makefile b/mainspaceparser/Makefile deleted file mode 100644 index 5e52fc0..0000000 --- a/mainspaceparser/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=mainspaceparser.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. -f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/mainspaceparser/Makefile.dep b/mainspaceparser/Makefile.dep deleted file mode 100644 index ed0b0f8..0000000 --- a/mainspaceparser/Makefile.dep +++ /dev/null @@ -1,6 +0,0 @@ -# DO NOT DELETE - -mainspaceparser.o: mainspaceparser.h ../space/space.h ../textstream/types.h -mainspaceparser.o: ../utf8/utf8.h ../textstream/textstream.h ../date/date.h -mainspaceparser.o: ../convert/inttostr.h ../membuffer/membuffer.h -mainspaceparser.o: ../textstream/types.h diff --git a/mainspaceparser/Makefile.o.dep b/mainspaceparser/Makefile.o.dep deleted file mode 100644 index 029147f..0000000 --- a/mainspaceparser/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = mainspaceparser.o \ No newline at end of file diff --git a/mainspaceparser/mainspaceparser.cpp b/mainspaceparser/mainspaceparser.cpp deleted file mode 100644 index 59bae06..0000000 --- a/mainspaceparser/mainspaceparser.cpp +++ /dev/null @@ -1,297 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2016-2017, Tomasz Sowa - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#include "mainspaceparser.h" -#include "utf8/utf8.h" -#include - -// REMOVE ME -#include - - -namespace PT -{ - - -MainSpaceParser::MainSpaceParser() -{ - space = 0; - options_space = 0; - use_utf8 = true; - last_status = status_ok; -} - - - -MainSpaceParser::~MainSpaceParser() -{ - - -} - - -void MainSpaceParser::UTF8(bool utf8) -{ - use_utf8 = utf8; -} - - -void MainSpaceParser::SetSpace(Space & space_ref) -{ - space = &space_ref; - options_space = 0; -} - - -std::wstring & MainSpaceParser::GetErrorToken() -{ - return last_error_token; -} - - -MainSpaceParser::Status MainSpaceParser::Parse(int argc, const char ** argv) -{ - if( !space ) - { - return status_space_not_assigned; - } - - options_space = space->FindSpace(L"options"); - last_status = status_ok; - last_error_token.clear(); - - for(size_t i=1 ; i < (size_t)argc && last_status == status_ok ; ) - { - Parse((size_t)argc, argv, i); - } - - return last_status; -} - - -void MainSpaceParser::Parse(size_t argc, const char ** argv, size_t & argv_index) -{ - const char * pchar = argv[argv_index]; - - if( *pchar == '-' ) - { - if( *(pchar+1) == '-' ) - { - ParseMultiArgument(argc, argv, argv_index); - } - else - { - ParseSingleArgument(argc, argv, argv_index); - } - } - else - { - last_status = status_syntax_error; - ConvertStr(pchar, last_error_token); - } -} - - -void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst) -{ - if( use_utf8 ) - { - PT::UTF8ToWide(src,dst); - } - else - { - dst.clear(); - - for( ; *src ; ++src ) - dst += (wchar_t)(unsigned char)*src; - } -} - - -void MainSpaceParser::ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index) -{ - ConvertStr(argv[argv_index] + 1, wide_arg); - const wchar_t * wide_pchar = wide_arg.c_str(); - - temp_list_val.clear(); - bool was_option = false; - argv_index += 1; - - for( ; *wide_pchar && !was_option ; ++wide_pchar ) - { - temp_arg = *wide_pchar; - size_t opt_size = RequireOption(temp_arg); - - if( opt_size > 0 ) - { 
- was_option = true; - - if( *(wide_pchar+1) ) - { - temp_val = wide_pchar + 1; - temp_list_val.push_back(temp_val); - opt_size -= 1; - } - - for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index) - { - ConvertStr(argv[argv_index], temp_val); - temp_list_val.push_back(temp_val); - } - - if( opt_size > 0 ) - { - last_status = status_reading_eof; - last_error_token.clear(); - } - } - - temp_val.clear(); - AddValueToItem(temp_arg, temp_val, temp_list_val); - } -} - - -void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index) -{ - ConvertStr(argv[argv_index] + 2, temp_arg); - argv_index += 1; - - size_t opt_size = RequireOption(temp_arg); - temp_list_val.clear(); - - if( opt_size > 0 ) - { - for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index) - { - ConvertStr(argv[argv_index], temp_val); - temp_list_val.push_back(temp_val); - } - - if( opt_size > 0 ) - { - last_status = status_reading_eof; - last_error_token.clear(); - } - } - - temp_val.clear(); - AddValueToItem(temp_arg, temp_val, temp_list_val); -} - - - -void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector & list) -{ - std::wstring * val = space->GetFirstValue(name); - - if( !val ) - { - if( list.empty() ) - space->Add(name, empty_value); - else - if( list.size() == 1 ) - space->Add(name, list[0]); - else - space->table[name] = list; // !! 
IMPROVE ME there'll be a new api in space - } - else - { - PT::Space::Table::iterator i = space->table.find(name); - PT::Space::Value * table_value; - - if( i == space->table.end() ) - { - table_value = &space->table[name]; - table_value->push_back(*val); - //space->table_single.erase(name); - } - else - { - table_value = &i->second; - } - - if( list.empty() ) - { - table_value->push_back(empty_value); - } - else - { - for(const auto & list_item : list) - table_value->push_back(list_item); - } - } -} - - - -size_t MainSpaceParser::RequireOption(const std::wstring & arg) -{ - size_t res = 0; - - if( options_space ) - { - std::wstring * val = options_space->GetFirstValue(arg); - - if( val ) - { - /* - * IMPLEMENT ME - * add a converter to convert/inttostr.h - * - */ - - long res_long = wcstol(val->c_str(), 0, 10); - - if( res_long < 0 ) - res_long = 0; - - res = (size_t)res_long; - - //std::wcout << L"argument " << arg << L" needs " << res << L" options" << std::endl; - } - } - - return res; -} - - - -} // namespace - - diff --git a/space/Makefile b/space/Makefile deleted file mode 100644 index fc2ffe6..0000000 --- a/space/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=space.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. 
-f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/space/Makefile.dep b/space/Makefile.dep deleted file mode 100644 index 23c03ed..0000000 --- a/space/Makefile.dep +++ /dev/null @@ -1,17 +0,0 @@ -# DO NOT DELETE - -jsontospaceparser.o: jsontospaceparser.h space.h ../textstream/types.h -jsontospaceparser.o: ../utf8/utf8.h ../textstream/textstream.h -jsontospaceparser.o: ../space/space.h ../date/date.h ../convert/inttostr.h -jsontospaceparser.o: ../membuffer/membuffer.h ../textstream/types.h -space.o: space.h ../textstream/types.h ../utf8/utf8.h -space.o: ../textstream/textstream.h ../space/space.h ../date/date.h -space.o: ../convert/inttostr.h ../membuffer/membuffer.h ../textstream/types.h -space.o: ../convert/convert.h ../convert/inttostr.h -space.o: ../convert/patternreplacer.h ../convert/strtoint.h ../convert/text.h -space.o: ../convert/misc.h -spaceparser.o: spaceparser.h space.h ../textstream/types.h ../utf8/utf8.h -spaceparser.o: ../textstream/textstream.h ../space/space.h ../date/date.h -spaceparser.o: ../convert/inttostr.h ../membuffer/membuffer.h -spaceparser.o: ../textstream/types.h -spacetojson.o: spacetojson.h space.h ../textstream/types.h diff --git a/space/Makefile.o.dep b/space/Makefile.o.dep deleted file mode 100644 index c773c04..0000000 --- a/space/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = jsontospaceparser.o space.o spaceparser.o spacetojson.o \ No newline at end of file diff --git a/space/jsontospaceparser.cpp b/space/jsontospaceparser.cpp deleted file mode 100644 index 0799054..0000000 --- a/space/jsontospaceparser.cpp +++ /dev/null @@ -1,870 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2012-2017, Tomasz Sowa - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include -#include "jsontospaceparser.h" -#include "utf8/utf8.h" - - -namespace PT -{ - - - -JSONToSpaceParser::JSONToSpaceParser() -{ - root_space = 0; - SetDefault(); -} - - -void JSONToSpaceParser::SetSpace(Space * pspace) -{ - root_space = pspace; -} - - -void JSONToSpaceParser::SetSpace(Space & pspace) -{ - root_space = &pspace; -} - - -void JSONToSpaceParser::SetDefault() -{ - // you can change this separators to what you want - // you shoud not use only white characters here (as expected by IsWhite() method) - // and new line characters ('\n') - separator = ':'; - space_start = '{'; - space_end = '}'; - table_start = '['; - table_end = ']'; - option_delimiter = ','; - skip_empty = false; - use_escape_char = true; - input_as_utf8 = true; - max_nested_level = 1000; - create_table_as_space = true; -} - - - -void JSONToSpaceParser::SkipEmpty(bool skip) -{ - skip_empty = skip; -} - - -void JSONToSpaceParser::UseEscapeChar(bool escape) -{ - use_escape_char = escape; -} - - -void JSONToSpaceParser::UTF8(bool utf) -{ - input_as_utf8 = utf; -} - - -void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_) -{ - create_table_as_space = create_table_as_space_; -} - - -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name) -{ - reading_from_file = true; - - file.clear(); - file.open(file_name, std::ios_base::binary | std::ios_base::in); - - if( file ) - { - Parse(); - file.close(); - } - else - { - status = cant_open_file; - } - -return status; -} - - - -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name) -{ - return Parse(file_name.c_str()); -} - - - - -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name) -{ - PT::WideToUTF8(file_name, afile_name); - return Parse(afile_name.c_str()); -} - - - -JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name) -{ - return Parse(file_name.c_str()); -} - - - -JSONToSpaceParser::Status 
JSONToSpaceParser::ParseString(const char * str) -{ - reading_from_file = false; - reading_from_wchar_string = false; - pchar_ascii = str; - pchar_unicode = 0; - - Parse(); - -return status; -} - - -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str) -{ - return ParseString(str.c_str()); -} - - -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str) -{ - reading_from_file = false; - reading_from_wchar_string = true; - pchar_unicode = str; - pchar_ascii = 0; - - Parse(); - -return status; -} - - -JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str) -{ - return ParseString(str.c_str()); -} - - -void JSONToSpaceParser::Parse() -{ - if( !root_space ) - { - status = no_space; - return; - } - - line = 1; - status = ok; - space = root_space; - skipped = 0; - current_nested_level = 0; - ReadChar(); - SkipWhite(); - - if( lastc == space_start ) - { - ParseSpace(false, false); - } - else - if( lastc == table_start ) - { - ParseTable(false); - } - else - { - // '{' or '[' expected - status = syntax_error; - } - - if( status == ok && space != root_space ) - { - // last closing '}' characters are missing (closing a space) - status = syntax_error; - } - - token.clear(); - key.clear(); - value.clear(); -} - - - - - -void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space) -{ - //current_nested_level += 1; - - if( current_nested_level > max_nested_level ) - { - status = max_nested_spaces_exceeded; - return; - } - - if( insert_new_space ) - { - SpaceStarts(has_space_name); - } - else - { - // insert_new_space as a false is used only when parsing - // the first space (root_space) - ReadChar(); // skipping the first space character '{' - } - - ParseKeyValuePairs(); - - if( insert_new_space ) - { - SpaceEnds(); - } - else - { - ReadChar(); // skipping the last space character '}' - } - - //current_nested_level -= 1; -} - - -void JSONToSpaceParser::ParseKeyValuePairs() -{ - 
SkipWhite(); - - while( status == ok && lastc != space_end && lastc != -1 ) - { - ReadKey(); - SkipWhite(); - - if( lastc == separator ) - { - value.clear(); - ReadChar(); // skipping separator ':' - ReadValue(false, false, true, true); - SkipWhite(); - - if( lastc == option_delimiter ) - { - ReadChar(); // skipping delimiter ',' - } - else - if( lastc != space_end && status == ok ) - { - status = syntax_error; - } - } - else - if( status == ok ) - { - status = syntax_error; - } - } -} - - -void JSONToSpaceParser::ParseTextTable() -{ - ReadChar(); // skipping table start character '[' - SkipWhite(); - value.clear(); - - while( status == ok && lastc != table_end && lastc != -1 ) - { - // all space objects inside or tables will be skipped - ReadValue(true); - SkipWhite(); - - if( lastc == option_delimiter ) - { - ReadChar(); // skipping delimiter ',' - } - else - if( lastc != table_end && status == ok ) - { - status = syntax_error; - } - } - - if( lastc == table_end ) - ReadChar(); // skipping end table character ']' - - AddKeyValuePair(); -} - - -void JSONToSpaceParser::ParseObjectsTable(bool has_key) -{ - ReadChar(); // skipping table start character '[' - SpaceStarts(has_key, false); - SkipWhite(); - - while( status == ok && lastc != table_end && lastc != -1 ) - { - // 'value' table will not be used here - // (we are creating spaces) - ReadValue(false, true); - SkipWhite(); - - if( lastc == option_delimiter ) - { - ReadChar(); // skipping delimiter ',' - } - else - if( lastc != table_end && status == ok ) - { - status = syntax_error; - } - } - - if( lastc == table_end ) - ReadChar(); // skipping end table character ']' - - SpaceEnds(false); -} - - -void JSONToSpaceParser::ParseTable(bool has_key) -{ - if( create_table_as_space ) - { - //current_nested_level += 1; - - if( current_nested_level > max_nested_level ) - { - status = max_nested_spaces_exceeded; - } - else - { - ParseObjectsTable(has_key); - } - - //current_nested_level -= 1; - } - else - { - // 
ParseTextTable will not create a next level - - if( !has_key ) - key.clear(); - - ParseTextTable(); // ParseTextTable will use key - } -} - - - -void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char) -{ - Space * new_space = new Space(); - space->spaces.push_back(new_space); - new_space->parent = space; - - if( has_space_name ) - new_space->name = key; - - space = new_space; - - if( skip_space_char ) - ReadChar(); // skipping space starts character '{' -} - - -void JSONToSpaceParser::SpaceEnds(bool skip_space_char) -{ - if( space == root_space ) - { - // there cannot be a loose list end character in the global space - status = syntax_error; - } - else - { - space = space->parent; - - if( skip_space_char ) - ReadChar(); // skipping closing space character '}' - } -} - - - -bool JSONToSpaceParser::IsWhite(int c) -{ - // 13 (\r) is at the end of a line in a dos file \r\n - // 160 is an unbreakable space - if( c==' ' || c=='\t' || c==13 || c==160 || c==10 ) - return true; - -return false; -} - - - -void JSONToSpaceParser::SkipWhite() -{ - while( IsWhite(lastc) ) - { - ReadChar(); - } -} - - - -void JSONToSpaceParser::Trim(std::wstring & s) -{ -std::wstring::size_type i; - - if( s.empty() ) - return; - - // looking for white characters at the end - for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i); - - if( i==0 && IsWhite(s[i]) ) - { - // the whole string has white characters - s.clear(); - return; - } - - // deleting white characters at the end - if( i != s.size() - 1 ) - s.erase(i+1, std::wstring::npos); - - // looking for white characters at the beginning - for(i=0 ; itable.find(var); - - if( i != space->table.end() ) - space->table.erase(i); -} - - - - - -void JSONToSpaceParser::ReadTokenQuoted() -{ - ReadChar(); // skipping the first quotation mark - - while( lastc != -1 && (char_was_escaped || lastc != '"') ) - { - token += static_cast(lastc); - ReadChar(); - } - - if( !char_was_escaped && lastc == '"' ) - ReadChar(); // skipping the last 
quotation mark - else - status = syntax_error; -} - - -void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) -{ - while( true ) - { - if( lastc == -1 || - (!char_was_escaped && - ( - lastc == space_end || - lastc == table_end || - (white_delimit && IsWhite(lastc)) || - (new_line_delimit && lastc == '\n') || - (delimit1 != -1 && lastc == delimit1) || - (delimit2 != -1 && lastc == delimit2) - ) ) ) - - { - break; - } - - token += static_cast(lastc); - ReadChar(); - } - - Trim(token); -} - - -void JSONToSpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) -{ - token.clear(); - SkipWhite(); - - if( !char_was_escaped && lastc == '"' ) - ReadTokenQuoted(); - else - ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2); -} - - -void JSONToSpaceParser::ReadKey() -{ - SkipWhite(); - ReadToken(false, true, separator, table_start); - key = token; -} - - -void JSONToSpaceParser::SkipText() -{ - ReadChar(); // skipping the first quote character '"' - - while( lastc != '"' && lastc != -1 ) - ReadChar(); -} - - -void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char) -{ -int mark = 1; - - skipped += 1; - ReadChar(); // skipping the first object character '{' or '[' - - do - { - if( lastc == '"' ) - SkipText(); - else - if( lastc == end_char ) - mark -= 1; - else - if( lastc == start_char ) - mark += 1; - - ReadChar(); - } - while( mark > 0 && lastc != -1 ); -} - - -void JSONToSpaceParser::SkipObject() -{ - SkipObjectOrTable(space_start, space_end); -} - - -void JSONToSpaceParser::SkipTable() -{ - SkipObjectOrTable(table_start, table_end); -} - - - -//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name) -void JSONToSpaceParser::ReadValue(bool skip_object_or_table, - bool add_space_for_text_value, - bool has_key, - bool auto_add_text_value) -{ - SkipWhite(); - - if( lastc == space_start ) - { 
- if( skip_object_or_table ) - SkipObject(); - else - ParseSpace(has_key); - } - else - if( lastc == table_start ) - { - if( skip_object_or_table ) - SkipTable(); - else - ParseTable(has_key); - } - else - { - if( add_space_for_text_value ) - { - SpaceStarts(false, false); - ReadToken(false, true, option_delimiter, -1); - space->name = token; - SpaceEnds(false); - } - else - { - ReadToken(false, true, option_delimiter, -1); - value.push_back(token); - - if( auto_add_text_value ) - AddKeyValuePair(); - } - } -} - - -void JSONToSpaceParser::AddKeyValuePair() -{ - if( value.empty() && skip_empty ) - { - DeleteFromTable(key); - return; - } - - space->table[key] = value; -} - - - - - - -int JSONToSpaceParser::ReadUTF8Char() -{ -int c; -bool correct; - - lastc = -1; - - do - { - PT::UTF8ToInt(file, c, correct); - - if( !file ) - return lastc; - } - while( !correct ); - - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - -int JSONToSpaceParser::ReadASCIIChar() -{ - lastc = file.get(); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - - -int JSONToSpaceParser::ReadCharFromWcharString() -{ - if( *pchar_unicode == 0 ) - lastc = -1; - else - lastc = *(pchar_unicode++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int JSONToSpaceParser::ReadCharFromUTF8String() -{ -int c; -bool correct; - - lastc = -1; - - do - { - size_t len = PT::UTF8ToInt(pchar_ascii, c, correct); - pchar_ascii += len; - } - while( *pchar_ascii && !correct ); - - if( correct ) - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; - -} - - -int JSONToSpaceParser::ReadCharFromAsciiString() -{ - if( *pchar_ascii == 0 ) - lastc = -1; - else - lastc = *(pchar_ascii++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int JSONToSpaceParser::ReadCharNoEscape() -{ - if( reading_from_file ) - { - if( input_as_utf8 ) - return ReadUTF8Char(); - else - return ReadASCIIChar(); - } - else - { - if( reading_from_wchar_string ) - { - return 
ReadCharFromWcharString(); - } - else - { - if( input_as_utf8 ) - return ReadCharFromUTF8String(); - else - return ReadCharFromAsciiString(); - } - } -} - -bool JSONToSpaceParser::IsHexDigit(wchar_t c) -{ - return ((c>='0' && c<='9') || - (c>='a' && c<='f') || - (c>='A' && c<='F') ); -} - - -int JSONToSpaceParser::HexToInt(wchar_t c) -{ - if( c>='0' && c<='9' ) - return c - '0'; - - if( c>='a' && c<='f' ) - return c - 'a' + 10; - - if( c>='A' && c<='F' ) - return c - 'A' + 10; - -return 0; -} - - -void JSONToSpaceParser::ReadUnicodeCodePoint() -{ -wchar_t c; -int value = 0; - - for(int i=0 ; i<4 ; ++i) - { - c = ReadCharNoEscape(); - - if( !IsHexDigit(c) ) - { - status = syntax_error; - return; - } - - value = (value << 4) | HexToInt(c); - } - - lastc = (wchar_t)value; -} - - -int JSONToSpaceParser::ReadChar() -{ - char_was_escaped = false; - ReadCharNoEscape(); - - if( use_escape_char && lastc == '\\' ) - { - char_was_escaped = true; - ReadCharNoEscape(); - - switch(lastc) - { - case '0': lastc = 0; break; - case 't': lastc = '\t'; break; - case 'r': lastc = '\r'; break; - case 'n': lastc = '\n'; break; - case 'b': lastc = 0x08; break; - case 'f': lastc = 0x0c; break; - case 'u': ReadUnicodeCodePoint(); break; - // in other cases we return the last character - } - } - -return lastc; -} - - - -} // namespace - - - - diff --git a/space/jsontospaceparser.h b/space/jsontospaceparser.h deleted file mode 100644 index 385aced..0000000 --- a/space/jsontospaceparser.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2012-2017, Tomasz Sowa - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef headerfile_picotools_space_jsonspaceparser -#define headerfile_picotools_space_jsonspaceparser - -#include -#include "space.h" - - - -namespace PT -{ - - - -class JSONToSpaceParser -{ -public: - - - /* - ctor -- setting default values (SetDefault() method) - */ - JSONToSpaceParser(); - - - /* - setting the root space - */ - void SetSpace(Space * pspace); - void SetSpace(Space & pspace); - - - /* - setting options of the parser to the default values - utf8 etc. - */ - void SetDefault(); - - - /* - status of parsing - */ - enum Status { ok, cant_open_file, syntax_error, max_nested_spaces_exceeded, no_space }; - - - /* - the last status of parsing, set by Parse() methods - */ - Status status; - - - /* - a number of a line in which there is a syntax_error - */ - int line; - - - /* - how many objects were skipped - used in parsing tables when create_table_as_space is false - */ - size_t skipped; - - - /* - main methods used to parse - file_name is the path to a file - */ - Status Parse(const char * file_name); - Status Parse(const std::string & file_name); - Status Parse(const wchar_t * file_name); - Status Parse(const std::wstring & file_name); - - - /* - main methods used to parse - str - input string (either 8bit ascii or UTF-8 -- see UTF8() method) - */ - Status ParseString(const char * str); - Status ParseString(const std::string & str); - - - /* - main methods used to parse - here input string is always in unicode (wide characters) - */ - Status ParseString(const wchar_t * str); - Status ParseString(const std::wstring & str); - - - /* - if true then empty values and lists, e.g: - option = - option2 = () - will be omitted (not inserted to 'table') - default: false - */ - void SkipEmpty(bool skip); - - - /* - '\' character is used to escape other characters in a quoted string - so "some \t t\"ext" will produce "some t t"ext" - default: true - */ - void UseEscapeChar(bool escape); - - - /* - if true then the input file or string (char* or std::string) 
is treated as UTF-8 - */ - void UTF8(bool utf); - - - /* - - default: true - */ - void CreateTableAsSpace(bool create_table_as_space_); - -private: - - - /* - current space set by SetSpace(); - */ - Space * root_space; - - - /* - a space in which we are now - */ - Space * space; - - - /* - true if Parse() method was called - false if ParseString() was called - */ - bool reading_from_file; - - - /* - pointers to the current character - if ParseString() is in used - */ - const char * pchar_ascii; - const wchar_t * pchar_unicode; - - - /* - true if ParseString(wchar_t *) or ParseString(std::wstring&) was called - */ - bool reading_from_wchar_string; - - - /* - last read token - */ - std::wstring token; - - - /* - last read key - */ - std::wstring key; - - - /* - last read list - */ - Space::Value value; - - - /* - separator between a variable and a value, default: '=' - */ - int separator; - - - /* - space starting character, default: '{' - */ - int space_start; - - - /* - space ending character, default: '}' - */ - int space_end; - - - /* - table starting character, default: '[' - */ - int table_start; - - - /* - table ending character, default: ']' - */ - int table_end; - - - /* - option delimiter, default: ',' - */ - int option_delimiter; - - - /* - last read char - or -1 if the end - */ - int lastc; - - - /* - true if the lastc was escaped (with a backslash) - we have to know if the last sequence was \" or just " - */ - bool char_was_escaped; - - - /* - current file - */ - std::ifstream file; - - - /* - if true then empty lists, e.g: - option = - option2 = () - will be omitted (not inserted to 'table') - default: false - */ - bool skip_empty; - - - /* - input file is in UTF-8 - default: true - */ - bool input_as_utf8; - - - /* - if true you can use an escape character '\' in quoted values - */ - bool use_escape_char; - - - /* - if false we only allow the tables to consists of text items (numeric, boolean too) - objects are not allowed then - default: true - */ - 
bool create_table_as_space; - - - /* - - */ - size_t current_nested_level; - - - /* - - default: 1000; - */ - size_t max_nested_level; - - - std::string afile_name; - - void Parse(); - void ParseSpace(bool has_space_name, bool insert_new_space = true); - void ParseTextTable(); - void ParseObjectsTable(bool has_key); - void ParseTable(bool has_key); - void ParseKeyValuePairs(); - - void SkipText(); - void SkipObjectOrTable(int start_char, int end_char); - void SkipTable(); - void SkipObject(); - - void SpaceEnds(bool skip_space_char = true); - void SpaceStarts(bool has_space_name, bool skip_space_char = true); - - void DeleteFromTable(const std::wstring & var); - - void ReadTokenQuoted(); - void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); - void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); - void ReadKey(); - void ReadValue(bool skip_object_or_table = false, - bool add_space_for_text_value = false, - bool has_key = false, - bool auto_add_text_value = false); - - void AddKeyValuePair(); - int ReadUTF8Char(); - int ReadASCIIChar(); - int ReadCharFromWcharString(); - int ReadCharFromUTF8String(); - int ReadCharFromAsciiString(); - int ReadCharNoEscape(); - int ReadChar(); - bool IsWhite(int c); - void SkipWhite(); - void Trim(std::wstring & s); - bool IsHexDigit(wchar_t c); - int HexToInt(wchar_t c); - void ReadUnicodeCodePoint(); - -}; - - -} // namespace - - -#endif diff --git a/space/space.cpp b/space/space.cpp deleted file mode 100644 index 0798dd5..0000000 --- a/space/space.cpp +++ /dev/null @@ -1,1085 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2008-2018, Tomasz Sowa - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -//#include -#include -#include "space.h" -#include "utf8/utf8.h" -#include "textstream/textstream.h" -#include "convert/convert.h" - - -namespace PT -{ - - - -Space::Space() -{ - parent = nullptr; -} - - -Space::~Space() -{ - Clear(); -} - - -Space::Space(const Space & s) -{ - operator=(s); -} - - -Space & Space::operator=(const Space & s) -{ - Clear(); - - name = s.name; - table = s.table; - parent = s.parent; - - for(size_t i=0 ; iparent = this; - spaces.push_back(pspace); - } - -return *this; -} - - - - - - -void Space::Clear() -{ - name.clear(); - table.clear(); - - for(size_t i=0 ; isecond; - } -} - - -const Space::Value * Space::GetValue(const wchar_t * name) const -{ - tmp_name = name; - return GetValue(tmp_name); -} - - -const Space::Value * Space::GetValue(const std::wstring & name) const -{ - Table::const_iterator t = table.find(name); - - if( t == table.cend() ) - { - return nullptr; - } - else - { - return &t->second; - } -} - - - - -Space::Value * Space::GetValueNoCase(const wchar_t * name) -{ - tmp_name = name; - return GetValueNoCase(tmp_name); -} - - -Space::Value * Space::GetValueNoCase(const std::wstring & name) -{ - Table::iterator t = table.begin(); - - for( ; t != table.end() ; ++t) - { - if( PT::EqualNoCase(t->first, name) ) - { - return &t->second; - } - } - - return nullptr; -} - - - -const Space::Value * Space::GetValueNoCase(const wchar_t * name) const -{ - tmp_name = name; - return GetValueNoCase(tmp_name); -} - - -const Space::Value * Space::GetValueNoCase(const std::wstring & name) const -{ - Table::const_iterator t = table.begin(); - - for( ; t != table.end() ; ++t) - { - if( PT::EqualNoCase(t->first, name) ) - { - return &t->second; - } - } - - return nullptr; -} - - - -std::wstring * Space::GetFirstValue(const wchar_t * name) -{ - tmp_name = name; - return GetFirstValue(tmp_name); -} - - - -std::wstring * Space::GetFirstValue(const std::wstring & name) -{ - Table::iterator t = table.find(name); - - if( t == table.end() || 
t->second.empty() ) - { - return nullptr; - } - else - { - return &t->second[0]; - } -} - - -// CHECK ME -const std::wstring * Space::GetFirstValue(const wchar_t * name) const -{ - tmp_name = name; - return GetFirstValue(tmp_name); -} - - -const std::wstring * Space::GetFirstValue(const std::wstring & name) const -{ - Table::const_iterator t = table.find(name); - - if( t == table.end() || t->second.empty() ) - { - return nullptr; - } - else - { - return &t->second[0]; - } -} - - - - -bool Space::HasValue(const wchar_t * name, const wchar_t * value) -{ - tmp_name = name; - tmp_value_text = value; - -return HasValue(tmp_name, tmp_value_text); -} - -bool Space::HasValue(const wchar_t * name, const std::wstring & value) -{ - tmp_name = name; - -return HasValue(tmp_name, value); -} - - -bool Space::HasValue(const std::wstring & name, const wchar_t * value) -{ - tmp_value_text = value; - -return HasValue(name, tmp_value_text); -} - - -bool Space::HasValue(const std::wstring & name, const std::wstring & value) -{ - Table::const_iterator t = table.find(name); - - if( t != table.end() ) - { - for(size_t i=0 ; i < t->second.size() ; ++i) - if( t->second[i] == value ) - return true; - } - -return false; -} - - - - - -//std::wstring Space::Text(const wchar_t * name) const -//{ -// tmp_name = name; -// return Text(tmp_name, L""); -//} - - - -std::wstring Space::Text(const wchar_t * name, const wchar_t * def) const -{ - tmp_name = name; - return Text(tmp_name, def); -} - - -std::wstring Space::Text(const std::wstring & name, const wchar_t * def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - { - return *value; - } - else - { - if( def ) - return std::wstring(def); - else - return std::wstring(); - } -} - - -std::wstring Space::Text(const std::wstring & name, const std::wstring & def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - { - return *value; - } - else - { - return def; - } -} - - - - -//std::wstring & 
Space::TextRef(const wchar_t * name) -//{ -// tmp_name = name; -// return TextRef(tmp_name, L""); -//} - - -std::wstring & Space::TextRef(const wchar_t * name, const wchar_t * def) -{ - tmp_name = name; - return TextRef(tmp_name, def); -} - -std::wstring & Space::TextRef(const std::wstring & name, const wchar_t * def) -{ - Value * value; - Table::iterator t = table.find(name); - - if( t != table.end() ) - { - value = &t->second; - } - else - { - value = &table[name]; - } - - if( value->empty() ) - { - if( def ) - value->push_back(def); - else - value->push_back(std::wstring()); - } - - return (*value)[0]; -} - - -std::wstring & Space::TextRef(const std::wstring & name, const std::wstring & def) -{ - return TextRef(name, def.c_str()); -} - - - - -//std::string Space::TextA(const wchar_t * name) const -//{ -// tmp_name = name; -// return TextA(tmp_name, ""); -//} - - - -std::string Space::TextA(const wchar_t * name, const char * def) const -{ - tmp_name = name; - return TextA(tmp_name, def); -} - - -std::string Space::TextA(const std::wstring & name, const char * def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - { - std::string res; - PT::WideToUTF8(*value, res); - return res; - } - else - { - if( def ) - return std::string(def); - else - return std::string(); - } -} - - -std::string Space::TextA(const std::wstring & name, const std::string & def) const -{ - return TextA(name, def.c_str()); -} - - - -std::string Space::TextA(const wchar_t * name, const wchar_t * def) const -{ - tmp_name = name; - return TextA(tmp_name, def); -} - - -std::string Space::TextA(const std::wstring & name, const wchar_t * def) const -{ - const std::wstring * value = GetFirstValue(name); - std::string res; - - if( value ) - { - PT::WideToUTF8(*value, res); - return res; - } - else - { - PT::WideToUTF8(def, res); - return res; - } -} - -std::string Space::TextA(const std::wstring & name, const std::wstring & def) const -{ - return TextA(name, def.c_str()); 
-} - - - - -unsigned int Space::ToUInt(const std::wstring & value) -{ - return Toui_b(value.c_str()); -} - -int Space::ToInt(const std::wstring & value) -{ - return Toi_b(value.c_str()); -} - - -unsigned long Space::ToULong(const std::wstring & value) -{ - return Toul_b(value.c_str()); -} - -long Space::ToLong(const std::wstring & value) -{ - return Tol_b(value.c_str()); -} - - -unsigned long long Space::ToULongLong(const std::wstring & value) -{ - return Toull_b(value.c_str()); -} - -long long Space::ToLongLong(const std::wstring & value) -{ - return Toll_b(value.c_str()); -} - - -size_t Space::ToSize(const std::wstring & value) -{ - if( sizeof(size_t) == sizeof(unsigned int) ) - return ToUInt(value); - else - if( sizeof(size_t) == sizeof(unsigned long) ) - return ToULong(value); - else - return ToULongLong(value); -} - - -bool Space::ToBool(const std::wstring & value) -{ - const wchar_t * str_begin = SkipWhite(value.c_str()); - const wchar_t * str_end = SkipWhiteFromBack(str_begin); - - return (PT::EqualNoCase(str_begin, str_end, L"true") || - PT::EqualNoCase(str_begin, str_end, L"yes") || - PT::EqualNoCase(str_begin, str_end, L"1") - ); -} - - - - - - -int Space::Int(const wchar_t * name, int def) const -{ - tmp_name = name; - return Int(tmp_name, def); -} - - - - -int Space::Int(const std::wstring & name, int def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToInt(*value); - -return def; -} - - - -unsigned int Space::UInt(const wchar_t * name, unsigned int def) const -{ - tmp_name = name; - return UInt(tmp_name, def); -} - - -unsigned int Space::UInt(const std::wstring & name, unsigned int def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToUInt(*value); - -return def; -} - - - - - -long Space::Long(const wchar_t * name, long def) const -{ - tmp_name = name; - return Long(tmp_name, def); -} - - -long Space::Long(const std::wstring & name, long def) const -{ - const std::wstring * 
value = GetFirstValue(name); - - if( value ) - return ToLong(*value); - -return def; -} - - -unsigned long Space::ULong(const wchar_t * name, unsigned long def) const -{ - tmp_name = name; - return ULong(tmp_name, def); -} - - -unsigned long Space::ULong(const std::wstring & name, unsigned long def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToULong(*value); - -return def; -} - - - -long long Space::LongLong(const wchar_t * name, long long def) const -{ - tmp_name = name; - return LongLong(tmp_name, def); -} - -long long Space::LongLong(const std::wstring & name, long long def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToLongLong(*value); - -return def; -} - -unsigned long long Space::ULongLong(const wchar_t * name, unsigned long long def) const -{ - tmp_name = name; - return ULongLong(tmp_name, def); -} - -unsigned long long Space::ULongLong(const std::wstring & name, unsigned long long def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToULongLong(*value); - -return def; -} - - - - - -size_t Space::Size(const wchar_t * name, size_t def) const -{ - tmp_name = name; - return Size(tmp_name, def); -} - - -size_t Space::Size(const std::wstring & name, size_t def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToSize(*value); - -return def; -} - - - - -bool Space::Bool(const wchar_t * name, bool def) const -{ - tmp_name = name; - return Bool(tmp_name, def); -} - - - -bool Space::Bool(const std::wstring & name, bool def) const -{ - const std::wstring * value = GetFirstValue(name); - - if( value ) - return ToBool(*value); - -return def; -} - - -// !! 
CHECKME -std::wstring & Space::FindAdd(const std::wstring & name) -{ - Value * value; - - Table::iterator t = table.find(name); - - if( t != table.end() ) - { - value = &t->second; - } - else - { - value = &table[name]; - } - - if( value->empty() ) - value->push_back(std::wstring()); - - return (*value)[0]; -} - - -// !! CHECKME -std::wstring & Space::FindAdd(const wchar_t * name) -{ - tmp_name = name; - return FindAdd(tmp_name); -} - - -// !! CHECKME -std::wstring & Space::FindAdd(const WTextStream & name) -{ - name.to_string(tmp_name); - return FindAdd(tmp_name); -} - - - - - - -/* - * - * - * - * - * CHECKME !! - */ -std::wstring & Space::Add(const std::wstring & name, const std::wstring & value, bool replace_existing) -{ - Table::iterator i = table.find(name); - - if( i == table.end() ) - { - Value & val = table[name]; - val.push_back(value); - return val.back(); - } - else - { - Value & val = i->second; - - if( replace_existing ) - val.clear(); - - val.push_back(value); - return val.back(); - } -} - - -// CHECKME !! -std::wstring & Space::Add(const std::wstring & name, const wchar_t * value, bool replace_existing) -{ - tmp_value = value; - return Add(name, tmp_value, replace_existing); -} - - -// CHECKME !! -std::wstring & Space::Add(const wchar_t * name, const wchar_t * value, bool replace_existing) -{ - tmp_name = name; - tmp_value = value; - return Add(tmp_name, tmp_value, replace_existing); -} - - -// CHECKME !! -std::wstring & Space::Add(const wchar_t * name, const std::wstring & value, bool replace_existing) -{ - tmp_name = name; - return Add(tmp_name, value, replace_existing); -} - - - - -// CHECKME !! -std::wstring & Space::Add(const wchar_t * name, const WTextStream & value, bool replace_existing) -{ - tmp_name = name; - value.to_string(tmp_value); - return Add(tmp_name, tmp_value, replace_existing); -} - - -// CHECKME !! 
-std::wstring & Space::Add(const std::wstring & name, const WTextStream & value, bool replace_existing) -{ - value.to_string(tmp_value); - return Add(name, tmp_value, replace_existing); -} - - -// CHECKME !! -std::wstring & Space::Add(const WTextStream & name, const WTextStream & value, bool replace_existing) -{ - name.to_string(tmp_name); - value.to_string(tmp_value); - return Add(tmp_name, tmp_value, replace_existing); -} - - - - -/* - * - * - * - * - * - */ -// CHECKME -std::wstring & Space::Add(const std::wstring & name, bool value, bool replace_existing) -{ - if( value ) - return Add(name, L"true", replace_existing); - else - return Add(name, L"false", replace_existing); -} - - -// CHECKME -std::wstring & Space::Add(const wchar_t * name, bool value, bool replace_existing) -{ - tmp_name = name; - return Add(tmp_name, value, replace_existing); -} - - - -// CHECKME -std::wstring & Space::Add(const std::wstring & name, int value, bool replace_existing) -{ -wchar_t value_str[50]; - -#if defined _WIN32 || defined _WIN64 - swprintf(value_str, L"%d", value); -#else - swprintf(value_str, sizeof(value_str)/sizeof(wchar_t), L"%d", value); -#endif - - return Add(name, value_str, replace_existing); -} - - -// CHECKME -std::wstring & Space::Add(const wchar_t * name, int value, bool replace_existing) -{ - tmp_name = name; - return Add(tmp_name, value, replace_existing); -} - - - - - -// CHECKME -std::wstring & Space::Add(const std::wstring & name, long value, bool replace_existing) -{ -wchar_t value_str[50]; - -#if defined _WIN32 || defined _WIN64 - swprintf(value_str, L"%ld", value); -#else - swprintf(value_str, sizeof(value_str)/sizeof(wchar_t), L"%ld", value); -#endif - - return Add(name, value_str, replace_existing); -} - - - -// CHECKME -std::wstring & Space::Add(const wchar_t * name, long value, bool replace_existing) -{ - tmp_name = name; - return Add(tmp_name, value, replace_existing); -} - - - -std::wstring & Space::Add(const std::wstring & name, size_t value, bool 
replace_existing) -{ -wchar_t value_str[50]; - -#if defined _WIN32 || defined _WIN64 - // see http://msdn.microsoft.com/en-us/library/tcxf1dw6%28v=vs.71%29.aspx - swprintf(value_str, L"%Iu", value); -#else - swprintf(value_str, sizeof(value_str)/sizeof(wchar_t), L"%zu", value); -#endif - - return Add(name, value_str, replace_existing); -} - - -std::wstring & Space::Add(const wchar_t * name, size_t value, bool replace_existing) -{ - tmp_name = name; - return Add(tmp_name, value, replace_existing); -} - - - - - - -void Space::Remove(const std::wstring & name) -{ - table.erase(name); -} - - -void Space::Remove(const wchar_t * name) -{ - tmp_name = name; - Remove(tmp_name); -} - - - - - -// CHECKME -Space & Space::AddSpace(const std::wstring & name) -{ - spaces.push_back(new Space()); - spaces.back()->name = name; - spaces.back()->parent = this; - -return *spaces.back(); -} - - -// CHECKME -Space & Space::AddSpace(const wchar_t * name) -{ - tmp_name = name; - return AddSpace(tmp_name); -} - - - - -Space * Space::FindSpace(const wchar_t * name) -{ - for(size_t i=0 ; iname == name ) // there is a special == operator in string class taking c-string as an argument - return spaces[i]; - } - -return nullptr; -} - - -Space * Space::FindSpace(const std::wstring & name) -{ - for(size_t i=0 ; iname == name ) - return spaces[i]; - } - -return nullptr; -} - - -Space & Space::FindAddSpace(const wchar_t * name) -{ - Space * space = FindSpace(name); - - if( space ) - return *space; - -return AddSpace(name); -} - - -Space & Space::FindAddSpace(const std::wstring & name) -{ - Space * space = FindSpace(name); - - if( space ) - return *space; - -return AddSpace(name); -} - - - -void Space::RemoveSpace(const wchar_t * name) -{ - for(size_t i=0 ; iname == name ) - RemoveSpace(i); - else - ++i; - } -} - - -void Space::RemoveSpace(const std::wstring & name) -{ - for(size_t i=0 ; iname == name ) - RemoveSpace(i); - else - ++i; - } -} - - -void Space::RemoveSpace(size_t child_index) -{ - if( 
child_index < spaces.size() ) - { - delete spaces[child_index]; - spaces.erase(spaces.begin() + child_index); - } -} - - - - - - - -bool Space::ListText(const std::wstring & name, std::vector & list) -{ - list.clear(); - - Table::iterator t = table.find(name); - - if( t != table.end() ) - { - list = t->second; - return true; - } - -return false; -} - - - -// in lists we don't use default values -bool Space::ListText(const wchar_t * name, std::vector & list) -{ - tmp_name = name; - return ListText(tmp_name, list); -} - - - - -/* - those white characters here should be the same as in spaceparser.cpp -*/ -bool Space::IsWhite(int c) -{ - // dont use '\n' here - // 13 (\r) is at the end of a line in a dos file \r\n - // 160 is an unbreakable space - if( c==' ' || c=='\t' || c==13 || c==160 ) - return true; - -return false; -} - - -bool Space::HasWhite(const std::wstring & str) -{ - for(size_t i=0 ; i - */ - -/* - * Copyright (c) 2010-2018, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef headerfile_picotools_space_space -#define headerfile_picotools_space_space - -#include -#include -#include -#include "textstream/types.h" - - - -namespace PT -{ - - - -/* -A parser for parsing config files. - -A config file can look like this: - variable1 = value 1 - variable2 = " value 2 " - variable3 = (value 1, value 2) - variable4 = (" value 1 " , "value2", value 3) - -sample of use: - SpaceParser parser; - parser.Parse("/path/to/config"); - - if( parser.status == SpaceParser::ok ) - { - // the whole config we have in parser.table - } - -config syntax: - option = list - - list can consists of any number of items, if you're using more than one item you should - use brackets () - - for one item the brackets can be ommited: - option = value - white characters at the beginning of the value (and at the end) will be trimmed, - or you can use quotes: - option = "value" - option2 = "value with spaces at the end " - - the form without quotes: - option = value - should be written in one line, so this is not allowed: - option = - value - you can use a new line characters only between brackets and quotes: - option = "this is - a multiline string" - option = ( value1, - value2 ) - - but there is one requirement: the first character " or ( should be in the same line, - so this is not allowed - option = - "this is wrong" - but this is ok: - option = " - that is ok" - - empty lists: - option = () - this creates an empty list: 
parser.table['option'].empty() == true - - option = - this creates an empty list too (the same as previously) - - option = "" - but this doesn't create an empty list, it creates a list with one (empty) item - - commentaries: - # this is a commentary (until the end of the line) - option = value # this is a commentary too - - commentaries are treated as white characters, other example: - option = ( # this is my list - "value 1" # this is a value one - value 2 # and this is a value two - ) # end of my list - - overwriting: - option1 = some value - option1 = other value - # always the last option is used so option1 is "other value" - - list delimiter: - option1 = (value1, value2, value3) - option2 = ("value1", "value2", "value3") - above we're using a comma ',' as a list delimiter but when using quotes (second line) - the commas can be omitted: - option2 = ("value1" "value2" "value3") - - white characters: - the name of an option cannot consist of white characters - some option = value # this is wrong - some_option = value # this is ok - - which characters are allowed in an option name is defined by IsVariableChar() method - - you can use white characters in values - option = value with spaces or tabs - white characters at the beginning and at the end will be trimmed, - so if you want them use quotes: - option = " other value with spaces " - - special characters in quoted strings: - option = "this is a string with \" a quote inside" - the option will be: this is a string with " a quote inside - \\ - means one \ - basically: \char produces char - so: - "\a" gives "a" - "\\" gives "\" - "\Z" gives "Z" and so on - you can call UseEscapeChar(false) to turn this off - - -*/ - - - -class Space -{ -public: - - - /* - this is the table which represents your config file - in the Table map: the first (key) is your 'option' and the second is 'list' - */ - typedef std::vector Value; - typedef std::map Table; - - - Space(); - ~Space(); - - Space(const Space & s); - Space & 
operator=(const Space & s); - - // IMPROVE ME - // add move cctor - - - void Clear(); - - - - /* - returns true if such an option has 'value' - useful when testing lists (they don't have to be copied out) - */ - bool HasValue(const wchar_t * name, const wchar_t * value); - bool HasValue(const wchar_t * name, const std::wstring & value); - bool HasValue(const std::wstring & name, const wchar_t * value); - bool HasValue(const std::wstring & name, const std::wstring & value); - - - - /* - * - * methods for getting/finding a value - * - * - */ - - /* - * - * their working in O(log) - * can return a null pointer - * - */ - Value * GetValue(const wchar_t * name); - Value * GetValue(const std::wstring & name); - const Value * GetValue(const wchar_t * name) const; - const Value * GetValue(const std::wstring & name) const; - - - - // O(n) complexity - Value * GetValueNoCase(const wchar_t * name); - Value * GetValueNoCase(const std::wstring & name); - const Value * GetValueNoCase(const wchar_t * name) const; - const Value * GetValueNoCase(const std::wstring & name) const; - - - // they can return a null pointer if there is not such a 'name' - std::wstring * GetFirstValue(const wchar_t * name); - std::wstring * GetFirstValue(const std::wstring & name); - - const std::wstring * GetFirstValue(const wchar_t * name) const; - const std::wstring * GetFirstValue(const std::wstring & name) const; - - - /* - those methods are used to extract information from space.table - as a parameter they take the name of an option - and a default value (if there is no such a parameter), - they return appropriate value (either text, int or boolean) - (in lists they return the first item if exists) - - when calling Text(...) and AText(...) you should copy the object to whom a reference is returned - it will be cleared in a next call to one of these methods (as well to Int() Size() and Bool()) - - AText(...) 
always returns a reference to UTF-8 string - */ - //std::wstring Text(const wchar_t * name) const; - std::wstring Text(const wchar_t * name, const wchar_t * def = 0) const; - std::wstring Text(const std::wstring & name, const wchar_t * def = 0) const; - std::wstring Text(const std::wstring & name, const std::wstring & def) const; - - // returns a reference - // if there is no such an option then a new one (def value) is inserted - //std::wstring & TextRef(const wchar_t * name); - std::wstring & TextRef(const wchar_t * name, const wchar_t * def = 0); - std::wstring & TextRef(const std::wstring & name, const wchar_t * def = 0); - std::wstring & TextRef(const std::wstring & name, const std::wstring & def); - - - // returns UTF-8 string - //std::string TextA(const wchar_t * name) const; - std::string TextA(const wchar_t * name, const char * def) const; - std::string TextA(const std::wstring & name, const char * def) const; - std::string TextA(const std::wstring & name, const std::string & def) const; - - std::string TextA(const wchar_t * name, const wchar_t * def) const; - std::string TextA(const std::wstring & name, const wchar_t * def) const; - std::string TextA(const std::wstring & name, const std::wstring & def) const; - - - - int Int(const wchar_t * name, int def = 0) const; - int Int(const std::wstring & name, int def = 0) const; - unsigned int UInt(const wchar_t * name, unsigned int def = 0) const; - unsigned int UInt(const std::wstring & name, unsigned int def = 0) const; - - long Long(const wchar_t * name, long def = 0) const; - long Long(const std::wstring & name, long def = 0) const; - unsigned long ULong(const wchar_t * name, unsigned long def = 0) const; - unsigned long ULong(const std::wstring & name, unsigned long def = 0) const; - - long long LongLong(const wchar_t * name, long long def = 0) const; - long long LongLong(const std::wstring & name, long long def = 0) const; - unsigned long long ULongLong(const wchar_t * name, unsigned long long def = 0) 
const; - unsigned long long ULongLong(const std::wstring & name, unsigned long long def = 0) const; - - size_t Size(const wchar_t * name, size_t def = 0) const; - size_t Size(const std::wstring & name, size_t def = 0) const; - - bool Bool(const wchar_t * name, bool def = false) const; - bool Bool(const std::wstring & name, bool def = false) const; - - - - /* - * - * methods for adding a new value - * - * - */ - - - std::wstring & FindAdd(const wchar_t * name); - std::wstring & FindAdd(const std::wstring & name); - std::wstring & FindAdd(const WTextStream & name); - - std::wstring & Add(const wchar_t * name, bool value, bool replace_existing = true); - std::wstring & Add(const std::wstring & name, bool value, bool replace_existing = true); - std::wstring & Add(const wchar_t * name, int value, bool replace_existing = true); - std::wstring & Add(const std::wstring & name, int value, bool replace_existing = true); - std::wstring & Add(const wchar_t * name, long value, bool replace_existing = true); - std::wstring & Add(const std::wstring & name, long value, bool replace_existing = true); - std::wstring & Add(const wchar_t * name, size_t value, bool replace_existing = true); - std::wstring & Add(const std::wstring & name, size_t value, bool replace_existing = true); - - std::wstring & Add(const std::wstring & name, const std::wstring & value, bool replace_existing = true); - std::wstring & Add(const std::wstring & name, const wchar_t * value, bool replace_existing = true); - std::wstring & Add(const wchar_t * name, const wchar_t * value, bool replace_existing = true); - std::wstring & Add(const wchar_t * name, const std::wstring & value, bool replace_existing = true); - - std::wstring & Add(const wchar_t * name, const WTextStream & value, bool replace_existing = true); - std::wstring & Add(const std::wstring & name, const WTextStream & value, bool replace_existing = true); - std::wstring & Add(const WTextStream & name, const WTextStream & value, bool replace_existing = 
true); - - void Remove(const wchar_t * name); - void Remove(const std::wstring & name); - - - - Space & AddSpace(const wchar_t * name); - Space & AddSpace(const std::wstring & name); - - // looking for the first space with the specified name - // if there is not such a space those methods return a null pointer - Space * FindSpace(const wchar_t * name); - Space * FindSpace(const std::wstring & name); - - // looking for the first space with the specified name - // if there is not such a space then this methods adds such a space - Space & FindAddSpace(const wchar_t * name); - Space & FindAddSpace(const std::wstring & name); - - void RemoveSpace(const wchar_t * name); - void RemoveSpace(const std::wstring & name); - void RemoveSpace(size_t child_index); - - - /* - * - * - raw access to the parsed values - * - * - */ - - std::wstring name; // space name - Table table; // std::map > - - // childs - typedef std::vector Spaces; - Spaces spaces; - - // a parent space - // null means a root space - Space * parent; - - - - /* - those methods are used to extract lists - return true if such an option exists (but value can be an empty list) - */ - bool ListText(const wchar_t * name, std::vector & list); - bool ListText(const std::wstring & name, std::vector & list); - - - /* - serialize the content - */ - template - void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const; - - template - void SerializeTableMulti(Stream & out, bool use_indents, int level) const; - - template - static void PrintValue(Stream & out, const StringType & str, bool use_quote = true); - - template - static void PrintKey(Stream & out, const std::wstring & str); - - template - static void PrintLevel(Stream & out, bool use_indents, int level); - - - -private: - - mutable std::wstring tmp_name; - - std::wstring tmp_value; - std::wstring tmp_value_text; - std::string tmp_value_text_ascii; - - static unsigned int ToUInt(const std::wstring & value); - static int 
ToInt(const std::wstring & value); - static unsigned long ToULong(const std::wstring & value); - static long ToLong(const std::wstring & value); - static unsigned long long ToULongLong(const std::wstring & value); - static long long ToLongLong(const std::wstring & value); - static size_t ToSize(const std::wstring & value); - static bool ToBool(const std::wstring & value); - - static bool IsWhite(int c); - static bool HasWhite(const std::wstring & str); - -}; - - - -template -void Space::PrintLevel(Stream & out, bool use_indents, int level) -{ - if( use_indents ) - { - for(int i=0 ; i -void Space::PrintValue(Stream & out, const StringType & str, bool use_quote) -{ - if( use_quote ) - out << '\"'; - - for(size_t i=0 ; i -void Space::PrintKey(Stream & out, const std::wstring & str) -{ -bool use_quote = false; - - // CHECK ME - // HasWhite doesn't take a new line into account, is it correct to use it here? - if( str.empty() || HasWhite(str) ) - use_quote = true; - - PrintValue(out, str, use_quote); -} - - - - -template -void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const -{ -Table::const_iterator i2; -size_t v; - - for(i2 = table.begin() ; i2 != table.end() ; ++i2) - { - PrintLevel(out, use_indents, level); - PrintKey(out, i2->first); - out << L" = "; - - if( i2->second.size() != 1 ) - out << '('; - - for(v = 0 ; v < i2->second.size() ; ++v) - { - if( v > 0 ) - PrintLevel(out, use_indents, level + i2->first.size() + 3); - - PrintValue(out, i2->second[v]); - - if( v + 1 < i2->second.size() ) - out << '\n'; - } - - if( i2->second.size() != 1 ) - out << ')'; - - out << '\n'; - } -} - - -template -void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int level) const -{ - if( level > 0 ) - { - out << '\n'; - PrintLevel(out, use_indents, level); - - if( !name.empty() ) - { - PrintKey(out, name); - out << ' '; - } - - out << L"(\n"; - - if( use_comments ) - { - PrintLevel(out, use_indents, level); - out << L"# space level " << 
level << '\n'; - } - } - - SerializeTableMulti(out, use_indents, level); - - for(size_t i=0 ; iSerialize(out, use_indents, use_comments, level+1); - - if( level > 0 ) - { - PrintLevel(out, use_indents, level); - out << ')'; - - if( use_comments ) - { - if( name.empty() ) - out << L" # end of unnamed space"; - else - out << L" # end of space: " << name; - - out << L" (level " << level << L")"; - } - - out << '\n'; - } -} - - - -} // namespace - - - -#endif diff --git a/space/spaceparser.cpp b/space/spaceparser.cpp deleted file mode 100644 index 8415f88..0000000 --- a/space/spaceparser.cpp +++ /dev/null @@ -1,675 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2008-2017, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include "spaceparser.h" -#include "utf8/utf8.h" - - -namespace PT -{ - - - -SpaceParser::SpaceParser() -{ - root_space = 0; - SetDefault(); -} - - -void SpaceParser::SetSpace(Space * pspace) -{ - root_space = pspace; -} - - -void SpaceParser::SetSpace(Space & pspace) -{ - root_space = &pspace; -} - - -void SpaceParser::SetDefault() -{ - // you can change this separators to what you want - // you shoud not use only white characters here (as expected by IsWhite() method) - // and new line characters ('\n') - separator = '='; - commentary = '#'; - list_start = '('; - list_end = ')'; - list_delimiter = ','; - skip_empty = false; - use_escape_char = true; - input_as_utf8 = true; -} - - - -void SpaceParser::SkipEmpty(bool skip) -{ - skip_empty = skip; -} - - -void SpaceParser::UseEscapeChar(bool escape) -{ - use_escape_char = escape; -} - - -void SpaceParser::UTF8(bool utf) -{ - input_as_utf8 = utf; -} - - - - -SpaceParser::Status SpaceParser::Parse(const char * file_name) -{ - reading_from_file = true; - - file.clear(); - file.open(file_name, std::ios_base::binary | std::ios_base::in); - - if( file ) - { - Parse(); - file.close(); - } - else - { - status = cant_open_file; - } - -return status; -} - - - -SpaceParser::Status SpaceParser::Parse(const std::string & file_name) -{ - return Parse(file_name.c_str()); -} - - - - -SpaceParser::Status SpaceParser::Parse(const wchar_t * file_name) -{ - 
PT::WideToUTF8(file_name, afile_name); - return Parse(afile_name.c_str()); -} - - - -SpaceParser::Status SpaceParser::Parse(const std::wstring & file_name) -{ - return Parse(file_name.c_str()); -} - - - -SpaceParser::Status SpaceParser::ParseString(const char * str) -{ - reading_from_file = false; - reading_from_wchar_string = false; - pchar_ascii = str; - pchar_unicode = 0; - - Parse(); - -return status; -} - - -SpaceParser::Status SpaceParser::ParseString(const std::string & str) -{ - return ParseString(str.c_str()); -} - - -SpaceParser::Status SpaceParser::ParseString(const wchar_t * str) -{ - reading_from_file = false; - reading_from_wchar_string = true; - pchar_unicode = str; - pchar_ascii = 0; - - Parse(); - -return status; -} - - -SpaceParser::Status SpaceParser::ParseString(const std::wstring & str) -{ - return ParseString(str.c_str()); -} - - -void SpaceParser::Parse() -{ - if( !root_space ) - { - status = no_space; - return; - } - - line = 1; - status = ok; - space = root_space; - reading_commentary = false; - ReadChar(); - SkipWhiteLines(); - - ParseLoop(); - - if( status == ok && space != root_space ) - { - // last closing ')' characters are missing (closing a space) - status = syntax_error; - } - - token.clear(); - key.clear(); - value.clear(); -} - - -void SpaceParser::ParseLoop() -{ - while( status == ok && lastc != -1 ) - { - if( lastc == list_end ) - { - SpaceEnds(); - } - else - { - ReadKey(); - SkipWhite(); - - if( lastc == list_start ) - { - SpaceStarts(); - } - else - if( lastc == separator ) - { - ReadValue(); - AddKeyValuePair(); - } - else - { - status = syntax_error; - } - } - - if( status == ok ) - SkipWhiteLines(); - } -} - - -void SpaceParser::SpaceEnds() -{ - if( space == root_space ) - { - // there cannot be a loose list end character in the global space - status = syntax_error; - } - else - { - space = space->parent; - ReadChar(); // skipping closing space character ')' - SkipWhite(); - } -} - - -void SpaceParser::SpaceStarts() -{ - 
Space * new_space = new Space(); - space->spaces.push_back(new_space); - new_space->parent = space; - new_space->name = key; - space = new_space; - - ReadChar(); // skipping space starts character ')' -} - - - - - -/* - those white characters here should be the same as in space.h -*/ -bool SpaceParser::IsWhite(int c) -{ - // dont use '\n' here - // 13 (\r) is at the end of a line in a dos file \r\n - // 160 is an unbreakable space - if( c==' ' || c=='\t' || c==13 || c==160 ) - return true; - -return false; -} - - -/* - skip_lines is default false -*/ -void SpaceParser::SkipWhite(bool skip_lines) -{ - while( IsWhite(lastc) || lastc == commentary || (skip_lines && lastc=='\n')) - { - if( lastc == commentary ) - SkipComment(); - else - ReadChar(); - } -} - - -void SpaceParser::SkipWhiteLines() -{ - SkipWhite(true); -} - - -/* - do not skip the last \n character -*/ -void SpaceParser::SkipLine() -{ - while( lastc != -1 && lastc != '\n' ) - ReadChar(); -} - - -/* - do not skip the last \n character -*/ -void SpaceParser::SkipComment() -{ - reading_commentary = true; - SkipLine(); - reading_commentary = false; -} - - - -void SpaceParser::Trim(std::wstring & s) -{ -std::wstring::size_type i; - - if( s.empty() ) - return; - - // looking for white characters at the end - for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i); - - if( i==0 && IsWhite(s[i]) ) - { - // the whole string has white characters - s.clear(); - return; - } - - // deleting white characters at the end - if( i != s.size() - 1 ) - s.erase(i+1, std::wstring::npos); - - // looking for white characters at the beginning - for(i=0 ; itable.find(var); - - if( i != space->table.end() ) - space->table.erase(i); -} - - - - - -void SpaceParser::ReadTokenQuoted() -{ - ReadChar(); // skipping the first quotation mark - - while( lastc != -1 && (char_was_escaped || lastc != '"') ) - { - token += static_cast(lastc); - ReadChar(); - } - - if( !char_was_escaped && lastc == '"' ) - ReadChar(); // skipping the last quotation mark 
- else - status = syntax_error; -} - - -void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) -{ - while( true ) - { - if( lastc == commentary ) - SkipComment(); - - if( lastc == -1 || - (!char_was_escaped && - ( - lastc == list_end || - (white_delimit && IsWhite(lastc)) || - (new_line_delimit && lastc == '\n') || - (delimit1 != -1 && lastc == delimit1) || - (delimit2 != -1 && lastc == delimit2) - ) ) ) - - { - break; - } - - token += static_cast(lastc); - ReadChar(); - } - - Trim(token); -} - - -void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2) -{ - token.clear(); - SkipWhite(); - - if( !char_was_escaped && lastc == '"' ) - ReadTokenQuoted(); - else - ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2); -} - - -void SpaceParser::ReadKey() -{ - ReadToken(false, true, separator, list_start); - key = token; - SkipWhite(); -} - - - -void SpaceParser::ReadValueList() -{ - ReadChar(); // skipping the first list character ')' - SkipWhiteLines(); - - while( lastc != -1 && lastc != list_end ) - { - ReadToken(true, true, list_delimiter, list_end); - value.push_back(token); - - SkipWhiteLines(); - - if( lastc == list_delimiter ) - { - ReadChar(); - SkipWhiteLines(); - } - } - - if( lastc == list_end ) - { - ReadChar(); // skipping the last list character ')' - SkipWhite(); - } - else - { - status = syntax_error; // missing one ')' - } -} - - -void SpaceParser::ReadValueSingle() -{ - SkipWhite(); - ReadToken(false, true, -1, -1); - value.push_back(token); - SkipWhite(); -} - - -void SpaceParser::ReadValue() -{ - ReadChar(); // skipping separator '=' - value.clear(); - SkipWhite(); - - if( lastc == list_start ) - ReadValueList(); - else - ReadValueSingle(); - - SkipWhiteLines(); -} - - -void SpaceParser::AddKeyValuePair() -{ - if( value.empty() && skip_empty ) - { - DeleteFromTable(key); - return; - } - - space->table[key] = value; -} - - - - - - -int 
SpaceParser::ReadUTF8Char() -{ -int c; -bool correct; - - lastc = -1; - - do - { - PT::UTF8ToInt(file, c, correct); - - if( !file ) - return lastc; - } - while( !correct ); - - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - -int SpaceParser::ReadASCIIChar() -{ - lastc = file.get(); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - - - -int SpaceParser::ReadCharFromWcharString() -{ - if( *pchar_unicode == 0 ) - lastc = -1; - else - lastc = *(pchar_unicode++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int SpaceParser::ReadCharFromUTF8String() -{ -int c; -bool correct; - - lastc = -1; - - do - { - size_t len = PT::UTF8ToInt(pchar_ascii, c, correct); - pchar_ascii += len; - - if( *pchar_ascii == 0 ) - return lastc; - } - while( !correct ); - - lastc = c; - - if( lastc == '\n' ) - ++line; - -return lastc; - -} - - -int SpaceParser::ReadCharFromAsciiString() -{ - if( *pchar_ascii == 0 ) - lastc = -1; - else - lastc = *(pchar_ascii++); - - if( lastc == '\n' ) - ++line; - -return lastc; -} - - -int SpaceParser::ReadCharNoEscape() -{ - if( reading_from_file ) - { - if( input_as_utf8 ) - return ReadUTF8Char(); - else - return ReadASCIIChar(); - } - else - { - if( reading_from_wchar_string ) - { - return ReadCharFromWcharString(); - } - else - { - if( input_as_utf8 ) - return ReadCharFromUTF8String(); - else - return ReadCharFromAsciiString(); - } - } -} - - -int SpaceParser::ReadChar() -{ - char_was_escaped = false; - ReadCharNoEscape(); - - if( !reading_commentary && use_escape_char && lastc == '\\' ) - { - char_was_escaped = true; - ReadCharNoEscape(); - - switch(lastc) - { - case '0': lastc = 0; break; - case 't': lastc = '\t'; break; - case 'r': lastc = '\r'; break; - case 'n': lastc = '\n'; break; - // in other cases we return the last character - } - } - -return lastc; -} - - - -} // namespace - - - - diff --git a/space/spaceparser.h b/space/spaceparser.h deleted file mode 100644 index 2cff901..0000000 --- 
a/space/spaceparser.h +++ /dev/null @@ -1,325 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2010-2017, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef headerfile_picotools_confparser_spaceparser -#define headerfile_picotools_confparser_spaceparser - -#include -#include "space.h" - - - -namespace PT -{ - - - -class SpaceParser -{ -public: - - - /* - ctor -- setting default values (SetDefault() method) - */ - SpaceParser(); - - - /* - setting the root space - */ - void SetSpace(Space * pspace); - void SetSpace(Space & pspace); - - - /* - setting options of the parser to the default values - utf8 etc. - */ - void SetDefault(); - - - /* - status of parsing - */ - enum Status { ok, cant_open_file, syntax_error, no_space }; - - - /* - the last status of parsing, set by Parse() methods - */ - Status status; - - - /* - a number of a line in which there is a syntax_error - */ - int line; - - - /* - main methods used to parse - file_name is the path to a file - */ - Status Parse(const char * file_name); - Status Parse(const std::string & file_name); - Status Parse(const wchar_t * file_name); - Status Parse(const std::wstring & file_name); - - - /* - main methods used to parse - str - input string (either 8bit ascii or UTF-8 -- see UTF8() method) - */ - Status ParseString(const char * str); - Status ParseString(const std::string & str); - - - /* - main methods used to parse - here input string is always in unicode (wide characters) - */ - Status ParseString(const wchar_t * str); - Status ParseString(const std::wstring & str); - - - /* - if true then empty values and lists, e.g: - option = - option2 = () - will be omitted (not inserted to 'table') - default: false - */ - void SkipEmpty(bool skip); - - - /* - '\' character is used to escape other characters - so "some \t t\"ext" will produce "some t t"ext" - default: true - special characters: - \0 - 0 (zero code point) - \t - tabulator (9 code point) - \r - carriage return (13 code point) - \n - a new line character (10 code point) - in other cases we return the last character so \Z gives Z and \\ gives one \ - escape character are not used in commentaries - 
so you can write: - # this is my comment \n but this was not a new line - */ - void UseEscapeChar(bool escape); - - - /* - if true then the input file or string (char* or std::string) is treated as UTF-8 - default: true - */ - void UTF8(bool utf); - - -private: - - - /* - current space set by SetSpace(); - */ - Space * root_space; - - - /* - a space in which we are now - */ - Space * space; - - - /* - true if Parse() method was called - false if ParseString() was called - */ - bool reading_from_file; - - - /* - pointers to the current character - if ParseString() is in used - */ - const char * pchar_ascii; - const wchar_t * pchar_unicode; - - - /* - true if ParseString(wchar_t *) or ParseString(std::wstring&) was called - */ - bool reading_from_wchar_string; - - - /* - last read token - */ - std::wstring token; - - - /* - last read key - */ - std::wstring key; - - - /* - last read list - */ - Space::Value value; - - - /* - separator between a variable and a value, default: '=' - */ - int separator; - - - /* - commentary char, default: '#' - */ - int commentary; - - - /* - list starting character, default: '(' - */ - int list_start; - - - /* - list ending character, default: ')' - */ - int list_end; - - - /* - list delimiter, default: ',' - */ - int list_delimiter; - - - /* - last read char - or -1 if the end - */ - int lastc; - - - /* - true if the lastc was escaped (with a backslash) - we have to know if the last sequence was \" or just " - */ - bool char_was_escaped; - - - /* - current file - */ - std::ifstream file; - - - /* - if true then empty lists, e.g: - option = - option2 = () - will be omitted (not inserted to 'table') - default: false - */ - bool skip_empty; - - - /* - input file is in UTF-8 - default: true - */ - bool input_as_utf8; - - - /* - if true you can use an escape character '\' in quoted values - */ - bool use_escape_char; - - - /* - true if we are reading the commentary (#) - this is to avoid parsing escape characters in the commentary - */ - 
bool reading_commentary; - - std::string afile_name; - - void Parse(); - void ParseLoop(); - void SpaceEnds(); - void SpaceStarts(); - - void DeleteFromTable(const std::wstring & var); - - void ReadTokenQuoted(); - void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); - void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2); - void ReadKey(); - void ReadValueList(); - void ReadValueSingle(); - void ReadValue(); - void AddKeyValuePair(); - - int ReadUTF8Char(); - int ReadASCIIChar(); - int ReadCharFromWcharString(); - int ReadCharFromUTF8String(); - int ReadCharFromAsciiString(); - int ReadCharNoEscape(); - int ReadChar(); - bool IsWhite(int c); - void SkipWhite(bool skip_lines = false); - void SkipWhiteLines(); - void SkipLine(); - void SkipComment(); - void Trim(std::wstring & s); - -}; - - -} // namespace - - -#endif diff --git a/space/spacetojson.h b/space/spacetojson.h deleted file mode 100644 index f18ac3f..0000000 --- a/space/spacetojson.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2012-2017, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef headerfile_picotools_space_spacetojson -#define headerfile_picotools_space_spacetojson - -#include -#include -#include -#include "space.h" - - - -namespace PT -{ - - -class SpaceToJSON -{ -public: - - void Clear(); - - void TreatAsTable(const wchar_t * space_name); - void TreatAsTable(const std::wstring & space_name); - - void TreatAsNumeric(const wchar_t * name); - void TreatAsNumeric(const std::wstring & name); - - void TreatAsBool(const wchar_t * name); - void TreatAsBool(const std::wstring & name); - - template - void Serialize(Space & space, Stream & out, bool use_indents = false); - - -private: - - std::set numeric, boolean, table; - - - template - void Serialize(Space & space, Stream & out, bool use_indents, int level, - bool use_comma, bool treat_as_table, bool skip_name); - - template - void SerializeTableMulti(Space & space, Stream & out, bool use_indents, int level, bool use_comma); - - template - void PrintToken(Stream & out, const StringType & str, bool check_specials = false); - - template - void PrintLevel(Stream & out, bool use_indents, int level); - - - bool IsNumeric(const std::wstring & name); - bool IsBool(const std::wstring & name); - bool 
IsTable(const std::wstring & name); - -}; - - - - -template -void SpaceToJSON::PrintLevel(Stream & out, bool use_indents, int level) -{ - if( use_indents ) - { - for(int i=0 ; i -void SpaceToJSON::PrintToken(Stream & out, const StringType & str, bool is_special) -{ - if( !is_special ) - out << '\"'; - - for(size_t i=0 ; i -void SpaceToJSON::SerializeTableMulti(Space & space, Stream & out, bool use_indents, int level, bool use_comma) -{ -Space::Table::const_iterator i2; -size_t v; -size_t index = 0; -bool is_special; - - if( use_comma && !space.table.empty() ) - { - PrintLevel(out, use_indents, level); - out << L",\n"; - } - - for(i2 = space.table.begin() ; i2 != space.table.end() ; ++i2, ++index) - { - is_special = IsNumeric(i2->first) || IsBool(i2->first); - - PrintLevel(out, use_indents, level); - PrintToken(out, i2->first); - out << L": "; - - if( i2->second.size() != 1 ) - out << '['; - - for(v = 0 ; v < i2->second.size() ; ++v) - { - if( v > 0 ) - PrintLevel(out, use_indents, level + i2->first.size() + 3); - - PrintToken(out, i2->second[v], is_special); - - if( v + 1 < i2->second.size() ) - out << L",\n"; - } - - if( i2->second.size() != 1 ) - out << ']'; - - if( index + 1 < space.table.size() ) - out << ','; - - out << '\n'; - } -} - - - - -template -void SpaceToJSON::Serialize(Space & space, Stream & out, bool use_indents, int level, - bool use_comma, bool treat_as_table, bool skip_name) -{ - if( use_comma ) - { - PrintLevel(out, use_indents, level); - out << L",\n"; - } - - PrintLevel(out, use_indents, level); - - if( !skip_name ) - { - if( space.name.empty() ) - { - out << L"\"empty\": "; - } - else - { - PrintToken(out, space.name); - out << L": "; - } - } - - if( treat_as_table ) - out << L"[\n"; - else - out << L"{\n"; - - bool printed_something = false; - - if( !treat_as_table ) - { - SerializeTableMulti(space, out, use_indents, level, false); - - if( !space.table.empty() ) - printed_something = true; - } - - /* - * !! 
IMPROVE ME when serializing a table - * we can make a test whether a space is empty and has a name - * in such a case put it as a string - * this is the same way as the json parser works - * - */ - for(size_t i=0 ; iname); - Serialize(*space.spaces[i], out, use_indents, level+1, printed_something, next_is_table, next_skip_name); - printed_something = true; - } - - PrintLevel(out, use_indents, level); - - if( treat_as_table ) - out << L"]\n"; - else - out << L"}\n"; -} - - - -template -void SpaceToJSON::Serialize(Space & space, Stream & out, bool use_indents) -{ - bool treat_as_table = IsTable(space.name); - Serialize(space, out, use_indents, 0, false, treat_as_table, true); -} - - - - -} // namespace - -#endif - diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..683b0d1 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,45 @@ +sourcefiles:=$(shell find . -name "*.cpp") +objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles)) + + + +ifndef CXX +CXX = g++ +endif + +ifndef CXXFLAGS +CXXFLAGS = -Wall -pedantic -O2 -std=c++20 -I../src -I/usr/local/include +endif + +ifndef AR +AR = ar +endif + + + + +libname = pikotools.a + + +all: $(libname) + + +$(libname): $(objfiles) + $(AR) rcs $(libname) $(objfiles) + + +%.o: %.cpp + $(CXX) -c $(CXXFLAGS) -o $@ $< + + +clean: + rm -f $(objfiles) + rm -f $(libname) + + +depend: + makedepend -Y. 
-f- $(sourcefiles) > Makefile.dep + + +-include Makefile.dep + diff --git a/src/Makefile.dep b/src/Makefile.dep new file mode 100644 index 0000000..a2410fd --- /dev/null +++ b/src/Makefile.dep @@ -0,0 +1,43 @@ +# DO NOT DELETE + +./convert/inttostr.o: ./convert/inttostr.h +./convert/misc.o: ./convert/misc.h ./convert/text.h +./convert/text.o: ./convert/text.h ./convert/text_private.h +./date/date.o: ./date/date.h convert/inttostr.h +./log/filelog.o: ./log/filelog.h textstream/textstream.h space/space.h +./log/filelog.o: textstream/types.h convert/inttostr.h date/date.h +./log/filelog.o: membuffer/membuffer.h textstream/types.h utf8/utf8.h +./log/filelog.o: utf8/utf8_templates.h utf8/utf8_private.h +./log/log.o: ./log/log.h textstream/textstream.h space/space.h +./log/log.o: textstream/types.h convert/inttostr.h date/date.h +./log/log.o: membuffer/membuffer.h textstream/types.h ./log/filelog.h +./log/log.o: utf8/utf8.h utf8/utf8_templates.h utf8/utf8_private.h +./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h +./space/space.o: utf8/utf8.h textstream/textstream.h space/space.h +./space/space.o: date/date.h membuffer/membuffer.h textstream/types.h +./space/space.o: utf8/utf8_templates.h utf8/utf8_private.h convert/convert.h +./space/space.o: ./convert/inttostr.h convert/patternreplacer.h +./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h +./space/spaceparser.o: ./space/spaceparser.h ./space/space.h +./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h +./space/spaceparser.o: textstream/textstream.h space/space.h date/date.h +./space/spaceparser.o: membuffer/membuffer.h textstream/types.h +./space/spaceparser.o: utf8/utf8_templates.h utf8/utf8_private.h +./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h +./utf8/utf8.o: ./utf8/utf8.h textstream/textstream.h space/space.h +./utf8/utf8.o: textstream/types.h convert/inttostr.h date/date.h +./utf8/utf8.o: membuffer/membuffer.h 
textstream/types.h utf8/utf8_templates.h +./utf8/utf8.o: utf8/utf8_private.h +./utf8/utf8_private.o: utf8/utf8_private.h textstream/textstream.h +./utf8/utf8_private.o: space/space.h textstream/types.h convert/inttostr.h +./utf8/utf8_private.o: date/date.h membuffer/membuffer.h textstream/types.h +./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h +./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/textstream.h +./csv/csvparser.o: date/date.h membuffer/membuffer.h textstream/types.h +./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h +./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h +./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h +./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h +./mainoptions/mainoptionsparser.o: textstream/textstream.h date/date.h +./mainoptions/mainoptionsparser.o: membuffer/membuffer.h textstream/types.h +./mainoptions/mainoptionsparser.o: utf8/utf8_templates.h utf8/utf8_private.h diff --git a/convert/convert.h b/src/convert/convert.h similarity index 100% rename from convert/convert.h rename to src/convert/convert.h diff --git a/src/convert/inttostr.cpp b/src/convert/inttostr.cpp new file mode 100644 index 0000000..b9a8d6d --- /dev/null +++ b/src/convert/inttostr.cpp @@ -0,0 +1,156 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "inttostr.h" + + +namespace pt +{ + +std::string to_str(unsigned long long value, int base) +{ + std::string res; + Toa(value, res, false, base); + + return res; +} + + +std::string to_str(long long value, int base) +{ + std::string res; + Toa(value, res, false, base); + + return res; +} + + +std::string to_str(unsigned long value, int base) +{ + return to_str(static_cast(value), base); +} + + +std::string to_str(long value, int base) +{ + return to_str(static_cast(value), base); +} + + +std::string to_str(unsigned int value, int base) +{ + return to_str(static_cast(value), base); +} + + +std::string to_str(int value, int base) +{ + return to_str(static_cast(value), base); +} + + +std::string to_str(unsigned short value, int base) +{ + return to_str(static_cast(value), base); +} + + +std::string to_str(short value, int base) +{ + return to_str(static_cast(value), base); +} + + + + + +std::wstring to_wstr(unsigned long long value, int base) +{ + std::wstring res; + Toa(value, res, false, base); + + return res; +} + + +std::wstring to_wstr(long long value, int base) +{ + std::wstring res; + Toa(value, res, false, base); + + return res; +} + + +std::wstring to_wstr(unsigned long value, int base) +{ + return to_wstr(static_cast(value), base); +} + + +std::wstring to_wstr(long value, int base) +{ + return to_wstr(static_cast(value), base); +} + + +std::wstring to_wstr(unsigned int value, int base) +{ + return to_wstr(static_cast(value), base); +} + + +std::wstring to_wstr(int value, int base) +{ + return to_wstr(static_cast(value), base); +} + + +std::wstring to_wstr(unsigned short value, int base) +{ + return to_wstr(static_cast(value), base); +} + + +std::wstring to_wstr(short value, int base) +{ + return to_wstr(static_cast(value), base); +} + + +} + diff --git a/convert/inttostr.h b/src/convert/inttostr.h similarity index 88% rename from convert/inttostr.h rename to src/convert/inttostr.h index 78e7d58..f134dc8 100644 --- a/convert/inttostr.h +++ 
b/src/convert/inttostr.h @@ -42,7 +42,7 @@ -namespace PT +namespace pt { @@ -270,14 +270,23 @@ void Toa(short value, StringType & res, bool clear_string = true, int base = 10) -std::wstring Toa(unsigned long long value, int base = 10); -std::wstring Toa(long long value, int base = 10); -std::wstring Toa(unsigned long value, int base = 10); -std::wstring Toa(long value, int base = 10); -std::wstring Toa(unsigned int value, int base = 10); -std::wstring Toa(int value, int base = 10); -std::wstring Toa(unsigned short value, int base = 10); -std::wstring Toa(short value, int base = 10); +std::string to_str(unsigned long long value, int base = 10); +std::string to_str(long long value, int base = 10); +std::string to_str(unsigned long value, int base = 10); +std::string to_str(long value, int base = 10); +std::string to_str(unsigned int value, int base = 10); +std::string to_str(int value, int base = 10); +std::string to_str(unsigned short value, int base = 10); +std::string to_str(short value, int base = 10); + +std::wstring to_wstr(unsigned long long value, int base = 10); +std::wstring to_wstr(long long value, int base = 10); +std::wstring to_wstr(unsigned long value, int base = 10); +std::wstring to_wstr(long value, int base = 10); +std::wstring to_wstr(unsigned int value, int base = 10); +std::wstring to_wstr(int value, int base = 10); +std::wstring to_wstr(unsigned short value, int base = 10); +std::wstring to_wstr(short value, int base = 10); diff --git a/convert/misc.cpp b/src/convert/misc.cpp similarity index 99% rename from convert/misc.cpp rename to src/convert/misc.cpp index a6cf0d7..978cce7 100644 --- a/convert/misc.cpp +++ b/src/convert/misc.cpp @@ -38,7 +38,7 @@ #include "misc.h" -namespace PT +namespace pt { diff --git a/convert/misc.h b/src/convert/misc.h similarity index 99% rename from convert/misc.h rename to src/convert/misc.h index 4f9ba87..7dbb128 100644 --- a/convert/misc.h +++ b/src/convert/misc.h @@ -42,7 +42,7 @@ #include "text.h" -namespace PT 
+namespace pt { void SetOverflow(bool * was_overflow, bool val); diff --git a/convert/patternreplacer.h b/src/convert/patternreplacer.h similarity index 99% rename from convert/patternreplacer.h rename to src/convert/patternreplacer.h index 0d89a8f..65569f0 100644 --- a/convert/patternreplacer.h +++ b/src/convert/patternreplacer.h @@ -44,7 +44,7 @@ -namespace PT +namespace pt { template diff --git a/convert/strtoint.h b/src/convert/strtoint.h similarity index 97% rename from convert/strtoint.h rename to src/convert/strtoint.h index 3aae7dc..9d06f4b 100644 --- a/convert/strtoint.h +++ b/src/convert/strtoint.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2017, Tomasz Sowa + * Copyright (c) 2017-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,7 +43,7 @@ #include "misc.h" -namespace PT +namespace pt { @@ -59,9 +59,9 @@ unsigned long long Toull(const CharType * str, int base = 10, const CharType ** SetOverflow(was_overflow, false); if( allow_skip_whitechars ) - str = SkipWhite(str); + str = skip_white(str); - while( !carry && IsDigit(*str, base, &digit) ) + while( !carry && is_digit(*str, base, &digit) ) { #ifdef __GNUC__ carry = __builtin_mul_overflow(res, static_cast(base), &res); @@ -82,7 +82,7 @@ unsigned long long Toull(const CharType * str, int base = 10, const CharType ** { if( after_str ) { - while( IsDigit(*str, base, &digit) ) + while( is_digit(*str, base, &digit) ) { str += 1; } @@ -108,7 +108,7 @@ long long Toll(const CharType * str, int base = 10, const CharType ** after_str SetOverflow(was_overflow, false); if( allow_skip_whitechars ) - str = SkipWhite(str); + str = skip_white(str); if( *str == '-' ) { @@ -230,7 +230,7 @@ template unsigned long long Toull_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true) { if( allow_skip_whitechars ) - str = SkipWhite(str); + str = skip_white(str); int base = 10; @@ -266,7 +266,7 @@ long long 
Toll_b(const CharType * str, const CharType ** after_str = 0, bool * w SetOverflow(was_overflow, false); if( allow_skip_whitechars ) - str = SkipWhite(str); + str = skip_white(str); if( *str == '-' ) { diff --git a/src/convert/text.cpp b/src/convert/text.cpp new file mode 100644 index 0000000..8551ba0 --- /dev/null +++ b/src/convert/text.cpp @@ -0,0 +1,473 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "text.h" +#include "text_private.h" + + +namespace pt +{ + +// white_chars table should be sorted (a binary search algorithm is used to find a character) +// we do not treat a new line character (10) as a white character here +// also space (32) and tab (9) are not inserted here +static const wchar_t white_chars_table[] = { + 0x000B, // LINE TABULATION (vertical tabulation) + 0x000C, // FORM FEED (FF) + 0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file + 0x0085, // NEXT LINE (NEL) + 0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE) + 0x1680, // OGHAM SPACE MARK + 0x180E, // MONGOLIAN VOWEL SEPARATOR + 0x2000, // EN QUAD + 0x2001, // EM QUAD + 0x2002, // EN SPACE + 0x2003, // EM SPACE + 0x2004, // THREE-PER-EM SPACE + 0x2005, // FOUR-PER-EM SPACE + 0x2006, // SIX-PER-EM SPACE + 0x2007, // FIGURE SPACE + 0x2008, // PUNCTUATION SPACE + 0x2009, // THIN SPACE + 0x200A, // HAIR SPACE + 0x2028, // LINE SEPARATOR + 0x2029, // PARAGRAPH SEPARATOR + 0x202F, // NARROW NO-BREAK SPACE + 0x205F, // MEDIUM MATHEMATICAL SPACE + 0x3000, // IDEOGRAPHIC SPACE + 0xFEFF, // ZERO WIDTH NO-BREAK SPACE +}; + + + + +/* + if check_additional_chars is false then we are testing only a space (32), tab (9) and a new line (10) (if treat_new_line_as_white is true) +*/ +bool is_white(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) +{ + // space (32) and tab (9) are the most common 
white chars + // so we check them at the beginning (optimisation) + if( c == 32 || c == 9 ) + return true; + + std::size_t len = sizeof(white_chars_table) / sizeof(wchar_t); + std::size_t o1 = 0; + std::size_t o2 = len - 1; + + if( c == 10 ) + return treat_new_line_as_white ? true : false; + + if( !check_additional_chars ) + return false; + + if( c < white_chars_table[o1] || c > white_chars_table[o2] ) + return false; + + if( c == white_chars_table[o1] || c == white_chars_table[o2] ) + return true; + + while( o1 + 1 < o2 ) + { + std::size_t o = (o2 - o1)/2 + o1; + + if( c == white_chars_table[o] ) + return true; + + if( c > white_chars_table[o] ) + o1 = o; + else + o2 = o; + } + +return false; +} + + + +bool is_digit(wchar_t c, int base, int * digit) +{ + int d = 0; + + if( c >= '0' && c <= '9' ) + { + d = c - '0'; + } + else + if( c >= 'a' && c <= 'f' ) + { + d = c - 'a' + 10; + } + else + if( c >= 'A' && c <= 'F' ) + { + d = c - 'A' + 10; + } + else + { + if( digit ) + *digit = d; + + return false; + } + + if( digit ) + *digit = d; + + return d < base; +} + + +const char * skip_white(const char * str, bool check_additional_chars, bool treat_new_line_as_white) +{ + return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white); +} + +const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white) +{ + return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white); +} + +const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars, bool treat_new_line_as_white) +{ + return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white); +} + +const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars, bool treat_new_line_as_white) +{ + return pt_private::skip_white_from_back_generic(str_begin, str_end, 
check_additional_chars, treat_new_line_as_white); +} + +const char * skip_white_from_back(const char * str, bool check_additional_chars, bool treat_new_line_as_white) +{ + return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white); +} + +const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white) +{ + return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white); +} + + + + +char to_lower(char c) +{ + return pt_private::to_lower_generic(c); +} + +wchar_t to_lower(wchar_t c) +{ + return pt_private::to_lower_generic(c); +} + + +char to_upper(char c) +{ + return pt_private::to_upper_generic(c); +} + +wchar_t to_upper(wchar_t c) +{ + return pt_private::to_upper_generic(c); + +} + + + + +void to_lower_emplace(std::string & str) +{ + pt_private::to_lower_str_generic(str); +} + + +void to_lower_emplace(std::wstring & str) +{ + pt_private::to_lower_str_generic(str); +} + + +void to_upper_emplace(std::string & str) +{ + pt_private::to_upper_str_generic(str); +} + + +void to_upper_emplace(std::wstring & str) +{ + pt_private::to_upper_str_generic(str); +} + + + + + +std::string to_lower(const std::string & str) +{ + std::string res(str); + to_lower_emplace(res); + + return res; +} + + +std::wstring to_lower(const std::wstring & str) +{ + std::wstring res(str); + to_lower_emplace(res); + + return res; +} + + +std::string to_upper(const std::string & str) +{ + std::string res(str); + to_upper_emplace(res); + + return res; +} + + +std::wstring to_upper(const std::wstring & str) +{ + std::wstring res(str); + to_upper_emplace(res); + + return res; +} + + + +int compare(const char * str1, const char * str2) +{ + return pt_private::compare_generic(str1, str2); +} + +int compare(const wchar_t * str1, const wchar_t * str2) +{ + return pt_private::compare_generic(str1, str2); +} + +int compare(const std::string & str1, const std::string & str2) +{ 
+ return pt_private::compare_str_generic(str1, str2); +} + +int compare(const std::wstring & str1, const std::wstring & str2) +{ + return pt_private::compare_str_generic(str1, str2); +} + +int compare(const char * str1_begin, const char * str1_end, const char * str2) +{ + return pt_private::compare_generic(str1_begin, str1_end, str2); +} + +int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) +{ + return pt_private::compare_generic(str1_begin, str1_end, str2); +} + + + + + + + +int compare_nc(const char * str1, const char * str2) +{ + return pt_private::compare_nc_generic(str1, str2); +} + +int compare_nc(const wchar_t * str1, const wchar_t * str2) +{ + return pt_private::compare_nc_generic(str1, str2); +} + +int compare_nc(const std::string & str1, const std::string & str2) +{ + return pt_private::compare_nc_str_generic(str1, str2); +} + +int compare_nc(const std::wstring & str1, const std::wstring & str2) +{ + return pt_private::compare_nc_str_generic(str1, str2); +} + +int compare_nc(const char * str1_begin, const char * str1_end, const char * str2) +{ + return pt_private::compare_nc_generic(str1_begin, str1_end, str2); +} + +int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) +{ + return pt_private::compare_nc_generic(str1_begin, str1_end, str2); +} + + +bool is_equal(const char * str1, const char * str2) +{ + return pt_private::compare_generic(str1, str2) == 0; +} + +bool is_equal(const wchar_t * str1, const wchar_t * str2) +{ + return pt_private::compare_generic(str1, str2) == 0; +} + + +bool is_equal(const std::string & str1, const std::string & str2) +{ + return is_equal(str1.c_str(), str2.c_str()); +} + + +bool is_equal(const std::wstring & str1, const std::wstring & str2) +{ + return is_equal(str1.c_str(), str2.c_str()); +} + + + +bool is_equal(const char * str1_begin, const char * str1_end, const char * str2) +{ + return pt_private::compare_generic(str1_begin, str1_end, str2) == 0; 
+} + + +bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) +{ + return pt_private::compare_generic(str1_begin, str1_end, str2) == 0; +} + + +bool is_equal_nc(const char * str1, const char * str2) +{ + return pt_private::compare_nc_generic(str1, str2) == 0; +} + + +bool is_equal_nc(const wchar_t * str1, const wchar_t * str2) +{ + return pt_private::compare_nc_generic(str1, str2) == 0; +} + + +bool is_equal_nc(const std::string & str1, const std::string & str2) +{ + return is_equal_nc(str1.c_str(), str2.c_str()); +} + + +bool is_equal_nc(const std::wstring & str1, const std::wstring & str2) +{ + return is_equal_nc(str1.c_str(), str2.c_str()); +} + + +bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2) +{ + return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0; +} + + +bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) +{ + return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0; +} + + + + +bool is_substr(const char * short_str, const char * long_str) +{ + return pt_private::is_substr_generic(short_str, long_str); +} + + +bool is_substr(const wchar_t * short_str, const wchar_t * long_str) +{ + return pt_private::is_substr_generic(short_str, long_str); +} + + +bool is_substr(const std::string & short_str, const std::string & long_str) +{ + return is_substr(short_str.c_str(), long_str.c_str()); +} + + +bool is_substr(const std::wstring & short_str, const std::wstring & long_str) +{ + return is_substr(short_str.c_str(), long_str.c_str()); +} + + +bool is_substr_nc(const char * short_str, const char * long_str) +{ + return pt_private::is_substr_nc_generic(short_str, long_str); +} + +bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str) +{ + return pt_private::is_substr_nc_generic(short_str, long_str); +} + + +bool is_substr_nc(const std::string & short_str, const std::string & long_str) +{ + return 
pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str()); +} + + +bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str) +{ + return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str()); +} + + + + + + + +} + + diff --git a/src/convert/text.h b/src/convert/text.h new file mode 100644 index 0000000..d5dcdf2 --- /dev/null +++ b/src/convert/text.h @@ -0,0 +1,150 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_convert_text +#define headerfile_picotools_convert_text + +#include + + +namespace pt +{ + +bool is_white(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true); +bool is_digit(wchar_t c, int base = 10, int * digit = 0); + +const char * skip_white(const char * str, bool check_additional_chars = true, bool treat_new_line_as_white = true); +const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars = true, bool treat_new_line_as_white = true); + + + +/* + * + * str_end is pointing at the end of the string (the last item + one) + * + * return value is a pointer to the first white character after a non-white character at the end + * or to the last+one if there is no any white characters + * + */ +const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true); +const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true); + +const char * skip_white_from_back(const char * str, bool check_additional_chars = true, bool treat_new_line_as_white = true); +const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars = true, bool treat_new_line_as_white = true); + + + +char to_lower(char c); +wchar_t to_lower(wchar_t c); + +char to_upper(char c); 
+wchar_t to_upper(wchar_t c); + +void to_lower_emplace(std::string & str); +void to_lower_emplace(std::wstring & str); + +void to_upper_emplace(std::string & str); +void to_upper_emplace(std::wstring & str); + +std::string to_lower(const std::string & str); +std::wstring to_lower(const std::wstring & str); + +std::string to_upper(const std::string & str); +std::wstring to_upper(const std::wstring & str); + + +int compare(const char * str1, const char * str2); +int compare(const wchar_t * str1, const wchar_t * str2); + +int compare(const std::string & str1, const std::string & str2); +int compare(const std::wstring & str1, const std::wstring & str2); + +int compare(const char * str1_begin, const char * str1_end, const char * str2); +int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2); + + +/* + * compare no case + */ +int compare_nc(const char * str1, const char * str2); +int compare_nc(const wchar_t * str1, const wchar_t * str2); + +int compare_nc(const std::string & str1, const std::string & str2); +int compare_nc(const std::wstring & str1, const std::wstring & str2); + +int compare_nc(const char * str1_begin, const char * str1_end, const char * str2); +int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2); + + + +bool is_equal(const char * str1, const char * str2); +bool is_equal(const wchar_t * str1, const wchar_t * str2); + +bool is_equal(const std::string & str1, const std::string & str2); +bool is_equal(const std::wstring & str1, const std::wstring & str2); + +bool is_equal(const char * str1_begin, const char * str1_end, const char * str2); +bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2); + +bool is_equal_nc(const char * str1, const char * str2); +bool is_equal_nc(const wchar_t * str1, const wchar_t * str2); + +bool is_equal_nc(const std::string & str1, const std::string & str2); +bool is_equal_nc(const std::wstring & str1, const std::wstring & 
str2); + +bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2); +bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2); + + +bool is_substr(const char * short_str, const char * long_str); +bool is_substr(const wchar_t * short_str, const wchar_t * long_str); + +bool is_substr(const std::string & short_str, const std::string & long_str); +bool is_substr(const std::wstring & short_str, const std::wstring & long_str); + +bool is_substr_nc(const char * short_str, const char * long_str); +bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str); + +bool is_substr_nc(const std::string & short_str, const std::string & long_str); +bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str); + + + +} + + +#endif diff --git a/src/convert/text_private.h b/src/convert/text_private.h new file mode 100644 index 0000000..2b93ac4 --- /dev/null +++ b/src/convert/text_private.h @@ -0,0 +1,309 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_convert_text_private +#define headerfile_picotools_convert_text_private + +#include +#include "text.h" + + +namespace pt +{ + +namespace pt_private +{ + +template +CharType to_lower_generic(CharType c) +{ + if( c >= 'A' && c <= 'Z' ) + return c - 'A' + 'a'; + + return c; +} + + +template +CharType to_upper_generic(CharType c) +{ + if( c >= 'a' && c <= 'z' ) + return c - 'a' + 'A'; + + return c; +} + + +template +void to_lower_str_generic(StringType & s) +{ + typename StringType::size_type i; + + for(i=0 ; i +void to_upper_str_generic(StringType & s) +{ + typename StringType::size_type i; + + for(i=0 ; i +CharType * skip_white_generic(CharType * str, bool check_additional_chars, bool treat_new_line_as_white) +{ + while( is_white(static_cast(*str), check_additional_chars, treat_new_line_as_white) ) + { + str += 1; + } + + return str; +} + + +template +CharType * skip_white_from_back_generic(CharType * str_begin, CharType * str_end, bool check_additional_chars, bool treat_new_line_as_white) +{ + while( str_end > str_begin && is_white(static_cast(*(str_end-1)), check_additional_chars, treat_new_line_as_white) ) + { + str_end -= 1; + } + + return 
str_end; +} + + +template +CharType * skip_white_from_back_generic(CharType * str, bool check_additional_chars, bool treat_new_line_as_white) +{ + CharType * str_begin = str; + + while( *str != 0 ) + { + str += 1; + } + + return skip_white_from_back_generic(str_begin, str, check_additional_chars, treat_new_line_as_white); +} + + + + +template +int compare_generic(const StringType1 * str1, const StringType2 * str2) +{ + while( *str1 && *str2 && *str1 == *str2 ) + { + ++str1; + ++str2; + } + + if( *str1 == 0 && *str2 == 0 ) + return 0; + + int c1; + int c2; + + if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1) + { + c1 = (wchar_t)(unsigned char)(*str1); + c2 = (wchar_t)(unsigned char)(*str2); + } + else + { + c1 = *str1; + c2 = *str2; + } + + return c1 - c2; +} + + +template +int compare_str_generic(const StringType1 & str1, const StringType2 & str2) +{ + return compare_generic(str1.c_str(), str2.c_str()); +} + + +template +int compare_generic(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2) +{ + while( str1_begin < str1_end && *str2 && *str1_begin == *str2 ) + { + ++str1_begin; + ++str2; + } + + if( str1_begin == str1_end && *str2 == 0 ) + return 0; + + int c1; + int c2; + + if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1) + { + c1 = str1_begin < str1_end ? (wchar_t)(unsigned char)(*str1_begin) : 0; + c2 = (wchar_t)(unsigned char)(*str2); + } + else + { + c1 = str1_begin < str1_end ? 
*str1_begin : 0; + c2 = *str2; + } + + return c1 - c2; +} + + + +template +int compare_nc_generic(const StringType1 * str1, const StringType2 * str2) +{ + while( *str1 && *str2 && to_lower(*str1) == to_lower(*str2) ) + { + ++str1; + ++str2; + } + + if( *str1 == 0 && *str2 == 0 ) + return 0; + + int c1; + int c2; + + if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1) + { + c1 = to_lower((wchar_t)(unsigned char)(*str1)); + c2 = to_lower((wchar_t)(unsigned char)(*str2)); + } + else + { + c1 = to_lower(*str1); + c2 = to_lower(*str2); + } + + return c1 - c2; +} + + +template +int compare_nc_str_generic(const StringType1 & str1, const StringType2 & str2) +{ + return compare_nc(str1.c_str(), str2.c_str()); +} + + +template +int compare_nc_generic(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2) +{ + while( str1_begin < str1_end && *str2 && to_lower(*str1_begin) == to_lower(*str2) ) + { + ++str1_begin; + ++str2; + } + + if( str1_begin == str1_end && *str2 == 0 ) + return 0; + + int c1; + int c2; + + if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1) + { + c1 = str1_begin < str1_end ? to_lower((wchar_t)(unsigned char)(*str1_begin)) : 0; + c2 = to_lower((wchar_t)(unsigned char)(*str2)); + } + else + { + c1 = str1_begin < str1_end ? 
to_lower(*str1_begin) : 0; + c2 = to_lower(*str2); + } + + return c1 - c2; +} + + + + +template +bool is_substr_generic(const StringType1 * short_str, const StringType2 * long_str) +{ + while( *short_str && *long_str && *short_str == *long_str ) + { + ++short_str; + ++long_str; + } + + if( *short_str == 0 ) + return true; + + return false; +} + + + +template +bool is_substr_nc_generic(const StringType1 * short_str, const StringType2 * long_str) +{ + while( *short_str && *long_str && to_lower(*short_str) == to_lower(*long_str) ) + { + ++short_str; + ++long_str; + } + + if( *short_str == 0 ) + return true; + + return false; +} + + + + +} // namespace pt_private + +} // namespace pt + + +#endif + diff --git a/src/csv/csvparser.cpp b/src/csv/csvparser.cpp new file mode 100644 index 0000000..4ab1480 --- /dev/null +++ b/src/csv/csvparser.cpp @@ -0,0 +1,416 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "csvparser.h" +#include "utf8/utf8.h" + + + +namespace pt +{ + + + + +CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space) +{ + reading_from_file = true; + space = &out_space; + + file.clear(); + file.open(file_name, std::ios_base::binary | std::ios_base::in); + + if( file ) + { + parse(); + file.close(); + } + else + { + status = cant_open_file; + } + + return status; +} + + + +CSVParser::Status CSVParser::parse_file(const std::string & file_name, Space & out_space) +{ + return parse_file(file_name.c_str(), out_space); +} + + + + +CSVParser::Status CSVParser::parse_file(const wchar_t * file_name, Space & out_space) +{ + std::string file_name_utf8; + + wide_to_utf8(file_name, file_name_utf8); + return parse_file(file_name_utf8.c_str(), out_space); +} + + + +CSVParser::Status CSVParser::parse_file(const std::wstring & file_name, Space & out_space) +{ + return parse_file(file_name.c_str(), out_space); +} + + + + + +CSVParser::Status CSVParser::parse(const char * str, Space & out_space) +{ + reading_from_file = false; + reading_from_wchar_string = false; + pchar_ascii = str; + pchar_unicode = 0; + space = &out_space; + + parse(); + + 
return status; +} + + + +CSVParser::Status CSVParser::parse(const std::string & str, Space & out_space) +{ + return parse(str.c_str(), out_space); +} + + +CSVParser::Status CSVParser::parse(const wchar_t * str, Space & out_space) +{ + reading_from_file = false; + reading_from_wchar_string = true; + pchar_unicode = str; + pchar_ascii = 0; + space = &out_space; + + parse(); + + return status; +} + + +CSVParser::Status CSVParser::parse(const std::wstring & str, Space & out_space) +{ + return parse(str.c_str(), out_space); +} + + + +void CSVParser::parse() +{ + line = 1; + status = ok; + + space->set_empty_table(); + read_char(); // put first character to lastc + + if( lastc == -1 ) + { + // an empty file/string, in such a case we return such a space struct (if would be serialized to json): [[]] + Space row_space; + row_space.set_empty_table(); + space->add(std::move(row_space)); + } + + while( lastc != -1 ) + { + /* + * even if there is an error when parsing we continue to read the file/string + * + */ + + Space row_space; + row_space.set_empty_table(); + + parse_row(row_space); + space->add(std::move(row_space)); + } +} + + +void CSVParser::parse_row(Space & row_space) +{ + bool continue_reading; + + do + { + continue_reading = read_value_to(row_space); + } + while(continue_reading); +} + + +bool CSVParser::read_value_to(Space & row_space) +{ + Space & space_value = row_space.add_empty_space(); + space_value.set_empty_wstring(); + + if( lastc == '"' ) + { + return read_quoted_value_to(space_value.value.value_wstring); + } + else + { + return read_non_quoted_value_to(space_value.value.value_wstring); + } +} + + +bool CSVParser::read_quoted_value_to(std::wstring & value) +{ + bool is_comma = false; + bool is_value_character = true; + + while( lastc != -1 && is_value_character ) + { + read_char(); + + if( lastc == '"' ) + { + read_char(); + + if( lastc == '"' ) + { + value += lastc; + } + else + { + is_value_character = false; + } + } + else + if( lastc != -1 ) + { + 
value += lastc; + } + } + + if( lastc == ',' ) + { + is_comma = true; + read_char(); // skip the comma character + } + else + if( lastc == 13 ) + { + read_char(); // skip CR character + + if( lastc == 10 ) + read_char(); + } + else + if( lastc == 10 ) + { + read_char(); // skip new line character + } + + return is_comma; +} + + +bool CSVParser::read_non_quoted_value_to(std::wstring & value) +{ + while( lastc != -1 && lastc != ',' && lastc != 10 ) + { + value += lastc; + read_char(); + } + + bool is_comma = (lastc == ','); + + if( is_comma ) + { + read_char(); // skip the comma character + } + else + { + bool is_new_line = (lastc == 10); + + // check CRLF sequence + if( is_new_line && !value.empty() && value.back() == 13 ) + { + value.erase(value.size() - 1, 1); + } + + if( is_new_line ) + { + read_char(); // skip the new line character + } + } + + return is_comma; +} + + + + +int CSVParser::read_utf8_char() +{ +int c; +bool correct; + + lastc = -1; + + do + { + utf8_to_int(file, c, correct); + + if( !file ) + return lastc; + } + while( !correct ); + + lastc = c; + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + + +int CSVParser::read_ascii_char() +{ + lastc = file.get(); + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + + + +int CSVParser::read_char_from_wchar_string() +{ + if( *pchar_unicode == 0 ) + lastc = -1; + else + lastc = *(pchar_unicode++); + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + +int CSVParser::read_char_from_utf8_string() +{ +int c; +bool correct; + + lastc = -1; + + do + { + size_t len = utf8_to_int(pchar_ascii, c, correct); + pchar_ascii += len; + } + while( *pchar_ascii && !correct ); + + if( correct ) + lastc = c; + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + +int CSVParser::read_char_from_ascii_string() +{ + if( *pchar_ascii == 0 ) + lastc = -1; + else + lastc = (unsigned char)(*(pchar_ascii++)); /* read the byte as 0..255 (like file.get()), so that 0xFF is not mistaken for the end marker (-1) */ + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + +int CSVParser::read_char_no_escape() +{ + if( reading_from_file ) + 
{ + if( input_as_utf8 ) + return read_utf8_char(); + else + return read_ascii_char(); + } + else + { + if( reading_from_wchar_string ) + { + return read_char_from_wchar_string(); + } + else + { + if( input_as_utf8 ) + return read_char_from_utf8_string(); + else + return read_char_from_ascii_string(); + } + } +} + + + + +int CSVParser::read_char() +{ + return read_char_no_escape(); +} + + +} + + diff --git a/src/csv/csvparser.h b/src/csv/csvparser.h new file mode 100644 index 0000000..8370867 --- /dev/null +++ b/src/csv/csvparser.h @@ -0,0 +1,162 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_csv_csvparser +#define headerfile_picotools_csv_csvparser + +#include "space/space.h" +#include +#include + + +namespace pt +{ + +/* + * + * https://datatracker.ietf.org/doc/html/rfc4180 + * + */ +class CSVParser +{ +public: + + enum Status + { + ok, + cant_open_file, + }; + + + Status parse_file(const char * file_name, Space & out_space); + Status parse_file(const std::string & file_name, Space & out_space); + Status parse_file(const wchar_t * file_name, Space & out_space); + Status parse_file(const std::wstring & file_name, Space & out_space); + + Status parse(const char * str, Space & out_space); + Status parse(const std::string & str, Space & out_space); + Status parse(const wchar_t * str, Space & out_space); + Status parse(const std::wstring & str, Space & out_space); + + + +protected: + + + /* + the last status of parsing, set by Parse() methods + */ + Status status; + + + Space * space; + + /* + true if parse_file() method was called + false if parse() was called + */ + bool reading_from_file; + + /* + true if parse(wchar_t *) or parse(std::wstring&) was called + */ + bool reading_from_wchar_string; + + /* + pointers to the current character + if parse() is being used + */ + const char * pchar_ascii; + const wchar_t * pchar_unicode; + + + /* + last read char + or -1 if the end + */ + int lastc; + + + + /* + a number of a line in which there is a syntax_error + */ + int line; + + /* + 
current file + + maybe it would be better to make it a pointer? + if we parse only a string then there is no sense to have such an object + */ + std::ifstream file; + + /* + input file is in UTF-8 + default: true + */ + bool input_as_utf8 = true; + + + + + void parse(); + void parse_row(Space & row_space); + + bool read_value_to(Space & row_space); + bool read_quoted_value_to(std::wstring & value); + bool read_non_quoted_value_to(std::wstring & value); + + + + /* + * copied from SpaceParser + * maybe it would be better to have a class with those methods and inherit from it? + */ + int read_utf8_char(); + int read_ascii_char(); + int read_char_from_wchar_string(); + int read_char_from_utf8_string(); + int read_char_from_ascii_string(); + int read_char_no_escape(); + + int read_char(); +}; + +} + +#endif diff --git a/date/date.cpp b/src/date/date.cpp similarity index 99% rename from date/date.cpp rename to src/date/date.cpp index 1e79c39..6ba05d4 100644 --- a/date/date.cpp +++ b/src/date/date.cpp @@ -41,7 +41,7 @@ #include -namespace PT +namespace pt { diff --git a/date/date.h b/src/date/date.h similarity index 99% rename from date/date.h rename to src/date/date.h index f17b7ab..59938c3 100644 --- a/date/date.h +++ b/src/date/date.h @@ -44,7 +44,7 @@ -namespace PT +namespace pt { diff --git a/log/filelog.cpp b/src/log/filelog.cpp similarity index 92% rename from log/filelog.cpp rename to src/log/filelog.cpp index e4de22e..afb76f3 100644 --- a/log/filelog.cpp +++ b/src/log/filelog.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2018, Tomasz Sowa + * Copyright (c) 2018-2021, Tomasz Sowa * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -39,7 +39,7 @@ -namespace PT +namespace pt { @@ -74,7 +74,7 @@ void FileLog::init(const std::wstring & log_file, bool log_stdout, int log_level this->log_stdout = log_stdout; this->log_level = log_level; this->save_each_line = save_each_line; - PT::WideToUTF8(log_file, this->log_file); + wide_to_utf8(log_file, this->log_file); } @@ -100,7 +100,7 @@ void FileLog::open_file() } -void FileLog::save_log(PT::WTextStream * buffer) +void FileLog::save_log(WTextStream * buffer) { if( buffer->empty() ) return; @@ -111,7 +111,7 @@ void FileLog::save_log(PT::WTextStream * buffer) { if( log_stdout ) { - PT::WideToUTF8(*buffer, std::cout); + wide_stream_to_utf8(*buffer, std::cout); } if( !log_file.empty() ) @@ -126,7 +126,7 @@ void FileLog::save_log(PT::WTextStream * buffer) if( file ) { - PT::WideToUTF8(*buffer, file); + wide_stream_to_utf8(*buffer, file); file.flush(); } } diff --git a/log/filelog.h b/src/log/filelog.h similarity index 97% rename from log/filelog.h rename to src/log/filelog.h index abf6555..07f8701 100644 --- a/log/filelog.h +++ b/src/log/filelog.h @@ -42,7 +42,7 @@ #include "textstream/textstream.h" -namespace PT +namespace pt { @@ -55,7 +55,7 @@ public: virtual ~FileLog(); virtual void init(const std::wstring & log_file, bool log_stdout, int log_level, bool save_each_line); - virtual void save_log(PT::WTextStream * buffer); + virtual void save_log(WTextStream * buffer); virtual int get_log_level(); virtual bool should_save_each_line(); diff --git a/log/log.cpp b/src/log/log.cpp similarity index 93% rename from log/log.cpp rename to src/log/log.cpp index d3903b6..fb7ec91 100644 --- a/log/log.cpp +++ b/src/log/log.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2018, Tomasz Sowa + * Copyright (c) 2018-2021, Tomasz Sowa * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,8 +41,11 @@ #include "date/date.h" #include "utf8/utf8.h" +#ifdef PT_HAS_MORM +#include "morm.h" +#endif -namespace PT +namespace pt { @@ -70,13 +73,13 @@ Log::~Log() } -void Log::SetLogBuffer(PT::WTextStream * buffer) +void Log::SetLogBuffer(WTextStream * buffer) { this->buffer = buffer; } -PT::WTextStream * Log::GetLogBuffer() +WTextStream * Log::GetLogBuffer() { return buffer; } @@ -133,7 +136,7 @@ Log & Log::operator<<(const char * s) { if( buffer && file_log && s && current_level <= file_log->get_log_level() ) { - PT::UTF8ToWide(s, *buffer, false); + utf8_to_wide(s, *buffer, false); } return *this; @@ -145,7 +148,7 @@ Log & Log::operator<<(const std::string & s) { if( buffer && file_log && current_level <= file_log->get_log_level() ) { - PT::UTF8ToWide(s, *buffer, false); + utf8_to_wide(s, *buffer, false); } return *this; @@ -157,7 +160,7 @@ Log & Log::operator<<(const std::string * s) { if( buffer && file_log && current_level <= file_log->get_log_level() ) { - PT::UTF8ToWide(*s, *buffer, false); + utf8_to_wide(*s, *buffer, false); } return *this; @@ -287,7 +290,7 @@ Log & Log::operator<<(double s) -Log & Log::operator<<(const PT::Space & s) +Log & Log::operator<<(const Space & s) { if( buffer && file_log && current_level <= file_log->get_log_level() ) { @@ -299,7 +302,7 @@ Log & Log::operator<<(const PT::Space & s) -Log & Log::operator<<(const PT::Date & date) +Log & Log::operator<<(const Date & date) { if( buffer && file_log && current_level <= file_log->get_log_level() ) { @@ -310,6 +313,14 @@ Log & Log::operator<<(const PT::Date & date) } +#ifdef PT_HAS_MORM +Log & Log::operator<<(morm::Model & model) +{ + operator<<(model.to_string()); + return *this; +} +#endif + Log & Log::operator<<(Manipulators m) { diff --git a/log/log.h b/src/log/log.h similarity index 89% rename from log/log.h rename to src/log/log.h index ec1011f..d5073ae 100644 --- a/log/log.h +++ b/src/log/log.h @@ 
-5,7 +5,7 @@ */ /* - * Copyright (c) 2018, Tomasz Sowa + * Copyright (c) 2018-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,7 +45,13 @@ -namespace PT +namespace morm +{ + class Model; +} + + +namespace pt { @@ -87,8 +93,8 @@ public: Log(); virtual ~Log(); - virtual void SetLogBuffer(PT::WTextStream * buffer); - virtual PT::WTextStream * GetLogBuffer(); + virtual void SetLogBuffer(WTextStream * buffer); + virtual WTextStream * GetLogBuffer(); void SetFileLog(FileLog * file_log); FileLog * GetFileLog(); @@ -122,17 +128,24 @@ public: //virtual Log & operator<<(float s); // added virtual Log & operator<<(double s); - virtual Log & operator<<(const PT::Space & space); - virtual Log & operator<<(const PT::Date & date); + virtual Log & operator<<(const Space & space); + virtual Log & operator<<(const Date & date); + +#ifdef PT_HAS_MORM + virtual Log & operator<<(morm::Model & model); +#endif virtual Log & operator<<(Manipulators m); + + + virtual Log & LogString(const std::string & value, size_t max_size); virtual Log & LogString(const std::wstring & value, size_t max_size); template - Log & operator<<(const PT::TextStreamBase & buf); + Log & operator<<(const TextStreamBase & buf); @@ -144,7 +157,7 @@ public: protected: // buffer for the log - PT::WTextStream * buffer; + WTextStream * buffer; // file logger FileLog * file_log; @@ -192,7 +205,7 @@ Log & Log::log_string_generic(const StringType & value, size_t max_size) template -Log & Log::operator<<(const PT::TextStreamBase & buf) +Log & Log::operator<<(const TextStreamBase & buf) { if( buffer && file_log && current_level <= file_log->get_log_level() ) { diff --git a/src/mainoptions/mainoptionsparser.cpp b/src/mainoptions/mainoptionsparser.cpp new file mode 100644 index 0000000..ae839b5 --- /dev/null +++ b/src/mainoptions/mainoptionsparser.cpp @@ -0,0 +1,397 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD 
licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2016-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include "mainoptionsparser.h" +#include "utf8/utf8.h" +#include + + + +namespace pt +{ + + + +MainOptionsParser::MainOptionsParser() +{ + space = 0; + arguments_required_space = 0; + should_use_utf8 = true; + last_status = status_ok; + non_option_arguments_name = L"args"; +} + + + +MainOptionsParser::~MainOptionsParser() +{ +} + + +void MainOptionsParser::use_utf8(bool utf8) +{ + should_use_utf8 = utf8; +} + + +void MainOptionsParser::set_non_options_arguments_name(const wchar_t * name) +{ + non_option_arguments_name = name; +} + + +void MainOptionsParser::set_non_options_arguments_name(const std::wstring & name) +{ + non_option_arguments_name = name; +} + + + +std::wstring & MainOptionsParser::get_wrong_option() +{ + return last_error_option; +} + +MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv, Space & out_space) +{ + space = &out_space; + arguments_required_space = nullptr; + + return parse(argc, argv); +} + + +MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv, Space & out_space, const Space & arguments) +{ + space = &out_space; + arguments_required_space = &arguments;; + + return parse(argc, argv); +} + + +MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv) +{ + last_status = status_ok; + last_error_option.clear(); + space->set_empty_object(); + + for(size_t i=1 ; i < (size_t)argc && last_status == status_ok ; ) + { + parse((size_t)argc, argv, i); + } + + options.clear(); + option.clear(); + argument.clear(); + arguments.clear(); + + return last_status; +} + + +void MainOptionsParser::parse(size_t argc, const char ** argv, size_t & argv_index) +{ + const char * pchar = argv[argv_index]; + + if( *pchar == '-' ) + { + if( *(pchar+1) == '-' && *(pchar+2) == 0 ) + { + // two hyphens only "--" + argv_index += 1; + parse_non_option_arguments(argc, argv, argv_index); + } + else + if( *(pchar+1) == '-' ) + { + // two hyphens and a string, such as "--abc" + 
parse_long_option(argc, argv, argv_index); + } + else + if( *(pchar+1) != 0 ) + { + // one hyphen and a string, such as "-abc" + parse_short_option(argc, argv, argv_index); + } + else + { + parse_non_option_arguments(argc, argv, argv_index); + } + } + else + { + parse_non_option_arguments(argc, argv, argv_index); + } +} + + +void MainOptionsParser::convert_str(const char * src, std::wstring & dst) +{ + if( should_use_utf8 ) + { + utf8_to_wide(src, dst); + } + else + { + dst.clear(); + + for( ; *src ; ++src ) + dst += (wchar_t)(unsigned char)*src; + } +} + + +void MainOptionsParser::convert_str(const char * src, size_t len, std::wstring & dst) +{ + if( should_use_utf8 ) + { + utf8_to_wide(src, len, dst); + } + else + { + dst.clear(); + + for(size_t i=0 ; i < len ; ++i) + dst += (wchar_t)(unsigned char)src[i]; + } +} + + +void MainOptionsParser::convert_str(const std::wstring & src, Space & space) +{ + if( should_use_utf8 ) + { + space.set_empty_wstring(); + space.value.value_wstring = src; + } + else + { + space.set_empty_string(); + std::string & dst = space.value.value_string; + + dst.clear(); + + for(size_t i=0 ; i < src.size() ; ++i) + dst += (char)src[i]; + } +} + + +void MainOptionsParser::parse_short_option(size_t argc, const char ** argv, size_t & argv_index) +{ + convert_str(argv[argv_index] + 1, options); + const wchar_t * options_pchar = options.c_str(); + + arguments.clear(); + bool was_argument = false; + argv_index += 1; + + for( ; *options_pchar && !was_argument && last_status == status_ok ; ++options_pchar ) + { + option = *options_pchar; + size_t args_len = how_many_arguments_required(option); + + if( args_len > 0 ) + { + was_argument = true; + + if( *(options_pchar+1) ) + { + // first argument is directly behind the option + argument = options_pchar + 1; + arguments.push_back(argument); + args_len -= 1; + } + + parse_arguments(argc, argv, argv_index, args_len); + } + + add_option_to_space(option, arguments); + } +} + + +void 
MainOptionsParser::parse_long_option(size_t argc, const char ** argv, size_t & argv_index) +{ + const char * option_begin = argv[argv_index] + 2; // skip first two hyphens -- + const char * option_end = option_begin; + bool is_equal_form = false; // is the option in the form with equal sign, such as: option=argument + + while( *option_end != 0 && *option_end != '=' ) + { + option_end += 1; + } + + if( *option_end == '=' ) + { + is_equal_form = true; + convert_str(option_begin, option_end - option_begin, option); + convert_str(option_end + 1, argument); + } + else + { + convert_str(option_begin, option); + } + + argv_index += 1; + size_t args_len = how_many_arguments_required(option); + arguments.clear(); + + if( is_equal_form ) + { + if( args_len == 0 ) + { + if( !argument.empty() ) + { + // report an error + last_status = status_argument_provided; + last_error_option = option; + } + } + else + if( args_len == 1 ) + { + // argument can be empty in such a case: option= + // we treat it as if the argument would not be provided + if( !argument.empty() ) + { + arguments.push_back(argument); + args_len -= 1; + } + } + else + { + // args_len is > 1 but when using option=argument form + // we can provide only one argument + last_status = status_argument_not_provided; + last_error_option = option; + } + } + + if( last_status == status_ok ) + { + parse_arguments(argc, argv, argv_index, args_len); + add_option_to_space(option, arguments); + } +} + + +void MainOptionsParser::parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len) +{ + for( ; args_len > 0 && argv_index < argc ; --args_len, ++argv_index) + { + convert_str(argv[argv_index], argument); + arguments.push_back(argument); + } + + if( args_len > 0 ) + { + last_status = status_argument_not_provided; + last_error_option = option; + } +} + + + +void MainOptionsParser::parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index) +{ + Space * table_with_args = new Space(); 
+ table_with_args->set_empty_table(); + + for( ; argv_index < argc ; ++argv_index) + { + convert_str(argv[argv_index], argument); + table_with_args->add(argument); + } + + space->add(non_option_arguments_name, table_with_args); +} + + +void MainOptionsParser::add_option_to_space(const std::wstring & option, const std::vector & arguments) +{ + Space * option_table = space->get_object_field(option); + + if( !option_table ) + { + option_table = &space->add_empty_space(option); + } + + if( !option_table->is_table()) + { + option_table->set_empty_table(); + } + + Space * arguments_table = new Space(); + arguments_table->set_empty_table(); + + for(const std::wstring & arg : arguments) + { + Space & space_arg = arguments_table->add_empty_space(); + convert_str(arg, space_arg); + } + + option_table->add(arguments_table); +} + + + +size_t MainOptionsParser::how_many_arguments_required(const std::wstring & arg) +{ + size_t res = 0; + + if( arguments_required_space && arguments_required_space->is_object() ) + { + long res_long = arguments_required_space->to_llong(arg, 0); + + if( res_long < 0 ) + res_long = 0; + + res = (size_t)res_long; + // argument 'arg' needs 'res' options + } + + return res; +} + + + +} // namespace + + diff --git a/src/mainoptions/mainoptionsparser.h b/src/mainoptions/mainoptionsparser.h new file mode 100644 index 0000000..0930dc3 --- /dev/null +++ b/src/mainoptions/mainoptionsparser.h @@ -0,0 +1,162 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2016-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef headerfile_picotools_mainoptions_mainoptionsparser +#define headerfile_picotools_mainoptions_mainoptionsparser + +#include "space/space.h" +#include +#include + + +namespace pt +{ + + +/* + * a very small parser for parsing main(int argc, char ** argv) parameters + * + * + */ +class MainOptionsParser +{ +public: + + MainOptionsParser(); + ~MainOptionsParser(); + + + /* + * status_ok - all arguments have been parsed correctly + * + * status_argument_provided - an argument has been provided but was not requested + * this can happen when using long form with equal sign, such as: --option=argument + * and in 'options' space the option either was not set or has zero requested arguments + * + * status_argument_not_provided - an argument or arguments are required but were not provided + * this can be returned in two situations: + * 1. when using long form with equal sign, such as: --option=argument and in 'options' space + * you have requested more than one argument + * 2. 
when reading arguments and the input strings ended + * + */ + enum Status + { + status_ok = 0, + status_argument_provided = 1, + status_argument_not_provided = 2, + }; + + + /* + * the name of a field in the output Struct space for non-option arguments (those after two hyphens -- or starting from the first argument which is not an option) + * default: L"args" + * they will be set as a table of strings/wstrings + * + */ + void set_non_options_arguments_name(const wchar_t * name); + void set_non_options_arguments_name(const std::wstring & name); + + + /* + * parse parameters + * argc argv have the same meaning as in the main(int argc, const char ** argv) method + * the first argument from argv is usually the name of the program and is skipped by this parser + * + * return value: look at the description of the Status enum + * + */ + Status parse(int argc, const char ** argv, Space & out_space); + Status parse(int argc, const char ** argv, Space & out_space, const Space & arguments); + + + /* + * whether or not options arguments should be converted from utf8 char* strings to wide strings (std::wstring) + * default true + * + * if true all arguments in Space struct will be saved as std::wstring + * if false all arguments will be std::string (they are read as they are without checking + * whether correct utf8 characters are encountered) + * + * arguments are always held as std::wstring (in such a case is defined ObjectType in Space struct) + * when using use_utf8(false) characters will not be treated as an utf8 string but just all 8bit char bytes + * will be copied to std::wstring + * + */ + void use_utf8(bool utf8); + + + /* + * return the last option name which was incorrectly parsed + * or an empty string if status was equal to status_ok + */ + std::wstring & get_wrong_option(); + + +private: + + Space * space; + const Space * arguments_required_space; + std::wstring non_option_arguments_name; + std::wstring options, option, argument; + std::vector arguments; + bool should_use_utf8; + Status last_status; + std::wstring 
last_error_option; + + + void convert_str(const char * src, std::wstring & dst); + void convert_str(const char * src, size_t len, std::wstring & dst); + void convert_str(const std::wstring & src, Space & space); + + Status parse(int argc, const char ** argv); + void parse(size_t argc, const char ** argv, size_t & argv_index); + void parse_short_option(size_t argc, const char ** argv, size_t & argv_index); + void parse_long_option(size_t argc, const char ** argv, size_t & argv_index); + void parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len); + void parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index); + size_t how_many_arguments_required(const std::wstring & arg); + void add_option_to_space(const std::wstring & option, const std::vector & arguments); + +}; + + + +} // namespace + + +#endif diff --git a/membuffer/membuffer.h b/src/membuffer/membuffer.h similarity index 99% rename from membuffer/membuffer.h rename to src/membuffer/membuffer.h index dea0493..8c14010 100644 --- a/membuffer/membuffer.h +++ b/src/membuffer/membuffer.h @@ -41,7 +41,7 @@ #include -namespace PT +namespace pt { /* diff --git a/src/space/space.cpp b/src/space/space.cpp new file mode 100644 index 0000000..bc334c2 --- /dev/null +++ b/src/space/space.cpp @@ -0,0 +1,2544 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2008-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include "space.h" +#include "utf8/utf8.h" +#include "convert/convert.h" + + +namespace pt +{ + +Space::Space() +{ + initialize(); +} + + +Space::Space(const Space & space) +{ + initialize(); + copy_from(space); +} + +Space::Space(Space && space) +{ + initialize(); + move_from(std::move(space)); +} + + +Space & Space::operator=(const Space & space) +{ + copy_from(space); + return *this; +} + + +Space & Space::operator=(Space && space) +{ + move_from(std::move(space)); + return *this; +} + + +Space::~Space() +{ + remove_value(); +} + + + +Space::Space(bool val) +{ + initialize(); + set(val); +} + +Space::Space(short val) +{ + initialize(); + set(val); +} + +Space::Space(int val) +{ + initialize(); + set(val); +} + +Space::Space(long val) +{ + initialize(); + set(val); +} + + +Space::Space(long long val) +{ + initialize(); + set(val); +} + +Space::Space(unsigned short val) +{ + initialize(); + set(val); +} + +Space::Space(unsigned int val) +{ + initialize(); + set(val); +} + +Space::Space(unsigned long val) +{ + initialize(); + set(val); +} + + +Space::Space(unsigned long long val) +{ + initialize(); + set(val); +} + +Space::Space(float val) +{ + initialize(); + set(val); +} + +Space::Space(double val) +{ + initialize(); + set(val); +} + + +Space::Space(const char * str) +{ + initialize(); + set(str); +} + +Space::Space(const wchar_t * str) +{ + initialize(); + set(str); +} + +Space::Space(const std::string & str) +{ + initialize(); + set(str); +} + +Space::Space(const std::wstring & str) +{ + initialize(); + set(str); +} + +Space::Space(const Space * space) +{ + initialize(); + set(space); +} + + +void Space::clear() +{ + set_null(); +} + + + +size_t Space::str_size() const +{ + if( is_str() ) + { + return value.value_string.size(); + } + + return 0; +} + + +size_t Space::wstr_size() const +{ + if( is_wstr() ) + { + return value.value_wstring.size(); + } + + return 0; +} + + +size_t Space::object_size() const +{ + if( is_object() ) + { + return 
value.value_object.size(); + } + + return 0; +} + + +size_t Space::table_size() const +{ + if( is_table() ) + { + return value.value_table.size(); + } + + return 0; +} + + + + + + +void Space::set_null() +{ + initialize_value_null_if_needed(); +} + + +void Space::set_empty_string() +{ + if( type == type_string ) + { + value.value_string.clear(); + } + else + { + initialize_value_string_if_needed(); + } +} + +void Space::set_empty_wstring() +{ + if( type == type_wstring ) + { + value.value_wstring.clear(); + } + else + { + initialize_value_wstring_if_needed(); + } +} + +void Space::set_empty_table() +{ + if( type == type_table ) + { + value.value_table.clear(); + } + else + { + initialize_value_table_if_needed(); + } +} + +void Space::set_empty_object() +{ + if( type == type_object ) + { + value.value_object.clear(); + } + else + { + initialize_value_object_if_needed(); + } +} + + + + +void Space::set(bool val) +{ + initialize_value_bool_if_needed(); + value.value_bool = val; +} + +void Space::set(short val) +{ + set(static_cast(val)); +} + +void Space::set(int val) +{ + set(static_cast(val)); +} + +void Space::set(long val) +{ + set(static_cast(val)); +} + +void Space::set(long long val) +{ + initialize_value_long_if_needed(); + value.value_long = val; +} + +void Space::set(unsigned short val) +{ + set(static_cast(val)); +} + +void Space::set(unsigned int val) +{ + set(static_cast(val)); +} + +void Space::set(unsigned long val) +{ + set(static_cast(val)); +} + +void Space::set(unsigned long long val) +{ + set(static_cast(val)); +} + +void Space::set(float val) +{ + initialize_value_float_if_needed(); + value.value_float = val; +} + +void Space::set(double val) +{ + initialize_value_double_if_needed(); + value.value_double = val; +} + + +void Space::set(const char * str) +{ + if( str == nullptr ) + { + initialize_value_null_if_needed(); + } + else + { + initialize_value_string_if_needed(); + value.value_string = str; + } +} + +void Space::set(const wchar_t * str) +{ 
+ if( str == nullptr ) + { + initialize_value_null_if_needed(); + } + else + { + initialize_value_wstring_if_needed(); + value.value_wstring = str; + } +} + +void Space::set(const std::string & str) +{ + initialize_value_string_if_needed(); + value.value_string = str; +} + +void Space::set(const std::wstring & str) +{ + initialize_value_wstring_if_needed(); + value.value_wstring = str; +} + +void Space::set(const Space & space) +{ + copy_from(space); +} + +void Space::set(const Space * space) +{ + if( space == nullptr ) + { + initialize_value_null_if_needed(); + } + else + { + copy_from(*space); + } +} + +void Space::set(Space && space) +{ + move_from(std::move(space)); +} + + + +Space & Space::add(bool val) +{ + return add_generic(val); +} + +Space & Space::add(short val) +{ + return add_generic(val); +} + +Space & Space::add(int val) +{ + return add_generic(val); +} + +Space & Space::add(long val) +{ + return add_generic(val); +} + +Space & Space::add(long long val) +{ + return add_generic(val); +} + +Space & Space::add(unsigned short val) +{ + return add_generic(val); +} + +Space & Space::add(unsigned int val) +{ + return add_generic(val); +} + +Space & Space::add(unsigned long val) +{ + return add_generic(val); +} + +Space & Space::add(unsigned long long val) +{ + return add_generic(val); +} + +Space & Space::add(float val) +{ + return add_generic(val); +} + +Space & Space::add(double val) +{ + return add_generic(val); +} + +Space & Space::add(const char * val) +{ + return add_generic(val); +} + +Space & Space::add(const wchar_t * val) +{ + return add_generic(val); +} + +Space & Space::add(const std::string & val) +{ + return add_generic(val); +} + +Space & Space::add(const std::wstring & val) +{ + return add_generic(val); +} + +Space & Space::add(const Space & space) +{ + return add_generic(space); +} + +Space & Space::add(const Space * space) +{ + return add_generic(space); +} + +Space & Space::add(Space && space) +{ + initialize_value_table_if_needed(); + 
Space * new_space = new Space(std::move(space)); + value.value_table.push_back(new_space); + return *value.value_table.back(); +} + + +Space & Space::add_empty_space() +{ + return add_generic(static_cast(nullptr)); +} + + + + +Space & Space::add(const wchar_t * field, bool val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, short val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, int val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, long long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, unsigned short val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, unsigned int val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, unsigned long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, unsigned long long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, float val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, double val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, const char * val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, const wchar_t * val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, const std::string & val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, const std::wstring & val) +{ + return add_generic(field, val); +} + +Space & Space::add(const wchar_t * field, const Space & space) +{ + return add_generic(field, space); +} + +Space & Space::add(const wchar_t * field, const Space * space) +{ + return add_generic(field, space); +} + +Space & Space::add(const wchar_t 
* field, Space && space) +{ + initialize_value_object_if_needed(); + + // insert() leaves an already existing field untouched and only reports it in insert_res.second + auto insert_res = value.value_object.insert(std::make_pair(field, nullptr)); + + if( insert_res.second ) + { + // a new field has been inserted, it still holds a null pointer + insert_res.first->second = new Space(std::move(space)); + } + else + { + // the field already exists: reuse its object, + // overwriting the pointer with a new allocation would leak the previous one + insert_res.first->second->set(std::move(space)); + } + + return *(insert_res.first->second); +} + +Space & Space::add_empty_space(const wchar_t * field) +{ + return add_generic(field, static_cast(nullptr)); +} + + + + +Space & Space::add(const std::wstring & field, bool val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, short val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, int val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, long long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, unsigned short val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, unsigned int val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, unsigned long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, unsigned long long val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, float val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, double val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, const char * val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, const wchar_t * val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, const std::string & val) +{ + return add_generic(field, val); +} + +Space & Space::add(const std::wstring & field, const std::wstring & val) +{ + return add_generic(field, val); +} + +Space & Space::add(const 
std::wstring & field, const Space & space) +{ + return add_generic(field, space); +} + +Space & Space::add(const std::wstring & field, const Space * space) +{ + return add_generic(field, space); +} + +Space & Space::add(const std::wstring & field, Space && space) +{ + return add(field.c_str(), std::move(space)); +} + +Space & Space::add_empty_space(const std::wstring & field) +{ + return add_generic(field, static_cast(nullptr)); +} + + + + + + +bool Space::is_null() const +{ + return type == type_null; +} + +bool Space::is_bool() const +{ + return type == type_bool; +} + +bool Space::is_llong() const +{ + return is_long_long(); +} + +bool Space::is_long_long() const +{ + return type == type_long; +} + +bool Space::is_float() const +{ + return type == type_float; +} + +bool Space::is_double() const +{ + return type == type_double; +} + +bool Space::is_numeric() const +{ + return is_long_long() || is_float() || is_double(); +} + +bool Space::is_str() const +{ + return type == type_string; +} + +bool Space::is_wstr() const +{ + return type == type_wstring; +} + +bool Space::is_text() const +{ + return is_str() || is_wstr(); +} + +bool Space::is_object() const +{ + return type == type_object; +} + +bool Space::is_table() const +{ + return type == type_table; +} + + +bool Space::to_bool() const +{ + if( type == type_bool ) + return value.value_bool; + + long long val = to_long_long(); + return (val != 0) ? true : false; +} + +short Space::to_short() const +{ + return to_generic_numeric_signed_value(); +} + +int Space::to_int() const +{ + return to_generic_numeric_signed_value(); +} + +long Space::to_long() const +{ + return to_generic_numeric_signed_value(); + +} + +long long Space::to_llong() const +{ + return to_long_long(); +} + +long long Space::to_long_long() const +{ + switch(type) + { + case type_null: + case type_object: + case type_table: + return 0; + + case type_bool: + return value.value_bool ? 
1 : 0; + + case type_long: + return value.value_long; + + case type_float: + return static_cast(value.value_float); + + case type_double: + return static_cast(value.value_double); + + case type_string: + return convert_string_to_long_long(); + + case type_wstring: + return convert_wstring_to_long_long(); + } + + return 0; +} + + +unsigned long long Space::to_ullong() const +{ + return to_ulong_long(); +} + + + +long long Space::convert_string_to_long_long() const +{ + bool was_overflow = false; + const char * after_str; + long long val = Toll(value.value_string.c_str(), 10, &after_str, &was_overflow, true); + + return was_overflow ? 0 : val; +} + + +long long Space::convert_wstring_to_long_long() const +{ + bool was_overflow = false; + const wchar_t * after_str; + long long val = Toll(value.value_wstring.c_str(), 10, &after_str, &was_overflow, true); + + return was_overflow ? 0 : val; +} + + +unsigned long long Space::convert_string_to_ulong_long() const +{ + bool was_overflow = false; + const char * after_str; + unsigned long long val = Toull(value.value_string.c_str(), 10, &after_str, &was_overflow, true); + + return was_overflow ? 0 : val; +} + + +unsigned long long Space::convert_wstring_to_ulong_long() const +{ + bool was_overflow = false; + const wchar_t * after_str; + unsigned long long val = Toull(value.value_wstring.c_str(), 10, &after_str, &was_overflow, true); + + return was_overflow ? 0 : val; +} + + +unsigned short Space::to_ushort() const +{ + return to_generic_numeric_unsigned_value(); +} + +unsigned int Space::to_uint() const +{ + return to_generic_numeric_unsigned_value(); +} + +unsigned long Space::to_ulong() const +{ + return to_generic_numeric_unsigned_value(); + +} + + +unsigned long long Space::to_ulong_long() const +{ + switch(type) + { + case type_null: + case type_object: + case type_table: + return 0; + + case type_bool: + return value.value_bool ? 
1 : 0; + + case type_long: + return static_cast(value.value_long); + + case type_float: + return static_cast(value.value_float); + + case type_double: + return static_cast(value.value_double); + + case type_string: + return convert_string_to_ulong_long(); + + case type_wstring: + return convert_wstring_to_ulong_long(); + } + + return 0; +} + + + +std::string Space::to_str() const +{ + if( type == type_string ) + return value.value_string; + + std::string str; + + if( type == type_wstring ) + { + TextStream stream; + serialize_string_buffer(value.value_wstring.c_str(), stream, Escape::no_escape); + stream.to_string(str); + return str; + } + + serialize_to_json_to(str); + return str; +} + + +std::wstring Space::to_wstr() const +{ + if( type == type_wstring ) + return value.value_wstring; + + std::wstring str; + + if( type == type_string ) + { + utf8_to_wide(value.value_string, str); + return str; + } + + serialize_to_json_to(str); + return str; +} + + + + +void Space::to_list(std::list & output_list, bool clear_list) const +{ + to_list_str_generic(output_list, clear_list); +} + + +void Space::to_list(std::list & output_list, bool clear_list) const +{ + to_list_wstr_generic(output_list, clear_list); +} + + +void Space::to_list(std::vector & output_list, bool clear_list) const +{ + to_list_str_generic(output_list, clear_list); +} + + +void Space::to_list(std::vector & output_list, bool clear_list) const +{ + to_list_wstr_generic(output_list, clear_list); +} + + + + + +bool Space::to_bool(const wchar_t * field, bool default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_bool() : default_value; +} + +short Space::to_short(const wchar_t * field, short default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_short() : default_value; +} + +int Space::to_int(const wchar_t * field, int default_value) const +{ + const Space * space = get_object_field(field); + return space ? 
space->to_int() : default_value; +} + +long Space::to_long(const wchar_t * field, long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_long() : default_value; +} + +long long Space::to_llong(const wchar_t * field, long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_llong() : default_value; +} + +long long Space::to_long_long(const wchar_t * field, long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_long_long() : default_value; +} + +unsigned short Space::to_ushort(const wchar_t * field, unsigned short default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ushort() : default_value; +} + +unsigned int Space::to_uint(const wchar_t * field, unsigned int default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_uint() : default_value; +} + +unsigned long Space::to_ulong(const wchar_t * field, unsigned long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ulong() : default_value; +} + +unsigned long long Space::to_ullong(const wchar_t * field, unsigned long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ullong() : default_value; +} + +unsigned long long Space::to_ulong_long(const wchar_t * field, unsigned long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? 
space->to_ulong_long() : default_value; +} + +std::string Space::to_str(const wchar_t * field, const char * default_value) const +{ + const Space * space = get_object_field(field); + + if( space ) + { + return space->to_str(); + } + + if( default_value ) + { + return std::string(default_value); + } + + return std::string(); +} + + +std::string Space::to_str(const wchar_t * field, const std::string & default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_str() : default_value; +} + + +std::wstring Space::to_wstr(const wchar_t * field, const wchar_t * default_value) const +{ + const Space * space = get_object_field(field); + + if( space ) + { + return space->to_wstr(); + } + + if( default_value ) + { + return std::wstring(default_value); + } + + return std::wstring(); +} + + +std::wstring Space::to_wstr(const wchar_t * field, const std::wstring & default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_wstr() : default_value; +} + + +bool Space::to_list(const wchar_t * field, std::list & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + +bool Space::to_list(const wchar_t * field, std::list & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + + +bool Space::to_list(const std::wstring & field, std::list & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + +bool Space::to_list(const std::wstring & field, std::list & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + + +bool Space::to_list(const wchar_t * field, std::vector & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + +bool Space::to_list(const wchar_t * field, std::vector & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + 
+ +bool Space::to_list(const std::wstring & field, std::vector & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + +bool Space::to_list(const std::wstring & field, std::vector & output_list, bool clear_list) const +{ + return to_list_generic(field, output_list, clear_list); +} + + + +bool Space::to_bool(const std::wstring & field, bool default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_bool() : default_value; +} + +short Space::to_short(const std::wstring & field, short default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_short() : default_value; +} + +int Space::to_int(const std::wstring & field, int default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_int() : default_value; +} + +long Space::to_long(const std::wstring & field, long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_long() : default_value; +} + +long long Space::to_llong(const std::wstring & field, long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_llong() : default_value; +} + +long long Space::to_long_long(const std::wstring & field, long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_long_long() : default_value; +} + +unsigned short Space::to_ushort(const std::wstring & field, unsigned short default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ushort() : default_value; +} + +unsigned int Space::to_uint(const std::wstring & field, unsigned int default_value) const +{ + const Space * space = get_object_field(field); + return space ? 
space->to_uint() : default_value; +} + +unsigned long Space::to_ulong(const std::wstring & field, unsigned long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ulong() : default_value; +} + +unsigned long long Space::to_ullong(const std::wstring & field, unsigned long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ullong() : default_value; +} + +unsigned long long Space::to_ulong_long(const std::wstring & field, unsigned long long default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_ulong_long() : default_value; +} + +std::string Space::to_str(const std::wstring & field, const char * default_value) const +{ + const Space * space = get_object_field(field); + + if( space ) + { + return space->to_str(); + } + + if( default_value ) + { + return std::string(default_value); + } + + return std::string(); +} + +std::string Space::to_str(const std::wstring & field, const std::string & default_value) const +{ + const Space * space = get_object_field(field); + return space ? space->to_str() : default_value; +} + + +std::wstring Space::to_wstr(const std::wstring & field, const wchar_t * default_value) const +{ + const Space * space = get_object_field(field); + + if( space ) + { + return space->to_wstr(); + } + + if( default_value ) + { + return std::wstring(default_value); + } + + return std::wstring(); +} + +std::wstring Space::to_wstr(const std::wstring & field, const std::wstring & default_value) const +{ + const Space * space = get_object_field(field); + return space ? 
space->to_wstr() : default_value; +} + + + + + + +std::string Space::serialize_to_space_str(bool pretty_print) const +{ + std::string str; + serialize_to_space_to(str, pretty_print); + return str; +} + + +std::wstring Space::serialize_to_space_wstr(bool pretty_print) const +{ + std::wstring str; + serialize_to_space_to(str, pretty_print); + return str; +} + + +void Space::serialize_to_space_to(std::string & str, bool pretty_print) const +{ + TextStream stream; + serialize_to_space_stream(stream, pretty_print); + + stream.to_string(str); +} + + +void Space::serialize_to_space_to(std::wstring & str, bool pretty_print) const +{ + WTextStream stream; + serialize_to_space_stream(stream, pretty_print); + + stream.to_string(str); +} + + + +std::string Space::serialize_to_json_str() const +{ + std::string str; + serialize_to_json_to(str); + return str; +} + + +std::wstring Space::serialize_to_json_wstr() const +{ + std::wstring str; + serialize_to_json_to(str); + return str; +} + + +void Space::serialize_to_json_to(std::string & str) const +{ + TextStream stream; + serialize_to_json_stream(stream); + + stream.to_string(str); +} + + +void Space::serialize_to_json_to(std::wstring & str) const +{ + WTextStream stream; + serialize_to_json_stream(stream); + + stream.to_string(str); +} + + + +bool * Space::get_bool() +{ + return type == type_bool ? &value.value_bool : nullptr; +} + +long long * Space::get_llong() +{ + return get_long_long(); +} + +long long * Space::get_long_long() +{ + return type == type_long ? &value.value_long : nullptr; +} + +float * Space::get_float() +{ + return type == type_float ? &value.value_float : nullptr; +} + +double * Space::get_double() +{ + return type == type_double ? &value.value_double : nullptr; +} + +std::string * Space::get_str() +{ + return type == type_string ? &value.value_string : nullptr; +} + +std::wstring * Space::get_wstr() +{ + return type == type_wstring ? 
&value.value_wstring : nullptr; +} + +Space::ObjectType * Space::get_object() +{ + return type == type_object ? &value.value_object : nullptr; +} + +Space::TableType * Space::get_table() +{ + return type == type_table ? &value.value_table : nullptr; +} + + + +bool Space::is_equal(const char * val) const +{ + if( type == type_string ) + { + return value.value_string == val; + } + + return false; +} + +bool Space::is_equal(const std::string & val) const +{ + if( type == type_string ) + { + return value.value_string == val; + } + + return false; +} + +bool Space::is_equal(const wchar_t * val) const +{ + if( type == type_wstring ) + { + return value.value_wstring == val; + } + + return false; +} + +bool Space::is_equal(const std::wstring & val) const +{ + if( type == type_wstring ) + { + return value.value_wstring == val; + } + + return false; +} + + + +bool Space::has_value(const char * val) const +{ + if( type == type_string ) + { + return value.value_string == val; + } + + if( type == type_table ) + { + for(size_t i=0 ; i < value.value_table.size() ; ++i) + { + Space * table_item = value.value_table[i]; + + if( table_item->type == type_string ) + { + if( table_item->value.value_string == val ) + return true; + } + } + } + + return false; +} + +bool Space::has_value(const std::string & val) const +{ + return has_value(val.c_str()); +} + + +bool Space::has_value(const wchar_t * val) const +{ + if( type == type_wstring ) + { + return value.value_wstring == val; + } + + if( type == type_table ) + { + for(size_t i=0 ; i < value.value_table.size() ; ++i) + { + Space * table_item = value.value_table[i]; + + if( table_item->type == type_wstring ) + { + if( table_item->value.value_wstring == val ) + return true; + } + } + } + + return false; +} + + +bool Space::has_value(const std::wstring & val) const +{ + return has_value(val.c_str()); +} + + + + + + + +Space * Space::get_object_field(const wchar_t * field) +{ + if( is_object() ) + { + ObjectType::iterator i = 
value.value_object.find(field); + + if( i != value.value_object.end() ) + { + return i->second; + } + } + + return nullptr; +} + +bool * Space::get_bool(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_bool() : nullptr; +} + +long long * Space::get_llong(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_llong() : nullptr; +} + +long long * Space::get_long_long(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_long_long() : nullptr; +} + +float * Space::get_float(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_float() : nullptr; +} + +double * Space::get_double(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_double() : nullptr; +} + +std::string * Space::get_str(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_str() : nullptr; +} + +std::wstring * Space::get_wstr(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_wstr() : nullptr; +} + +Space::ObjectType * Space::get_object(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_object() : nullptr; +} + +Space::TableType * Space::get_table(const wchar_t * field) +{ + Space * space = get_object_field(field); + return space ? space->get_table() : nullptr; +} + + + + +const bool * Space::get_bool() const +{ + return type == type_bool ? &value.value_bool : nullptr; +} + +const long long * Space::get_llong() const +{ + return get_long_long(); +} + +const long long * Space::get_long_long() const +{ + return type == type_long ? &value.value_long : nullptr; +} + +const float * Space::get_float() const +{ + return type == type_float ? &value.value_float : nullptr; +} + +const double * Space::get_double() const +{ + return type == type_double ? 
&value.value_double : nullptr; +} + +const std::string * Space::get_str() const +{ + return type == type_string ? &value.value_string : nullptr; +} + +const std::wstring * Space::get_wstr() const +{ + return type == type_wstring ? &value.value_wstring : nullptr; +} + +const Space::ObjectType * Space::get_object() const +{ + return type == type_object ? &value.value_object : nullptr; +} + +const Space::TableType * Space::get_table() const +{ + return type == type_table ? &value.value_table : nullptr; +} + + + +bool Space::has_key(const wchar_t * field) const +{ + return get_object_field(field) != nullptr; +} + +bool Space::has_key(const std::wstring & field) const +{ + return get_object_field(field) != nullptr; +} + + + + +const Space * Space::get_object_field(const wchar_t * field) const +{ + if( is_object() ) + { + ObjectType::const_iterator i = value.value_object.find(field); + + if( i != value.value_object.end() ) + { + return i->second; + } + } + + return nullptr; +} + + +Space * Space::get_object_field(const std::wstring & field) +{ + return get_object_field(field.c_str()); +} + +const Space * Space::get_object_field(const std::wstring & field) const +{ + return get_object_field(field.c_str()); +} + + +const bool * Space::get_bool(const wchar_t * field) const +{ + const Space * space = get_object_field(field); + return space ? space->get_bool() : nullptr; +} + +const long long * Space::get_llong(const wchar_t * field) const +{ + const Space * space = get_object_field(field); + return space ? space->get_llong() : nullptr; +} + +const long long * Space::get_long_long(const wchar_t * field) const +{ + const Space * space = get_object_field(field); + return space ? space->get_long_long() : nullptr; +} + +const float * Space::get_float(const wchar_t * field) const +{ + const Space * space = get_object_field(field); + return space ? 
space->get_float() : nullptr;
+}
+
+const double * Space::get_double(const wchar_t * field) const
+{
+	const Space * space = get_object_field(field);
+	return space ? space->get_double() : nullptr;
+}
+
+const std::string * Space::get_str(const wchar_t * field) const
+{
+	const Space * space = get_object_field(field);
+	return space ? space->get_str() : nullptr;
+}
+
+const std::wstring * Space::get_wstr(const wchar_t * field) const
+{
+	const Space * space = get_object_field(field);
+	return space ? space->get_wstr() : nullptr;
+}
+
+const Space::ObjectType * Space::get_object(const wchar_t * field) const
+{
+	const Space * space = get_object_field(field);
+	return space ? space->get_object() : nullptr;
+}
+
+const Space::TableType * Space::get_table(const wchar_t * field) const
+{
+	const Space * space = get_object_field(field);
+	return space ? space->get_table() : nullptr;
+}
+
+
+
+
+
+/*
+ * Linear search of the object's fields, comparing every key with is_equal_nc()
+ * (NOTE(review): presumably a case-insensitive comparison -- confirm against is_equal_nc).
+ *
+ * Returns the first matching value, or nullptr when this Space is not an object
+ * or when no key matches.
+ *
+ * The iterator has to be advanced on every pass: the previous while-loop never
+ * incremented it, so a lookup whose first key did not match never terminated.
+ */
+Space * Space::get_object_field_nc(const wchar_t * field)
+{
+	if( is_object() )
+	{
+		for(ObjectType::iterator i = value.value_object.begin() ; i != value.value_object.end() ; ++i)
+		{
+			if( is_equal_nc(field, i->first.c_str()) )
+			{
+				return i->second;
+			}
+		}
+	}
+
+	return nullptr;
+}
+
+
+/*
+ * std::wstring overload, forwards to the const wchar_t * version
+ */
+Space * Space::get_object_field_nc(const std::wstring & field)
+{
+	return get_object_field_nc(field.c_str());
+}
+
+
+/*
+ * const version of get_object_field_nc(const wchar_t *):
+ * the same linear search, the iterator is advanced on every pass
+ * (the previous while-loop never incremented it and could spin forever).
+ */
+const Space * Space::get_object_field_nc(const wchar_t * field) const
+{
+	if( is_object() )
+	{
+		for(ObjectType::const_iterator i = value.value_object.cbegin() ; i != value.value_object.cend() ; ++i)
+		{
+			if( is_equal_nc(field, i->first.c_str()) )
+			{
+				return i->second;
+			}
+		}
+	}
+
+	return nullptr;
+}
+
+
+/*
+ * std::wstring overload, forwards to the const wchar_t * version
+ */
+const Space * Space::get_object_field_nc(const std::wstring & field) const
+{
+	return get_object_field_nc(field.c_str());
+}
+
+
+
+
+
+void Space::remove(const wchar_t * field)
+{
+	remove(std::wstring(field));
+}
+
+
+void Space::remove(const std::wstring & field)
+{
+	if( type == type_object )
+	{
+		ObjectType::iterator i = value.value_object.find(field);
+
+		if( i != value.value_object.end() )
+		{
+			delete i->second;
+			i->second = nullptr;
+
+			value.value_object.erase(i);
+		}
+	}
+}
+
+
+/*
+ * is_equal(field, val): look the field up with get_object_field()
+ * and compare its value with val; a missing field is never equal.
+ */
+bool Space::is_equal(const wchar_t * field, const char * val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->is_equal(val);
+	}
+
+	return false;
+}
+
+bool Space::is_equal(const wchar_t * field, const std::string & val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->is_equal(val);
+	}
+
+	return false;
+}
+
+
+bool Space::is_equal(const wchar_t * field, const wchar_t * val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->is_equal(val);
+	}
+
+	return false;
+}
+
+
+bool Space::is_equal(const wchar_t * field, const std::wstring & val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->is_equal(val);
+	}
+
+	return false;
+}
+
+
+
+
+
+/*
+ * has_value(field, val): look the field up with get_object_field()
+ * and forward to has_value(val) on it; a missing field yields false.
+ */
+bool Space::has_value(const wchar_t * field, const char * val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->has_value(val);
+	}
+
+	return false;
+}
+
+bool Space::has_value(const wchar_t * field, const std::string & val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->has_value(val);
+	}
+
+	return false;
+}
+
+
+bool Space::has_value(const wchar_t * field, const wchar_t * val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->has_value(val);
+	}
+
+	return false;
+}
+
+
+bool Space::has_value(const wchar_t * field, const std::wstring & val) const
+{
+	const Space * space = get_object_field(field);
+
+	if( space )
+	{
+		return space->has_value(val);
+	}
+
+	return false;
+}
+
+
+
+
+/*
+ * Returns the first child space whose child_spaces_name field is equal to name,
+ * or nullptr when there is no child table or no child matches.
+ *
+ * The table stores Space pointers, so it is iterated by pointer and the stored
+ * pointer itself is returned. (Binding the elements to 'const Space &' could at
+ * best create a temporary copy per element, and the address returned to the
+ * caller would then be the address of that already destroyed temporary.)
+ */
+const Space * Space::find_child_space_const(const wchar_t * name) const
+{
+	const TableType * child_table = find_child_space_table();
+
+	if( child_table )
+	{
+		for(const Space * child : *child_table)
+		{
+			if( child->is_equal(child_spaces_name, name) )
+			{
+				return child;
+			}
+		}
+	}
+
+	return nullptr;
+}
+
+
+
+/*
+ * Returns the child space stored at table_index,
+ * or nullptr when there is no child table or the index is out of range.
+ */
+const Space * Space::find_child_space_const(size_t table_index) const
+{
+	const TableType * child_table = find_child_space_table();
+
+	if( child_table && table_index < child_table->size() )
+	{
+		return (*child_table)[table_index];
+	}
+
+	return nullptr;
+}
+
+
+
+Space::TableType * Space::find_child_space_table()
+{
+	return get_table(child_spaces_field_table_name);
+}
+
+
+const Space::TableType * Space::find_child_space_table() const
+{
+	return get_table(child_spaces_field_table_name);
+}
+
+
+bool Space::child_spaces_empty() const
+{
+	const TableType * child_table = find_child_space_table();
+
+	if( child_table )
+	{
+		return child_table->empty();
+	}
+
+	return true;
+}
+
+size_t Space::child_spaces_size() const
+{
+	const TableType * child_table = find_child_space_table();
+
+	if( child_table )
+	{
+		return child_table->size();
+	}
+
+	return 0;
+}
+
+
+/*
+ * non-const lookup implemented on top of the const one;
+ * casting the constness away is fine here because *this is not const
+ */
+Space * Space::find_child_space(const wchar_t * name)
+{
+	return const_cast<Space *>(find_child_space_const(name));
+}
+
+
+Space * Space::find_child_space(const std::wstring & name)
+{
+	return find_child_space(name.c_str());
+}
+
+
+const Space * Space::find_child_space(const wchar_t * name) const
+{
+	return find_child_space_const(name);
+}
+
+
+const Space * Space::find_child_space(const std::wstring & name) const
+{
+	return find_child_space(name.c_str());
+}
+
+
+
+Space * Space::find_child_space(size_t table_index)
+{
+	return const_cast<Space *>(find_child_space_const(table_index));
+}
+
+const Space * Space::find_child_space(size_t table_index) const
+{
+	return find_child_space_const(table_index);
+}
+
+
+/*
+ * Appends a new, unnamed child space to the child table
+ * (the table is created first when it does not exist yet).
+ */
+Space & Space::add_child_space()
+{
+	initialize_child_spaces_if_needed();
+
+	TableType * child_table = find_child_space_table();
+	child_table->push_back(new Space());
+
+	return *child_table->back();
+}
+
+
+Space & Space::add_child_space(const wchar_t * space_name)
+{
+	
initialize_child_spaces_if_needed();
+
+	TableType * child_table = find_child_space_table();
+	child_table->push_back(new Space());
+	Space * last_space = child_table->back();
+	last_space->add(child_spaces_name, space_name);
+
+	return *last_space;
+}
+
+
+Space & Space::add_child_space(const std::wstring & space_name)
+{
+	return add_child_space(space_name.c_str());
+}
+
+
+/*
+ * Returns the child space called name; when there is no such child
+ * a new one is added with add_child_space(name) and returned.
+ */
+Space & Space::find_add_child_space(const wchar_t * name)
+{
+	Space * space = find_child_space(name);
+
+	if( !space )
+	{
+		return add_child_space(name);
+	}
+
+	return *space;
+}
+
+
+Space & Space::find_add_child_space(const std::wstring & name)
+{
+	return find_add_child_space(name.c_str());
+}
+
+
+std::wstring * Space::find_child_space_name()
+{
+	return get_wstr(child_spaces_name);
+}
+
+
+const std::wstring * Space::find_child_space_name() const
+{
+	return get_wstr(child_spaces_name);
+}
+
+
+/*
+ * Returns a copy of the child_spaces_name field,
+ * or an empty string when the field is not available as a wide string.
+ */
+std::wstring Space::get_child_space_name() const
+{
+	const std::wstring * name = find_child_space_name();
+
+	if( name )
+	{
+		return *name;
+	}
+	else
+	{
+		return std::wstring();
+	}
+}
+
+
+
+bool Space::is_child_space_name(const wchar_t * name) const
+{
+	return is_equal(child_spaces_name, name);
+}
+
+
+bool Space::is_child_space_name(const std::wstring & name) const
+{
+	return is_equal(child_spaces_name, name.c_str());
+}
+
+
+
+
+/*
+ * Deletes and removes every child space whose child_spaces_name field is equal to name.
+ *
+ * Erasing shifts the following items down by one position, so the index
+ * is advanced only when the current item has been kept.
+ */
+void Space::remove_child_space(const wchar_t * name)
+{
+	TableType * child_table = find_child_space_table();
+
+	if( child_table )
+	{
+		for(size_t i = 0 ; i < child_table->size() ; )
+		{
+			Space * child = (*child_table)[i];
+
+			if( child->is_equal(child_spaces_name, name) )
+			{
+				delete child;
+				child_table->erase(child_table->begin() + i);
+			}
+			else
+			{
+				++i;
+			}
+		}
+	}
+}
+
+void Space::remove_child_space(const std::wstring & name)
+{
+	return remove_child_space(name.c_str());
+}
+
+
+void Space::remove_child_space(size_t index)
+{
+	TableType * child_table = find_child_space_table();
+
+	if( child_table && index < child_table->size() )
+	{
+		Space * child = 
(*child_table)[index];
+		delete child;
+		child_table->erase(child_table->begin() + index);
+	}
+}
+
+
+
+
+
+
+/*
+ * Makes this Space hold a copy of the value stored in space.
+ * The current value is replaced; objects and tables are copied deeply
+ * (see copy_value_object() and copy_value_table()).
+ */
+void Space::copy_value_from(const Space & space)
+{
+	switch(space.type)
+	{
+	case type_null:
+		initialize_value_null_if_needed();
+		break;
+
+	case type_bool:
+		initialize_value_bool_if_needed();
+		value.value_bool = space.value.value_bool;
+		break;
+
+	case type_long:
+		initialize_value_long_if_needed();
+		value.value_long = space.value.value_long;
+		break;
+
+	case type_float:
+		initialize_value_float_if_needed();
+		value.value_float = space.value.value_float;
+		break;
+
+	case type_double:
+		initialize_value_double_if_needed();
+		value.value_double = space.value.value_double;
+		break;
+
+	case type_string:
+		initialize_value_string_if_needed();
+		value.value_string = space.value.value_string;
+		break;
+
+	case type_wstring:
+		initialize_value_wstring_if_needed();
+		value.value_wstring = space.value.value_wstring;
+		break;
+
+	case type_object:
+		copy_value_object(space.value);
+		break;
+
+	case type_table:
+		copy_value_table(space.value);
+		break;
+	}
+}
+
+
+
+void Space::copy_from(const Space & space)
+{
+	copy_value_from(space);
+}
+
+
+
+
+
+/*
+ * Replaces the current value with a deep copy of the object held in value_from
+ * (the caller guarantees that value_from.value_object is the active member).
+ *
+ * The copy is built before the current value is touched, so the source may
+ * live somewhere below this Space. The items of the object are owned
+ * (they are created with new and released with delete), hence the old items
+ * are deleted here instead of being silently dropped by clear().
+ */
+void Space::copy_value_object(const Value & value_from)
+{
+	// copying from ourselves: the value is already in place
+	if( &value_from == &value )
+		return;
+
+	ObjectType copied_object;
+
+	for(const auto & map_item : value_from.value_object)
+	{
+		Space * new_space = new Space(*map_item.second);
+		copied_object.insert(std::make_pair(map_item.first, new_space));
+	}
+
+	if( type == type_object )
+	{
+		for(auto & old_item : value.value_object)
+		{
+			delete old_item.second;
+		}
+
+		value.value_object.clear();
+	}
+
+	initialize_value_object_if_needed();
+	value.value_object = std::move(copied_object);
+}
+
+
+/*
+ * Replaces the current value with a deep copy of the table held in value_from
+ * (the caller guarantees that value_from.value_table is the active member).
+ *
+ * As in copy_value_object(): the copy is built first and the old, owned items
+ * are deleted instead of being dropped by clear().
+ */
+void Space::copy_value_table(const Value & value_from)
+{
+	// copying from ourselves: the value is already in place
+	if( &value_from == &value )
+		return;
+
+	TableType copied_table;
+
+	for(const Space * space : value_from.value_table)
+	{
+		Space * new_space = new Space(*space);
+		copied_table.push_back(new_space);
+	}
+
+	if( type == type_table )
+	{
+		for(Space * old_item : value.value_table)
+		{
+			delete old_item;
+		}
+
+		value.value_table.clear();
+	}
+
+	initialize_value_table_if_needed();
+	value.value_table = std::move(copied_table);
+}
+
+
+void Space::move_value_from(Space && space)
+{
+	switch(space.type)
+	{
+	case type_string:
+		initialize_value_string_if_needed(std::move(space.value.value_string));
+		break;
+
+	case type_wstring:
+		
initialize_value_wstring_if_needed(std::move(space.value.value_wstring));
+		break;
+
+	case type_object:
+		initialize_value_object_if_needed(std::move(space.value.value_object));
+		break;
+
+	case type_table:
+		initialize_value_table_if_needed(std::move(space.value.value_table));
+		break;
+
+	default:
+		copy_value_from(space);
+		break;
+	}
+}
+
+
+/*
+ * Moves the value out of space into this Space and leaves space as type_null.
+ *
+ * move_value_from() only moves from the string/object/table member, the
+ * moved-from member is still a live object inside the source. It is released
+ * with remove_value() (which also sets the type to type_null); overwriting the
+ * type alone would skip its destructor and could leak whatever storage the
+ * moved-from member still holds.
+ */
+void Space::move_from(Space && space)
+{
+	move_value_from(std::move(space));
+	space.remove_value();
+}
+
+
+
+
+void Space::initialize()
+{
+	type = type_null;
+}
+
+/*
+ * initialize_value_XXX_if_needed():
+ * when the current type differs from XXX the current value is removed,
+ * a new XXX value is constructed in place and the type is updated;
+ * when the type is already XXX nothing happens (the versions taking an
+ * rvalue move-assign the argument in that case).
+ */
+void Space::initialize_value_null_if_needed()
+{
+	if( type != type_null )
+	{
+		remove_value();
+		type = type_null;
+	}
+}
+
+
+void Space::initialize_value_bool_if_needed()
+{
+	if( type != type_bool )
+	{
+		remove_value();
+		new (&value) bool;
+		type = type_bool;
+	}
+}
+
+
+void Space::initialize_value_long_if_needed()
+{
+	if( type != type_long )
+	{
+		remove_value();
+		new (&value) long long;
+		type = type_long;
+	}
+}
+
+
+void Space::initialize_value_float_if_needed()
+{
+	if( type != type_float )
+	{
+		remove_value();
+		new (&value) float;
+		type = type_float;
+	}
+}
+
+
+void Space::initialize_value_double_if_needed()
+{
+	if( type != type_double )
+	{
+		remove_value();
+		new (&value) double;
+		type = type_double;
+	}
+}
+
+
+void Space::initialize_value_string_if_needed()
+{
+	if( type != type_string )
+	{
+		remove_value();
+		new (&value) std::string;
+		type = type_string;
+	}
+}
+
+
+void Space::initialize_value_string_if_needed(std::string && str)
+{
+	if( type != type_string )
+	{
+		remove_value();
+		new (&value) std::string(std::move(str));
+		type = type_string;
+	}
+	else
+	{
+		value.value_string = std::move(str);
+	}
+}
+
+
+
+void Space::initialize_value_wstring_if_needed()
+{
+	if( type != type_wstring )
+	{
+		remove_value();
+		new (&value) std::wstring;
+		type = type_wstring;
+	}
+}
+
+
+void Space::initialize_value_wstring_if_needed(std::wstring && str)
+{
+	if( type != type_wstring )
+	{
+		remove_value();
+		new (&value) 
std::wstring(std::move(str)); + type = type_wstring; + } + else + { + value.value_wstring = std::move(str); + } +} + +void Space::initialize_value_object_if_needed() +{ + if( type != type_object ) + { + remove_value(); + new (&value) ObjectType; + type = type_object; + } +} + +void Space::initialize_value_object_if_needed(ObjectType && obj) +{ + if( type != type_object ) + { + remove_value(); + new (&value) ObjectType(std::move(obj)); + type = type_object; + } + else + { + value.value_object = std::move(obj); + } +} + + +void Space::initialize_value_table_if_needed() +{ + if( type != type_table ) + { + remove_value(); + new (&value) TableType; + type = type_table; + } +} + + +void Space::initialize_value_table_if_needed(TableType && tab) +{ + if( type != type_table ) + { + remove_value(); + new (&value) TableType(std::move(tab)); + type = type_table; + } + else + { + value.value_table = std::move(tab); + } +} + + +void Space::initialize_child_spaces_if_needed() +{ + Space * child_spaces = get_object_field(child_spaces_field_table_name); + + if( child_spaces ) + { + if( !child_spaces->is_table() ) + child_spaces->set_empty_table(); + } + else + { + Space & new_child_spaces = add_empty_space(child_spaces_field_table_name); + new_child_spaces.set_empty_table(); + } +} + + + +void Space::remove_value() +{ + switch(type) + { + case type_null: + break; + + case type_bool: + case type_long: + case type_float: + case type_double: + type = type_null; + break; + + case type_string: + { + remove_value_string(); + break; + } + + case type_wstring: + { + remove_value_wstring(); + break; + } + + case type_object: + { + remove_value_object(); + break; + } + + case type_table: + remove_value_table(); + break; + } +} + + +void Space::remove_value_string() +{ + value.value_string.~basic_string(); + type = type_null; +} + + +void Space::remove_value_wstring() +{ + value.value_wstring.~basic_string(); + type = type_null; +} + + +void Space::remove_value_object() +{ + for(auto map_item 
: value.value_object) + { + delete map_item.second; + map_item.second = nullptr; + } + + value.value_object.~ObjectType(); + type = type_null; +} + + +void Space::remove_value_table() +{ + for(size_t i = 0 ; i < value.value_table.size() ; ++i) + { + delete value.value_table[i]; + value.value_table[i] = nullptr; + } + + value.value_table.~TableType(); + type = type_null; +} + + + +} // namespace + diff --git a/src/space/space.h b/src/space/space.h new file mode 100644 index 0000000..8cba9f2 --- /dev/null +++ b/src/space/space.h @@ -0,0 +1,1323 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2010-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_space_space +#define headerfile_picotools_space_space + +#include +#include +#include +#include +#include +#include +#include "textstream/types.h" +#include "convert/inttostr.h" +#include + + + +namespace pt +{ + + + +/* + +Simple form: + key = value + +If value is equal to 'null' then the internal representation is equal to Type::type_null, e.g.: + key = null + +If value is either 'false' or 'true' then the internal representation is Type::type_bool, e.g.: + key = false + key = true + +If value is an integer number and no overflow has occurred while parsing then type of the value +is Type::type_long (type is 'long long' internally): + key = 1000 + +If value is a floating point number then type is Type::type_double (type is 'double' internally): + key = 123.45 + key = 123.45e+10 + +In other cases value has Type::type_wstring type. + +The form with quotation marks: + key = "value" +In such a case value has Type::type_wstring type and can be multiline, e.g.: + key = "multiline + value" +In this form you can have spaces around value, e.g.: + key = " value with spaces " + + +The value can be a list: + key = (value1,value2) +or the same written with spaces around: + key = ( value1 , value2 ) +or written in multiline fashion: + key = ( + value1, + value2, + ) +a comma after value2 is optional.
+ +List can be written with quotation marks too: + key = ( + "value1", + "value2", + "value3 + with a new line character inside", + ) + +List without value: + key = () +is equal to simple form without value, e.g.: + key = + + + +The difference between a simple form and a list is in how a comma is parsed: in the simple form the comma is a part of the value, e.g.: + key = value with , a comma inside +this is equal to: + key = "value with , a comma inside" + +and in a form of a list it would have two values: + key = (value with , a comma inside) +would be equal to: + key = ("value with" , "a comma inside") + + + +*/ + + + +class Space +{ +public: + + typedef std::map ObjectType; + typedef std::vector TableType; + + constexpr static const wchar_t * child_spaces_field_table_name = L"child_spaces"; + constexpr static const wchar_t * child_spaces_name = L"name"; + + enum Escape + { + no_escape, + escape_space, + escape_json, + }; + + enum Type + { + type_null, + type_bool, + type_long, + type_float, + type_double, + type_string, + type_wstring, + type_object, + type_table, + }; + + union Value + { + bool value_bool; + long long value_long; + float value_float; + double value_double; + std::string value_string; + std::wstring value_wstring; + ObjectType value_object; + TableType value_table; + + Value() + { + } + + ~Value() + { + } + }; + + + Type type; + Value value; + + + Space(); + Space(const Space & space); + Space(Space && space); + Space & operator=(const Space & space); + Space & operator=(Space && space); + ~Space(); + + Space(bool val); + Space(short val); + Space(int val); + Space(long val); + Space(long long val); + Space(unsigned short val); + Space(unsigned int val); + Space(unsigned long val); + Space(unsigned long long val); + Space(float val); + Space(double val); + Space(const char * str); + Space(const wchar_t * str); + Space(const std::string & str); + Space(const std::wstring & str); + Space(const Space * space); + + + void clear(); + + size_t str_size() const;
+ size_t wstr_size() const; + size_t object_size() const; + size_t table_size() const; + + + // set a new value + void set_null(); + void set_empty_string(); + void set_empty_wstring(); + void set_empty_table(); + void set_empty_object(); + + void set(bool val); + void set(short val); + void set(int val); + void set(long val); + void set(long long val); + void set(unsigned short val); + void set(unsigned int val); + void set(unsigned long val); + void set(unsigned long long val); + void set(float val); + void set(double val); + void set(const char * str); + void set(const wchar_t * str); + void set(const std::string & str); + void set(const std::wstring & str); + void set(const Space & space); + void set(const Space * space); + void set(Space && space); + + + // add a value to the table, change to table if needed, return the reference to the new inserted item + Space & add(bool val); + Space & add(short val); + Space & add(int val); + Space & add(long val); + Space & add(long long val); + Space & add(unsigned short val); + Space & add(unsigned int val); + Space & add(unsigned long val); + Space & add(unsigned long long val); + Space & add(float val); + Space & add(double val); + Space & add(const char * val); + Space & add(const wchar_t * val); + Space & add(const std::string & val); + Space & add(const std::wstring & val); + Space & add(const Space & space); + Space & add(const Space * space); + Space & add(Space && space); + Space & add_empty_space(); // IMPROVEME rename me to something better + + + + // add a value to the object, change to object if needed, return the reference to the new inserted item + Space & add(const wchar_t * field, bool val); + Space & add(const wchar_t * field, short val); + Space & add(const wchar_t * field, int val); + Space & add(const wchar_t * field, long val); + Space & add(const wchar_t * field, long long val); + Space & add(const wchar_t * field, unsigned short val); + Space & add(const wchar_t * field, unsigned int val); + Space 
& add(const wchar_t * field, unsigned long val); + Space & add(const wchar_t * field, unsigned long long val); + Space & add(const wchar_t * field, float val); + Space & add(const wchar_t * field, double val); + Space & add(const wchar_t * field, const char * val); + Space & add(const wchar_t * field, const wchar_t * val); + Space & add(const wchar_t * field, const std::string & val); + Space & add(const wchar_t * field, const std::wstring & val); + Space & add(const wchar_t * field, const Space & space); + Space & add(const wchar_t * field, const Space * space); + Space & add(const wchar_t * field, Space && space); + Space & add_empty_space(const wchar_t * field); // IMPROVEME rename me to something better + + Space & add(const std::wstring & field, bool val); + Space & add(const std::wstring & field, short val); + Space & add(const std::wstring & field, int val); + Space & add(const std::wstring & field, long val); + Space & add(const std::wstring & field, long long val); + Space & add(const std::wstring & field, unsigned short val); + Space & add(const std::wstring & field, unsigned int val); + Space & add(const std::wstring & field, unsigned long val); + Space & add(const std::wstring & field, unsigned long long val); + Space & add(const std::wstring & field, float val); + Space & add(const std::wstring & field, double val); + Space & add(const std::wstring & field, const char * val); + Space & add(const std::wstring & field, const wchar_t * val); + Space & add(const std::wstring & field, const std::string & val); + Space & add(const std::wstring & field, const std::wstring & val); + Space & add(const std::wstring & field, const Space & space); + Space & add(const std::wstring & field, const Space * space); + Space & add(const std::wstring & field, Space && space); + Space & add_empty_space(const std::wstring & field); // IMPROVEME rename me to something better + + + // IMPROVEME add a similar 'set' method and cctor + template + Space & add_stream(const wchar_t 
* field, StreamType & str) + { + std::wstring temp; + str.to_string(temp); + + return add(field, temp); + } + + template + Space & add_stream(const std::wstring & field, StreamType & str) + { + std::wstring temp; + str.to_string(temp); + + return add(field, temp); + } + + + + bool is_null() const; + bool is_bool() const; + bool is_llong() const; + bool is_long_long() const; + bool is_float() const; + bool is_double() const; + bool is_numeric() const; + bool is_str() const; + bool is_wstr() const; + bool is_text() const; + bool is_object() const; + bool is_table() const; + + + + bool to_bool() const; + short to_short() const; + int to_int() const; + long to_long() const; + long long to_llong() const; + long long to_long_long() const; + unsigned short to_ushort() const; + unsigned int to_uint() const; + unsigned long to_ulong() const; + unsigned long long to_ullong() const; + unsigned long long to_ulong_long() const; + std::string to_str() const; + std::wstring to_wstr() const; + + void to_list(std::list & output_list, bool clear_list = true) const; + void to_list(std::list & output_list, bool clear_list = true) const; + void to_list(std::vector & output_list, bool clear_list = true) const; + void to_list(std::vector & output_list, bool clear_list = true) const; + + + // returns value from object, field is a key + bool to_bool(const wchar_t * field, bool default_value = false) const; + short to_short(const wchar_t * field, short default_value = 0) const; + int to_int(const wchar_t * field, int default_value = 0) const; + long to_long(const wchar_t * field, long default_value = 0) const; + long long to_llong(const wchar_t * field, long long default_value = 0) const; + long long to_long_long(const wchar_t * field, long long default_value = 0) const; + unsigned short to_ushort(const wchar_t * field, unsigned short default_value = 0) const; + unsigned int to_uint(const wchar_t * field, unsigned int default_value = 0) const; + unsigned long to_ulong(const wchar_t * field, 
unsigned long default_value = 0) const; + unsigned long long to_ullong(const wchar_t * field, unsigned long long default_value = 0) const; + unsigned long long to_ulong_long(const wchar_t * field, unsigned long long default_value = 0) const; + std::string to_str(const wchar_t * field, const char * default_value = nullptr) const; + std::string to_str(const wchar_t * field, const std::string & default_value) const; + std::wstring to_wstr(const wchar_t * field, const wchar_t * default_value = nullptr) const; + std::wstring to_wstr(const wchar_t * field, const std::wstring & default_value) const; + + bool to_list(const wchar_t * field, std::list & output_list, bool clear_list = true) const; + bool to_list(const wchar_t * field, std::list & output_list, bool clear_list = true) const; + bool to_list(const std::wstring & field, std::list & output_list, bool clear_list = true) const; + bool to_list(const std::wstring & field, std::list & output_list, bool clear_list = true) const; + + bool to_list(const wchar_t * field, std::vector & output_list, bool clear_list = true) const; + bool to_list(const wchar_t * field, std::vector & output_list, bool clear_list = true) const; + bool to_list(const std::wstring & field, std::vector & output_list, bool clear_list = true) const; + bool to_list(const std::wstring & field, std::vector & output_list, bool clear_list = true) const; + + // returns value from object, field is a key + bool to_bool(const std::wstring & field, bool default_value = false) const; + short to_short(const std::wstring & field, short default_value = 0) const; + int to_int(const std::wstring & field, int default_value = 0) const; + long to_long(const std::wstring & field, long default_value = 0) const; + long long to_llong(const std::wstring & field, long long default_value = 0) const; + long long to_long_long(const std::wstring & field, long long default_value = 0) const; + unsigned short to_ushort(const std::wstring & field, unsigned short default_value = 0) 
const; + unsigned int to_uint(const std::wstring & field, unsigned int default_value = 0) const; + unsigned long to_ulong(const std::wstring & field, unsigned long default_value = 0) const; + unsigned long long to_ullong(const std::wstring & field, unsigned long long default_value = 0) const; + unsigned long long to_ulong_long(const std::wstring & field, unsigned long long default_value = 0) const; + std::string to_str(const std::wstring & field, const char * default_value = nullptr) const; + std::string to_str(const std::wstring & field, const std::string & default_value) const; + std::wstring to_wstr(const std::wstring & field, const wchar_t * default_value = nullptr) const; + std::wstring to_wstr(const std::wstring & field, const std::wstring & default_value) const; + + + bool to_str_list(std::list & output_list) const; + bool to_wstr_list(std::list & output_list) const; + + + bool * get_bool(); + long long * get_llong(); + long long * get_long_long(); + float * get_float(); + double * get_double(); + std::string * get_str(); + std::wstring * get_wstr(); + ObjectType * get_object(); + TableType * get_table(); + + // may a better name? + bool is_equal(const char * val) const; + bool is_equal(const std::string & val) const; + bool is_equal(const wchar_t * val) const; + bool is_equal(const std::wstring & val) const; + + // may a better name? + bool has_value(const char * val) const; + bool has_value(const std::string & val) const; + bool has_value(const wchar_t * val) const; + bool has_value(const std::wstring & val) const; + + + // what about getters from tables? + // may something like: Space * get_table_item(size_t index)? + // and get_table_bool(size_t index) + // or just only get_bool(size_t index)? + // size_t argument will be only for tables, wchar_t* or std::wstring for objects? + + // getters from object + Space * get_object_field(const wchar_t * field); // may a better name? 
+ const Space * get_object_field(const wchar_t * field) const; // may a better name? + Space * get_object_field(const std::wstring & field); + const Space * get_object_field(const std::wstring & field) const; // may a better name? + + + bool * get_bool(const wchar_t * field); + long long * get_llong(const wchar_t * field); + long long * get_long_long(const wchar_t * field); + float * get_float(const wchar_t * field); + double * get_double(const wchar_t * field); + std::string * get_str(const wchar_t * field); + std::wstring * get_wstr(const wchar_t * field); + ObjectType * get_object(const wchar_t * field); + TableType * get_table(const wchar_t * field); + // add these getters with std::wstring + + + const bool * get_bool() const; + const long long * get_llong() const; + const long long * get_long_long() const; + const float * get_float() const; + const double * get_double() const; + const std::string * get_str() const; + const std::wstring * get_wstr() const; + const ObjectType * get_object() const; + const TableType * get_table() const; + + bool has_key(const wchar_t * field) const; // may has_key() would be a better name? 
+ bool has_key(const std::wstring & field) const; + + + const bool * get_bool(const wchar_t * field) const; + const long long * get_llong(const wchar_t * field) const; + const long long * get_long_long(const wchar_t * field) const; + const float * get_float(const wchar_t * field) const; + const double * get_double(const wchar_t * field) const; + const std::string * get_str(const wchar_t * field) const; + const std::wstring * get_wstr(const wchar_t * field) const; + const ObjectType * get_object(const wchar_t * field) const; + const TableType * get_table(const wchar_t * field) const; + // add these getters with std::wstring + + + // no case, has O(n) complexity + Space * get_object_field_nc(const wchar_t * field); + Space * get_object_field_nc(const std::wstring & field); + const Space * get_object_field_nc(const wchar_t * field) const; + const Space * get_object_field_nc(const std::wstring & field) const; + + + + // remove a field from an object + void remove(const wchar_t * field); + void remove(const std::wstring & field); + + + std::string serialize_to_space_str(bool pretty_print = false) const; + std::wstring serialize_to_space_wstr(bool pretty_print = false) const; + void serialize_to_space_to(std::string & str, bool pretty_print = false) const; + void serialize_to_space_to(std::wstring & str, bool pretty_print = false) const; + + template + void serialize_to_space_stream(StreamType & str, bool pretty_print = false) const + { + if( is_object() ) + { + serialize_to_space_stream(str, pretty_print, 0, true); + } + } + + + + std::string serialize_to_json_str() const; + std::wstring serialize_to_json_wstr() const; + void serialize_to_json_to(std::string & str) const; + void serialize_to_json_to(std::wstring & str) const; + + + template + void serialize_to_json_stream(StreamType & str, bool pretty_print = false) const + { + switch(type) + { + case type_null: + serialize_json_null(str); + break; + + case type_bool: + serialize_json_bool(str); + break; + + case 
type_long: + serialize_json_long(str); + break; + + case type_float: + serialize_json_float(str); + break; + + case type_double: + serialize_json_double(str); + break; + + case type_string: + serialize_json_string(str); + break; + + case type_wstring: + serialize_json_wstring(str); + break; + + case type_object: + serialize_json_object(str); + break; + + case type_table: + serialize_json_table(str); + break; + } + } + + + // add this method with field with std::wstring + bool is_equal(const wchar_t * field, const char * val) const; + bool is_equal(const wchar_t * field, const std::string & val) const; + bool is_equal(const wchar_t * field, const wchar_t * val) const; + bool is_equal(const wchar_t * field, const std::wstring & val) const; + + // may a better name? + // add this method with field with std::wstring + bool has_value(const wchar_t * field, const char * val) const; + bool has_value(const wchar_t * field, const std::string & val) const; + bool has_value(const wchar_t * field, const wchar_t * val) const; + bool has_value(const wchar_t * field, const std::wstring & val) const; + + + // for child spaces (used only in Space format) + TableType * find_child_space_table(); + const TableType * find_child_space_table() const; + + bool child_spaces_empty() const; + size_t child_spaces_size() const; + + Space * find_child_space(const wchar_t * name); + Space * find_child_space(const std::wstring & name); + const Space * find_child_space(const wchar_t * name) const; + const Space * find_child_space(const std::wstring & name) const; + + Space * find_child_space(size_t table_index); + const Space * find_child_space(size_t table_index) const; + + Space & add_child_space(); + Space & add_child_space(const wchar_t * space_name); + Space & add_child_space(const std::wstring & space_name); + + Space & find_add_child_space(const wchar_t * name); + Space & find_add_child_space(const std::wstring & name); + + std::wstring * find_child_space_name(); + const std::wstring * 
find_child_space_name() const; + + std::wstring get_child_space_name() const; + + bool is_child_space_name(const wchar_t * name) const; + bool is_child_space_name(const std::wstring & name) const; + + void remove_child_space(const wchar_t * name); + void remove_child_space(const std::wstring & name); + void remove_child_space(size_t index); + + +protected: + + + template + Space & add_generic(const ArgType & val) + { + initialize_value_table_if_needed(); + + Space * new_space = new Space(val); + value.value_table.push_back(new_space); + + return *value.value_table.back(); + } + + + template + Space & add_generic(const wchar_t * field, const ArgType & val) + { + initialize_value_object_if_needed(); + + auto insert_res = value.value_object.insert(std::make_pair(field, nullptr)); + insert_res.first->second = new Space(val); + + return *(insert_res.first->second); + } + + + template + Space & add_generic(const std::wstring & field, const ArgType & val) + { + return add_generic(field.c_str(), val); + } + + + template + ArgType to_generic_numeric_signed_value() const + { + long long val = to_long_long(); + + if( val < std::numeric_limits::min() || val > std::numeric_limits::max() ) + val = 0; + + return val; + } + + template + ArgType to_generic_numeric_unsigned_value() const + { + unsigned long long val = to_ulong_long(); + + if( val > std::numeric_limits::max() ) + val = 0; + + return val; + } + + long long convert_string_to_long_long() const; + long long convert_wstring_to_long_long() const; + + unsigned long long convert_string_to_ulong_long() const; + unsigned long long convert_wstring_to_ulong_long() const; + + + template + void to_list_str_generic(ListType & output_list, bool clear_list) const + { + if( clear_list ) + output_list.clear(); + + if( type == type_string ) + { + output_list.push_back(value.value_string); + } + else + if( type == type_table ) + { + for(size_t i = 0 ; i < value.value_table.size() ; ++i) + { + 
output_list.push_back(value.value_table[i]->to_str()); + } + } + else + { + output_list.push_back(to_str()); + } + } + + + template + void to_list_wstr_generic(ListType & output_list, bool clear_list) const + { + if( clear_list ) + output_list.clear(); + + if( type == type_wstring ) + { + output_list.push_back(value.value_wstring); + } + else + if( type == type_table ) + { + for(size_t i = 0 ; i < value.value_table.size() ; ++i) + { + output_list.push_back(value.value_table[i]->to_wstr()); + } + } + else + { + output_list.push_back(to_wstr()); + } + } + + template + bool to_list_generic(const wchar_t * field, ListType & output_list, bool clear_list) const + { + if( clear_list ) + output_list.clear(); + + const Space * space = get_object_field(field); + + if( space ) + { + space->to_list(output_list, false); + return true; + } + + return false; + } + + + template + bool to_list_generic(const std::wstring & field, ListType & output_list, bool clear_list) const + { + if( clear_list ) + output_list.clear(); + + const Space * space = get_object_field(field); + + if( space ) + { + space->to_list(output_list, false); + return true; + } + + return false; + } + + + + + template + void escape_to_space_format(int c, StreamType & out) const + { + // IMPLEMENT ME + escape_to_json_format(c, out); + } + + + template + void escape_to_json_format(int c, StreamType & out) const + { + switch(c) + { + case 0: out << '\\'; out << '0'; break; + case '\r': out << '\\'; out << 'r'; break; + case '\n': out << '\\'; out << 'n'; break; + case '\\': out << '\\'; out << '\\'; break; + case '"': out << '\\'; out << '\"'; break; + //case '(': out << '\\'; out << '('; break; + //case ')': out << '\\'; out << ')'; break; + //case '=': out << '\\'; out << '='; break; + default: + out << static_cast(c); + } + } + + + + template + void copy_input_string_to_output(const CharT * input_str, StreamType & out_str, Escape escape) const + { + while( *input_str ) + { + if( escape == Escape::no_escape ) + 
out_str << static_cast(*input_str); + else + if( escape == Escape::escape_space ) + escape_to_space_format(*input_str, out_str); + else + if( escape == Escape::escape_json ) + escape_to_json_format(*input_str, out_str); + + input_str += 1; + } + } + + template + void copy_input_stream_to_output(const StreamType & input_str, StreamType & out_str, Escape escape) const + { + typename StreamType::const_iterator i = input_str.begin(); + + while( i != input_str.end() ) + { + if( escape == Escape::no_escape ) + out_str << static_cast(*i); + else + if( escape == Escape::escape_space ) + escape_to_space_format(*i, out_str); + else + if( escape == Escape::escape_json ) + escape_to_json_format(*i, out_str); + + ++i; + } + } + + + template + void serialize_string_buffer(const char * input_str, StreamType & out_str, Escape escape) const + { + if constexpr ( sizeof(char) == sizeof(typename StreamType::char_type) ) + { + // input and output are char (we assume it is utf8) + copy_input_string_to_output(input_str, out_str, escape); + } + else + { + // input is utf8 but output is wide + copy_input_string_to_output(input_str, out_str, escape); // temporarily + + // !!!!!!!!!!!!!!!!!!! 
FIXME +// StreamType temp_stream; +// UTF8ToWide(input_str, temp_stream, false); +// +// copy_input_stream_to_output(temp_stream, out_str, escape); + } + } + + + template + void serialize_string_buffer(const wchar_t * input_str, StreamType & out_str, Escape escape) const + { + if constexpr ( sizeof(wchar_t) == sizeof(typename StreamType::char_type) ) + { + // input and output are wide characters + copy_input_string_to_output(input_str, out_str, escape); + } + else + { + StreamType temp_stream; + + // input is wide but output is utf8 + wide_to_utf8(input_str, temp_stream, false); + copy_input_stream_to_output(temp_stream, out_str, escape); + } + } + + + + + template + void serialize_space_null(StreamType & str) const + { + serialize_string_buffer(L"null", str, Escape::escape_space); + } + + template + void serialize_space_bool(StreamType & str) const + { + if( value.value_bool ) + { + serialize_string_buffer(L"true", str, Escape::escape_space); + } + else + { + serialize_string_buffer(L"false", str, Escape::escape_space); + } + } + + + template + void serialize_space_long(StreamType & str) const + { + wchar_t buffer[50]; + size_t buffer_len = sizeof(buffer) / sizeof(wchar_t); + + Toa(value.value_long, buffer, buffer_len); + serialize_string_buffer(buffer, str, Escape::escape_space); + } + + template + void serialize_space_float(StreamType & str) const + { + wchar_t buffer[100]; + size_t buffer_len = sizeof(buffer) / sizeof(wchar_t); + + int chars_written = std::swprintf(buffer, buffer_len, L"%e", static_cast(value.value_float)); + + if( errno == EOVERFLOW || chars_written < 0 ) + buffer[0] = 0; + + serialize_string_buffer(buffer, str, Escape::escape_space); + } + + template + void serialize_space_double(StreamType & str) const + { + wchar_t buffer[100]; + size_t buffer_len = sizeof(buffer) / sizeof(wchar_t); + + int chars_written = std::swprintf(buffer, buffer_len, L"%e", value.value_double); + + if( errno == EOVERFLOW || chars_written < 0 ) + buffer[0] = 0; + + 
serialize_string_buffer(buffer, str, Escape::escape_space); + } + + template + void serialize_space_string(StreamType & str) const + { + str << '"'; + serialize_string_buffer(value.value_string.c_str(), str, Escape::escape_space); + str << '"'; + } + + template + void serialize_space_wstring(StreamType & str) const + { + str << '"'; + serialize_string_buffer(value.value_wstring.c_str(), str, Escape::escape_space); + str << '"'; + } + + template + void serialize_space_object(StreamType & str, bool pretty_print, int level, bool is_main_object) const + { + if( !is_main_object ) + { + str << '{'; + print_if(pretty_print && (!value.value_object.empty() || !child_spaces_empty()), str, '\n'); + } + + bool is_first = true; + + for(auto & map_item : value.value_object) + { + if( !is_first ) + print_if(pretty_print, str, '\n', ','); + + bool quote_field = should_field_be_quoted(map_item.first); + + print_level(pretty_print, level, str); + print_if(quote_field, str, '"'); + serialize_string_buffer(map_item.first.c_str(), str, Escape::escape_space); + print_if(quote_field, str, '"'); + + print_if(pretty_print, str, ' '); + str << '='; + print_if(pretty_print, str, ' '); + + map_item.second->serialize_to_space_stream(str, pretty_print, level + 1, false); + is_first = false; + } + + print_if(!is_first && pretty_print, str, '\n'); + serialize_child_spaces(str, pretty_print, level); + + if( !is_main_object ) + { + print_level(pretty_print, level - 1, str); + str << '}'; + print_if(pretty_print, str, '\n'); + } + } + + + template + void serialize_child_spaces(StreamType & str, bool pretty_print, int level) const + { + const TableType * child_table = find_child_space_table(); + + if( child_table && !child_table->empty() ) + { + print_if(pretty_print, str, '\n'); + + for(Space * child_space : *child_table) + { + print_if(!pretty_print, str, ' '); + + const std::wstring * name = child_space->get_wstr(child_spaces_name); + + if( name && !name->empty() ) + { + bool quote_field = 
should_field_be_quoted(*name); + + print_level(pretty_print, level, str); + print_if(quote_field, str, '"'); + serialize_string_buffer(name->c_str(), str, Escape::escape_space); + print_if(quote_field, str, '"'); + + str << ' '; + } + + child_space->serialize_to_space_stream(str, pretty_print, level + 1, false); + print_if(pretty_print, str, '\n'); + } + } + } + + + template + void serialize_space_table(StreamType & str, bool pretty_print, int level) const + { + bool multivalue_table = false; + bool is_first = true; + + if( value.value_table.size() > 1 ) + { + multivalue_table = true; + } + + str << '('; + print_if(pretty_print && multivalue_table, str, '\n'); + + for(Space * space : value.value_table) + { + if( !is_first ) + print_if(pretty_print, str, '\n', ','); + + print_level(pretty_print && multivalue_table, level, str); + space->serialize_to_space_stream(str, pretty_print, level + 1, false); + is_first = false; + } + + print_if(pretty_print && multivalue_table, str, '\n'); + print_level(pretty_print && multivalue_table, level - 1, str); + str << ')'; + } + + + + + + template + void serialize_json_null(StreamType & str) const + { + serialize_string_buffer(L"null", str, Escape::escape_json); + } + + template + void serialize_json_bool(StreamType & str) const + { + if( value.value_bool ) + { + serialize_string_buffer(L"true", str, Escape::escape_json); + } + else + { + serialize_string_buffer(L"false", str, Escape::escape_json); + } + } + + + template + void serialize_json_long(StreamType & str) const + { + wchar_t buffer[50]; + size_t buffer_len = sizeof(buffer) / sizeof(wchar_t); + + Toa(value.value_long, buffer, buffer_len); + serialize_string_buffer(buffer, str, Escape::escape_json); + } + + template + void serialize_json_float(StreamType & str) const + { + wchar_t buffer[100]; + size_t buffer_len = sizeof(buffer) / sizeof(wchar_t); + + int chars_written = std::swprintf(buffer, buffer_len, L"%e", static_cast(value.value_float)); + + if( errno == EOVERFLOW 
|| chars_written < 0 ) + buffer[0] = 0; + + serialize_string_buffer(buffer, str, Escape::escape_json); + } + + template + void serialize_json_double(StreamType & str) const + { + wchar_t buffer[100]; + size_t buffer_len = sizeof(buffer) / sizeof(wchar_t); + + int chars_written = std::swprintf(buffer, buffer_len, L"%e", value.value_double); + + if( errno == EOVERFLOW || chars_written < 0 ) + buffer[0] = 0; + + serialize_string_buffer(buffer, str, Escape::escape_json); + } + + template + void serialize_json_string(StreamType & str) const + { + str << '"'; + serialize_string_buffer(value.value_string.c_str(), str, Escape::escape_json); + str << '"'; + } + + template + void serialize_json_wstring(StreamType & str) const + { + str << '"'; + serialize_string_buffer(value.value_wstring.c_str(), str, Escape::escape_json); + str << '"'; + } + + template + void serialize_json_object(StreamType & str) const + { + str << '{'; + + bool is_first = true; + + for(auto & map_item : value.value_object) + { + if( !is_first ) + { + str << ','; + } + + str << '"'; + serialize_string_buffer(map_item.first.c_str(), str, Escape::escape_json); + str << '"'; + str << ':'; + map_item.second->serialize_to_json_stream(str); + is_first = false; + } + + str << '}'; + } + + template + void serialize_json_table(StreamType & str) const + { + str << '['; + + bool is_first = true; + + for(Space * space : value.value_table) + { + if( !is_first ) + { + str << ','; + } + + space->serialize_to_json_stream(str); + is_first = false; + } + + str << ']'; + } + + +protected: + + template + void serialize_to_space_stream(StreamType & str, bool pretty_print, int level, bool is_main_object) const + { + switch(type) + { + case type_null: + serialize_space_null(str); + break; + + case type_bool: + serialize_space_bool(str); + break; + + case type_long: + serialize_space_long(str); + break; + + case type_float: + serialize_space_float(str); + break; + + case type_double: + serialize_space_double(str); + break; + + 
case type_string: + serialize_space_string(str); + break; + + case type_wstring: + serialize_space_wstring(str); + break; + + case type_object: + serialize_space_object(str, pretty_print, level, is_main_object); + break; + + case type_table: + serialize_space_table(str, pretty_print, level); + break; + } + } + + + template + bool should_field_be_quoted(StringType & str) const + { + if( str.empty() ) + return true; + + for(size_t i = 0 ; i < str.size() ; ++i) + { + int c = str[i]; + + // '{' is used when child spaces begin + + if( c == '\n' || c == '#' || c == '=' || c == '{' || c == '}' ) + return true; + } + + return false; + } + + + template + void print_level(bool pretty_print, int level, StreamType & str) const + { + if( pretty_print ) + { + for(int i=0 ; i < level ; ++i) + { + str << ' ' << ' '; + } + } + } + + + template + void print_if(bool condition, StreamType & str, wchar_t c) const + { + if( condition ) + str << c; + } + + template + void print_if(bool condition, StreamType & str, wchar_t c1, wchar_t c2) const + { + if( condition ) + str << c1; + else + str << c2; + } + + + void copy_value_from(const Space & space); + void copy_from(const Space & space); + + void copy_value_object(const Value & value_from); + void copy_value_table(const Value & value_from); + + void move_value_from(Space && space); + void move_from(Space && space); + + void initialize(); + void initialize_value_null_if_needed(); + void initialize_value_bool_if_needed(); + void initialize_value_long_if_needed(); + void initialize_value_float_if_needed(); + void initialize_value_double_if_needed(); + void initialize_value_string_if_needed(); + void initialize_value_string_if_needed(std::string && str); + void initialize_value_wstring_if_needed(); + void initialize_value_wstring_if_needed(std::wstring && str); + void initialize_value_object_if_needed(); + void initialize_value_object_if_needed(ObjectType && obj); + void initialize_value_table_if_needed(); + void 
initialize_value_table_if_needed(TableType && tab); + void initialize_child_spaces_if_needed(); + + void remove_value(); + void remove_value_string(); + void remove_value_wstring(); + void remove_value_object(); + void remove_value_table(); + + const Space * find_child_space_const(const wchar_t * name) const; + const Space * find_child_space_const(size_t table_index) const; + + +}; + + + + +} // namespace + + + +#endif diff --git a/src/space/spaceparser.cpp b/src/space/spaceparser.cpp new file mode 100644 index 0000000..a5704f8 --- /dev/null +++ b/src/space/spaceparser.cpp @@ -0,0 +1,1094 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2012-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "spaceparser.h" +#include "utf8/utf8.h" +#include "convert/strtoint.h" + + +namespace pt +{ + + + + +SpaceParser::SpaceParser() +{ + root_space = nullptr; + space_start = '{'; + space_end = '}'; + option_delimiter = ','; + input_as_utf8 = true; +} + + + +void SpaceParser::use_utf8(bool utf) +{ + input_as_utf8 = utf; +} + + +int SpaceParser::get_last_parsed_line() +{ + return line; +} + + + +SpaceParser::Status SpaceParser::parse_json_file(const char * file_name, Space & out_space, bool clear_space) +{ + reading_from_file = true; + parsing_space = false; + root_space = &out_space; + + file.clear(); + file.open(file_name, std::ios_base::binary | std::ios_base::in); + + if( file ) + { + parse_root_space(clear_space); + file.close(); + } + else + { + status = cant_open_file; + } + +return status; +} + + + +SpaceParser::Status SpaceParser::parse_json_file(const std::string & file_name, Space & out_space, bool clear_space) +{ + return parse_json_file(file_name.c_str(), out_space, clear_space); +} + + + + +SpaceParser::Status SpaceParser::parse_json_file(const wchar_t * file_name, Space & out_space, bool clear_space) +{ + std::string file_name_utf8; + + wide_to_utf8(file_name, file_name_utf8); + return parse_json_file(file_name_utf8.c_str(), out_space, clear_space); +} + + + +SpaceParser::Status SpaceParser::parse_json_file(const std::wstring & file_name, Space & out_space, bool clear_space) +{ + return 
parse_json_file(file_name.c_str(), out_space, clear_space); +} + + + + + +SpaceParser::Status SpaceParser::parse_space_file(const char * file_name, Space & out_space, bool clear_space) +{ + reading_from_file = true; + parsing_space = true; + root_space = &out_space; + + file.clear(); + file.open(file_name, std::ios_base::binary | std::ios_base::in); + + if( file ) + { + parse_root_space(clear_space); + file.close(); + } + else + { + status = cant_open_file; + } + +return status; +} + + + +SpaceParser::Status SpaceParser::parse_space_file(const std::string & file_name, Space & out_space, bool clear_space) +{ + return parse_space_file(file_name.c_str(), out_space, clear_space); +} + + + + +SpaceParser::Status SpaceParser::parse_space_file(const wchar_t * file_name, Space & out_space, bool clear_space) +{ + std::string file_name_utf8; + + wide_to_utf8(file_name, file_name_utf8); + return parse_space_file(file_name_utf8.c_str(), out_space, clear_space); +} + + + +SpaceParser::Status SpaceParser::parse_space_file(const std::wstring & file_name, Space & out_space, bool clear_space) +{ + return parse_space_file(file_name.c_str(), out_space, clear_space); +} + + + +SpaceParser::Status SpaceParser::parse_json(const char * str, Space & out_space, bool clear_space) +{ + reading_from_file = false; + reading_from_wchar_string = false; + pchar_ascii = str; + pchar_unicode = 0; + parsing_space = false; + root_space = &out_space; + + parse_root_space(clear_space); + + return status; +} + + +SpaceParser::Status SpaceParser::parse_json(const std::string & str, Space & out_space, bool clear_space) +{ + return parse_json(str.c_str(), out_space, clear_space); +} + + +SpaceParser::Status SpaceParser::parse_json(const wchar_t * str, Space & out_space, bool clear_space) +{ + reading_from_file = false; + reading_from_wchar_string = true; + pchar_unicode = str; + pchar_ascii = 0; + parsing_space = false; + root_space = &out_space; + + parse_root_space(clear_space); + + return status; +} + + 
+SpaceParser::Status SpaceParser::parse_json(const std::wstring & str, Space & out_space, bool clear_space) +{ + return parse_json(str.c_str(), out_space, clear_space); +} + + + + + +SpaceParser::Status SpaceParser::parse_space(const char * str, Space & out_space, bool clear_space) +{ + reading_from_file = false; + reading_from_wchar_string = false; + pchar_ascii = str; + pchar_unicode = 0; + parsing_space = true; + root_space = &out_space; + + parse_root_space(clear_space); + + return status; +} + + +SpaceParser::Status SpaceParser::parse_space(const std::string & str, Space & out_space, bool clear_space) +{ + return parse_space(str.c_str(), out_space, clear_space); +} + + +SpaceParser::Status SpaceParser::parse_space(const wchar_t * str, Space & out_space, bool clear_space) +{ + reading_from_file = false; + reading_from_wchar_string = true; + pchar_unicode = str; + pchar_ascii = 0; + parsing_space = true; + root_space = &out_space; + + parse_root_space(clear_space); + + return status; +} + + +SpaceParser::Status SpaceParser::parse_space(const std::wstring & str, Space & out_space, bool clear_space) +{ + return parse_space(str.c_str(), out_space, clear_space); +} + + + + + +void SpaceParser::parse_root_space(bool clear_root_space) +{ + line = 1; + status = ok; + + if( clear_root_space ) + { + root_space->set_empty_object(); + } + + read_char(); // put first character to lastc + + if( parsing_space ) + { + separator = '='; + table_start = '('; + table_end = ')'; + parse_space(root_space); + } + else + { + separator = ':'; + table_start = '['; + table_end = ']'; + parse(root_space, false, false); + } + + skip_white(); + + if( lastc != -1 ) + status = syntax_error; + + token.clear(); +} + + +void SpaceParser::parse(Space * space, bool is_object_value, bool is_table_value) +{ + skip_white(); + + if( lastc == space_start ) + { + parse_space(space); + } + else + if( lastc == table_start ) + { + parse_table(space); + } + else + if( lastc == '"' ) // IMPROVEME define a 
variable + { + parse_text_value(space); + } + else + { + read_string_value(token, is_object_value, is_table_value); + + if( token == L"null" ) + { + space->set_null(); + } + else + if( token == L"true" ) + { + space->set(true); + } + else + if( token == L"false" ) + { + space->set(false); + } + else + if( is_integer_token() ) + { + parse_integer_value(space); + } + else + if( is_floating_point_token() ) + { + parse_floating_point_value(space); + } + else + { + if( parsing_space ) + { + space->set(token); + } + else + { + status = syntax_error; + } + } + } +} + + + + + +void SpaceParser::parse_space(Space * space) +{ + /* + * in Space format in global namespace the space start character is not required + */ + bool need_space_start_character = !parsing_space || space != root_space; + + if( need_space_start_character ) + { + read_char(); // inserting a next character after the space_start char to lastc + } + + if( !space->is_object() ) + space->set_empty_object(); + + parse_key_value_pairs(space); + + if( need_space_start_character ) + { + if( lastc == space_end ) + { + read_char(); + } + else + { + status = syntax_error; + } + } +} + + + + + +void SpaceParser::parse_text_value(Space * space) +{ + space->set_empty_wstring(); + std::wstring * str = space->get_wstr(); + + if( parsing_space ) + read_multiline_token_quoted(*str); + else + read_token_quoted(*str); +} + + +void SpaceParser::parse_integer_value(Space * space) +{ + const wchar_t * after_str = nullptr; + bool was_overflow = false; + int base = 10; + + if( parsing_space ) + { + // in Space format when the integer value begins with a zero it means + // this is an octal number + if( !token.empty() && token[0] == '0' ) + base = 8; + } + + long long val = Toll(token.c_str(), base, &after_str, &was_overflow, false); + + if( was_overflow ) + { + status = syntax_error; + } + else + if( size_t(after_str - token.c_str()) != token.size() ) + { + status = syntax_error; + } + else + { + space->set(val); + } +} + + +void 
SpaceParser::parse_floating_point_value(Space * space) +{ + wchar_t * after_str = nullptr; + double val = wcstod(token.c_str(), &after_str); + + if( errno == ERANGE ) + { + status = syntax_error; + } + else + if( size_t(after_str - token.c_str()) != token.size() ) + { + status = syntax_error; + } + else + { + space->set(val); + } +} + + + + +void SpaceParser::parse_table(Space * space) +{ + read_char(); // inserting a next character after the table_start char to lastc + space->set_empty_table(); + parse_values_list(space); + + if( lastc == table_end ) + { + read_char(); + } + else + { + status = syntax_error; + } +} + + + + +void SpaceParser::parse_key_value_pairs(Space * space) +{ + bool is_first = true; + skip_white(); + + while( status == ok && lastc != space_end && lastc != -1 ) + { + if( !is_first ) + { + skip_white(); + + if( lastc == option_delimiter ) + { + read_char(); // inserting a next character after the option_delimiter to lastc + + if( parsing_space ) + { + // in space format a space_end character is allowed to be after the last table item + skip_white(); + + if( lastc == space_end ) + break; + } + } + else + if( !parsing_space ) + { + // in json format the option_delimiter is required + status = syntax_error; + } + } + + if( status == ok ) + { + read_key(); + + if( status == ok ) + { + skip_white(); + + if( lastc == separator ) + { + read_char(); // inserting a next character after the separator to lastc + + Space & new_space = space->add(token.c_str(), new Space()); + parse(&new_space, true, false); + } + else + if( parsing_space && lastc == space_start ) + { + Space & new_space = space->add_child_space(token.c_str()); + parse_space(&new_space); + } + else + { + status = syntax_error; + } + } + } + + is_first = false; + skip_white(); + } +} + + + + +void SpaceParser::parse_values_list(Space * space) +{ + bool is_first = true; + skip_white(); + + while( status == ok && lastc != table_end && lastc != -1 ) + { + if( !is_first ) + { + skip_white(); + + 
if( lastc == option_delimiter ) // may add a new delimiter for tables? default the same as for objects... + { + read_char(); // inserting a next character after the delimiter + + if( parsing_space ) + { + // in space format a table_end character is allowed to be after the last table item + skip_white(); + + if( lastc == table_end ) + break; + } + } + else + if( !parsing_space ) + { + // in json format the option_delimiter is required + status = syntax_error; + } + } + + if( status == ok ) + { + Space * new_space = &space->add(new Space()); + parse(new_space, false, true); + } + + is_first = false; + skip_white(); + } +} + + + +bool SpaceParser::is_integer_token() +{ + if( token.empty() ) + return false; + + size_t i = 0; + + if( token[i] == '-' ) + { + i += 1; + + if( token.size() == 1 ) + return false; + } + + for( ; i < token.size() ; ++i) + { + if( token[i] < '0' || token[i] > '9' ) + { + return false; + } + } + + return true; +} + + +bool SpaceParser::is_floating_point_token() +{ + bool was_dot = false; + bool was_exponential = false; + bool was_plus_minus_sign = false; + + if( token.empty() ) + return false; + + size_t i = 0; + + if( token[i] == '-' ) + { + i += 1; + + if( token.size() == 1 ) + return false; + } + + for( ; i < token.size() ; ++i) + { + if( token[i] == '.' 
) + { + if( was_dot || was_exponential ) + return false; + + was_dot = true; + } + else + if( token[i] == 'e' || token[i]=='E' ) + { + if( was_exponential ) + return false; + + was_exponential = true; + + // the exponential character cannot be the last character + if( i + 1 == token.size() ) + return false; + } + else + if( token[i] == '+' || token[i] == '-' ) + { + if( was_plus_minus_sign || !was_exponential ) + return false; + + // the plus or minus should be after the exponential character + if( i > 0 && (token[i-1] != 'e' && token[i-1] != 'E') ) + return false; + + was_plus_minus_sign = true; + } + else + if( token[i] < '0' || token[i] > '9' ) + { + return false; + } + } + + return true; +} + + + +bool SpaceParser::is_white(int c) +{ + // 13 (\r) is at the end of a line in a dos file \r\n + // 160 is an unbreakable space + if( c==' ' || c=='\t' || c==13 || c==160 || c==10 ) + return true; + + return false; +} + + +bool SpaceParser::is_alfa_numeric_char(int c) +{ + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '.' 
|| c=='-' || c=='+'; +} + + +void SpaceParser::skip_line() +{ + while( lastc != -1 && (char_was_escaped || lastc != '\n') ) + read_char(); +} + + + +void SpaceParser::skip_white() +{ + if( parsing_space ) + { + while( is_white(lastc) || (!char_was_escaped && lastc == '#') ) + { + if( lastc == '#' ) + skip_line(); + else + read_char(); + } + } + else + { + while( is_white(lastc) ) + { + read_char(); + } + } +} + + +void SpaceParser::trim_last_white(std::wstring & s) +{ + std::wstring::size_type i; + + for(i=s.size() ; i>0 && is_white(s[i-1]) ; --i) + { + } + + if( i < s.size() ) + { + s.erase(i, std::wstring::npos); + } +} + + + +void SpaceParser::read_token_until_delimiter(std::wstring & token, int delimiter1, int delimiter2) +{ + token.clear(); + + while( lastc != -1 && (char_was_escaped || (lastc != '\n' && lastc != '#' && lastc != delimiter1 && lastc != delimiter2)) ) + { + token += static_cast(lastc); + read_char(); + } + + trim_last_white(token); +} + + +void SpaceParser::read_alfa_numeric_token(std::wstring & token) +{ + token.clear(); + + while( is_alfa_numeric_char(lastc) ) + { + token += static_cast(lastc); + read_char(); + } +} + + +void SpaceParser::read_string_value(std::wstring & token, bool is_object_value, bool is_table_value) +{ + if( parsing_space ) + { + if( is_object_value ) + { + read_token_until_delimiter(token, space_end, -1); + } + else + if( is_table_value ) + { + read_token_until_delimiter(token, table_end, option_delimiter); + } + else + { + read_token_until_delimiter(token, -1, -1); + } + } + else + { + read_alfa_numeric_token(token); + } +} + + +void SpaceParser::read_space_field_token(std::wstring & token) +{ + token.clear(); + + while( lastc != -1 && (char_was_escaped || (lastc != separator && lastc != 10 && lastc != space_start && lastc != '#' )) ) + { + token += static_cast(lastc); + read_char(); + } + + trim_last_white(token); +} + + +// IMPROVEME in JSON we should not allow non-escaped a new line character +void 
SpaceParser::read_token_quoted(std::wstring & token) +{ + token.clear(); + read_char(); // skipping the first quotation mark + + while( lastc != -1 && (char_was_escaped || (lastc != '"' && lastc != 10)) ) + { + token += static_cast(lastc); + read_char(); + } + + if( !char_was_escaped && lastc == '"' ) + { + read_char(); // skipping the last quotation mark + } + else + { + status = syntax_error; + } +} + + +void SpaceParser::read_multiline_token_quoted(std::wstring & token) +{ + token.clear(); + read_char(); // skipping the first quotation mark + + while( lastc != -1 && (char_was_escaped || lastc != '"') ) + { + token += static_cast(lastc); + read_char(); + } + + if( !char_was_escaped && lastc == '"' ) + { + read_char(); // skipping the last quotation mark + } + else + { + status = syntax_error; + } +} + + +/* + * this method is used to read the field name (key) in an object + * or to read the space child name (used in Space format) + */ +void SpaceParser::read_key() +{ + skip_white(); + + if( parsing_space ) + { + if( lastc == '"' ) + { + read_multiline_token_quoted(token); + } + else + { + read_space_field_token(token); + } + } + else + { + if( lastc == '"' ) + { + read_token_quoted(token); + } + else + { + status = syntax_error; + } + } +} + + + + +int SpaceParser::read_utf8_char() +{ +int c; +bool correct; + + lastc = -1; + + do + { + utf8_to_int(file, c, correct); + + if( !file ) + return lastc; + } + while( !correct ); + + lastc = c; + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + + +int SpaceParser::read_ascii_char() +{ + lastc = file.get(); + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + + + +int SpaceParser::read_char_from_wchar_string() +{ + if( *pchar_unicode == 0 ) + lastc = -1; + else + lastc = *(pchar_unicode++); + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + +int SpaceParser::read_char_from_utf8_string() +{ +int c; +bool correct; + + lastc = -1; + + do + { + size_t len = utf8_to_int(pchar_ascii, c, correct); + 
pchar_ascii += len; + } + while( *pchar_ascii && !correct ); + + if( correct ) + lastc = c; + + if( lastc == '\n' ) + ++line; + +return lastc; + +} + + +int SpaceParser::read_char_from_ascii_string() +{ + if( *pchar_ascii == 0 ) + lastc = -1; + else + lastc = *(pchar_ascii++); + + if( lastc == '\n' ) + ++line; + +return lastc; +} + + +int SpaceParser::read_char_no_escape() +{ + if( reading_from_file ) + { + if( input_as_utf8 ) + return read_utf8_char(); + else + return read_ascii_char(); + } + else + { + if( reading_from_wchar_string ) + { + return read_char_from_wchar_string(); + } + else + { + if( input_as_utf8 ) + return read_char_from_utf8_string(); + else + return read_char_from_ascii_string(); + } + } +} + +bool SpaceParser::is_hex_digit(wchar_t c) +{ + return ((c>='0' && c<='9') || + (c>='a' && c<='f') || + (c>='A' && c<='F') ); +} + + +int SpaceParser::hex_to_int(wchar_t c) +{ + if( c>='0' && c<='9' ) + return c - '0'; + + if( c>='a' && c<='f' ) + return c - 'a' + 10; + + if( c>='A' && c<='F' ) + return c - 'A' + 10; + +return 0; +} + + +void SpaceParser::read_unicode_code_point() +{ +wchar_t c; +int value = 0; + + for(int i=0 ; i<4 ; ++i) + { + c = read_char_no_escape(); + + if( !is_hex_digit(c) ) + { + status = syntax_error; + return; + } + + value = (value << 4) | hex_to_int(c); + } + + lastc = (wchar_t)value; +} + + +int SpaceParser::read_char() +{ + char_was_escaped = false; + read_char_no_escape(); + + if( lastc == '\\' ) + { + char_was_escaped = true; + read_char_no_escape(); + + switch(lastc) + { + case '0': lastc = 0; break; + case 't': lastc = '\t'; break; + case 'r': lastc = '\r'; break; + case 'n': lastc = '\n'; break; + case 'b': lastc = 0x08; break; + case 'f': lastc = 0x0c; break; + case 'u': read_unicode_code_point(); break; + // "in other cases we return the last character, so two \\ returns one \ " + } + } + +return lastc; +} + + + +} // namespace + + + + diff --git a/src/space/spaceparser.h b/src/space/spaceparser.h new file mode 100644 
index 0000000..8b89772 --- /dev/null +++ b/src/space/spaceparser.h @@ -0,0 +1,313 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2012-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef headerfile_picotools_space_jsonspaceparser +#define headerfile_picotools_space_jsonspaceparser + +#include +#include "space.h" + + + +namespace pt +{ + + + + +class SpaceParser +{ +public: + + + /* + ctor -- setting default values (SetDefault() method) + */ + SpaceParser(); + + + /* + status of parsing + */ + enum Status { ok, cant_open_file, syntax_error }; + + + /* + the last status of parsing, set by parse() methods + */ + Status status; + + + + + /* + main methods used to parse a JSON file + file_name is the path to a file + */ + Status parse_json_file(const char * file_name, Space & out_space, bool clear_space = true); + Status parse_json_file(const std::string & file_name, Space & out_space, bool clear_space = true); + Status parse_json_file(const wchar_t * file_name, Space & out_space, bool clear_space = true); + Status parse_json_file(const std::wstring & file_name, Space & out_space, bool clear_space = true); + + + /* + main methods used to parse a Space file + file_name is the path to a file + */ + Status parse_space_file(const char * file_name, Space & out_space, bool clear_space = true); + Status parse_space_file(const std::string & file_name, Space & out_space, bool clear_space = true); + Status parse_space_file(const wchar_t * file_name, Space & out_space, bool clear_space = true); + Status parse_space_file(const std::wstring & file_name, Space & out_space, bool clear_space = true); + + + /* + main methods used to parse + str - input string (either 8bit ascii or UTF-8 -- see UTF8() method) + */ + Status parse_json(const char * str, Space & out_space, bool clear_space = true); + Status parse_json(const std::string & str, Space & out_space, bool clear_space = true); + + /* + main methods used to parse + here input string is always in unicode (wide characters) + */ + Status parse_json(const wchar_t * str, Space & out_space, bool clear_space = true); + Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true); 
+ + + + Status parse_space(const char * str, Space & out_space, bool clear_space = true); + Status parse_space(const std::string & str, Space & out_space, bool clear_space = true); + Status parse_space(const wchar_t * str, Space & out_space, bool clear_space = true); + Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true); + + + + /* + * add two args parse method + * Status parse(const char * str, Space & output_space); + * + */ + + + + /* + * if true then the input file or string (char* or std::string) is treated as UTF-8 + * default true + * + * the internal storage for strings is std::wstring so if you call set_utf8(false) then + * the characters of input string will be simple static_cast<> from char to wchar_t + * + */ + void use_utf8(bool utf); + + + /* + * + * returns a number of a last parsed line + * can be used to obtain the line in which there was a syntax error + * + */ + int get_last_parsed_line(); + + +private: + + + /* + current space set by SetSpace(); + */ + Space * root_space; + + + /* + a number of a line in which there is a syntax_error + */ + int line; + + /* + true if parse() method was called + false if ParseString() was called + */ + bool reading_from_file; + + + /* + pointers to the current character + if ParseString() is in used + */ + const char * pchar_ascii; + const wchar_t * pchar_unicode; + + + /* + true if ParseString(wchar_t *) or ParseString(std::wstring&) was called + */ + bool reading_from_wchar_string; + + + /* + last read token + */ + std::wstring token; + + + /* + separator between a variable and a value, default: '=' + */ + int separator; + + + /* + space starting character, default: '{' + */ + int space_start; + + + /* + space ending character, default: '}' + */ + int space_end; + + + /* + table starting character, default: '[' + */ + int table_start; + + + /* + table ending character, default: ']' + */ + int table_end; + + + /* + option delimiter, default: ',' + */ + int option_delimiter; + 
+ + /* + last read char + or -1 if the end + */ + int lastc; + + + /* + true if the lastc was escaped (with a backslash) + we have to know if the last sequence was \" or just " + */ + bool char_was_escaped; + + + /* + current file + + may it would be better to make a pointer? + if we parse only a string then there is no sense to have such an object + */ + std::ifstream file; + + + /* + input file is in UTF-8 + default: true + */ + bool input_as_utf8; + + + /* + * if parsing_space is false then it means we are parsing JSON format + * + */ + bool parsing_space; + + + + void parse_root_space(bool clear_root_space); + void parse(Space * space, bool is_object_value, bool is_table_value); + void parse_space(Space * space); + void parse_table(Space * space); + + void parse_key_value_pairs(Space * space); + void parse_values_list(Space * space); + + void read_key(); + + void parse_text_value(Space * space); + void parse_integer_value(Space * space); + void parse_floating_point_value(Space * space); + + bool is_alfa_numeric_char(int c); + + void read_token_until_delimiter(std::wstring & token, int delimiter1, int delimiter2); + void read_alfa_numeric_token(std::wstring & token); + void read_string_value(std::wstring & token, bool is_object_value, bool is_table_value); + + bool is_integer_token(); + bool is_floating_point_token(); + + void read_space_field_token(std::wstring & token); + void read_token_quoted(std::wstring & token); + void read_multiline_token_quoted(std::wstring & token); + + int read_utf8_char(); + int read_ascii_char(); + int read_char_from_wchar_string(); + int read_char_from_utf8_string(); + int read_char_from_ascii_string(); + int read_char_no_escape(); + int read_char(); + bool is_white(int c); + void skip_line(); + void skip_white(); + void trim_last_white(std::wstring & s); + bool is_hex_digit(wchar_t c); + int hex_to_int(wchar_t c); + void read_unicode_code_point(); + +}; + + + + +} // namespace + + +#endif diff --git a/textstream/textstream.h 
b/src/textstream/textstream.h similarity index 97% rename from textstream/textstream.h rename to src/textstream/textstream.h index 523f638..1ad96cc 100644 --- a/textstream/textstream.h +++ b/src/textstream/textstream.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012-2013, Tomasz Sowa + * Copyright (c) 2012-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ // for snprintf #include -namespace PT +namespace pt { @@ -60,17 +60,20 @@ namespace PT StringType can be either std::string or std::wstring this class doesn't use UTF-8 in any kind */ -template +template class TextStreamBase { public: TextStreamBase(); + typedef CharT char_type; + typedef MemBuffer buffer_type; typedef typename buffer_type::iterator iterator; typedef typename buffer_type::const_iterator const_iterator; + void clear(); bool empty() const; size_t size() const; @@ -106,8 +109,8 @@ public: TextStreamBase & operator<<(unsigned long long); TextStreamBase & operator<<(double); TextStreamBase & operator<<(const void *); // printing a pointer - TextStreamBase & operator<<(const PT::Space & space); - TextStreamBase & operator<<(const PT::Date & date); + TextStreamBase & operator<<(const Space & space); + TextStreamBase & operator<<(const Date & date); // min width for integer output // if the output value has less digits then first zeroes are added @@ -466,9 +469,9 @@ wchar_t buf[100]; template TextStreamBase & -TextStreamBase::operator<<(const PT::Space & space) +TextStreamBase::operator<<(const Space & space) { - space.Serialize(*this, true, false); + space.serialize_to_space_stream(*this, true); return *this; } @@ -477,7 +480,7 @@ return *this; template TextStreamBase & -TextStreamBase::operator<<(const PT::Date & date) +TextStreamBase::operator<<(const Date & date) { date.Serialize(*this); diff --git a/textstream/types.h b/src/textstream/types.h similarity index 99% rename from textstream/types.h rename to 
src/textstream/types.h index d2fe229..dbb84a0 100644 --- a/textstream/types.h +++ b/src/textstream/types.h @@ -40,7 +40,7 @@ -namespace PT +namespace pt { diff --git a/utf8/utf8.cpp b/src/utf8/utf8.cpp similarity index 57% rename from utf8/utf8.cpp rename to src/utf8/utf8.cpp index cfb20e5..325de87 100644 --- a/utf8/utf8.cpp +++ b/src/utf8/utf8.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2010-2018, Tomasz Sowa + * Copyright (c) 2010-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,67 +36,62 @@ */ #include "utf8.h" +#include "utf8_private.h" -namespace PT +namespace pt { -/*! - an auxiliary function for converting from UTF-8 string -*/ -static bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res) -{ - for(len=0 ; (uz & 0x80) != 0 ; ++len) - uz <<= 1; - - if( len == 1 ) - return false; - - res = uz; - - if( len > 0 ) - res >>= len; - - if( res == 0 ) - return false; - - if( len == 0 ) - len = 1; - -return true; -} - - - -/*! - an auxiliary function for converting from UTF-8 string -*/ -static bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res) -{ - if( (uz & 0xc0) != 0x80 ) - return false; - - res <<= 6; - res |= (uz & 0x3F); - -return true; -} - - /*! returns true if 'c' is a correct unicode character */ -bool UTF8_CheckRange(int c) +bool utf8_check_range(int c) { return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF); } +/*! 
+ returns true if 'c' is a correct unicode character + + this method is used when reading from an utf8 string + how_many_bytes - means how many bytes from the utf8 string were read +*/ +bool utf8_check_range(int c, int how_many_bytes) +{ + if( c >= 0x0000 && c <= 0x007f && how_many_bytes == 1 ) + { + return true; + } + + if( c >= 0x0080 && c <= 0x07ff && how_many_bytes == 2 ) + { + return true; + } + + if( c >= 0x0800 && c < 0xD800 && how_many_bytes == 3) + { + return true; + } + + if( c > 0xDFFF && c <= 0xffff && how_many_bytes == 3) + { + return true; + } + + if( c >= 0x10000 && c <= 0x10FFFF && how_many_bytes == 4 ) + { + return true; + } + +return false; +} + /*! @@ -116,7 +111,7 @@ bool UTF8_CheckRange(int c) (returns zero only if utf8_len is zero) even if there are errors the functions returns a different from zero value */ -size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct) +size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct) { size_t i, len; @@ -126,17 +121,19 @@ size_t i, len; if( utf8_len == 0 ) return 0; - if( !UTF8ToInt_FirstOctet(utf8[0], len, res) ) + if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) ) return 1; if( utf8_len < len ) return utf8_len; for(i=1 ; i0xffff ) { @@ -294,13 +291,13 @@ static void IntToWide(int c, std::wstring & res) the function returns false if there were some errors when converting */ -bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode) +bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode) { if( clear ) res.clear(); - bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) { - IntToWide(c, res); + bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) { + int_to_wide(c, res); }); return status; @@ -324,14 +321,14 @@ bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool cle the 
function returns false if there were some errors when converting */ -bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear, int mode) +bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode) { size_t utf8_len = 0; while( utf8[utf8_len] != 0 ) utf8_len += 1; -return UTF8ToWide(utf8, utf8_len, res, clear, mode); +return utf8_to_wide(utf8, utf8_len, res, clear, mode); } @@ -350,9 +347,9 @@ return UTF8ToWide(utf8, utf8_len, res, clear, mode); the function returns false if there were some errors when converting */ -bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mode) +bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode) { - return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode); + return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode); } @@ -371,7 +368,7 @@ bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mo the function returns false if there were some errors when converting */ -bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear, int mode) +bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode) { int z; bool correct, was_error = false; @@ -379,7 +376,7 @@ bool correct, was_error = false; if( clear ) res.clear(); - while( UTF8ToInt(utf8, z, correct) > 0 ) + while( utf8_to_int(utf8, z, correct) > 0 ) { if( !correct ) { @@ -390,7 +387,7 @@ bool correct, was_error = false; } else { - IntToWide(z, res); + int_to_wide(z, res); } } @@ -413,13 +410,13 @@ return !was_error; the function returns how many characters have been written to the utf8, zero means the utf8 buffer is too small or 'z' is an incorrect unicode character */ -size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len) +size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len) { char buf[10]; int i = 0; int mask = 0x3f; // 6 first bits set - if( utf8_max_len==0 || !UTF8_CheckRange(z) ) + if( utf8_max_len==0 || 
!utf8_check_range(z) ) return 0; if( z <= 0x7f ) @@ -467,14 +464,14 @@ return a; the function returns how many characters have been written to the utf8 string, zero means that 'z' is an incorrect unicode character */ -size_t IntToUTF8(int z, std::string & utf8, bool clear) +size_t int_to_utf8(int z, std::string & utf8, bool clear) { char buf[10]; if( clear ) utf8.clear(); - size_t len = IntToUTF8(z, buf, sizeof(buf)/sizeof(char)); + size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char)); size_t i; for(i=0 ; i(*wide_string); - correct = true; - - if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) ) - { - if( z>=0xD800 && z<=0xDBFF && string_len>1 ) - { - int z2 = *(wide_string+1); - - if( z2>=0xDC00 && z2<=0xDFFF ) - { - z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF)); - return 2; - } - else - { - correct = false; - return 2; - } - } - else - { - correct = false; - return 1; - } - } - else - { - correct = UTF8_CheckRange(z); - return 1; - } -} - - - -/* - an auxiliary function for converting from wide characters to UTF-8 - converting a wide character into one int - - returns how many wide characters were used - if wide_string has at least one character then the return value is always greater than zero too -*/ -static size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct) -{ -size_t min_str_len = 1; - - if( *wide_string == 0 ) - { - z = 0; - correct = false; - return 0; - } - - if( *(wide_string+1) != 0 ) - min_str_len = 2; - -return WideToInt(wide_string, min_str_len, z, correct); -} - - - -/*! 
- an auxiliary function for converting from wide characters to UTF-8 - - returns how many wide characters were used - if string_len is greater than 0 then the return value is always greater than zero too - - utf8_written - how many characters were saved in the utf8 string (the string doesn't have - a null terminating character) - it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read - was_utf8_buf_too_small - will be true if the utf8 buffer is too small - if this flag is true then utf8_written is equal to zero - was_error - will be true if there is an error when converting (there was an incorrect wide character) - (was_error will not be true if the utf8 buffer is too small) -*/ -static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, - size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode) -{ -int z; -bool correct; -size_t chars; - - utf8_written = 0; - was_utf8_buf_too_small = false; - chars = WideToInt(wide_string, string_len, z, correct); - - if( correct ) - { - utf8_written = IntToUTF8(z, utf8, utf8_len); - - if( utf8_written == 0 ) - was_utf8_buf_too_small = true; - } - else - { - if( mode == 1 ) - { - utf8_written = IntToUTF8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character" - - if( utf8_written == 0 ) - was_utf8_buf_too_small = true; - } - - was_error = true; - } - -return chars; -} - - - -/*! 
- an auxiliary function for converting from wide characters to UTF-8 - - returns how many wide characters were used - if string_len is greater than 0 then the return value is always greater than zero too -*/ -static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode) -{ -int z; -bool correct; -size_t chars; - - chars = WideToInt(wide_string, string_len, z, correct); - - if( correct ) - correct = IntToUTF8(z, utf8, false) != 0; - - if( !correct ) - { - if( mode == 1 ) - IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character" - - was_error = true; - } - -return chars; -} - - - -/*! - an auxiliary function for converting from wide characters to UTF-8 - - returns how many wide characters were used - if wide_string has at least one character then the return value is always greater than zero too -*/ -static size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode) -{ -int z; -bool correct; -size_t chars; - - chars = WideToInt(wide_string, z, correct); - - if( correct ) - correct = IntToUTF8(z, utf8, false) != 0; - - if( !correct ) - { - if( mode == 1 ) - IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character" - - was_error = true; - } - -return chars; -} - - - -/*! - an auxiliary function for converting from wide characters to UTF-8 - - returns how many wide characters were used - if string_len is greater than 0 then the return value is always greater than zero too -*/ -static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, bool & was_error, int mode) -{ -int z; -bool correct; -size_t chars; - - chars = WideToInt(wide_string, string_len, z, correct); - - if( correct ) - correct = IntToUTF8(z, utf8) != 0; - - if( !correct ) - { - if( mode == 1 ) - IntToUTF8(0xFFFD, utf8); // U+FFFD "replacement character" - - was_error = true; - } - -return chars; -} - - - -/*! 
- an auxiliary function for converting from wide characters to UTF-8 -*/ -static size_t WideOneToUTF8(const wchar_t * wide_string, std::ostream & utf8, bool & was_error, int mode) -{ -size_t min_str_len = 1; - - if( *wide_string == 0 ) - return 0; - - if( *(wide_string+1) != 0 ) - min_str_len = 2; - -return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode); -} - - - /*! this function converts a wide string into UTF-8 string @@ -762,7 +497,7 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode); this function returns false if there were some errors when converting */ -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode) +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode) { bool was_error = false; size_t chars; @@ -772,7 +507,7 @@ size_t chars; while( string_len > 0 ) { - chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode); + chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode); wide_string += chars; string_len -= chars; } @@ -796,7 +531,7 @@ return !was_error; this function returns false if there were some errors when converting */ -bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode) +bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode) { bool was_error = false; @@ -804,7 +539,7 @@ bool was_error = false; utf8.clear(); while( *wide_string ) - wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode); + wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode); return !was_error; } @@ -825,90 +560,13 @@ return !was_error; this function returns false if there were some errors when converting */ -bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode) +bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool 
clear, int mode) { - return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, clear, mode); + return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode); } -/*! - this function converts a wide string into UTF-8 stream - - input: - wide_string - a wide string for converting - string_len - size of the string - mode - what to do with errors when converting - 0: skip an invalid character - 1: put U+FFFD "replacement character" istead of the invalid character (default) - - output: - utf8 - a UTF-8 stream for the output sequence - - this function returns false if there were some errors when converting -*/ -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode) -{ -bool was_error = false; -size_t chars; - - while( string_len > 0 ) - { - chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode); - wide_string += chars; - string_len -= chars; - } - -return !was_error; -} - - - -/*! - this function converts a wide string into UTF-8 stream - - input: - wide_string - a null terminated wide string for converting - mode - what to do with errors when converting - 0: skip an invalid character - 1: put U+FFFD "replacement character" istead of the invalid character (default) - - output: - utf8 - a UTF-8 stream for the output sequence - - this function returns false if there were some errors when converting -*/ -bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode) -{ -bool was_error = false; - - while( *wide_string ) - wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode); - -return !was_error; -} - - - -/*! 
- this function converts a wide string (std::wstring) into UTF-8 stream - - input: - wide_string - a wide string for converting - mode - what to do with errors when converting - 0: skip an invalid character - 1: put U+FFFD "replacement character" istead of the invalid character (default) - - output: - utf8 - a UTF-8 stream for the output sequence - - this function returns false if there were some errors when converting -*/ -bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode) -{ - return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, mode); -} - /*! @@ -932,7 +590,7 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode) if there is an error when converting (there is an incorrect character in the wide string) the function will continue converting but if the buffer is too small the function breaks immediately */ -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) { bool was_error = false; bool was_buffer_to_small; @@ -942,7 +600,7 @@ size_t chars, utf8_saved; while( string_len > 0 ) { - chars = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode); + chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode); if( was_buffer_to_small ) { @@ -986,9 +644,9 @@ return !was_error; if there is an error when converting (there is an incorrect character in the wide string) the function will continue converting but if the buffer is too small the function breaks immediately */ -bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) +bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & 
utf8_written, int mode) { - return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode); + return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode); } @@ -1014,7 +672,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, will continue converting but if the buffer is too small the function breaks immediately (in both cases the utf8 buffer is null terminated) */ -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode) +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode) { size_t utf8_saved; bool res; @@ -1022,7 +680,7 @@ bool res; if( utf8_len == 0 ) return false; - res = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode); + res = wide_to_utf8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode); utf8[utf8_saved] = 0; return res; @@ -1050,9 +708,9 @@ return res; will continue converting but if the buffer is too small the function breaks immediately (in both cases the utf8 buffer is null terminated) */ -bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode) +bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode) { - return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode); + return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode); } @@ -1077,7 +735,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, if there is an error when converting (there is an incorrect character in the wide string) the function will continue converting but if the buffer is too small the function breaks immediately */ -bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode) +bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, 
size_t & utf8_written, int mode) { bool was_error = false; bool was_buffer_to_small; @@ -1089,7 +747,7 @@ size_t len; while( *wide_string ) { len = (*(wide_string+1) == 0) ? 1 : 2; - chars = WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode); + chars = private_namespace::wide_one_to_utf8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode); if( was_buffer_to_small ) { @@ -1132,7 +790,7 @@ return !was_error; will continue converting but if the buffer is too small the function breaks immediately (in both cases the utf8 buffer is null terminated) */ -bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode) +bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode) { size_t utf8_saved; bool res; @@ -1140,7 +798,7 @@ bool res; if( utf8_len == 0 ) return false; - res = WideToUTF8(wide_string, utf8, utf8_len - 1, utf8_saved, mode); + res = wide_to_utf8(wide_string, utf8, utf8_len - 1, utf8_saved, mode); utf8[utf8_saved] = 0; return res; diff --git a/src/utf8/utf8.h b/src/utf8/utf8.h new file mode 100644 index 0000000..65dbda9 --- /dev/null +++ b/src/utf8/utf8.h @@ -0,0 +1,180 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2010-2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_utf8_utf8 +#define headerfile_picotools_utf8_utf8 + +#include +#include "textstream/textstream.h" + + +namespace pt +{ + +/*! + UTF-8, a transformation format of ISO 10646 + http://tools.ietf.org/html/rfc3629 + + when wchar_t is 4 bytes length we use UTF-32 + when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs) + + UTF-16 + http://www.ietf.org/rfc/rfc2781.txt +*/ + + + +/*! + returns true if 'c' is a correct unicode character +*/ +bool utf8_check_range(int c); + + +/*! + returns true if 'c' is a correct unicode character + + this method is used when reading from an utf8 string + how_many_chars - means how many characters from utf8 string were read +*/ +bool utf8_check_range(int c, int how_many_bytes); + + +/* + * + * + * + * convertions from UTF-8 + * + * + * + */ + +/*! 
+ converting one character from UTF-8 to an int +*/ +size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct); +size_t utf8_to_int(const char * utf8, int & res, bool & correct); +size_t utf8_to_int(const std::string & utf8, int & res, bool & correct); +size_t utf8_to_int(std::istream & utf8, int & res, bool & correct); + + +/*! + converting UTF-8 string to a wide string +*/ +bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1); +bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1); +bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1); +bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1); + +template +bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear = true, int mode = 1); // need to be tested + +template +bool utf8_to_wide(const char * utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested + +template +bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested + +template +bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested + + + +/* + * + * + * + * convertions to UTF-8 + * + * + * + */ + + +/*! + converting one int character to UTF-8 +*/ +size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len); +size_t int_to_utf8(int z, std::string & utf8, bool clear = true); + +template +size_t int_to_utf8(int z, StreamType & utf8); + + + +/*! 
+ converting a wide string to UTF-8 string +*/ +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1); +bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1); +bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1); + +template +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode = 1); + +template +bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode = 1); + +template +bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode = 1); + + + +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); +bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); +bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); + +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1); +bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1); +bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1); + +template +void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear = true, int mode = 1); // not tested + +template +void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested + + + + +} // namespace + + +#include "utf8/utf8_templates.h" + +#endif + diff --git a/src/utf8/utf8_private.cpp b/src/utf8/utf8_private.cpp new file mode 100644 index 0000000..54aa3c1 --- /dev/null +++ b/src/utf8/utf8_private.cpp @@ -0,0 +1,283 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. 
+ * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "utf8_private.h" + + +namespace pt +{ + +namespace private_namespace +{ + +/*! 
+ an auxiliary function for converting from UTF-8 string +*/ +bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res) +{ + for(len=0 ; (uz & 0x80) != 0 ; ++len) + uz <<= 1; + + if( len == 1 || len > 4 ) + return false; + + res = uz; + + if( len > 0 ) + res >>= len; + + if( len == 0 ) + len = 1; + +return true; +} + + + +/*! + an auxiliary function for converting from UTF-8 string +*/ +bool utf8_to_int_add_next_octet(unsigned char uz, int & res) +{ + if( (uz & 0xc0) != 0x80 ) + return false; + + res <<= 6; + res |= (uz & 0x3F); + +return true; +} + + + + + +/* + an auxiliary function for converting from wide characters to UTF-8 + converting a wide character into one int + + returns how many wide characters were used + if string_len is greater than 0 then the return value is always greater than zero too +*/ +size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct) +{ + if( string_len == 0 ) + { + z = 0; + correct = false; + return 0; + } + + z = static_cast(*wide_string); + correct = true; + + if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) ) + { + if( z>=0xD800 && z<=0xDBFF && string_len>1 ) + { + int z2 = *(wide_string+1); + + if( z2>=0xDC00 && z2<=0xDFFF ) + { + z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF)); + return 2; + } + else + { + correct = false; + return 2; + } + } + else + { + correct = false; + return 1; + } + } + else + { + correct = utf8_check_range(z); + return 1; + } +} + + + +/* + an auxiliary function for converting from wide characters to UTF-8 + converting a wide character into one int + + returns how many wide characters were used + if wide_string has at least one character then the return value is always greater than zero too +*/ +size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct) +{ +size_t min_str_len = 1; + + if( *wide_string == 0 ) + { + z = 0; + correct = false; + return 0; + } + + if( *(wide_string+1) != 0 ) + min_str_len = 2; + +return 
wide_to_int(wide_string, min_str_len, z, correct); +} + + + +/*! + an auxiliary function for converting from wide characters to UTF-8 + + returns how many wide characters were used + if string_len is greater than 0 then the return value is always greater than zero too + + utf8_written - how many characters were saved in the utf8 string (the string doesn't have + a null terminating character) + it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read + was_utf8_buf_too_small - will be true if the utf8 buffer is too small + if this flag is true then utf8_written is equal to zero + was_error - will be true if there is an error when converting (there was an incorrect wide character) + (was_error will not be true if the utf8 buffer is too small) +*/ +size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, + size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode) +{ +int z; +bool correct; +size_t chars; + + utf8_written = 0; + was_utf8_buf_too_small = false; + chars = wide_to_int(wide_string, string_len, z, correct); + + if( correct ) + { + utf8_written = int_to_utf8(z, utf8, utf8_len); + + if( utf8_written == 0 ) + was_utf8_buf_too_small = true; + } + else + { + if( mode == 1 ) + { + utf8_written = int_to_utf8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character" + + if( utf8_written == 0 ) + was_utf8_buf_too_small = true; + } + + was_error = true; + } + +return chars; +} + + + +/*! 
+ an auxiliary function for converting from wide characters to UTF-8 + + returns how many wide characters were used + if string_len is greater than 0 then the return value is always greater than zero too +*/ +size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode) +{ +int z; +bool correct; +size_t chars; + + chars = wide_to_int(wide_string, string_len, z, correct); + + if( correct ) + correct = int_to_utf8(z, utf8, false) != 0; + + if( !correct ) + { + if( mode == 1 ) + int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character" + + was_error = true; + } + +return chars; +} + + + +/*! + an auxiliary function for converting from wide characters to UTF-8 + + returns how many wide characters were used + if wide_string has at least one character then the return value is always greater than zero too +*/ +size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode) +{ +int z; +bool correct; +size_t chars; + + chars = wide_to_int(wide_string, z, correct); + + if( correct ) + correct = int_to_utf8(z, utf8, false) != 0; + + if( !correct ) + { + if( mode == 1 ) + int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character" + + was_error = true; + } + +return chars; +} + + + + + +} // namespace private_namespace + +} // namespace pt + + + diff --git a/src/utf8/utf8_private.h b/src/utf8/utf8_private.h new file mode 100644 index 0000000..5ea815f --- /dev/null +++ b/src/utf8/utf8_private.h @@ -0,0 +1,220 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef headerfile_picotools_utf8_utf8_private +#define headerfile_picotools_utf8_utf8_private + +#include "textstream/textstream.h" + + +namespace pt +{ + +bool utf8_check_range(int c); +size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len); +size_t int_to_utf8(int z, std::string & utf8, bool clear); +size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct); + + +namespace private_namespace +{ +bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res); +bool utf8_to_int_add_next_octet(unsigned char uz, int & res); + +size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); +size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct); + +size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, + size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode); + +size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode); + +size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode); + + +/*! + an auxiliary function for converting from wide characters to UTF-8 + + returns how many wide characters were used + if string_len is greater than 0 then the return value is always greater than zero too +*/ +template +static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode) +{ +int z; +bool correct; +size_t chars; + + chars = wide_to_int(wide_string, string_len, z, correct); + + if( correct ) + correct = int_to_utf8(z, utf8) != 0; + + if( !correct ) + { + if( mode == 1 ) + int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character" + + was_error = true; + } + +return chars; +} + + +/*! 
+ an auxiliary function for converting from wide characters to UTF-8 +*/ +template +static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode) +{ + size_t min_str_len = 1; + + if( *wide_string == 0 ) + return 0; + + if( *(wide_string+1) != 0 ) + min_str_len = 2; + +return wide_one_to_utf8(wide_string, min_str_len, utf8, was_error, mode); +} + + + +// declared in utf8.h, defined in utf8.cpp +size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct); + + + +template +bool utf8_to_wide_generic(const char * utf8, size_t utf8_len, int mode, function_type convert_function) +{ +int z; +size_t len; +bool correct, was_error = false; + + while( utf8_len > 0 ) + { + if( (unsigned char)*utf8 <= 0x7f ) + { + // small optimization + len = 1; + correct = true; + z = static_cast(*utf8); + } + else + { + len = pt::utf8_to_int(utf8, utf8_len, z, correct); // the len will be different from zero + } + + if( !correct ) + { + if( mode == 1 ) + convert_function(0xFFFD); // U+FFFD "replacement character" + + was_error = true; + } + else + { + convert_function(z); + } + + utf8 += len; + utf8_len -= len; + } + +return !was_error; +} + + + +template +void int_to_wide(int c, StreamType & res) +{ + if( sizeof(wchar_t)==2 && c>0xffff ) + { + // UTF16 surrogate pairs + c -= 0x10000; + res << static_cast(((c >> 10) & 0x3FF) + 0xD800); + res << static_cast((c & 0x3FF) + 0xDC00); + } + else + { + res << static_cast(c); + } +} + + +// not tested +// FIX ME it is not using surrogate pairs from input stream +// and mode parameter +template +void wide_to_utf8_generic(TextStreamBase & buffer, int mode, function_type write_function) +{ + char utf8_buffer[256]; + std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char); + std::size_t utf8_sequence_max_length = 10; + std::size_t index = 0; + + typename TextStreamBase::const_iterator i = buffer.begin(); + + while( i != buffer.end() ) + { + if( index + utf8_sequence_max_length > 
buffer_len ) + { + write_function(utf8_buffer, index); + index = 0; + } + + index += int_to_utf8(*i, utf8_buffer + index, buffer_len - index); + ++i; + } + + if( index > 0 ) + { + write_function(utf8_buffer, index); + } +} + + + + +} // namespace private_namespace + +} // namespace pt + +#endif diff --git a/src/utf8/utf8_templates.h b/src/utf8/utf8_templates.h new file mode 100644 index 0000000..d4a5744 --- /dev/null +++ b/src/utf8/utf8_templates.h @@ -0,0 +1,271 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_utf8_utf8_templates +#define headerfile_picotools_utf8_utf8_templates + +// this file is included at the end of utf8.h + +#include "utf8_private.h" + + +namespace pt +{ + + +/*! + converting UTF-8 string to a TextStreamBase stream + (need to be tested) +*/ +// need to be tested +template +bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode) +{ + if( clear ) + res.clear(); + + bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) { + private_namespace::int_to_wide(c, res); + }); + + return status; +} + + + + +template +bool utf8_to_wide(const char * utf8, StreamType & res, bool clear, int mode) +{ +size_t utf8_len = 0; + + while( utf8[utf8_len] != 0 ) + utf8_len += 1; + +return utf8_to_wide(utf8, utf8_len, res, clear, mode); +} + + + +template +bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mode) +{ + return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode); +} + + + +// need to be tested +template +bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode) +{ +int z; +bool correct, was_error = false; + + if( clear ) + res.clear(); + + while( utf8_to_int(utf8, z, correct) > 0 ) + { + if( !correct ) + { + if( mode == 1 ) + res << 0xFFFD; // U+FFFD "replacement character" + + was_error = true; + } + else + { + private_namespace::int_to_wide(z, res); + } + } + +return 
!was_error; +} + + + + + + + + +/*! + this function converts one wide character into UTF-8 stream + + input: + z - wide character + + output: + utf8 - a UTF-8 stream for the output sequence + + the function returns how many characters have been written to the utf8 stream, + zero means that 'z' is an incorrect unicode character +*/ +template +size_t int_to_utf8(int z, StreamType & utf8) +{ + char buf[10]; + + size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char)); + + if( len > 0 ) + utf8.write(buf, len); + + return len; +} + + + + + +/*! + this function converts a wide string into UTF-8 stream + + input: + wide_string - a wide string for converting + string_len - size of the string + mode - what to do with errors when converting + 0: skip an invalid character + 1: put U+FFFD "replacement character" istead of the invalid character (default) + + output: + utf8 - a UTF-8 stream for the output sequence + + this function returns false if there were some errors when converting +*/ +template +bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode) +{ +bool was_error = false; +size_t chars; + + while( string_len > 0 ) + { + chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode); + wide_string += chars; + string_len -= chars; + } + +return !was_error; +} + + + + + +/*! 
+ this function converts a wide string into UTF-8 stream + + input: + wide_string - a null terminated wide string for converting + mode - what to do with errors when converting + 0: skip an invalid character + 1: put U+FFFD "replacement character" istead of the invalid character (default) + + output: + utf8 - a UTF-8 stream for the output sequence + + this function returns false if there were some errors when converting +*/ +template +bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode) +{ +bool was_error = false; + + while( *wide_string ) + wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode); + +return !was_error; +} + + + +/*! + this function converts a wide string (std::wstring) into UTF-8 stream + + input: + wide_string - a wide string for converting + mode - what to do with errors when converting + 0: skip an invalid character + 1: put U+FFFD "replacement character" istead of the invalid character (default) + + output: + utf8 - a UTF-8 stream for the output sequence + + this function returns false if there were some errors when converting +*/ +template +bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode) +{ + return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, mode); +} + + + + +template +void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, int mode) +{ + if( clear ) + utf8.clear(); + + private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ + utf8.append(utf8_buffer, buffer_len); + }); +} + + +// not tested +template +void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode) +{ + private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ + utf8.write(utf8_buffer, buffer_len); + }); +} + + + + + +} // namespace pt + +#endif + + + diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 
0000000..6d6de49 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,45 @@ +sourcefiles:=$(shell find . -name "*.cpp") +objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles)) + + +ifndef CXX +CXX = g++ +endif + +ifndef CXXFLAGS +CXXFLAGS = -Wall -pedantic -O2 -std=c++20 -I../src -I/usr/local/include +endif + + +progname = tests +pikotoolslibfile = ../src/pikotools.a + + +all: $(progname) + + +$(progname): $(objfiles) FORCE + $(CXX) $(CXXFLAGS) -o $(progname) $(objfiles) $(pikotoolslibfile) + + +%.o: %.cpp + $(CXX) -c $(CXXFLAGS) -o $@ $< + + + + + +clean: + rm -f $(objfiles) + rm -f $(progname) + + +depend: + makedepend -Y. -I../src -f- $(sourcefiles) > Makefile.dep + + +FORCE: + + +-include Makefile.dep + diff --git a/tests/Makefile.dep b/tests/Makefile.dep new file mode 100644 index 0000000..7af1bb8 --- /dev/null +++ b/tests/Makefile.dep @@ -0,0 +1,25 @@ +# DO NOT DELETE + +./main.o: convert.h mainoptionsparser.h csvparser.h +./convert.o: convert.h test.h ../src/convert/convert.h +./convert.o: ../src/convert/inttostr.h ../src/convert/patternreplacer.h +./convert.o: ../src/textstream/textstream.h ../src/space/space.h +./convert.o: ../src/textstream/types.h ../src/convert/inttostr.h +./convert.o: ../src/date/date.h ../src/membuffer/membuffer.h +./convert.o: ../src/textstream/types.h ../src/convert/strtoint.h +./convert.o: ../src/convert/text.h ../src/convert/misc.h +./test.o: test.h +./mainoptionsparser.o: mainoptionsparser.h test.h +./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h +./mainoptionsparser.o: ../src/space/space.h ../src/textstream/types.h +./mainoptionsparser.o: ../src/convert/inttostr.h ../src/utf8/utf8.h +./mainoptionsparser.o: ../src/textstream/textstream.h ../src/date/date.h +./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h +./mainoptionsparser.o: ../src/utf8/utf8_templates.h +./mainoptionsparser.o: ../src/utf8/utf8_private.h ../src/convert/convert.h +./mainoptionsparser.o: ../src/convert/inttostr.h 
+./mainoptionsparser.o: ../src/convert/patternreplacer.h +./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h +./mainoptionsparser.o: ../src/convert/misc.h +./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h +./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h test.h diff --git a/tests/convert.cpp b/tests/convert.cpp new file mode 100644 index 0000000..fbd4b3b --- /dev/null +++ b/tests/convert.cpp @@ -0,0 +1,1982 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "convert.h" +#include "test.h" +#include "convert/convert.h" + + +namespace pt +{ + +namespace pt_convert_tests +{ + + +void test_text1() +{ + reset_test_counter("is_white"); + + test(pt::is_white(0), false); + test(pt::is_white(1), false); + test(pt::is_white(9), true); + test(pt::is_white(10), true); + test(pt::is_white(30), false); + test(pt::is_white(32), true); + test(pt::is_white(40), false); + test(pt::is_white('a'), false); + test(pt::is_white(0xabcd), false); + + test(pt::is_white(5, false, false), false); + test(pt::is_white(9, false, false), true); + test(pt::is_white(10, false, false), false); + test(pt::is_white(31, false, false), false); + test(pt::is_white(32, false, false), true); + test(pt::is_white('z', false, false), false); + test(pt::is_white(0xbbcc, false, false), false); + test(pt::is_white(0xffff, false, false), false); + test(pt::is_white(0x10ffff, false, false), false); + + test(pt::is_white(0x000B, false, false), false); + test(pt::is_white(0x00A0, false, false), false); + test(pt::is_white(0x2001, false, false), false); + test(pt::is_white(0x2009, false, false), false); + test(pt::is_white(0xFEFF, false, false), false); + + test(pt::is_white(0x000B, true, false), true); + test(pt::is_white(0x00A0, true, false), true); + test(pt::is_white(0x2001, true, false), true); + test(pt::is_white(0x2009, true, false), true); + test(pt::is_white(0xFEFF, true, false), true); +} + + +void 
test_text2() +{ + reset_test_counter("is_digit"); + + int digit; + + test(pt::is_digit(0, 10, &digit), false); + test(pt::is_digit(1, 10, &digit), false); + test(pt::is_digit(10, 10, &digit), false); + test(pt::is_digit(32, 10, &digit), false); + test(pt::is_digit('z', 10, &digit), false); + test(pt::is_digit('x', 10, &digit), false); + + test(pt::is_digit('0', 2, &digit), true); + test(digit, 0); + + test(pt::is_digit('1', 2, &digit), true); + test(digit, 1); + + test(pt::is_digit('2', 2, &digit), false); + + + test(pt::is_digit('0', 8, &digit), true); + test(digit, 0); + + test(pt::is_digit('7', 8, &digit), true); + test(digit, 7); + + test(pt::is_digit('8', 8, &digit), false); + + + test(pt::is_digit('0', 10, &digit), true); + test(digit, 0); + + test(pt::is_digit('1', 10, &digit), true); + test(digit, 1); + + test(pt::is_digit('8', 10, &digit), true); + test(digit, 8); + + test(pt::is_digit('9', 10, &digit), true); + test(digit, 9); + + test(pt::is_digit('a', 10, &digit), false); + + + test(pt::is_digit('a', 16, &digit), true); + test(digit, 0x0a); + + test(pt::is_digit('c', 16, &digit), true); + test(digit, 0x0c); + + test(pt::is_digit('f', 16, &digit), true); + test(digit, 0x0f); + + test(pt::is_digit('g', 16, &digit), false); +} + + +void test_text3() +{ + reset_test_counter("skip_white"); + + test(pt::skip_white(""), ""); + test(pt::skip_white(" "), ""); + test(pt::skip_white("\n \n"), ""); + test(pt::skip_white("hello world"), "hello world"); + test(pt::skip_white(" hello world"), "hello world"); + test(pt::skip_white(" hello world"), "hello world"); + test(pt::skip_white(" hello world"), "hello world"); + test(pt::skip_white(" hello world"), "hello world"); + + test(pt::skip_white("\n\nhello world"), "hello world"); + test(pt::skip_white("\n\nhello world", true, true), "hello world"); + test(pt::skip_white("\n\nhello world", true, false), "\n\nhello world"); + test(pt::skip_white("\n\n \n\n \t\t\n\nhello world", true, true), "hello world"); + + 
test(pt::skip_white(L" hello world"), L"hello world"); + test(pt::skip_white(L" hello world"), L"hello world"); + test(pt::skip_white(L" hello world"), L"hello world"); + test(pt::skip_white(L" hello world"), L"hello world"); + + test(pt::skip_white(L"\n\nhello world", true, true), L"hello world"); + test(pt::skip_white(L"\n\nhello world", true, false), L"\n\nhello world"); + test(pt::skip_white(L"\n\n \n\n \t\t\n\nhello world", true, true), L"hello world"); + + test(pt::skip_white(L"\x000B hello world", true, true), L"hello world"); + test(pt::skip_white(L"\x000B hello world", false, true), L"\x000B hello world"); + + test(pt::skip_white(L"\x2029 hello world", true, true), L"hello world"); + test(pt::skip_white(L"\x2029 hello world", false, true), L"\x2029 hello world"); +} + + + +void test_text4() +{ + reset_test_counter("skip_white_from_back"); + + test(pt::skip_white_from_back(""), ""); + test(pt::skip_white_from_back(" "), " "); + test(pt::skip_white_from_back("\n \n"), "\n \n"); + + test(pt::skip_white_from_back("hello world"), ""); + test(pt::skip_white_from_back("hello world "), " "); + test(pt::skip_white_from_back("hello world "), " "); + test(pt::skip_white_from_back("hello world "), " "); + + test(pt::skip_white_from_back("hello world\n\n"), "\n\n"); + test(pt::skip_white_from_back("hello world\n\n", true, true), "\n\n"); + test(pt::skip_white_from_back("hello world\n\n", true, false), ""); + test(pt::skip_white_from_back("hello world\n\n \n\n \t\t\n\n", true, true), "\n\n \n\n \t\t\n\n"); + + + test(pt::skip_white_from_back(L"hello world "), L" "); + test(pt::skip_white_from_back(L"hello world "), L" "); + test(pt::skip_white_from_back(L"hello world\n\n\n\t\t\t "), L"\n\n\n\t\t\t "); + test(pt::skip_white_from_back(L"hello world "), L" "); + + test(pt::skip_white_from_back(L"hello world\n\n", true, true), L"\n\n"); + test(pt::skip_white_from_back(L"hello world\n\n", true, false), L""); + test(pt::skip_white_from_back(L"hello world\n\n \n\n \t\t\n\n", 
true, true), L"\n\n \n\n \t\t\n\n"); + + test(pt::skip_white_from_back(L"hello world \x000B", true, true), L" \x000B"); + test(pt::skip_white_from_back(L"hello world \x000B", false, true), L""); + + test(pt::skip_white_from_back(L"hello world \x2029", true, true), L" \x2029"); + test(pt::skip_white_from_back(L"hello world \x2029", false, true), L""); +} + + +void test_text5() +{ + reset_test_counter("to_lower"); + + test(pt::to_lower((char)0), (char)0); + test(pt::to_lower((char)32), (char)32); + test(pt::to_lower((char)127), (char)127); + test(pt::to_lower((char)128), (char)128); + test(pt::to_lower((char)200), (char)200); + test(pt::to_lower((char)255), (char)255); + + test(pt::to_lower('a'), 'a'); + test(pt::to_lower('c'), 'c'); + test(pt::to_lower('t'), 't'); + test(pt::to_lower('z'), 'z'); + test(pt::to_lower('0'), '0'); + test(pt::to_lower('A'), 'a'); + test(pt::to_lower('C'), 'c'); + test(pt::to_lower('X'), 'x'); + test(pt::to_lower('Z'), 'z'); + test(pt::to_lower('@'), '@'); + test(pt::to_lower('['), '['); + test(pt::to_lower('`'), '`'); + test(pt::to_lower('{'), '{'); + + test(pt::to_lower((wchar_t)0), (wchar_t)0); + test(pt::to_lower((wchar_t)32), (wchar_t)32); + test(pt::to_lower((wchar_t)127), (wchar_t)127); + test(pt::to_lower((wchar_t)128), (wchar_t)128); + test(pt::to_lower((wchar_t)200), (wchar_t)200); + test(pt::to_lower((wchar_t)255), (wchar_t)255); + + test(pt::to_lower(L'a'), L'a'); + test(pt::to_lower(L't'), L't'); + test(pt::to_lower(L'z'), L'z'); + test(pt::to_lower(L'0'), L'0'); + test(pt::to_lower(L'A'), L'a'); + test(pt::to_lower(L'C'), L'c'); + test(pt::to_lower(L'X'), L'x'); + test(pt::to_lower(L'Z'), L'z'); + test(pt::to_lower(L'@'), L'@'); + test(pt::to_lower(L'['), L'['); +} + + +void test_text6() +{ + reset_test_counter("to_upper"); + + test(pt::to_upper((char)0), (char)0); + test(pt::to_upper((char)32), (char)32); + test(pt::to_upper((char)127), (char)127); + test(pt::to_upper((char)128), (char)128); + test(pt::to_upper((char)200), 
(char)200); + test(pt::to_upper((char)255), (char)255); + + test(pt::to_upper('a'), 'A'); + test(pt::to_upper('c'), 'C'); + test(pt::to_upper('t'), 'T'); + test(pt::to_upper('z'), 'Z'); + test(pt::to_upper('0'), '0'); + test(pt::to_upper('A'), 'A'); + test(pt::to_upper('C'), 'C'); + test(pt::to_upper('X'), 'X'); + test(pt::to_upper('Z'), 'Z'); + test(pt::to_upper('@'), '@'); + test(pt::to_upper('['), '['); + test(pt::to_upper('`'), '`'); + test(pt::to_upper('{'), '{'); + + test(pt::to_upper((wchar_t)0), (wchar_t)0); + test(pt::to_upper((wchar_t)32), (wchar_t)32); + test(pt::to_upper((wchar_t)127), (wchar_t)127); + test(pt::to_upper((wchar_t)128), (wchar_t)128); + test(pt::to_upper((wchar_t)200), (wchar_t)200); + test(pt::to_upper((wchar_t)255), (wchar_t)255); + + test(pt::to_upper(L'a'), L'A'); + test(pt::to_upper(L't'), L'T'); + test(pt::to_upper(L'z'), L'Z'); + test(pt::to_upper(L'0'), L'0'); + test(pt::to_upper(L'A'), L'A'); + test(pt::to_upper(L'C'), L'C'); + test(pt::to_upper(L'X'), L'X'); + test(pt::to_upper(L'Z'), L'Z'); + test(pt::to_upper(L'@'), L'@'); + test(pt::to_upper(L'['), L'['); +} + + +void test_text7() +{ + reset_test_counter("to_lower_emplace std::string"); + std::string str1, str2; + + str1 = "abcdefghijklm nopqrstuvwxyz"; + str2 = str1; + to_lower_emplace(str1); + test(str1, str2); + + str1 += "A"; + str2 += "a"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += "B"; + str2 += "b"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += "@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + str2 += "@[`{cdefghijklmnopqrstuvwxyz"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += "0123456789"; + str2 += "0123456789"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += "[];'\\!@#$%^&*()_+"; + str2 += "[];'\\!@#$%^&*()_+"; + to_lower_emplace(str1); + test(str1, str2); +} + + +void test_text8() +{ + reset_test_counter("to_lower_emplace std::wstring"); + std::wstring str1, str2; + + str1 = L"abcdefghijklm nopqrstuvwxyz"; + str2 = str1; + 
to_lower_emplace(str1); + test(str1, str2); + + str1 += L"A"; + str2 += L"a"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += L"B"; + str2 += L"b"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += L"@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + str2 += L"@[`{cdefghijklmnopqrstuvwxyz"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += L"0123456789"; + str2 += L"0123456789"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += L"[];'\\!@#$%^&*()_+"; + str2 += L"[];'\\!@#$%^&*()_+"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += L"\xabcd \xf000"; + str2 += L"\xabcd \xf000"; + to_lower_emplace(str1); + test(str1, str2); + + str1 += L"\x0000 \x1234"; + str2 += L"\x0000 \x1234"; + to_lower_emplace(str1); + test(str1, str2); +} + + +void test_text9() +{ + reset_test_counter("to_upper_emplace std::string"); + std::string str1, str2; + + str1 = "ABCDEFGHIJKLM NOPQRSTUVWXYZ"; + str2 = str1; + to_upper_emplace(str1); + test(str1, str2); + + str1 += "a"; + str2 += "A"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += "b"; + str2 += "B"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += "@[`{cdefghijklmnopqrstuvwxyz"; + str2 += "@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += "0123456789"; + str2 += "0123456789"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += "[];'\\!@#$%^&*()_+"; + str2 += "[];'\\!@#$%^&*()_+"; + to_upper_emplace(str1); + test(str1, str2); +} + + +void test_text10() +{ + reset_test_counter("to_upper_emplace std::wstring"); + std::wstring str1, str2; + + str1 = L"ABCDEFGHIJKLM NOPQRSTUVWXYZ"; + str2 = str1; + to_upper_emplace(str1); + test(str1, str2); + + str1 += L"a"; + str2 += L"A"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += L"b"; + str2 += L"B"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += L"@[`{cdefghijklmnopqrstuvwxyz"; + str2 += L"@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += 
L"0123456789"; + str2 += L"0123456789"; + to_upper_emplace(str1); + test(str1, str2); + + str1 += L"[];'\\!@#$%^&*()_+"; + str2 += L"[];'\\!@#$%^&*()_+"; + to_upper_emplace(str1); + test(str1, str2); +} + + + +void test_text11() +{ + reset_test_counter("to_lower std::string"); + std::string str1, str2; + + str1 = "abcdefghijklm nopqrstuvwxyz"; + str2 = str1; + test(pt::to_lower(str1), str2); + + str1 += "A"; + str2 += "a"; + test(pt::to_lower(str1), str2); + + str1 += "B"; + str2 += "b"; + test(pt::to_lower(str1), str2); + + str1 += "@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + str2 += "@[`{cdefghijklmnopqrstuvwxyz"; + test(pt::to_lower(str1), str2); + + str1 += "0123456789"; + str2 += "0123456789"; + test(pt::to_lower(str1), str2); + + str1 += "[];'\\!@#$%^&*()_+"; + str2 += "[];'\\!@#$%^&*()_+"; + test(pt::to_lower(str1), str2); +} + + +void test_text12() +{ + reset_test_counter("to_lower std::wstring"); + std::wstring str1, str2; + + str1 = L"abcdefghijklm nopqrstuvwxyz"; + str2 = str1; + test(pt::to_lower(str1), str2); + + str1 += L"A"; + str2 += L"a"; + test(pt::to_lower(str1), str2); + + str1 += L"B"; + str2 += L"b"; + test(pt::to_lower(str1), str2); + + str1 += L"@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + str2 += L"@[`{cdefghijklmnopqrstuvwxyz"; + test(pt::to_lower(str1), str2); + + str1 += L"0123456789"; + str2 += L"0123456789"; + test(pt::to_lower(str1), str2); + + str1 += L"[];'\\!@#$%^&*()_+"; + str2 += L"[];'\\!@#$%^&*()_+"; + test(pt::to_lower(str1), str2); +} + + +void test_text13() +{ + reset_test_counter("to_upper std::string"); + std::string str1, str2; + + str1 = "ABCDEFGHIJKLM NOPQRSTUVWXYZ"; + str2 = str1; + test(pt::to_upper(str1), str2); + + str1 += "a"; + str2 += "A"; + test(pt::to_upper(str1), str2); + + str1 += "b"; + str2 += "B"; + test(pt::to_upper(str1), str2); + + str1 += "@[`{cdefghijklmnopqrstuvwxyz"; + str2 += "@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + test(pt::to_upper(str1), str2); + + str1 += "0123456789"; + str2 += "0123456789"; + test(pt::to_upper(str1), 
str2); + + str1 += "[];'\\!@#$%^&*()_+"; + str2 += "[];'\\!@#$%^&*()_+"; + test(pt::to_upper(str1), str2); +} + + +void test_text14() +{ + reset_test_counter("to_upper std::wstring"); + std::wstring str1, str2; + + str1 = L"ABCDEFGHIJKLM NOPQRSTUVWXYZ"; + str2 = str1; + test(pt::to_upper(str1), str2); + + str1 += L"a"; + str2 += L"A"; + test(pt::to_upper(str1), str2); + + str1 += L"b"; + str2 += L"B"; + test(pt::to_upper(str1), str2); + + str1 += L"@[`{cdefghijklmnopqrstuvwxyz"; + str2 += L"@[`{CDEFGHIJKLMNOPQRSTUVWXYZ"; + test(pt::to_upper(str1), str2); + + str1 += L"0123456789"; + str2 += L"0123456789"; + test(pt::to_upper(str1), str2); + + str1 += L"[];'\\!@#$%^&*()_+"; + str2 += L"[];'\\!@#$%^&*()_+"; + test(pt::to_upper(str1), str2); +} + + +void test_text15() +{ + reset_test_counter("compare const char *"); + + test(pt::compare("", "") == 0, true); + test(pt::compare("a", "a") == 0, true); + test(pt::compare("abc", "abc") == 0, true); + test(pt::compare("ABC", "ABC") == 0, true); + test(pt::compare("hello world", "hello world") == 0, true); + test(pt::compare("hello world", "HELLO WORLD") > 0, true); + test(pt::compare("HELLO WORLD", "hello world") < 0, true); + test(pt::compare("HEllo WOrld", "heLLO woRLD") < 0, true); + test(pt::compare("heLLO woRLD", "HEllo WOrld") > 0, true); + + test(pt::compare("a", "b") < 0, true); + test(pt::compare("b", "c") < 0, true); + test(pt::compare("x", "z") < 0, true); + test(pt::compare("hello world", "xhelloworld") < 0, true); + + test(pt::compare("c", "b") > 0, true); + test(pt::compare("d", "c") > 0, true); + test(pt::compare("z", "x") > 0, true); + test(pt::compare("xhello world", "helloworld") > 0, true); + + test(pt::compare("abc8", "abc9") < 0, true); + test(pt::compare("abc9", "abc8") > 0, true); + test(pt::compare("abc8abc", "abc9abc") < 0, true); + test(pt::compare("abc9abc", "abc8abc") > 0, true); + test(pt::compare("abc9abc", "abc8") > 0, true); + test(pt::compare("abc8abc", "abc9") < 0, true); + 
test(pt::compare("abc8", "abc9abc") < 0, true); + test(pt::compare("abc9", "abc8abc") > 0, true); + + char foo[] = {"abc"}; + char bar[] = {"abc"}; + test(pt::compare(foo, bar) == 0, true); + + foo[0] = (char)(unsigned char)127; + bar[0] = (char)(unsigned char)128; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (char)(unsigned char)128; + bar[0] = (char)(unsigned char)127; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (char)(unsigned char)1; + bar[0] = (char)(unsigned char)255; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (char)(unsigned char)255; + bar[0] = (char)(unsigned char)1; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (char)(unsigned char)0; + bar[0] = (char)(unsigned char)0; + test(pt::compare(foo, bar) == 0, true); + + foo[0] = (char)(unsigned char)0; + bar[0] = (char)(unsigned char)1; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (char)(unsigned char)1; + bar[0] = (char)(unsigned char)0; + test(pt::compare(foo, bar) > 0, true); +} + + + +void test_text16() +{ + reset_test_counter("compare const wchar_t *"); + + test(pt::compare(L"", L"") == 0, true); + test(pt::compare(L"a", L"a") == 0, true); + test(pt::compare(L"abc", L"abc") == 0, true); + test(pt::compare(L"ABC", L"ABC") == 0, true); + test(pt::compare(L"hello world", L"hello world") == 0, true); + test(pt::compare(L"hello world", L"HELLO WORLD") > 0, true); + test(pt::compare(L"HELLO WORLD", L"hello world") < 0, true); + test(pt::compare(L"HEllo WOrld", L"heLLO woRLD") < 0, true); + test(pt::compare(L"heLLO woRLD", L"HEllo WOrld") > 0, true); + + test(pt::compare(L"a", L"b") < 0, true); + test(pt::compare(L"b", L"c") < 0, true); + test(pt::compare(L"x", L"z") < 0, true); + test(pt::compare(L"hello world", L"xhelloworld") < 0, true); + + test(pt::compare(L"c", L"b") > 0, true); + test(pt::compare(L"d", L"c") > 0, true); + test(pt::compare(L"z", L"x") > 0, true); + test(pt::compare(L"xhello world", L"helloworld") > 0, true); + + test(pt::compare(L"abc8", 
L"abc9") < 0, true); + test(pt::compare(L"abc9", L"abc8") > 0, true); + test(pt::compare(L"abc8abc", L"abc9abc") < 0, true); + test(pt::compare(L"abc9abc", L"abc8abc") > 0, true); + test(pt::compare(L"abc9abc", L"abc8") > 0, true); + test(pt::compare(L"abc8abc", L"abc9") < 0, true); + test(pt::compare(L"abc8", L"abc9abc") < 0, true); + test(pt::compare(L"abc9", L"abc8abc") > 0, true); + + wchar_t foo[] = {L"abc"}; + wchar_t bar[] = {L"abc"}; + test(pt::compare(foo, bar) == 0, true); + + foo[0] = (wchar_t)127; + bar[0] = (wchar_t)128; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (wchar_t)128; + bar[0] = (wchar_t)127; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (wchar_t)1; + bar[0] = (wchar_t)255; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (wchar_t)255; + bar[0] = (wchar_t)1; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (wchar_t)0; + bar[0] = (wchar_t)0; + test(pt::compare(foo, bar) == 0, true); + + foo[0] = (wchar_t)0; + bar[0] = (wchar_t)1; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (wchar_t)1; + bar[0] = (wchar_t)0; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (wchar_t)1; + bar[0] = (wchar_t)0xffff; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (wchar_t)127; + bar[0] = (wchar_t)0xffff; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (wchar_t)128; + bar[0] = (wchar_t)0xffff; + test(pt::compare(foo, bar) < 0, true); + + foo[0] = (wchar_t)0xffff; + bar[0] = (wchar_t)1; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (wchar_t)0xffff; + bar[0] = (wchar_t)127; + test(pt::compare(foo, bar) > 0, true); + + foo[0] = (wchar_t)0xffff; + bar[0] = (wchar_t)128; + test(pt::compare(foo, bar) > 0, true); +} + +void test_text17() +{ + reset_test_counter("compare std::string"); + + std::string str1, str2; + test(pt::compare(str1, str2) == 0, true); + + str1 = "abc"; + str2 = "abc"; + test(pt::compare(str1, str2) == 0, true); + + str1 = "aBc"; + str2 = "abc"; + test(pt::compare(str1, str2) < 0, true); + + 
str1 = "abc"; + str2 = "aBc"; + test(pt::compare(str1, str2) > 0, true); + + str1 = "xyz"; + str2 = "abc"; + test(pt::compare(str1, str2) > 0, true); + + str1 = "abc"; + str2 = "xyz"; + test(pt::compare(str1, str2) < 0, true); + + str1 = "abc1"; + str2 = "abc2"; + test(pt::compare(str1, str2) < 0, true); + + str1 = "abc9"; + str2 = "abc8"; + test(pt::compare(str1, str2) > 0, true); +} + +void test_text18() +{ + reset_test_counter("compare std::wstring"); + + std::wstring str1, str2; + test(pt::compare(str1, str2) == 0, true); + + str1 = L"abc"; + str2 = L"abc"; + test(pt::compare(str1, str2) == 0, true); + + str1 = L"aBc"; + str2 = L"abc"; + test(pt::compare(str1, str2) < 0, true); + + str1 = L"abc"; + str2 = L"aBc"; + test(pt::compare(str1, str2) > 0, true); + + str1 = L"xyz"; + str2 = L"abc"; + test(pt::compare(str1, str2) > 0, true); + + str1 = L"abc"; + str2 = L"xyz"; + test(pt::compare(str1, str2) < 0, true); + + str1 = L"abc1"; + str2 = L"abc2"; + test(pt::compare(str1, str2) < 0, true); + + str1 = L"abc9"; + str2 = L"abc8"; + test(pt::compare(str1, str2) > 0, true); +} + +void test_text19() +{ + reset_test_counter("compare const char* str1_begin, str1_end, str2"); + + char foo[] = {"abcdef"}; + char bar[] = {"abcdef"}; + size_t len = sizeof(foo) / sizeof(char) - 1; // minus terminating zero + + test(pt::compare(foo, foo + len, bar) == 0, true); + test(pt::compare(foo, foo + len - 1, bar) < 0, true); + + foo[len - 1] = (char)(unsigned char)127; + bar[len - 1] = (char)(unsigned char)128; + test(pt::compare(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (char)(unsigned char)128; + bar[len - 1] = (char)(unsigned char)127; + test(pt::compare(foo, foo + len, bar) > 0, true); + + foo[len - 1] = (char)(unsigned char)1; + bar[len - 1] = (char)(unsigned char)255; + test(pt::compare(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (char)(unsigned char)255; + bar[len - 1] = (char)(unsigned char)1; + test(pt::compare(foo, foo + len, bar) > 0, true); + + foo[len - 
1] = 'f'; + bar[len - 1] = 'f'; + bar[3] = 'X'; + test(pt::compare(foo, foo + len, bar) > 0, true); + foo[3] = 'A'; + test(pt::compare(foo, foo + len, bar) < 0, true); +} + +void test_text20() +{ + reset_test_counter("compare const wchar_t * str1_begin, str1_end, str2"); + + wchar_t foo[] = {L"abcdef"}; + wchar_t bar[] = {L"abcdef"}; + size_t len = sizeof(foo) / sizeof(wchar_t) - 1; // minus terminating zero + + test(pt::compare(foo, foo + len, bar) == 0, true); + test(pt::compare(foo, foo + len - 1, bar) < 0, true); + + foo[len - 1] = (wchar_t)127; + bar[len - 1] = (wchar_t)128; + test(pt::compare(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (wchar_t)128; + bar[len - 1] = (wchar_t)127; + test(pt::compare(foo, foo + len, bar) > 0, true); + + foo[len - 1] = (wchar_t)1; + bar[len - 1] = (wchar_t)255; + test(pt::compare(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (wchar_t)255; + bar[len - 1] = (wchar_t)1; + test(pt::compare(foo, foo + len, bar) > 0, true); + + foo[len - 1] = (wchar_t)1; + bar[len - 1] = (wchar_t)0xffff; + test(pt::compare(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (wchar_t)0xffff; + bar[len - 1] = (wchar_t)1; + test(pt::compare(foo, foo + len, bar) > 0, true); + + foo[len - 1] = L'f'; + bar[len - 1] = L'f'; + bar[3] = L'X'; + test(pt::compare(foo, foo + len, bar) > 0, true); + foo[3] = L'A'; + test(pt::compare(foo, foo + len, bar) < 0, true); +} + + + +void test_text21() +{ + reset_test_counter("compare_nc const char *"); + + test(pt::compare_nc("", "") == 0, true); + test(pt::compare_nc("a", "a") == 0, true); + test(pt::compare_nc("abc", "abc") == 0, true); + test(pt::compare_nc("ABC", "ABC") == 0, true); + test(pt::compare_nc("hello world", "hello world") == 0, true); + test(pt::compare_nc("hello world", "HELLO WORLD") == 0, true); + test(pt::compare_nc("HELLO WORLD", "hello world") == 0, true); + test(pt::compare_nc("HEllo WOrld", "heLLO woRLD") == 0, true); + test(pt::compare_nc("heLLO woRLD", "HEllo WOrld")== 0, true); + + 
test(pt::compare_nc("a", "b") < 0, true); + test(pt::compare_nc("b", "c") < 0, true); + test(pt::compare_nc("x", "z") < 0, true); + test(pt::compare_nc("hello world", "xhelloworld") < 0, true); + + test(pt::compare_nc("c", "b") > 0, true); + test(pt::compare_nc("d", "c") > 0, true); + test(pt::compare_nc("z", "x") > 0, true); + test(pt::compare_nc("xhello world", "helloworld") > 0, true); + + test(pt::compare_nc("abc8", "abc9") < 0, true); + test(pt::compare_nc("abc9", "abc8") > 0, true); + test(pt::compare_nc("abc8abc", "abc9abc") < 0, true); + test(pt::compare_nc("abc9abc", "abc8abc") > 0, true); + test(pt::compare_nc("abc9abc", "abc8") > 0, true); + test(pt::compare_nc("abc8abc", "abc9") < 0, true); + test(pt::compare_nc("abc8", "abc9abc") < 0, true); + test(pt::compare_nc("abc9", "abc8abc") > 0, true); + + char foo[] = {"abc"}; + char bar[] = {"abc"}; + test(pt::compare_nc(foo, bar) == 0, true); + + foo[0] = (char)(unsigned char)127; + bar[0] = (char)(unsigned char)128; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (char)(unsigned char)128; + bar[0] = (char)(unsigned char)127; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (char)(unsigned char)1; + bar[0] = (char)(unsigned char)255; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (char)(unsigned char)255; + bar[0] = (char)(unsigned char)1; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (char)(unsigned char)0; + bar[0] = (char)(unsigned char)0; + test(pt::compare_nc(foo, bar) == 0, true); + + foo[0] = (char)(unsigned char)0; + bar[0] = (char)(unsigned char)1; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (char)(unsigned char)1; + bar[0] = (char)(unsigned char)0; + test(pt::compare_nc(foo, bar) > 0, true); +} + + + +void test_text22() +{ + reset_test_counter("compare_nc const wchar_t *"); + + test(pt::compare_nc(L"", L"") == 0, true); + test(pt::compare_nc(L"a", L"a") == 0, true); + test(pt::compare_nc(L"abc", L"abc") == 0, true); + test(pt::compare_nc(L"ABC", 
L"ABC") == 0, true); + test(pt::compare_nc(L"hello world", L"hello world") == 0, true); + test(pt::compare_nc(L"hello world", L"HELLO WORLD") == 0, true); + test(pt::compare_nc(L"HELLO WORLD", L"hello world") == 0, true); + test(pt::compare_nc(L"HEllo WOrld", L"heLLO woRLD") == 0, true); + test(pt::compare_nc(L"heLLO woRLD", L"HEllo WOrld") == 0, true); + + test(pt::compare_nc(L"a", L"b") < 0, true); + test(pt::compare_nc(L"b", L"c") < 0, true); + test(pt::compare_nc(L"x", L"z") < 0, true); + test(pt::compare_nc(L"hello world", L"xhelloworld") < 0, true); + + test(pt::compare_nc(L"c", L"b") > 0, true); + test(pt::compare_nc(L"d", L"c") > 0, true); + test(pt::compare_nc(L"z", L"x") > 0, true); + test(pt::compare_nc(L"xhello world", L"helloworld") > 0, true); + + test(pt::compare_nc(L"abc8", L"abc9") < 0, true); + test(pt::compare_nc(L"abc9", L"abc8") > 0, true); + test(pt::compare_nc(L"abc8abc", L"abc9abc") < 0, true); + test(pt::compare_nc(L"abc9abc", L"abc8abc") > 0, true); + test(pt::compare_nc(L"abc9abc", L"abc8") > 0, true); + test(pt::compare_nc(L"abc8abc", L"abc9") < 0, true); + test(pt::compare_nc(L"abc8", L"abc9abc") < 0, true); + test(pt::compare_nc(L"abc9", L"abc8abc") > 0, true); + + wchar_t foo[] = {L"abc"}; + wchar_t bar[] = {L"abc"}; + test(pt::compare_nc(foo, bar) == 0, true); + + foo[0] = (wchar_t)127; + bar[0] = (wchar_t)128; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (wchar_t)128; + bar[0] = (wchar_t)127; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (wchar_t)1; + bar[0] = (wchar_t)255; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (wchar_t)255; + bar[0] = (wchar_t)1; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (wchar_t)0; + bar[0] = (wchar_t)0; + test(pt::compare_nc(foo, bar) == 0, true); + + foo[0] = (wchar_t)0; + bar[0] = (wchar_t)1; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (wchar_t)1; + bar[0] = (wchar_t)0; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (wchar_t)1; + 
bar[0] = (wchar_t)0xffff; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (wchar_t)127; + bar[0] = (wchar_t)0xffff; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (wchar_t)128; + bar[0] = (wchar_t)0xffff; + test(pt::compare_nc(foo, bar) < 0, true); + + foo[0] = (wchar_t)0xffff; + bar[0] = (wchar_t)1; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (wchar_t)0xffff; + bar[0] = (wchar_t)127; + test(pt::compare_nc(foo, bar) > 0, true); + + foo[0] = (wchar_t)0xffff; + bar[0] = (wchar_t)128; + test(pt::compare_nc(foo, bar) > 0, true); +} + +void test_text23() +{ + reset_test_counter("compare_nc std::string"); + + std::string str1, str2; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = "abc"; + str2 = "abc"; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = "aBc"; + str2 = "abc"; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = "abc"; + str2 = "aBc"; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = "xyz"; + str2 = "abc"; + test(pt::compare_nc(str1, str2) > 0, true); + + str1 = "abc"; + str2 = "xyz"; + test(pt::compare_nc(str1, str2) < 0, true); + + str1 = "abc1"; + str2 = "abc2"; + test(pt::compare_nc(str1, str2) < 0, true); + + str1 = "abc9"; + str2 = "abc8"; + test(pt::compare_nc(str1, str2) > 0, true); +} + +void test_text24() +{ + reset_test_counter("compare_nc std::wstring"); + + std::wstring str1, str2; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = L"abc"; + str2 = L"abc"; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = L"aBc"; + str2 = L"abc"; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = L"abc"; + str2 = L"aBc"; + test(pt::compare_nc(str1, str2) == 0, true); + + str1 = L"xyz"; + str2 = L"abc"; + test(pt::compare_nc(str1, str2) > 0, true); + + str1 = L"abc"; + str2 = L"xyz"; + test(pt::compare_nc(str1, str2) < 0, true); + + str1 = L"abc1"; + str2 = L"abc2"; + test(pt::compare_nc(str1, str2) < 0, true); + + str1 = L"abc9"; + str2 = L"abc8"; + test(pt::compare_nc(str1, 
str2) > 0, true); +} + +void test_text25() +{ + reset_test_counter("compare_nc const char* str1_begin, str1_end, str2"); + + char foo[] = {"abcdef"}; + char bar[] = {"abcdef"}; + size_t len = sizeof(foo) / sizeof(char) - 1; // minus terminating zero + + test(pt::compare_nc(foo, foo + len, bar) == 0, true); + test(pt::compare_nc(foo, foo + len - 1, bar) < 0, true); + + foo[len - 1] = (char)(unsigned char)127; + bar[len - 1] = (char)(unsigned char)128; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (char)(unsigned char)128; + bar[len - 1] = (char)(unsigned char)127; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); + + foo[len - 1] = (char)(unsigned char)1; + bar[len - 1] = (char)(unsigned char)255; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (char)(unsigned char)255; + bar[len - 1] = (char)(unsigned char)1; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); + + foo[len - 1] = 'f'; + bar[len - 1] = 'f'; + bar[3] = 'X'; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + foo[3] = 'Z'; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); +} + +void test_text26() +{ + reset_test_counter("compare_nc const wchar_t * str1_begin, str1_end, str2"); + + wchar_t foo[] = {L"abcdef"}; + wchar_t bar[] = {L"abcdef"}; + size_t len = sizeof(foo) / sizeof(wchar_t) - 1; // minus terminating zero + + test(pt::compare_nc(foo, foo + len, bar) == 0, true); + test(pt::compare_nc(foo, foo + len - 1, bar) < 0, true); + + foo[len - 1] = (wchar_t)127; + bar[len - 1] = (wchar_t)128; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (wchar_t)128; + bar[len - 1] = (wchar_t)127; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); + + foo[len - 1] = (wchar_t)1; + bar[len - 1] = (wchar_t)255; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (wchar_t)255; + bar[len - 1] = (wchar_t)1; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); + + foo[len - 1] = (wchar_t)1; + bar[len 
- 1] = (wchar_t)0xffff; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + + foo[len - 1] = (wchar_t)0xffff; + bar[len - 1] = (wchar_t)1; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); + + foo[len - 1] = L'f'; + bar[len - 1] = L'f'; + bar[3] = L'X'; + test(pt::compare_nc(foo, foo + len, bar) < 0, true); + foo[3] = L'Z'; + test(pt::compare_nc(foo, foo + len, bar) > 0, true); +} + + +void test_text27() +{ + reset_test_counter("is_equal char *"); + + test(is_equal("", ""), true); + test(is_equal("", "abc"), false); + test(is_equal("abc", ""), false); + test(is_equal("abc", "abc"), true); + test(is_equal("aBc", "aBc"), true); + test(is_equal("aBc", "abc"), false); + test(is_equal("abc", "aBc"), false); + test(is_equal("abc", "defgh"), false); + test(is_equal("defgh", "abc"), false); +} + +void test_text28() +{ + reset_test_counter("is_equal wchar_t *"); + + test(is_equal(L"", L""), true); + test(is_equal(L"", L"abc"), false); + test(is_equal(L"abc", L""), false); + test(is_equal(L"abc", L"abc"), true); + test(is_equal(L"aBc", L"aBc"), true); + test(is_equal(L"aBc", L"abc"), false); + test(is_equal(L"abc", L"aBc"), false); + test(is_equal(L"abc", L"defgh"), false); + test(is_equal(L"defgh", L"abc"), false); +} + +void test_text29() +{ + reset_test_counter("is_equal std::string"); + + std::string str1, str2; + test(is_equal(str1, str2), true); + + str2 = "xyz"; + test(is_equal(str1, str2), false); + + str1 = "xyz"; + str2 = ""; + test(is_equal(str1, str2), false); + + str1 = "xyz"; + str2 = "xYz"; + test(is_equal(str1, str2), false); + + str1 = "xYz"; + str2 = "xyz"; + test(is_equal(str1, str2), false); + + str1 = "abcxyz"; + str2 = "abc"; + test(is_equal(str1, str2), false); + + str1 = "abc"; + str2 = "abcxyz"; + test(is_equal(str1, str2), false); + + str1 = "xyzabc"; + str2 = "abc"; + test(is_equal(str1, str2), false); + + str1 = "abc"; + str2 = "xyzabc"; + test(is_equal(str1, str2), false); + + str1 = "xyz"; + str2 = "xyz"; + test(is_equal(str1, str2), 
true); +} + + +void test_text30() +{ + reset_test_counter("is_equal std::wstring"); + + std::wstring str1, str2; + test(is_equal(str1, str2), true); + + str2 = L"xyz"; + test(is_equal(str1, str2), false); + + str1 = L"xyz"; + str2 = L""; + test(is_equal(str1, str2), false); + + str1 = L"xyz"; + str2 = L"xYz"; + test(is_equal(str1, str2), false); + + str1 = L"xYz"; + str2 = L"xyz"; + test(is_equal(str1, str2), false); + + str1 = L"abcxyz"; + str2 = L"abc"; + test(is_equal(str1, str2), false); + + str1 = L"abc"; + str2 = L"abcxyz"; + test(is_equal(str1, str2), false); + + str1 = L"xyzabc"; + str2 = L"abc"; + test(is_equal(str1, str2), false); + + str1 = L"abc"; + str2 = L"xyzabc"; + test(is_equal(str1, str2), false); + + str1 = L"xyz"; + str2 = L"xyz"; + test(is_equal(str1, str2), true); +} + + +void test_text31() +{ + reset_test_counter("is_equal const char*, str1_begin, str1_end, str2"); + + char foo[] = {"ABCDEF"}; + char bar[] = {"ABCDEF"}; + size_t len = sizeof(foo) / sizeof(char) - 1; // minus terminating zero + + test(pt::is_equal(foo, foo + len, bar), true); + test(pt::is_equal(foo, foo + len - 1, bar), false); + + foo[len - 1] = (char)(unsigned char)127; + bar[len - 1] = (char)(unsigned char)128; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = (char)(unsigned char)128; + bar[len - 1] = (char)(unsigned char)127; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = (char)(unsigned char)1; + bar[len - 1] = (char)(unsigned char)255; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = (char)(unsigned char)255; + bar[len - 1] = (char)(unsigned char)1; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = 'f'; + bar[len - 1] = 'f'; + bar[3] = 'X'; + test(pt::is_equal(foo, foo + len, bar), false); + foo[3] = 'A'; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[3] = '1'; + bar[3] = '1'; + test(pt::is_equal(foo, foo + len, bar), true); +} + + +void test_text32() +{ + 
reset_test_counter("is_equal const wchar_t*, str1_begin, str1_end, str2"); + + wchar_t foo[] = {L"ABCDEF"}; + wchar_t bar[] = {L"ABCDEF"}; + size_t len = sizeof(foo) / sizeof(wchar_t) - 1; // minus terminating zero + + test(pt::is_equal(foo, foo + len, bar), true); + test(pt::is_equal(foo, foo + len - 1, bar), false); + + foo[len - 1] = (wchar_t)127; + bar[len - 1] = (wchar_t)128; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = (wchar_t)128; + bar[len - 1] = (wchar_t)127; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = (wchar_t)1; + bar[len - 1] = (wchar_t)255; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = (wchar_t)255; + bar[len - 1] = (wchar_t)1; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[len - 1] = L'f'; + bar[len - 1] = L'f'; + bar[3] = L'X'; + test(pt::is_equal(foo, foo + len, bar), false); + foo[3] = L'A'; + test(pt::is_equal(foo, foo + len, bar), false); + + foo[3] = L'1'; + bar[3] = L'1'; + test(pt::is_equal(foo, foo + len, bar), true); +} + + + +void test_text33() +{ + reset_test_counter("is_equal_nc char *"); + + test(is_equal_nc("", ""), true); + test(is_equal_nc("", "abc"), false); + test(is_equal_nc("abc", ""), false); + test(is_equal_nc("abc", "abc"), true); + test(is_equal_nc("aBc", "aBc"), true); + test(is_equal_nc("aBc", "abc"), true); + test(is_equal_nc("abc", "aBc"), true); + test(is_equal_nc("abc", "defgh"), false); + test(is_equal_nc("defgh", "abc"), false); +} + +void test_text34() +{ + reset_test_counter("is_equal_nc wchar_t *"); + + test(is_equal_nc(L"", L""), true); + test(is_equal_nc(L"", L"abc"), false); + test(is_equal_nc(L"abc", L""), false); + test(is_equal_nc(L"abc", L"abc"), true); + test(is_equal_nc(L"aBc", L"aBc"), true); + test(is_equal_nc(L"aBc", L"abc"), true); + test(is_equal_nc(L"abc", L"aBc"), true); + test(is_equal_nc(L"abc", L"defgh"), false); + test(is_equal_nc(L"defgh", L"abc"), false); +} + +void test_text35() +{ + 
reset_test_counter("is_equal_nc std::string"); + + std::string str1, str2; + test(is_equal_nc(str1, str2), true); + + str2 = "xyz"; + test(is_equal_nc(str1, str2), false); + + str1 = "xyz"; + str2 = ""; + test(is_equal_nc(str1, str2), false); + + str1 = "xyz"; + str2 = "xYz"; + test(is_equal_nc(str1, str2), true); + + str1 = "xYz"; + str2 = "xyz"; + test(is_equal_nc(str1, str2), true); + + str1 = "abcxyz"; + str2 = "abc"; + test(is_equal_nc(str1, str2), false); + + str1 = "abc"; + str2 = "abcxyz"; + test(is_equal_nc(str1, str2), false); + + str1 = "xyzabc"; + str2 = "abc"; + test(is_equal_nc(str1, str2), false); + + str1 = "abc"; + str2 = "xyzabc"; + test(is_equal_nc(str1, str2), false); + + str1 = "xyz"; + str2 = "xyz"; + test(is_equal_nc(str1, str2), true); +} + + +void test_text36() +{ + reset_test_counter("is_equal_nc std::wstring"); + + std::wstring str1, str2; + test(is_equal_nc(str1, str2), true); + + str2 = L"xyz"; + test(is_equal_nc(str1, str2), false); + + str1 = L"xyz"; + str2 = L""; + test(is_equal_nc(str1, str2), false); + + str1 = L"xyz"; + str2 = L"xYz"; + test(is_equal_nc(str1, str2), true); + + str1 = L"xYz"; + str2 = L"xyz"; + test(is_equal_nc(str1, str2), true); + + str1 = L"abcxyz"; + str2 = L"abc"; + test(is_equal_nc(str1, str2), false); + + str1 = L"abc"; + str2 = L"abcxyz"; + test(is_equal_nc(str1, str2), false); + + str1 = L"xyzabc"; + str2 = L"abc"; + test(is_equal_nc(str1, str2), false); + + str1 = L"abc"; + str2 = L"xyzabc"; + test(is_equal_nc(str1, str2), false); + + str1 = L"xyz"; + str2 = L"xyz"; + test(is_equal_nc(str1, str2), true); +} + + +void test_text37() +{ + reset_test_counter("is_equal_nc const char*, str1_begin, str1_end, str2"); + + char foo[] = {"ABCDEF"}; + char bar[] = {"ABCDEF"}; + size_t len = sizeof(foo) / sizeof(char) - 1; // minus terminating zero + + test(pt::is_equal_nc(foo, foo + len, bar), true); + test(pt::is_equal_nc(foo, foo + len - 1, bar), false); + + foo[len - 1] = (char)(unsigned char)127; + bar[len - 1] = 
(char)(unsigned char)128; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = (char)(unsigned char)128; + bar[len - 1] = (char)(unsigned char)127; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = (char)(unsigned char)1; + bar[len - 1] = (char)(unsigned char)255; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = (char)(unsigned char)255; + bar[len - 1] = (char)(unsigned char)1; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = 'f'; + bar[len - 1] = 'f'; + bar[3] = 'X'; + test(pt::is_equal_nc(foo, foo + len, bar), false); + foo[3] = 'A'; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[3] = '1'; + bar[3] = '1'; + test(pt::is_equal_nc(foo, foo + len, bar), true); + + foo[3] = 'h'; + bar[3] = 'H'; + test(pt::is_equal_nc(foo, foo + len, bar), true); + + foo[3] = 'H'; + bar[3] = 'h'; + test(pt::is_equal_nc(foo, foo + len, bar), true); +} + + +void test_text38() +{ + reset_test_counter("is_equal_nc const wchar_t*, str1_begin, str1_end, str2"); + + wchar_t foo[] = {L"ABCDEF"}; + wchar_t bar[] = {L"ABCDEF"}; + size_t len = sizeof(foo) / sizeof(wchar_t) - 1; // minus terminating zero + + test(pt::is_equal_nc(foo, foo + len, bar), true); + test(pt::is_equal_nc(foo, foo + len - 1, bar), false); + + foo[len - 1] = (wchar_t)127; + bar[len - 1] = (wchar_t)128; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = (wchar_t)128; + bar[len - 1] = (wchar_t)127; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = (wchar_t)1; + bar[len - 1] = (wchar_t)255; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = (wchar_t)255; + bar[len - 1] = (wchar_t)1; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[len - 1] = L'f'; + bar[len - 1] = L'f'; + bar[3] = L'X'; + test(pt::is_equal_nc(foo, foo + len, bar), false); + foo[3] = L'A'; + test(pt::is_equal_nc(foo, foo + len, bar), false); + + foo[3] = L'1'; + bar[3] = L'1'; + 
test(pt::is_equal_nc(foo, foo + len, bar), true); + + foo[3] = L'h'; + bar[3] = L'H'; + test(pt::is_equal_nc(foo, foo + len, bar), true); + + foo[3] = L'H'; + bar[3] = L'h'; + test(pt::is_equal_nc(foo, foo + len, bar), true); +} + + +void test_text39() +{ + reset_test_counter("is_substr const char *"); + + test(pt::is_substr("", ""), true); + test(pt::is_substr("", "a"), true); + test(pt::is_substr("a", ""), false); + test(pt::is_substr("a", "a"), true); + test(pt::is_substr("a", "ab"), true); + test(pt::is_substr("abc", "abb"), false); + test(pt::is_substr("abc", "abcd"), true); + test(pt::is_substr("XYZ", "XYZabc"), true); + test(pt::is_substr("XYZ", "xYz"), false); + test(pt::is_substr("hello world", "hello world"), true); + test(pt::is_substr("hello world", "abc hello world"), false); + test(pt::is_substr("hello world", "hello worldabc"), true); + test(pt::is_substr("Hello World", "hello world"), false); + test(pt::is_substr("hello world", "Hello World"), false); +} + + +void test_text40() +{ + reset_test_counter("is_substr const wchar_t *"); + + test(pt::is_substr(L"", L""), true); + test(pt::is_substr(L"", L"a"), true); + test(pt::is_substr(L"a", L""), false); + test(pt::is_substr(L"a", L"a"), true); + test(pt::is_substr(L"a", L"ab"), true); + test(pt::is_substr(L"abc", L"abb"), false); + test(pt::is_substr(L"abc", L"abcd"), true); + test(pt::is_substr(L"XYZ", L"XYZabc"), true); + test(pt::is_substr(L"XYZ", L"xYz"), false); + test(pt::is_substr(L"hello world", L"hello world"), true); + test(pt::is_substr(L"hello world", L"abc hello world"), false); + test(pt::is_substr(L"hello world", L"hello worldabc"), true); + test(pt::is_substr(L"Hello World", L"hello world"), false); + test(pt::is_substr(L"hello world", L"Hello World"), false); +} + + + +void test_text41() +{ + reset_test_counter("is_substr std::string"); + + std::string str1, str2; + test(pt::is_substr(str1, str2), true); + + str1 = ""; + str2 = "a"; + test(pt::is_substr(str1, str2), true); + + str1 = 
"a"; + str2 = "a"; + test(pt::is_substr(str1, str2), true); + + str1 = "a"; + str2 = ""; + test(pt::is_substr(str1, str2), false); + + str1 = "abcd"; + str2 = "abcd"; + test(pt::is_substr(str1, str2), true); + + str1 = "abcdefg"; + str2 = "abcd"; + test(pt::is_substr(str1, str2), false); + + str1 = "abcd"; + str2 = "abcdefg"; + test(pt::is_substr(str1, str2), true); + + str1 = "aBCd"; + str2 = "abcd"; + test(pt::is_substr(str1, str2), false); + + str1 = "abcd"; + str2 = "aBCd"; + test(pt::is_substr(str1, str2), false); + + str1 = "aBCdefg"; + str2 = "abcd"; + test(pt::is_substr(str1, str2), false); + + str1 = "aBCd"; + str2 = "abcdefg"; + test(pt::is_substr(str1, str2), false); + + str1 = "abcd"; + str2 = "aBCdefg"; + test(pt::is_substr(str1, str2), false); +} + + +void test_text42() +{ + reset_test_counter("is_substr std::wstring"); + + std::wstring str1, str2; + test(pt::is_substr(str1, str2), true); + + str1 = L""; + str2 = L"a"; + test(pt::is_substr(str1, str2), true); + + str1 = L"a"; + str2 = L"a"; + test(pt::is_substr(str1, str2), true); + + str1 = L"a"; + str2 = L""; + test(pt::is_substr(str1, str2), false); + + str1 = L"abcd"; + str2 = L"abcd"; + test(pt::is_substr(str1, str2), true); + + str1 = L"abcdefg"; + str2 = L"abcd"; + test(pt::is_substr(str1, str2), false); + + str1 = L"abcd"; + str2 = L"abcdefg"; + test(pt::is_substr(str1, str2), true); + + str1 = L"aBCd"; + str2 = L"abcd"; + test(pt::is_substr(str1, str2), false); + + str1 = L"abcd"; + str2 = L"aBCd"; + test(pt::is_substr(str1, str2), false); + + str1 = L"aBCdefg"; + str2 = L"abcd"; + test(pt::is_substr(str1, str2), false); + + str1 = L"aBCd"; + str2 = L"abcdefg"; + test(pt::is_substr(str1, str2), false); + + str1 = L"abcd"; + str2 = L"aBCdefg"; + test(pt::is_substr(str1, str2), false); +} + + + +void test_text43() +{ + reset_test_counter("is_substr_nc const char *"); + + test(pt::is_substr_nc("", ""), true); + test(pt::is_substr_nc("", "a"), true); + test(pt::is_substr_nc("a", ""), false); + 
test(pt::is_substr_nc("a", "a"), true); + test(pt::is_substr_nc("a", "ab"), true); + test(pt::is_substr_nc("abc", "abb"), false); + test(pt::is_substr_nc("abc", "abcd"), true); + test(pt::is_substr_nc("XYZ", "XYZabc"), true); + test(pt::is_substr_nc("XYZ", "xYz"), true); + test(pt::is_substr_nc("hello world", "hello world"), true); + test(pt::is_substr_nc("hello world", "abc hello world"), false); + test(pt::is_substr_nc("hello world", "hello worldabc"), true); + test(pt::is_substr_nc("Hello World", "hello world"), true); + test(pt::is_substr_nc("hello world", "Hello World"), true); +} + + +void test_text44() +{ + reset_test_counter("is_substr_nc const wchar_t *"); + + test(pt::is_substr_nc(L"", L""), true); + test(pt::is_substr_nc(L"", L"a"), true); + test(pt::is_substr_nc(L"a", L""), false); + test(pt::is_substr_nc(L"a", L"a"), true); + test(pt::is_substr_nc(L"a", L"ab"), true); + test(pt::is_substr_nc(L"abc", L"abb"), false); + test(pt::is_substr_nc(L"abc", L"abcd"), true); + test(pt::is_substr_nc(L"XYZ", L"XYZabc"), true); + test(pt::is_substr_nc(L"XYZ", L"xYz"), true); + test(pt::is_substr_nc(L"hello world", L"hello world"), true); + test(pt::is_substr_nc(L"hello world", L"abc hello world"), false); + test(pt::is_substr_nc(L"hello world", L"hello worldabc"), true); + test(pt::is_substr_nc(L"Hello World", L"hello world"), true); + test(pt::is_substr_nc(L"hello world", L"Hello World"), true); +} + + + +void test_text45() +{ + reset_test_counter("is_substr_nc std::string"); + + std::string str1, str2; + test(pt::is_substr_nc(str1, str2), true); + + str1 = ""; + str2 = "a"; + test(pt::is_substr_nc(str1, str2), true); + + str1 = "a"; + str2 = "a"; + test(pt::is_substr_nc(str1, str2), true); + + str1 = "a"; + str2 = ""; + test(pt::is_substr_nc(str1, str2), false); + + str1 = "abcd"; + str2 = "abcd"; + test(pt::is_substr_nc(str1, str2), true); + + str1 = "abcdefg"; + str2 = "abcd"; + test(pt::is_substr_nc(str1, str2), false); + + str1 = "abcd"; + str2 = "abcdefg"; + 
/*
 * Entry point of this test group: calls test_text1() ... test_text46()
 * one after another, in numeric order.
 *
 * Takes no parameters and returns nothing.
 */
void make_tests()
{
	test_text1();
	test_text2();
	test_text3();
	test_text4();
	test_text5();
	test_text6();
	test_text7();
	test_text8();
	test_text9();
	test_text10();
	test_text11();
	test_text12();
	test_text13();
	test_text14();
	test_text15();
	test_text16();
	test_text17();
	test_text18();
	test_text19();
	test_text20();
	test_text21();
	test_text22();
	test_text23();
	test_text24();
	test_text25();
	test_text26();
	test_text27();
	test_text28();
	test_text29();
	test_text30();
	test_text31();
	test_text32();
	test_text33();
	test_text34();
	test_text35();
	test_text36();
	test_text37();
	test_text38();
	test_text39();
	test_text40();
	test_text41();
	test_text42();
	test_text43();
	test_text44();
	test_text45();
	test_text46();
}
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_tests_convert +#define headerfile_picotools_tests_convert + +namespace pt +{ + +namespace pt_convert_tests +{ + + + +void make_tests(); + + + +} + +} + +#endif diff --git a/tests/csvparser.cpp b/tests/csvparser.cpp new file mode 100644 index 0000000..eecbc3e --- /dev/null +++ b/tests/csvparser.cpp @@ -0,0 +1,326 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "csvparser.h" +#include "csv/csvparser.h" +#include "test.h" + + + +namespace pt +{ + +namespace pt_csvparser_tests +{ + + + +void test_csvparser(const char * input_str, const char * expected_json) +{ + CSVParser csv_parser; + Space space; + std::string json; + + CSVParser::Status status = csv_parser.parse(input_str, space); + + space.serialize_to_json_to(json); + + std::cout << "csv parsed as: " << json << std::endl; + test(json.c_str(), expected_json); +} + + + +void test_csvparser1() +{ + const char * input_str = ""; + const char * expected_json = R"json([[]])json"; + + test_csvparser(input_str, expected_json); +} + + +void test_csvparser2() +{ + const char * input_str = ","; + const char * expected_json = R"json([["",""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser3() +{ + const char * input_str = "field1"; + const char * expected_json = R"json([["field1"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser4() +{ + const char * input_str = R"csvstring(field1,field2,field3)csvstring"; + const char * expected_json = R"json([["field1","field2","field3"]])json"; + + test_csvparser(input_str, 
expected_json); +} + + +void test_csvparser5() +{ + const char * input_str = "\n"; + const char * expected_json = R"json([[""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser6() +{ + const char * input_str = "\r\n"; + const char * expected_json = R"json([[""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser7() +{ + const char * input_str = "field1\r\n"; + const char * expected_json = R"json([["field1"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser8() +{ + const char * input_str = ",\r\n"; + const char * expected_json = R"json([["",""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser9() +{ + const char * input_str = "field1\r\nfield2"; + const char * expected_json = R"json([["field1"],["field2"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser10() +{ + const char * input_str = "field1,field2\r\nfield3,field4"; + const char * expected_json = R"json([["field1","field2"],["field3","field4"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser11() +{ + const char * input_str = "field1,field2\r\nfield3,field4\r\n"; + const char * expected_json = R"json([["field1","field2"],["field3","field4"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser12() +{ + const char * input_str = "field1,field2\nfield3,field4\n"; + const char * expected_json = R"json([["field1","field2"],["field3","field4"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser13() +{ + const char * input_str = R"csv("")csv"; + const char * expected_json = R"json([[""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser14() +{ + const char * input_str = "\"\"\n"; + const char * expected_json = R"json([[""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser15() +{ + const char * input_str = "\"\"\r\n"; + const char * 
expected_json = R"json([[""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser16() +{ + const char * input_str = "\"\",\r\n"; + const char * expected_json = R"json([["",""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser17() +{ + const char * input_str = "\"\",\n"; + const char * expected_json = R"json([["",""]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser18() +{ + const char * input_str = "\"field1\""; + const char * expected_json = R"json([["field1"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser19() +{ + const char * input_str = "\"field1, with comma\""; + const char * expected_json = R"json([["field1, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser20() +{ + const char * input_str = "\"field1, with comma\"\r\n"; + const char * expected_json = R"json([["field1, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser21() +{ + const char * input_str = "\"field1, with comma\"\n"; + const char * expected_json = R"json([["field1, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + + +void test_csvparser22() +{ + const char * input_str = "\"field1, with comma\",\"field2\""; + const char * expected_json = R"json([["field1, with comma","field2"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser23() +{ + const char * input_str = "\"field1, with comma\",\"field2\"\r\n\"field3\",\"field4, with comma\""; + const char * expected_json = R"json([["field1, with comma","field2"],["field3","field4, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser24() +{ + const char * input_str = "\"field1, with comma\",\"field2\"\r\n\"field3\",\"field4, with comma\"\r\n"; + const char * expected_json = R"json([["field1, with comma","field2"],["field3","field4, with comma"]])json"; + + 
test_csvparser(input_str, expected_json); +} + +void test_csvparser25() +{ + const char * input_str = "\"field1, with comma\",\"field2 with \"\" double quote\"\r\n\"field3\",\"field4, with comma\""; + const char * expected_json = R"json([["field1, with comma","field2 with \" double quote"],["field3","field4, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser26() +{ + const char * input_str = "\"field1, with comma\",\"field2 with \"\" double quote\"\n\"field3\",\"field4, with comma\"\n"; + const char * expected_json = R"json([["field1, with comma","field2 with \" double quote"],["field3","field4, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + +void test_csvparser27() +{ + const char * input_str = "\"field1, with comma\",\"field2 with \"\" double quote\"syntax error\n\"field3\",\"field4, with comma\"\n"; + const char * expected_json = R"json([["field1, with comma","field2 with \" double quote"],["syntax error"],["field3","field4, with comma"]])json"; + + test_csvparser(input_str, expected_json); +} + +void make_tests() +{ + reset_test_counter("CSVParser"); + test_csvparser1(); + test_csvparser2(); + test_csvparser3(); + test_csvparser4(); + test_csvparser5(); + test_csvparser6(); + test_csvparser7(); + test_csvparser8(); + test_csvparser9(); + test_csvparser10(); + test_csvparser11(); + test_csvparser12(); + test_csvparser13(); + test_csvparser14(); + test_csvparser15(); + test_csvparser16(); + test_csvparser17(); + test_csvparser18(); + test_csvparser19(); + test_csvparser20(); + test_csvparser21(); + test_csvparser22(); + test_csvparser23(); + test_csvparser24(); + test_csvparser25(); + test_csvparser26(); + test_csvparser27(); +} + + + + +} + +} + + diff --git a/tests/csvparser.h b/tests/csvparser.h new file mode 100644 index 0000000..69c2821 --- /dev/null +++ b/tests/csvparser.h @@ -0,0 +1,56 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. 
+ * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef headerfile_picotools_tests_csvparser +#define headerfile_picotools_tests_csvparser + + +namespace pt +{ + +namespace pt_csvparser_tests +{ + + +void make_tests(); + + +} + +} + +#endif diff --git a/mainparser/sample/sample.cpp b/tests/main.cpp similarity index 50% rename from mainparser/sample/sample.cpp rename to tests/main.cpp index 17f38ce..7b53280 100644 --- a/mainparser/sample/sample.cpp +++ b/tests/main.cpp @@ -1,23 +1,23 @@ /* - * This file is a part of MainParser -- simple parser for main() parameters + * This file is a part of PikoTools * and is distributed under the (new) BSD licence. * Author: Tomasz Sowa */ -/* - * Copyright (c) 2011, Tomasz Sowa +/* + * Copyright (c) 2021, Tomasz Sowa * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -35,83 +35,37 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
*/ - - +#include "convert.h" +#include "mainoptionsparser.h" +#include "csvparser.h" #include -#include -#include "../mainparser.h" -int main() + +namespace pt { -MainParser mp; - - // suppose you call a 'programname' in such a way: - // $ programname -a -b - c --longparam -- otherlongparam -xyz paramwithvalue -x --longparam2 longwithvalue lastvalue1 lastvalue2 lastvalue3 - // so the main() function get this table as input: - - const char * tab[] = { - "programname", - "-a", - "-b", - "-", - "c", - "--longparam", - "--", - "otherlongparam", - "-xyz", - "paramwithvalue", - "-x", - "--longparam2", - "longwithvalue", - "lastvalue1", // some values left at the end - "lastvalue2", // you can get them by using GetValue() method - "lastvalue3", - }; - - mp.Set(sizeof(tab)/sizeof(const char*), tab); - - while( mp.NextParam() ) - { - if( mp.GetSingleParam() != 0 ) - { - std::cout << "-" << mp.GetSingleParam() << std::endl; - - // we know that 'z' requires a value - if( mp.GetSingleParam() == 'z' ) - std::cout << "value for z: " << mp.GetValue() << std::endl; - } - - if( *mp.GetDoubleParam() ) - { - std::cout << "--" << mp.GetDoubleParam() << std::endl; - - // we know that "longparam2" requires a value - if( strcmp(mp.GetDoubleParam(), "longparam2") == 0 ) - std::cout << "value for longparam2: " << mp.GetValue() << std::endl; - } - } - - while( !mp.IsEnd() ) - std::cout << mp.GetValue() << std::endl; +bool was_error = false; +int test_counter = 0; +const char * test_msg = nullptr; } -/* -program output: --a --b --c ---longparam ---otherlongparam --x --y --z -value for z: paramwithvalue --x ---longparam2 -value for longparam2: longwithvalue -lastvalue1 -lastvalue2 -lastvalue3 -*/ +int main(int argc, const char ** argv) +{ + pt::pt_convert_tests::make_tests(); + pt::pt_mainoptions_tests::make_tests(); + pt::pt_csvparser_tests::make_tests(); + + if( pt::was_error ) + { + std::cout << "some of the tests failed" << std::endl; + } + else + { + std::cout << 
"*********************************" << std::endl; + std::cout << "* all tests passed successfully *" << std::endl; + std::cout << "*********************************" << std::endl; + } + + return !pt::was_error ? 0 : 1; +} diff --git a/tests/mainoptionsparser.cpp b/tests/mainoptionsparser.cpp new file mode 100644 index 0000000..720f3e8 --- /dev/null +++ b/tests/mainoptionsparser.cpp @@ -0,0 +1,332 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2021, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "mainoptionsparser.h" +#include "test.h" +#include "mainoptions/mainoptionsparser.h" +#include "utf8/utf8.h" +#include "convert/convert.h" + +namespace pt +{ + +namespace pt_mainoptions_tests +{ + + +struct MainOptionsParserOutputTest +{ + MainOptionsParser::Status status; + const char * option_err; + const char * json; +}; + + +void print_status(MainOptionsParser::Status status) +{ + if( status == MainOptionsParser::status_ok ) + { + std::cout << "MainOptionsParser::status_ok"; + } + else + if( status == MainOptionsParser::status_argument_not_provided ) + { + std::cout << "MainOptionsParser::status_argument_not_provided"; + } + else + if( status == MainOptionsParser::status_argument_provided ) + { + std::cout << "MainOptionsParser::status_argument_provided"; + } +} + + +bool has_space_in_str(const char * arg) +{ + while( *arg ) + { + if( is_white((wchar_t)*arg) ) + return true; + + arg += 1; + } + + return false; +} + + +void print_args(int to_index, const char ** argv) +{ + for(int i=0 ; i <= to_index ; ++i) + { + bool has_space = has_space_in_str(argv[i]); + + if( has_space ) + std::cout << "\""; + + std::cout << argv[i]; + + if( has_space ) + std::cout << "\""; + + std::cout << " "; + } + + std::cout << std::endl; +} + + + +void test_mainoptionsparser(size_t len, const char ** argv, const Space & arguments_required, MainOptionsParserOutputTest * output) +{ + reset_test_counter("mainoptionsparser"); + 
std::cout << "Testing MainArgsParser" << std::endl; + + MainOptionsParser parser; + Space space; + + /* + * set to true when creating new tests (you can copy console output to the cpp file) + */ + bool prepare_tests = false; + + for(size_t i = 0 ; i < len ; ++i) + { + if( !prepare_tests ) + print_args(i, argv); + + MainOptionsParser::Status status = parser.parse(i + 1, argv, space, arguments_required); + + std::wstring & err_wstr = parser.get_wrong_option(); + std::string err_str; + wide_to_utf8(err_wstr, err_str); + + std::string json; + space.serialize_to_json_to(json); + + std::cout << "{"; + print_status(status); + std::cout << ", " << "\"" << err_str << "\", " << "R\"json(" << json << ")json\"" << "}," << std::endl; + + if( !prepare_tests ) + { + test("status", status, output[i].status); + test("err_arg", err_str.c_str(), output[i].option_err); + test("json", json.c_str(), output[i].json); + } + } +} + + + + +void test_mainoptionsparser1() +{ + const char * argv[] = { + "program_name", + "-a", + "-b", + "-c", + "-d", + "argument for d", + "-b", + "--long", + "--foo", + "foo-one", + "foo-two", + "--long-option", + "--bar", + "bar1", + "bar2", + "bar3", + "-x", + "--piggy2=option_for_piggy2", + "--piggy3", + "--bar", + "xbar1", + "xbar2", + "xbar3", + "--piggy2 another_option_for_piggy2", + "--", + "non-option-argument1", + "non-option-argument2", + "non-option-argument3", + }; + + MainOptionsParserOutputTest output[] = { + {MainOptionsParser::status_ok, "", R"json({})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]],"c":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "d", R"json({"a":[[]],"b":[[]],"c":[[]],"d":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]],"c":[[]],"d":[["argument for d"]]})json"}, + {MainOptionsParser::status_ok, "", 
R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"long":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "foo", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[[]],"long":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "foo", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one"]],"long":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for 
d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", 
R"json({"a":[[]],"args":[],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1","non-option-argument2"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1","non-option-argument2","non-option-argument3"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"}, + }; + + Space arguments_required; + arguments_required.add(L"d", 1); + arguments_required.add(L"foo", 2); + arguments_required.add(L"bar", 3); + arguments_required.add(L"piggy", 1); + arguments_required.add(L"piggy2", 1); + + size_t len = sizeof(argv) / sizeof(const char *); + test_mainoptionsparser(len, argv, arguments_required, output); +} + + + +void test_mainoptionsparser2() +{ + const char * argv[] = { + "program_name", + "--long1", + "--long2=with-argument", + "--long3", + "-a", + "--=option-for-empty-argument", + "-b", + "arg b 1", + "arg b 2", + "-c", + 
"-f file-name with spaces", + "--xxx", + "arg 1", + "arg 2", + "arg 3", + "-", /* first non-option argument */ + "non-option-argument2", + "non-option-argument3", + "non-option-argument4", + }; + + MainOptionsParserOutputTest output[] = { + {MainOptionsParser::status_ok, "", R"json({})json"}, + {MainOptionsParser::status_ok, "", R"json({"long1":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"long1":[[]],"long2":[["with-argument"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"a":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "b", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "b", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with 
spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[[]]})json"}, + {MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1"]]})json"}, + {MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2","non-option-argument3"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"}, + {MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2","non-option-argument3","non-option-argument4"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with 
spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"}, + }; + + Space arguments_required; + arguments_required.add(L"long2", 1); + arguments_required.add(L"b", 2); + arguments_required.add(L"f", 1); + arguments_required.add(L"xxx", 3); + arguments_required.add(L"", 1); + + size_t len = sizeof(argv) / sizeof(const char *); + test_mainoptionsparser(len, argv, arguments_required, output); +} + + + +void test_mainoptionsparser3() +{ + const char * argv[] = { + "program_name", + "--long1", + "--long2=with-argument", + "--long3", + }; + + MainOptionsParserOutputTest output[] = { + {MainOptionsParser::status_ok, "", R"json({})json"}, + {MainOptionsParser::status_ok, "", R"json({"long1":[[]]})json"}, + {MainOptionsParser::status_argument_provided, "long2", R"json({"long1":[[]]})json"}, + {MainOptionsParser::status_argument_provided, "long2", R"json({"long1":[[]]})json"}, + }; + + Space arguments_required; + arguments_required.add(L"non-existing", 1); + + size_t len = sizeof(argv) / sizeof(const char *); + test_mainoptionsparser(len, argv, arguments_required, output); +} + + + + +void make_tests() +{ + test_mainoptionsparser1(); + test_mainoptionsparser2(); + test_mainoptionsparser3(); +} + + +} + + +} + diff --git a/convert/inttostr.cpp b/tests/mainoptionsparser.h similarity index 66% rename from convert/inttostr.cpp rename to tests/mainoptionsparser.h index b4ae639..45c0d0c 100644 --- a/convert/inttostr.cpp +++ b/tests/mainoptionsparser.h @@ -35,66 +35,28 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "inttostr.h" +#ifndef headerfile_picotools_tests_mainoptionsparser +#define headerfile_picotools_tests_mainoptionsparser -namespace PT +namespace pt +{ + +namespace pt_mainoptions_tests { -std::wstring Toa(unsigned long long value, int base) -{ - std::wstring res; - Toa(value, res, false, base); - - return res; -} -std::wstring Toa(long long value, int base) -{ - std::wstring res; - Toa(value, res, false, base); - return res; -} +void make_tests(); -std::wstring Toa(unsigned long value, int base) -{ - return Toa(static_cast(value), base); -} -std::wstring Toa(long value, int base) -{ - return Toa(static_cast(value), base); -} - - -std::wstring Toa(unsigned int value, int base) -{ - return Toa(static_cast(value), base); -} - - -std::wstring Toa(int value, int base) -{ - return Toa(static_cast(value), base); -} - - -std::wstring Toa(unsigned short value, int base) -{ - return Toa(static_cast(value), base); -} - - -std::wstring Toa(short value, int base) -{ - return Toa(static_cast(value), base); -} - } +} + +#endif diff --git a/space/spacetojson.cpp b/tests/test.cpp similarity index 60% rename from space/spacetojson.cpp rename to tests/test.cpp index b8b46ae..2547c1a 100644 --- a/space/spacetojson.cpp +++ b/tests/test.cpp @@ -4,20 +4,20 @@ * Author: Tomasz Sowa */ -/* - * Copyright (c) 2012, Tomasz Sowa +/* + * Copyright (c) 2021, Tomasz Sowa * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
- * + * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -35,81 +35,76 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#include "spacetojson.h" +#include "test.h" -namespace PT + +namespace pt { -void SpaceToJSON::Clear() +void reset_test_counter() { - numeric.clear(); - boolean.clear(); - table.clear(); + test_counter = 1; + test_msg = nullptr; } -void SpaceToJSON::TreatAsTable(const wchar_t * space_name) +void reset_test_counter(const char * msg) { - table.insert(space_name); + test_counter = 1; + test_msg = msg; } -void SpaceToJSON::TreatAsTable(const std::wstring & space_name) +void test_status(bool status) { - table.insert(space_name); + if( status ) + { + std::cout << " OK"; + } + else + { + std::cout << " Fail"; + was_error = true; + } + + std::cout << std::endl; } - -void SpaceToJSON::TreatAsNumeric(const wchar_t * name) +template<> +bool test(const char * test_msg, const char * provided, const char * expected) { - numeric.insert(name); + std::cout << "test " << test_counter << ": "; + + if( test_msg ) + std::cout << test_msg; + + bool status = (std::strcmp(provided, expected) == 0); + test_status(status); + test_counter += 1; + + return status; } -void SpaceToJSON::TreatAsNumeric(const std::wstring & name) +template<> +bool test(const char * test_msg, const wchar_t * provided, const wchar_t * expected) { - numeric.insert(name); + std::cout << "test " << test_counter << ": "; + + if( test_msg ) + std::cout << test_msg; + + bool status = (std::wcscmp(provided, expected) == 0); + test_status(status); + test_counter += 1; + + return status; +} + } -void SpaceToJSON::TreatAsBool(const wchar_t * name) -{ - boolean.insert(name); -} - -void SpaceToJSON::TreatAsBool(const std::wstring & name) -{ - boolean.insert(name); -} - - - -bool SpaceToJSON::IsNumeric(const std::wstring & name) -{ - std::set::iterator i = 
numeric.find(name); - return i != numeric.end(); -} - - -bool SpaceToJSON::IsBool(const std::wstring & name) -{ - std::set::iterator i = boolean.find(name); - return i != boolean.end(); -} - -bool SpaceToJSON::IsTable(const std::wstring & name) -{ - std::set::iterator i = table.find(name); - return i != table.end(); -} - - - - - -} // namespace - diff --git a/mainspaceparser/mainspaceparser.h b/tests/test.h similarity index 54% rename from mainspaceparser/mainspaceparser.h rename to tests/test.h index 6148179..46d9da2 100644 --- a/mainspaceparser/mainspaceparser.h +++ b/tests/test.h @@ -4,20 +4,20 @@ * Author: Tomasz Sowa */ -/* - * Copyright (c) 2016, Tomasz Sowa +/* + * Copyright (c) 2021, Tomasz Sowa * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. @@ -35,66 +35,58 @@ * THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef headerfile_picotools_mainspaceparser_mainparser -#define headerfile_picotools_mainspaceparser_mainparser +#ifndef headerfile_picotools_tests_test +#define headerfile_picotools_tests_test -#include "space/space.h" -#include -#include +#include +#include -namespace PT + +namespace pt { +extern int test_counter; +extern const char * test_msg; +extern bool was_error; -/* - a very little parser for main(int argc, char ** argv) parameters - look in sample/sample.cpp how to use the parser -*/ -class MainSpaceParser +void test_status(bool status); +void reset_test_counter(); +void reset_test_counter(const char * msg); + + + +template +bool test(const char * test_msg, type_t provided, type_t expected) { -public: + std::cout << "test " << test_counter << ": "; - MainSpaceParser(); - ~MainSpaceParser(); + if( test_msg ) + std::cout << test_msg; - enum Status - { - status_ok = 0, - status_space_not_assigned = 1, - status_syntax_error = 2, - status_reading_eof = 3 /* CHANGE ME give a better name */ - }; + bool status = provided == expected; + test_status(status); + test_counter += 1; - void SetSpace(Space & space); - Status Parse(int argc, const char ** argv); - - void UTF8(bool utf8); - - std::wstring & GetErrorToken(); - -private: - - Space * space; - Space * options_space; - std::wstring wide_arg, temp_arg, temp_val; - std::vector temp_list_val; - bool use_utf8; - Status last_status; - std::wstring last_error_token; + return status; +} - void ConvertStr(const char * src, std::wstring & dst); - void Parse(size_t argc, const char ** argv, size_t & argv_index); - void ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index); - void ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index); - size_t RequireOption(const std::wstring & arg); - void AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector & list); - -}; +template +bool test(type_t provided, type_t expected) +{ + return test(test_msg, 
provided, expected); +} -} // namespace +template<> +bool test(const char * test_msg, const char * provided, const char * expected); +template<> +bool test(const char * test_msg, const wchar_t * provided, const wchar_t * expected); + + + +} #endif diff --git a/utf8/Makefile b/utf8/Makefile deleted file mode 100644 index 33c71a6..0000000 --- a/utf8/Makefile +++ /dev/null @@ -1,27 +0,0 @@ -include Makefile.o.dep - -libname=utf8.a - -all: $(libname) - -$(libname): $(o) - $(AR) rcs $(libname) $(o) - - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) -I.. $< - - - -depend: - makedepend -Y. -I.. -f- *.cpp > Makefile.dep - echo -n "o = " > Makefile.o.dep - ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep - - -clean: - rm -f *.o - rm -f $(libname) - - -include Makefile.dep diff --git a/utf8/Makefile.dep b/utf8/Makefile.dep deleted file mode 100644 index 9d53a86..0000000 --- a/utf8/Makefile.dep +++ /dev/null @@ -1,5 +0,0 @@ -# DO NOT DELETE - -utf8.o: utf8.h ../textstream/textstream.h ../space/space.h -utf8.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h -utf8.o: ../membuffer/membuffer.h ../textstream/types.h diff --git a/utf8/Makefile.o.dep b/utf8/Makefile.o.dep deleted file mode 100644 index 25d2b0d..0000000 --- a/utf8/Makefile.o.dep +++ /dev/null @@ -1 +0,0 @@ -o = utf8.o \ No newline at end of file diff --git a/utf8/utf8.h b/utf8/utf8.h deleted file mode 100644 index c33a68d..0000000 --- a/utf8/utf8.h +++ /dev/null @@ -1,334 +0,0 @@ -/* - * This file is a part of PikoTools - * and is distributed under the (new) BSD licence. - * Author: Tomasz Sowa - */ - -/* - * Copyright (c) 2010-2018, Tomasz Sowa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the name Tomasz Sowa nor the names of contributors to this - * project may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef headerfile_picotools_utf8_utf8 -#define headerfile_picotools_utf8_utf8 - -#include -#include -#include "textstream/textstream.h" - - -namespace PT -{ - - - -/*! - UTF-8, a transformation format of ISO 10646 - http://tools.ietf.org/html/rfc3629 - - when wchar_t is 4 bytes length we use UTF-32 - when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs) - - UTF-16 - http://www.ietf.org/rfc/rfc2781.txt -*/ - - - -/*! - returns true if 'c' is a correct unicode character -*/ -bool UTF8_CheckRange(int c); - - - -/*! 
- converting one character from UTF-8 to an int -*/ -size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct); -size_t UTF8ToInt(const char * utf8, int & res, bool & correct); -size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct); -size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct); - - -/*! - converting UTF-8 string to a wide string -*/ -bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1); -bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1); -bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1); -bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1); - - -/*! - converting UTF-8 string to a WTextStream stream - (need to be tested) -*/ -/* - implemented as templates below -bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear = true, int mode = 1); -bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear = true, int mode = 1); -bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear = true, int mode = 1); -bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear = true, int mode = 1); -*/ - -/*! - converting one int character to UTF-8 -*/ -size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len); -size_t IntToUTF8(int z, std::string & utf8, bool clear = true ); -size_t IntToUTF8(int z, std::ostream & utf8); - - -/*! 
- converting a wide string to UTF-8 string -*/ -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1); -bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1); -bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1); - -// implemented as a template below -//void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear = true, int mode = 1);// not tested - -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1); -bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1); -bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1); - -// implemented as a template below -//void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode = 1);// not tested - -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); -bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); -bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); -// implement void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1); - -bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1); -bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1); -bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1); -// implement void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, int mode = 1); - - - -namespace private_namespace -{ -template -bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function) -{ -int z; -size_t len; -bool 
correct, was_error = false; - - while( utf8_len > 0 ) - { - if( (unsigned char)*utf8 <= 0x7f ) - { - // small optimization - len = 1; - correct = true; - z = static_cast(*utf8); - } - else - { - len = UTF8ToInt(utf8, utf8_len, z, correct); // the len will be different from zero - } - - if( !correct ) - { - if( mode == 1 ) - convert_function(0xFFFD); // U+FFFD "replacement character" - - was_error = true; - } - else - { - convert_function(z); - } - - utf8 += len; - utf8_len -= len; - } - -return !was_error; -} - - - -template -void IntToWide(int c, TextStreamBase & res) -{ - if( sizeof(wchar_t)==2 && c>0xffff ) - { - // UTF16 surrogate pairs - c -= 0x10000; - res << static_cast(((c >> 10) & 0x3FF) + 0xD800); - res << static_cast((c & 0x3FF) + 0xDC00); - } - else - { - res << static_cast(c); - } -} - - -// not tested -// FIX ME it is not using surrogate pairs from input stream -// and mode parameter -template -void WideToUTF8Generic(TextStreamBase & buffer, int mode, function_type write_function) -{ - char utf8_buffer[256]; - std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char); - std::size_t utf8_sequence_max_length = 10; - std::size_t index = 0; - - typename TextStreamBase::const_iterator i = buffer.begin(); - - while( i != buffer.end() ) - { - if( index + utf8_sequence_max_length > buffer_len ) - { - write_function(utf8_buffer, index); - index = 0; - } - - index += PT::IntToUTF8(*i, utf8_buffer + index, buffer_len - index); - ++i; - } - - if( index > 0 ) - { - write_function(utf8_buffer, index); - } -} - - -} // namespace - - - - -// need to be tested -template -bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase & res, bool clear = true, int mode = 1) -{ - if( clear ) - res.clear(); - - bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) { - private_namespace::IntToWide(c, res); - }); - - return status; -} - - -// need to be tested -template -bool UTF8ToWide(const char * utf8, TextStreamBase & res, bool 
clear = true, int mode = 1) -{ -size_t utf8_len = 0; - - while( utf8[utf8_len] != 0 ) - utf8_len += 1; - -return UTF8ToWide(utf8, utf8_len, res, clear, mode); -} - - -// need to be tested -template -bool UTF8ToWide(const std::string & utf8, TextStreamBase & res, bool clear = true, int mode = 1) -{ - return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode); -} - - -// need to be tested -template -bool UTF8ToWide(std::istream & utf8, TextStreamBase & res, bool clear = true, int mode = 1) -{ -int z; -bool correct, was_error = false; - - if( clear ) - res.clear(); - - while( UTF8ToInt(utf8, z, correct) > 0 ) - { - if( !correct ) - { - if( mode == 1 ) - res << 0xFFFD; // U+FFFD "replacement character" - - was_error = true; - } - else - { - private_namespace::IntToWide(z, res); - } - } - -return !was_error; -} - - - - -// not tested -template -void WideToUTF8(TextStreamBase & buffer, std::string & utf8, bool clear = true, int mode = 1) -{ - if( clear ) - utf8.clear(); - - private_namespace::WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ - utf8.append(utf8_buffer, buffer_len); - }); -} - - -// not tested -template -void WideToUTF8(TextStreamBase & buffer, std::ostream & utf8, int mode = 1) -{ - private_namespace::WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){ - utf8.write(utf8_buffer, buffer_len); - }); -} - - - - - - -} // namespace - - -#endif -