From 7d513728449f0029cc56de6075b0b06e68c4e6be Mon Sep 17 00:00:00 2001 From: Tomasz Sowa Date: Tue, 5 Dec 2017 16:32:21 +0000 Subject: [PATCH] added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions for converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new API) now Text() methods return std::wstring by value (before they returned a reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e --- Makefile | 7 +- convert/Makefile | 27 +++ convert/Makefile.dep | 4 + convert/Makefile.o.dep | 1 + convert/convert.h | 3 +- convert/inttostr.h | 6 +- convert/misc.cpp | 55 +++++ convert/misc.h | 55 +++++ convert/strtoint.h | 211 +++++++++++++++++ convert/text.cpp | 159 +++++++++++++ convert/text.h | 69 ++++++ date/Makefile.o.dep | 2 +- 
mainparser/Makefile.o.dep | 2 +- mainspaceparser/Makefile.o.dep | 2 +- mainspaceparser/mainspaceparser.cpp | 4 +- space/Makefile.dep | 3 +- space/Makefile.o.dep | 2 +- space/space.cpp | 354 +++++++++++++++++++++------- space/space.h | 140 +++++++---- utf8/Makefile.o.dep | 2 +- 20 files changed, 970 insertions(+), 138 deletions(-) create mode 100644 convert/Makefile create mode 100644 convert/Makefile.dep create mode 100644 convert/Makefile.o.dep create mode 100644 convert/misc.cpp create mode 100644 convert/misc.h create mode 100644 convert/strtoint.h create mode 100644 convert/text.cpp create mode 100644 convert/text.h diff --git a/Makefile b/Makefile index b17a30a..5400daf 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ export LDFLAGS export AR -all: space mainparser mainspaceparser utf8 date +all: space mainparser mainspaceparser utf8 date convert @@ -45,6 +45,9 @@ utf8: FORCE date: FORCE @cd date ; $(MAKE) -e +convert: FORCE + @cd convert ; $(MAKE) -e + @@ -58,6 +61,7 @@ clean: @cd mainspaceparser ; $(MAKE) -e clean @cd utf8 ; $(MAKE) -e clean @cd date ; $(MAKE) -e clean + @cd convert ; $(MAKE) -e clean depend: @cd space ; $(MAKE) -e depend @@ -65,3 +69,4 @@ depend: @cd mainspaceparser ; $(MAKE) -e depend @cd utf8 ; $(MAKE) -e depend @cd date ; $(MAKE) -e depend + @cd convert ; $(MAKE) -e depend diff --git a/convert/Makefile b/convert/Makefile new file mode 100644 index 0000000..3b74c9f --- /dev/null +++ b/convert/Makefile @@ -0,0 +1,27 @@ +include Makefile.o.dep + +libname=convert.a + +all: $(libname) + +$(libname): $(o) + $(AR) rcs $(libname) $(o) + + +%.o: %.cpp + $(CXX) -c $(CXXFLAGS) -I.. $< + + + +depend: + makedepend -Y. -I.. 
-f- *.cpp > Makefile.dep + echo -n "o = " > Makefile.o.dep + ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep + + +clean: + rm -f *.o + rm -f $(libname) + + +include Makefile.dep diff --git a/convert/Makefile.dep b/convert/Makefile.dep new file mode 100644 index 0000000..de285f0 --- /dev/null +++ b/convert/Makefile.dep @@ -0,0 +1,4 @@ +# DO NOT DELETE + +misc.o: misc.h text.h +text.o: text.h diff --git a/convert/Makefile.o.dep b/convert/Makefile.o.dep new file mode 100644 index 0000000..9fe3548 --- /dev/null +++ b/convert/Makefile.o.dep @@ -0,0 +1 @@ +o = misc.o text.o \ No newline at end of file diff --git a/convert/convert.h b/convert/convert.h index c370307..99f68b7 100644 --- a/convert/convert.h +++ b/convert/convert.h @@ -40,6 +40,7 @@ #include "inttostr.h" - +#include "strtoint.h" +#include "text.h" #endif diff --git a/convert/inttostr.h b/convert/inttostr.h index 997e890..bb41912 100644 --- a/convert/inttostr.h +++ b/convert/inttostr.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012-2013, Tomasz Sowa + * Copyright (c) 2012-2017, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,6 +45,10 @@ namespace PT { + + + + // if the buffer is too small it will be terminated at the beginning (empty string) // and the function returns false template diff --git a/convert/misc.cpp b/convert/misc.cpp new file mode 100644 index 0000000..a6cf0d7 --- /dev/null +++ b/convert/misc.cpp @@ -0,0 +1,55 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017, Tomasz Sowa + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "misc.h" + + +namespace PT +{ + + +void SetOverflow(bool * was_overflow, bool val) +{ + if( was_overflow ) + *was_overflow = val; +} + + + + +} + diff --git a/convert/misc.h b/convert/misc.h new file mode 100644 index 0000000..4f9ba87 --- /dev/null +++ b/convert/misc.h @@ -0,0 +1,55 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. 
+ * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef headerfile_picotools_convert_misc +#define headerfile_picotools_convert_misc + +#include +#include "text.h" + + +namespace PT +{ + +void SetOverflow(bool * was_overflow, bool val); + + +} + + +#endif + diff --git a/convert/strtoint.h b/convert/strtoint.h new file mode 100644 index 0000000..b1f2271 --- /dev/null +++ b/convert/strtoint.h @@ -0,0 +1,211 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_convert_strtoint +#define headerfile_picotools_convert_strtoint + +#include +#include "text.h" +#include "misc.h" + + +namespace PT +{ + + + + +template +unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + unsigned long long res = 0; + bool carry = false; + int digit; + + SetOverflow(was_overflow, false); + str = SkipWhite(str); + + while( !carry && IsDigit(*str, base, &digit) ) + { + #ifdef __GNUC__ + carry = __builtin_mul_overflow(res, static_cast(base), &res); + + if( !carry ) + { + carry = __builtin_add_overflow(res, static_cast(digit), &res); + } + #else + // on other compilers than GCC or CLANG we do not test overflow at the moment + res = res * static_cast(base) + static_cast(digit); + #endif + + str += 1; + } + + if( carry ) + { + while( IsDigit(*str, base, &digit) ) + { + str += 1; + } + + SetOverflow(was_overflow, true); + res = 0; + } + + if( after_str ) + *after_str = str; + + return res; +} + + +template +long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + bool was_sign = false; + bool was_overflow_u = false; + + SetOverflow(was_overflow, false); + str = SkipWhite(str); + + if( *str == '-' ) + { + was_sign = true; + str += 1; + } + + // we do not trim spaces between a sign and a digit + + unsigned long long uval = Toull(str, base, after_str, 
&was_overflow_u); + unsigned long long sign_add = ( was_sign ) ? 1 : 0; + + if( was_overflow_u ) + { + SetOverflow(was_overflow, true); + return 0; + } + + if( uval > static_cast(std::numeric_limits::max()) + sign_add ) + { + SetOverflow(was_overflow, true); + return 0; + } + + if( was_sign ) + { + return static_cast(0) - static_cast(uval); + } + + return static_cast(uval); +} + + + +template +IntegerType ToUnsignedIntegerType(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + bool was_overflow_ll = false; + + SetOverflow(was_overflow, false); + unsigned long long val = Toull(str, base, after_str, &was_overflow_ll); + + if( was_overflow_ll || val > static_cast(std::numeric_limits::max()) ) + { + SetOverflow(was_overflow, true); + return 0; + } + + return static_cast(val); +} + + +template +unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + return ToUnsignedIntegerType(str, base, after_str, was_overflow); +} + +template +unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + return ToUnsignedIntegerType(str, base, after_str, was_overflow); +} + + + + + +template +IntegerType ToIntegerType(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + bool was_overflow_ll = false; + + SetOverflow(was_overflow, false); + long long val = Toll(str, base, after_str, &was_overflow_ll); + + if( was_overflow_ll || + val < static_cast(std::numeric_limits::min()) || + val > static_cast(std::numeric_limits::max()) ) + { + SetOverflow(was_overflow, true); + return 0; + } + + return static_cast(val); +} + + + +template +long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) +{ + return ToIntegerType(str, base, after_str, was_overflow); +} + +template +int Toi(const CharType * str, int base = 10, const CharType ** 
after_str = 0, bool * was_overflow = 0) +{ + return ToIntegerType(str, base, after_str, was_overflow); +} + + + +} + + +#endif diff --git a/convert/text.cpp b/convert/text.cpp new file mode 100644 index 0000000..67af9fd --- /dev/null +++ b/convert/text.cpp @@ -0,0 +1,159 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include "text.h" + + +namespace PT +{ + +// white_chars table should be sorted (a binary search algorithm is used to find a character) +// we do not treat a new line character (10) as a white character here +// also space (32) and tab (9) are not inserted here +static const wchar_t white_chars_table[] = { + 0x000B, // LINE TABULATION (vertical tabulation) + 0x000C, // FORM FEED (FF) + 0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file + 0x0085, // NEXT LINE (NEL) + 0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE) + 0x1680, // OGHAM SPACE MARK + 0x180E, // MONGOLIAN VOWEL SEPARATOR + 0x2000, // EN QUAD + 0x2001, // EM QUAD + 0x2002, // EN SPACE + 0x2003, // EM SPACE + 0x2004, // THREE-PER-EM SPACE + 0x2005, // FOUR-PER-EM SPACE + 0x2006, // SIX-PER-EM SPACE + 0x2007, // FIGURE SPACE + 0x2008, // PUNCTUATION SPACE + 0x2009, // THIN SPACE + 0x200A, // HAIR SPACE + 0x2028, // LINE SEPARATOR + 0x2029, // PARAGRAPH SEPARATOR + 0x202F, // NARROW NO-BREAK SPACE + 0x205F, // MEDIUM MATHEMATICAL SPACE + 0x3000, // IDEOGRAPHIC SPACE + 0xFEFF, // ZERO WIDTH NO-BREAK SPACE +}; + + + + +/* + if check_additional_chars is false then we are testing only a space (32), tab (9) and a new line (10) (if treat_new_line_as_white is true) +*/ +bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) +{ + // space (32) and tab (9) are the most common white chars + // so we check them at the beginning (optimisation) + if( c == 32 || c == 9 ) + return true; + + std::size_t len = sizeof(white_chars_table) / sizeof(wchar_t); + std::size_t o1 = 0; + std::size_t o2 = len - 1; + + if( c == 10 ) + return treat_new_line_as_white ? 
true : false; + + if( !check_additional_chars ) + return false; + + if( c < white_chars_table[o1] || c > white_chars_table[o2] ) + return false; + + if( c == white_chars_table[o1] || c == white_chars_table[o2] ) + return true; + + while( o1 + 1 < o2 ) + { + std::size_t o = (o2 - o1)/2 + o1; + + if( c == white_chars_table[o] ) + return true; + + if( c > white_chars_table[o] ) + o1 = o; + else + o2 = o; + } + +return false; +} + + + +bool IsDigit(wchar_t c, int base, int * digit) +{ + int d = 0; + + if( c >= '0' && c <= '9' ) + { + d = c - '0'; + } + else + if( c >= 'a' && c <= 'f' ) + { + d = c - 'a' + 10; + } + else + if( c >= 'A' && c <= 'F' ) + { + d = c - 'A' + 10; + } + else + { + if( digit ) + *digit = d; + + return false; + } + + if( digit ) + *digit = d; + + return d < base; +} + + + + +} + + diff --git a/convert/text.h b/convert/text.h new file mode 100644 index 0000000..09dec29 --- /dev/null +++ b/convert/text.h @@ -0,0 +1,69 @@ +/* + * This file is a part of PikoTools + * and is distributed under the (new) BSD licence. + * Author: Tomasz Sowa + */ + +/* + * Copyright (c) 2017, Tomasz Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name Tomasz Sowa nor the names of contributors to this + * project may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef headerfile_picotools_convert_text +#define headerfile_picotools_convert_text + + + +namespace PT +{ + +bool IsWhite(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true); + + +bool IsDigit(wchar_t c, int base = 10, int * digit = 0); + + +template +CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) +{ + while( IsWhite(static_cast(*str), check_additional_chars, treat_new_line_as_white) ) + { + str += 1; + } + + return str; +} + + + + +} + + +#endif diff --git a/date/Makefile.o.dep b/date/Makefile.o.dep index a562b44..1e1c891 100644 --- a/date/Makefile.o.dep +++ b/date/Makefile.o.dep @@ -1 +1 @@ -o = date.o +o = date.o \ No newline at end of file diff --git a/mainparser/Makefile.o.dep b/mainparser/Makefile.o.dep index bf313b1..0a6c3a7 100644 --- a/mainparser/Makefile.o.dep +++ b/mainparser/Makefile.o.dep @@ -1 +1 @@ -o = mainparser.o +o = mainparser.o \ No newline at end of file diff --git a/mainspaceparser/Makefile.o.dep b/mainspaceparser/Makefile.o.dep index cda6e8b..029147f 100644 --- a/mainspaceparser/Makefile.o.dep +++ b/mainspaceparser/Makefile.o.dep @@ -1 +1 @@ -o = 
mainspaceparser.o +o = mainspaceparser.o \ No newline at end of file diff --git a/mainspaceparser/mainspaceparser.cpp b/mainspaceparser/mainspaceparser.cpp index 454aa30..59bae06 100644 --- a/mainspaceparser/mainspaceparser.cpp +++ b/mainspaceparser/mainspaceparser.cpp @@ -218,7 +218,7 @@ void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector & list) { - std::wstring * val = space->GetValue(name); + std::wstring * val = space->GetFirstValue(name); if( !val ) { @@ -266,7 +266,7 @@ size_t MainSpaceParser::RequireOption(const std::wstring & arg) if( options_space ) { - std::wstring * val = options_space->GetValue(arg); + std::wstring * val = options_space->GetFirstValue(arg); if( val ) { diff --git a/space/Makefile.dep b/space/Makefile.dep index 1992b2b..ab26c66 100644 --- a/space/Makefile.dep +++ b/space/Makefile.dep @@ -4,7 +4,8 @@ jsontospaceparser.o: jsontospaceparser.h space.h ../textstream/types.h jsontospaceparser.o: ../utf8/utf8.h space.o: space.h ../textstream/types.h ../utf8/utf8.h space.o: ../textstream/textstream.h ../space/space.h ../date/date.h -space.o: ../convert/convert.h ../convert/inttostr.h ../membuffer/membuffer.h +space.o: ../convert/convert.h ../convert/inttostr.h ../convert/strtoint.h +space.o: ../convert/text.h ../convert/misc.h ../membuffer/membuffer.h space.o: ../textstream/types.h spaceparser.o: spaceparser.h space.h ../textstream/types.h ../utf8/utf8.h spacetojson.o: spacetojson.h space.h ../textstream/types.h diff --git a/space/Makefile.o.dep b/space/Makefile.o.dep index 12e1a4e..c773c04 100644 --- a/space/Makefile.o.dep +++ b/space/Makefile.o.dep @@ -1 +1 @@ -o = jsontospaceparser.o space.o spaceparser.o spacetojson.o +o = jsontospaceparser.o space.o spaceparser.o spacetojson.o \ No newline at end of file diff --git a/space/space.cpp b/space/space.cpp index 1905c78..7669505 100644 --- 
a/space/space.cpp +++ b/space/space.cpp @@ -40,7 +40,7 @@ #include "space.h" #include "utf8/utf8.h" #include "textstream/textstream.h" - +#include "convert/convert.h" namespace PT @@ -102,15 +102,15 @@ void Space::Clear() -std::wstring * Space::GetValue(const wchar_t * name) +std::wstring * Space::GetFirstValue(const wchar_t * name) { tmp_name = name; - return GetValue(tmp_name); + return GetFirstValue(tmp_name); } -std::wstring * Space::GetValue(const std::wstring & name) +std::wstring * Space::GetFirstValue(const std::wstring & name) { Table::iterator t = table.find(name); @@ -125,8 +125,15 @@ std::wstring * Space::GetValue(const std::wstring & name) } +// CHECK ME +const std::wstring * Space::GetFirstValue(const wchar_t * name) const +{ + tmp_name = name; + return GetFirstValue(tmp_name); +} -const std::wstring * Space::GetValue(const std::wstring & name) const + +const std::wstring * Space::GetFirstValue(const std::wstring & name) const { Table::const_iterator t = table.find(name); @@ -185,7 +192,7 @@ return false; -std::wstring & Space::Text(const wchar_t * name) +std::wstring Space::Text(const wchar_t * name) { tmp_name = name; return Text(tmp_name, L""); @@ -193,16 +200,16 @@ std::wstring & Space::Text(const wchar_t * name) -std::wstring & Space::Text(const wchar_t * name, const wchar_t * def) +std::wstring Space::Text(const wchar_t * name, const wchar_t * def) { tmp_name = name; return Text(tmp_name, def); } -std::wstring & Space::Text(const std::wstring & name, const wchar_t * def) +std::wstring Space::Text(const std::wstring & name, const wchar_t * def) { - std::wstring * value = GetValue(name); + std::wstring * value = GetFirstValue(name); if( value ) { @@ -210,54 +217,210 @@ std::wstring & Space::Text(const std::wstring & name, const wchar_t * def) } else { - tmp_value_text = def; - return tmp_value_text; + return std::wstring(def); +// tmp_value_text = def; +// return tmp_value_text; } } - -std::string & Space::AText(const wchar_t * name) +std::wstring 
Space::Text(const std::wstring & name, const std::wstring & def) { - tmp_name = name; - return AText(tmp_name, ""); -} - - - -std::string & Space::AText(const wchar_t * name, const char * def) -{ - tmp_name = name; - return AText(tmp_name, def); -} - - -std::string & Space::AText(const std::wstring & name, const char * def) -{ - std::wstring * value = GetValue(name); + std::wstring * value = GetFirstValue(name); if( value ) { - PT::WideToUTF8(*value, tmp_value_text_ascii); - return tmp_value_text_ascii; + return *value; } else { - tmp_value_text_ascii = def; - return tmp_value_text_ascii; + return def; } } -int Space::Int(const wchar_t * name) +std::wstring & Space::TextRef(const wchar_t * name) { tmp_name = name; - return Int(tmp_name, 0); + return TextRef(tmp_name, L""); } +std::wstring & Space::TextRef(const wchar_t * name, const wchar_t * def) +{ + tmp_name = name; + return TextRef(tmp_name, def); +} + +std::wstring & Space::TextRef(const std::wstring & name, const wchar_t * def) +{ + Table::iterator t = table.find(name); + + if( t == table.end() ) + { + Value & v = table[name]; + v.push_back(def); + return v[0]; + } + else + if( t->second.empty() ) + { + Value & v = t->second; + v.push_back(def); + return v[0]; + } + else + { + return t->second[0]; + } +} + + +std::wstring & Space::TextRef(const std::wstring & name, const std::wstring & def) +{ + return TextRef(name, def.c_str()); +} + + + + +std::string Space::TextA(const wchar_t * name) +{ + tmp_name = name; + return TextA(tmp_name, ""); +} + + + +std::string Space::TextA(const wchar_t * name, const char * def) +{ + tmp_name = name; + return TextA(tmp_name, def); +} + + +std::string Space::TextA(const std::wstring & name, const char * def) +{ + std::wstring * value = GetFirstValue(name); + + if( value ) + { + std::string res; + PT::WideToUTF8(*value, res); + return res; + } + else + { + return def; + } +} + + +std::string Space::TextA(const std::wstring & name, const std::string & def) +{ + return 
TextA(name, def.c_str()); +} + + + + +int Space::CheckIntegerBase(const std::wstring & value, const wchar_t ** save_ptr) +{ + const wchar_t * ptr = SkipWhite(value.c_str()); + int base = 10; + + if( *ptr == '0' ) + { + base = 8; + ptr += 1; // we can skip the first zero + } + else + if( *ptr == '-' && *(ptr+1) == '0' ) + { + base = 8; + // ptr is pointing to '-', do not increment it here + } + + *save_ptr = ptr; + return base; +} + + + + +unsigned int Space::ToUInt(const std::wstring & value) +{ + const wchar_t * ptr; + int base = CheckIntegerBase(value, &ptr); + return Toui(ptr, base); +} + +int Space::ToInt(const std::wstring & value) +{ + const wchar_t * ptr; + int base = CheckIntegerBase(value, &ptr); + return Toi(ptr, base); +} + + +unsigned long Space::ToULong(const std::wstring & value) +{ + const wchar_t * ptr; + int base = CheckIntegerBase(value, &ptr); + return Toul(ptr, base); +} + +long Space::ToLong(const std::wstring & value) +{ + const wchar_t * ptr; + int base = CheckIntegerBase(value, &ptr); + return Tol(ptr, base); +} + + +unsigned long long Space::ToULongLong(const std::wstring & value) +{ + const wchar_t * ptr; + int base = CheckIntegerBase(value, &ptr); + return Toull(ptr, base); +} + +long long Space::ToLongLong(const std::wstring & value) +{ + const wchar_t * ptr; + int base = CheckIntegerBase(value, &ptr); + return Toll(ptr, base); +} + + +size_t Space::ToSize(const std::wstring & value) +{ + if( sizeof(size_t) == sizeof(unsigned int) ) + return ToUInt(value); + else + if( sizeof(size_t) == sizeof(unsigned long) ) + return ToULong(value); + else + return ToULongLong(value); +} + + +bool Space::ToBool(const std::wstring & value) +{ + // IMPROVE ME add support for trimming white chars? 
+ return (EqualNoCase(value.c_str(), L"true") || + EqualNoCase(value.c_str(), L"yes") || + EqualNoCase(value.c_str(), L"1") + ); +} + + + + + + int Space::Int(const wchar_t * name, int def) { tmp_name = name; @@ -265,18 +428,11 @@ int Space::Int(const wchar_t * name, int def) } -int Space::ToInt(const std::wstring & value) -{ - // !! FIXME what if value is empty? - long res = (value[0] == '0')? wcstol(value.c_str() + 1, 0, 8) : wcstol(value.c_str(), 0, 10); - -return static_cast(res); -} int Space::Int(const std::wstring & name, int def) { - std::wstring * value = GetValue(name); + std::wstring * value = GetFirstValue(name); if( value ) return ToInt(*value); @@ -286,15 +442,27 @@ return def; - - -long Space::Long(const wchar_t * name) +unsigned int Space::UInt(const wchar_t * name, unsigned int def) { tmp_name = name; - return Long(tmp_name, 0); + return UInt(tmp_name, def); } +unsigned int Space::UInt(const std::wstring & name, unsigned int def) +{ + std::wstring * value = GetFirstValue(name); + + if( value ) + return ToUInt(*value); + +return def; +} + + + + + long Space::Long(const wchar_t * name, long def) { tmp_name = name; @@ -302,16 +470,9 @@ long Space::Long(const wchar_t * name, long def) } -long Space::ToLong(const std::wstring & value) -{ - // !! FIXME what if value is empty? - return (value[0] == '0')? 
wcstol(value.c_str() + 1, 0, 8) : wcstol(value.c_str(), 0, 10); -} - - long Space::Long(const std::wstring & name, long def) { - std::wstring * value = GetValue(name); + std::wstring * value = GetFirstValue(name); if( value ) return ToLong(*value); @@ -320,14 +481,61 @@ return def; } - -size_t Space::Size(const wchar_t * name) +unsigned long Space::ULong(const wchar_t * name, unsigned long def) { tmp_name = name; - return Size(tmp_name, 0); + return ULong(tmp_name, def); } +unsigned long Space::ULong(const std::wstring & name, unsigned long def) +{ + std::wstring * value = GetFirstValue(name); + + if( value ) + return ToULong(*value); + +return def; +} + + + +long long Space::LongLong(const wchar_t * name, long long def) +{ + tmp_name = name; + return LongLong(tmp_name, def); +} + +long long Space::LongLong(const std::wstring & name, long long def) +{ + std::wstring * value = GetFirstValue(name); + + if( value ) + return ToLongLong(*value); + +return def; +} + +unsigned long long Space::ULongLong(const wchar_t * name, unsigned long long def) +{ + tmp_name = name; + return ULongLong(tmp_name, def); +} + +unsigned long long Space::ULongLong(const std::wstring & name, unsigned long long def) +{ + std::wstring * value = GetFirstValue(name); + + if( value ) + return ToULongLong(*value); + +return def; +} + + + + + size_t Space::Size(const wchar_t * name, size_t def) { tmp_name = name; @@ -335,19 +543,9 @@ size_t Space::Size(const wchar_t * name, size_t def) } - -size_t Space::ToSize(const std::wstring & value) -{ - // !! FIXME what if value is empty? - unsigned long res = (value[0] == '0')? 
wcstoul(value.c_str() + 1, 0, 8) : wcstoul(value.c_str(), 0, 10); - -return static_cast(res); -} - - size_t Space::Size(const std::wstring & name, size_t def) { - std::wstring * value = GetValue(name); + std::wstring * value = GetFirstValue(name); if( value ) return ToSize(*value); @@ -358,13 +556,6 @@ return def; -bool Space::Bool(const wchar_t * name) -{ - tmp_name = name; - return Bool(tmp_name, false); -} - - bool Space::Bool(const wchar_t * name, bool def) { tmp_name = name; @@ -373,18 +564,9 @@ bool Space::Bool(const wchar_t * name, bool def) -bool Space::ToBool(const std::wstring & value) -{ - return (EqualNoCase(value.c_str(), L"true") || - EqualNoCase(value.c_str(), L"yes") || - EqualNoCase(value.c_str(), L"1") - ); -} - - bool Space::Bool(const std::wstring & name, bool def) { - std::wstring * value = GetValue(name); + std::wstring * value = GetFirstValue(name); if( value ) return ToBool(*value); diff --git a/space/space.h b/space/space.h index 9130277..d94e5cc 100644 --- a/space/space.h +++ b/space/space.h @@ -164,7 +164,16 @@ config syntax: class Space { public: - + + + /* + this is the table which represents your config file + in the Table map: the first (key) is your 'option' and the second is 'list' + */ + typedef std::vector Value; + typedef std::map Table; + + Space(); ~Space(); @@ -174,13 +183,6 @@ public: void Clear(); - // these methods return true if 'name' was found - // in other case they return false and 'out' will be equal 'def' - // they can return a null pointer if there is not such a 'name' - std::wstring * GetValue(const wchar_t * name); - std::wstring * GetValue(const std::wstring & name); - const std::wstring * GetValue(const std::wstring & name) const; - /* returns true if such an option has 'value' @@ -192,6 +194,31 @@ public: bool HasValue(const std::wstring & name, const std::wstring & value); + + /* + * + * methods for getting/finding a value + * + * + */ + + // moze tu powinno być FindValue? 
+ Value * GetValue(const wchar_t * name); + Value * GetValue(const std::wstring & name); + + const Value * GetValue(const wchar_t * name) const; + const Value * GetValue(const std::wstring & name) const; + + + // moze tu powinno być FindFirstValue? + // they can return a null pointer if there is not such a 'name' + std::wstring * GetFirstValue(const wchar_t * name); + std::wstring * GetFirstValue(const std::wstring & name); + + const std::wstring * GetFirstValue(const wchar_t * name) const; + const std::wstring * GetFirstValue(const std::wstring & name) const; + + /* those methods are used to extract information from space.table as a parameter they take the name of an option @@ -204,24 +231,56 @@ public: AText(...) always returns a reference to UTF-8 string */ - std::wstring & Text(const wchar_t * name); - std::wstring & Text(const wchar_t * name, const wchar_t * def); - std::wstring & Text(const std::wstring & name, const wchar_t * def); - std::string & AText(const wchar_t * name); - std::string & AText(const wchar_t * name, const char * def); - std::string & AText(const std::wstring & name, const char * def); - int Int(const wchar_t * name); - int Int(const wchar_t * name, int def); - int Int(const std::wstring & name, int def); - long Long(const wchar_t * name); - long Long(const wchar_t * name, long def); - long Long(const std::wstring & name, long def); - size_t Size(const wchar_t * name); - size_t Size(const wchar_t * name, size_t def); - size_t Size(const std::wstring & name, size_t def); - bool Bool(const wchar_t *); - bool Bool(const wchar_t * name, bool def); - bool Bool(const std::wstring & name, bool def); + std::wstring Text(const wchar_t * name); + std::wstring Text(const wchar_t * name, const wchar_t * def); + std::wstring Text(const std::wstring & name, const wchar_t * def); + std::wstring Text(const std::wstring & name, const std::wstring & def); + + // returns a reference + // if there is no such an option then a new one is inserted + std::wstring 
& TextRef(const wchar_t * name); + std::wstring & TextRef(const wchar_t * name, const wchar_t * def); + std::wstring & TextRef(const std::wstring & name, const wchar_t * def); + std::wstring & TextRef(const std::wstring & name, const std::wstring & def); + + + // returns UTF-8 string + std::string TextA(const wchar_t * name); + std::string TextA(const wchar_t * name, const char * def); + std::string TextA(const std::wstring & name, const char * def); + std::string TextA(const std::wstring & name, const std::string & def); + + + int Int(const wchar_t * name, int def = 0); + int Int(const std::wstring & name, int def = 0); + unsigned int UInt(const wchar_t * name, unsigned int def = 0); + unsigned int UInt(const std::wstring & name, unsigned int def = 0); + + long Long(const wchar_t * name, long def = 0); + long Long(const std::wstring & name, long def = 0); + unsigned long ULong(const wchar_t * name, unsigned long def = 0); + unsigned long ULong(const std::wstring & name, unsigned long def = 0); + + long long LongLong(const wchar_t * name, long long def = 0); + long long LongLong(const std::wstring & name, long long def = 0); + unsigned long long ULongLong(const wchar_t * name, unsigned long long def = 0); + unsigned long long ULongLong(const std::wstring & name, unsigned long long def = 0); + + size_t Size(const wchar_t * name, size_t def = 0); + size_t Size(const std::wstring & name, size_t def = 0); + + bool Bool(const wchar_t * name, bool def = false); + bool Bool(const std::wstring & name, bool def = false); + + + + /* + * + * methods for adding a new value + * + * + */ + std::wstring & FindAdd(const wchar_t * name); std::wstring & FindAdd(const std::wstring & name); @@ -276,21 +335,12 @@ public: * */ - /* - this is the table which represents your config file - in the Table map: the first (key) is your 'option' and the second is 'list' - */ - typedef std::vector Value; - typedef std::map Table; - - - std::wstring name; // space name Table table; // std::map > 
// childs typedef std::vector Spaces; - std::vector spaces; + Spaces spaces; // a parent space // null means a root space @@ -328,15 +378,23 @@ public: private: - std::wstring tmp_name; + mutable std::wstring tmp_name; + std::wstring tmp_value; std::wstring tmp_value_text; std::string tmp_value_text_ascii; - int ToInt(const std::wstring & value); - long ToLong(const std::wstring & value); - size_t ToSize(const std::wstring & value); - bool ToBool(const std::wstring & value); + int CheckIntegerBase(const std::wstring & value, const wchar_t ** save_ptr); + + unsigned int ToUInt(const std::wstring & value); + int ToInt(const std::wstring & value); + unsigned long ToULong(const std::wstring & value); + long ToLong(const std::wstring & value); + unsigned long long ToULongLong(const std::wstring & value); + long long ToLongLong(const std::wstring & value); + size_t ToSize(const std::wstring & value); + bool ToBool(const std::wstring & value); + wchar_t ToSmall(wchar_t c); bool EqualNoCase(const wchar_t * str1, const wchar_t * str2); static bool IsWhite(int c); diff --git a/utf8/Makefile.o.dep b/utf8/Makefile.o.dep index 33f18af..25d2b0d 100644 --- a/utf8/Makefile.o.dep +++ b/utf8/Makefile.o.dep @@ -1 +1 @@ -o = utf8.o +o = utf8.o \ No newline at end of file