Merge pull request 'api2021 part I' (#4) from api2021 into master

Reviewed-on: #4
This commit is contained in:
Tomasz Sowa 2021-05-27 10:37:35 +02:00
commit 848cdf9c03
83 changed files with 11747 additions and 6653 deletions

21
.editorconfig Normal file
View File

@ -0,0 +1,21 @@
# EditorConfig
# https://editorconfig.org/
# https://editorconfig-specification.readthedocs.io/
# top-most EditorConfig file
root = true
# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true
# Tab indentation, displayed 4 columns wide
[*.{h,cpp,html,css,js,conf,txt}]
indent_style = tab
indent_size = 4
# Tab indentation (make requires tabs in recipes), displayed 4 columns wide
[Makefile]
indent_style = tab
indent_size = 4

4
.gitignore vendored
View File

@ -2,4 +2,6 @@
.project
.settings/
*.o
*.a
src/pikotools.a
tests/tests
m

View File

@ -1,76 +1,58 @@
# Makefile for GNU make
# Defaults for the toolchain variables: each one is assigned only when it has no
# (non-empty) value yet, so values given on the command line or in the environment win.
# NOTE(review): GNU make predefines CXX and AR, so the CXX/AR defaults below presumably
# never take effect unless built-in variables are disabled (make -R) -- confirm.
ifndef CXX
CXX = clang++
endif
ifndef CXXFLAGS
CXXFLAGS = -Wall -O2 -I/usr/local/include
endif
ifndef LDFLAGS
LDFLAGS = -L/usr/local/lib
endif
ifndef AR
AR = ar
endif
#CXX=g++5
#CXXFLAGS=-Wall -O0 -g3 -gdwarf-2 -std=c++14
# Pass the toolchain settings down to every sub-make through the environment.
export CXX
export CXXFLAGS
export LDFLAGS
export AR
all: space mainparser mainspaceparser utf8 date convert log
all: src
src: FORCE
$(MAKE) -C src
space: FORCE
@cd space ; $(MAKE) -e
tests: FORCE
$(MAKE) -C src
$(MAKE) -C tests
mainparser: FORCE
@cd mainparser ; $(MAKE) -e
mainspaceparser: FORCE
@cd mainspaceparser ; $(MAKE) -e
tests-gcc10: FORCE
env CXX=g++10 CXXFLAGS="-Wl,-rpath=/usr/local/lib/gcc10/ -Wall -pedantic -O0 -g3 -std=c++20 -fmax-errors=1 -I../src -I/usr/local/include" $(MAKE) -C src
env CXX=g++10 CXXFLAGS="-Wl,-rpath=/usr/local/lib/gcc10/ -Wall -pedantic -O0 -g3 -std=c++20 -fmax-errors=1 -I../src -I/usr/local/include" $(MAKE) -C tests
utf8: FORCE
@cd utf8 ; $(MAKE) -e
date: FORCE
@cd date ; $(MAKE) -e
tests-clang: FORCE
env CXX=clang++ CXXFLAGS="-Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
env CXX=clang++ CXXFLAGS="-Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
convert: FORCE
@cd convert ; $(MAKE) -e
log: FORCE
@cd log ; $(MAKE) -e
tests-clang-sa: FORCE
env CXX=clang++ CXXFLAGS="-fsanitize=address -Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
env CXX=clang++ CXXFLAGS="-fsanitize=address -Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
tests-clang-sm: FORCE
env CXX=clang++ CXXFLAGS="-fsanitize=memory -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
env CXX=clang++ CXXFLAGS="-fsanitize=memory -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
tests-clang-su: FORCE
env CXX=clang++ CXXFLAGS="-fsanitize=undefined -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
env CXX=clang++ CXXFLAGS="-fsanitize=undefined -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
clean: FORCE
$(MAKE) -C src clean
$(MAKE) -C tests clean
depend: FORCE
$(MAKE) -C src depend
$(MAKE) -C tests depend
# FORCE has no prerequisites and no recipe; it is listed as a prerequisite
# so that the targets depending on it are re-run on every invocation.
FORCE:
clean:
@cd space ; $(MAKE) -e clean
@cd mainparser ; $(MAKE) -e clean
@cd mainspaceparser ; $(MAKE) -e clean
@cd utf8 ; $(MAKE) -e clean
@cd date ; $(MAKE) -e clean
@cd convert ; $(MAKE) -e clean
@cd log ; $(MAKE) -e clean
depend:
@cd space ; $(MAKE) -e depend
@cd mainparser ; $(MAKE) -e depend
@cd mainspaceparser ; $(MAKE) -e depend
@cd utf8 ; $(MAKE) -e depend
@cd date ; $(MAKE) -e depend
@cd convert ; $(MAKE) -e depend
@cd log ; $(MAKE) -e depend

View File

@ -1,27 +0,0 @@
# Builds the static library convert.a from every *.cpp file in this directory.
# Makefile.o.dep defines $(o), the list of object files (rewritten by the depend target).
include Makefile.o.dep
libname=convert.a
all: $(libname)
# Archive all objects into the static library.
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# Compile a single source file; -I.. adds the parent directory to the include path.
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# Regenerate Makefile.dep (header dependencies produced by makedepend)
# and Makefile.o.dep (the "o = ..." object list derived from the *.cpp file names).
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# Remove the object files and the library.
clean:
rm -f *.o
rm -f $(libname)
# Header dependencies written by the depend target.
include Makefile.dep

View File

@ -1,5 +0,0 @@
# DO NOT DELETE
inttostr.o: inttostr.h
misc.o: misc.h text.h
text.o: text.h

View File

@ -1 +0,0 @@
o = inttostr.o misc.o text.o

View File

@ -1,196 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstddef>
#include "text.h"
namespace PT
{
// Additional white characters recognised by IsWhite().
// Space (32), tab (9) and new line (10) are deliberately absent: IsWhite() handles them itself.
// The table has to stay sorted in ascending order because IsWhite() runs a binary search on it.
static const wchar_t white_chars_table[] = {
	0x000B, // LINE TABULATION (vertical tabulation)
	0x000C, // FORM FEED (FF)
	0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file
	0x0085, // NEXT LINE (NEL)
	0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE)
	0x1680, // OGHAM SPACE MARK
	0x180E, // MONGOLIAN VOWEL SEPARATOR
	0x2000, // EN QUAD
	0x2001, // EM QUAD
	0x2002, // EN SPACE
	0x2003, // EM SPACE
	0x2004, // THREE-PER-EM SPACE
	0x2005, // FOUR-PER-EM SPACE
	0x2006, // SIX-PER-EM SPACE
	0x2007, // FIGURE SPACE
	0x2008, // PUNCTUATION SPACE
	0x2009, // THIN SPACE
	0x200A, // HAIR SPACE
	0x2028, // LINE SEPARATOR
	0x2029, // PARAGRAPH SEPARATOR
	0x202F, // NARROW NO-BREAK SPACE
	0x205F, // MEDIUM MATHEMATICAL SPACE
	0x3000, // IDEOGRAPHIC SPACE
	0xFEFF, // ZERO WIDTH NO-BREAK SPACE
};


/*
	Tells whether c is a white character.

	- space (32) and tab (9) are always white
	- new line (10) is white only when treat_new_line_as_white is true
	- the characters from white_chars_table are white only when check_additional_chars is true
*/
bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white)
{
	// the two most common white characters are tested first
	if( c == 32 || c == 9 )
		return true;

	if( c == 10 )
		return treat_new_line_as_white;

	if( !check_additional_chars )
		return false;

	// binary search over the half-open range [first, last)
	const std::size_t table_len = sizeof(white_chars_table) / sizeof(white_chars_table[0]);
	std::size_t first = 0;
	std::size_t last = table_len;

	while( first < last )
	{
		const std::size_t middle = first + (last - first) / 2;
		const wchar_t candidate = white_chars_table[middle];

		if( candidate == c )
			return true;

		if( candidate < c )
			first = middle + 1;
		else
			last = middle;
	}

	return false;
}
/*
	Tells whether c is a digit in the given base.

	Recognised characters are '0'-'9', 'a'-'f' and 'A'-'F'.
	If digit is not null it receives the numeric value of c
	(also when that value is not smaller than base), or 0 when c is not one
	of the recognised characters.

	Returns true only when c was recognised and its value is smaller than base.
*/
bool IsDigit(wchar_t c, int base, int * digit)
{
	int value = 0;
	bool recognised = true;

	if( c >= '0' && c <= '9' )
		value = c - '0';
	else if( c >= 'a' && c <= 'f' )
		value = c - 'a' + 10;
	else if( c >= 'A' && c <= 'F' )
		value = c - 'A' + 10;
	else
		recognised = false;

	if( digit )
		*digit = value;

	return recognised && value < base;
}
/*
	Converts an ASCII upper case letter ('A'-'Z') to lower case.
	Every other character is returned unchanged.
*/
wchar_t ToLower(wchar_t c)
{
	if( c < 'A' || c > 'Z' )
		return c;

	return static_cast<wchar_t>(c + ('a' - 'A'));
}
/*
	Converts an ASCII lower case letter ('a'-'z') to upper case.
	Every other character is returned unchanged.
*/
wchar_t ToUpper(wchar_t c)
{
	if( c < 'a' || c > 'z' )
		return c;

	return static_cast<wchar_t>(c - ('a' - 'A'));
}
void ToLower(std::wstring & s)
{
std::wstring::size_type i;
for(i=0 ; i<s.size() ; ++i)
s[i] = ToLower(s[i]);
}
void ToUpper(std::wstring & s)
{
std::wstring::size_type i;
for(i=0 ; i<s.size() ; ++i)
s[i] = ToUpper(s[i]);
}
}

View File

@ -1,269 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_text
#define headerfile_picotools_convert_text
#include <string>
namespace PT
{
bool IsWhite(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true);
bool IsDigit(wchar_t c, int base = 10, int * digit = 0);
/*
 * Skips leading white characters.
 *
 * Returns a pointer to the first character of str for which IsWhite() is false.
 * The scan has no length limit, so a non-white character has to follow at some point.
 * The two flags are passed to IsWhite() unchanged.
 */
template<class CharType>
CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
	for( ; IsWhite(static_cast<wchar_t>(*str), check_additional_chars, treat_new_line_as_white) ; ++str )
	{
	}

	return str;
}
/*
 * Skips white characters at the end of the range [str_begin, str_end).
 *
 * str_end points at the end of the string (the last item + one).
 *
 * The return value points at the first character of the trailing run of white characters,
 * or equals str_end when the range does not end with a white character.
 * When the whole range is white (or empty) str_begin is returned.
 */
template<class CharType>
CharType * SkipWhiteFromBack(CharType * str_begin, CharType * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
	CharType * tail = str_end;

	while( tail > str_begin )
	{
		if( !IsWhite(static_cast<wchar_t>(*(tail - 1)), check_additional_chars, treat_new_line_as_white) )
			break;

		--tail;
	}

	return tail;
}
/*
 * A variant of SkipWhiteFromBack() for zero terminated strings:
 * the end of the string is located first and then the (begin, end) overload does the work.
 */
template<class CharType>
CharType * SkipWhiteFromBack(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
	CharType * terminator = str;

	for( ; *terminator != 0 ; ++terminator )
	{
	}

	return SkipWhiteFromBack(str, terminator, check_additional_chars, treat_new_line_as_white);
}
wchar_t ToLower(wchar_t c);
wchar_t ToUpper(wchar_t c);
// change to a template
void ToLower(std::wstring & s);
void ToUpper(std::wstring & s);
/*
 * Compares two zero terminated strings; each character goes through ToLower() first.
 *
 * Returns 0 when the strings are equal, otherwise the difference between
 * the first pair of (lower cased) characters which are not the same.
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 * str1, const StringType2 * str2)
{
	for( ; *str1 && *str2 ; ++str1, ++str2 )
	{
		if( ToLower(*str1) != ToLower(*str2) )
			break;
	}

	if( *str1 == 0 && *str2 == 0 )
		return 0;

	return static_cast<int>(ToLower(*str1)) - static_cast<int>(ToLower(*str2));
}
/*
 * CompareNoCase() for string objects: both arguments have to provide c_str().
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 & str1, const StringType2 & str2)
{
	const int difference = CompareNoCase(str1.c_str(), str2.c_str());
	return difference;
}
/*
 * A pointer-only alias of CompareNoCase(const StringType1 *, const StringType2 *).
 */
template<class StringType1, class StringType2>
int CompareNoCasep(const StringType1 * str1, const StringType2 * str2)
{
	const int difference = CompareNoCase(str1, str2);
	return difference;
}
/*
 * Compares the range [str1_begin, str1_end) with the zero terminated string str2;
 * each character goes through ToLower() first.
 *
 * Returns 0 when both are equal, otherwise the difference between the first pair
 * of (lower cased) characters which are not the same. An exhausted range counts as a zero character.
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
	const StringType1 * cursor = str1_begin;

	for( ; cursor < str1_end && *str2 ; ++cursor, ++str2 )
	{
		if( ToLower(*cursor) != ToLower(*str2) )
			break;
	}

	if( cursor == str1_end && *str2 == 0 )
		return 0;

	const wchar_t left = (cursor < str1_end) ? static_cast<wchar_t>(*cursor) : wchar_t(0);

	return static_cast<int>(ToLower(left)) - static_cast<int>(ToLower(*str2));
}
/*
 * Returns true when CompareNoCase() reports no difference between two zero terminated strings.
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1, const StringType2 * str2)
{
	const int difference = CompareNoCase(str1, str2);
	return difference == 0;
}
/*
 * EqualNoCase() for string objects: both arguments have to provide c_str().
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 & str1, const StringType2 & str2)
{
	const bool equal = EqualNoCase(str1.c_str(), str2.c_str());
	return equal;
}
/*
 * A pointer-only alias of EqualNoCase(const StringType1 *, const StringType2 *).
 */
template<class StringType1, class StringType2>
bool EqualNoCasep(const StringType1 * str1, const StringType2 * str2)
{
	const bool equal = EqualNoCase(str1, str2);
	return equal;
}
/*
 * Returns true when the range [str1_begin, str1_end) and the zero terminated string str2
 * are equal according to CompareNoCase().
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
	const int difference = CompareNoCase(str1_begin, str1_end, str2);
	return difference == 0;
}
/*
 * Returns true when long_str starts with short_str (a case sensitive prefix test;
 * despite the name, occurrences further inside long_str are not searched for).
 * An empty short_str matches every string.
 * Characters are compared after a conversion to wchar_t.
 */
template<class StringType1, class StringType2>
bool IsSubStringp(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str != 0 ; ++short_str, ++long_str )
	{
		if( *long_str == 0 || wchar_t(*short_str) != wchar_t(*long_str) )
			return false;
	}

	return true;
}
/*
 * The same test as IsSubStringp(): true when long_str starts with short_str.
 */
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 * short_str, const StringType2 * long_str)
{
	const bool is_prefix = IsSubStringp(short_str, long_str);
	return is_prefix;
}
/*
 * IsSubStringp() for string objects: both arguments have to provide c_str().
 */
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 & short_str, const StringType2 & long_str)
{
	const bool is_prefix = IsSubStringp(short_str.c_str(), long_str.c_str());
	return is_prefix;
}
/*
 * Returns true when long_str starts with short_str; each character goes through ToLower()
 * before the comparison. An empty short_str matches every string.
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCasep(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str != 0 ; ++short_str, ++long_str )
	{
		if( *long_str == 0 || ToLower(*short_str) != ToLower(*long_str) )
			return false;
	}

	return true;
}
/*
 * The same test as IsSubStringNoCasep().
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 * short_str, const StringType2 * long_str)
{
	const bool is_prefix = IsSubStringNoCasep(short_str, long_str);
	return is_prefix;
}
/*
 * IsSubStringNoCasep() for string objects: both arguments have to provide c_str().
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 & short_str, const StringType2 & long_str)
{
	const bool is_prefix = IsSubStringNoCasep(short_str.c_str(), long_str.c_str());
	return is_prefix;
}
}
#endif

View File

@ -1,27 +0,0 @@
# Builds the static library date.a from every *.cpp file in this directory.
# Makefile.o.dep defines $(o), the list of object files (rewritten by the depend target).
include Makefile.o.dep
libname=date.a
all: $(libname)
# Archive all objects into the static library.
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# Compile a single source file; -I.. adds the parent directory to the include path.
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# Regenerate Makefile.dep (header dependencies produced by makedepend)
# and Makefile.o.dep (the "o = ..." object list derived from the *.cpp file names).
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# Remove the object files and the library.
clean:
rm -f *.o
rm -f $(libname)
# Header dependencies written by the depend target.
include Makefile.dep

View File

@ -1,3 +0,0 @@
# DO NOT DELETE
date.o: date.h ../convert/inttostr.h

View File

@ -1 +0,0 @@
o = date.o

View File

@ -1,27 +0,0 @@
# Builds the static library log.a from every *.cpp file in this directory.
# Makefile.o.dep defines $(o), the list of object files (rewritten by the depend target).
include Makefile.o.dep
libname=log.a
all: $(libname)
# Archive all objects into the static library.
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# Compile a single source file; -I.. adds the parent directory to the include path.
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# Regenerate Makefile.dep (header dependencies produced by makedepend)
# and Makefile.o.dep (the "o = ..." object list derived from the *.cpp file names).
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# Remove the object files and the library.
clean:
rm -f *.o
rm -f $(libname)
# Header dependencies written by the depend target.
include Makefile.dep

View File

@ -1,9 +0,0 @@
# DO NOT DELETE
filelog.o: filelog.h ../textstream/textstream.h ../space/space.h
filelog.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h
filelog.o: ../membuffer/membuffer.h ../textstream/types.h ../utf8/utf8.h
log.o: log.h ../textstream/textstream.h ../space/space.h
log.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h
log.o: ../membuffer/membuffer.h ../textstream/types.h filelog.h
log.o: ../utf8/utf8.h

View File

@ -1 +0,0 @@
o = filelog.o log.o

View File

@ -1,27 +0,0 @@
# Builds the static library mainparser.a from every *.cpp file in this directory.
# Makefile.o.dep defines $(o), the list of object files (rewritten by the depend target).
include Makefile.o.dep
libname=mainparser.a
all: $(libname)
# Archive all objects into the static library.
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# Compile a single source file; -I.. adds the parent directory to the include path.
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# Regenerate Makefile.dep (header dependencies produced by makedepend)
# and Makefile.o.dep (the "o = ..." object list derived from the *.cpp file names).
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# Remove the object files and the library.
clean:
rm -f *.o
rm -f $(libname)
# Header dependencies written by the depend target.
include Makefile.dep

View File

@ -1,3 +0,0 @@
# DO NOT DELETE
mainparser.o: mainparser.h

View File

@ -1 +0,0 @@
o = mainparser.o

View File

@ -1,237 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2011-2012, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mainparser.h"
#include <string.h>
namespace PT
{
/*
	Creates a parser without any arguments assigned (no argument table, zero arguments)
	and resets the parsing state.
*/
MainParser::MainParser()
{
	Set(0, 0);
}
/*
	Creates a parser for the arguments given to main(int argc, char ** argv).
	The argv table is only referenced, it is not copied.
*/
MainParser::MainParser(int argc, const char ** argv)
{
	argsize = argc;
	arg = argv;
	Reset();
}
/*
	Assigns a new argument table (it is only referenced, not copied)
	and starts parsing from the beginning.
*/
void MainParser::Set(int argc, const char ** argv)
{
	arg = argv;
	argsize = argc;

	Reset();
}
void MainParser::Reset()
{
argindex = 1;
offset = 0;
has_single_param = false;
has_double_param = false;
}
/*
	Returns the current single parameter (the character after '-') or 0 if there is none.

	The character is read from the input only once; following calls return the cached
	value until NextParam() is called again.
*/
char MainParser::GetSingleParam()
{
	if( !has_single_param )
		return 0;

	if( last_single_param == 0 )
	{
		Advance();

		// at the end of the input last_single_param stays zero
		if( argindex < argsize )
		{
			last_single_param = arg[argindex][offset];
			offset += 1;
		}
	}

	return last_single_param;
}
/*
	Returns true when the current single parameter (see GetSingleParam()) is equal to c.
*/
bool MainParser::IsSingleParam(char c)
{
	const char current = GetSingleParam();
	return current == c;
}
/*
	Returns the name of the current double parameter (the text after '--')
	or a pointer to an empty string if there is none. A null pointer is never returned.

	The name is read from the input only once; following calls return the cached
	pointer until NextParam() is called again.
*/
const char * MainParser::GetDoubleParam()
{
	empty = 0;

	if( !has_double_param )
		return &empty;

	if( last_double_param == &empty )
	{
		Advance();

		// at the end of the input last_double_param keeps pointing at the empty string
		if( argindex < argsize )
		{
			last_double_param = arg[argindex] + offset;
			argindex += 1;
			offset = 0;
		}
	}

	return last_double_param;
}
/*
	Returns true when the current double parameter (see GetDoubleParam()) is equal to param.
*/
bool MainParser::IsDoubleParam(const char * param)
{
	const char * current = GetDoubleParam();
	return strcmp(current, param) == 0;
}
/*
	Returns the next value from the input: the rest of the current argument
	(or the whole next one if the current argument is exhausted) and moves to the following argument.
	At the end of the input a pointer to an empty string is returned, never a null pointer.

	When a value was read the single/double parameter flags are cleared.
*/
const char * MainParser::GetValue()
{
	empty = 0;
	Advance();

	const char * value = &empty;

	if( argindex < argsize )
	{
		value = arg[argindex] + offset;

		argindex += 1;
		offset = 0;

		has_single_param = false;
		has_double_param = false;
	}

	return value;
}
bool MainParser::NextParam()
{
bool was_single_param = has_single_param;
has_single_param = false;
has_double_param = false;
last_single_param = 0;
last_double_param = &empty;
empty = 0;
if( Advance() )
was_single_param = false;
if( argindex >= argsize )
return false;
if( arg[argindex][offset]=='-' )
{
if( arg[argindex][offset+1]=='-' )
{
has_double_param = true;
offset += 2;
}
else
{
has_single_param = true;
offset += 1;
}
}
else
{
if( was_single_param )
has_single_param = true;
}
return has_single_param || has_double_param;
}
/*
	Returns true when the whole input has been consumed.
	Exhausted arguments are skipped first (see Advance()).
*/
bool MainParser::IsEnd()
{
	Advance();

	const bool finished = (argindex >= argsize);
	return finished;
}
/*
	Returns the flag set by NextParam(): true when the current parameter is a single one.
*/
bool MainParser::HasSingleParam()
{
	return has_single_param;
}
/*
	Returns the flag set by NextParam(): true when the current parameter is a double one.
*/
bool MainParser::HasDoubleParam()
{
	return has_double_param;
}
/*
	Skips arguments which have been read to the end (the current position points at their
	terminating zero), leaving the position at the beginning of the next argument.

	Returns true when at least one argument was skipped.
*/
bool MainParser::Advance()
{
	bool moved = false;

	for( ; argindex < argsize && arg[argindex][offset] == 0 ; ++argindex )
	{
		offset = 0;
		moved = true;
	}

	return moved;
}
} // namespace

View File

@ -1,132 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2011-2012, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_mainparser_mainparser
#define headerfile_picotools_mainparser_mainparser
namespace PT
{
/*
a very small parser for main(int argc, char ** argv) parameters
look in sample/sample.cpp to see how to use the parser
*/
class MainParser
{
public:
// creating a parser without arguments, use Set() to assign them
MainParser();
// creating a parser for the given arguments (the same as calling Set())
MainParser(int argc, const char ** argv);
// setting arguments passed to main(int argc, char ** argv) function
// the argv table is only referenced, it is not copied
void Set(int argc, const char ** argv);
// resetting the current state of parsing
// now you can start parsing from the beginning
// you don't have to call it for the first time
// (it is automatically called by the Set method)
void Reset();
// checking if there is a next single or double parameter
// this method represents the main loop of checking parameters
bool NextParam();
// returning a single parameter (if it exists) or '\0' otherwise
// single parameter means a parameter with '-' at the beginning e.g. "-a"
// next call to this method (without calling NextParam) returns the same value
// this method should be called after NextParam()
char GetSingleParam();
// calling GetSingleParam() and comparing with 'c'
bool IsSingleParam(char c);
// returning a string for a double parameter or an empty string if there is no such parameter
// double parameter means a parameter with '--' at the beginning e.g. "--output"
// next call to this method (without calling NextParam) returns the same value
// GetDoubleParam() should be called after NextParam()
// this method never returns a null pointer -- if there is no param name (end of the string)
// a pointer to an empty string will be returned
const char * GetDoubleParam();
// calling GetDoubleParam() and comparing with 'param'
// so you don't have to call strcmp directly
bool IsDoubleParam(const char * param);
// returning a string representing a value
// you have to know which parameter requires a value
// and if such a parameter is found then use this method to obtain the value
// the method advances the current pointer so the next call to this method returns the next value
// you can call GetValue() even when NextParam() has returned false
// in such a case this gets you the last values (those at the end of the parameter list)
// this method never returns a null pointer -- if there is no value (end of the string)
// a pointer to an empty string will be returned
const char * GetValue();
// returning true if the input string is finished
// there are no more parameters or values
bool IsEnd();
// returning true if there is a single parameter
// should be called after NextParam()
bool HasSingleParam();
// returning true if there is a double parameter
// should be called after NextParam()
bool HasDoubleParam();
private:
// skipping arguments which have been read to the end
// returning true if the position was moved to another argument
bool Advance();
// index of the argument (in the arg table) which is currently being read
int argindex;
// position of the next character to read inside arg[argindex]
int offset;
// number of arguments (argc)
int argsize;
// the argument table (argv), not owned by the parser
const char ** arg;
// a zero character, its address is returned as the empty string
char empty;
// kind of the parameter found by the last NextParam()
bool has_single_param;
bool has_double_param;
// values cached by GetSingleParam() and GetDoubleParam()
char last_single_param;
const char * last_double_param;
};
} // namespace
#endif

View File

@ -1,10 +0,0 @@
# Builds the MainParser usage sample directly from the sources (no library is needed).
# The compiler is taken from $(CXX) and optional flags from $(CXXFLAGS), so the sample
# follows the same toolchain settings as the rest of the project instead of a fixed g++.
output=sample

# all and clean do not produce files with these names
.PHONY: all clean

all: $(output)

$(output): sample.cpp ../mainparser.h ../mainparser.cpp
	$(CXX) $(CXXFLAGS) -o $(output) sample.cpp ../mainparser.cpp

# the .exe variant is what a Windows toolchain produces
clean:
	rm -f $(output)
	rm -f $(output).exe

View File

@ -1,27 +0,0 @@
# Builds the static library mainspaceparser.a from every *.cpp file in this directory.
# Makefile.o.dep defines $(o), the list of object files (rewritten by the depend target).
include Makefile.o.dep
libname=mainspaceparser.a
all: $(libname)
# Archive all objects into the static library.
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# Compile a single source file; -I.. adds the parent directory to the include path.
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# Regenerate Makefile.dep (header dependencies produced by makedepend)
# and Makefile.o.dep (the "o = ..." object list derived from the *.cpp file names).
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# Remove the object files and the library.
clean:
rm -f *.o
rm -f $(libname)
# Header dependencies written by the depend target.
include Makefile.dep

View File

@ -1,6 +0,0 @@
# DO NOT DELETE
mainspaceparser.o: mainspaceparser.h ../space/space.h ../textstream/types.h
mainspaceparser.o: ../utf8/utf8.h ../textstream/textstream.h ../date/date.h
mainspaceparser.o: ../convert/inttostr.h ../membuffer/membuffer.h
mainspaceparser.o: ../textstream/types.h

View File

@ -1 +0,0 @@
o = mainspaceparser.o

View File

@ -1,297 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2016-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mainspaceparser.h"
#include "utf8/utf8.h"
#include <string.h>
// REMOVE ME
#include <iostream>
namespace PT
{
/*
	Creates a parser without a Space object assigned (see SetSpace()).
	UTF-8 input is assumed by default and the last status is status_ok.
*/
MainSpaceParser::MainSpaceParser()
{
	use_utf8 = true;
	last_status = status_ok;

	space = nullptr;
	options_space = nullptr;
}
/*
	Nothing to release here: the destructor body is empty.
*/
MainSpaceParser::~MainSpaceParser()
{
}
/*
	Selects how ConvertStr() converts the arguments to wide strings:
	true  - the input is decoded as UTF-8
	false - each byte becomes one wide character
*/
void MainSpaceParser::UTF8(bool utf8)
{
	use_utf8 = utf8;
}
/*
	Assigns the Space object which will receive the parsed arguments.
	Only the address is stored. The cached "options" space is dropped;
	Parse() looks it up again.
*/
void MainSpaceParser::SetSpace(Space & space_ref)
{
	options_space = nullptr;
	space = &space_ref;
}
/*
	Returns a reference to the token stored when the last Parse() reported a syntax error
	(the string is cleared at the beginning of each Parse() and when the input ends too early).
*/
std::wstring & MainSpaceParser::GetErrorToken()
{
	return last_error_token;
}
/*
	Parses the arguments of main(int argc, char ** argv) into the assigned Space object.

	Returns status_space_not_assigned when SetSpace() has not been called,
	otherwise the status of parsing. Parsing starts at index 1 and stops
	at the first argument which changes the status from status_ok.
*/
MainSpaceParser::Status MainSpaceParser::Parse(int argc, const char ** argv)
{
	if( !space )
		return status_space_not_assigned;

	options_space = space->FindSpace(L"options");
	last_status = status_ok;
	last_error_token.clear();

	const size_t arg_count = (size_t)argc;
	size_t index = 1;

	// the three-argument Parse() moves the index forward by itself
	while( index < arg_count && last_status == status_ok )
		Parse(arg_count, argv, index);

	return last_status;
}
/*
	Parses the argument at argv_index:
	- "--name" goes to ParseMultiArgument()
	- "-x..." goes to ParseSingleArgument()
	- anything else is a syntax error: the token is remembered in last_error_token
	  and argv_index is left unchanged
*/
void MainSpaceParser::Parse(size_t argc, const char ** argv, size_t & argv_index)
{
	const char * token = argv[argv_index];

	if( token[0] != '-' )
	{
		last_status = status_syntax_error;
		ConvertStr(token, last_error_token);
		return;
	}

	if( token[1] == '-' )
		ParseMultiArgument(argc, argv, argv_index);
	else
		ParseSingleArgument(argc, argv, argv_index);
}
/*
	Converts a zero terminated string to a wide string.
	With use_utf8 set the work is done by PT::UTF8ToWide(),
	otherwise each byte (taken as an unsigned value) becomes one wide character.
*/
void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst)
{
	if( use_utf8 )
	{
		PT::UTF8ToWide(src, dst);
		return;
	}

	dst.clear();

	while( *src )
	{
		dst += static_cast<wchar_t>(static_cast<unsigned char>(*src));
		++src;
	}
}
/*
	Parses an argument beginning with a single dash, e.g. "-abc".

	Each character is a separate parameter name. The first character for which
	RequireOption() reports a non-zero number of values ends the scan: the rest
	of the argument (if any) is its first value and the missing values are taken
	from the following arguments. If the input ends before all values are read
	the status is set to status_reading_eof.

	argv_index is moved past the argument and past all consumed values.
*/
void MainSpaceParser::ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index)
{
	ConvertStr(argv[argv_index] + 1, wide_arg);
	argv_index += 1;
	temp_list_val.clear();

	for(const wchar_t * flag = wide_arg.c_str() ; *flag ; ++flag)
	{
		temp_arg = *flag;

		size_t missing = RequireOption(temp_arg);
		const bool takes_values = (missing > 0);

		if( takes_values )
		{
			if( flag[1] != 0 )
			{
				// the text glued to the parameter is its first value
				temp_val = flag + 1;
				temp_list_val.push_back(temp_val);
				missing -= 1;
			}

			while( missing > 0 && argv_index < argc )
			{
				ConvertStr(argv[argv_index], temp_val);
				temp_list_val.push_back(temp_val);

				missing -= 1;
				argv_index += 1;
			}

			if( missing > 0 )
			{
				last_status = status_reading_eof;
				last_error_token.clear();
			}
		}

		temp_val.clear();
		AddValueToItem(temp_arg, temp_val, temp_list_val);

		// a parameter with values consumes the rest of the argument
		if( takes_values )
			break;
	}
}
/*
	Parses an argument beginning with two dashes, e.g. "--name".

	The text after the dashes is the parameter name. RequireOption() tells how many
	values it takes; they are read from the following arguments. If the input ends
	before all values are read the status is set to status_reading_eof.

	argv_index is moved past the argument and past all consumed values.
*/
void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index)
{
	ConvertStr(argv[argv_index] + 2, temp_arg);
	argv_index += 1;

	size_t missing = RequireOption(temp_arg);
	temp_list_val.clear();

	while( missing > 0 && argv_index < argc )
	{
		ConvertStr(argv[argv_index], temp_val);
		temp_list_val.push_back(temp_val);

		missing -= 1;
		argv_index += 1;
	}

	if( missing > 0 )
	{
		last_status = status_reading_eof;
		last_error_token.clear();
	}

	temp_val.clear();
	AddValueToItem(temp_arg, temp_val, temp_list_val);
}
void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list)
{
std::wstring * val = space->GetFirstValue(name);
if( !val )
{
if( list.empty() )
space->Add(name, empty_value);
else
if( list.size() == 1 )
space->Add(name, list[0]);
else
space->table[name] = list; // !! IMPROVE ME there'll be a new api in space
}
else
{
PT::Space::Table::iterator i = space->table.find(name);
PT::Space::Value * table_value;
if( i == space->table.end() )
{
table_value = &space->table[name];
table_value->push_back(*val);
//space->table_single.erase(name);
}
else
{
table_value = &i->second;
}
if( list.empty() )
{
table_value->push_back(empty_value);
}
else
{
for(const auto & list_item : list)
table_value->push_back(list_item);
}
}
}
/*
	Returns how many values the parameter 'arg' takes.

	The number is read (as a decimal integer) from the value stored under 'arg'
	in the "options" space. Zero is returned when there is no "options" space,
	when the parameter is not listed there or when the number is negative.

	IMPLEMENT ME: add a converter to convert/inttostr.h and use it here instead of wcstol
*/
size_t MainSpaceParser::RequireOption(const std::wstring & arg)
{
	if( !options_space )
		return 0;

	std::wstring * val = options_space->GetFirstValue(arg);

	if( !val )
		return 0;

	long how_many = wcstol(val->c_str(), 0, 10);

	if( how_many < 0 )
		how_many = 0;

	return (size_t)how_many;
}
} // namespace

View File

@ -1,27 +0,0 @@
# Makefile.o.dep is generated by the 'depend' target and defines the variable 'o'
# (the list of object files to build)
include Makefile.o.dep
# name of the static library built from the object files
libname=space.a
# default target: build the static library
all: $(libname)
# pack all object files into the static library
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# compile a single source file, the parent directory is added to the include path
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# regenerate Makefile.dep (header dependencies made by makedepend)
# and Makefile.o.dep (one .o name for each .cpp file found in this directory)
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# remove the object files and the library
clean:
rm -f *.o
rm -f $(libname)
# header dependencies generated by the 'depend' target
include Makefile.dep

View File

@ -1,17 +0,0 @@
# DO NOT DELETE
jsontospaceparser.o: jsontospaceparser.h space.h ../textstream/types.h
jsontospaceparser.o: ../utf8/utf8.h ../textstream/textstream.h
jsontospaceparser.o: ../space/space.h ../date/date.h ../convert/inttostr.h
jsontospaceparser.o: ../membuffer/membuffer.h ../textstream/types.h
space.o: space.h ../textstream/types.h ../utf8/utf8.h
space.o: ../textstream/textstream.h ../space/space.h ../date/date.h
space.o: ../convert/inttostr.h ../membuffer/membuffer.h ../textstream/types.h
space.o: ../convert/convert.h ../convert/inttostr.h
space.o: ../convert/patternreplacer.h ../convert/strtoint.h ../convert/text.h
space.o: ../convert/misc.h
spaceparser.o: spaceparser.h space.h ../textstream/types.h ../utf8/utf8.h
spaceparser.o: ../textstream/textstream.h ../space/space.h ../date/date.h
spaceparser.o: ../convert/inttostr.h ../membuffer/membuffer.h
spaceparser.o: ../textstream/types.h
spacetojson.o: spacetojson.h space.h ../textstream/types.h

View File

@ -1 +0,0 @@
o = jsontospaceparser.o space.o spaceparser.o spacetojson.o

View File

@ -1,870 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstdlib>
#include <wchar.h>
#include "jsontospaceparser.h"
#include "utf8/utf8.h"
namespace PT
{
/*
 * ctor: no root space is set yet, the parser options get their
 * default values from SetDefault()
 */
JSONToSpaceParser::JSONToSpaceParser() : root_space(0)
{
	SetDefault();
}
/*
 * sets the root space (by pointer) into which the parsed data is put;
 * Parse() reports no_space when the pointer is null
 */
void JSONToSpaceParser::SetSpace(Space * pspace)
{
	this->root_space = pspace;
}
/*
 * sets the root space (by reference) into which the parsed data is put
 */
void JSONToSpaceParser::SetSpace(Space & pspace)
{
	this->root_space = &pspace;
}
/*
 * restores the default values of all parser options
 */
void JSONToSpaceParser::SetDefault()
{
	// structural characters -- they can be changed to whatever is needed
	// but they should not be white characters (as recognized by IsWhite())
	// nor the new line character ('\n')
	space_start      = '{';
	space_end        = '}';
	table_start      = '[';
	table_end        = ']';
	separator        = ':';
	option_delimiter = ',';

	// behaviour flags
	skip_empty            = false;
	use_escape_char       = true;
	input_as_utf8         = true;
	create_table_as_space = true;

	// limit of nested objects/tables
	max_nested_level = 1000;
}
void JSONToSpaceParser::SkipEmpty(bool skip)
{
skip_empty = skip;
}
void JSONToSpaceParser::UseEscapeChar(bool escape)
{
use_escape_char = escape;
}
void JSONToSpaceParser::UTF8(bool utf)
{
input_as_utf8 = utf;
}
void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_)
{
create_table_as_space = create_table_as_space_;
}
/*
 * parses the file given by its path
 *
 * the file is opened in binary mode; when it cannot be opened
 * the status is set to cant_open_file
 * returns the status of parsing
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name)
{
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	Parse();
	file.close();

	return status;
}
/*
 * parses the file given by its path (std::string version),
 * forwards to Parse(const char *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name)
{
	const char * path = file_name.c_str();
	return Parse(path);
}
/*
 * parses the file given by its path (wide characters version)
 *
 * the path is converted with PT::WideToUTF8() into the afile_name buffer
 * and then Parse(const char *) is called
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name)
{
	PT::WideToUTF8(file_name, afile_name);

	const char * path = afile_name.c_str();
	return Parse(path);
}
/*
 * parses the file given by its path (std::wstring version),
 * forwards to Parse(const wchar_t *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name)
{
	const wchar_t * path = file_name.c_str();
	return Parse(path);
}
/*
 * parses a null terminated char string
 * (read as UTF-8 or byte by byte, depending on the UTF8() option)
 * returns the status of parsing
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const char * str)
{
	// select the char-string reader
	reading_from_file         = false;
	reading_from_wchar_string = false;

	pchar_unicode = 0;
	pchar_ascii   = str;

	Parse();

	return status;
}
/*
 * parses a std::string, forwards to ParseString(const char *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str)
{
	const char * input = str.c_str();
	return ParseString(input);
}
/*
 * parses a null terminated wide characters string
 * returns the status of parsing
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str)
{
	// select the wide-string reader
	reading_from_file         = false;
	reading_from_wchar_string = true;

	pchar_ascii   = 0;
	pchar_unicode = str;

	Parse();

	return status;
}
/*
 * parses a std::wstring, forwards to ParseString(const wchar_t *)
 */
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str)
{
	const wchar_t * input = str.c_str();
	return ParseString(input);
}
/*
 * the main parsing routine, common for all kinds of input
 *
 * the input has to start (after optional white characters) with an object '{'
 * or with a table '[', otherwise the status is set to syntax_error;
 * when no root space was set the status is no_space and nothing is read
 */
void JSONToSpaceParser::Parse()
{
	if( !root_space )
	{
		status = no_space;
		return;
	}

	// resetting the state from a previous run
	status  = ok;
	line    = 1;
	skipped = 0;
	space   = root_space;
	current_nested_level = 0;

	ReadChar();
	SkipWhite();

	if( lastc == space_start )
	{
		ParseSpace(false, false);
	}
	else
	if( lastc == table_start )
	{
		ParseTable(false);
	}
	else
	{
		// '{' or '[' expected
		status = syntax_error;
	}

	if( status == ok && space != root_space )
	{
		// some closing '}' characters are missing (we are still in a child space)
		status = syntax_error;
	}

	// the temporary buffers are not needed any more
	value.clear();
	key.clear();
	token.clear();
}
/*
 * parses one object: '{' key-value pairs '}'
 *
 * has_space_name   - if true the new child space gets the current key as its name
 * insert_new_space - if true a new child space is created for the object,
 *                    false is used only for the first object (it fills root_space)
 *
 * the nesting level is counted here: without the counter the test against
 * max_nested_level never fires and deeply nested input can exhaust the stack
 * (this method is called recursively through ReadValue());
 * if the limit is exceeded the status is set to max_nested_spaces_exceeded
 * and nothing is read
 */
void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space)
{
	current_nested_level += 1;

	if( current_nested_level > max_nested_level )
	{
		status = max_nested_spaces_exceeded;
		current_nested_level -= 1;
		return;
	}

	if( insert_new_space )
	{
		SpaceStarts(has_space_name);
	}
	else
	{
		// insert_new_space as a false is used only when parsing
		// the first space (root_space)
		ReadChar(); // skipping the first space character '{'
	}

	ParseKeyValuePairs();

	if( insert_new_space )
	{
		SpaceEnds();
	}
	else
	{
		ReadChar(); // skipping the last space character '}'
	}

	current_nested_level -= 1;
}
/*
 * parses the body of an object: a list of 'key separator value' items
 * delimited by option_delimiter
 *
 * reading stops at space_end, at the end of the input or when the status
 * is not ok; the closing space_end character is not consumed here
 */
void JSONToSpaceParser::ParseKeyValuePairs()
{
	SkipWhite();

	while( status == ok && lastc != space_end && lastc != -1 )
	{
		ReadKey();
		SkipWhite();

		if( lastc != separator )
		{
			// a separator ':' expected
			// (the loop condition finishes the loop as the status is not ok now)
			if( status == ok )
				status = syntax_error;

			continue;
		}

		value.clear();
		ReadChar(); // skipping separator ':'
		ReadValue(false, false, true, true);
		SkipWhite();

		if( lastc == option_delimiter )
		{
			ReadChar(); // skipping delimiter ','
		}
		else
		if( status == ok && lastc != space_end )
		{
			// after a value only ',' or '}' is allowed
			status = syntax_error;
		}
	}
}
/*
 * parses a table '[...]' as a flat list of text values
 *
 * text items are collected in 'value', nested objects and tables are skipped
 * (ReadValue() is called with skip_object_or_table set);
 * at the end the list is stored under the current key by AddKeyValuePair()
 */
void JSONToSpaceParser::ParseTextTable()
{
	ReadChar(); // skipping table start character '['
	SkipWhite();
	value.clear();

	while( status == ok && lastc != table_end && lastc != -1 )
	{
		ReadValue(true);
		SkipWhite();

		if( lastc != option_delimiter )
		{
			// after an item only ',' or ']' is allowed
			if( status == ok && lastc != table_end )
				status = syntax_error;
		}
		else
		{
			ReadChar(); // skipping delimiter ','
		}
	}

	if( lastc == table_end )
	{
		ReadChar(); // skipping end table character ']'
	}

	AddKeyValuePair();
}
/*
 * parses a table '[...]' as a child space
 *
 * a new space is created for the table (named with the current key
 * if has_key is true), each item of the table is read by ReadValue()
 * with add_space_for_text_value set so the 'value' list is not used here
 */
void JSONToSpaceParser::ParseObjectsTable(bool has_key)
{
	ReadChar(); // skipping table start character '['
	SpaceStarts(has_key, false);
	SkipWhite();

	while( status == ok && lastc != table_end && lastc != -1 )
	{
		ReadValue(false, true);
		SkipWhite();

		if( lastc != option_delimiter )
		{
			// after an item only ',' or ']' is allowed
			if( status == ok && lastc != table_end )
				status = syntax_error;
		}
		else
		{
			ReadChar(); // skipping delimiter ','
		}
	}

	if( lastc == table_end )
	{
		ReadChar(); // skipping end table character ']'
	}

	SpaceEnds(false);
}
/*
 * parses a table '[...]'
 *
 * has_key - true if the table is a value of a key (the key is in 'key')
 *
 * depending on create_table_as_space the table is stored either as a child
 * space (ParseObjectsTable()) or as a list of text values (ParseTextTable())
 *
 * a table stored as a space creates a next nesting level, so the level is
 * counted here: without the counter the test against max_nested_level never
 * fires and deeply nested tables can exhaust the stack (this method is called
 * recursively through ReadValue()); if the limit is exceeded the status is
 * set to max_nested_spaces_exceeded and nothing is read
 */
void JSONToSpaceParser::ParseTable(bool has_key)
{
	if( create_table_as_space )
	{
		current_nested_level += 1;

		if( current_nested_level > max_nested_level )
		{
			status = max_nested_spaces_exceeded;
		}
		else
		{
			ParseObjectsTable(has_key);
		}

		current_nested_level -= 1;
	}
	else
	{
		// ParseTextTable will not create a next level
		if( !has_key )
			key.clear();

		ParseTextTable(); // ParseTextTable will use key
	}
}
void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char)
{
Space * new_space = new Space();
space->spaces.push_back(new_space);
new_space->parent = space;
if( has_space_name )
new_space->name = key;
space = new_space;
if( skip_space_char )
ReadChar(); // skipping space starts character '{'
}
/*
 * leaves the current space and goes back to its parent
 *
 * skip_space_char - if true the next character is read
 *                   (skipping the closing '}')
 *
 * being in the root space is reported as a syntax_error
 */
void JSONToSpaceParser::SpaceEnds(bool skip_space_char)
{
	if( space != root_space )
	{
		space = space->parent;

		if( skip_space_char )
		{
			ReadChar(); // skipping closing space character '}'
		}
	}
	else
	{
		// there cannot be a loose list end character in the global space
		status = syntax_error;
	}
}
/*
 * returns true if 'c' is a white character:
 * a space, a tab, a line feed (10), a carriage return (13)
 * or an unbreakable space (160)
 */
bool JSONToSpaceParser::IsWhite(int c)
{
	switch( c )
	{
	case ' ':
	case '\t':
	case 10:	// \n
	case 13:	// \r is at the end of a line in a dos file \r\n
	case 160:	// an unbreakable space
		return true;

	default:
		return false;
	}
}
/*
 * reads next characters as long as the last read character is a white one
 */
void JSONToSpaceParser::SkipWhite()
{
	for( ; IsWhite(lastc) ; ReadChar() )
	{
	}
}
/*
 * removes white characters (as defined by IsWhite()) from the beginning
 * and from the end of the string; a string consisting only of white
 * characters becomes empty
 */
void JSONToSpaceParser::Trim(std::wstring & s)
{
	// 'last' is the index just behind the last non-white character
	std::wstring::size_type last = s.size();

	while( last > 0 && IsWhite(s[last - 1]) )
		--last;

	if( last == 0 )
	{
		// the string is empty or has only white characters
		s.clear();
		return;
	}

	// deleting white characters at the end
	s.erase(last);

	// 'first' is the index of the first non-white character
	std::wstring::size_type first = 0;

	while( first < s.size() && IsWhite(s[first]) )
		++first;

	// deleting white characters at the beginning
	s.erase(0, first);
}
/*
 * removes the entry 'var' from the table of the current space
 * (nothing happens if there is no such an entry)
 */
void JSONToSpaceParser::DeleteFromTable(const std::wstring & var)
{
	space->table.erase(var);
}
/*
 * reads a quoted token: all characters up to the closing quotation mark
 * (which was not escaped) are appended to 'token'
 *
 * if the input ends before the closing quotation mark
 * the status is set to syntax_error
 */
void JSONToSpaceParser::ReadTokenQuoted()
{
	ReadChar(); // skipping the first quotation mark

	for( ; lastc != -1 ; ReadChar() )
	{
		if( !char_was_escaped && lastc == '"' )
			break; // this is the closing quotation mark

		token += static_cast<wchar_t>(lastc);
	}

	if( lastc == '"' && !char_was_escaped )
	{
		ReadChar(); // skipping the last quotation mark
	}
	else
	{
		status = syntax_error;
	}
}
/*
 * reads a token which is not quoted
 *
 * characters are appended to 'token' until the end of the input or until
 * a delimiter which was not escaped:
 *  - space_end or table_end
 *  - a white character (only if white_delimit is true)
 *  - a new line character (only if new_line_delimit is true)
 *  - delimit1 or delimit2 (-1 means: not used)
 * the delimiter is not consumed, at the end the token is trimmed
 */
void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
	while( lastc != -1 )
	{
		if( !char_was_escaped )
		{
			const bool is_delimiter =
				lastc == space_end ||
				lastc == table_end ||
				(white_delimit && IsWhite(lastc)) ||
				(new_line_delimit && lastc == '\n') ||
				(delimit1 != -1 && lastc == delimit1) ||
				(delimit2 != -1 && lastc == delimit2);

			if( is_delimiter )
				break;
		}

		token += static_cast<wchar_t>(lastc);
		ReadChar();
	}

	Trim(token);
}
/*
 * reads a token to 'token'
 *
 * leading white characters are skipped; a token beginning with a quotation
 * mark (which was not escaped) is read by ReadTokenQuoted(), any other by
 * ReadTokenSingle() to which all parameters are passed
 */
void JSONToSpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
	token.clear();
	SkipWhite();

	const bool quoted = (lastc == '"' && !char_was_escaped);

	if( quoted )
	{
		ReadTokenQuoted();
	}
	else
	{
		ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
	}
}
/*
 * reads a key to 'key'
 *
 * a key which is not quoted ends at a new line, at the separator
 * or at the table start character
 */
void JSONToSpaceParser::ReadKey()
{
	SkipWhite();
	ReadToken(false, true, separator, table_start);

	key.assign(token);
}
void JSONToSpaceParser::SkipText()
{
ReadChar(); // skipping the first quote character '"'
while( lastc != '"' && lastc != -1 )
ReadChar();
}
void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char)
{
int mark = 1;
skipped += 1;
ReadChar(); // skipping the first object character '{' or '['
do
{
if( lastc == '"' )
SkipText();
else
if( lastc == end_char )
mark -= 1;
else
if( lastc == start_char )
mark += 1;
ReadChar();
}
while( mark > 0 && lastc != -1 );
}
/*
 * skips a whole object: from space_start to the matching space_end
 */
void JSONToSpaceParser::SkipObject()
{
	const int opening = space_start;
	const int closing = space_end;

	SkipObjectOrTable(opening, closing);
}
/*
 * skips a whole table: from table_start to the matching table_end
 */
void JSONToSpaceParser::SkipTable()
{
	const int opening = table_start;
	const int closing = table_end;

	SkipObjectOrTable(opening, closing);
}
//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name)
/*
 * reads one value: an object, a table or a text token
 *
 * skip_object_or_table     - if true objects and tables are skipped and not parsed
 * add_space_for_text_value - if true a text token is stored as the name
 *                            of a new child space
 * has_key                  - passed to ParseSpace() and ParseTable()
 * auto_add_text_value      - if true a text token is stored at once
 *                            by AddKeyValuePair() (after being added to 'value')
 */
void JSONToSpaceParser::ReadValue(bool skip_object_or_table,
                                  bool add_space_for_text_value,
                                  bool has_key,
                                  bool auto_add_text_value)
{
	SkipWhite();

	if( lastc == space_start )
	{
		if( skip_object_or_table )
			SkipObject();
		else
			ParseSpace(has_key);

		return;
	}

	if( lastc == table_start )
	{
		if( skip_object_or_table )
			SkipTable();
		else
			ParseTable(has_key);

		return;
	}

	// a text value

	if( !add_space_for_text_value )
	{
		ReadToken(false, true, option_delimiter, -1);
		value.push_back(token);

		if( auto_add_text_value )
			AddKeyValuePair();

		return;
	}

	// the token is stored as the name of a new (empty) child space
	SpaceStarts(false, false);
	ReadToken(false, true, option_delimiter, -1);
	space->name = token;
	SpaceEnds(false);
}
/*
 * stores the 'value' list under 'key' in the table of the current space
 *
 * if the list is empty and skip_empty is set then nothing is stored
 * and an existing entry with such a key is removed
 */
void JSONToSpaceParser::AddKeyValuePair()
{
	const bool omit = skip_empty && value.empty();

	if( omit )
	{
		DeleteFromTable(key);
	}
	else
	{
		space->table[key] = value;
	}
}
/*
 * reads one UTF-8 character from the file
 *
 * incorrect sequences are skipped; the character is put to 'lastc'
 * and returned, -1 means that the stream went into a fail state
 * (e.g. the end of the file); the line counter is incremented
 * on a new line character
 */
int JSONToSpaceParser::ReadUTF8Char()
{
	int code;
	bool is_correct;

	lastc = -1;

	for(;;)
	{
		PT::UTF8ToInt(file, code, is_correct);

		if( !file )
			return lastc;

		if( is_correct )
			break;
	}

	lastc = code;

	if( lastc == '\n' )
		line += 1;

	return lastc;
}
/*
 * reads one byte from the file with file.get()
 *
 * the result is put to 'lastc' and returned,
 * the line counter is incremented on a new line character
 */
int JSONToSpaceParser::ReadASCIIChar()
{
	const int c = file.get();

	if( c == '\n' )
		line += 1;

	lastc = c;
	return lastc;
}
/*
 * reads one character from the wide characters input string
 *
 * the character is put to 'lastc' and returned, -1 means the end of the
 * string (the pointer is not moved behind the terminating zero);
 * the line counter is incremented on a new line character
 */
int JSONToSpaceParser::ReadCharFromWcharString()
{
	if( *pchar_unicode != 0 )
	{
		lastc = *pchar_unicode;
		pchar_unicode += 1;
	}
	else
	{
		lastc = -1;
	}

	if( lastc == '\n' )
		line += 1;

	return lastc;
}
/*
 * reads one UTF-8 character from the char input string
 *
 * sequences reported by PT::UTF8ToInt() as incorrect are skipped;
 * the character is put to 'lastc' and returned, -1 means that no correct
 * character was found; the line counter is incremented on a new line character
 */
int JSONToSpaceParser::ReadCharFromUTF8String()
{
	int code;
	bool is_correct;

	do
	{
		// UTF8ToInt() returns how many bytes were read
		pchar_ascii += PT::UTF8ToInt(pchar_ascii, code, is_correct);
	}
	while( *pchar_ascii && !is_correct );

	lastc = is_correct ? code : -1;

	if( lastc == '\n' )
		line += 1;

	return lastc;
}
/*
 * reads one byte from the char input string (used when UTF-8 is turned off)
 *
 * the byte is put to 'lastc' and returned, -1 means the end of the string
 * (the pointer is not moved behind the terminating zero);
 * the line counter is incremented on a new line character
 *
 * the byte is converted through unsigned char: where plain char is signed
 * a direct conversion makes bytes above 127 negative, so the byte 0xFF
 * would be equal to -1 (reported as the end of the input) and the byte
 * 160 would never be recognized by IsWhite(); now the values are in the
 * range 0-255, the same as those returned by file.get() in ReadASCIIChar()
 */
int JSONToSpaceParser::ReadCharFromAsciiString()
{
	if( *pchar_ascii == 0 )
		lastc = -1;
	else
		lastc = static_cast<unsigned char>(*(pchar_ascii++));

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
 * reads one character from the current input without interpreting
 * the escape character; the proper reader is chosen by the kind of the
 * input (a file, a wide string, a char string) and by the UTF-8 option
 */
int JSONToSpaceParser::ReadCharNoEscape()
{
	if( reading_from_file )
		return input_as_utf8 ? ReadUTF8Char() : ReadASCIIChar();

	if( reading_from_wchar_string )
		return ReadCharFromWcharString();

	return input_as_utf8 ? ReadCharFromUTF8String() : ReadCharFromAsciiString();
}
/*
 * returns true if 'c' is a hexadecimal digit: 0-9, a-f or A-F
 */
bool JSONToSpaceParser::IsHexDigit(wchar_t c)
{
	if( c >= '0' && c <= '9' )
		return true;

	if( c >= 'a' && c <= 'f' )
		return true;

	return c >= 'A' && c <= 'F';
}
/*
 * converts a hexadecimal digit (0-9, a-f, A-F) to its value 0-15,
 * for any other character zero is returned
 */
int JSONToSpaceParser::HexToInt(wchar_t c)
{
	int digit = 0;

	if( c >= '0' && c <= '9' )
		digit = c - '0';
	else
	if( c >= 'a' && c <= 'f' )
		digit = c - 'a' + 10;
	else
	if( c >= 'A' && c <= 'F' )
		digit = c - 'A' + 10;

	return digit;
}
/*
 * reads four hexadecimal digits of a \uXXXX sequence and puts the resulting
 * code to 'lastc'
 *
 * if any of the four characters is not a hexadecimal digit the status
 * is set to syntax_error and reading is stopped
 */
void JSONToSpaceParser::ReadUnicodeCodePoint()
{
	int code_point = 0;

	for(int digit_index = 0 ; digit_index < 4 ; ++digit_index)
	{
		const wchar_t digit = ReadCharNoEscape();

		if( !IsHexDigit(digit) )
		{
			status = syntax_error;
			return;
		}

		// each digit gives the next four bits
		code_point = (code_point << 4) | HexToInt(digit);
	}

	lastc = static_cast<wchar_t>(code_point);
}
/*
 * reads one character interpreting the escape character '\'
 * (if escaping is turned on)
 *
 * the character is put to 'lastc' and returned; char_was_escaped tells
 * whether it came from an escape sequence
 * recognized sequences: \0 \t \r \n \b \f and \uXXXX,
 * in any other case the character standing after the backslash is returned
 */
int JSONToSpaceParser::ReadChar()
{
	char_was_escaped = false;
	ReadCharNoEscape();

	if( !use_escape_char || lastc != '\\' )
		return lastc;

	char_was_escaped = true;
	ReadCharNoEscape();

	if( lastc == '0' )
		lastc = 0;
	else
	if( lastc == 't' )
		lastc = '\t';
	else
	if( lastc == 'r' )
		lastc = '\r';
	else
	if( lastc == 'n' )
		lastc = '\n';
	else
	if( lastc == 'b' )
		lastc = 0x08;
	else
	if( lastc == 'f' )
		lastc = 0x0c;
	else
	if( lastc == 'u' )
		ReadUnicodeCodePoint();

	return lastc;
}
} // namespace

View File

@ -1,360 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_space_jsonspaceparser
#define headerfile_picotools_space_jsonspaceparser
#include <fstream>
#include "space.h"
namespace PT
{
/*
a parser which reads a JSON-like text (from a file or from a string)
into a tree of Space objects

sample of use:
Space space;
JSONToSpaceParser parser;
parser.SetSpace(space);
if( parser.ParseString(L"{\"key\": \"value\"}") == JSONToSpaceParser::ok )
{
// the parsed data is in 'space'
}
*/
class JSONToSpaceParser
{
public:
/*
ctor -- setting default values (SetDefault() method)
*/
JSONToSpaceParser();
/*
setting the root space (the space which will be filled by the parser)
it has to be set before parsing, otherwise the status will be no_space
*/
void SetSpace(Space * pspace);
void SetSpace(Space & pspace);
/*
setting options of the parser to the default values
utf8 etc.
*/
void SetDefault();
/*
status of parsing
ok - the input was parsed without errors
cant_open_file - the input file could not be opened
syntax_error - the input is malformed (see 'line')
max_nested_spaces_exceeded - too many nested objects/tables
no_space - the root space was not set (see SetSpace())
*/
enum Status { ok, cant_open_file, syntax_error, max_nested_spaces_exceeded, no_space };
/*
the last status of parsing, set by Parse() methods
*/
Status status;
/*
a line counter: it starts from one and is incremented on each new line character,
after a syntax_error it points to the line which was being read
*/
int line;
/*
how many objects were skipped
used in parsing tables when create_table_as_space is false
*/
size_t skipped;
/*
main methods used to parse
file_name is the path to a file
*/
Status Parse(const char * file_name);
Status Parse(const std::string & file_name);
Status Parse(const wchar_t * file_name);
Status Parse(const std::wstring & file_name);
/*
main methods used to parse
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/
Status ParseString(const char * str);
Status ParseString(const std::string & str);
/*
main methods used to parse
here input string is always in unicode (wide characters)
*/
Status ParseString(const wchar_t * str);
Status ParseString(const std::wstring & str);
/*
if true then a key with an empty list of values
will be omitted (not inserted to 'table'),
an existing entry with such a key is removed from 'table'
default: false
*/
void SkipEmpty(bool skip);
/*
'\' character is used to escape other characters
\0 \t \r \n \b \f and \uXXXX are converted to the proper characters,
in other cases the character after '\' is taken as it is, e.g. \" gives "
default: true
*/
void UseEscapeChar(bool escape);
/*
if true then the input file or string (char* or std::string) is treated as UTF-8
default: true
*/
void UTF8(bool utf);
/*
if true then a table [...] is stored as a child space,
if false then a table is stored as a list of text values (objects inside are skipped)
default: true
*/
void CreateTableAsSpace(bool create_table_as_space_);
private:
/*
current space set by SetSpace();
*/
Space * root_space;
/*
a space in which we are now
*/
Space * space;
/*
true if Parse() method was called
false if ParseString() was called
*/
bool reading_from_file;
/*
pointers to the current character
if ParseString() is in use
*/
const char * pchar_ascii;
const wchar_t * pchar_unicode;
/*
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
*/
bool reading_from_wchar_string;
/*
last read token
*/
std::wstring token;
/*
last read key
*/
std::wstring key;
/*
last read list
*/
Space::Value value;
/*
separator between a key and a value, default: ':'
*/
int separator;
/*
space starting character, default: '{'
*/
int space_start;
/*
space ending character, default: '}'
*/
int space_end;
/*
table starting character, default: '['
*/
int table_start;
/*
table ending character, default: ']'
*/
int table_end;
/*
option delimiter, default: ','
*/
int option_delimiter;
/*
last read char
or -1 if the end
*/
int lastc;
/*
true if the lastc was escaped (with a backslash)
we have to know if the last sequence was \" or just "
*/
bool char_was_escaped;
/*
current file
*/
std::ifstream file;
/*
if true then a key with an empty list of values
will be omitted (not inserted to 'table')
default: false
*/
bool skip_empty;
/*
input file is in UTF-8
default: true
*/
bool input_as_utf8;
/*
if true you can use an escape character '\'
default: true
*/
bool use_escape_char;
/*
if false we only allow the tables to consist of text items (numeric, boolean too)
objects are not allowed then
default: true
*/
bool create_table_as_space;
/*
the current level of nested objects/tables,
set to zero at the beginning of parsing
and compared with max_nested_level in ParseSpace() and ParseTable()
*/
size_t current_nested_level;
/*
the limit for current_nested_level
default: 1000;
*/
size_t max_nested_level;
/*
a buffer for a file name converted from wide characters to UTF-8
*/
std::string afile_name;
void Parse();
void ParseSpace(bool has_space_name, bool insert_new_space = true);
void ParseTextTable();
void ParseObjectsTable(bool has_key);
void ParseTable(bool has_key);
void ParseKeyValuePairs();
void SkipText();
void SkipObjectOrTable(int start_char, int end_char);
void SkipTable();
void SkipObject();
void SpaceEnds(bool skip_space_char = true);
void SpaceStarts(bool has_space_name, bool skip_space_char = true);
void DeleteFromTable(const std::wstring & var);
void ReadTokenQuoted();
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadKey();
void ReadValue(bool skip_object_or_table = false,
bool add_space_for_text_value = false,
bool has_key = false,
bool auto_add_text_value = false);
void AddKeyValuePair();
/*
low level readers, each of them sets lastc and returns it
*/
int ReadUTF8Char();
int ReadASCIIChar();
int ReadCharFromWcharString();
int ReadCharFromUTF8String();
int ReadCharFromAsciiString();
int ReadCharNoEscape();
int ReadChar();
bool IsWhite(int c);
void SkipWhite();
void Trim(std::wstring & s);
bool IsHexDigit(wchar_t c);
int HexToInt(wchar_t c);
void ReadUnicodeCodePoint();
};
} // namespace
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,567 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2010-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_space_space
#define headerfile_picotools_space_space
#include <string>
#include <vector>
#include <map>
#include "textstream/types.h"
namespace PT
{
/*
A parser for parsing config files.
A config file can look like this:
variable1 = value 1
variable2 = " value 2 "
variable3 = (value 1, value 2)
variable4 = (" value 1 " , "value2", value 3)
sample of use:
SpaceParser parser;
parser.Parse("/path/to/config");
if( parser.status == SpaceParser::ok )
{
// the whole config we have in parser.table
}
config syntax:
option = list
a list can consist of any number of items; if you're using more than one item you should
use brackets ()
for one item the brackets can be omitted:
option = value
white characters at the beginning of the value (and at the end) will be trimmed,
or you can use quotes:
option = "value"
option2 = "value with spaces at the end "
the form without quotes:
option = value
should be written in one line, so this is not allowed:
option =
value
you can use a new line characters only between brackets and quotes:
option = "this is
a multiline string"
option = ( value1,
value2 )
but there is one requirement: the first character " or ( should be in the same line,
so this is not allowed
option =
"this is wrong"
but this is ok:
option = "
that is ok"
empty lists:
option = ()
this creates an empty list: parser.table['option'].empty() == true
option =
this creates an empty list too (the same as previously)
option = ""
but this doesn't create an empty list, it creates a list with one (empty) item
commentaries:
# this is a commentary (until the end of the line)
option = value # this is a commentary too
commentaries are treated as white characters, other example:
option = ( # this is my list
"value 1" # this is a value one
value 2 # and this is a value two
) # end of my list
overwriting:
option1 = some value
option1 = other value
# always the last option is used so option1 is "other value"
list delimiter:
option1 = (value1, value2, value3)
option2 = ("value1", "value2", "value3")
above we're using a comma ',' as a list delimiter but when using quotes (second line)
the commas can be omitted:
option2 = ("value1" "value2" "value3")
white characters:
the name of an option cannot consist of white characters
some option = value # this is wrong
some_option = value # this is ok
which characters are allowed in an option name is defined by IsVariableChar() method
you can use white characters in values
option = value with spaces or tabs
white characters at the beginning and at the end will be trimmed,
so if you want them use quotes:
option = " other value with spaces "
special characters in quoted strings:
option = "this is a string with \" a quote inside"
the option will be: this is a string with " a quote inside
\\ - means one \
basically: \char produces char
so:
"\a" gives "a"
"\\" gives "\"
"\Z" gives "Z" and so on
you can call UseEscapeChar(false) to turn this off
*/
class Space
{
public:
/*
this is the table which represents your config file
in the Table map: the first (key) is your 'option' and the second is 'list'
*/
typedef std::vector<std::wstring> Value;
typedef std::map<std::wstring, Value> Table;
Space();
~Space();
Space(const Space & s);
Space & operator=(const Space & s);
// IMPROVE ME
// add move cctor
void Clear();
/*
returns true if such an option has 'value'
useful when testing lists (they don't have to be copied out)
*/
bool HasValue(const wchar_t * name, const wchar_t * value);
bool HasValue(const wchar_t * name, const std::wstring & value);
bool HasValue(const std::wstring & name, const wchar_t * value);
bool HasValue(const std::wstring & name, const std::wstring & value);
/*
*
* methods for getting/finding a value
*
*
*/
/*
*
* their working in O(log)
* can return a null pointer
*
*/
Value * GetValue(const wchar_t * name);
Value * GetValue(const std::wstring & name);
const Value * GetValue(const wchar_t * name) const;
const Value * GetValue(const std::wstring & name) const;
// O(n) complexity
Value * GetValueNoCase(const wchar_t * name);
Value * GetValueNoCase(const std::wstring & name);
const Value * GetValueNoCase(const wchar_t * name) const;
const Value * GetValueNoCase(const std::wstring & name) const;
// they can return a null pointer if there is not such a 'name'
std::wstring * GetFirstValue(const wchar_t * name);
std::wstring * GetFirstValue(const std::wstring & name);
const std::wstring * GetFirstValue(const wchar_t * name) const;
const std::wstring * GetFirstValue(const std::wstring & name) const;
/*
those methods are used to extract information from space.table
as a parameter they take the name of an option
and a default value (if there is no such a parameter),
they return appropriate value (either text, int or boolean)
(in lists they return the first item if exists)
when calling Text(...) and AText(...) you should copy the object to whom a reference is returned
it will be cleared in a next call to one of these methods (as well to Int() Size() and Bool())
AText(...) always returns a reference to UTF-8 string
*/
//std::wstring Text(const wchar_t * name) const;
std::wstring Text(const wchar_t * name, const wchar_t * def = 0) const;
std::wstring Text(const std::wstring & name, const wchar_t * def = 0) const;
std::wstring Text(const std::wstring & name, const std::wstring & def) const;
// returns a reference
// if there is no such an option then a new one (def value) is inserted
//std::wstring & TextRef(const wchar_t * name);
std::wstring & TextRef(const wchar_t * name, const wchar_t * def = 0);
std::wstring & TextRef(const std::wstring & name, const wchar_t * def = 0);
std::wstring & TextRef(const std::wstring & name, const std::wstring & def);
// returns UTF-8 string
//std::string TextA(const wchar_t * name) const;
std::string TextA(const wchar_t * name, const char * def) const;
std::string TextA(const std::wstring & name, const char * def) const;
std::string TextA(const std::wstring & name, const std::string & def) const;
std::string TextA(const wchar_t * name, const wchar_t * def) const;
std::string TextA(const std::wstring & name, const wchar_t * def) const;
std::string TextA(const std::wstring & name, const std::wstring & def) const;
int Int(const wchar_t * name, int def = 0) const;
int Int(const std::wstring & name, int def = 0) const;
unsigned int UInt(const wchar_t * name, unsigned int def = 0) const;
unsigned int UInt(const std::wstring & name, unsigned int def = 0) const;
long Long(const wchar_t * name, long def = 0) const;
long Long(const std::wstring & name, long def = 0) const;
unsigned long ULong(const wchar_t * name, unsigned long def = 0) const;
unsigned long ULong(const std::wstring & name, unsigned long def = 0) const;
long long LongLong(const wchar_t * name, long long def = 0) const;
long long LongLong(const std::wstring & name, long long def = 0) const;
unsigned long long ULongLong(const wchar_t * name, unsigned long long def = 0) const;
unsigned long long ULongLong(const std::wstring & name, unsigned long long def = 0) const;
size_t Size(const wchar_t * name, size_t def = 0) const;
size_t Size(const std::wstring & name, size_t def = 0) const;
bool Bool(const wchar_t * name, bool def = false) const;
bool Bool(const std::wstring & name, bool def = false) const;
/*
*
* methods for adding a new value
*
*
*/
std::wstring & FindAdd(const wchar_t * name);
std::wstring & FindAdd(const std::wstring & name);
std::wstring & FindAdd(const WTextStream & name);
std::wstring & Add(const wchar_t * name, bool value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, bool value, bool replace_existing = true);
std::wstring & Add(const wchar_t * name, int value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, int value, bool replace_existing = true);
std::wstring & Add(const wchar_t * name, long value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, long value, bool replace_existing = true);
std::wstring & Add(const wchar_t * name, size_t value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, size_t value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, const std::wstring & value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, const wchar_t * value, bool replace_existing = true);
std::wstring & Add(const wchar_t * name, const wchar_t * value, bool replace_existing = true);
std::wstring & Add(const wchar_t * name, const std::wstring & value, bool replace_existing = true);
std::wstring & Add(const wchar_t * name, const WTextStream & value, bool replace_existing = true);
std::wstring & Add(const std::wstring & name, const WTextStream & value, bool replace_existing = true);
std::wstring & Add(const WTextStream & name, const WTextStream & value, bool replace_existing = true);
void Remove(const wchar_t * name);
void Remove(const std::wstring & name);
Space & AddSpace(const wchar_t * name);
Space & AddSpace(const std::wstring & name);
// looking for the first space with the specified name
// if there is no such space these methods return a null pointer
Space * FindSpace(const wchar_t * name);
Space * FindSpace(const std::wstring & name);
// looking for the first space with the specified name
// if there is no such space then these methods add one
Space & FindAddSpace(const wchar_t * name);
Space & FindAddSpace(const std::wstring & name);
void RemoveSpace(const wchar_t * name);
void RemoveSpace(const std::wstring & name);
void RemoveSpace(size_t child_index);
/*
*
*
raw access to the parsed values
*
*
*/
std::wstring name; // space name
Table table; // std::map<std::wstring, std::vector<std::wstring> >
// child spaces
typedef std::vector<Space*> Spaces;
Spaces spaces;
// a parent space
// null means a root space
Space * parent;
/*
these methods are used to extract lists,
they return true if such an option exists (its value can still be an empty list)
*/
bool ListText(const wchar_t * name, std::vector<std::wstring> & list);
bool ListText(const std::wstring & name, std::vector<std::wstring> & list);
/*
serialize the content
*/
template<class Stream>
void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const;
template<class Stream>
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
template<class Stream, class StringType>
static void PrintValue(Stream & out, const StringType & str, bool use_quote = true);
template<class Stream>
static void PrintKey(Stream & out, const std::wstring & str);
template<class Stream>
static void PrintLevel(Stream & out, bool use_indents, int level);
private:
mutable std::wstring tmp_name;
std::wstring tmp_value;
std::wstring tmp_value_text;
std::string tmp_value_text_ascii;
static unsigned int ToUInt(const std::wstring & value);
static int ToInt(const std::wstring & value);
static unsigned long ToULong(const std::wstring & value);
static long ToLong(const std::wstring & value);
static unsigned long long ToULongLong(const std::wstring & value);
static long long ToLongLong(const std::wstring & value);
static size_t ToSize(const std::wstring & value);
static bool ToBool(const std::wstring & value);
static bool IsWhite(int c);
static bool HasWhite(const std::wstring & str);
};
/*
	writes 'level' space characters to 'out',
	does nothing when use_indents is false
*/
template<class Stream>
void Space::PrintLevel(Stream & out, bool use_indents, int level)
{
	if( !use_indents )
		return;

	for(int left = level ; left > 0 ; --left)
		out << ' ';
}
/*
	writes 'str' to 'out', optionally surrounded with quotation marks

	the characters: 0, \r, \n, \, ", (, ), = are written as a backslash
	followed by one character, every other character is written unchanged
*/
template<class Stream, class StringType>
void Space::PrintValue(Stream & out, const StringType & str, bool use_quote)
{
	if( use_quote )
		out << '\"';

	for(size_t pos = 0 ; pos < str.size() ; ++pos)
	{
		// the character printed after the backslash, zero means "no escaping"
		char escape_as = 0;

		switch(str[pos])
		{
		case 0:    escape_as = '0';  break;
		case '\r': escape_as = 'r';  break;
		case '\n': escape_as = 'n';  break;
		case '\\': escape_as = '\\'; break;
		case '"':  escape_as = '\"'; break;
		case '(':  escape_as = '(';  break;
		case ')':  escape_as = ')';  break;
		case '=':  escape_as = '=';  break;
		}

		if( escape_as != 0 )
		{
			out << '\\';
			out << escape_as;
		}
		else
		{
			out << str[pos];
		}
	}

	if( use_quote )
		out << '\"';
}
/*
	writes a key (an option name or a space name) to 'out'

	the key is put in quotation marks only if it is empty
	or HasWhite() reports a white character in it
*/
template<class Stream>
void Space::PrintKey(Stream & out, const std::wstring & str)
{
	// CHECK ME
	// HasWhite doesn't take a new line into account, is it correct to use it here?
	const bool needs_quotes = str.empty() || HasWhite(str);

	PrintValue(out, str, needs_quotes);
}
template<class Stream>
void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const
{
Table::const_iterator i2;
size_t v;
for(i2 = table.begin() ; i2 != table.end() ; ++i2)
{
PrintLevel(out, use_indents, level);
PrintKey(out, i2->first);
out << L" = ";
if( i2->second.size() != 1 )
out << '(';
for(v = 0 ; v < i2->second.size() ; ++v)
{
if( v > 0 )
PrintLevel(out, use_indents, level + i2->first.size() + 3);
PrintValue(out, i2->second[v]);
if( v + 1 < i2->second.size() )
out << '\n';
}
if( i2->second.size() != 1 )
out << ')';
out << '\n';
}
}
template<class Stream>
void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int level) const
{
if( level > 0 )
{
out << '\n';
PrintLevel(out, use_indents, level);
if( !name.empty() )
{
PrintKey(out, name);
out << ' ';
}
out << L"(\n";
if( use_comments )
{
PrintLevel(out, use_indents, level);
out << L"# space level " << level << '\n';
}
}
SerializeTableMulti(out, use_indents, level);
for(size_t i=0 ; i<spaces.size() ; ++i)
spaces[i]->Serialize(out, use_indents, use_comments, level+1);
if( level > 0 )
{
PrintLevel(out, use_indents, level);
out << ')';
if( use_comments )
{
if( name.empty() )
out << L" # end of unnamed space";
else
out << L" # end of space: " << name;
out << L" (level " << level << L")";
}
out << '\n';
}
}
} // namespace
#endif

View File

@ -1,675 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2008-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstdlib>
#include <wchar.h>
#include "spaceparser.h"
#include "utf8/utf8.h"
namespace PT
{
/*
	ctor: there is no root space yet, the parser options get their default values
*/
SpaceParser::SpaceParser() : root_space(0)
{
	SetDefault();
}
/*
	sets the root space which the Parse methods fill in,
	with a null pointer Parse() reports no_space
*/
void SpaceParser::SetSpace(Space * pspace)
{
	this->root_space = pspace;
}
/*
	sets the root space which the Parse methods fill in (reference version)
*/
void SpaceParser::SetSpace(Space & pspace)
{
	SetSpace(&pspace);
}
/*
	restores the default special characters and the default options
*/
void SpaceParser::SetDefault()
{
	// options
	input_as_utf8   = true;
	use_escape_char = true;
	skip_empty      = false;

	// you can change these characters to whatever you want
	// but you should not use white characters here (as recognized by the IsWhite() method)
	// nor the new line character ('\n')
	separator      = '=';
	commentary     = '#';
	list_start     = '(';
	list_end       = ')';
	list_delimiter = ',';
}
void SpaceParser::SkipEmpty(bool skip)
{
skip_empty = skip;
}
void SpaceParser::UseEscapeChar(bool escape)
{
use_escape_char = escape;
}
void SpaceParser::UTF8(bool utf)
{
input_as_utf8 = utf;
}
/*
	parses the file 'file_name' (opened in binary mode)

	returns the parsing status,
	cant_open_file is returned if the file cannot be opened
*/
SpaceParser::Status SpaceParser::Parse(const char * file_name)
{
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	Parse();
	file.close();

	return status;
}
/*
	parses a file, the path is given as a std::string
*/
SpaceParser::Status SpaceParser::Parse(const std::string & file_name)
{
	const char * path = file_name.c_str();

	return Parse(path);
}
/*
	parses a file, the path is given as a wide string
	the path is converted to UTF-8 (into afile_name) before the file is opened
*/
SpaceParser::Status SpaceParser::Parse(const wchar_t * file_name)
{
	PT::WideToUTF8(file_name, afile_name);
	const char * utf8_path = afile_name.c_str();

	return Parse(utf8_path);
}
/*
	parses a file, the path is given as a std::wstring
*/
SpaceParser::Status SpaceParser::Parse(const std::wstring & file_name)
{
	const wchar_t * wide_path = file_name.c_str();

	return Parse(wide_path);
}
/*
	parses a null terminated char string
	(decoded as UTF-8 or byte by byte, depending on the input_as_utf8 option)
*/
SpaceParser::Status SpaceParser::ParseString(const char * str)
{
	// select the input source: a char string
	pchar_unicode = 0;
	pchar_ascii = str;
	reading_from_wchar_string = false;
	reading_from_file = false;

	Parse();

	return status;
}
/*
	parses the content of a std::string
*/
SpaceParser::Status SpaceParser::ParseString(const std::string & str)
{
	const char * input = str.c_str();

	return ParseString(input);
}
/*
	parses a null terminated wide string
*/
SpaceParser::Status SpaceParser::ParseString(const wchar_t * str)
{
	// select the input source: a wide string
	pchar_ascii = 0;
	pchar_unicode = str;
	reading_from_wchar_string = true;
	reading_from_file = false;

	Parse();

	return status;
}
/*
	parses the content of a std::wstring
*/
SpaceParser::Status SpaceParser::ParseString(const std::wstring & str)
{
	const wchar_t * input = str.c_str();

	return ParseString(input);
}
/*
	the common part of all public Parse/ParseString methods
	the input source has to be selected before this method is called

	status is set to no_space if there is no root space,
	and to syntax_error if a space was opened but not closed
*/
void SpaceParser::Parse()
{
	if( root_space == 0 )
	{
		status = no_space;
		return;
	}

	status = ok;
	line = 1;
	reading_commentary = false;
	space = root_space;

	ReadChar();
	SkipWhiteLines();
	ParseLoop();

	// if we are not back in the root space then
	// the last closing ')' characters are missing (closing a space)
	const bool space_not_closed = (space != root_space);

	if( status == ok && space_not_closed )
		status = syntax_error;

	// the temporary buffers are not needed after parsing
	token.clear();
	key.clear();
	value.clear();
}
/*
	the main loop: works until the end of the input or until the first error

	each iteration handles one item:
	 - a closing character of the current space
	 - a key followed by a list_start character (a new child space)
	 - a key followed by a separator and a value
	anything else is a syntax error
*/
void SpaceParser::ParseLoop()
{
	while( status == ok && lastc != -1 )
	{
		if( lastc == list_end )
		{
			SpaceEnds();
		}
		else
		{
			ReadKey();
			SkipWhite();

			// what comes after the key decides what the key is
			if( lastc == list_start )
			{
				SpaceStarts();
			}
			else
			if( lastc == separator )
			{
				ReadValue();
				AddKeyValuePair();
			}
			else
			{
				status = syntax_error;
			}
		}

		if( status != ok )
			continue; // the loop condition ends the loop

		SkipWhiteLines();
	}
}
/*
	called when a closing character of a space was read:
	goes back to the parent space
*/
void SpaceParser::SpaceEnds()
{
	if( space == root_space )
	{
		// there cannot be a loose list end character in the global space
		status = syntax_error;
		return;
	}

	space = space->parent;

	ReadChar(); // skipping closing space character ')'
	SkipWhite();
}
void SpaceParser::SpaceStarts()
{
Space * new_space = new Space();
space->spaces.push_back(new_space);
new_space->parent = space;
new_space->name = key;
space = new_space;
ReadChar(); // skipping space starts character ')'
}
/*
	returns true if 'c' is a white character

	the set of white characters should be the same as in space.h
	'\n' is deliberately not a white character here
*/
bool SpaceParser::IsWhite(int c)
{
	// 13 (\r) is at the end of a line in a dos file \r\n
	// 160 is an unbreakable space
	return c == ' ' || c == '\t' || c == 13 || c == 160;
}
/*
	skips white characters and comments,
	new line characters are skipped too if skip_lines is true

	skip_lines is false by default
*/
void SpaceParser::SkipWhite(bool skip_lines)
{
	for(;;)
	{
		if( lastc == commentary )
		{
			SkipComment();
		}
		else
		if( IsWhite(lastc) || (skip_lines && lastc == '\n') )
		{
			ReadChar();
		}
		else
		{
			break;
		}
	}
}
/*
	skips white characters, comments and new line characters
*/
void SpaceParser::SkipWhiteLines()
{
	const bool with_new_lines = true;

	SkipWhite(with_new_lines);
}
/*
	reads characters up to the end of the current line (or the end of the input)
	the '\n' character itself is not skipped
*/
void SpaceParser::SkipLine()
{
	for( ; lastc != -1 && lastc != '\n' ; ReadChar())
	{
	}
}
/*
	skips a comment (the rest of the current line)
	the '\n' character at the end is not skipped

	reading_commentary is set for the time of skipping
	so that ReadChar() does not interpret escape characters
*/
void SpaceParser::SkipComment()
{
	reading_commentary = true;
	SkipLine();
	reading_commentary = false;
}
/*
	removes white characters (as recognized by IsWhite())
	from the beginning and from the end of 's'
*/
void SpaceParser::Trim(std::wstring & s)
{
	// 'end' is the position just after the last non-white character
	std::wstring::size_type end = s.size();

	while( end > 0 && IsWhite(s[end-1]) )
		--end;

	if( end == 0 )
	{
		// the string is empty or consists only of white characters
		s.clear();
		return;
	}

	if( end != s.size() )
		s.erase(end, std::wstring::npos);

	// 'begin' is the position of the first non-white character
	std::wstring::size_type begin = 0;

	while( begin < s.size() && IsWhite(s[begin]) )
		++begin;

	if( begin != 0 )
		s.erase(0, begin);
}
/*
	removes the option 'var' from the table of the current space
	nothing happens if there is no such option
*/
void SpaceParser::DeleteFromTable(const std::wstring & var)
{
	// erasing by key is a no-op when the key is absent
	space->table.erase(var);
}
/*
	reads a token put in quotation marks, lastc has to be the opening mark
	the characters are appended to 'token' up to the closing (not escaped) mark

	status is set to syntax_error if the closing mark is missing
*/
void SpaceParser::ReadTokenQuoted()
{
	ReadChar(); // skipping the first quotation mark

	for(;;)
	{
		const bool closing_mark = (!char_was_escaped && lastc == '"');

		if( lastc == -1 || closing_mark )
			break;

		token += static_cast<wchar_t>(lastc);
		ReadChar();
	}

	if( char_was_escaped || lastc != '"' )
	{
		// the input ended before the closing quotation mark
		status = syntax_error;
		return;
	}

	ReadChar(); // skipping the last quotation mark
}
/*
	reads a token which is not put in quotation marks
	the characters are appended to 'token', at the end the token is trimmed

	reading stops at the end of the input or at a not escaped:
	 - list_end character
	 - white character (if white_delimit is true)
	 - new line character (if new_line_delimit is true)
	 - delimit1 or delimit2 character (-1 means: not used)

	a comment met during reading is skipped up to the end of the line
*/
void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
	for(;;)
	{
		if( lastc == commentary )
			SkipComment();

		if( lastc == -1 )
			break;

		if( !char_was_escaped )
		{
			const bool at_delimiter =
				lastc == list_end ||
				(white_delimit && IsWhite(lastc)) ||
				(new_line_delimit && lastc == '\n') ||
				(delimit1 != -1 && lastc == delimit1) ||
				(delimit2 != -1 && lastc == delimit2);

			if( at_delimiter )
				break;
		}

		token += static_cast<wchar_t>(lastc);
		ReadChar();
	}

	Trim(token);
}
/*
	reads the next token to 'token' (the previous content is removed)

	a token beginning with a not escaped quotation mark is read as a quoted one,
	the delimiter parameters are used only for a token without quotation marks
*/
void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
{
	token.clear();
	SkipWhite();

	const bool is_quoted = (!char_was_escaped && lastc == '"');

	if( is_quoted )
	{
		ReadTokenQuoted();
		return;
	}

	ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
}
/*
	reads a key (a name of an option or of a space) to 'key'
	a key ends at a new line, at the separator or at the list_start character
*/
void SpaceParser::ReadKey()
{
	const bool white_delimit    = false;
	const bool new_line_delimit = true;

	ReadToken(white_delimit, new_line_delimit, separator, list_start);
	key = token;

	SkipWhite();
}
/*
	reads a list of values: ( value1, value2 ... ) to 'value'
	lastc has to be the list_start character

	the items are separated with the list_delimiter, a white or a new line character
	status is set to syntax_error if the closing character is missing
*/
void SpaceParser::ReadValueList()
{
	ReadChar(); // skipping the first list character '('
	SkipWhiteLines();

	while( lastc != -1 && lastc != list_end )
	{
		ReadToken(true, true, list_delimiter, list_end);
		value.push_back(token);
		SkipWhiteLines();

		if( lastc != list_delimiter )
			continue;

		// skipping the delimiter between two items
		ReadChar();
		SkipWhiteLines();
	}

	if( lastc != list_end )
	{
		status = syntax_error; // missing one ')'
		return;
	}

	ReadChar(); // skipping the last list character ')'
	SkipWhite();
}
/*
	reads a single value (up to the end of the line) and appends it to 'value'
	the token is appended even if it is empty
*/
void SpaceParser::ReadValueSingle()
{
	const bool white_delimit    = false;
	const bool new_line_delimit = true;

	SkipWhite();
	ReadToken(white_delimit, new_line_delimit, -1, -1);
	value.push_back(token);
	SkipWhite();
}
/*
	reads what is after the separator: a single value or a list of values
	the result is stored in 'value', lastc has to be the separator
*/
void SpaceParser::ReadValue()
{
	ReadChar(); // skipping separator '='
	value.clear();
	SkipWhite();

	const bool is_list = (lastc == list_start);

	if( is_list )
		ReadValueList();
	else
		ReadValueSingle();

	SkipWhiteLines();
}
/*
	stores the last read key and value in the table of the current space
	(an existing value of this key is replaced)

	if skip_empty is set and 'value' has no items
	then the key is removed from the table instead
*/
void SpaceParser::AddKeyValuePair()
{
	const bool drop_it = (value.empty() && skip_empty);

	if( drop_it )
		DeleteFromTable(key);
	else
		space->table[key] = value;
}
/*
	reads one UTF-8 character from the file to lastc
	incorrect sequences are skipped

	returns lastc, -1 means that the file stream is no longer readable
*/
int SpaceParser::ReadUTF8Char()
{
	int decoded;
	bool is_correct;

	lastc = -1;

	for(;;)
	{
		PT::UTF8ToInt(file, decoded, is_correct);

		if( !file )
			return lastc;

		if( is_correct )
			break;
	}

	lastc = decoded;

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
	reads one byte from the file to lastc (the result of file.get())
	and counts the lines
*/
int SpaceParser::ReadASCIIChar()
{
	const int from_file = file.get();

	lastc = from_file;

	if( from_file == '\n' )
		++line;

	return lastc;
}
/*
	reads one character from the wide input string to lastc
	the terminating zero is reported as -1 and is never passed
*/
int SpaceParser::ReadCharFromWcharString()
{
	if( *pchar_unicode != 0 )
	{
		lastc = *pchar_unicode;
		++pchar_unicode;
	}
	else
	{
		lastc = -1;
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
	reads one UTF-8 character from the char input string to lastc
	incorrect sequences are skipped

	returns lastc, -1 means the end of the string

	the end of the string is tested BEFORE decoding: testing it after the
	pointer was advanced made the last character of the input disappear
	(it was decoded but -1 was returned instead of it)
*/
int SpaceParser::ReadCharFromUTF8String()
{
	int c;
	bool correct;

	lastc = -1;

	do
	{
		if( *pchar_ascii == 0 )
			return lastc; // the end of the input (the terminating zero is never passed)

		size_t len = PT::UTF8ToInt(pchar_ascii, c, correct);

		if( len == 0 )
			return lastc; // the decoder made no progress, do not loop forever

		pchar_ascii += len;
	}
	while( !correct );

	lastc = c;

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
	reads one byte from the char input string to lastc
	the terminating zero is reported as -1 and is never passed

	the byte is converted through unsigned char: where plain char is signed
	the bytes 128-255 would become negative values, so 255 would be taken
	for the end of the input (-1) and 160 would not be recognized by IsWhite()
*/
int SpaceParser::ReadCharFromAsciiString()
{
	if( *pchar_ascii == 0 )
		lastc = -1;
	else
		lastc = static_cast<unsigned char>(*(pchar_ascii++));

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
	reads one character from the current input source
	(a file, a wide string or a char string) without interpreting escape characters

	returns lastc
*/
int SpaceParser::ReadCharNoEscape()
{
	if( reading_from_file )
		return input_as_utf8 ? ReadUTF8Char() : ReadASCIIChar();

	if( reading_from_wchar_string )
		return ReadCharFromWcharString();

	return input_as_utf8 ? ReadCharFromUTF8String() : ReadCharFromAsciiString();
}
/*
	reads one character to lastc and interprets an escape sequence:
	\0 \t \r \n are changed to the control characters,
	in other cases the character after the backslash is taken as it is

	char_was_escaped tells whether lastc comes from an escape sequence
	escape sequences are not interpreted in comments
	or if use_escape_char is false
*/
int SpaceParser::ReadChar()
{
	char_was_escaped = false;
	ReadCharNoEscape();

	if( reading_commentary || !use_escape_char || lastc != '\\' )
		return lastc;

	char_was_escaped = true;
	ReadCharNoEscape();

	if( lastc == '0' )
		lastc = 0;
	else
	if( lastc == 't' )
		lastc = '\t';
	else
	if( lastc == 'r' )
		lastc = '\r';
	else
	if( lastc == 'n' )
		lastc = '\n';

	return lastc;
}
} // namespace

View File

@ -1,325 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2010-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_confparser_spaceparser
#define headerfile_picotools_confparser_spaceparser
#include <fstream>
#include "space.h"
namespace PT
{
/*
a parser of the text "space" format: it reads a file or a string
and fills in a tree of Space objects (options of the form "key = value"
or "key = (list)" and nested spaces of the form "name ( ... )")

the root space has to be set with SetSpace() before parsing,
otherwise the Parse methods return no_space
*/
class SpaceParser
{
public:
/*
ctor -- setting default values (SetDefault() method)
*/
SpaceParser();
/*
setting the root space
(the parsed options and child spaces are put into it)
*/
void SetSpace(Space * pspace);
void SetSpace(Space & pspace);
/*
setting options of the parser to the default values
(special characters, utf8 etc.)
*/
void SetDefault();
/*
status of parsing
ok             - no error
cant_open_file - the input file cannot be opened
syntax_error   - the input is malformed (see 'line')
no_space       - the root space was not set
*/
enum Status { ok, cant_open_file, syntax_error, no_space };
/*
the last status of parsing, set by the Parse() methods
*/
Status status;
/*
the line counter: it starts at 1 and is incremented with every new line read,
after a syntax_error it is the number of the line at which the parsing stopped
*/
int line;
/*
main methods used to parse
file_name is the path to a file
(a wide path is converted to UTF-8 before the file is opened)
*/
Status Parse(const char * file_name);
Status Parse(const std::string & file_name);
Status Parse(const wchar_t * file_name);
Status Parse(const std::wstring & file_name);
/*
main methods used to parse
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/
Status ParseString(const char * str);
Status ParseString(const std::string & str);
/*
main methods used to parse
here the input string is always in unicode (wide characters)
*/
Status ParseString(const wchar_t * str);
Status ParseString(const std::wstring & str);
/*
if true then empty values and lists, e.g:
option =
option2 = ()
will be omitted (not inserted to 'table')
default: false

NOTE(review): AddKeyValuePair() skips a pair only when the value list has
no items, and ReadValueSingle() always stores one (possibly empty) token,
so "option =" does not look to be omitted -- confirm the intended behavior
*/
void SkipEmpty(bool skip);
/*
'\' character is used to escape other characters
so "some \t t\"ext" will produce "some t t"ext"
default: true
special characters:
\0 - 0 (zero code point)
\t - tabulator (9 code point)
\r - carriage return (13 code point)
\n - a new line character (10 code point)
in other cases we return the last character so \Z gives Z and \\ gives one \
escape characters are not interpreted in comments
so you can write:
# this is my comment \n but this was not a new line
*/
void UseEscapeChar(bool escape);
/*
if true then the input file or string (char* or std::string) is treated as UTF-8
default: true
*/
void UTF8(bool utf);
private:
/*
the root space set by SetSpace()
*/
Space * root_space;
/*
the space in which we currently are
*/
Space * space;
/*
true if a Parse() method was called
false if a ParseString() method was called
*/
bool reading_from_file;
/*
pointers to the current character
if ParseString() is in use
*/
const char * pchar_ascii;
const wchar_t * pchar_unicode;
/*
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
*/
bool reading_from_wchar_string;
/*
last read token
*/
std::wstring token;
/*
last read key
*/
std::wstring key;
/*
last read list
*/
Space::Value value;
/*
separator between a variable and a value, default: '='
*/
int separator;
/*
commentary char, default: '#'
*/
int commentary;
/*
list starting character, default: '('
*/
int list_start;
/*
list ending character, default: ')'
*/
int list_end;
/*
list delimiter, default: ','
*/
int list_delimiter;
/*
last read char
or -1 at the end of the input
*/
int lastc;
/*
true if the lastc was escaped (with a backslash)
we have to know if the last sequence was \" or just "
*/
bool char_was_escaped;
/*
current file
*/
std::ifstream file;
/*
the option set by SkipEmpty()
default: false
*/
bool skip_empty;
/*
input file (or char string) is in UTF-8
default: true
*/
bool input_as_utf8;
/*
if true then '\' is treated as an escape character (see UseEscapeChar())
*/
bool use_escape_char;
/*
true if we are reading the commentary (#)
this is to avoid parsing escape characters in the commentary
*/
bool reading_commentary;
/*
a buffer for a file name converted from wide characters to UTF-8
*/
std::string afile_name;
/*
the parsing itself (the input source is already selected)
*/
void Parse();
void ParseLoop();
void SpaceEnds();
void SpaceStarts();
void DeleteFromTable(const std::wstring & var);
/*
reading tokens, keys and values
*/
void ReadTokenQuoted();
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
void ReadKey();
void ReadValueList();
void ReadValueSingle();
void ReadValue();
void AddKeyValuePair();
/*
reading one character from the selected input source,
each method stores the character in lastc and returns it
*/
int ReadUTF8Char();
int ReadASCIIChar();
int ReadCharFromWcharString();
int ReadCharFromUTF8String();
int ReadCharFromAsciiString();
int ReadCharNoEscape();
int ReadChar();
/*
white characters, comments and trimming
*/
bool IsWhite(int c);
void SkipWhite(bool skip_lines = false);
void SkipWhiteLines();
void SkipLine();
void SkipComment();
void Trim(std::wstring & s);
};
} // namespace
#endif

View File

@ -1,271 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_space_spacetojson
#define headerfile_picotools_space_spacetojson
#include <string>
#include <vector>
#include <set>
#include "space.h"
namespace PT
{
/*
a serializer of a Space tree to a JSON-like text

the names registered with the TreatAs... methods change how an item is written
(the non-template methods are defined outside of this header)
*/
class SpaceToJSON
{
public:
// presumably removes all names registered with the TreatAs... methods -- confirm in the .cpp file
void Clear();
// a space for which IsTable(name) is true is written as [ ... ] instead of { ... }
// and the names of its child spaces are not written
void TreatAsTable(const wchar_t * space_name);
void TreatAsTable(const std::wstring & space_name);
// the values of an option for which IsNumeric(name) is true are written without quotation marks
void TreatAsNumeric(const wchar_t * name);
void TreatAsNumeric(const std::wstring & name);
// the values of an option for which IsBool(name) is true are written without quotation marks
void TreatAsBool(const wchar_t * name);
void TreatAsBool(const std::wstring & name);
// writes the whole 'space' (with its child spaces) to 'out',
// the name of the top level space is not written
template<class Stream>
void Serialize(Space & space, Stream & out, bool use_indents = false);
private:
// sets of names, presumably filled by the TreatAs... methods and read by the Is... methods
std::set<std::wstring> numeric, boolean, table;
// the recursive part of serializing:
// use_comma      - write a separating comma before this space
// treat_as_table - write the space as [ ... ] and without its options
// skip_name      - do not write the name of the space
template<class Stream>
void Serialize(Space & space, Stream & out, bool use_indents, int level,
bool use_comma, bool treat_as_table, bool skip_name);
// writes the options (the table) of 'space' as "name": value lines
template<class Stream>
void SerializeTableMulti(Space & space, Stream & out, bool use_indents, int level, bool use_comma);
// writes a string with special characters escaped,
// the quotation marks around it are written only if the last argument is false
// (the argument is called is_special in the definition of this method)
template<class Stream, class StringType>
void PrintToken(Stream & out, const StringType & str, bool check_specials = false);
// writes 'level' spaces if use_indents is true
template<class Stream>
void PrintLevel(Stream & out, bool use_indents, int level);
bool IsNumeric(const std::wstring & name);
bool IsBool(const std::wstring & name);
bool IsTable(const std::wstring & name);
};
/*
	writes 'level' space characters to 'out',
	does nothing when use_indents is false
*/
template<class Stream>
void SpaceToJSON::PrintLevel(Stream & out, bool use_indents, int level)
{
	if( !use_indents )
		return;

	for(int left = level ; left > 0 ; --left)
		out << ' ';
}
/*
	writes 'str' to 'out' as the content of a JSON string

	is_special - if false then the string is surrounded with quotation marks,
	             if true (numeric and bool values) there are no quotation marks

	the quotation mark, the backslash and the control characters are escaped:
	\r \n \t \b \f have their short forms, every other character below 0x20
	(the zero character as well) is written as \u00XX -- JSON has no "\0"
	escape and does not allow raw control characters inside a string
*/
template<class Stream, class StringType>
void SpaceToJSON::PrintToken(Stream & out, const StringType & str, bool is_special)
{
	static const char hex_digits[] = "0123456789abcdef";

	if( !is_special )
		out << '\"';

	for(size_t i=0 ; i<str.size() ; ++i)
	{
		switch(str[i])
		{
		case '\r': out << '\\'; out << 'r'; break;
		case '\n': out << '\\'; out << 'n'; break;
		case '\t': out << '\\'; out << 't'; break;
		case 0x08: out << '\\'; out << 'b'; break;
		case 0x0c: out << '\\'; out << 'f'; break;
		case '\\': out << '\\'; out << '\\'; break;
		//case '/': out << '\\'; out << '/'; break; // slash doesn't have to be escaped
		case '"': out << '\\'; out << '\"'; break;
		default:
			// the cast makes negative values (a signed character type) large,
			// so only the real control characters 0x00-0x1f are caught here
			if( static_cast<unsigned long>(str[i]) < 0x20 )
			{
				const unsigned int code = static_cast<unsigned int>(str[i]);

				out << '\\';
				out << 'u';
				out << '0';
				out << '0';
				out << hex_digits[(code >> 4) & 0x0f];
				out << hex_digits[code & 0x0f];
			}
			else
			{
				out << str[i];
			}
		}
	}

	if( !is_special )
		out << '\"';
}
/*
	writes the options of 'space' as lines of the form: "name": value

	an option which does not have exactly one value is written as an array [ ... ]
	the values of numeric and bool options are written without quotation marks
	a comma is written after each option but the last one
	if use_comma is true and the table is not empty
	then a line with a comma is written first
*/
template<class Stream>
void SpaceToJSON::SerializeTableMulti(Space & space, Stream & out, bool use_indents, int level, bool use_comma)
{
	if( use_comma && !space.table.empty() )
	{
		PrintLevel(out, use_indents, level);
		out << L",\n";
	}

	size_t position = 0;
	Space::Table::const_iterator entry = space.table.begin();

	while( entry != space.table.end() )
	{
		const bool without_quotes = IsNumeric(entry->first) || IsBool(entry->first);
		const size_t how_many = entry->second.size();
		const bool as_array = (how_many != 1);

		PrintLevel(out, use_indents, level);
		PrintToken(out, entry->first);
		out << L": ";

		if( as_array )
			out << '[';

		for(size_t n = 0 ; n < how_many ; ++n)
		{
			// the following values are put under the first one
			if( n > 0 )
				PrintLevel(out, use_indents, level + entry->first.size() + 3);

			PrintToken(out, entry->second[n], without_quotes);

			if( n + 1 < how_many )
				out << L",\n";
		}

		if( as_array )
			out << ']';

		if( position + 1 < space.table.size() )
			out << ',';

		out << '\n';

		++entry;
		++position;
	}
}
/*
	writes 'space' and (recursively) its child spaces

	use_comma      - a line with a comma is written before the space
	treat_as_table - the space is written as [ ... ], its own options are not
	                 written and the child spaces are written without names
	skip_name      - the name of the space is not written
	                 (a space without a name is written as "empty")
*/
template<class Stream>
void SpaceToJSON::Serialize(Space & space, Stream & out, bool use_indents, int level,
							bool use_comma, bool treat_as_table, bool skip_name)
{
	if( use_comma )
	{
		PrintLevel(out, use_indents, level);
		out << L",\n";
	}

	PrintLevel(out, use_indents, level);

	if( !skip_name )
	{
		if( !space.name.empty() )
		{
			PrintToken(out, space.name);
			out << L": ";
		}
		else
		{
			out << L"\"empty\": ";
		}
	}

	if( treat_as_table )
		out << L"[\n";
	else
		out << L"{\n";

	// true if the next item has to be preceded by a comma
	bool need_comma = false;

	if( !treat_as_table )
	{
		SerializeTableMulti(space, out, use_indents, level, false);
		need_comma = !space.table.empty();
	}

	/*
	 * !! IMPROVE ME when serializing a table
	 * we can make a test whether a space is empty and has a name
	 * in such a case put it as a string
	 * this is the same way as the json parser works
	 *
	 */
	for(size_t n = 0 ; n < space.spaces.size() ; ++n)
	{
		Space & child = *space.spaces[n];
		const bool child_is_table = IsTable(child.name);

		// children of a table are written without their names
		Serialize(child, out, use_indents, level+1, need_comma, child_is_table, treat_as_table);
		need_comma = true;
	}

	PrintLevel(out, use_indents, level);

	if( treat_as_table )
		out << L"]\n";
	else
		out << L"}\n";
}
/*
	writes the whole 'space' to 'out'
	the top level space is written without its name and without a leading comma
*/
template<class Stream>
void SpaceToJSON::Serialize(Space & space, Stream & out, bool use_indents)
{
	const int  top_level = 0;
	const bool use_comma = false;
	const bool skip_name = true;
	const bool as_table  = IsTable(space.name);

	Serialize(space, out, use_indents, top_level, use_comma, as_table, skip_name);
}
} // namespace
#endif

45
src/Makefile Normal file
View File

@ -0,0 +1,45 @@
# Builds the static library $(libname) from every *.cpp file found below this directory.
#
# Targets:
#   all (default) - build the library
#   clean         - remove the object files and the library
#   depend        - regenerate Makefile.dep with makedepend

sourcefiles:=$(shell find . -name "*.cpp")
objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles))


# Defaults used when the variables are not provided by the caller or the environment.
# NOTE: GNU make predefines CXX and AR itself, so the CXX and AR blocks below
# normally have no effect; they are kept for make implementations without such defaults.
ifndef CXX
CXX = g++
endif

ifndef CXXFLAGS
CXXFLAGS = -Wall -pedantic -O2 -std=c++20 -I../src -I/usr/local/include
endif

ifndef AR
AR = ar
endif


libname = pikotools.a


# These targets do not create files with such names: without .PHONY a file or
# a directory called 'all', 'clean' or 'depend' would stop the target from running.
.PHONY: all clean depend


all: $(libname)


$(libname): $(objfiles)
	$(AR) rcs $(libname) $(objfiles)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<


clean:
	rm -f $(objfiles)
	rm -f $(libname)


depend:
	makedepend -Y. -f- $(sourcefiles) > Makefile.dep


# The generated header dependencies; the leading '-' ignores a missing file.
-include Makefile.dep

43
src/Makefile.dep Normal file
View File

@ -0,0 +1,43 @@
# DO NOT DELETE
./convert/inttostr.o: ./convert/inttostr.h
./convert/misc.o: ./convert/misc.h ./convert/text.h
./convert/text.o: ./convert/text.h ./convert/text_private.h
./date/date.o: ./date/date.h convert/inttostr.h
./log/filelog.o: ./log/filelog.h textstream/textstream.h space/space.h
./log/filelog.o: textstream/types.h convert/inttostr.h date/date.h
./log/filelog.o: membuffer/membuffer.h textstream/types.h utf8/utf8.h
./log/filelog.o: utf8/utf8_templates.h utf8/utf8_private.h
./log/log.o: ./log/log.h textstream/textstream.h space/space.h
./log/log.o: textstream/types.h convert/inttostr.h date/date.h
./log/log.o: membuffer/membuffer.h textstream/types.h ./log/filelog.h
./log/log.o: utf8/utf8.h utf8/utf8_templates.h utf8/utf8_private.h
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
./space/space.o: utf8/utf8.h textstream/textstream.h space/space.h
./space/space.o: date/date.h membuffer/membuffer.h textstream/types.h
./space/space.o: utf8/utf8_templates.h utf8/utf8_private.h convert/convert.h
./space/space.o: ./convert/inttostr.h convert/patternreplacer.h
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
./space/spaceparser.o: textstream/textstream.h space/space.h date/date.h
./space/spaceparser.o: membuffer/membuffer.h textstream/types.h
./space/spaceparser.o: utf8/utf8_templates.h utf8/utf8_private.h
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./utf8/utf8.o: ./utf8/utf8.h textstream/textstream.h space/space.h
./utf8/utf8.o: textstream/types.h convert/inttostr.h date/date.h
./utf8/utf8.o: membuffer/membuffer.h textstream/types.h utf8/utf8_templates.h
./utf8/utf8.o: utf8/utf8_private.h
./utf8/utf8_private.o: utf8/utf8_private.h textstream/textstream.h
./utf8/utf8_private.o: space/space.h textstream/types.h convert/inttostr.h
./utf8/utf8_private.o: date/date.h membuffer/membuffer.h textstream/types.h
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/textstream.h
./csv/csvparser.o: date/date.h membuffer/membuffer.h textstream/types.h
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
./mainoptions/mainoptionsparser.o: textstream/textstream.h date/date.h
./mainoptions/mainoptionsparser.o: membuffer/membuffer.h textstream/types.h
./mainoptions/mainoptionsparser.o: utf8/utf8_templates.h utf8/utf8_private.h

156
src/convert/inttostr.cpp Normal file
View File

@ -0,0 +1,156 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "inttostr.h"
namespace pt
{
/*
 * to_str: return the textual representation of an integer as a std::string
 *
 * value - the number to convert
 * base  - numeric base passed through to Toa()
 *
 * the (unsigned) long long overloads call Toa() to fill the result,
 * all narrower integer types are widened (keeping their signedness) and forwarded to them
 */
std::string to_str(unsigned long long value, int base)
{
std::string res;
// res has just been created so there is nothing to clear,
// the third argument is presumably the clear_string flag of Toa() (see inttostr.h)
Toa(value, res, false, base);
return res;
}
std::string to_str(long long value, int base)
{
std::string res;
// as above: res is still empty here so false is passed as the third argument
Toa(value, res, false, base);
return res;
}
// unsigned long is widened to unsigned long long
std::string to_str(unsigned long value, int base)
{
return to_str(static_cast<unsigned long long>(value), base);
}
// long is widened to long long
std::string to_str(long value, int base)
{
return to_str(static_cast<long long>(value), base);
}
// unsigned int is widened to unsigned long long
std::string to_str(unsigned int value, int base)
{
return to_str(static_cast<unsigned long long>(value), base);
}
// int is widened to long long
std::string to_str(int value, int base)
{
return to_str(static_cast<long long>(value), base);
}
// unsigned short is widened to unsigned long long
std::string to_str(unsigned short value, int base)
{
return to_str(static_cast<unsigned long long>(value), base);
}
// short is widened to long long
std::string to_str(short value, int base)
{
return to_str(static_cast<long long>(value), base);
}
/*
 * to_wstr: return the textual representation of an integer as a std::wstring
 *
 * value - the number to convert
 * base  - numeric base passed through to Toa()
 *
 * the (unsigned) long long overloads call Toa() to fill the result,
 * all narrower integer types are widened (keeping their signedness) and forwarded to them
 */
std::wstring to_wstr(unsigned long long value, int base)
{
std::wstring res;
// res has just been created so there is nothing to clear,
// the third argument is presumably the clear_string flag of Toa() (see inttostr.h)
Toa(value, res, false, base);
return res;
}
std::wstring to_wstr(long long value, int base)
{
std::wstring res;
// as above: res is still empty here so false is passed as the third argument
Toa(value, res, false, base);
return res;
}
// unsigned long is widened to unsigned long long
std::wstring to_wstr(unsigned long value, int base)
{
return to_wstr(static_cast<unsigned long long>(value), base);
}
// long is widened to long long
std::wstring to_wstr(long value, int base)
{
return to_wstr(static_cast<long long>(value), base);
}
// unsigned int is widened to unsigned long long
std::wstring to_wstr(unsigned int value, int base)
{
return to_wstr(static_cast<unsigned long long>(value), base);
}
// int is widened to long long
std::wstring to_wstr(int value, int base)
{
return to_wstr(static_cast<long long>(value), base);
}
// unsigned short is widened to unsigned long long
std::wstring to_wstr(unsigned short value, int base)
{
return to_wstr(static_cast<unsigned long long>(value), base);
}
// short is widened to long long
std::wstring to_wstr(short value, int base)
{
return to_wstr(static_cast<long long>(value), base);
}
}

View File

@ -42,7 +42,7 @@
namespace PT
namespace pt
{
@ -270,14 +270,23 @@ void Toa(short value, StringType & res, bool clear_string = true, int base = 10)
std::wstring Toa(unsigned long long value, int base = 10);
std::wstring Toa(long long value, int base = 10);
std::wstring Toa(unsigned long value, int base = 10);
std::wstring Toa(long value, int base = 10);
std::wstring Toa(unsigned int value, int base = 10);
std::wstring Toa(int value, int base = 10);
std::wstring Toa(unsigned short value, int base = 10);
std::wstring Toa(short value, int base = 10);
std::string to_str(unsigned long long value, int base = 10);
std::string to_str(long long value, int base = 10);
std::string to_str(unsigned long value, int base = 10);
std::string to_str(long value, int base = 10);
std::string to_str(unsigned int value, int base = 10);
std::string to_str(int value, int base = 10);
std::string to_str(unsigned short value, int base = 10);
std::string to_str(short value, int base = 10);
std::wstring to_wstr(unsigned long long value, int base = 10);
std::wstring to_wstr(long long value, int base = 10);
std::wstring to_wstr(unsigned long value, int base = 10);
std::wstring to_wstr(long value, int base = 10);
std::wstring to_wstr(unsigned int value, int base = 10);
std::wstring to_wstr(int value, int base = 10);
std::wstring to_wstr(unsigned short value, int base = 10);
std::wstring to_wstr(short value, int base = 10);

View File

@ -38,7 +38,7 @@
#include "misc.h"
namespace PT
namespace pt
{

View File

@ -42,7 +42,7 @@
#include "text.h"
namespace PT
namespace pt
{
void SetOverflow(bool * was_overflow, bool val);

View File

@ -44,7 +44,7 @@
namespace PT
namespace pt
{
template<typename CharType, typename StrType>

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* Copyright (c) 2017-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -43,7 +43,7 @@
#include "misc.h"
namespace PT
namespace pt
{
@ -59,9 +59,9 @@ unsigned long long Toull(const CharType * str, int base = 10, const CharType **
SetOverflow(was_overflow, false);
if( allow_skip_whitechars )
str = SkipWhite(str);
str = skip_white(str);
while( !carry && IsDigit(*str, base, &digit) )
while( !carry && is_digit(*str, base, &digit) )
{
#ifdef __GNUC__
carry = __builtin_mul_overflow(res, static_cast<unsigned long long>(base), &res);
@ -82,7 +82,7 @@ unsigned long long Toull(const CharType * str, int base = 10, const CharType **
{
if( after_str )
{
while( IsDigit(*str, base, &digit) )
while( is_digit(*str, base, &digit) )
{
str += 1;
}
@ -108,7 +108,7 @@ long long Toll(const CharType * str, int base = 10, const CharType ** after_str
SetOverflow(was_overflow, false);
if( allow_skip_whitechars )
str = SkipWhite(str);
str = skip_white(str);
if( *str == '-' )
{
@ -230,7 +230,7 @@ template<typename CharType>
unsigned long long Toull_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
if( allow_skip_whitechars )
str = SkipWhite(str);
str = skip_white(str);
int base = 10;
@ -266,7 +266,7 @@ long long Toll_b(const CharType * str, const CharType ** after_str = 0, bool * w
SetOverflow(was_overflow, false);
if( allow_skip_whitechars )
str = SkipWhite(str);
str = skip_white(str);
if( *str == '-' )
{

473
src/convert/text.cpp Normal file
View File

@ -0,0 +1,473 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstddef>
#include "text.h"
#include "text_private.h"
namespace pt
{
// additional white characters, kept in ascending order because is_white()
// looks a character up with a binary search
// space (32), tab (9) and the new line character (10) are deliberately
// not listed here, is_white() handles them separately
static const wchar_t white_chars_table[] = {
	0x000B, // LINE TABULATION (vertical tabulation)
	0x000C, // FORM FEED (FF)
	0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file
	0x0085, // NEXT LINE (NEL)
	0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE)
	0x1680, // OGHAM SPACE MARK
	0x180E, // MONGOLIAN VOWEL SEPARATOR
	0x2000, // EN QUAD
	0x2001, // EM QUAD
	0x2002, // EN SPACE
	0x2003, // EM SPACE
	0x2004, // THREE-PER-EM SPACE
	0x2005, // FOUR-PER-EM SPACE
	0x2006, // SIX-PER-EM SPACE
	0x2007, // FIGURE SPACE
	0x2008, // PUNCTUATION SPACE
	0x2009, // THIN SPACE
	0x200A, // HAIR SPACE
	0x2028, // LINE SEPARATOR
	0x2029, // PARAGRAPH SEPARATOR
	0x202F, // NARROW NO-BREAK SPACE
	0x205F, // MEDIUM MATHEMATICAL SPACE
	0x3000, // IDEOGRAPHIC SPACE
	0xFEFF, // ZERO WIDTH NO-BREAK SPACE
};


/*
 * return true if c is a white character
 *
 * a space (32) and a tab (9) are always white
 * a new line (10) is white only if treat_new_line_as_white is true
 * characters from white_chars_table are white only if check_additional_chars is true
 */
bool is_white(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white)
{
	// the most common white characters are tested first
	if( c == 32 || c == 9 )
		return true;

	if( c == 10 )
		return treat_new_line_as_white;

	if( !check_additional_chars )
		return false;

	// binary search over the half-open range [low, high)
	std::size_t low = 0;
	std::size_t high = sizeof(white_chars_table) / sizeof(white_chars_table[0]);

	while( low < high )
	{
		const std::size_t mid = low + (high - low) / 2;

		if( white_chars_table[mid] == c )
			return true;

		if( white_chars_table[mid] < c )
			low = mid + 1;
		else
			high = mid;
	}

	return false;
}
/*
 * check whether c is a digit in the given base
 *
 * recognised characters are '0'-'9', 'a'-'f' and 'A'-'F' (values 0-15)
 * if digit is not null it receives the value of the character,
 * or zero when the character is not one of the above
 *
 * return true only if the character was recognised and its value is less than base
 */
bool is_digit(wchar_t c, int base, int * digit)
{
	int value = 0;
	bool recognised = true;

	if( c >= '0' && c <= '9' )
		value = c - '0';
	else
	if( c >= 'a' && c <= 'f' )
		value = c - 'a' + 10;
	else
	if( c >= 'A' && c <= 'F' )
		value = c - 'A' + 10;
	else
		recognised = false;

	if( digit )
		*digit = value;

	return recognised && value < base;
}
/*
 * skip white characters at the beginning of a null terminated string
 *
 * return a pointer to the first character for which is_white() returns false
 * (this can be the terminating null character)
 * check_additional_chars and treat_new_line_as_white are passed to is_white()
 */
const char * skip_white(const char * str, bool check_additional_chars, bool treat_new_line_as_white)
{
return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white);
}
// the same as above but for a wide string
const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white)
{
return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white);
}
/*
 * skip white characters at the end of a string
 *
 * the string is given either as a range [str_begin, str_end)
 * or as a null terminated string (its end is then found first)
 *
 * return a pointer just after the last non-white character:
 * this is str_end when the string does not end with a white character
 * and str_begin when the string consists of white characters only
 * check_additional_chars and treat_new_line_as_white are passed to is_white()
 */
const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars, bool treat_new_line_as_white)
{
return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white);
}
// range version for a wide string
const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars, bool treat_new_line_as_white)
{
return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white);
}
// null terminated string version
const char * skip_white_from_back(const char * str, bool check_additional_chars, bool treat_new_line_as_white)
{
return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white);
}
// null terminated wide string version
const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white)
{
return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white);
}
/*
 * convert a character to lower case
 * only the letters 'A'-'Z' are changed, any other character is returned as it is
 */
char to_lower(char c)
{
return pt_private::to_lower_generic(c);
}
// wide character version, also only 'A'-'Z' are changed
wchar_t to_lower(wchar_t c)
{
return pt_private::to_lower_generic(c);
}
/*
 * convert a character to upper case
 * only the letters 'a'-'z' are changed, any other character is returned as it is
 */
char to_upper(char c)
{
return pt_private::to_upper_generic(c);
}
// wide character version, also only 'a'-'z' are changed
wchar_t to_upper(wchar_t c)
{
return pt_private::to_upper_generic(c);
}
/*
 * convert all characters of str to lower case, the string is modified in place
 * each character is converted with to_lower() so only 'A'-'Z' are changed
 */
void to_lower_emplace(std::string & str)
{
pt_private::to_lower_str_generic(str);
}
// wide string version
void to_lower_emplace(std::wstring & str)
{
pt_private::to_lower_str_generic(str);
}
/*
 * convert all characters of str to upper case, the string is modified in place
 * each character is converted with to_upper() so only 'a'-'z' are changed
 */
void to_upper_emplace(std::string & str)
{
pt_private::to_upper_str_generic(str);
}
// wide string version
void to_upper_emplace(std::wstring & str)
{
pt_private::to_upper_str_generic(str);
}
/*
 * return a lower case copy of str, the argument itself is not changed
 * the copy is converted with to_lower_emplace()
 */
std::string to_lower(const std::string & str)
{
	std::string lowered = str;
	to_lower_emplace(lowered);

	return lowered;
}


// wide string version
std::wstring to_lower(const std::wstring & str)
{
	std::wstring lowered = str;
	to_lower_emplace(lowered);

	return lowered;
}
/*
 * return an upper case copy of str, the argument itself is not changed
 * the copy is converted with to_upper_emplace()
 */
std::string to_upper(const std::string & str)
{
	std::string uppered = str;
	to_upper_emplace(uppered);

	return uppered;
}


// wide string version
std::wstring to_upper(const std::wstring & str)
{
	std::wstring uppered = str;
	to_upper_emplace(uppered);

	return uppered;
}
/*
 * compare two strings (case sensitive)
 *
 * return zero if the strings are equal, otherwise the difference between
 * the first two characters which differ (the end of a string counts as zero)
 * characters of char strings are compared as unsigned char values
 */
int compare(const char * str1, const char * str2)
{
return pt_private::compare_generic(str1, str2);
}
int compare(const wchar_t * str1, const wchar_t * str2)
{
return pt_private::compare_generic(str1, str2);
}
// the string objects are compared through c_str()
// so the comparison stops at the first null character
int compare(const std::string & str1, const std::string & str2)
{
return pt_private::compare_str_generic(str1, str2);
}
int compare(const std::wstring & str1, const std::wstring & str2)
{
return pt_private::compare_str_generic(str1, str2);
}
// the first string is given as a range [str1_begin, str1_end),
// the second one (str2) is null terminated
int compare(const char * str1_begin, const char * str1_end, const char * str2)
{
return pt_private::compare_generic(str1_begin, str1_end, str2);
}
int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
{
return pt_private::compare_generic(str1_begin, str1_end, str2);
}
/*
 * compare two strings without regard to case (nc - no case)
 *
 * characters are converted with to_lower() before they are compared
 * so only the letters 'A'-'Z' / 'a'-'z' are treated as equal
 *
 * return zero if the strings are equal, otherwise the difference between
 * the first two (lowered) characters which differ (the end of a string counts as zero)
 */
int compare_nc(const char * str1, const char * str2)
{
return pt_private::compare_nc_generic(str1, str2);
}
int compare_nc(const wchar_t * str1, const wchar_t * str2)
{
return pt_private::compare_nc_generic(str1, str2);
}
// the string objects are compared through c_str()
// so the comparison stops at the first null character
int compare_nc(const std::string & str1, const std::string & str2)
{
return pt_private::compare_nc_str_generic(str1, str2);
}
int compare_nc(const std::wstring & str1, const std::wstring & str2)
{
return pt_private::compare_nc_str_generic(str1, str2);
}
// the first string is given as a range [str1_begin, str1_end),
// the second one (str2) is null terminated
int compare_nc(const char * str1_begin, const char * str1_end, const char * str2)
{
return pt_private::compare_nc_generic(str1_begin, str1_end, str2);
}
int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
{
return pt_private::compare_nc_generic(str1_begin, str1_end, str2);
}
/*
 * return true if two strings are equal (case sensitive)
 * this is the same as testing whether the generic compare returns zero
 */
bool is_equal(const char * str1, const char * str2)
{
return pt_private::compare_generic(str1, str2) == 0;
}
bool is_equal(const wchar_t * str1, const wchar_t * str2)
{
return pt_private::compare_generic(str1, str2) == 0;
}
// the string objects are compared through c_str()
// so only the characters before the first null character are taken into account
bool is_equal(const std::string & str1, const std::string & str2)
{
return is_equal(str1.c_str(), str2.c_str());
}
bool is_equal(const std::wstring & str1, const std::wstring & str2)
{
return is_equal(str1.c_str(), str2.c_str());
}
// the first string is given as a range [str1_begin, str1_end),
// the second one (str2) is null terminated
bool is_equal(const char * str1_begin, const char * str1_end, const char * str2)
{
return pt_private::compare_generic(str1_begin, str1_end, str2) == 0;
}
bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
{
return pt_private::compare_generic(str1_begin, str1_end, str2) == 0;
}
/*
 * return true if two strings are equal without regard to case (nc - no case)
 * this is the same as testing whether the generic no-case compare returns zero,
 * only the letters 'A'-'Z' / 'a'-'z' are treated as equal
 */
bool is_equal_nc(const char * str1, const char * str2)
{
return pt_private::compare_nc_generic(str1, str2) == 0;
}
bool is_equal_nc(const wchar_t * str1, const wchar_t * str2)
{
return pt_private::compare_nc_generic(str1, str2) == 0;
}
// the string objects are compared through c_str()
// so only the characters before the first null character are taken into account
bool is_equal_nc(const std::string & str1, const std::string & str2)
{
return is_equal_nc(str1.c_str(), str2.c_str());
}
bool is_equal_nc(const std::wstring & str1, const std::wstring & str2)
{
return is_equal_nc(str1.c_str(), str2.c_str());
}
// the first string is given as a range [str1_begin, str1_end),
// the second one (str2) is null terminated
bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2)
{
return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0;
}
bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
{
return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0;
}
/*
 * return true if long_str begins with short_str (case sensitive)
 *
 * the strings are matched only from their first characters,
 * short_str is not searched for in the middle of long_str
 * an empty short_str always gives true
 */
bool is_substr(const char * short_str, const char * long_str)
{
return pt_private::is_substr_generic(short_str, long_str);
}
bool is_substr(const wchar_t * short_str, const wchar_t * long_str)
{
return pt_private::is_substr_generic(short_str, long_str);
}
// the string objects are tested through c_str()
bool is_substr(const std::string & short_str, const std::string & long_str)
{
return is_substr(short_str.c_str(), long_str.c_str());
}
bool is_substr(const std::wstring & short_str, const std::wstring & long_str)
{
return is_substr(short_str.c_str(), long_str.c_str());
}
/*
 * return true if long_str begins with short_str without regard to case (nc - no case)
 *
 * the strings are matched only from their first characters,
 * short_str is not searched for in the middle of long_str
 * characters are converted with to_lower() before they are compared
 * an empty short_str always gives true
 */
bool is_substr_nc(const char * short_str, const char * long_str)
{
return pt_private::is_substr_nc_generic(short_str, long_str);
}
bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str)
{
return pt_private::is_substr_nc_generic(short_str, long_str);
}
// the string objects are tested through c_str()
bool is_substr_nc(const std::string & short_str, const std::string & long_str)
{
return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str());
}
bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str)
{
return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str());
}
}

150
src/convert/text.h Normal file
View File

@ -0,0 +1,150 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_text
#define headerfile_picotools_convert_text
#include <string>
namespace pt
{
/*
 * return true if c is a white character
 *
 * a space (32) and a tab (9) are always white,
 * a new line (10) is white only if treat_new_line_as_white is true,
 * other (unicode) white characters are tested only if check_additional_chars is true
 */
bool is_white(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true);
/*
 * return true if c is a digit in the given base
 * the characters '0'-'9', 'a'-'f' and 'A'-'F' are recognised,
 * if digit is not null the value of the character is stored there
 * (zero if the character was not recognised)
 */
bool is_digit(wchar_t c, int base = 10, int * digit = 0);
/*
 * skip white characters at the beginning of a null terminated string
 * return a pointer to the first non-white character (or to the terminating null)
 */
const char * skip_white(const char * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);
/*
*
* str_end points at the end of the string (the last item + one)
*
* the return value is a pointer to the first character of the run of white characters
* at the end of the string, or to the last+one item (str_end) if the string
* does not end with a white character
*
* the versions taking only str expect a null terminated string
*
*/
const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const char * skip_white_from_back(const char * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);
/*
 * change the case of a single character
 * only the letters 'A'-'Z' / 'a'-'z' are converted
 */
char to_lower(char c);
wchar_t to_lower(wchar_t c);
char to_upper(char c);
wchar_t to_upper(wchar_t c);
/*
 * change the case of all characters of a string, the string is modified in place
 */
void to_lower_emplace(std::string & str);
void to_lower_emplace(std::wstring & str);
void to_upper_emplace(std::string & str);
void to_upper_emplace(std::wstring & str);
/*
 * return a copy of a string with the case changed, the argument is not modified
 */
std::string to_lower(const std::string & str);
std::wstring to_lower(const std::wstring & str);
std::string to_upper(const std::string & str);
std::wstring to_upper(const std::wstring & str);
/*
 * compare two strings (case sensitive)
 * return zero if the strings are equal, otherwise the difference between
 * the first two characters which differ
 * in the three-argument versions the first string is the range [str1_begin, str1_end)
 */
int compare(const char * str1, const char * str2);
int compare(const wchar_t * str1, const wchar_t * str2);
int compare(const std::string & str1, const std::string & str2);
int compare(const std::wstring & str1, const std::wstring & str2);
int compare(const char * str1_begin, const char * str1_end, const char * str2);
int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);
/*
* compare no case
* the same as compare() but characters are converted with to_lower() before they are compared
*/
int compare_nc(const char * str1, const char * str2);
int compare_nc(const wchar_t * str1, const wchar_t * str2);
int compare_nc(const std::string & str1, const std::string & str2);
int compare_nc(const std::wstring & str1, const std::wstring & str2);
int compare_nc(const char * str1_begin, const char * str1_end, const char * str2);
int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);
/*
 * return true if two strings are equal (case sensitive)
 */
bool is_equal(const char * str1, const char * str2);
bool is_equal(const wchar_t * str1, const wchar_t * str2);
bool is_equal(const std::string & str1, const std::string & str2);
bool is_equal(const std::wstring & str1, const std::wstring & str2);
bool is_equal(const char * str1_begin, const char * str1_end, const char * str2);
bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);
/*
 * return true if two strings are equal without regard to case (no case)
 */
bool is_equal_nc(const char * str1, const char * str2);
bool is_equal_nc(const wchar_t * str1, const wchar_t * str2);
bool is_equal_nc(const std::string & str1, const std::string & str2);
bool is_equal_nc(const std::wstring & str1, const std::wstring & str2);
bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2);
bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);
/*
 * return true if long_str begins with short_str (case sensitive)
 * short_str is matched only at the beginning of long_str
 */
bool is_substr(const char * short_str, const char * long_str);
bool is_substr(const wchar_t * short_str, const wchar_t * long_str);
bool is_substr(const std::string & short_str, const std::string & long_str);
bool is_substr(const std::wstring & short_str, const std::wstring & long_str);
/*
 * return true if long_str begins with short_str without regard to case (no case)
 */
bool is_substr_nc(const char * short_str, const char * long_str);
bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str);
bool is_substr_nc(const std::string & short_str, const std::string & long_str);
bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str);
}
#endif

309
src/convert/text_private.h Normal file
View File

@ -0,0 +1,309 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_text_private
#define headerfile_picotools_convert_text_private
#include <string>
#include "text.h"
namespace pt
{
namespace pt_private
{
/*
 * return the lower case equivalent of c
 * only the letters 'A'-'Z' are converted, other characters are returned unchanged
 */
template<class CharType>
CharType to_lower_generic(CharType c)
{
	const bool is_upper_letter = (c >= 'A' && c <= 'Z');

	return is_upper_letter ? static_cast<CharType>(c - 'A' + 'a') : c;
}
/*
 * return the upper case equivalent of c
 * only the letters 'a'-'z' are converted, other characters are returned unchanged
 */
template<class CharType>
CharType to_upper_generic(CharType c)
{
	const bool is_lower_letter = (c >= 'a' && c <= 'z');

	return is_lower_letter ? static_cast<CharType>(c - 'a' + 'A') : c;
}
/*
 * convert every character of s to lower case (in place) with to_lower()
 */
template<class StringType>
void to_lower_str_generic(StringType & s)
{
	for(auto & character : s)
		character = to_lower(character);
}
/*
 * convert every character of s to upper case (in place) with to_upper()
 */
template<class StringType>
void to_upper_str_generic(StringType & s)
{
	for(auto & character : s)
		character = to_upper(character);
}
/*
 * move str forward as long as it points at a white character
 * each character is converted to wchar_t and tested with is_white()
 * the terminating null character is not white so the scan stops there at the latest
 */
template<class CharType>
CharType * skip_white_generic(CharType * str, bool check_additional_chars, bool treat_new_line_as_white)
{
	for( ; is_white(static_cast<wchar_t>(*str), check_additional_chars, treat_new_line_as_white) ; ++str )
	{
	}

	return str;
}
/*
 * move str_end backward as long as the character before it is white
 * the scan never goes below str_begin
 * return the new end of the range (a pointer just after the last non-white character)
 */
template<class CharType>
CharType * skip_white_from_back_generic(CharType * str_begin, CharType * str_end, bool check_additional_chars, bool treat_new_line_as_white)
{
	for( ; str_end > str_begin ; --str_end )
	{
		const wchar_t last_char = static_cast<wchar_t>(*(str_end - 1));

		if( !is_white(last_char, check_additional_chars, treat_new_line_as_white) )
			break;
	}

	return str_end;
}
/*
 * version for a null terminated string:
 * find the terminating null character first and then use the range version
 */
template<class CharType>
CharType * skip_white_from_back_generic(CharType * str, bool check_additional_chars, bool treat_new_line_as_white)
{
	CharType * str_end = str;

	while( *str_end != 0 )
		++str_end;

	return skip_white_from_back_generic(str, str_end, check_additional_chars, treat_new_line_as_white);
}
/*
 * compare two null terminated strings
 *
 * return zero if the strings are equal, otherwise the difference between
 * the first two characters which differ (the terminating null counts as zero)
 * if both character types are one byte long the characters are
 * compared as unsigned char values
 */
template<class StringType1, class StringType2>
int compare_generic(const StringType1 * str1, const StringType2 * str2)
{
	// skip the common prefix
	for( ; *str1 && *str2 && *str1 == *str2 ; ++str1, ++str2 )
	{
	}

	if( *str1 == 0 && *str2 == 0 )
		return 0;

	if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
	{
		const int left  = static_cast<wchar_t>(static_cast<unsigned char>(*str1));
		const int right = static_cast<wchar_t>(static_cast<unsigned char>(*str2));

		return left - right;
	}
	else
	{
		const int left  = *str1;
		const int right = *str2;

		return left - right;
	}
}
/*
 * compare two string objects, the comparison is made on the pointers
 * returned by c_str() (so it stops at the first null character)
 */
template<class StringType1, class StringType2>
int compare_str_generic(const StringType1 & str1, const StringType2 & str2)
{
	const auto * left  = str1.c_str();
	const auto * right = str2.c_str();

	return compare_generic(left, right);
}
/*
 * compare the range [str1_begin, str1_end) with the null terminated string str2
 *
 * return zero if both strings are equal, otherwise the difference between
 * the first two characters which differ
 * the end of the range and the terminating null of str2 count as zero
 * if both character types are one byte long the characters are
 * compared as unsigned char values
 */
template<class StringType1, class StringType2>
int compare_generic(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
	// skip the common prefix
	for( ; str1_begin < str1_end && *str2 && *str1_begin == *str2 ; ++str1_begin, ++str2 )
	{
	}

	if( str1_begin == str1_end && *str2 == 0 )
		return 0;

	const bool inside_range = (str1_begin < str1_end);

	if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
	{
		int left = 0;

		if( inside_range )
			left = static_cast<wchar_t>(static_cast<unsigned char>(*str1_begin));

		const int right = static_cast<wchar_t>(static_cast<unsigned char>(*str2));

		return left - right;
	}
	else
	{
		int left = 0;

		if( inside_range )
			left = *str1_begin;

		const int right = *str2;

		return left - right;
	}
}
/*
 * compare two null terminated strings without regard to case
 *
 * characters are converted with to_lower() before they are compared
 * return zero if the strings are equal, otherwise the difference between
 * the first two (lowered) characters which differ
 * if both character types are one byte long the characters are
 * treated as unsigned char values
 */
template<class StringType1, class StringType2>
int compare_nc_generic(const StringType1 * str1, const StringType2 * str2)
{
	// skip the common (no case) prefix
	for( ; *str1 && *str2 ; ++str1, ++str2 )
	{
		if( !(to_lower(*str1) == to_lower(*str2)) )
			break;
	}

	if( *str1 == 0 && *str2 == 0 )
		return 0;

	if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
	{
		const int left  = to_lower(static_cast<wchar_t>(static_cast<unsigned char>(*str1)));
		const int right = to_lower(static_cast<wchar_t>(static_cast<unsigned char>(*str2)));

		return left - right;
	}
	else
	{
		const int left  = to_lower(*str1);
		const int right = to_lower(*str2);

		return left - right;
	}
}
/*
 * compare two string objects without regard to case
 *
 * the comparison is made on the pointers returned by c_str()
 * (so it stops at the first null character)
 * return zero if the strings are equal, otherwise the difference between
 * the first two (lowered) characters which differ
 *
 * the generic implementation is called directly (in the same way as
 * compare_str_generic() calls compare_generic()) so this template does not
 * depend on the public compare_nc() overloads and works for any pair
 * of string types accepted by compare_nc_generic()
 */
template<class StringType1, class StringType2>
int compare_nc_str_generic(const StringType1 & str1, const StringType2 & str2)
{
	return compare_nc_generic(str1.c_str(), str2.c_str());
}
/*
 * compare the range [str1_begin, str1_end) with the null terminated string str2
 * without regard to case
 *
 * characters are converted with to_lower() before they are compared
 * return zero if both strings are equal, otherwise the difference between
 * the first two (lowered) characters which differ
 * the end of the range and the terminating null of str2 count as zero
 */
template<class StringType1, class StringType2>
int compare_nc_generic(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
	// skip the common (no case) prefix
	for( ; str1_begin < str1_end && *str2 ; ++str1_begin, ++str2 )
	{
		if( !(to_lower(*str1_begin) == to_lower(*str2)) )
			break;
	}

	if( str1_begin == str1_end && *str2 == 0 )
		return 0;

	const bool inside_range = (str1_begin < str1_end);

	if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
	{
		int left = 0;

		if( inside_range )
			left = to_lower(static_cast<wchar_t>(static_cast<unsigned char>(*str1_begin)));

		const int right = to_lower(static_cast<wchar_t>(static_cast<unsigned char>(*str2)));

		return left - right;
	}
	else
	{
		int left = 0;

		if( inside_range )
			left = to_lower(*str1_begin);

		const int right = to_lower(*str2);

		return left - right;
	}
}
/*
 * return true if long_str begins with short_str
 *
 * both strings are walked from their beginnings as long as the characters
 * are the same, short_str matches if its terminating null has been reached
 */
template<class StringType1, class StringType2>
bool is_substr_generic(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str && *long_str && *short_str == *long_str ; ++short_str, ++long_str )
	{
	}

	return *short_str == 0;
}
/*
 * return true if long_str begins with short_str (without regard to case)
 *
 * both strings are walked from their beginnings as long as the characters
 * converted with to_lower() are the same,
 * short_str matches if its terminating null has been reached
 */
template<class StringType1, class StringType2>
bool is_substr_nc_generic(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str && *long_str ; ++short_str, ++long_str )
	{
		if( !(to_lower(*short_str) == to_lower(*long_str)) )
			break;
	}

	return *short_str == 0;
}
} // namespace pt_private
} // namespace pt
#endif

416
src/csv/csvparser.cpp Normal file
View File

@ -0,0 +1,416 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "csvparser.h"
#include "utf8/utf8.h"
namespace pt
{
/*
 * parse a csv file and put the result to out_space
 *
 * the file is opened in binary mode
 * if it cannot be opened then status is set to cant_open_file and nothing is parsed
 * return the status of the operation
 */
CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space)
{
	reading_from_file = true;
	space = &out_space;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	parse();
	file.close();

	return status;
}


// the same as above, the file name is given as a std::string
CSVParser::Status CSVParser::parse_file(const std::string & file_name, Space & out_space)
{
	const char * name = file_name.c_str();

	return parse_file(name, out_space);
}


// a wide file name is converted to utf-8 before the file is opened
CSVParser::Status CSVParser::parse_file(const wchar_t * file_name, Space & out_space)
{
	std::string utf8_name;
	wide_to_utf8(file_name, utf8_name);

	return parse_file(utf8_name.c_str(), out_space);
}


// the same as above, the file name is given as a std::wstring
CSVParser::Status CSVParser::parse_file(const std::wstring & file_name, Space & out_space)
{
	const wchar_t * name = file_name.c_str();

	return parse_file(name, out_space);
}
/*
 * parse csv data from a null terminated string and put the result to out_space
 * characters are taken from pchar_ascii
 * return the status of the operation
 */
CSVParser::Status CSVParser::parse(const char * str, Space & out_space)
{
	space = &out_space;

	pchar_ascii = str;
	pchar_unicode = nullptr;

	reading_from_wchar_string = false;
	reading_from_file = false;

	parse();

	return status;
}


// the same as above, the input is given as a std::string
CSVParser::Status CSVParser::parse(const std::string & str, Space & out_space)
{
	const char * input = str.c_str();

	return parse(input, out_space);
}


/*
 * parse csv data from a null terminated wide string and put the result to out_space
 * characters are taken from pchar_unicode
 * return the status of the operation
 */
CSVParser::Status CSVParser::parse(const wchar_t * str, Space & out_space)
{
	space = &out_space;

	pchar_unicode = str;
	pchar_ascii = nullptr;

	reading_from_wchar_string = true;
	reading_from_file = false;

	parse();

	return status;
}


// the same as above, the input is given as a std::wstring
CSVParser::Status CSVParser::parse(const std::wstring & str, Space & out_space)
{
	const wchar_t * input = str.c_str();

	return parse(input, out_space);
}
/*
 * the main parsing routine
 *
 * the output space is set to an empty table and one table (a row) is added
 * for each row of the input
 * for an empty file/string exactly one empty row is added
 * so the space serialized to json would look like: [[]]
 */
void CSVParser::parse()
{
	line = 1;
	status = ok;
	space->set_empty_table();

	read_char(); // put first character to lastc

	do
	{
		Space row_space;
		row_space.set_empty_table();

		/*
		 * lastc is -1 here only for an empty input (first iteration),
		 * in such a case the row is left empty
		 *
		 * even if there is an error when parsing we continue to read the file/string
		 */
		if( lastc != -1 )
			parse_row(row_space);

		space->add(std::move(row_space));
	}
	while( lastc != -1 );
}
/*
 * Read one row: values are added to row_space as long as
 * read_value_to() reports that the value was followed by a comma.
 */
void CSVParser::parse_row(Space & row_space)
{
	while( read_value_to(row_space) )
	{
		// the value ended with a comma so there is one more value in this row
	}
}
/*
 * Add a new wide string value to row_space and read its content.
 * A value starting with a quotation mark is read as a quoted value.
 *
 * Returns true if the value was followed by a comma
 * (which means the row has at least one more value).
 */
bool CSVParser::read_value_to(Space & row_space)
{
	Space & cell = row_space.add_empty_space();
	cell.set_empty_wstring();

	std::wstring & text = cell.value.value_wstring;
	const bool is_quoted = (lastc == '"');

	return is_quoted ? read_quoted_value_to(text) : read_non_quoted_value_to(text);
}
/*
 * Read a quoted value, on entry lastc is the opening quotation mark.
 *
 * Two quotation marks in a row are an escaped quotation mark (one is added to value),
 * a single quotation mark ends the value. Commas and new lines inside the quotes
 * are a part of the value.
 *
 * After the value one separator is skipped: a comma, CR, CR LF or LF.
 * Any other character is left in lastc.
 *
 * Returns true if the value was followed by a comma.
 */
bool CSVParser::read_quoted_value_to(std::wstring & value)
{
	bool closing_quote_found = false;

	while( lastc != -1 && !closing_quote_found )
	{
		read_char();

		if( lastc == -1 )
			continue; // end of input, the loop condition stops the reading

		if( lastc != '"' )
		{
			value += lastc;
			continue;
		}

		// a quotation mark: either an escaped one ("") or the end of the value
		read_char();

		if( lastc == '"' )
			value += lastc;
		else
			closing_quote_found = true;
	}

	switch( lastc )
	{
	case ',':
		read_char(); // skip the comma character
		return true;

	case 13:
		read_char(); // skip CR character

		if( lastc == 10 )
			read_char(); // skip LF from the CR LF sequence

		break;

	case 10:
		read_char(); // skip new line character
		break;

	default:
		break;
	}

	return false;
}
/*
 * Read a value which is not quoted: all characters up to a comma,
 * a new line (LF) or the end of the input are added to value.
 *
 * The terminating comma or LF is skipped. If the value ends with LF
 * and its last character is CR (a CR LF sequence) the CR is removed.
 *
 * Returns true if the value was followed by a comma.
 */
bool CSVParser::read_non_quoted_value_to(std::wstring & value)
{
	for( ; lastc != -1 && lastc != ',' && lastc != 10 ; read_char() )
	{
		value += lastc;
	}

	if( lastc == ',' )
	{
		read_char(); // skip the comma character
		return true;
	}

	if( lastc == 10 )
	{
		// check CRLF sequence
		if( !value.empty() && value.back() == 13 )
			value.pop_back();

		read_char(); // skip the new line character
	}

	return false;
}
/*
 * Read the next character from the file with utf8_to_int().
 * Sequences reported as incorrect are skipped.
 *
 * The character is stored in lastc and returned, -1 is used
 * when the file stream is in a failed state after reading.
 * The line counter is incremented on a new line character.
 */
int CSVParser::read_utf8_char()
{
	int code = 0;
	bool is_valid = false;

	lastc = -1;

	while( !is_valid )
	{
		utf8_to_int(file, code, is_valid);

		if( !file )
			return lastc;
	}

	lastc = code;

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
 * Read the next byte from the file without any decoding.
 * The value returned by file.get() is stored in lastc and returned,
 * the line counter is incremented on a new line character.
 */
int CSVParser::read_ascii_char()
{
	const int c = file.get();

	if( c == '\n' )
		++line;

	lastc = c;
	return c;
}
/*
 * Read the next character from the wide input string (pchar_unicode).
 * At the terminating null character lastc is set to -1 and the pointer is not moved.
 * The line counter is incremented on a new line character.
 */
int CSVParser::read_char_from_wchar_string()
{
	lastc = -1;

	if( *pchar_unicode != 0 )
	{
		lastc = *pchar_unicode;
		++pchar_unicode;
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
 * Read the next character from the narrow input string (pchar_ascii) with utf8_to_int().
 * The pointer is moved by the length returned from utf8_to_int(); the decoding is repeated
 * until a correct character is reported or the terminating null character is reached.
 *
 * lastc is set to the decoded character or to -1 if the last decoding was not correct.
 * The line counter is incremented on a new line character.
 */
int CSVParser::read_char_from_utf8_string()
{
	int code = 0;
	bool is_valid = false;

	lastc = -1;

	for(;;)
	{
		size_t consumed = utf8_to_int(pchar_ascii, code, is_valid);
		pchar_ascii += consumed;

		if( is_valid || *pchar_ascii == 0 )
			break;
	}

	if( is_valid )
		lastc = code;

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
 * Read the next byte from the narrow input string (pchar_ascii) without any decoding.
 * At the terminating null character lastc is set to -1 and the pointer is not moved.
 * The line counter is incremented on a new line character.
 *
 * The byte is converted through unsigned char so the result is in the range 0-255
 * (the same as a value read from a file with file.get()). Without it, on platforms
 * where char is signed, the byte 0xFF would be read as -1 and stop the parsing
 * and other bytes above 0x7F would be read as negative values.
 */
int CSVParser::read_char_from_ascii_string()
{
	if( *pchar_ascii == 0 )
	{
		lastc = -1;
	}
	else
	{
		lastc = static_cast<unsigned char>(*pchar_ascii);
		++pchar_ascii;
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}
/*
 * Read the next character from the current input.
 * The reading method is selected by the reading_from_file,
 * reading_from_wchar_string and input_as_utf8 flags.
 *
 * Returns the character (also stored in lastc by the selected method).
 */
int CSVParser::read_char_no_escape()
{
	if( reading_from_file )
		return input_as_utf8 ? read_utf8_char() : read_ascii_char();

	if( reading_from_wchar_string )
		return read_char_from_wchar_string();

	return input_as_utf8 ? read_char_from_utf8_string() : read_char_from_ascii_string();
}
/*
 * Read the next character from the input,
 * currently this is only a call to read_char_no_escape().
 */
int CSVParser::read_char()
{
	const int c = read_char_no_escape();
	return c;
}
}

162
src/csv/csvparser.h Normal file
View File

@ -0,0 +1,162 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_csv_csvparser
#define headerfile_picotools_csv_csvparser
#include "space/space.h"
#include <string>
#include <fstream>
namespace pt
{
/*
*
* https://datatracker.ietf.org/doc/html/rfc4180
*
*/
/*
 * A parser for CSV files and strings.
 *
 * The output is put to a Space object as a table of rows,
 * each row is a table of wide string values.
 *
 * The class has no user declared constructor so all scalar fields
 * have default member initializers -- without them the fields
 * (especially input_as_utf8 which is read on every character)
 * would have indeterminate values in a default-initialized object.
 */
class CSVParser
{
public:

	enum Status
	{
		ok,
		cant_open_file,
	};


	/*
		parse a CSV file
		file_name is the path to the file
	*/
	Status parse_file(const char * file_name, Space & out_space);
	Status parse_file(const std::string & file_name, Space & out_space);
	Status parse_file(const wchar_t * file_name, Space & out_space);
	Status parse_file(const std::wstring & file_name, Space & out_space);


	/*
		parse CSV data from a string
	*/
	Status parse(const char * str, Space & out_space);
	Status parse(const std::string & str, Space & out_space);
	Status parse(const wchar_t * str, Space & out_space);
	Status parse(const std::wstring & str, Space & out_space);


protected:

	/*
		the last status of parsing, set by parse() and parse_file() methods
	*/
	Status status = ok;


	/*
		the output space, set by parse() and parse_file() methods
	*/
	Space * space = nullptr;


	/*
		true if parse_file() method was called
		false if parse() was called
	*/
	bool reading_from_file = false;


	/*
		true if parse(wchar_t *) or parse(std::wstring&) was called
	*/
	bool reading_from_wchar_string = false;


	/*
		pointers to the current character
		if parse() is being used
	*/
	const char * pchar_ascii = nullptr;
	const wchar_t * pchar_unicode = nullptr;


	/*
		last read char
		or -1 if the end
	*/
	int lastc = -1;


	/*
		a number of the line which is currently being read (starting from one)
	*/
	int line = 1;


	/*
		current file

		may it would be better to make a pointer?
		if we parse only a string then there is no sense to have such an object
	*/
	std::ifstream file;


	/*
		input file is in UTF-8
		default: true
	*/
	bool input_as_utf8 = true;


	void parse();
	void parse_row(Space & row_space);

	bool read_value_to(Space & row_space);
	bool read_quoted_value_to(std::wstring & value);
	bool read_non_quoted_value_to(std::wstring & value);


	/*
	 * copied from SpaceParser
	 * may it would be better to have a class with those methods and inherit from it?
	 */
	int read_utf8_char();
	int read_ascii_char();
	int read_char_from_wchar_string();
	int read_char_from_utf8_string();
	int read_char_from_ascii_string();
	int read_char_no_escape();
	int read_char();

};
}
#endif

View File

@ -41,7 +41,7 @@
#include <string.h>
namespace PT
namespace pt
{

View File

@ -44,7 +44,7 @@
namespace PT
namespace pt
{

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* Copyright (c) 2018-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -39,7 +39,7 @@
namespace PT
namespace pt
{
@ -74,7 +74,7 @@ void FileLog::init(const std::wstring & log_file, bool log_stdout, int log_level
this->log_stdout = log_stdout;
this->log_level = log_level;
this->save_each_line = save_each_line;
PT::WideToUTF8(log_file, this->log_file);
wide_to_utf8(log_file, this->log_file);
}
@ -100,7 +100,7 @@ void FileLog::open_file()
}
void FileLog::save_log(PT::WTextStream * buffer)
void FileLog::save_log(WTextStream * buffer)
{
if( buffer->empty() )
return;
@ -111,7 +111,7 @@ void FileLog::save_log(PT::WTextStream * buffer)
{
if( log_stdout )
{
PT::WideToUTF8(*buffer, std::cout);
wide_stream_to_utf8(*buffer, std::cout);
}
if( !log_file.empty() )
@ -126,7 +126,7 @@ void FileLog::save_log(PT::WTextStream * buffer)
if( file )
{
PT::WideToUTF8(*buffer, file);
wide_stream_to_utf8(*buffer, file);
file.flush();
}
}

View File

@ -42,7 +42,7 @@
#include "textstream/textstream.h"
namespace PT
namespace pt
{
@ -55,7 +55,7 @@ public:
virtual ~FileLog();
virtual void init(const std::wstring & log_file, bool log_stdout, int log_level, bool save_each_line);
virtual void save_log(PT::WTextStream * buffer);
virtual void save_log(WTextStream * buffer);
virtual int get_log_level();
virtual bool should_save_each_line();

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* Copyright (c) 2018-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -41,8 +41,11 @@
#include "date/date.h"
#include "utf8/utf8.h"
#ifdef PT_HAS_MORM
#include "morm.h"
#endif
namespace PT
namespace pt
{
@ -70,13 +73,13 @@ Log::~Log()
}
void Log::SetLogBuffer(PT::WTextStream * buffer)
void Log::SetLogBuffer(WTextStream * buffer)
{
this->buffer = buffer;
}
PT::WTextStream * Log::GetLogBuffer()
WTextStream * Log::GetLogBuffer()
{
return buffer;
}
@ -133,7 +136,7 @@ Log & Log::operator<<(const char * s)
{
if( buffer && file_log && s && current_level <= file_log->get_log_level() )
{
PT::UTF8ToWide(s, *buffer, false);
utf8_to_wide(s, *buffer, false);
}
return *this;
@ -145,7 +148,7 @@ Log & Log::operator<<(const std::string & s)
{
if( buffer && file_log && current_level <= file_log->get_log_level() )
{
PT::UTF8ToWide(s, *buffer, false);
utf8_to_wide(s, *buffer, false);
}
return *this;
@ -157,7 +160,7 @@ Log & Log::operator<<(const std::string * s)
{
if( buffer && file_log && current_level <= file_log->get_log_level() )
{
PT::UTF8ToWide(*s, *buffer, false);
utf8_to_wide(*s, *buffer, false);
}
return *this;
@ -287,7 +290,7 @@ Log & Log::operator<<(double s)
Log & Log::operator<<(const PT::Space & s)
Log & Log::operator<<(const Space & s)
{
if( buffer && file_log && current_level <= file_log->get_log_level() )
{
@ -299,7 +302,7 @@ Log & Log::operator<<(const PT::Space & s)
Log & Log::operator<<(const PT::Date & date)
Log & Log::operator<<(const Date & date)
{
if( buffer && file_log && current_level <= file_log->get_log_level() )
{
@ -310,6 +313,14 @@ Log & Log::operator<<(const PT::Date & date)
}
#ifdef PT_HAS_MORM
Log & Log::operator<<(morm::Model & model)
{
operator<<(model.to_string());
return *this;
}
#endif
Log & Log::operator<<(Manipulators m)
{

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* Copyright (c) 2018-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -45,7 +45,13 @@
namespace PT
namespace morm
{
class Model;
}
namespace pt
{
@ -87,8 +93,8 @@ public:
Log();
virtual ~Log();
virtual void SetLogBuffer(PT::WTextStream * buffer);
virtual PT::WTextStream * GetLogBuffer();
virtual void SetLogBuffer(WTextStream * buffer);
virtual WTextStream * GetLogBuffer();
void SetFileLog(FileLog * file_log);
FileLog * GetFileLog();
@ -122,17 +128,24 @@ public:
//virtual Log & operator<<(float s); // added
virtual Log & operator<<(double s);
virtual Log & operator<<(const PT::Space & space);
virtual Log & operator<<(const PT::Date & date);
virtual Log & operator<<(const Space & space);
virtual Log & operator<<(const Date & date);
#ifdef PT_HAS_MORM
virtual Log & operator<<(morm::Model & model);
#endif
virtual Log & operator<<(Manipulators m);
virtual Log & LogString(const std::string & value, size_t max_size);
virtual Log & LogString(const std::wstring & value, size_t max_size);
template<typename char_type, size_t stack_size, size_t heap_block_size>
Log & operator<<(const PT::TextStreamBase<char_type, stack_size, heap_block_size> & buf);
Log & operator<<(const TextStreamBase<char_type, stack_size, heap_block_size> & buf);
@ -144,7 +157,7 @@ public:
protected:
// buffer for the log
PT::WTextStream * buffer;
WTextStream * buffer;
// file logger
FileLog * file_log;
@ -192,7 +205,7 @@ Log & Log::log_string_generic(const StringType & value, size_t max_size)
template<typename char_type, size_t stack_size, size_t heap_block_size>
Log & Log::operator<<(const PT::TextStreamBase<char_type, stack_size, heap_block_size> & buf)
Log & Log::operator<<(const TextStreamBase<char_type, stack_size, heap_block_size> & buf)
{
if( buffer && file_log && current_level <= file_log->get_log_level() )
{

View File

@ -0,0 +1,397 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2016-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mainoptionsparser.h"
#include "utf8/utf8.h"
#include <string.h>
namespace pt
{
/*
 * Set default values: no output space, no description of required arguments,
 * UTF-8 conversion enabled and L"args" as the name for non-option arguments.
 */
MainOptionsParser::MainOptionsParser() :
	space(nullptr),
	arguments_required_space(nullptr),
	non_option_arguments_name(L"args"),
	should_use_utf8(true),
	last_status(status_ok)
{
}
/*
 * Nothing to release here: space and arguments_required_space
 * only point to objects passed to the parse() methods.
 */
MainOptionsParser::~MainOptionsParser()
{
}
void MainOptionsParser::use_utf8(bool utf8)
{
should_use_utf8 = utf8;
}
/*
 * Set the name of the field in the output space
 * in which non-option arguments are stored.
 */
void MainOptionsParser::set_non_options_arguments_name(const wchar_t * name)
{
	non_option_arguments_name.assign(name);
}
/*
 * Set the name of the field in the output space
 * in which non-option arguments are stored.
 */
void MainOptionsParser::set_non_options_arguments_name(const std::wstring & name)
{
	non_option_arguments_name.assign(name);
}
std::wstring & MainOptionsParser::get_wrong_option()
{
return last_error_option;
}
/*
 * Parse the parameters without a description of required arguments:
 * no option takes an argument (see how_many_arguments_required()).
 */
MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv, Space & out_space)
{
	arguments_required_space = nullptr;
	space = &out_space;

	return parse(argc, argv);
}
/*
 * Parse the parameters, 'arguments' describes how many arguments
 * each option requires (see how_many_arguments_required()).
 */
MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv, Space & out_space, const Space & arguments)
{
	arguments_required_space = &arguments;
	space = &out_space;

	return parse(argc, argv);
}
/*
 * Parse all parameters to the output space (set earlier by the public parse() methods).
 *
 * The output space is made an empty object, argv[0] is skipped and the parsing
 * stops at the first error. The temporary buffers are cleared at the end.
 *
 * Returns the last status.
 */
MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv)
{
	last_status = status_ok;
	last_error_option.clear();
	space->set_empty_object();

	// a negative argc converted to size_t would be a huge value
	// and argv would be read outside of its bounds
	const size_t args_count = (argc > 0) ? static_cast<size_t>(argc) : 0;

	for(size_t i = 1 ; i < args_count && last_status == status_ok ; )
	{
		// the index is moved forward by the called method
		parse(args_count, argv, i);
	}

	options.clear();
	option.clear();
	argument.clear();
	arguments.clear();

	return last_status;
}
/*
 * Recognize the kind of the parameter at argv[argv_index] and call a proper method:
 *   "--"      - skipped, all next parameters are non-option arguments
 *   "--abc"   - a long option
 *   "-abc"    - one or more short options
 *   otherwise (also a single "-") this and all next parameters are non-option arguments
 *
 * argv_index is moved forward by the called method.
 */
void MainOptionsParser::parse(size_t argc, const char ** argv, size_t & argv_index)
{
	const char * arg = argv[argv_index];
	const bool starts_with_hyphen = (arg[0] == '-');

	if( starts_with_hyphen && arg[1] == '-' )
	{
		if( arg[2] == 0 )
		{
			// two hyphens only "--"
			++argv_index;
			parse_non_option_arguments(argc, argv, argv_index);
		}
		else
		{
			// two hyphens and a string, such as "--abc"
			parse_long_option(argc, argv, argv_index);
		}

		return;
	}

	if( starts_with_hyphen && arg[1] != 0 )
	{
		// one hyphen and a string, such as "-abc"
		parse_short_option(argc, argv, argv_index);
		return;
	}

	parse_non_option_arguments(argc, argv, argv_index);
}
/*
 * Convert a null-terminated string to a wide string.
 * With UTF-8 enabled utf8_to_wide() is used, otherwise dst is cleared
 * and each byte is copied as one wide character (as an unsigned value).
 */
void MainOptionsParser::convert_str(const char * src, std::wstring & dst)
{
	if( should_use_utf8 )
	{
		utf8_to_wide(src, dst);
		return;
	}

	dst.clear();

	while( *src != 0 )
	{
		dst += static_cast<wchar_t>(static_cast<unsigned char>(*src));
		++src;
	}
}
/*
 * Convert the first len bytes of src to a wide string.
 * With UTF-8 enabled utf8_to_wide() is used, otherwise dst is cleared
 * and each byte is copied as one wide character (as an unsigned value).
 */
void MainOptionsParser::convert_str(const char * src, size_t len, std::wstring & dst)
{
	if( should_use_utf8 )
	{
		utf8_to_wide(src, len, dst);
		return;
	}

	dst.clear();
	const char * const src_end = src + len;

	for(const char * p = src ; p != src_end ; ++p)
	{
		dst += static_cast<wchar_t>(static_cast<unsigned char>(*p));
	}
}
/*
 * Store the string src in the space value.
 * With UTF-8 enabled the value is a wide string (a copy of src),
 * otherwise the value is a narrow string and each wide character is cast to char.
 */
void MainOptionsParser::convert_str(const std::wstring & src, Space & space)
{
	if( should_use_utf8 )
	{
		space.set_empty_wstring();
		space.value.value_wstring = src;
		return;
	}

	space.set_empty_string();
	std::string & narrow = space.value.value_string;
	narrow.clear();

	for(wchar_t wide_char : src)
	{
		narrow += static_cast<char>(wide_char);
	}
}
void MainOptionsParser::parse_short_option(size_t argc, const char ** argv, size_t & argv_index)
{
convert_str(argv[argv_index] + 1, options);
const wchar_t * options_pchar = options.c_str();
arguments.clear();
bool was_argument = false;
argv_index += 1;
for( ; *options_pchar && !was_argument && last_status == status_ok ; ++options_pchar )
{
option = *options_pchar;
size_t args_len = how_many_arguments_required(option);
if( args_len > 0 )
{
was_argument = true;
if( *(options_pchar+1) )
{
// first argument is directly behind the option
argument = options_pchar + 1;
arguments.push_back(argument);
args_len -= 1;
}
parse_arguments(argc, argv, argv_index, args_len);
}
add_option_to_space(option, arguments);
}
}
/*
 * Parse a long option, such as "--option" or "--option=argument".
 *
 * In the form with the equal sign:
 *   - if the option takes no arguments then a non-empty argument is an error (status_argument_provided)
 *   - if the option takes one argument then a non-empty argument is used,
 *     an empty one is treated as not provided (the argument is read from the next parameter)
 *   - if the option takes more arguments it is an error (status_argument_not_provided)
 *
 * Without the equal sign all arguments are read from the next parameters.
 * The option is added to the output space only if there was no error.
 */
void MainOptionsParser::parse_long_option(size_t argc, const char ** argv, size_t & argv_index)
{
	const char * name_begin = argv[argv_index] + 2; // skip first two hyphens --
	const char * equal_sign = strchr(name_begin, '=');
	const bool has_equal_sign = (equal_sign != nullptr);

	if( has_equal_sign )
	{
		convert_str(name_begin, static_cast<size_t>(equal_sign - name_begin), option);
		convert_str(equal_sign + 1, argument);
	}
	else
	{
		convert_str(name_begin, option);
	}

	++argv_index;
	size_t required = how_many_arguments_required(option);
	arguments.clear();

	if( has_equal_sign )
	{
		switch( required )
		{
		case 0:
			if( !argument.empty() )
			{
				// report an error
				last_status = status_argument_provided;
				last_error_option = option;
			}
			break;

		case 1:
			// argument can be empty in such a case: option=
			// we treat it as if the argument would not be provided
			if( !argument.empty() )
			{
				arguments.push_back(argument);
				required -= 1;
			}
			break;

		default:
			// more than one argument is required but when using
			// option=argument form we can provide only one argument
			last_status = status_argument_not_provided;
			last_error_option = option;
			break;
		}
	}

	if( last_status != status_ok )
		return;

	parse_arguments(argc, argv, argv_index, required);
	add_option_to_space(option, arguments);
}
/*
 * Read args_len arguments of the current option from the next parameters
 * and add them to the 'arguments' vector, argv_index is moved forward.
 * If there are not enough parameters the status is set to status_argument_not_provided.
 */
void MainOptionsParser::parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len)
{
	size_t missing = args_len;

	while( missing > 0 && argv_index < argc )
	{
		convert_str(argv[argv_index], argument);
		arguments.push_back(argument);

		--missing;
		++argv_index;
	}

	if( missing != 0 )
	{
		last_status = status_argument_not_provided;
		last_error_option = option;
	}
}
/*
 * Add all remaining parameters (from argv_index to the end) as a table
 * of strings to the output space, the table is stored in the field
 * called non_option_arguments_name. argv_index is moved to argc.
 *
 * The table is created directly in the output space, there is no object
 * allocated with new which could be lost when convert_str() or add()
 * throws an exception.
 */
void MainOptionsParser::parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index)
{
	Space & table_with_args = space->add_empty_space(non_option_arguments_name);
	table_with_args.set_empty_table();

	for( ; argv_index < argc ; ++argv_index)
	{
		convert_str(argv[argv_index], argument);
		table_with_args.add(argument);
	}
}
/*
 * Add the option with its arguments to the output space.
 *
 * The field called 'option' is a table (it is created or changed to a table if needed),
 * each occurrence of the option adds to it one table with the arguments
 * (an empty table if the option has no arguments).
 *
 * The table with arguments is created directly in the option table, there is
 * no object allocated with new which could be lost when an exception is thrown.
 */
void MainOptionsParser::add_option_to_space(const std::wstring & option, const std::vector<std::wstring> & arguments)
{
	Space * option_table = space->get_object_field(option);

	if( !option_table )
	{
		option_table = &space->add_empty_space(option);
	}

	if( !option_table->is_table() )
	{
		option_table->set_empty_table();
	}

	Space & arguments_table = option_table->add_empty_space();
	arguments_table.set_empty_table();

	for(const std::wstring & arg : arguments)
	{
		Space & space_arg = arguments_table.add_empty_space();
		convert_str(arg, space_arg);
	}
}
/*
 * Return how many arguments the option 'arg' requires.
 *
 * The number is read from the field called 'arg' of arguments_required_space
 * (zero is used as the default value passed to to_llong()).
 * Zero is returned if there is no such a space, if the space is not an object
 * or if the number is negative.
 */
size_t MainOptionsParser::how_many_arguments_required(const std::wstring & arg)
{
	size_t res = 0;

	if( arguments_required_space && arguments_required_space->is_object() )
	{
		// long long to not truncate the value on platforms where long has 32 bits
		long long required = arguments_required_space->to_llong(arg, 0);

		if( required > 0 )
			res = static_cast<size_t>(required);

		// option 'arg' needs 'res' arguments
	}

	return res;
}
} // namespace

View File

@ -0,0 +1,162 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2016-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_mainoptions_mainoptionsparser
#define headerfile_picotools_mainoptions_mainoptionsparser
#include "space/space.h"
#include <string>
#include <vector>
namespace pt
{
/*
* a very little parser for parsing main(int argc, char ** argv) parameters
*
*
*/
class MainOptionsParser
{
public:
MainOptionsParser();
~MainOptionsParser();
/*
* status_ok - all arguments have been parsed correctly
*
* status_argument_provided - an argument has been provided but was not requested
* this can happen when using the long form with an equal sign, such as: --option=argument
* and in the 'arguments' space the option either was not set or has zero requested arguments
*
* status_argument_not_provided - an argument or arguments are required but were not provided
* this can be returned in two situations:
* 1. when using the long form with an equal sign, such as: --option=argument and in the 'arguments' space
* you have requested more than one argument
* 2. when reading arguments and the input strings ended
*
*/
enum Status
{
status_ok = 0,
status_argument_provided = 1,
status_argument_not_provided = 2,
};
/*
* the name of a field in the output space for non-option arguments
* (those after two hyphens "--", or all parameters starting from the first one which is not an option)
* default: L"args"
* they will be set as a table of strings
*
*/
void set_non_options_arguments_name(const wchar_t * name);
void set_non_options_arguments_name(const std::wstring & name);
/*
* parse parameters
* argc and argv have the same meaning as in the main(int argc, const char ** argv) function
* the first argument from argv is usually the name of the program and is skipped by this parser
*
* out_space - the output space, it is set to an object in which each option is a field
* holding a table, one item (a table of arguments) for each occurrence of the option
*
* arguments - an object space in which a field with the name of an option
* holds the number of arguments required by the option,
* without this parameter no option takes arguments
*
* return value: look at the description of the Status enum
*
*/
Status parse(int argc, const char ** argv, Space & out_space);
Status parse(int argc, const char ** argv, Space & out_space, const Space & arguments);
/*
* whether or not option arguments should be converted from utf8 char* strings to wide strings (std::wstring)
* default true
*
* if true all option arguments in the output space will be saved as std::wstring
* if false all option arguments will be std::string (they are read as they are without checking
* whether correct utf8 characters are encountered)
*
* internally the options and arguments are always held as std::wstring,
* when using use_utf8(false) characters will not be treated as an utf8 string but just all 8bit char bytes
* will be copied to std::wstring
*
*/
void use_utf8(bool utf8);
/*
* return the last option name which was incorrectly parsed
* or an empty string if status was equal to status_ok
*/
std::wstring & get_wrong_option();
private:
// the output space, set by the public parse() methods
Space * space;
// the space describing how many arguments each option requires, can be null
const Space * arguments_required_space;
std::wstring non_option_arguments_name;
// temporary buffers used when parsing, cleared when the parsing is finished
std::wstring options, option, argument;
std::vector<std::wstring> arguments;
bool should_use_utf8;
Status last_status;
std::wstring last_error_option;
void convert_str(const char * src, std::wstring & dst);
void convert_str(const char * src, size_t len, std::wstring & dst);
void convert_str(const std::wstring & src, Space & space);
Status parse(int argc, const char ** argv);
void parse(size_t argc, const char ** argv, size_t & argv_index);
void parse_short_option(size_t argc, const char ** argv, size_t & argv_index);
void parse_long_option(size_t argc, const char ** argv, size_t & argv_index);
void parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len);
void parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index);
size_t how_many_arguments_required(const std::wstring & arg);
void add_option_to_space(const std::wstring & option, const std::vector<std::wstring> & arguments);
};
} // namespace
#endif

View File

@ -41,7 +41,7 @@
#include <iostream>
namespace PT
namespace pt
{
/*

2544
src/space/space.cpp Normal file

File diff suppressed because it is too large Load Diff

1323
src/space/space.h Normal file

File diff suppressed because it is too large Load Diff

1094
src/space/spaceparser.cpp Normal file

File diff suppressed because it is too large Load Diff

313
src/space/spaceparser.h Normal file
View File

@ -0,0 +1,313 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_space_jsonspaceparser
#define headerfile_picotools_space_jsonspaceparser
#include <fstream>
#include "space.h"
namespace pt
{
/*
a parser reading the JSON format or the Space format
(from files or from strings) into a Space object
*/
class SpaceParser
{
public:
/*
constructor
NOTE(review): the previous comment said that default values are set by a SetDefault() method,
there is no such method declared in this class -- presumably the defaults described
at the fields below are set directly in the constructor, confirm in spaceparser.cpp
*/
SpaceParser();
/*
status of parsing
*/
enum Status { ok, cant_open_file, syntax_error };
/*
the last status of parsing, set by the parse methods
*/
Status status;
/*
main methods used to parse a JSON file
file_name is the path to a file
*/
Status parse_json_file(const char * file_name, Space & out_space, bool clear_space = true);
Status parse_json_file(const std::string & file_name, Space & out_space, bool clear_space = true);
Status parse_json_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
Status parse_json_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);
/*
main methods used to parse a Space file
file_name is the path to a file
*/
Status parse_space_file(const char * file_name, Space & out_space, bool clear_space = true);
Status parse_space_file(const std::string & file_name, Space & out_space, bool clear_space = true);
Status parse_space_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
Status parse_space_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);
/*
main methods used to parse JSON from a string
str - input string (either 8bit ascii or UTF-8 -- see use_utf8() method)
*/
Status parse_json(const char * str, Space & out_space, bool clear_space = true);
Status parse_json(const std::string & str, Space & out_space, bool clear_space = true);
/*
main methods used to parse JSON from a string
here input string is always in unicode (wide characters)
*/
Status parse_json(const wchar_t * str, Space & out_space, bool clear_space = true);
Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true);
/*
main methods used to parse the Space format from a string
(char strings or wide strings as in the parse_json() methods above)
*/
Status parse_space(const char * str, Space & out_space, bool clear_space = true);
Status parse_space(const std::string & str, Space & out_space, bool clear_space = true);
Status parse_space(const wchar_t * str, Space & out_space, bool clear_space = true);
Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true);
/*
* TODO: add a two-argument parse method
* Status parse(const char * str, Space & output_space);
*
*/
/*
* if true then the input file or string (char* or std::string) is treated as UTF-8
* default true
*
* the internal storage for strings is std::wstring so if you call use_utf8(false) then
* the characters of the input string will be simply converted with static_cast<> from char to wchar_t
*
*/
void use_utf8(bool utf);
/*
*
* returns the number of the last parsed line
* can be used to obtain the line in which there was a syntax error
*
*/
int get_last_parsed_line();
private:
/*
the output space
(presumably set from the out_space argument of the parse methods)
*/
Space * root_space;
/*
a number of a line in which there is a syntax_error
*/
int line;
/*
presumably true if one of the parse_json_file()/parse_space_file() methods was called
and false if the input is a string -- the previous comment used old method names
*/
bool reading_from_file;
/*
pointers to the current character
if a string is being parsed
*/
const char * pchar_ascii;
const wchar_t * pchar_unicode;
/*
presumably true if the input is a wide string (wchar_t* or std::wstring)
-- the previous comment used old method names
*/
bool reading_from_wchar_string;
/*
last read token
*/
std::wstring token;
/*
separator between a variable and a value, default: '='
*/
int separator;
/*
space starting character, default: '{'
*/
int space_start;
/*
space ending character, default: '}'
*/
int space_end;
/*
table starting character, default: '['
*/
int table_start;
/*
table ending character, default: ']'
*/
int table_end;
/*
option delimiter, default: ','
*/
int option_delimiter;
/*
last read char
or -1 if the end
*/
int lastc;
/*
true if the lastc was escaped (with a backslash)
we have to know if the last sequence was \" or just "
*/
bool char_was_escaped;
/*
current file
may it would be better to make a pointer?
if we parse only a string then there is no sense to have such an object
*/
std::ifstream file;
/*
input file is in UTF-8
default: true
*/
bool input_as_utf8;
/*
* if parsing_space is false then it means we are parsing JSON format
*
*/
bool parsing_space;
void parse_root_space(bool clear_root_space);
void parse(Space * space, bool is_object_value, bool is_table_value);
void parse_space(Space * space);
void parse_table(Space * space);
void parse_key_value_pairs(Space * space);
void parse_values_list(Space * space);
void read_key();
void parse_text_value(Space * space);
void parse_integer_value(Space * space);
void parse_floating_point_value(Space * space);
bool is_alfa_numeric_char(int c);
void read_token_until_delimiter(std::wstring & token, int delimiter1, int delimiter2);
void read_alfa_numeric_token(std::wstring & token);
void read_string_value(std::wstring & token, bool is_object_value, bool is_table_value);
bool is_integer_token();
bool is_floating_point_token();
void read_space_field_token(std::wstring & token);
void read_token_quoted(std::wstring & token);
void read_multiline_token_quoted(std::wstring & token);
/*
methods reading one character from the current input
*/
int read_utf8_char();
int read_ascii_char();
int read_char_from_wchar_string();
int read_char_from_utf8_string();
int read_char_from_ascii_string();
int read_char_no_escape();
int read_char();
bool is_white(int c);
void skip_line();
void skip_white();
void trim_last_white(std::wstring & s);
bool is_hex_digit(wchar_t c);
int hex_to_int(wchar_t c);
void read_unicode_code_point();
};
} // namespace
#endif

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2013, Tomasz Sowa
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -49,7 +49,7 @@
// for snprintf
#include <cstdio>
namespace PT
namespace pt
{
@ -60,17 +60,20 @@ namespace PT
StringType can be either std::string or std::wstring
this class doesn't use UTF-8 in any kind
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
template<typename CharT, size_t stack_size, size_t heap_block_size>
class TextStreamBase
{
public:
TextStreamBase();
typedef CharT char_type;
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
typedef typename buffer_type::iterator iterator;
typedef typename buffer_type::const_iterator const_iterator;
void clear();
bool empty() const;
size_t size() const;
@ -106,8 +109,8 @@ public:
TextStreamBase & operator<<(unsigned long long);
TextStreamBase & operator<<(double);
TextStreamBase & operator<<(const void *); // printing a pointer
TextStreamBase & operator<<(const PT::Space & space);
TextStreamBase & operator<<(const PT::Date & date);
TextStreamBase & operator<<(const Space & space);
TextStreamBase & operator<<(const Date & date);
// min width for integer output
// if the output value has less digits then first zeroes are added
@ -466,9 +469,9 @@ wchar_t buf[100];
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const PT::Space & space)
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Space & space)
{
space.Serialize(*this, true, false);
space.serialize_to_space_stream(*this, true);
return *this;
}
@ -477,7 +480,7 @@ return *this;
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const PT::Date & date)
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Date & date)
{
date.Serialize(*this);

View File

@ -40,7 +40,7 @@
namespace PT
namespace pt
{

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2010-2018, Tomasz Sowa
* Copyright (c) 2010-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -36,67 +36,62 @@
*/
#include "utf8.h"
#include "utf8_private.h"
namespace PT
namespace pt
{
/*!
an auxiliary function for converting from UTF-8 string
*/
static bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
{
for(len=0 ; (uz & 0x80) != 0 ; ++len)
uz <<= 1;
if( len == 1 )
return false;
res = uz;
if( len > 0 )
res >>= len;
if( res == 0 )
return false;
if( len == 0 )
len = 1;
return true;
}
/*!
an auxiliary function for converting from UTF-8 string
*/
static bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res)
{
if( (uz & 0xc0) != 0x80 )
return false;
res <<= 6;
res |= (uz & 0x3F);
return true;
}
/*!
returns true if 'c' is a correct unicode character
*/
bool UTF8_CheckRange(int c)
bool utf8_check_range(int c)
{
return c>=0 && c<=0x10FFFF && !(c>=0xD800 && c<=0xDFFF);
}
/*!
	checks a code point decoded from an UTF-8 sequence

	c              - the decoded value
	how_many_bytes - length (in bytes) of the UTF-8 sequence 'c' was read from

	returns true only when 'c' lies in the range which is allowed
	for a sequence of exactly how_many_bytes bytes:
	  1 byte:  0x0000  - 0x007f
	  2 bytes: 0x0080  - 0x07ff
	  3 bytes: 0x0800  - 0xffff without the surrogates 0xD800 - 0xDFFF
	  4 bytes: 0x10000 - 0x10FFFF
	any other length, or a value outside of the range for the given length
	(e.g. an overlong encoding), gives false
*/
bool utf8_check_range(int c, int how_many_bytes)
{
	switch( how_many_bytes )
	{
	case 1:
		return c >= 0x0000 && c <= 0x007f;

	case 2:
		return c >= 0x0080 && c <= 0x07ff;

	case 3:
		// the surrogate block 0xD800 - 0xDFFF is excluded
		return (c >= 0x0800 && c < 0xD800) || (c > 0xDFFF && c <= 0xffff);

	case 4:
		return c >= 0x10000 && c <= 0x10FFFF;

	default:
		return false;
	}
}
/*!
@ -116,7 +111,7 @@ bool UTF8_CheckRange(int c)
(returns zero only if utf8_len is zero)
even if there are errors the function returns a non-zero value
*/
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct)
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct)
{
size_t i, len;
@ -126,17 +121,19 @@ size_t i, len;
if( utf8_len == 0 )
return 0;
if( !UTF8ToInt_FirstOctet(utf8[0], len, res) )
if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) )
return 1;
if( utf8_len < len )
return utf8_len;
for(i=1 ; i<len ; ++i)
if( !UTF8ToInt_AddNextOctet(utf8[i], res) )
{
if( !private_namespace::utf8_to_int_add_next_octet(utf8[i], res) )
return i;
}
if( UTF8_CheckRange(res) )
if( utf8_check_range(res, len) )
correct = true;
return len;
@ -158,7 +155,7 @@ return len;
(returns zero only if the string has '\0' at the first character)
even if there are errors the function returns a non-zero value
*/
size_t UTF8ToInt(const char * utf8, int & res, bool & correct)
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
{
size_t i, len;
@ -168,7 +165,7 @@ size_t i, len;
if( *utf8 == 0 )
return 0;
if( !UTF8ToInt_FirstOctet(utf8[0], len, res) )
if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) )
return 1;
for(i=1 ; i<len ; ++i)
@ -176,11 +173,11 @@ size_t i, len;
if( utf8[i] == 0 )
return i;
if( !UTF8ToInt_AddNextOctet(utf8[i], res) )
if( !private_namespace::utf8_to_int_add_next_octet(utf8[i], res) )
return i;
}
if( UTF8_CheckRange(res) )
if( utf8_check_range(res, len) )
correct = true;
return len;
@ -203,9 +200,9 @@ return len;
(returns zero only if utf8 is empty)
even if there are errors the function returns a non-zero value
*/
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
{
return UTF8ToInt(utf8.c_str(), utf8.size(), res, correct);
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
}
@ -222,7 +219,7 @@ size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
the function returns how many characters have been used from the input stream
*/
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct)
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
{
size_t i, len;
unsigned char uz;
@ -235,7 +232,7 @@ unsigned char uz;
if( !utf8 )
return 0;
if( !UTF8ToInt_FirstOctet(uz, len, res) )
if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
return 1;
for(i=1 ; i<len ; ++i)
@ -245,11 +242,11 @@ unsigned char uz;
if( !utf8 )
return i;
if( !UTF8ToInt_AddNextOctet(uz, res) )
if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
return i;
}
if( UTF8_CheckRange(res) )
if( utf8_check_range(res, len) )
correct = true;
return len;
@ -261,7 +258,7 @@ return len;
/*
*/
static void IntToWide(int c, std::wstring & res)
static void int_to_wide(int c, std::wstring & res)
{
if( sizeof(wchar_t)==2 && c>0xffff )
{
@ -294,13 +291,13 @@ static void IntToWide(int c, std::wstring & res)
the function returns false if there were some errors when converting
*/
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
{
if( clear )
res.clear();
bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) {
IntToWide(c, res);
bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
int_to_wide(c, res);
});
return status;
@ -324,14 +321,14 @@ bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool cle
the function returns false if there were some errors when converting
*/
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear, int mode)
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
{
size_t utf8_len = 0;
while( utf8[utf8_len] != 0 )
utf8_len += 1;
return UTF8ToWide(utf8, utf8_len, res, clear, mode);
return utf8_to_wide(utf8, utf8_len, res, clear, mode);
}
@ -350,9 +347,9 @@ return UTF8ToWide(utf8, utf8_len, res, clear, mode);
the function returns false if there were some errors when converting
*/
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mode)
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
{
return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode);
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
}
@ -371,7 +368,7 @@ bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mo
the function returns false if there were some errors when converting
*/
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear, int mode)
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
{
int z;
bool correct, was_error = false;
@ -379,7 +376,7 @@ bool correct, was_error = false;
if( clear )
res.clear();
while( UTF8ToInt(utf8, z, correct) > 0 )
while( utf8_to_int(utf8, z, correct) > 0 )
{
if( !correct )
{
@ -390,7 +387,7 @@ bool correct, was_error = false;
}
else
{
IntToWide(z, res);
int_to_wide(z, res);
}
}
@ -413,13 +410,13 @@ return !was_error;
the function returns how many characters have been written to the utf8,
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
*/
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len)
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
{
char buf[10];
int i = 0;
int mask = 0x3f; // 6 first bits set
if( utf8_max_len==0 || !UTF8_CheckRange(z) )
if( utf8_max_len==0 || !utf8_check_range(z) )
return 0;
if( z <= 0x7f )
@ -467,14 +464,14 @@ return a;
the function returns how many characters have been written to the utf8 string,
zero means that 'z' is an incorrect unicode character
*/
size_t IntToUTF8(int z, std::string & utf8, bool clear)
size_t int_to_utf8(int z, std::string & utf8, bool clear)
{
char buf[10];
if( clear )
utf8.clear();
size_t len = IntToUTF8(z, buf, sizeof(buf)/sizeof(char));
size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char));
size_t i;
for(i=0 ; i<len ; ++i)
@ -485,268 +482,6 @@ return len;
/*!
this function converts one wide character into UTF-8 stream
input:
z - wide character
output:
utf8 - a UTF-8 stream for the output sequence
the function returns how many characters have been written to the utf8 stream,
zero means that 'z' is an incorrect unicode character
*/
size_t IntToUTF8(int z, std::ostream & utf8)
{
char buf[10];
size_t len = IntToUTF8(z, buf, sizeof(buf)/sizeof(char));
size_t i;
for(i=0 ; i<len ; ++i)
utf8 << buf[i];
return len;
}
/*
an auxiliary function for converting from wide characters to UTF-8
converting a wide character into one int
returns how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too
*/
static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
{
if( string_len == 0 )
{
z = 0;
correct = false;
return 0;
}
z = static_cast<int>(*wide_string);
correct = true;
if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) )
{
if( z>=0xD800 && z<=0xDBFF && string_len>1 )
{
int z2 = *(wide_string+1);
if( z2>=0xDC00 && z2<=0xDFFF )
{
z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
return 2;
}
else
{
correct = false;
return 2;
}
}
else
{
correct = false;
return 1;
}
}
else
{
correct = UTF8_CheckRange(z);
return 1;
}
}
/*
an auxiliary function for converting from wide characters to UTF-8
converting a wide character into one int
returns how many wide characters were used
if wide_string has at least one character then the return value is always greater than zero too
*/
static size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct)
{
size_t min_str_len = 1;
if( *wide_string == 0 )
{
z = 0;
correct = false;
return 0;
}
if( *(wide_string+1) != 0 )
min_str_len = 2;
return WideToInt(wide_string, min_str_len, z, correct);
}
/*!
an auxiliary function for converting from wide characters to UTF-8
returns how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
a null terminating character)
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
if this flag is true then utf8_written is equal to zero
was_error - will be true if there is an error when converting (there was an incorrect wide character)
(was_error will not be true if the utf8 buffer is too small)
*/
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
{
int z;
bool correct;
size_t chars;
utf8_written = 0;
was_utf8_buf_too_small = false;
chars = WideToInt(wide_string, string_len, z, correct);
if( correct )
{
utf8_written = IntToUTF8(z, utf8, utf8_len);
if( utf8_written == 0 )
was_utf8_buf_too_small = true;
}
else
{
if( mode == 1 )
{
utf8_written = IntToUTF8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
if( utf8_written == 0 )
was_utf8_buf_too_small = true;
}
was_error = true;
}
return chars;
}
/*!
an auxiliary function for converting from wide characters to UTF-8
returns how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too
*/
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
{
int z;
bool correct;
size_t chars;
chars = WideToInt(wide_string, string_len, z, correct);
if( correct )
correct = IntToUTF8(z, utf8, false) != 0;
if( !correct )
{
if( mode == 1 )
IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
was_error = true;
}
return chars;
}
/*!
an auxiliary function for converting from wide characters to UTF-8
returns how many wide characters were used
if wide_string has at least one character then the return value is always greater than zero too
*/
static size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
{
int z;
bool correct;
size_t chars;
chars = WideToInt(wide_string, z, correct);
if( correct )
correct = IntToUTF8(z, utf8, false) != 0;
if( !correct )
{
if( mode == 1 )
IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
was_error = true;
}
return chars;
}
/*!
an auxiliary function for converting from wide characters to UTF-8
returns how many wide characters were used
if string_len is greater than 0 then the return value is always greater than zero too
*/
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, bool & was_error, int mode)
{
int z;
bool correct;
size_t chars;
chars = WideToInt(wide_string, string_len, z, correct);
if( correct )
correct = IntToUTF8(z, utf8) != 0;
if( !correct )
{
if( mode == 1 )
IntToUTF8(0xFFFD, utf8); // U+FFFD "replacement character"
was_error = true;
}
return chars;
}
/*!
an auxiliary function for converting from wide characters to UTF-8
*/
static size_t WideOneToUTF8(const wchar_t * wide_string, std::ostream & utf8, bool & was_error, int mode)
{
size_t min_str_len = 1;
if( *wide_string == 0 )
return 0;
if( *(wide_string+1) != 0 )
min_str_len = 2;
return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
}
/*!
this function converts a wide string into UTF-8 string
@ -762,7 +497,7 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
{
bool was_error = false;
size_t chars;
@ -772,7 +507,7 @@ size_t chars;
while( string_len > 0 )
{
chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);
wide_string += chars;
string_len -= chars;
}
@ -796,7 +531,7 @@ return !was_error;
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
{
bool was_error = false;
@ -804,7 +539,7 @@ bool was_error = false;
utf8.clear();
while( *wide_string )
wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode);
wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode);
return !was_error;
}
@ -825,90 +560,13 @@ return !was_error;
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
}
/*!
this function converts a wide string into UTF-8 stream
input:
wide_string - a wide string for converting
string_len - size of the string
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a UTF-8 stream for the output sequence
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode)
{
bool was_error = false;
size_t chars;
while( string_len > 0 )
{
chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
wide_string += chars;
string_len -= chars;
}
return !was_error;
}
/*!
this function converts a wide string into UTF-8 stream
input:
wide_string - a null terminated wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a UTF-8 stream for the output sequence
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode)
{
bool was_error = false;
while( *wide_string )
wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode);
return !was_error;
}
/*!
this function converts a wide string (std::wstring) into UTF-8 stream
input:
wide_string - a wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a UTF-8 stream for the output sequence
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, mode);
}
/*!
@ -932,7 +590,7 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
bool was_error = false;
bool was_buffer_to_small;
@ -942,7 +600,7 @@ size_t chars, utf8_saved;
while( string_len > 0 )
{
chars = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
if( was_buffer_to_small )
{
@ -986,9 +644,9 @@ return !was_error;
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
}
@ -1014,7 +672,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len,
will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated)
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
{
size_t utf8_saved;
bool res;
@ -1022,7 +680,7 @@ bool res;
if( utf8_len == 0 )
return false;
res = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
res = wide_to_utf8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
utf8[utf8_saved] = 0;
return res;
@ -1050,9 +708,9 @@ return res;
will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated)
*/
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
}
@ -1077,7 +735,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len,
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
bool was_error = false;
bool was_buffer_to_small;
@ -1089,7 +747,7 @@ size_t len;
while( *wide_string )
{
len = (*(wide_string+1) == 0) ? 1 : 2;
chars = WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
chars = private_namespace::wide_one_to_utf8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
if( was_buffer_to_small )
{
@ -1132,7 +790,7 @@ return !was_error;
will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated)
*/
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
{
size_t utf8_saved;
bool res;
@ -1140,7 +798,7 @@ bool res;
if( utf8_len == 0 )
return false;
res = WideToUTF8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
res = wide_to_utf8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
utf8[utf8_saved] = 0;
return res;

180
src/utf8/utf8.h Normal file
View File

@ -0,0 +1,180 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2010-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8
#define headerfile_picotools_utf8_utf8
#include <string>
#include "textstream/textstream.h"
namespace pt
{
/*!
UTF-8, a transformation format of ISO 10646
http://tools.ietf.org/html/rfc3629
when wchar_t is 4 bytes long we use UTF-32
when wchar_t is 2 bytes long we use UTF-16 (with surrogate pairs)
UTF-16
http://www.ietf.org/rfc/rfc2781.txt
*/
/*!
returns true if 'c' is a correct unicode character
(a value from 0 to 0x10FFFF which is not a surrogate 0xD800 - 0xDFFF)
*/
bool utf8_check_range(int c);
/*!
returns true if 'c' is a correct unicode character
this method is used when reading from an utf8 string
how_many_bytes - means how many bytes from the utf8 string were read
the value must lie in the range allowed for a sequence of that length,
so an overlong encoding is reported as incorrect
*/
bool utf8_check_range(int c, int how_many_bytes);
/*
*
*
*
* conversions from UTF-8
*
*
*
*/
/*!
converting one character from UTF-8 to an int
res     - the decoded character
correct - set to true when the decoded value passes utf8_check_range(res, length_of_the_sequence)
the return value is the number of input bytes (characters of the stream) which were used,
it is different from zero even if there was an error, zero means the input was empty
*/
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
size_t utf8_to_int(const char * utf8, int & res, bool & correct);
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct);
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct);
/*!
converting UTF-8 string to a wide string
clear - if true then 'res' is cleared before the conversion
mode  - what to do with an invalid input sequence
        NOTE(review): presumably the same meaning as for wide_to_utf8
        (0: skip, 1: put U+FFFD "replacement character") - confirm in utf8.cpp
the functions return false if there were some errors when converting
*/
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1);
/*
the same conversions with a generic output stream type
(only declared here, the definitions are expected in utf8/utf8_templates.h included at the end of this header)
*/
template<typename StreamType>
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool utf8_to_wide(const char * utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
/*
*
*
*
* conversions to UTF-8
*
*
*
*/
/*!
converting one int character to UTF-8
the return value is the number of characters written to utf8,
zero means that 'z' is an incorrect unicode character
(or, for the char* version, that the utf8 buffer is too small)
*/
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
size_t int_to_utf8(int z, std::string & utf8, bool clear = true);
template<typename StreamType>
size_t int_to_utf8(int z, StreamType & utf8);
/*!
converting a wide string to UTF-8 string
clear - if true then the output string is cleared before the conversion
mode  - what to do with an invalid wide character
        0: skip the invalid character
        1: put U+FFFD "replacement character" instead of the invalid character (default)
the functions return false if there were some errors when converting
*/
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
/*
the same conversions with a generic output stream type
*/
template<typename StreamType>
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode = 1);
template<typename StreamType>
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode = 1);
template<typename StreamType>
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode = 1);
/*
converting into a caller supplied char buffer of utf8_len characters
utf8_written - how many characters were saved in the buffer
if the buffer is too small the conversion breaks immediately
*/
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
/*
as above but the buffer is always null terminated
(one character of utf8_len is reserved for the terminator, false is returned if utf8_len is zero)
*/
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
/*
converting the content of a wide stream (buffer) to UTF-8
*/
template<typename StreamType>
void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear = true, int mode = 1); // not tested
template<typename StreamTypeIn, typename StreamTypeOut>
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested
} // namespace
#include "utf8/utf8_templates.h"
#endif

283
src/utf8/utf8_private.cpp Normal file
View File

@ -0,0 +1,283 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "utf8_private.h"
namespace pt
{
namespace private_namespace
{
/*!
	an auxiliary function for converting from UTF-8 string

	decodes the first octet of a UTF-8 sequence

	input:
		uz  - the first octet of the sequence

	output:
		len - on success: the length of the whole sequence in octets (1..4)
		      on failure: the number of leading one bits found in uz
		res - on success: the payload bits stored in the first octet
		      (res is not modified on failure)

	returns false if uz is a continuation octet (10xxxxxx)
	or if it announces a sequence longer than four octets
*/
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
{
	size_t leading_ones = 0;

	// each leading one bit announces one octet of the sequence
	while( (uz & 0x80) != 0 )
	{
		uz = static_cast<unsigned char>(uz << 1);
		leading_ones += 1;
	}

	len = leading_ones;

	if( leading_ones == 1 || leading_ones > 4 )
		return false;

	// uz has been shifted left leading_ones times,
	// shifting it back puts the payload bits in the lowest positions
	res = uz >> leading_ones;

	if( leading_ones == 0 )
		len = 1; // a one octet (ASCII) sequence

	return true;
}
/*!
	an auxiliary function for converting from UTF-8 string

	appends the six payload bits of a continuation octet (10xxxxxx) to res

	input:
		uz  - the next octet of the sequence

	output:
		res - shifted left by six bits and completed with the payload of uz
		      (res is not modified when the function returns false)

	returns false if uz is not a continuation octet
*/
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
{
	const bool is_continuation_octet = ((uz & 0xc0) == 0x80);

	if( is_continuation_octet )
		res = (res << 6) | (uz & 0x3F);

	return is_continuation_octet;
}
/*
	an auxiliary function for converting from wide characters to UTF-8
	converting a wide character (or a surrogate pair) into one int

	input:
		wide_string - pointer to the first wide character
		string_len  - how many wide characters are available

	output:
		z       - the unicode code point
		          (zero if string_len is zero, the raw value of the first character on error)
		correct - false if string_len is zero, if there is a surrogate without its pair
		          or if utf8_check_range() rejects the value

	returns how many wide characters were used
	if string_len is greater than 0 then the return value is always greater than zero too

	surrogate pairs are decoded only if wchar_t has two bytes;
	a surrogate which is not a part of a correct pair uses only one wide character,
	the character after it is left for the next call so it is not lost
*/
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
{
	if( string_len == 0 )
	{
		z = 0;
		correct = false;
		return 0;
	}

	z = static_cast<int>(*wide_string);
	correct = true;

	if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) )
	{
		// a pair must start with a high surrogate (0xD800 - 0xDBFF)
		if( z<=0xDBFF && string_len>1 )
		{
			int z2 = static_cast<int>(*(wide_string+1));

			if( z2>=0xDC00 && z2<=0xDFFF )
			{
				z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
				return 2;
			}
		}

		// a lone surrogate: only this one character is reported as incorrect,
		// the next character may be valid (or may start a valid pair) so it is not consumed here
		correct = false;
		return 1;
	}

	correct = utf8_check_range(z);
	return 1;
}
/*
	an auxiliary function for converting from wide characters to UTF-8
	converting a wide character into one int, the input is a null terminated string

	output:
		z       - the unicode code point (zero if the string is empty)
		correct - false if the string is empty or the character cannot be converted

	returns how many wide characters were used
	if wide_string has at least one character then the return value is always greater than zero too
*/
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
{
	if( wide_string[0] == 0 )
	{
		z = 0;
		correct = false;
		return 0;
	}

	// at most two wide characters are needed for one code point
	// so the whole length of the string is not calculated
	size_t available = (wide_string[1] != 0) ? 2 : 1;

	return wide_to_int(wide_string, available, z, correct);
}
/*!
	an auxiliary function for converting from wide characters to UTF-8
	one code point is read from wide_string and written to the utf8 buffer

	returns how many wide characters were used
	if string_len is greater than 0 then the return value is always greater than zero too

	input:
		wide_string, string_len - the wide string and its length
		utf8, utf8_len          - the output buffer and its size
		mode                    - 1: write U+FFFD "replacement character" instead of an incorrect character
		                          other values: write nothing for an incorrect character

	output:
		utf8_written - how many characters were saved in the utf8 buffer
		               (a null terminating character is not added),
		               it is zero if int_to_utf8() has not written anything
		               or if there was an incorrect wide character and mode is different from 1
		was_utf8_buf_too_small - true if int_to_utf8() was called and returned zero
		               (utf8_written is zero then)
		was_error    - set to true if an incorrect wide character was read,
		               this function never sets it to false
*/
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
	size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
{
	int code_point;
	bool is_valid;

	utf8_written = 0;
	was_utf8_buf_too_small = false;

	size_t used = wide_to_int(wide_string, string_len, code_point, is_valid);

	if( !is_valid )
	{
		was_error = true;

		if( mode != 1 )
			return used;

		code_point = 0xFFFD; // U+FFFD "replacement character"
	}

	utf8_written = int_to_utf8(code_point, utf8, utf8_len);
	was_utf8_buf_too_small = (utf8_written == 0);

	return used;
}
/*!
	an auxiliary function for converting from wide characters to UTF-8
	one code point is read from wide_string and appended to the utf8 string
	(the string is not cleared)

	mode - 1: append U+FFFD "replacement character" if the character cannot be converted
	was_error - set to true if the character cannot be converted (never set to false here)

	returns how many wide characters were used
	if string_len is greater than 0 then the return value is always greater than zero too
*/
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
{
	int code_point;
	bool is_valid;

	size_t used = wide_to_int(wide_string, string_len, code_point, is_valid);

	// int_to_utf8() is called only for a valid code point,
	// a zero result means that nothing was appended
	bool appended = is_valid && int_to_utf8(code_point, utf8, false) != 0;

	if( appended )
		return used;

	was_error = true;

	if( mode == 1 )
		int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"

	return used;
}
/*!
	an auxiliary function for converting from wide characters to UTF-8
	one code point is read from a null terminated wide_string and appended to the utf8 string
	(the string is not cleared)

	mode - 1: append U+FFFD "replacement character" if the character cannot be converted
	was_error - set to true if the character cannot be converted (never set to false here),
	            an empty wide_string is reported as an error too

	returns how many wide characters were used
	if wide_string has at least one character then the return value is always greater than zero too
*/
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
{
	int code_point;
	bool is_valid;

	size_t used = wide_to_int(wide_string, code_point, is_valid);

	if( is_valid && int_to_utf8(code_point, utf8, false) != 0 )
		return used; // the character has been appended

	was_error = true;

	if( mode == 1 )
		int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"

	return used;
}
} // namespace private_namespace
} // namespace pt

220
src/utf8/utf8_private.h Normal file
View File

@ -0,0 +1,220 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8_private
#define headerfile_picotools_utf8_utf8_private
#include "textstream/textstream.h"
namespace pt
{
bool utf8_check_range(int c);
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
size_t int_to_utf8(int z, std::string & utf8, bool clear);
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
namespace private_namespace
{
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
bool utf8_to_int_add_next_octet(unsigned char uz, int & res);
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode);
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
/*!
	an auxiliary function for converting from wide characters to UTF-8
	one code point is read from wide_string and written to the utf8 stream

	mode - 1: write U+FFFD "replacement character" if the character cannot be converted
	was_error - set to true if the character cannot be converted (never set to false here)

	returns how many wide characters were used
	if string_len is greater than 0 then the return value is always greater than zero too
*/
template<typename StreamType>
static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
{
	int code_point;
	bool is_valid;

	size_t used = wide_to_int(wide_string, string_len, code_point, is_valid);

	// int_to_utf8() is called only for a valid code point,
	// a zero result means that nothing was written to the stream
	bool written = is_valid && int_to_utf8(code_point, utf8) != 0;

	if( written )
		return used;

	was_error = true;

	if( mode == 1 )
		int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"

	return used;
}
/*!
	an auxiliary function for converting from wide characters to UTF-8
	one code point is read from a null terminated wide_string and written to the utf8 stream

	returns how many wide characters were used,
	for an empty string the function returns zero and changes nothing (was_error is not set)
*/
template<typename StreamType>
static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
{
	if( wide_string[0] == 0 )
		return 0;

	// at most two wide characters are needed for one code point
	// so the whole length of the string is not calculated
	size_t available = (wide_string[1] == 0) ? 1 : 2;

	return wide_one_to_utf8(wide_string, available, utf8, was_error, mode);
}
// declared in utf8.h, defined in utf8.cpp
// NOTE(review): this declaration is placed inside private_namespace so it names
// pt::private_namespace::utf8_to_int, not pt::utf8_to_int which is declared at the top of this file;
// utf8_to_wide_generic() calls the function with the pt:: qualification and does not depend on this line
// -- confirm whether the declaration is still needed here
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
/*
	an auxiliary function for converting from UTF-8 to wide characters

	the utf8 buffer (utf8_len octets) is decoded code point by code point
	and convert_function(int) is called for each of them

	mode - 1: convert_function is called with U+FFFD "replacement character" for an incorrect sequence
	       other values: an incorrect sequence is skipped

	returns false if there was at least one incorrect sequence
*/
template<typename function_type>
bool utf8_to_wide_generic(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
{
	bool all_correct = true;

	while( utf8_len > 0 )
	{
		int code_point;
		size_t octets;
		bool correct;
		const unsigned char first_octet = static_cast<unsigned char>(*utf8);

		if( first_octet > 0x7f )
		{
			octets = pt::utf8_to_int(utf8, utf8_len, code_point, correct); // octets will be different from zero
		}
		else
		{
			// small optimization: an ASCII character is taken directly
			octets = 1;
			correct = true;
			code_point = first_octet;
		}

		if( correct )
		{
			convert_function(code_point);
		}
		else
		{
			all_correct = false;

			if( mode == 1 )
				convert_function(0xFFFD); // U+FFFD "replacement character"
		}

		utf8 += octets;
		utf8_len -= octets;
	}

	return all_correct;
}
/*
	an auxiliary function: putting one unicode code point to a wide stream

	c is inserted to res (with operator<<) as one wchar_t,
	or as two wchar_t (UTF-16 surrogate pair) if wchar_t has two bytes and c is greater than 0xffff
*/
template<typename StreamType>
void int_to_wide(int c, StreamType & res)
{
	const bool needs_surrogate_pair = (sizeof(wchar_t) == 2 && c > 0xffff);

	if( !needs_surrogate_pair )
	{
		res << static_cast<wchar_t>(c);
		return;
	}

	// UTF16 surrogate pairs: ten bits in the high surrogate and ten bits in the low one
	const int offset = c - 0x10000;

	res << static_cast<wchar_t>(((offset >> 10) & 0x3FF) + 0xD800);
	res << static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
}
// not tested
// FIX ME it is not using surrogate pairs from input stream
/*
	an auxiliary function for converting a wide stream to UTF-8

	each character of buffer is converted with int_to_utf8() to a local buffer,
	write_function(const char *, size_t) is called with the collected octets
	every time the local buffer is nearly full and once more at the end

	mode - what to do with a character which cannot be converted
	       0: skip the invalid character
	       1: put U+FFFD "replacement character" instead of the invalid character
*/
template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
void wide_to_utf8_generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
{
	char utf8_buffer[256];
	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
	std::size_t utf8_sequence_max_length = 10;
	std::size_t index = 0;

	typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator i = buffer.begin();

	while( i != buffer.end() )
	{
		// flushing the local buffer if there may be no room for the next sequence
		if( index + utf8_sequence_max_length > buffer_len )
		{
			write_function(utf8_buffer, index);
			index = 0;
		}

		std::size_t written = int_to_utf8(*i, utf8_buffer + index, buffer_len - index);

		// there are at least utf8_sequence_max_length free octets here
		// so zero means that the character was rejected by int_to_utf8()
		if( written == 0 && mode == 1 )
			written = int_to_utf8(0xFFFD, utf8_buffer + index, buffer_len - index); // U+FFFD "replacement character"

		index += written;
		++i;
	}

	if( index > 0 )
	{
		write_function(utf8_buffer, index);
	}
}
} // namespace private_namespace
} // namespace pt
#endif

271
src/utf8/utf8_templates.h Normal file
View File

@ -0,0 +1,271 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8_templates
#define headerfile_picotools_utf8_utf8_templates
// this file is included at the end of utf8.h
#include "utf8_private.h"
namespace pt
{
/*!
	converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
	(need to be tested)

	input:
		utf8, utf8_len - the UTF-8 buffer and its length in octets
		clear          - if true then res.clear() is called before converting
		mode           - what to do with errors when converting
		                 0: skip an invalid character
		                 1: put U+FFFD "replacement character" instead of the invalid character

	output:
		res - a wide stream for the output characters

	this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
{
	if( clear )
		res.clear();

	auto put_code_point = [&res](int c) {
		private_namespace::int_to_wide(c, res);
	};

	return private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, put_code_point);
}
/*!
	converting a null terminated UTF-8 string to a wide stream

	the length of the string is calculated here and the work is done
	by utf8_to_wide(const char *, size_t, StreamType &, bool, int)
*/
template<typename StreamType>
bool utf8_to_wide(const char * utf8, StreamType & res, bool clear, int mode)
{
	size_t utf8_len = 0;

	// looking for the terminating null octet
	for( ; utf8[utf8_len] != 0 ; ++utf8_len)
	{
	}

	return utf8_to_wide(utf8, utf8_len, res, clear, mode);
}
/*!
	converting a UTF-8 std::string to a wide stream

	the whole string (utf8.size() octets) is passed
	to utf8_to_wide(const char *, size_t, StreamType &, bool, int)
*/
template<typename StreamType>
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mode)
{
	const char * octets = utf8.c_str();
	size_t octets_len = utf8.size();

	return utf8_to_wide(octets, octets_len, res, clear, mode);
}
// need to be tested
/*!
	converting UTF-8 input stream to a wide stream

	code points are read with utf8_to_int(utf8, z, correct)
	as long as that function returns a value greater than zero

	input:
		utf8  - the UTF-8 input stream
		clear - if true then res.clear() is called before converting
		mode  - what to do with errors when converting
		        0: skip an invalid character
		        1: put U+FFFD "replacement character" instead of the invalid character

	output:
		res - a wide stream for the output characters

	this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
{
	int z;
	bool correct, was_error = false;

	if( clear )
		res.clear();

	while( utf8_to_int(utf8, z, correct) > 0 )
	{
		if( correct )
		{
			private_namespace::int_to_wide(z, res);
		}
		else
		{
			// the replacement character has to be put as a wide character,
			// 'res << 0xFFFD' would insert an int (a stream which formats integers prints it as a number)
			if( mode == 1 )
				private_namespace::int_to_wide(0xFFFD, res); // U+FFFD "replacement character"

			was_error = true;
		}
	}

	return !was_error;
}
/*!
	this function converts one wide character into UTF-8 stream

	input:
		z - wide character

	output:
		utf8 - a UTF-8 stream for the output sequence,
		       the octets are put with utf8.write(const char *, size_t)

	the function returns how many characters have been written to the utf8 stream,
	zero means that 'z' is an incorrect unicode character (nothing is written then)
*/
template<typename StreamType>
size_t int_to_utf8(int z, StreamType & utf8)
{
	char octets[10];
	size_t octets_len = int_to_utf8(z, octets, sizeof(octets)/sizeof(char));

	if( octets_len == 0 )
		return 0;

	utf8.write(octets, octets_len);

	return octets_len;
}
/*!
	this function converts a wide string into UTF-8 stream

	input:
		wide_string - a wide string for converting
		string_len  - size of the string
		mode        - what to do with errors when converting
		              0: skip an invalid character
		              1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
		utf8 - a UTF-8 stream for the output sequence

	this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
{
	bool was_error = false;

	// wide_one_to_utf8() converts one code point and tells how many wide characters it has used
	for(size_t used = 0 ; string_len > 0 ; wide_string += used, string_len -= used)
		used = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);

	return !was_error;
}
/*!
	this function converts a wide string into UTF-8 stream

	input:
		wide_string - a null terminated wide string for converting
		mode        - what to do with errors when converting
		              0: skip an invalid character
		              1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
		utf8 - a UTF-8 stream for the output sequence

	this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
{
	bool was_error = false;
	const wchar_t * cursor = wide_string;

	// each call converts one code point and returns how many wide characters it has used
	while( *cursor != 0 )
		cursor += private_namespace::wide_one_to_utf8(cursor, utf8, was_error, mode);

	return !was_error;
}
/*!
	this function converts a wide string (std::wstring) into UTF-8 stream

	input:
		wide_string - a wide string for converting
		mode        - what to do with errors when converting
		              0: skip an invalid character
		              1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
		utf8 - a UTF-8 stream for the output sequence

	this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
{
	const wchar_t * characters = wide_string.c_str();
	size_t characters_len = wide_string.size();

	return wide_to_utf8(characters, characters_len, utf8, mode);
}
/*!
	this function converts a wide stream into a UTF-8 std::string

	input:
		buffer - a wide stream for converting
		clear  - if true then the utf8 string is cleared at the beginning
		mode   - passed to private_namespace::wide_to_utf8_generic()

	output:
		utf8 - the converted octets are appended to this string
*/
template<typename StreamType>
void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, int mode)
{
	if( clear )
		utf8.clear();

	auto append_octets = [&utf8](const char * octets, std::size_t octets_len){
		utf8.append(octets, octets_len);
	};

	private_namespace::wide_to_utf8_generic(buffer, mode, append_octets);
}
// not tested
/*!
	this function converts a wide stream into a UTF-8 stream

	input:
		buffer - a wide stream for converting
		mode   - passed to private_namespace::wide_to_utf8_generic()

	output:
		utf8 - the converted octets are put to this stream with utf8.write(const char *, size_t)
*/
template<typename StreamTypeIn, typename StreamTypeOut>
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
{
	auto write_octets = [&utf8](const char * octets, std::size_t octets_len){
		utf8.write(octets, octets_len);
	};

	private_namespace::wide_to_utf8_generic(buffer, mode, write_octets);
}
} // namespace pt
#endif

45
tests/Makefile Normal file
View File

@ -0,0 +1,45 @@
# Makefile for the pikotools test program (GNU make)
# all *.cpp files found below this directory are compiled and linked with ../src/pikotools.a

# the list of sources is evaluated once, when the Makefile is read
sourcefiles:=$(shell find . -name "*.cpp")
objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles))
# defaults used when the values do not come from the environment or the command line
# NOTE(review): GNU make has a built-in value for CXX (g++) so this ifndef is presumably never true -- confirm
ifndef CXX
CXX = g++
endif
ifndef CXXFLAGS
CXXFLAGS = -Wall -pedantic -O2 -std=c++20 -I../src -I/usr/local/include
endif
progname = tests
pikotoolslibfile = ../src/pikotools.a
all: $(progname)
# the library is not listed as a prerequisite,
# FORCE makes the program be linked on every run so a rebuilt library is always used
$(progname): $(objfiles) FORCE
$(CXX) $(CXXFLAGS) -o $(progname) $(objfiles) $(pikotoolslibfile)
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -o $@ $<
clean:
rm -f $(objfiles)
rm -f $(progname)
# regenerates Makefile.dep (header dependencies of the object files) with makedepend
depend:
makedepend -Y. -I../src -f- $(sourcefiles) > Makefile.dep
# a target without prerequisites and without a recipe, used to force rebuilding of $(progname)
FORCE:
# a missing Makefile.dep is not an error (the leading '-')
-include Makefile.dep

25
tests/Makefile.dep Normal file
View File

@ -0,0 +1,25 @@
# DO NOT DELETE
./main.o: convert.h mainoptionsparser.h csvparser.h
./convert.o: convert.h test.h ../src/convert/convert.h
./convert.o: ../src/convert/inttostr.h ../src/convert/patternreplacer.h
./convert.o: ../src/textstream/textstream.h ../src/space/space.h
./convert.o: ../src/textstream/types.h ../src/convert/inttostr.h
./convert.o: ../src/date/date.h ../src/membuffer/membuffer.h
./convert.o: ../src/textstream/types.h ../src/convert/strtoint.h
./convert.o: ../src/convert/text.h ../src/convert/misc.h
./test.o: test.h
./mainoptionsparser.o: mainoptionsparser.h test.h
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
./mainoptionsparser.o: ../src/space/space.h ../src/textstream/types.h
./mainoptionsparser.o: ../src/convert/inttostr.h ../src/utf8/utf8.h
./mainoptionsparser.o: ../src/textstream/textstream.h ../src/date/date.h
./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
./mainoptionsparser.o: ../src/utf8/utf8_templates.h
./mainoptionsparser.o: ../src/utf8/utf8_private.h ../src/convert/convert.h
./mainoptionsparser.o: ../src/convert/inttostr.h
./mainoptionsparser.o: ../src/convert/patternreplacer.h
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
./mainoptionsparser.o: ../src/convert/misc.h
./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h test.h

1982
tests/convert.cpp Normal file

File diff suppressed because it is too large Load Diff

57
tests/convert.h Normal file
View File

@ -0,0 +1,57 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_tests_convert
#define headerfile_picotools_tests_convert
namespace pt
{
namespace pt_convert_tests
{
// the entry point of the convert tests, called from main() of the test program;
// the definition is not in this header (presumably in tests/convert.cpp -- TODO confirm)
void make_tests();
}
}
#endif

326
tests/csvparser.cpp Normal file
View File

@ -0,0 +1,326 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "csvparser.h"
#include "csv/csvparser.h"
#include "test.h"
namespace pt
{
namespace pt_csvparser_tests
{
void test_csvparser(const char * input_str, const char * expected_json)
{
CSVParser csv_parser;
Space space;
std::string json;
CSVParser::Status status = csv_parser.parse(input_str, space);
space.serialize_to_json_to(json);
std::cout << "csv parsed as: " << json << std::endl;
test(json.c_str(), expected_json);
}
// an empty input: one row without any fields is expected
void test_csvparser1()
{
	test_csvparser("", R"json([[]])json");
}
// a single comma: one row with two empty fields is expected
void test_csvparser2()
{
	test_csvparser(",", R"json([["",""]])json");
}
// one unquoted field without a line ending
void test_csvparser3()
{
	test_csvparser("field1", R"json([["field1"]])json");
}
// three unquoted fields in one row
void test_csvparser4()
{
	test_csvparser(
		R"csvstring(field1,field2,field3)csvstring",
		R"json([["field1","field2","field3"]])json");
}
// only a LF line ending: one row with one empty field is expected
void test_csvparser5()
{
	test_csvparser("\n", R"json([[""]])json");
}
// only a CRLF line ending: one row with one empty field is expected
void test_csvparser6()
{
	test_csvparser("\r\n", R"json([[""]])json");
}
// one field followed by CRLF: the line ending does not create a second row
void test_csvparser7()
{
	test_csvparser("field1\r\n", R"json([["field1"]])json");
}
// a comma followed by CRLF: one row with two empty fields is expected
void test_csvparser8()
{
	test_csvparser(",\r\n", R"json([["",""]])json");
}
// two rows with one field each, separated by CRLF
void test_csvparser9()
{
	test_csvparser("field1\r\nfield2", R"json([["field1"],["field2"]])json");
}
// two rows with two fields each, separated by CRLF, no line ending at the end
void test_csvparser10()
{
	test_csvparser(
		"field1,field2\r\nfield3,field4",
		R"json([["field1","field2"],["field3","field4"]])json");
}
// two rows with two fields each, every row terminated by CRLF
void test_csvparser11()
{
	test_csvparser(
		"field1,field2\r\nfield3,field4\r\n",
		R"json([["field1","field2"],["field3","field4"]])json");
}
// two rows with two fields each, every row terminated by LF
void test_csvparser12()
{
	test_csvparser(
		"field1,field2\nfield3,field4\n",
		R"json([["field1","field2"],["field3","field4"]])json");
}
// an empty quoted field: one row with one empty field is expected
void test_csvparser13()
{
	test_csvparser(R"csv("")csv", R"json([[""]])json");
}
// an empty quoted field followed by LF
void test_csvparser14()
{
	test_csvparser("\"\"\n", R"json([[""]])json");
}
// an empty quoted field followed by CRLF
void test_csvparser15()
{
	test_csvparser("\"\"\r\n", R"json([[""]])json");
}
// an empty quoted field, a comma and CRLF: two empty fields are expected
void test_csvparser16()
{
	test_csvparser("\"\",\r\n", R"json([["",""]])json");
}
// an empty quoted field, a comma and LF: two empty fields are expected
void test_csvparser17()
{
	test_csvparser("\"\",\n", R"json([["",""]])json");
}
// one quoted field: the quotes are not a part of the value
void test_csvparser18()
{
	test_csvparser("\"field1\"", R"json([["field1"]])json");
}
// a comma inside a quoted field does not separate fields
void test_csvparser19()
{
	test_csvparser("\"field1, with comma\"", R"json([["field1, with comma"]])json");
}
// a quoted field with a comma, terminated by CRLF
void test_csvparser20()
{
	test_csvparser("\"field1, with comma\"\r\n", R"json([["field1, with comma"]])json");
}
// a quoted field with a comma, terminated by LF
void test_csvparser21()
{
	test_csvparser("\"field1, with comma\"\n", R"json([["field1, with comma"]])json");
}
// two quoted fields in one row, the first one contains a comma
void test_csvparser22()
{
	test_csvparser(
		"\"field1, with comma\",\"field2\"",
		R"json([["field1, with comma","field2"]])json");
}
// two rows of quoted fields separated by CRLF, no line ending at the end
void test_csvparser23()
{
	test_csvparser(
		"\"field1, with comma\",\"field2\"\r\n\"field3\",\"field4, with comma\"",
		R"json([["field1, with comma","field2"],["field3","field4, with comma"]])json");
}
// two rows of quoted fields, every row terminated by CRLF
void test_csvparser24()
{
	test_csvparser(
		"\"field1, with comma\",\"field2\"\r\n\"field3\",\"field4, with comma\"\r\n",
		R"json([["field1, with comma","field2"],["field3","field4, with comma"]])json");
}
// a doubled double quote inside a quoted field stands for one double quote (rows separated by CRLF)
void test_csvparser25()
{
	test_csvparser(
		"\"field1, with comma\",\"field2 with \"\" double quote\"\r\n\"field3\",\"field4, with comma\"",
		R"json([["field1, with comma","field2 with \" double quote"],["field3","field4, with comma"]])json");
}
// a doubled double quote inside a quoted field stands for one double quote (rows terminated by LF)
void test_csvparser26()
{
	test_csvparser(
		"\"field1, with comma\",\"field2 with \"\" double quote\"\n\"field3\",\"field4, with comma\"\n",
		R"json([["field1, with comma","field2 with \" double quote"],["field3","field4, with comma"]])json");
}
// a syntax error: some text directly after the closing quote of a field,
// the expected result has this text in a row of its own
void test_csvparser27()
{
	test_csvparser(
		"\"field1, with comma\",\"field2 with \"\" double quote\"syntax error\n\"field3\",\"field4, with comma\"\n",
		R"json([["field1, with comma","field2 with \" double quote"],["syntax error"],["field3","field4, with comma"]])json");
}
/*
	running all CSVParser tests

	the test counter is reset with the "CSVParser" name at the beginning,
	then the tests are called in the order of their numbers
*/
void make_tests()
{
	using test_function = void (*)();

	const test_function all_tests[] = {
		test_csvparser1,
		test_csvparser2,
		test_csvparser3,
		test_csvparser4,
		test_csvparser5,
		test_csvparser6,
		test_csvparser7,
		test_csvparser8,
		test_csvparser9,
		test_csvparser10,
		test_csvparser11,
		test_csvparser12,
		test_csvparser13,
		test_csvparser14,
		test_csvparser15,
		test_csvparser16,
		test_csvparser17,
		test_csvparser18,
		test_csvparser19,
		test_csvparser20,
		test_csvparser21,
		test_csvparser22,
		test_csvparser23,
		test_csvparser24,
		test_csvparser25,
		test_csvparser26,
		test_csvparser27,
	};

	reset_test_counter("CSVParser");

	for(test_function run_test : all_tests)
		run_test();
}
}
}

56
tests/csvparser.h Normal file
View File

@ -0,0 +1,56 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_tests_csvparser
#define headerfile_picotools_tests_csvparser
namespace pt
{
namespace pt_csvparser_tests
{
// the entry point of the CSVParser tests (defined in tests/csvparser.cpp):
// it resets the test counter and runs test_csvparser1() ... test_csvparser27()
void make_tests();
}
}
#endif

View File

@ -1,11 +1,11 @@
/*
* This file is a part of MainParser -- simple parser for main() parameters
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2011, Tomasz Sowa
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -35,83 +35,37 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "convert.h"
#include "mainoptionsparser.h"
#include "csvparser.h"
#include <iostream>
#include <string.h>
#include "../mainparser.h"
int main()
namespace pt
{
MainParser mp;
// suppose you call a 'programname' in such a way:
// $ programname -a -b - c --longparam -- otherlongparam -xyz paramwithvalue -x --longparam2 longwithvalue lastvalue1 lastvalue2 lastvalue3
// so the main() function get this table as input:
const char * tab[] = {
"programname",
"-a",
"-b",
"-",
"c",
"--longparam",
"--",
"otherlongparam",
"-xyz",
"paramwithvalue",
"-x",
"--longparam2",
"longwithvalue",
"lastvalue1", // some values left at the end
"lastvalue2", // you can get them by using GetValue() method
"lastvalue3",
};
mp.Set(sizeof(tab)/sizeof(const char*), tab);
while( mp.NextParam() )
{
if( mp.GetSingleParam() != 0 )
{
std::cout << "-" << mp.GetSingleParam() << std::endl;
// we know that 'z' requires a value
if( mp.GetSingleParam() == 'z' )
std::cout << "value for z: " << mp.GetValue() << std::endl;
bool was_error = false;
int test_counter = 0;
const char * test_msg = nullptr;
}
if( *mp.GetDoubleParam() )
int main(int argc, const char ** argv)
{
std::cout << "--" << mp.GetDoubleParam() << std::endl;
pt::pt_convert_tests::make_tests();
pt::pt_mainoptions_tests::make_tests();
pt::pt_csvparser_tests::make_tests();
// we know that "longparam2" requires a value
if( strcmp(mp.GetDoubleParam(), "longparam2") == 0 )
std::cout << "value for longparam2: " << mp.GetValue() << std::endl;
if( pt::was_error )
{
std::cout << "some of the tests failed" << std::endl;
}
else
{
std::cout << "*********************************" << std::endl;
std::cout << "* all tests passed successfully *" << std::endl;
std::cout << "*********************************" << std::endl;
}
while( !mp.IsEnd() )
std::cout << mp.GetValue() << std::endl;
return !pt::was_error ? 0 : 1;
}
/*
program output:
-a
-b
-c
--longparam
--otherlongparam
-x
-y
-z
value for z: paramwithvalue
-x
--longparam2
value for longparam2: longwithvalue
lastvalue1
lastvalue2
lastvalue3
*/

332
tests/mainoptionsparser.cpp Normal file
View File

@ -0,0 +1,332 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <iostream>
#include "mainoptionsparser.h"
#include "test.h"
#include "mainoptions/mainoptionsparser.h"
#include "utf8/utf8.h"
#include "convert/convert.h"
namespace pt
{
namespace pt_mainoptions_tests
{
/*
 * one expected result of a single parser.parse() call,
 * test_mainoptionsparser() compares each field with what the parser really returned
 */
struct MainOptionsParserOutputTest
{
// expected status returned by parse()
MainOptionsParser::Status status;
// expected value of parser.get_wrong_option() (converted to UTF-8), an empty string in the tables below when the status is status_ok
const char * option_err;
// expected output Space serialized to JSON
const char * json;
};
/*
 * prints the symbolic name of the given status to std::cout (without a new line character),
 * nothing is printed for a status other than the three ones handled below
 */
void print_status(MainOptionsParser::Status status)
{
	const char * status_name = nullptr;

	if( status == MainOptionsParser::status_ok )
		status_name = "MainOptionsParser::status_ok";
	else
	if( status == MainOptionsParser::status_argument_not_provided )
		status_name = "MainOptionsParser::status_argument_not_provided";
	else
	if( status == MainOptionsParser::status_argument_provided )
		status_name = "MainOptionsParser::status_argument_provided";

	if( status_name )
		std::cout << status_name;
}
/*
 * returns true if at least one character of the null terminated string 'arg'
 * is reported as a white character by is_white()
 */
bool has_space_in_str(const char * arg)
{
	for(const char * cursor = arg ; *cursor ; ++cursor)
	{
		if( is_white(static_cast<wchar_t>(*cursor)) )
			return true;
	}

	return false;
}
/*
 * prints argv[0] ... argv[to_index] (inclusive) in one line to std::cout,
 * each argument is followed by a single space,
 * an argument for which has_space_in_str() returns true is put in double quotes
 */
void print_args(int to_index, const char ** argv)
{
	for(int arg_index = 0 ; arg_index <= to_index ; ++arg_index)
	{
		const char * arg = argv[arg_index];
		const char * quote = has_space_in_str(arg) ? "\"" : "";

		std::cout << quote << arg << quote << " ";
	}

	std::cout << std::endl;
}
void test_mainoptionsparser(size_t len, const char ** argv, const Space & arguments_required, MainOptionsParserOutputTest * output)
{
reset_test_counter("mainoptionsparser");
std::cout << "Testing MainArgsParser" << std::endl;
MainOptionsParser parser;
Space space;
/*
* set to true when creating new tests (you can copy console output to the cpp file)
*/
bool prepare_tests = false;
for(size_t i = 0 ; i < len ; ++i)
{
if( !prepare_tests )
print_args(i, argv);
MainOptionsParser::Status status = parser.parse(i + 1, argv, space, arguments_required);
std::wstring & err_wstr = parser.get_wrong_option();
std::string err_str;
wide_to_utf8(err_wstr, err_str);
std::string json;
space.serialize_to_json_to(json);
std::cout << "{";
print_status(status);
std::cout << ", " << "\"" << err_str << "\", " << "R\"json(" << json << ")json\"" << "}," << std::endl;
if( !prepare_tests )
{
test("status", status, output[i].status);
test("err_arg", err_str.c_str(), output[i].option_err);
test("json", json.c_str(), output[i].json);
}
}
}
/*
 * first scenario: short and long options, options taking one, two and three arguments,
 * a repeated option, the --name=value form and non-option arguments after "--"
 */
void test_mainoptionsparser1()
{
const char * argv[] = {
"program_name",
"-a",
"-b",
"-c",
"-d",
"argument for d",
"-b",
"--long",
"--foo",
"foo-one",
"foo-two",
"--long-option",
"--bar",
"bar1",
"bar2",
"bar3",
"-x",
"--piggy2=option_for_piggy2",
"--piggy3",
"--bar",
"xbar1",
"xbar2",
"xbar3",
"--piggy2 another_option_for_piggy2",
"--",
"non-option-argument1",
"non-option-argument2",
"non-option-argument3",
};
// output[i] is the expected result of parsing argv[0] ... argv[i]
// (test_mainoptionsparser() calls parse() with i + 1 arguments)
MainOptionsParserOutputTest output[] = {
{MainOptionsParser::status_ok, "", R"json({})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]],"c":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "d", R"json({"a":[[]],"b":[[]],"c":[[]],"d":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]],"c":[[]],"d":[["argument for d"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"long":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "foo", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[[]],"long":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "foo", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one"]],"long":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":[],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1","non-option-argument2"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1","non-option-argument2","non-option-argument3"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
};
// option name -> number of arguments the option takes
// (the expected statuses above stay at status_argument_not_provided until that many arguments are read)
Space arguments_required;
arguments_required.add(L"d", 1);
arguments_required.add(L"foo", 2);
arguments_required.add(L"bar", 3);
arguments_required.add(L"piggy", 1);
arguments_required.add(L"piggy2", 1);
size_t len = sizeof(argv) / sizeof(const char *);
test_mainoptionsparser(len, argv, arguments_required, output);
}
/*
 * second scenario: an option with an empty name ("--=value"), a short option with its value
 * given in the same argv entry ("-f file-name with spaces") and a single "-"
 * which starts the non-option arguments
 */
void test_mainoptionsparser2()
{
const char * argv[] = {
"program_name",
"--long1",
"--long2=with-argument",
"--long3",
"-a",
"--=option-for-empty-argument",
"-b",
"arg b 1",
"arg b 2",
"-c",
"-f file-name with spaces",
"--xxx",
"arg 1",
"arg 2",
"arg 3",
"-", /* first non-option argument */
"non-option-argument2",
"non-option-argument3",
"non-option-argument4",
};
// output[i] is the expected result of parsing argv[0] ... argv[i]
// (test_mainoptionsparser() calls parse() with i + 1 arguments)
MainOptionsParserOutputTest output[] = {
{MainOptionsParser::status_ok, "", R"json({})json"},
{MainOptionsParser::status_ok, "", R"json({"long1":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"long1":[[]],"long2":[["with-argument"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "b", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "b", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[[]]})json"},
{MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1"]]})json"},
{MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2","non-option-argument3"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2","non-option-argument3","non-option-argument4"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
};
// option name -> number of arguments the option takes,
// the empty name is the option written as "--=value"
Space arguments_required;
arguments_required.add(L"long2", 1);
arguments_required.add(L"b", 2);
arguments_required.add(L"f", 1);
arguments_required.add(L"xxx", 3);
arguments_required.add(L"", 1);
size_t len = sizeof(argv) / sizeof(const char *);
test_mainoptionsparser(len, argv, arguments_required, output);
}
/*
 * third scenario: an argument is given (--long2=with-argument) to an option
 * which is not listed in arguments_required, status_argument_provided is expected
 * and the output space is expected to stay as it was before the wrong option
 */
void test_mainoptionsparser3()
{
const char * argv[] = {
"program_name",
"--long1",
"--long2=with-argument",
"--long3",
};
// output[i] is the expected result of parsing argv[0] ... argv[i],
// the last entry shows that "--long3" is not reported once "long2" has failed
MainOptionsParserOutputTest output[] = {
{MainOptionsParser::status_ok, "", R"json({})json"},
{MainOptionsParser::status_ok, "", R"json({"long1":[[]]})json"},
{MainOptionsParser::status_argument_provided, "long2", R"json({"long1":[[]]})json"},
{MainOptionsParser::status_argument_provided, "long2", R"json({"long1":[[]]})json"},
};
// only an option which does not occur in argv is registered here
Space arguments_required;
arguments_required.add(L"non-existing", 1);
size_t len = sizeof(argv) / sizeof(const char *);
test_mainoptionsparser(len, argv, arguments_required, output);
}
/*
 * runs all MainOptionsParser test scenarios, in the order of their definitions
 */
void make_tests()
{
	void (* const scenarios[])() = {
		test_mainoptionsparser1,
		test_mainoptionsparser2,
		test_mainoptionsparser3,
	};

	for(auto scenario : scenarios)
		scenario();
}
}
}

View File

@ -35,66 +35,28 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "inttostr.h"
#ifndef headerfile_picotools_tests_mainoptionsparser
#define headerfile_picotools_tests_mainoptionsparser
namespace PT
namespace pt
{
namespace pt_mainoptions_tests
{
std::wstring Toa(unsigned long long value, int base)
{
std::wstring res;
Toa(value, res, false, base);
return res;
}
std::wstring Toa(long long value, int base)
{
std::wstring res;
Toa(value, res, false, base);
return res;
}
void make_tests();
std::wstring Toa(unsigned long value, int base)
{
return Toa(static_cast<unsigned long long>(value), base);
}
std::wstring Toa(long value, int base)
{
return Toa(static_cast<long long>(value), base);
}
std::wstring Toa(unsigned int value, int base)
{
return Toa(static_cast<unsigned long long>(value), base);
}
std::wstring Toa(int value, int base)
{
return Toa(static_cast<long long>(value), base);
}
std::wstring Toa(unsigned short value, int base)
{
return Toa(static_cast<unsigned long long>(value), base);
}
std::wstring Toa(short value, int base)
{
return Toa(static_cast<long long>(value), base);
}
}
}
#endif

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012, Tomasz Sowa
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -35,81 +35,76 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "spacetojson.h"
#include "test.h"
namespace PT
namespace pt
{
void SpaceToJSON::Clear()
void reset_test_counter()
{
numeric.clear();
boolean.clear();
table.clear();
test_counter = 1;
test_msg = nullptr;
}
void SpaceToJSON::TreatAsTable(const wchar_t * space_name)
void reset_test_counter(const char * msg)
{
table.insert(space_name);
test_counter = 1;
test_msg = msg;
}
void SpaceToJSON::TreatAsTable(const std::wstring & space_name)
void test_status(bool status)
{
table.insert(space_name);
if( status )
{
std::cout << " OK";
}
else
{
std::cout << " Fail";
was_error = true;
}
std::cout << std::endl;
}
void SpaceToJSON::TreatAsNumeric(const wchar_t * name)
template<>
bool test<const char*>(const char * test_msg, const char * provided, const char * expected)
{
numeric.insert(name);
std::cout << "test " << test_counter << ": ";
if( test_msg )
std::cout << test_msg;
bool status = (std::strcmp(provided, expected) == 0);
test_status(status);
test_counter += 1;
return status;
}
void SpaceToJSON::TreatAsNumeric(const std::wstring & name)
template<>
bool test<const wchar_t*>(const char * test_msg, const wchar_t * provided, const wchar_t * expected)
{
numeric.insert(name);
std::cout << "test " << test_counter << ": ";
if( test_msg )
std::cout << test_msg;
bool status = (std::wcscmp(provided, expected) == 0);
test_status(status);
test_counter += 1;
return status;
}
}
void SpaceToJSON::TreatAsBool(const wchar_t * name)
{
boolean.insert(name);
}
void SpaceToJSON::TreatAsBool(const std::wstring & name)
{
boolean.insert(name);
}
bool SpaceToJSON::IsNumeric(const std::wstring & name)
{
std::set<std::wstring>::iterator i = numeric.find(name);
return i != numeric.end();
}
bool SpaceToJSON::IsBool(const std::wstring & name)
{
std::set<std::wstring>::iterator i = boolean.find(name);
return i != boolean.end();
}
bool SpaceToJSON::IsTable(const std::wstring & name)
{
std::set<std::wstring>::iterator i = table.find(name);
return i != table.end();
}
} // namespace

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2016, Tomasz Sowa
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -35,66 +35,58 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_mainspaceparser_mainparser
#define headerfile_picotools_mainspaceparser_mainparser
#ifndef headerfile_picotools_tests_test
#define headerfile_picotools_tests_test
#include "space/space.h"
#include <string>
#include <vector>
#include <iostream>
#include <cstring>
namespace PT
namespace pt
{
extern int test_counter;
extern const char * test_msg;
extern bool was_error;
/*
a very little parser for main(int argc, char ** argv) parameters
look in sample/sample.cpp how to use the parser
*/
class MainSpaceParser
void test_status(bool status);
void reset_test_counter();
void reset_test_counter(const char * msg);
template<typename type_t>
bool test(const char * test_msg, type_t provided, type_t expected)
{
public:
std::cout << "test " << test_counter << ": ";
MainSpaceParser();
~MainSpaceParser();
if( test_msg )
std::cout << test_msg;
enum Status
bool status = provided == expected;
test_status(status);
test_counter += 1;
return status;
}
template<typename type_t>
bool test(type_t provided, type_t expected)
{
status_ok = 0,
status_space_not_assigned = 1,
status_syntax_error = 2,
status_reading_eof = 3 /* CHANGE ME give a better name */
};
void SetSpace(Space & space);
Status Parse(int argc, const char ** argv);
void UTF8(bool utf8);
std::wstring & GetErrorToken();
private:
Space * space;
Space * options_space;
std::wstring wide_arg, temp_arg, temp_val;
std::vector<std::wstring> temp_list_val;
bool use_utf8;
Status last_status;
std::wstring last_error_token;
return test(test_msg, provided, expected);
}
void ConvertStr(const char * src, std::wstring & dst);
void Parse(size_t argc, const char ** argv, size_t & argv_index);
void ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index);
void ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index);
size_t RequireOption(const std::wstring & arg);
void AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list);
template<>
bool test<const char*>(const char * test_msg, const char * provided, const char * expected);
};
template<>
bool test<const wchar_t*>(const char * test_msg, const wchar_t * provided, const wchar_t * expected);
} // namespace
}
#endif

View File

@ -1,27 +0,0 @@
# Makefile.o.dep defines the 'o' variable: the list of object files (written by the 'depend' target)
include Makefile.o.dep
# name of the static library built in this directory
libname=utf8.a
all: $(libname)
# pack all object files into the static library
$(libname): $(o)
$(AR) rcs $(libname) $(o)
# compile one source file, -I.. makes the parent directory an include path
%.o: %.cpp
$(CXX) -c $(CXXFLAGS) -I.. $<
# regenerate Makefile.dep (header dependencies from makedepend)
# and Makefile.o.dep (the 'o' list built from the names of all *.cpp files)
depend:
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
echo -n "o = " > Makefile.o.dep
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
# remove the object files and the library
clean:
rm -f *.o
rm -f $(libname)
# header dependencies written by the 'depend' target
include Makefile.dep

View File

@ -1,5 +0,0 @@
# DO NOT DELETE
utf8.o: utf8.h ../textstream/textstream.h ../space/space.h
utf8.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h
utf8.o: ../membuffer/membuffer.h ../textstream/types.h

View File

@ -1 +0,0 @@
o = utf8.o

View File

@ -1,334 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2010-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8
#define headerfile_picotools_utf8_utf8
#include <fstream>
#include <string>
#include "textstream/textstream.h"
namespace PT
{
/*!
UTF-8, a transformation format of ISO 10646
http://tools.ietf.org/html/rfc3629
when wchar_t is 4 bytes long we use UTF-32
when wchar_t is 2 bytes long we use UTF-16 (with surrogate pairs)
UTF-16
http://www.ietf.org/rfc/rfc2781.txt
*/
/*!
returns true if 'c' is a correct unicode character
*/
bool UTF8_CheckRange(int c);
/*!
converting one character from UTF-8 to an int

the character is stored in 'res' and 'correct' tells whether the input sequence was valid,
the return value is used by the callers in this header as the number of consumed input bytes
(zero is treated as the end of an input stream)
*/
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
size_t UTF8ToInt(const char * utf8, int & res, bool & correct);
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct);
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct);
/*!
converting UTF-8 string to a wide string

NOTE(review): clear and mode presumably have the same meaning as in the template versions
defined later in this file (clear: empty 'res' first, mode == 1: put U+FFFD in place
of an incorrect sequence), the definitions are not in this header, confirm in utf8.cpp
*/
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1);
/*!
converting UTF-8 string to a WTextStream stream
(needs to be tested)
*/
/*
implemented as templates below
bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear = true, int mode = 1);
bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear = true, int mode = 1);
bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear = true, int mode = 1);
bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear = true, int mode = 1);
*/
/*!
converting one int character to UTF-8

the version taking a char buffer is used later in this file in such a way
that its return value is the number of bytes written to the buffer
*/
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len);
size_t IntToUTF8(int z, std::string & utf8, bool clear = true );
size_t IntToUTF8(int z, std::ostream & utf8);
/*!
converting a wide string to UTF-8 string
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
// implemented as a template below
//void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear = true, int mode = 1);// not tested
/*!
converting a wide string to UTF-8 and writing it to an output stream
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1);
// implemented as a template below
//void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode = 1);// not tested
/*!
converting a wide string to UTF-8 and writing it to a char buffer of utf8_len bytes,
utf8_written is an output parameter (presumably the number of bytes put to the buffer, confirm in utf8.cpp)
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
// TODO: implement void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
/*!
the same as above but without the utf8_written output parameter
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
// TODO: implement void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, int mode = 1);
namespace private_namespace
{
/*!
	decodes utf8_len bytes from 'utf8' and calls convert_function(int) for each decoded character

	bytes not greater than 0x7f are passed directly, other sequences are decoded by UTF8ToInt(),
	for an incorrect sequence convert_function(0xFFFD) is called when mode is 1,
	for other values of mode the sequence is skipped

	returns true if all sequences were correct
*/
template<typename function_type>
bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
{
	bool all_correct = true;
	size_t consumed = 0;

	for( ; utf8_len > 0 ; utf8 += consumed, utf8_len -= consumed)
	{
		int code_point;
		bool correct;
		const unsigned char first_byte = static_cast<unsigned char>(*utf8);

		if( first_byte <= 0x7f )
		{
			// small optimization: a single byte character
			consumed = 1;
			correct = true;
			code_point = first_byte;
		}
		else
		{
			// the returned length will be different from zero
			consumed = UTF8ToInt(utf8, utf8_len, code_point, correct);
		}

		if( correct )
		{
			convert_function(code_point);
			continue;
		}

		all_correct = false;

		if( mode == 1 )
			convert_function(0xFFFD); // U+FFFD "replacement character"
	}

	return all_correct;
}
/*!
	appends the character 'c' to the stream 'res' as wchar_t value(s)

	when wchar_t is 2 bytes long and 'c' is greater than 0xffff
	two values are appended (UTF-16 surrogate pair), otherwise a single one
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void IntToWide(int c, TextStreamBase<char_type, stack_size, heap_block_size> & res)
{
	const bool surrogate_pair_needed = (sizeof(wchar_t)==2 && c>0xffff);

	if( !surrogate_pair_needed )
	{
		res << static_cast<wchar_t>(c);
		return;
	}

	// UTF16 surrogate pairs: ten high bits go to the first value, ten low bits to the second one
	const int offset = c - 0x10000;
	const int high_surrogate = ((offset >> 10) & 0x3FF) + 0xD800;
	const int low_surrogate = (offset & 0x3FF) + 0xDC00;

	res << static_cast<wchar_t>(high_surrogate);
	res << static_cast<wchar_t>(low_surrogate);
}
/*!
	converts every item of 'buffer' to UTF-8 with IntToUTF8() and passes the result
	to write_function(const char *, std::size_t) in chunks collected in a 256 bytes local buffer

	not tested
	FIX ME surrogate pairs from the input stream are not joined
	and the mode parameter is not used
*/
template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
void WideToUTF8Generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
{
	typedef typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator buffer_iterator;

	char chunk[256];
	const std::size_t chunk_size = sizeof(chunk) / sizeof(char);
	const std::size_t utf8_sequence_max_length = 10;
	std::size_t used = 0;

	for(buffer_iterator i = buffer.begin() ; i != buffer.end() ; ++i)
	{
		// flush the chunk when there might be no room for the next sequence
		if( used + utf8_sequence_max_length > chunk_size )
		{
			write_function(chunk, used);
			used = 0;
		}

		used += PT::IntToUTF8(*i, chunk + used, chunk_size - used);
	}

	if( used > 0 )
		write_function(chunk, used);
}
} // namespace
/*!
	converting utf8_len bytes of an UTF-8 string to a text stream (needs to be tested)

	clear - if true then 'res' is cleared at the beginning
	mode  - passed to UTF8ToWideGeneric() (1: U+FFFD is inserted in place of an incorrect sequence)

	returns the status returned by UTF8ToWideGeneric()
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
{
	if( clear )
		res.clear();

	auto append_to_stream = [&res](int code_point) {
		private_namespace::IntToWide(code_point, res);
	};

	return private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, append_to_stream);
}
/*!
	converting a null terminated UTF-8 string to a text stream (needs to be tested)

	the length of the string is calculated here
	and the work is done by the version taking a pointer and a length
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool UTF8ToWide(const char * utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
{
	const size_t utf8_len = std::char_traits<char>::length(utf8);

	return UTF8ToWide(utf8, utf8_len, res, clear, mode);
}
/*!
	converting an UTF-8 std::string to a text stream (needs to be tested)

	the work is done by the version taking a pointer and a length
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool UTF8ToWide(const std::string & utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
{
	const char * utf8_begin = utf8.c_str();
	size_t utf8_len = utf8.size();

	return UTF8ToWide(utf8_begin, utf8_len, res, clear, mode);
}
/*!
	converting an UTF-8 input stream to a text stream (needs to be tested)

	characters are read with UTF8ToInt() until it returns zero

	clear - if true then 'res' is cleared at the beginning
	mode  - if equal to 1 then U+FFFD is inserted in place of an incorrect sequence,
	        for other values the incorrect sequence is skipped

	returns true if all sequences were correct
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool UTF8ToWide(std::istream & utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
{
	int z;
	bool correct, was_error = false;

	if( clear )
		res.clear();

	while( UTF8ToInt(utf8, z, correct) > 0 )
	{
		if( !correct )
		{
			// the replacement character is inserted through IntToWide() (as the other
			// UTF8ToWide() versions do) so it is put as a wchar_t character,
			// 'res << 0xFFFD' would select the operator<< for an int value
			if( mode == 1 )
				private_namespace::IntToWide(0xFFFD, res); // U+FFFD "replacement character"

			was_error = true;
		}
		else
		{
			private_namespace::IntToWide(z, res);
		}
	}

	return !was_error;
}
/*!
	converting a text stream to an UTF-8 std::string (not tested)

	clear - if true then 'utf8' is cleared at the beginning
	mode  - passed to WideToUTF8Generic()
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, std::string & utf8, bool clear = true, int mode = 1)
{
	if( clear )
		utf8.clear();

	auto append_chunk = [&utf8](const char * chunk, std::size_t chunk_len){
		utf8.append(chunk, chunk_len);
	};

	private_namespace::WideToUTF8Generic(buffer, mode, append_chunk);
}
/*!
	converting a text stream to UTF-8 and writing it to an output stream (not tested)

	mode - passed to WideToUTF8Generic()
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, std::ostream & utf8, int mode = 1)
{
	auto write_chunk = [&utf8](const char * chunk, std::size_t chunk_len){
		utf8.write(chunk, chunk_len);
	};

	private_namespace::WideToUTF8Generic(buffer, mode, write_chunk);
}
} // namespace
#endif